attempting to make a webpage scraper...

06 Aug 2010

So far I am able to load a webpage using a program based on the HTTPClientStreamingExample program and pick out the information that I want. The code is included below.

#include "mbed.h"
#include "EthernetNetIf.h"
#include "HTTPClient.h"

// Ethernet interface; main() calls eth.setup() on it before issuing any request.
EthernetNetIf eth;
// HTTP client used by main() to start the GET request.
HTTPClient http;

// Result code handed to request_callback(); main() compares it with HTTP_OK.
HTTPResult result;
// Set to true by request_callback(); main() polls the network stack until it is.
bool completed = false;
// Callback handed to http.get() in main(). Stores the reported result code in
// the global `result` and raises the global `completed` flag, which is what
// ends main()'s polling loop.
// NOTE(review): presumably invoked once, when the request finishes — confirm
// against the HTTPClient documentation.
void request_callback(HTTPResult r)
{
    completed = true;
    result = r;
}

int main() {
    printf("Start\n");
    printf("Setting up...\n");
    EthernetErr ethErr = eth.setup();
    if (ethErr) {
        printf("Error %d in setup.\n", ethErr);
        return -1;
    }
    printf("Setup OK\n");
    HTTPStream stream;
    char * location;
    char temperature[2];
    char humidity[2];
    char BigBuf[512 + 1] = {0};
    stream.readNext((byte*)BigBuf, 512); 
    
    
    HTTPResult r = http.get("http://mobile.weather.gov/port_mp_ns.php?select=3&CityName=Lubbock&site=LUB&State=TX&warnzone=TXZ035", &stream, request_callback); //Load a very large page
    while (!completed) {
        Net::poll(); //Polls the Networking stack
        if (stream.readable()) {
            BigBuf[stream.readLen()] = 0; //Transform this buffer in a zero-terminated char* string
            
            // look for key words in the html text            
            location = strstr(BigBuf,"Temperature");
            if (location != NULL)
            {
                strncpy(temperature,location+13,2);                
                location = NULL;
            }
                        
            location = strstr(BigBuf,"Humidity");
            if (location != NULL)
            {
                strncpy(humidity,location+10,2);                
                location = NULL;
            }
            
            stream.readNext((byte*)BigBuf, 512); //Buffer has been read, now we can put more data in it
        }
    }
    printf("\n--------------\n");
    if (result == HTTP_OK) {
        printf("Read completely\n");
        printf("Temperature: %s deg F \n",temperature);
        printf("Humidity: %s percent \n",humidity);
        
    } else {
        printf("Error %d\n", result);
    }

    while (1) {

    }
    return 0;
}
This program successfully picks out the temperature and humidity values from a weather site. I would like to encapsulate the data retrieval process in a function that I can call from a Ticker, so that I can log the readings periodically. Any ideas?

Thanks,

Elliot

10 Aug 2010

This program has been turned into a notebook and has been published. People can follow the program here...

http://mbed.org/users/elliotb/notebook/weather-checking-webpage-scraper/