Exemple #1
0
// Compares the ETag reported by `header` with the ETag recorded on `url`.
//
// Returns true only when the header's ETag is non-empty and is not equal to
// url.getEtag(). Returns false when the header's ETag is empty or when both
// ETags compare equal.
bool ETagUrlChecker::check(HttpHeader& header, Url&url) {
    // The empty-ETag test comes first and short-circuits, so url.getEtag()
    // is only consulted when the header actually carries an ETag.
    const bool missingOrSame =
        header.getEtag() == "" || header.getEtag() == url.getEtag();
    return !missingOrSame;
}
Exemple #2
0
// Fetches the page for `url` via cli.requestWebPage() and decides, from the
// MD5 string of the fetched content, whether the page has to be stored again.
//
// Side effects visible in this function:
//   - writes "url:" and "header:" lines to `ofile`;
//   - writes "url:" and "urlEtag:" lines to cout;
//   - when the content MD5 string is non-empty and differs from
//     url.getPageMD5Str(): updates the ETag, age (set to 0) and page MD5 on
//     `url`, then calls saveNewPage() and reportNewWebPage().
void wmtor::CheckUrl_wholePage(Url &url, HttpClient&cli, ofstream &ofile) {
    HttpHeader responseHeader;
    HttpContent pageBody;
    cli.requestWebPage(url, responseHeader, pageBody);

    // Log the request outcome to the file first, then to the console.
    ofile << "url:" << url.getUrlStr() << endl;
    ofile << "header:" << responseHeader.getHeaderStr() << endl;
    cout << "url:" << url.getUrlStr() << endl;
    cout << "urlEtag:" << responseHeader.getEtag() << endl;

    string freshDigest = pageBody.getMD5Str();

    // Nothing to record when there is no digest or it matches the stored one.
    const bool pageChanged =
        !freshDigest.empty() && freshDigest != url.getPageMD5Str();
    if (!pageChanged) {
        return;
    }

    // Bring the URL record up to date before persisting and reporting.
    url.setEtag(responseHeader.getEtag());
    url.setAge(0);
    url.setPageMD5Str(freshDigest);
    saveNewPage(url, pageBody, responseHeader);
    reportNewWebPage(url);
}
Exemple #3
0
// Requests only the header for `url` via cli.requestHeader() and uses its
// ETag to decide whether the content has to be downloaded.
//
// Side effects visible in this function:
//   - writes "url:" and "header:" lines to `ofile`;
//   - writes "url:" and "urlEtag:" lines to cout;
//   - when the header's ETag is non-empty and differs from url.getEtag():
//     requests the content (passing header.getContentLength()), updates the
//     ETag, age (from header.getAge()) and page MD5 on `url`, then calls
//     savePage() and reportNewWebPage().
// When the header's ETag is empty, nothing beyond the logging happens.
void wmtor::CheckUrl(Url &url, HttpClient&cli, ofstream &ofile) {
    HttpHeader responseHeader;
    cli.requestHeader(url, responseHeader);

    // Log the request outcome to the file first, then to the console.
    ofile << "url:" << url.getUrlStr() << endl;
    ofile << "header:" << responseHeader.getHeaderStr() << endl;
    cout << "url:" << url.getUrlStr() << endl;
    cout << "urlEtag:" << responseHeader.getEtag() << endl;

    // The emptiness test short-circuits, so url.getEtag() is only read when
    // the response actually carries an ETag.
    const bool etagChanged = responseHeader.getEtag() != "" &&
                             responseHeader.getEtag() != url.getEtag();
    if (!etagChanged) {
        return;
    }

    // The ETag differs from the stored one: fetch the content and refresh
    // the URL record.
    HttpContent pageBody;
    cli.requestContent(url, pageBody, responseHeader.getContentLength());
    string freshDigest = pageBody.getMD5Str();

    url.setEtag(responseHeader.getEtag());
    url.setAge(responseHeader.getAge());
    url.setPageMD5Str(freshDigest);
    savePage(pageBody, responseHeader);
    reportNewWebPage(url);
}