Exemplo n.º 1
0
/**
 * Reports a URL whose page has changed.
 *
 * When "newPageReport.txt" can be opened for appending, a timestamp followed
 * by the URL string and the page MD5 string is written to it. The same
 * URL/MD5 lines are always printed to standard output, whether or not the
 * report file could be opened.
 *
 * @param url  URL being reported; read through getUrlStr() and
 *             getPageMD5Str().
 */
void wmtor::reportNewWebPage(Url& url) {
    ofstream report("newPageReport.txt", ios::app);
    if (report.is_open()) {
        time_t now = time(NULL);
        // The text returned by ctime() already ends in '\n', so the extra
        // endl leaves a blank line after the timestamp (and flushes).
        report << ctime(&now) << endl;
        report << "Url Changed:" << url.getUrlStr();
        report << "\nPageMD5:" << url.getPageMD5Str() << endl;
        report.close();
    }

    // Console copy of the report entry (no timestamp).
    cout << "Url Changed:" << url.getUrlStr();
    cout << "\nPageMD5:" << url.getPageMD5Str() << endl;
}
Exemplo n.º 2
0
/**
 * Requests a page and detects a change by comparing content MD5 strings.
 *
 * The URL string and the response header string are appended to `ofile`;
 * the URL string and the header's ETag are echoed to standard output.
 * If the fetched content has a non-empty MD5 that differs from the MD5
 * stored on `url`, the URL's ETag and MD5 are replaced with the fresh
 * values, its age is set to 0, and the page is handed to saveNewPage()
 * and reportNewWebPage().
 *
 * @param url    URL to check; updated in place when a change is detected.
 * @param cli    HTTP client used to issue the request.
 * @param ofile  Open stream receiving the URL/header log lines.
 */
void wmtor::CheckUrl_wholePage(Url &url, HttpClient&cli, ofstream &ofile) {
    HttpHeader header;
    HttpContent content;
    cli.requestWebPage(url, header, content);

    // Log what was fetched: full header to the file, ETag to the console.
    ofile << "url:" << url.getUrlStr() << endl;
    ofile << "header:" << header.getHeaderStr() << endl;
    cout << "url:" << url.getUrlStr() << endl;
    cout << "urlEtag:" << header.getEtag() << endl;

    string freshMD5 = content.getMD5Str();

    // An empty MD5 is never treated as a change.
    const bool changed = !freshMD5.empty() && freshMD5 != url.getPageMD5Str();
    if (!changed) {
        return;
    }

    url.setEtag(header.getEtag());
    url.setAge(0);
    url.setPageMD5Str(freshMD5);
    saveNewPage(url, content, header);
    reportNewWebPage(url);
}
Exemplo n.º 3
0
void wmtor::start() {
    if (urlist.empty()) {
        cout << "Empty queue." << endl;
        return;
    }
    else {
        ofstream recordFile("recordFile.txt");
        if (!recordFile.is_open()) {
            cerr << "Cannot open recordFile.txt!" << endl;
            return;
        }

        int count = 0;
        int max_count = 500;
        HttpClient cli;
        ofstream ofile("hfiles.txt", ios::app);
        if (!ofile.is_open()) {
            cerr << "Cannot open file: header.txt" << endl;
            recordFile.close();
            return;
        }

        char *t_str;
        time_t cur_time;

        priority_queue<Url> urlist2;
        while (count < max_count) {
            cout << "Now checking for " << ++count << endl;
            cur_time = time(NULL);
            t_str = ctime(&cur_time);
            ofile << t_str << endl;
            //free(t_str);
            while (!urlist.empty()) {
                Url url = urlist.top();
                urlist.pop();
                CheckUrl_wholePage(url, cli, ofile);
                recordFile << count << '\t' << url.getUrlStr() << '\t';
                recordFile << url.getPageMD5Str() << '\t' << url.getAge() << '\t' << url.getEtag() << endl;
                urlist2.push(url);
            }
#ifdef _MSC_VER
            Sleep(sleep_time);
#else
            sleep(sleep_time);
#endif

            cout << "Now checking for " << ++count << endl;
            cur_time = time(NULL);
            t_str = ctime(&cur_time);
            ofile << t_str << endl;
            //free(t_str);
            while (!urlist2.empty()) {
                Url url = urlist2.top();
                urlist2.pop();
                CheckUrl_wholePage(url, cli, ofile);
                recordFile << count << '\t' << url.getUrlStr() << '\t';
                recordFile << url.getPageMD5Str() << '\t' << url.getAge() << '\t' << url.getEtag() << endl;
                urlist.push(url);
            }
#ifdef _MSC_VER
            Sleep(sleep_time);
#else
            sleep(sleep_time);
#endif

        }
        ofile.close();
        recordFile.close();
    }
}