// Adds to |urls| the URL of every @import rule in this sheet that has a
// stylesheet attached, resolved against |base|. Each such imported sheet is then
// walked recursively, with the import's resolved URL serving as its base.
void CSSStyleSheet::addSubresourceURLStrings(HashSet<String>& urls, const String& base) const
{
    RefPtr<CSSRuleList> rules = const_cast<CSSStyleSheet*>(this)->cssRules();

    for (unsigned index = 0; index < rules->length(); ++index) {
        CSSRule* candidate = rules->item(index);
        if (candidate->type() == CSSRule::IMPORT_RULE) {
            CSSImportRule* importedRule = static_cast<CSSImportRule*>(candidate);
            CSSStyleSheet* importedSheet = importedRule->styleSheet();
            if (importedSheet) {
                // Only imports that actually carry a sheet contribute a URL.
                KURL resolvedURL(KURL(base), importedRule->href());
                urls.add(resolvedURL.string());
                importedSheet->addSubresourceURLStrings(urls, resolvedURL.string());
            }
        }
    }
}
void page_handler::process() { //Ok we need to download the page. //Get an instance of curl CURL *curl = 0; //Just cause cout << "Processing " << myArgs->URL << endl; CURLcode res; std::stringstream buffer; curl = curl_easy_init(); if (curl) { //URL Encode the string as to not break anything. string loc(curl_easy_escape(curl, this->myArgs->URL.c_str(), 0)); //Set download URL string get = html_helpers::getDomParserServerURL(loc); curl_easy_setopt(curl, CURLOPT_URL, get.c_str()); //Set our user agent, cause He wants us to. curl_easy_setopt(curl, CURLOPT_USERAGENT, "cs3505_NullPointerExceptionBot"); //No progress bar, just in case curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1); // Set the callback function to send data to as "write_function" aka convert to stringstream curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, this->write_data); // Set the buffer to put temp data into as buffer this will allow for string conversion. curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer); //Download that page! res = curl_easy_perform(curl); if(res != 0){ cout << "I died" << endl; cout << "Error found " << res << endl; } // always cleanup curl_easy_cleanup(curl); //Get the results as a string. string htmlPage = buffer.str(); //JSON TIME! cJSON *root = cJSON_Parse(htmlPage.c_str()); if(cJSON_GetObjectItem(root, "Error") != 0){ //I think this is the error condition if we get a pointer to something //searchflag would we like to track errors and log this? 
this->myArgs->Success = false; return; } /* images links rawText isAmazon isReview isProduct ReviewDescription ReviewerName ProductReviewed */ cJSON *isAmazonNode = cJSON_GetObjectItem(root, "isAmazon"); cJSON *isProductReviewNode = NULL; cJSON *isProductPageNode = NULL; cJSON *imagesNode = cJSON_GetObjectItem(root, "images"); cJSON *linksNode = cJSON_GetObjectItem(root, "links"); cJSON *rawTextNode = cJSON_GetObjectItem(root, "rawText"); cJSON *reviewDescriptionNode = NULL; cJSON *reviewerNameNode = NULL; cJSON *productReviewedNode = NULL; cJSON *pageSizeNode = cJSON_GetObjectItem(root, "somethinghere"); if (isAmazonNode == 0) { cout << "Error: isAmazonNode is null" << endl; } if (imagesNode == 0) { cout << "Error: imagesNode is null" << endl; } if (linksNode == 0) { cout << "Error: linksNode is null" << endl; } if (rawTextNode == 0) { cout << "Error: rawTextNode is null" << endl; } // Not used for now //if (pageSizeNode == 0) //{ // cout << "Error: pageSizeNode is null" << endl; //} if (isAmazonNode != 0) { myArgs->pageStats.setIsAmazon(isAmazonNode->valueint == 1); isProductReviewNode = cJSON_GetObjectItem(root, "isReview"); if (isProductReviewNode != 0) { myArgs->pageStats.setIsAmazonReviewPage(isProductReviewNode->valueint == 1); reviewDescriptionNode = cJSON_GetObjectItem(root, "reviewDescription"); reviewerNameNode = cJSON_GetObjectItem(root, "reviewerName"); productReviewedNode = cJSON_GetObjectItem(root, "productReviewed"); } isProductPageNode = cJSON_GetObjectItem(root, "isProduct"); if (isProductPageNode != 0) { myArgs->pageStats.setIsAmazonProductPage(isProductPageNode->valueint == 1); } } int imageCount = imagesNode->valueint; //The hard part >.< the array of images. 
//cout << "Found " << imageCount << " images"; myArgs->pageStats.setTotalImageCount(imageCount); int totalLinkCount = cJSON_GetArraySize(linksNode); myArgs->pageStats.setTotalLinkCount(totalLinkCount); // searchflag set these values myArgs->pageStats.setTotalKiloBytes(0); string rawText(rawTextNode->valuestring); //Ok time to extract the children... Yeah, I hate the lack of C++ Objects but it is so easy. //cJSON *linksRoot = cJSON_GetObjectItem(root, "links"); //if we begin to run down like a mad man i guess. //cout << "Found " << cJSON_GetArraySize(linksRoot) << " links" << endl; string fullURL(""); for(int i = 0; i < totalLinkCount; i++){ fullURL = html_helpers::getFullURL(string(cJSON_GetArrayItem(linksNode,i)->valuestring), this->myArgs->URL); this->myArgs->links.push_back(fullURL); //cout << "Link to: " << fullURL << endl; } // send rawText to file reader stringstream ss(rawText); vector<string> *words; int misspelledWordsCount = 0; words = FileReader::readLine(&ss); misspelledWordsCount += getMisspelledWordsCount(words, myDictionary); ss.clear(); myArgs->pageStats.setTotalWordCount(words->size()); myArgs->pageStats.setTotalMisspelledWordCount(misspelledWordsCount); delete words; if (myArgs->pageStats.getIsAmazonReviewPage()) { // Use file reader to get words misspelledWordsCount = 0; int totalWordsCount = 0; if(reviewDescriptionNode != 0) { ss << reviewDescriptionNode->valuestring; } words = FileReader::readLine(&ss); misspelledWordsCount += getMisspelledWordsCount(words, myDictionary); totalWordsCount += words->size(); myArgs->productReviewer.update(misspelledWordsCount, totalWordsCount, 0, 0); delete words; } if(reviewDescriptionNode != 0) { string reviewText(reviewDescriptionNode->valuestring); } } else { this->myArgs->Success = false; } }