/*
 * Writes an HTML overview of the known file types to fp.
 *
 * The output is a title, a heading, a short tagline, the number of file
 * types (efNR), and a <dl> list with one <dt>/<dd> entry per file type
 * index in [0, efNR). Each entry is built from the values returned by
 * ftp2ext, ftp2defnm, ftp2ftype and the check_html-processed ftp2desc
 * for that index.
 */
void pr_htmldefs(FILE *fp)
{
    int ftp = 0;

    /* Fixed page header. */
    fputs("<title>GROMACS</title>\n", fp);
    fputs("<h1>GROMACS Files</h1>\n", fp);
    fputs("<b>GRO</b>ningen <b>MA</b>chine for <b>S</b>imulating <b>C</b>hemistry\n", fp);
    fputs("<p>\n", fp);
    fprintf(fp, "The following %d filetypes are used by Gromacs:\n", efNR);

    /* One definition-list entry per file type. */
    fputs("<dl>\n", fp);
    while (ftp < efNR)
    {
        fprintf(fp, "<dt><a href=\"%s.html\">%s.%s</a> (%s)<dd>%s\n",
                ftp2ext(ftp), ftp2defnm(ftp), ftp2ext(ftp), ftp2ftype(ftp),
                check_html(ftp2desc(ftp), NULL));
        ftp++;
    }
    fputs("</dl>\n", fp);
}
// Definition of stringsearch() function void session::stringsearch(){ // Whole body is in loop // Loop ends if the list is empty or a certain number of urls have been parsed do{ // File that stores source code of URLs std::ofstream FILE("source.txt"); std::ofstream wordfile("index.txt", std::ios::app|std::ios::out); // Object of url class created url u(list.front()); // Object of Http class (found in SFML) created // Http class only takes host names as input, so host name passed sf::Http site(u.host()); //Generate the request, i.e the path within the webpage sf::Http::Request request(u.path()); // Send the request to generate a response sf::Http::Response response = site.sendRequest(request); // Check the status code and display the result sf::Http::Response::Status status = response.getStatus(); if (status == sf::Http::Response::Ok){ FILE << response.getBody() << std::endl; } else{ std::cout << "Error " << status << std::endl; std::cout << "Skipping webpage. " << std::endl; list.pop(); continue; } FILE.close(); std::cout << "Popping URL: " << list.front() << std::endl << std::endl; Sleep(1000); // Condition check to see if file is of html format if (check_html() == false){ std::cout << "\nWebpage not in html format. Skipping... " << std::endl << std::endl; list.pop(); continue; } // Condition check to ensure URL hasn't been crawled previously. First URL is skipped if (count > 1 && check_urls(list.front()) == true){ std::cout << "\nWebpage has already been crawled. 
Skipping...\n\n"; list.pop(); continue; } // Opens file for reading std::ifstream file("source.txt"); std::string s1; // String stores lines from html file std::string s2 = "href=\""; // Reference string to locate hyperlinks char s3[300]; // C string variable to store hyperlink URLs // Condition check to ensure file is open if (file.is_open()){ do{ // Both string storage variables initialised to null s1 = ""; std::getline(file, s1); // Gets line by line information from the page source std::size_t location = s1.find(s2); // Variable to point to location in string int i = 0; // Check to see if location is within string, i.e the desired element has been found if (location != std::string::npos){ std::cout << "Hyperlink found: "; // Location set to first element of hyperlink // Adding 6 accounts for the 6 tag characters, i.e href" std::size_t x = location + 6; // Loop adds characters to array until quotation mark is reached while (s1.at(x) != '\"'){ s3[i] = s1.at(x); x++; i++; } // Last character set to termination character s3[i] = '\0'; std::string s4 = s3; // Converts C string to std::string std::cout << s4 << std::endl; // Creates url object that takes s4 as constructor argument url u2(s4); // Condition check to see if hyperlink has a host // If it doesn't, it means it is an extension of the current webpage // So current webpage is appended to it if (u2.host() == ""){ s4 = u.protocol() + "://" + u.host() + s4; } std::cout << "Adding URL to queue..." 
<< std::endl; // Hyperlink is added to queue list.push(s4); std::cout << s4 << std::endl << std::endl; } if (wordfile.is_open()){ std::string word = "Boost"; std::size_t point = s1.find(word); if (point != std::string::npos){ // while () wordfile << list.front() << std::endl; wordfile << word << std::endl; } // else{ // std::cout << "word not found" << std::endl; //std::cout << "\n"; // } } else{ std::cout << "index could not be opened" << std::endl; } } while (!file.eof()); // Adds current webpage to list of URLs parsed urls_parsed(list.front()); // Removes current webpage from queue list.pop(); file.close(); wordfile.close(); std::cout << "\n\nCount: " << count << std::endl; count++; Sleep(1000); } else{ std::cout << "Error in opening file" << std::endl; exit(-1); } //Sleep(1000); } while (count <= 10 && list.empty() == false); remove("source.txt"); remove("urls.txt"); return; }