示例#1
0
文件: prfn.c 项目: Ruyk/gromacs
/*
 * Write an HTML overview of the GROMACS file types to fp.
 *
 * Output consists of a title, a heading, a short introduction that states
 * the number of file types (efNR), and a <dl> list with one <dt>/<dd> entry
 * per file type index in [0, efNR). Each entry links to "<ext>.html" and
 * shows the values returned by ftp2defnm(), ftp2ext(), ftp2ftype() and the
 * result of passing ftp2desc() through check_html().
 */
void pr_htmldefs(FILE *fp)
{
  int ftype = 0;

  /* Page header and introduction, emitted in a single call. */
  fprintf(fp,
          "<title>GROMACS</title>\n"
          "<h1>GROMACS Files</h1>\n"
          "<b>GRO</b>ningen <b>MA</b>chine for <b>S</b>imulating <b>C</b>hemistry\n"
          "<p>\n"
          "The following %d filetypes are used by Gromacs:\n"
          "<dl>\n",
          efNR);

  /* One definition-list entry per known file type. */
  while (ftype < efNR) {
    fprintf(fp,"<dt><a href=\"%s.html\">%s.%s</a> (%s)<dd>%s\n",
            ftp2ext(ftype),
            ftp2defnm(ftype),
            ftp2ext(ftype),
            ftp2ftype(ftype),
            check_html(ftp2desc(ftype),NULL));
    ftype++;
  }

  fprintf(fp,"</dl>\n");
}
// Definition of stringsearch() function
void session::stringsearch(){

	// Whole body is in loop
	// Loop ends if the list is empty or a certain number of urls have been parsed
	do{

		// File that stores source code of URLs
		std::ofstream FILE("source.txt");

		std::ofstream wordfile("index.txt", std::ios::app|std::ios::out);

		// Object of url class created
		url u(list.front());

		// Object of Http class (found in SFML) created
		// Http class only takes host names as input, so host name passed
		sf::Http site(u.host());

		//Generate the request, i.e the path within the webpage
		sf::Http::Request request(u.path());

		// Send the request to generate a response
		sf::Http::Response response = site.sendRequest(request);

		// Check the status code and display the result
		sf::Http::Response::Status status = response.getStatus();
		if (status == sf::Http::Response::Ok){
			FILE << response.getBody() << std::endl;
		}
		else{
			std::cout << "Error " << status << std::endl;
			std::cout << "Skipping webpage. " << std::endl;
			list.pop();
			continue;
		}
		FILE.close();

		std::cout << "Popping URL: " << list.front() << std::endl << std::endl;
		Sleep(1000);

		// Condition check to see if file is of html format
		if (check_html() == false){
			std::cout << "\nWebpage not in html format. Skipping... " << std::endl << std::endl;
			list.pop();
			continue;
		}

		// Condition check to ensure URL hasn't been crawled previously. First URL is skipped
		if (count > 1 && check_urls(list.front()) == true){
			std::cout << "\nWebpage has already been crawled. Skipping...\n\n";
			list.pop();
			continue;
		}

		// Opens file for reading
		std::ifstream file("source.txt");

		std::string s1; // String stores lines from html file
		std::string s2 = "href=\""; // Reference string to locate hyperlinks
		char s3[300]; // C string variable to store hyperlink URLs

		// Condition check to ensure file is open
		if (file.is_open()){

			do{
				// Both string storage variables initialised to null
				s1 = "";

				std::getline(file, s1); // Gets line by line information from the page source
				
				std::size_t location = s1.find(s2); // Variable to point to location in string

				int i = 0;
				// Check to see if location is within string, i.e the desired element has been found
				if (location != std::string::npos){
					std::cout << "Hyperlink found: ";

					// Location set to first element of hyperlink
					// Adding 6 accounts for the 6 tag characters, i.e href"
					std::size_t x = location + 6;

					// Loop adds characters to array until quotation mark is reached
					while (s1.at(x) != '\"'){
						s3[i] = s1.at(x);
						x++;
						i++;
					}

					// Last character set to termination character
					s3[i] = '\0';

					std::string s4 = s3; // Converts C string to std::string

					std::cout << s4 << std::endl;

					// Creates url object that takes s4 as constructor argument
					url u2(s4);

					// Condition check to see if hyperlink has a host
					// If it doesn't, it means it is an extension of the current webpage
					// So current webpage is appended to it
					if (u2.host() == ""){
						s4 = u.protocol() + "://" + u.host() + s4;
					}

					std::cout << "Adding URL to queue..." << std::endl;

					// Hyperlink is added to queue
					list.push(s4);

					std::cout << s4 << std::endl << std::endl;
				}

				if (wordfile.is_open()){
					std::string word = "Boost";

					std::size_t point = s1.find(word);
					
					if (point != std::string::npos){

				//		while ()

						wordfile << list.front() << std::endl;
						wordfile << word << std::endl;
					}
				//	else{
					//	std::cout << "word not found" << std::endl;
						//std::cout << "\n";
				//	}
				}
				else{
					std::cout << "index could not be opened" << std::endl;
				}

			} while (!file.eof());

			// Adds current webpage to list of URLs parsed
			urls_parsed(list.front());
			
			// Removes current webpage from queue
			list.pop();
			file.close();
			wordfile.close();

			std::cout << "\n\nCount: " << count << std::endl;
			count++;

			Sleep(1000);			
		}
		else{
			std::cout << "Error in opening file" << std::endl;
			exit(-1);
		}
		//Sleep(1000);
	} while (count <= 10 && list.empty() == false);
	remove("source.txt");
	remove("urls.txt");

	return;
}