예제 #1
0
// Tokenize the file named `fname` and record every word in the inverted index.
//
// A word is a maximal run of ASCII letters (a-z, A-Z), converted to lowercase.
// Any other character ends the current word. A run that reaches
// MAX_STRING_LEN letters is cut there and the remaining letters start a new
// word. For each word, inverted_index.add(word, file name) is called once.
//
// If the file cannot be opened, nothing is added to the index.
void processFile (const char *fname, InvertedIndex& inverted_index) {

	ifstream inputfile;  // ifstream for reading from input file.
	inputfile.open (fname);
	if (!inputfile) {
		// Open failed: there is nothing to read.
		return;
	}
	string fnames(fname); // file name as a string object rather than a c-style string.

	char c;
	char curr_str[MAX_STRING_LEN];  // Letters of the word being assembled.
	int str_i = 0;  // Number of letters currently stored in curr_str.

	// The read itself is the loop condition, so the body only ever sees a
	// character that was actually extracted. Testing good() before the read
	// would process the last character a second time once get() fails at EOF.
	while (inputfile.get(c)) {
		// tolower requires a value representable as unsigned char (or EOF);
		// passing a negative plain char is undefined behaviour.
		c = static_cast<char>(tolower(static_cast<unsigned char>(c)));

		bool flush_it = false;  // Whether we have a complete string to flush.

		if (c >= 'a' && c <= 'z') {
			// c is a letter: append it to the current word.
			curr_str[str_i] = c;
			str_i++;

			// Buffer is full: emit what we have before the next write would overflow it.
			if (str_i >= MAX_STRING_LEN) {
				flush_it = true;
			}
		} else if (str_i > 0) {
			// c is a delimiter and a word is pending.
			flush_it = true;
		}

		if (flush_it) {
			// Insert the string-file_name tuple into the inverted index.
			inverted_index.add(string(curr_str, str_i), fnames);
			str_i = 0;
		}
	}

	// The file may end in the middle of a word (no trailing delimiter);
	// emit that final word instead of silently dropping it.
	if (str_i > 0) {
		inverted_index.add(string(curr_str, str_i), fnames);
	}
}