// Read every word in this file. // Insert a pair into the hash table (word,fname) void processFile (const char *fname, InvertedIndex& inverted_index) { ifstream inputfile; // ifstream for reading from input file. inputfile.open (fname); string fnames(fname); // file name as a string object, not as a char * (c-style string, which is an array of characters with \0 at the end). // Tokenize the input. // Read one character at a time. // If the character is not in a-z or A-Z, terminate current string. char c; char curr_str[MAX_STRING_LEN]; int str_i = 0; // Index into curr_str. bool flush_it = false; // Whether we have a complete string to flush. while (inputfile.good()) { // Read one character, convert it to lowercase. inputfile.get(c); c = tolower(c); if (c >= 'a' && c <= 'z') { // c is a letter. curr_str[str_i] = c; str_i++; // Check over-length string. if (str_i >= MAX_STRING_LEN) { flush_it = true; } } else { // c is not a letter. // Create a new string if curr_str is non-empty. if (str_i>0) { flush_it = true; } } if (flush_it) { // Create the new string from curr_str. string the_str(curr_str,str_i); // cout << the_str << endl; // Insert the string-file_name tuple into the inverted index. inverted_index.add(the_str,fnames); // cout << "Add " << the_str << "," << fname << endl; // Reset state variables. str_i = 0; flush_it = false; } } }