/********************************************************************** main *********************************************************************** Author: Katherine MacMillan *********************************************************************** The main function creates an instance of the hashtable class, and then uses the second commannd line argument to open a text file. Once the file has been verified as opened, main reads in all of the lines in the file, converting them to lowercase, and then parsing it into individual word tokens. The word tokens are stored in a vector of strings, and then as they are inserted into the hashtable, they are counted to keep track of the total number of words read in. After all of the words are inserted into the hashtable all of the entries in the hashtable are copied into an array of integer-string pairs. This array is then sorted, first by integer value and then alphabetically. After the list is sorted two files, a .wrd and a .cvs file are written outputting the words and their frequency count. The main function times the process of reading in, hashing, sorting, and outputting the data. This time is displayed to the user before the program exits. A return of 0 indicates that the program ran correctly. A return of 1 indicates that an incorrect number of commandline arguments was entered by the user. A return of 2 indicates that the file indicated by the user could not be opened. **********************************************************************/ int main( int argc, char** argv ) { Hashtable table; vector<string> tokens; string temp; // track longest string count and total word count int maxStrLen = 0; int wordCount = 0; // must be two arguments if ( argc != 2 ) { cout << "Wrong number of arguments\nUsage: zipf <file.txt>" << endl; return 1; } // make sure file opens ifstream infile( argv[1] ); if ( !infile ) { cout << "Unable to open the file " << argv[1] << endl; return 2; } // time process clock_t t = clock(); while ( !infile.eof() ) { // read in line, make all characters lowercase and tokenize into words getline( infile, temp ); transform( temp.begin(), temp.end(), temp.begin(), ::tolower ); Tokenize( temp, tokens, VALID ); } for ( auto t: tokens ) { // insert each token, count total words, and check string length table.Insert( t ); wordCount++; if ( t.length() > (unsigned) maxStrLen ) maxStrLen = t.length(); } // retrieve total entry count and table size int count = table.GetEntryCount(); int size = table.GetSize(); // copy table data to array of pairs tableEntry* wordList = new pair<int, string> [count+1]; int j = 0; for ( int i = 0; i < size; i++ ) { int c = table.GetCount( i ); if ( c != 0 ) { wordList[j].first = c; wordList[j].second = table.GetKey( i ); j++; } } // sort list and write data to file qsort( wordList, count, sizeof( tableEntry ), TableComparator ); WriteFiles( argv[1],wordList, wordCount, maxStrLen, count ); // display program runtime t = clock() - t; cout << "Time to read in, hash, sort and write out data: " << (float) t / CLOCKS_PER_SEC << " seconds" << endl; return 0; }