Esempio n. 1
0
/**********************************************************************
                            main
***********************************************************************
Author: Katherine MacMillan
***********************************************************************
The main function creates an instance of the hashtable class, and then
uses the second command line argument (argv[1]) to open a text file.
Once the file has been verified as opened, main reads in all of the
lines in the file, converting them to lowercase, and then parsing it
into individual word tokens.

The word tokens are stored in a vector of strings, and then as they are
inserted into the hashtable, they are counted to keep track of the
total number of words read in.

After all of the words are inserted into the hashtable all of the 
entries in the hashtable are copied into an array of integer-string
pairs. This array is then sorted, first by integer value and then
alphabetically.

After the list is sorted two files, a .wrd and a .cvs file are written
outputting the words and their frequency count.

The main function times the process of reading in, hashing, sorting,
and outputting the data. This time is displayed to the user before 
the program exits.

A return of 0 indicates that the program ran correctly.
A return of 1 indicates that an incorrect number of commandline 
	arguments was entered by the user.
A return of 2 indicates that the file indicated by the user could
	not be opened.
**********************************************************************/
int main( int argc, char** argv )
{

	Hashtable table;
	vector<string> tokens;
	string temp;

	// track longest string count and total word count
    int maxStrLen = 0;
    int wordCount = 0;

    // must be two arguments
    if ( argc != 2 )
    {
		cout << "Wrong number of arguments\nUsage: zipf <file.txt>" << endl;
		return 1;
    }

	// make sure file opens
    ifstream infile( argv[1] );
    if ( !infile )
	{
		cout << "Unable to open the file " << argv[1] << endl;
		return 2;
	}

	// time process
    clock_t t = clock();

	while ( !infile.eof() )
	{
		// read in line, make all characters lowercase and tokenize into words
		getline( infile, temp );
		transform( temp.begin(), temp.end(), temp.begin(), ::tolower );
		Tokenize( temp, tokens, VALID );
	}

	for ( auto t: tokens ) 
	{
		// insert each token, count total words, and check string length
		table.Insert( t );
		wordCount++;
		if ( t.length() > (unsigned) maxStrLen )
			maxStrLen = t.length();			
	}

	// retrieve total entry count and table size
	int count = table.GetEntryCount();
	int size = table.GetSize();

    // copy table data to array of  pairs
	tableEntry* wordList = new pair<int, string> [count+1];
    int j = 0;
    for ( int i = 0; i < size; i++ )
    {
    	int c = table.GetCount( i );
    	if ( c != 0 )
    	{
    		wordList[j].first = c;
    		wordList[j].second = table.GetKey( i );
    		j++;
    	}
    }

    // sort list and write data to file
    qsort( wordList, count, sizeof( tableEntry ), TableComparator );
    WriteFiles( argv[1],wordList, wordCount, maxStrLen, count );

    // display program runtime
    t = clock() - t;
    cout << "Time to read in, hash, sort and write out data: " << (float) t / CLOCKS_PER_SEC << " seconds" << endl;

	return 0;
}