/* * processFile should: * 1) Create a new instance of your MyTrends class * 2) Read in the file whose words you want to count: The file name is passed in as fname * 3) Add all words to the Trends data structure, and calculate the time per call to addtoTrends * 4) Use getNthPopular and getCount to print out the total results in fname + ".out" */ int processFile(const char* fname){ Trends* tr = new MyTrends(); // First, read in the file ifstream in(fname); string s; vector<string> wordlist; while(in >> s){ // s = reduceWord(s); if (s.size() > 0) wordlist.push_back(s); } // We only want to time how long addToTrends takes, so we get // the starting time, which is the clock time, in milliseconds std::clock_t start, end; start = std::clock(); // int start = timeGetTime(); // Now add all the words to the Trends data structure for(unsigned int i = 0; i < wordlist.size(); i++){ tr->increaseCount(wordlist[i], 1); } // Now get the end time // int end = timeGetTime(); end = std::clock(); double millisec = (end - start) / ((double)CLOCKS_PER_SEC / 1000); std::cout << "Time: " << ((1000.0 * millisec) / wordlist.size()) << " microseconds per word" << endl; // Now we will print out the complete results. This could be REALLY slow, if // your getNthPopular is not a little bit smart. int p = ((string)fname).rfind('.'); string outfname = ((string)fname).substr(0, p) + ".out"; ofstream out(outfname.c_str()); for(int i=0; i< tr->numEntries(); i++){ string s = tr->getNthPopular(i); out << tr->getCount(s) << ": " << s << endl; } out.close(); delete tr; return end - start; }
/** * This tests a simple (but unlikely) use case, which is to read in all the data, and then print out the data in sorted order * based on popularity. * * Compare your 28885.txt.out to 28885_txt.out, using diff,s to see if your code is producing correct output. */ double useCase_addAllThenGetInOrder(){ Trends* tr = new smarterTrends(); //You will need to change this to match your own class! std::vector<std::string> wordlist = getWordList("data/28885.txt"); //We only want to time how long addToTrends takes, so we get // the starting time, which is the clock time, in milliseconds double start = getTimeInMillis(); //Now add all the words to the Trends data structure for(unsigned int i=0; i<wordlist.size(); i++){ tr->increaseCount(wordlist[i],1); } //Now get the end time double end = getTimeInMillis(); std::cout << "increaseCount time: " << (end-start)/wordlist.size() << " ms per word" << std::endl; //Now we will print out the complete results. This could be REALLY clow, if // your getNthPopular is not a little bit smart. std::string outfname = "data/28885.txt.out"; std::ofstream out(outfname.c_str()); start = getTimeInMillis(); for(unsigned int i=0; i< tr->numEntries(); i++){ std::string s = tr->getNthPopular(i); out << tr->getCount(s) << ": " << s << std::endl; } out.close(); end = getTimeInMillis(); std::cout << "getNth followed by getCount, time: " << (end - start) / tr->numEntries() << " ms per entry" << std::endl; delete tr; return end - start; }
/* * processFile should: * 1) Create a new instance of your trends class * 2) Read in the file whose words you want to count: The file name is passed in as fname * 3) Add all words to the Trends data structure, and calculate the time per call to addtoTrends * 4) Use getNthPopular and getCount to print out the total results in fname + ".out" */ int processFile(const char* fname){ Trends* tr = new brinkmwjTrends(); //You will need to change this to match your own class! //First, read in the file // I'm not doing much error checking here, because I am a bad person std::ifstream in(fname); std::string s; std::vector<std::string> wordlist; while(in >> s){ wordlist.push_back(s); } //We only want to time how long addToTrends takes, so we get // the starting time, which is the clock time, in milliseconds clock_t start = clock(); //Now add all the words to the Trends data structure for(unsigned int i=0; i<wordlist.size(); i++){ tr->increaseCount(wordlist[i],1); } //Now get the end time clock_t end = clock(); std::cout << "Time: " << ((1000.0*(end - start)/CLOCKS_PER_SEC)/wordlist.size()) << " ms per word" << std::endl; //Now we will print out the complete results. This could be REALLY clow, if // your getNthPopular is not a little bit smart. std::string outfname = fname; outfname = outfname + ".out"; std::ofstream out(outfname.c_str()); for(int i=0; i< tr->numEntries(); i++){ std::string s = tr->getNthPopular(i); out << tr->getCount(s) << ": " << s << std::endl; } out.close(); delete tr; return end - start; }