Ejemplo n.º 1
0
/*
 * processFile counts the words of one text file using a MyTrends instance.
 *
 *  1) Create a new instance of your MyTrends class.
 *  2) Read every whitespace-separated word of the file named by fname.
 *  3) Add each word with increaseCount(word, 1); only this loop is timed and
 *     the average cost per word is printed to standard output.
 *  4) Use getNthPopular and getCount to write one "<count>: <word>" line per
 *     entry to the output file. The output file is fname with its extension
 *     replaced by ".out", or with ".out" appended when the file name itself
 *     has no extension.
 *
 * If fname cannot be opened, a warning is printed to standard error and the
 * function carries on with an empty word list.
 *
 * Returns the number of std::clock() ticks (not milliseconds) spent in the
 * increaseCount loop, converted to int.
 */
int processFile(const char* fname){
	Trends* tr = new MyTrends();
	const std::string path(fname);

	// First, read in the file. A failed open is reported rather than being
	// silently treated as an empty file.
	std::ifstream in(fname);
	if (!in)
		std::cerr << "Warning: could not open " << path << std::endl;

	std::string s;
	std::vector<std::string> wordlist;
	while (in >> s) {
		// s = reduceWord(s);
		// operator>> never yields an empty token, so this check only matters
		// if a normalisation step like the disabled call above is put back.
		if (!s.empty())
			wordlist.push_back(s);
	}

	// We only want to time how long the increaseCount calls take, so the
	// clock is sampled immediately before and after that loop.
	const std::clock_t start = std::clock();
	for (std::vector<std::string>::size_type i = 0; i < wordlist.size(); ++i) {
		tr->increaseCount(wordlist[i], 1);
	}
	const std::clock_t end = std::clock();

	// Convert ticks to milliseconds, then to microseconds per word. With no
	// words there is nothing to average, so report 0 instead of dividing by 0.
	const double millisec = (end - start) / (static_cast<double>(CLOCKS_PER_SEC) / 1000);
	const double microsecPerWord = wordlist.empty() ? 0.0 : (1000.0 * millisec) / wordlist.size();
	std::cout << "Time: " << microsecPerWord << " microseconds per word" << std::endl;

	// Build the output name. Only a dot inside the final path component, and
	// not at its very start, counts as an extension; otherwise a path such as
	// "./words" or "a.b/words" would be cut at the wrong dot.
	const std::string::size_type sep = path.find_last_of("/\\");
	const std::string::size_type nameStart = (sep == std::string::npos) ? 0 : sep + 1;
	const std::string::size_type dot = path.rfind('.');
	const bool hasExtension = (dot != std::string::npos) && (dot > nameStart);
	const std::string outfname = (hasExtension ? path.substr(0, dot) : path) + ".out";

	// Now we will print out the complete results. This could be REALLY slow, if
	// your getNthPopular is not a little bit smart.
	std::ofstream out(outfname.c_str());
	for (int i = 0; i < tr->numEntries(); i++) {
		std::string word = tr->getNthPopular(i);
		// '\n' instead of endl: no need to flush the file after every line.
		out << tr->getCount(word) << ": " << word << '\n';
	}
	out.close();

	delete tr;

	return static_cast<int>(end - start);
}
Ejemplo n.º 2
0
/**
 * This tests a simple (but unlikely) use case, which is to read in all the data, and then print out the data in sorted order
 * based on popularity.
 *
 * Compare your 28885.txt.out to 28885_txt.out, using diff,s to see if your code is producing correct output.
 */
double useCase_addAllThenGetInOrder(){
	Trends* tr = new smarterTrends(); //You will need to change this to match your own class!

	std::vector<std::string> wordlist = getWordList("data/28885.txt");

	//We only want to time how long addToTrends takes, so we get
	// the starting time, which is the clock time, in milliseconds
	double start = getTimeInMillis();
	//Now add all the words to the Trends data structure
	for(unsigned int i=0; i<wordlist.size(); i++){
		tr->increaseCount(wordlist[i],1);
	}
	//Now get the end time
	double end = getTimeInMillis();
	std::cout << "increaseCount time: " << (end-start)/wordlist.size() << " ms per word" << std::endl;

	//Now we will print out the complete results. This could be REALLY clow, if
	// your getNthPopular is not a little bit smart.
	std::string outfname = "data/28885.txt.out";
	std::ofstream out(outfname.c_str());

	start = getTimeInMillis();
	for(unsigned int i=0; i< tr->numEntries(); i++){
		std::string s = tr->getNthPopular(i);
		out << tr->getCount(s) << ": " << s << std::endl;
	}
	out.close();
	end = getTimeInMillis();
	std::cout << "getNth followed by getCount, time: " << (end - start) / tr->numEntries() << " ms per entry" << std::endl;

	delete tr;

	return end - start;
}
Ejemplo n.º 3
0
/*
 * processFile counts the words of one text file using a brinkmwjTrends instance.
 *
 *  1) Create a new instance of your trends class.
 *  2) Read every whitespace-separated word of the file named by fname.
 *  3) Add each word with increaseCount(word, 1); only this loop is timed and
 *     the average cost per word is printed to standard output.
 *  4) Use getNthPopular and getCount to write one "<count>: <word>" line per
 *     entry to fname + ".out".
 *
 * If fname cannot be opened, a warning is printed to standard error and the
 * function carries on with an empty word list.
 *
 * Returns the number of clock() ticks (not milliseconds) spent in the
 * increaseCount loop, converted to int.
 */
int processFile(const char* fname){
	Trends* tr = new brinkmwjTrends(); //You will need to change this to match your own class!

	// First, read in the file. A failed open is reported rather than being
	// silently treated as an empty file.
	std::ifstream in(fname);
	if (!in)
		std::cerr << "Warning: could not open " << fname << std::endl;

	std::string s;
	std::vector<std::string> wordlist;
	while (in >> s) {
		wordlist.push_back(s);
	}

	// We only want to time how long the increaseCount calls take, so the
	// clock is sampled immediately before and after that loop.
	const clock_t start = clock();
	for (std::vector<std::string>::size_type i = 0; i < wordlist.size(); ++i) {
		tr->increaseCount(wordlist[i], 1);
	}
	const clock_t end = clock();

	// Ticks -> milliseconds -> milliseconds per word. With no words there is
	// nothing to average, so report 0 instead of dividing by 0.
	const double totalMs = 1000.0 * (end - start) / CLOCKS_PER_SEC;
	const double msPerWord = wordlist.empty() ? 0.0 : totalMs / wordlist.size();
	std::cout << "Time: " << msPerWord << " ms per word" << std::endl;

	// Now we will print out the complete results. This could be REALLY slow, if
	// your getNthPopular is not a little bit smart.
	std::string outfname = fname;
	outfname += ".out";
	std::ofstream out(outfname.c_str());
	for (int i = 0; i < tr->numEntries(); i++) {
		std::string word = tr->getNthPopular(i);
		// '\n' instead of endl: no need to flush the file after every line.
		out << tr->getCount(word) << ": " << word << '\n';
	}
	out.close();

	delete tr;

	return static_cast<int>(end - start);
}