void test_add_all_get_count() { std::cout << "test: add all words then get count of random words" << std::endl; std::vector<std::string> words = getWordList("data/28885.txt"); std::ofstream out("data/28885.txt.out" + OUT_FILE); std::random_device rd; std::default_random_engine rand(rd()); TRENDS_CLASS tr; for (unsigned int i = 0; i < words.size(); i++){ tr.increaseCount(words[i], 1); } double start = getTimeInMillis(); for (unsigned int i = 0; i < words.size(); i++) { std::string w = words[rand() % words.size()]; unsigned int temp = tr.getCount(w); out << w << " " << temp << "\n"; } double end = getTimeInMillis(); out.close(); std::cout << "time per item: " << (end - start) / words.size() << " ms " << std::endl << "total time: " << (end - start) << " ms" << std::endl << std::endl; }
void test_hardest(void) { std::cout << "test: add all words, get a random nth popular word, increase that " << "words count by 1 then get the most common word along with its count" << std::endl; std::vector<std::string> words = getWordList("data/28885.txt"); std::random_device rd; std::default_random_engine rand(rd()); TRENDS_CLASS tr; for (unsigned int i = 0; i < words.size(); i++){ tr.increaseCount(words[i], 1); } double start = getTimeInMillis(); for (unsigned int i = 0; i < words.size(); i++) { unsigned int temp = rand() % tr.numEntries(); std::string word = tr.getNthPopular(temp); tr.increaseCount(word, 1); word = tr.getNthPopular(0); temp = tr.getCount(word); } double end = getTimeInMillis(); std::cout << "time per item: " << (end - start) / words.size() << " ms " << std::endl << "total time: " << (end - start) << " ms" << std::endl << std::endl; }
void test_print_words_in_order_with_count(void) { std::cout << "test: add all words then print them in order with word count" << std::endl; std::vector<std::string> words = getWordList("data/28885.txt"); std::ofstream out("data/28885.txt.out" + OUT_FILE); TRENDS_CLASS tr; for(unsigned int i = 0; i < words.size(); i++) { tr.increaseCount(words[i], 1); } double start = getTimeInMillis(); for(unsigned int i = 0; i < tr.numEntries(); i++) { std::string word = tr.getNthPopular(i); out << tr.getCount(word) << " " << word << "\n"; } double end = getTimeInMillis(); out.close(); std::cout << "time per item: " << (end - start) / words.size() << " ms " << std::endl << "total time: " << (end - start) << " ms" << std::endl << std::endl; }
void test_add_all_increment_get_pop(void) { std::cout << "test: add all words then increment every word by 1 then get the most " << "popular word" << std::endl; std::vector<std::string> words = getWordList("data/28885.txt"); std::ofstream out("data/28885.txt.out" + OUT_FILE); TRENDS_CLASS tr; for (unsigned int i = 0; i < words.size(); i++){ tr.increaseCount(words[i], 1); } double start = getTimeInMillis(); for (unsigned int i = 0; i < words.size(); i++) { tr.increaseCount(words[i], 1); std::string pop = tr.getNthPopular(0); out << pop << "\n"; } double end = getTimeInMillis(); out.close(); std::cout << "time per item: " << (end - start) / words.size() << " ms " << std::endl << "total time: " << (end - start) << " ms" << std::endl << std::endl; }
/** * This tests a simple (but unlikely) use case, which is to read in all the data, and then print out the data in sorted order * based on popularity. * * Compare your 28885.txt.out to 28885_txt.out, using diff,s to see if your code is producing correct output. */ double useCase_addAllThenGetInOrder(){ Trends* tr = new smarterTrends(); //You will need to change this to match your own class! std::vector<std::string> wordlist = getWordList("data/28885.txt"); //We only want to time how long addToTrends takes, so we get // the starting time, which is the clock time, in milliseconds double start = getTimeInMillis(); //Now add all the words to the Trends data structure for(unsigned int i=0; i<wordlist.size(); i++){ tr->increaseCount(wordlist[i],1); } //Now get the end time double end = getTimeInMillis(); std::cout << "increaseCount time: " << (end-start)/wordlist.size() << " ms per word" << std::endl; //Now we will print out the complete results. This could be REALLY clow, if // your getNthPopular is not a little bit smart. std::string outfname = "data/28885.txt.out"; std::ofstream out(outfname.c_str()); start = getTimeInMillis(); for(unsigned int i=0; i< tr->numEntries(); i++){ std::string s = tr->getNthPopular(i); out << tr->getCount(s) << ": " << s << std::endl; } out.close(); end = getTimeInMillis(); std::cout << "getNth followed by getCount, time: " << (end - start) / tr->numEntries() << " ms per entry" << std::endl; delete tr; return end - start; }
void test_adding(void) { std::cout << "test: add all words" << std::endl; std::vector<std::string> words = getWordList("data/28885.txt"); TRENDS_CLASS tr; double start = getTimeInMillis(); for(unsigned int i = 0; i < words.size(); i++){ tr.increaseCount(words[i], 1); } double end = getTimeInMillis(); std::cout << "time per item: " << (end - start) / words.size() << " ms " << std::endl << "total time: " << (end - start) << " ms" << std::endl << std::endl; }
/**
 * Filters the candidate words of each input position.
 *
 * getWordList(wordsIn) supplies one list of candidate words per position and
 * getConceptsInWordList() supplies the concepts found in those candidates.
 * A candidate is kept when haveReqs() returns true for at least one
 * requirement group of its concept, checked against those concepts.
 *
 * @param wordsIn  the input words to resolve.
 * @return one vector per candidate list (same order, same outer size), each
 *         holding the candidates that passed the check; a list with no
 *         passing candidate yields an empty vector.
 */
vector<vector<Word *>> Web::getRealConcept(vector<string> wordsIn)
{
    vector<vector<Word *>> possibleWords = getWordList(wordsIn);
    vector<Node *> conceptsPresent = getConceptsInWordList(possibleWords);

    vector<vector<Word *>> realWordSet;
    realWordSet.reserve(possibleWords.size());

    for(const vector<Word *> &candidates : possibleWords)
    {
        // Always add an entry so the result stays index-aligned with the input.
        realWordSet.push_back(vector<Word *>());
        vector<Word *> &accepted = realWordSet.back();

        for(Word *candidate : candidates)
        {
            for(int k = 0; k < candidate->getConcept()->getNumReqGrp(); k++)
            {
                if(haveReqs(candidate->getConcept()->getReqGrp(k)->getReqPtr(), conceptsPresent) == true)
                {
                    accepted.push_back(candidate);
                    break; // one satisfied group is enough; avoid duplicates
                }
            }
        }
    }

    return realWordSet;
}
void test_add_get_count_of_word(void) { std::cout << "test: add a word then get the count of that word" << std::endl; std::vector<std::string> words = getWordList("data/28885.txt"); std::ofstream out("data/28885.txt.out" + OUT_FILE); TRENDS_CLASS tr; double start = getTimeInMillis(); for(unsigned int i = 0; i < words.size(); i++){ tr.increaseCount(words[i], 1); unsigned int value = tr.getCount(words[i]); out << words[i] << " " << value << "\n"; } double end = getTimeInMillis(); out.close(); std::cout << "time per item: " << (end - start) / words.size() << " ms " << std::endl << "total time: " << (end - start) << " ms" << std::endl << std::endl; }
/**
 * This can either be a basis for a member function of some class or static function or whatever...
 *
 * Skeleton that reads song records line by line from the file named by
 * songsFileName. For each record it reads a title line, a tags line and then
 * either a lyrics line followed by a "lyrics by" line, or an instruments line
 * followed by a "performed by" line and possibly a bpm line. The values are
 * only stored in local variables; the places marked TODO are where the
 * caller-specific handling still has to be written.
 *
 * NOTE(review): "xxxx" is a placeholder for the return type and class name,
 * and the function currently ends without a return statement (see
 * "return something" at the bottom). If the real return type is non-void, a
 * return must be added before this is used.
 *
 * NOTE(review): every line.substr(pos) assumes the line is at least pos
 * characters long; std::string::substr throws std::out_of_range otherwise.
 */
xxxx xxxx::readSongsFromFile(std::string songsFileName) {
  std::ifstream instream(songsFileName.c_str());
  if (!instream.is_open()) {
    // The failure is only reported; the loop below is then skipped because
    // instream.good() is false for a stream that failed to open.
    std::cerr << "Error! Can't open file: " << songsFileName << "." << std::endl;
  }
  std::string line = "";
  int lastSong = 0; // set to 1 once the END_OF_SONGS line has been read
  while(instream.good() && !lastSong) {
    // `line` carries over from the previous iteration: if it already holds
    // the separator (read while looking for a bpm line), do not read another.
    if (line.compare(SEPARATOR) != 0) {
      getline(instream, line); // Expect a line of "="
      if (END_OF_SONGS.compare(line) == 0) {
        lastSong = 1;
        break;
      }
    }
    getline(instream, line); // Expect a line of "title: ..."
    // presumably the + 2 skips the ": " that follows the keyword -- TODO confirm
    size_t pos = TITLE.size() + 2;
    std::string title = line.substr(pos);
    getline(instream, line); // Expect a line of "tags: {...}"
    std::string tags = getWordList(line);
    std::string lyrics = "";
    std::string lyricsBy = "";
    std::string instruments = "";
    std::string performedBy = "";
    std::string bpmStr = "";
    getline(instream, line); // Expect either lyrics or instruments.
    // The line is treated as lyrics when it starts with the LYRICS keyword.
    if (line.substr(0, LYRICS.size()).compare(LYRICS) == 0) {
      // Then we have a lyric song
      // Lets get the lyrics:
      lyrics = getWordList(line);
      // Lets get the lyricsBy:
      getline(instream, line);
      pos = LYRICS_BY.size() + 2;
      lyricsBy = line.substr(pos);
      // TODO....
    } else {
      // Then we have an instrumental song
      // Lets get the instruments:
      instruments = getWordList(line);
      // Lets get the performedBy:
      getline(instream, line);
      pos = PERFORMED_BY.size() + 2;
      performedBy = line.substr(pos);
      // Lets see if we have bpm:
      if (!instream.good()) {
        break;
      }
      // This line is either a bpm line, the separator before the next song,
      // or the end-of-songs marker (enforced by the assert below).
      getline(instream, line);
      if (END_OF_SONGS.compare(line) == 0) {
        lastSong = 1;
      }
      if (line.substr(0, BPM.size()).compare(BPM) == 0) {
        pos = BPM.size() + 2;
        bpmStr = line.substr(pos);
        // TODO....
      } else {
        assert ( (line.compare(SEPARATOR) == 0) || (line.compare(END_OF_SONGS) == 0));
        // TODO....
      }
    }
  }
  instream.close();
  // return something
}
int main() { int counter = 0, orig_words_len = 0, i; char orig_words[LEXICON_LEN][MAX_STR]; // Initialize orig_words for (counter=0;counter<LEXICON_LEN;counter++) orig_words[counter][0] = '\0'; puts(" GET WORDS "); getWordList(orig_words); puts(" GOT 'EM "); for (counter=0;counter<LEXICON_LEN;counter++) { if (orig_words[counter][0] == '\0') { orig_words_len = counter; break; } } // Initialize original lexicon short lexicon[orig_words_len][WORD_LEN]; for (i=0;i<orig_words_len;i++) { letterCount(orig_words[i], lexicon[i]); lexicon[i][END] = i; } // Initialize input short inpt[WORD_LEN]; const char *inpt_str = "wellpunchmeinthefac"; letterCount(inpt_str, inpt); // Create Original Lexicon Queue... /* Add to orig_lex the index of the words that could possibly be spelled with the letters in the input. This list will be passed to the finding function */ lexiptr orig_lex; for (i=0;i<orig_words_len;i++) if (wordCheck(inpt, lexicon[i])) PushQ(&orig_lex, i); // Initialize results short temp_results[SINGLE_RESULT]; for (i=0;i<SINGLE_RESULT;i++) temp_results[i] = -1; short results[MAX_RESULTS][SINGLE_RESULT]; for (counter=0;counter<MAX_RESULTS;counter++) for (i=0;i<SINGLE_RESULT;i++) results[counter][i] = -1; /* Current Variables: char orig_words[][30], short lexicon[][27], short inpt[], short temp_results[], short results[][15], next_result */ puts(" GO! "); printf("Start = %d\n", time(NULL)); MainLoop(lexicon, inpt, temp_results, results, 0, orig_lex, orig_words); printf("Finish = %d, %i results found.\n", time(NULL), COUNT_RESULTS); for (counter=0;counter<50;counter++) { if (results[counter][0] == -1) continue; for (i=0;i<SINGLE_RESULT;i++) if (results[counter][i] != -1) printf("%s ", orig_words[results[counter][i]]); puts(""); } return 1; }