/* CheckTopTen
 ~ Goes through the lyrics of the song and, for every word whose node is
 ~ currently holding 11 entries (the 10 kept songs plus the current song in
 ~ the last occupied slot), decides whether the current song's frequency
 ~ earns it a place among the kept entries. The node is then trimmed back
 ~ to 10 entries.
 ~
 ~ song: the song whose lyrics are walked; must not be NULL.
 */
void WordTable::CheckTopTen(Song* song){
    for (size_t i = 0; i < song->lyrics.size(); i++){
        uint32_t hash = hashWord(alphaOnly(song->lyrics[i]));
        wordNode* node = table[hash];

        // Only nodes holding top 10 songs + current song need trimming.
        // The NULL test protects against a slot that was never filled.
        if (node == NULL || node->numSongs != 11) continue;

        // curr = the current song's info (song + frequency of the word)
        SongFreq curr = node->songs_and_freqs[node->numSongs-1];

        // Single pass: first entry holding the minimum frequency. The scan
        // includes curr itself, in which case the comparison below is
        // false and nothing is replaced.
        size_t minIndex = 0;
        for (size_t k = 1; k < node->numSongs; k++){
            if (node->songs_and_freqs[k].freq <
                node->songs_and_freqs[minIndex].freq){
                minIndex = k;
            }
        }

        // Replace the weakest entry only if the current song beats it.
        if (node->songs_and_freqs[minIndex].freq < curr.freq){
            node->songs_and_freqs[minIndex] = curr;
        }

        // Drop the extra (11th) slot.
        node->numSongs--;
    }
}
// makeLocalHashTable: Loop through a song's lyrics, making an association // list with information about how many times each word appears. void LocalHashTable::makeLocalHashTable(Song *aSong, int inIndex) { string lyricToAdd; uint32_t hashCode; uint32_t hashPosition; for (int j = 0; j < aSong->getLyricsLength(); j++) { // for all lyrics, // expanding if load factor exceeds ~.8 if (HTLength > (HTCapacity/1.2)) expandLocalHashTable(); lyricToAdd = alphaOnly(aSong->getLyrics(j)); //strip lyric of formatting hashCode = hash_string(lyricToAdd); // find hashposition hashPosition = hashCode % (uint32_t) HTCapacity; uint32_t iter = hashPosition; while ((iter % (uint32_t) HTCapacity) < (uint32_t) HTCapacity) { iter = iter % (uint32_t) HTCapacity; if (freqNodes[iter].getTheWord() == "") { //empty node, initializing freqNodes[iter].setTheWord(lyricToAdd); freqNodes[iter].setSongIndex(inIndex); freqNodes[iter].addPosition(j); HTLength++; break; } if (freqNodes[iter].getTheWord() == lyricToAdd) { freqNodes[iter].addPosition(j); // a match has been found break; } iter++; } } }
void LocalHashTable::expandLocalHashTable() { int newCap = (HTCapacity*2)-1; // effectively doubling, but keeping odd FreqNode* temp = new FreqNode[newCap]; for (int i = 0; i < HTCapacity; i++) { // if you find a word node, rehash the word and set the new location string iterword = freqNodes[i].getTheWord(); if (iterword != "") { uint32_t hashPosition = hash_string(alphaOnly(iterword)); hashPosition = hashPosition % (uint32_t) newCap; while (hashPosition < (uint32_t) newCap) { if (temp[hashPosition].getTheWord() == "") { temp[hashPosition] = freqNodes[i]; break; } hashPosition++; } } } delete[] freqNodes; freqNodes = temp; HTCapacity = newCap; }
// print: For the song identified by song.index, print title, artist and a
// context window (up to 5 words before and 5 words after) for occurrences of
// theword in its lyrics. Scanning stops once the number of printed
// occurrences equals song.frequency.
//   song    - index of the song in songlist plus the word's frequency in it
//   theword - the word to look for, compared against alphaOnly() of each lyric
void Data::print(songwithfreq song, string theword){
    const size_t total = songlist[song.index].lyrics.size();
    size_t count = 0;

    for (size_t i = 0; i < total; i++){
        if (alphaOnly(songlist[song.index].lyrics[i]) == theword){
            cout << "Title: " << songlist[song.index].title << endl;
            cout << "Artist: " << songlist[song.index].artist << endl;
            cout << "Context: ";

            // Print 5 before and after each instance, unless there aren't 5.
            // Both ends are clamped to the lyrics so a match near the start
            // of a short song cannot read past the last word.
            const size_t first = (i < 5) ? 0 : i - 5;
            const size_t last = (i + 6 < total) ? i + 6 : total;
            for (size_t j = first; j < last; j++){
                cout << songlist[song.index].lyrics[j] << " ";
            }
            cout << endl;
            count++;
            cout << endl;
        }
        // Every expected occurrence has been printed; no need to scan on.
        if (count == song.frequency) break;
    }
}
/* analyzeSong
 ~ Feeds every lyric of the song into the hash table. A word whose slot is
 ~ still empty gets a brand-new wordNode crediting this song with one
 ~ occurrence; a word whose slot is taken is handed to duplicateWord().
 */
void WordTable::analyzeSong(Song* song){
    for (size_t idx = 0; idx < song->lyrics.size(); idx++){
        // Grow first so the slot computed below refers to the final table.
        if (load_factor() > LOADMAX) expand();

        string cleaned = alphaOnly(song->lyrics[idx]);
        uint32_t slot = hashWord(cleaned);

        // Slot already occupied: the word is in the table already.
        if (table[slot] != NULL){
            duplicateWord(slot, song);
            continue;
        }

        // First sighting of this word: build its node.
        wordNode *fresh = new wordNode;
        fresh->word = cleaned;
        fresh->songs_and_freqs[0].song = song;
        fresh->songs_and_freqs[0].freq = 1;
        fresh->numSongs = 1;

        table[slot] = fresh;
        numWords++;
    }
}
// search: Look a word up in the table and print a report for every song the
// table returns for it, followed by the end-of-report marker.
//   word - the query as typed; punctuation is stripped before the lookup
void Data::search(string word){
    string cleaned = alphaOnly(word);

    // Songs the table returns for the stripped word.
    vector<songwithfreq> hits = thetable.search(cleaned);

    // No hits: emit a single blank line in place of any song reports.
    if (hits.empty())
        cout << endl;

    for (vector<songwithfreq>::iterator it = hits.begin();
         it != hits.end(); ++it){
        print(*it, cleaned);
    }

    cout << "<END-OF-REPORT>" << endl;
}
void Data::read_lyrics(char * filename, bool show_progress) { ifstream in(filename); // creates an input stream int song_count = 0; // for progress indicator string artist, title, word; // data from the file vector<string> lyrics; size_t index = 0; // -- While more data to read... while (!in.eof()) { // -- First line is the artist getline(in, artist); if (in.fail()) break; // -- Second line is the title getline(in, title); if (in.fail()) break; if ( show_progress ) { song_count++; if (song_count % 10000 == 0) { cout << "At " << song_count << " Artist: " << artist << " Title:" << title << endl; } } // -- Then read all words and insert into HT until we hit the // -- special <BREAK> token while ( in >> word && word != "<BREAK>" ){ lyrics.push_back(word); thetable.insert_word(alphaOnly(word), index); } // -- Important: skip the newline left behind in.ignore(); process(title, artist, lyrics); lyrics.clear(); index++; } }
/* search ~ Takes in a word, hashes it to find the word in the hash table ~ Creates a descending list of the songs_and_freqs array for that word. */ void WordTable::search(){ string word; //cout << "Put Word " << endl; cin >> word; uint32_t hash = hashWord(alphaOnly(word)); if (table[hash]!= NULL) { wordNode* node = table[hash]; Sort(node); //songs_and_freqs array now in descending order //calls printContext for (size_t i = 0; i < node->numSongs; i++){ printContext(node->songs_and_freqs[i].song, node->word); } } cout << "<END OF REPORT>" << endl; }
/* printContext
 ~ For every occurrence of the searched word in the song, prints the title,
 ~ the artist and a context line made of up to five words before the
 ~ occurrence, the occurrence itself and up to five words after it.
 */
void WordTable::printContext(Song* song, string word){
    const size_t total = song->lyrics.size();

    for (size_t pos = 0; pos < total; pos++){
        if (alphaOnly(song->lyrics[pos]) != word) continue;

        // Window of at most 11 words, clipped to the bounds of the lyrics.
        const size_t from = (pos < 5) ? 0 : pos - 5;
        const size_t upto = (pos + 6 < total) ? pos + 6 : total;

        cout << "Title: " << song->title << endl;
        cout << "Artist: " << song->artist << endl;
        cout << "Context: ";
        for (size_t w = from; w < upto; w++){
            cout << song->lyrics[w] << " ";
        }
        cout << endl << endl;
    }
}