/*
        CheckTopTen
        ~ For every word in the song's lyrics, looks up that word's node and,
        ~ when the node holds 11 entries (the top 10 plus the current song's
        ~ entry in the last occupied slot), trims it back to 10 entries.
        ~ The current song's entry overwrites the lowest-frequency entry when
        ~ its frequency is strictly higher; otherwise it is simply dropped.
        ~ Words whose table slot is empty are skipped.
*/
void WordTable::CheckTopTen(Song* song){
	for (size_t i = 0; i < song->lyrics.size(); i++){
		uint32_t hash = hashWord(alphaOnly(song->lyrics[i]));
		wordNode* node = table[hash];
		//only nodes holding top 10 songs + current song need trimming;
		//an empty slot has nothing to trim (and must not be dereferenced)
		if (node == NULL || node->numSongs != 11) continue;

		//curr = the current song's info (song + word frequency)
		SongFreq curr = node->songs_and_freqs[node->numSongs-1];

		//one pass finds the first entry with the minimum frequency;
		//the minimum does not depend on where the scan starts, so no
		//outer loop is needed
		size_t minIndex = 0;
		for (size_t k = 1; k < node->numSongs; k++){
			if (node->songs_and_freqs[k].freq <
			    node->songs_and_freqs[minIndex].freq){
				minIndex = k;
			}
		}

		//replace the weakest entry only if the current song beats it
		if (node->songs_and_freqs[minIndex].freq < curr.freq){
			node->songs_and_freqs[minIndex] = curr;
		}

		//drop the 11th slot; the list is back to 10 entries
		node->numSongs--;
	}
}
// makeLocalHashTable: Loop through a song's lyrics, recording for each
// distinct word (after alphaOnly stripping) the positions at which it appears.
// Collisions are resolved by stepping to the next slot, wrapping at the end
// of the array.
//   aSong   - song whose lyrics are read via getLyricsLength()/getLyrics(j)
//   inIndex - stored on each newly created node via setSongIndex()
void LocalHashTable::makeLocalHashTable(Song *aSong, int inIndex) {
    for (int j = 0; j < aSong->getLyricsLength(); j++) { // for all lyrics,
        // expanding if load factor exceeds ~.8
        if (HTLength > (HTCapacity/1.2)) expandLocalHashTable();

        string lyricToAdd = alphaOnly(aSong->getLyrics(j)); // strip formatting

        // An empty word is indistinguishable from an unused slot (both have
        // getTheWord() == ""). Inserting it would bump HTLength and attach
        // positions to a slot that a later word would then inherit.
        if (lyricToAdd == "") continue;

        const uint32_t capacity = static_cast<uint32_t>(HTCapacity);
        uint32_t hashCode = hash_string(lyricToAdd); // find hash position
        uint32_t iter = hashCode % capacity;

        // Probe at most once around the table so a full table cannot hang.
        for (uint32_t probes = 0; probes < capacity; probes++) {
            if (freqNodes[iter].getTheWord() == "") { // empty node, initializing
                freqNodes[iter].setTheWord(lyricToAdd);
                freqNodes[iter].setSongIndex(inIndex);
                freqNodes[iter].addPosition(j);
                HTLength++;
                break;
            }
            if (freqNodes[iter].getTheWord() == lyricToAdd) {
                freqNodes[iter].addPosition(j); // a match has been found
                break;
            }
            iter = (iter + 1) % capacity; // next slot, wrapping around
        }
    }
}
// expandLocalHashTable: Allocate a larger node array, re-insert every stored
// word at its position under the new capacity, then release the old array.
void LocalHashTable::expandLocalHashTable() {
    int newCap = (HTCapacity*2)-1; // effectively doubling, but keeping odd
    // 2n-1 does not grow a capacity of 1 (or less); force real growth.
    if (newCap <= HTCapacity) newCap = HTCapacity + 2;

    FreqNode* temp = new FreqNode[newCap];
    const uint32_t cap = static_cast<uint32_t>(newCap);

    for (int i = 0; i < HTCapacity; i++) {
        // if you find a word node, rehash the word and set the new location
        string iterword = freqNodes[i].getTheWord();
        if (iterword == "") continue; // unused slot

        uint32_t hashPosition = hash_string(alphaOnly(iterword));
        hashPosition = hashPosition % cap;

        // Probe with wrap-around, matching how makeLocalHashTable searches.
        // Without the wrap, an entry whose probe run reached the end of the
        // array would be dropped from the table.
        for (uint32_t probes = 0; probes < cap; probes++) {
            if (temp[hashPosition].getTheWord() == "") {
                temp[hashPosition] = freqNodes[i];
                break;
            }
            hashPosition = (hashPosition + 1) % cap;
        }
    }
    delete[] freqNodes;
    freqNodes = temp;
    HTCapacity = newCap;
}
Example #4
0
// print: For the song at songlist[song.index], print title, artist and a
// context line for each occurrence of theword (compared after alphaOnly
// stripping). The context is up to 5 words before and 5 words after the
// occurrence, clamped to the bounds of the lyrics. Stops scanning once
// song.frequency occurrences have been printed.
void Data::print(songwithfreq song, string theword){
    size_t count = 0;
    const size_t total = songlist[song.index].lyrics.size();

    for (size_t i = 0; i < total; i++){
        if (alphaOnly(songlist[song.index].lyrics[i]) == theword){
            cout << "Title: " << songlist[song.index].title << endl;
            cout << "Artist: " << songlist[song.index].artist << endl;
            cout << "Context: ";

            // Clamp the window at both ends. The upper clamp matters for
            // short songs, where i + 6 can exceed the number of words even
            // when the match is among the first five.
            const size_t first = (i < 5) ? 0 : (i - 5);
            const size_t last = (i + 6 < total) ? (i + 6) : total;
            for (size_t j = first; j < last; j++){
                cout << songlist[song.index].lyrics[j] << " ";
            }
            cout << endl;
            count++;
            cout << endl;
        }
        if (count == song.frequency) break;
    }
}
/*
        analyzeSong
        ~ Walks the song's lyrics and records each word in the hash table.
        ~ A word whose slot is empty gets a new wordNode with this song as
        ~ its first songs_and_freqs entry (freq 1); a word whose slot is
        ~ already occupied is handed to duplicateWord.
*/
void WordTable::analyzeSong(Song* song){
	for (size_t pos = 0; pos < song->lyrics.size(); ++pos){
		//grow the table first when the load factor is above LOADMAX
		if (load_factor() > LOADMAX){
			expand();
		}

		string cleaned = alphaOnly(song->lyrics[pos]);
		uint32_t slot = hashWord(cleaned);

		//slot already occupied: let duplicateWord handle it
		if (table[slot] != NULL){
			duplicateWord(slot, song);
			continue;
		}

		//first sighting: create the node with one song entry
		wordNode *fresh = new wordNode;
		fresh->word = cleaned;
		fresh->songs_and_freqs[0].song = song;
		fresh->songs_and_freqs[0].freq = 1;
		fresh->numSongs = 1;
		table[slot] = fresh;
		numWords++;
	}
}
Example #6
0
// search: Strip the query with alphaOnly, ask the table for its list of
// songs, and print each one via print(). An empty result prints a single
// blank line. The report always ends with the <END-OF-REPORT> marker.
void Data::search(string word){
    string theword = alphaOnly(word);
    vector<songwithfreq> matches = thetable.search(theword);

    if (matches.empty()){
        cout << endl;
    }

    // no-op when there are no matches
    for (vector<songwithfreq>::iterator it = matches.begin();
         it != matches.end(); ++it){
        print(*it, theword);
    }

    cout << "<END-OF-REPORT>" << endl;
}
Example #7
0
// read_lyrics: Read songs from filename. Each record is an artist line, a
// title line, then whitespace-separated words up to the <BREAK> token.
// Every word is inserted into thetable (stripped with alphaOnly) under the
// current song index, and the raw words are passed to process() together
// with the title and artist. When show_progress is set, a status line is
// printed every 10000 songs.
void Data::read_lyrics(char * filename, bool show_progress)
{
    ifstream in(filename);
    int song_count = 0;          // for progress indicator
    string artist, title, word;  // data from the file
    vector<string> lyrics;
    size_t index = 0;

    // Stop as soon as either header line cannot be read.
    while (getline(in, artist) && getline(in, title))
    {
        if (show_progress && (++song_count % 10000 == 0)) {
            cout << "At " << song_count <<
            " Artist: " << artist <<
            " Title:" << title << endl;
        }

        // Collect words until the <BREAK> token or the input runs out.
        while (in >> word) {
            if (word == "<BREAK>") break;
            lyrics.push_back(word);
            thetable.insert_word(alphaOnly(word), index);
        }

        // Important: skip the single character left behind after the token
        in.ignore();

        process(title, artist, lyrics);
        lyrics.clear();
        index++;
    }
}
/*
        search
        ~ Takes in a word, hashes it to find the word in the hash table 
        ~ Creates a descending list of the songs_and_freqs array for that word.

*/
void WordTable::search(){
	string word;
	//cout << "Put Word " << endl;
	cin >> word;
	uint32_t hash = hashWord(alphaOnly(word));
	
	if (table[hash]!= NULL) {
		wordNode* node = table[hash];
		
		Sort(node);
		
		//songs_and_freqs array now in descending order
		//calls printContext 
		for (size_t i = 0; i < node->numSongs; i++){
		  printContext(node->songs_and_freqs[i].song, node->word);
		}
	}
	
	cout << "<END OF REPORT>" << endl;
}
/*
        printContext
        ~ For every occurrence of word in the song (compared after
        ~ alphaOnly stripping), prints the title, the artist and a context
        ~ line holding up to five words before and five words after the
        ~ occurrence, clamped to the start and end of the lyrics.
*/
void WordTable::printContext(Song* song, string word){
	for (size_t at = 0; at < song->lyrics.size(); ++at){
		if (!(alphaOnly(song->lyrics[at]) == word)) continue;

		cout << "Title: " << song->title << endl;
		cout << "Artist: " << song->artist << endl;
		cout << "Context: ";

		//window is [at-5, at+5], cut off at either end of the lyrics
		size_t first = (at < 5) ? 0 : at - 5;
		size_t stop = at + 6;
		if (stop > song->lyrics.size()) stop = song->lyrics.size();

		for (size_t w = first; w < stop; ++w){
			cout << song->lyrics[w] << " ";
		}
		cout << endl << endl;
	}
}