// Merges sourceWordCount into targetWordCount.
//
// For every entry of the source, `weight * count` is added to the entry with
// the same key in the target. A key that the target does not yet hold is first
// created with a count of zero, so it ends up with exactly the weighted count.
// The source table is only read.
void CopyWordCounts( WordCount& targetWordCount, WordCount& sourceWordCount, int weight )
{
    wciter src = sourceWordCount.begin();
    while( src != sourceWordCount.end() )
    {
        // Make sure the slot exists before accumulating into it.
        const bool missing = ( targetWordCount.find( src->first ) == targetWordCount.end() );
        if( missing )
        {
            targetWordCount[ src->first ] = 0;
        }

        targetWordCount[ src->first ] += weight * src->second;
        ++src;
    }
}
// Writes the whole position table to stdout for inspection.
//
// Each key produces one header line, followed by one "word: count" line for
// every entry of the WordCount that the key maps to. The mapped pointer is
// dereferenced without a null check, exactly as callers have always relied on.
void DumpPositionTable( PositionText& ptext )
{
    ptiter entry = ptext.begin();
    while( entry != ptext.end() )
    {
        cout << "DumpPositionTable for key value ='" << entry->first << "'" << endl;

        WordCount* counts = entry->second;
        wciter item = counts->begin();
        while( item != counts->end() )
        {
            cout << item->first << ": " << item->second << endl;
            ++item;
        }

        ++entry;
    }
}
string GetRandWord( WordCount& wordCount, int accumulation ) { WordCount wordTable; int totalCount = 0; // cout << "-----[ RANDOM TABLE ]-----" << endl; int terminalWeight = 0; if( accumulation > 5 ) { int distance = accumulation - 5; terminalWeight = distance * distance * distance; } for( wciter iwc=wordCount.begin(); iwc!=wordCount.end(); iwc++ ) { totalCount += iwc->second * iwc->second; if( iwc->first == BEGIN_KEY || iwc->first == END_KEY ) { totalCount += terminalWeight; } wordTable[ iwc->first ] = totalCount; // cout << totalCount << " " << iwc->first << endl; } int roll = rand() % totalCount; // cout << " --> roll: " << roll << endl; totalCount = 0; string word; // cout << "-----[ SAME TABLE? ]-----" << endl; for( wciter iwc=wordTable.begin(); iwc!=wordTable.end(); iwc++ ) { // cout << iwc->second << " " << iwc->first << endl; if( roll <= iwc->second ) { word = iwc->first; break; } } // cout << " --> word: " << word << endl; return word; }
// Finds the word with the highest count in wordCount.
//
// Only counts strictly greater than zero are considered. When several words
// share the highest count, the one met first in iteration order is kept. An
// empty string is returned if no entry qualifies (including an empty table).
string GetMaxWord( WordCount& wordCount )
{
    string best;
    int bestCount = 0;

    wciter cur = wordCount.begin();
    while( cur != wordCount.end() )
    {
        if( bestCount < cur->second )
        {
            bestCount = cur->second;
            best = cur->first;
        }
        ++cur;
    }

    return best;
}
// impl by chenshuo void sort_words_by_frequencies(const WordCount& counts) { typedef std::vector<std::pair<int, WordCount::const_iterator> > FreqList; FreqList freq; freq.reserve(counts.size()); for (WordCount::const_iterator it = counts.begin(); it != counts.end(); ++it) { freq.push_back(make_pair(it->second, it)); } std::sort(freq.begin(), freq.end(), Greater()); for (FreqList::iterator itr = freq.begin(); itr!=freq.end(); ++itr) { std::cout << itr->first << '\t' << itr->second->first << '\n'; } }
int main( int argc, char* argv[] ) { bool isEnd; string triplet, pair, token, word; string peripenultimate = WORD_KEY; string penultimate = WORD_KEY; string previous = BEGIN_KEY; WordCount mainWordCount; while( true ) { cin >> token; if( token == END_KEY ) break; word = Convert( token, isEnd ); cout << "READ WORD FROM INPUT: " << word << endl; if( mainWordCount.find( word ) == mainWordCount.end() ) { mainWordCount[ word ] = 0; } ++mainWordCount[ word ]; AddCount( PreText, word, previous ); AddCount( PostText, previous, word ); if( penultimate != WORD_KEY ) { pair = previous + " " + word; AddCount( PrePairText, pair, penultimate ); pair = penultimate + " " + previous; AddCount( PostPairText, pair, word ); if( penultimate != WORD_KEY ) { triplet = penultimate + " " + previous + " " + word; AddCount( PreTripletText, triplet, peripenultimate ); triplet = peripenultimate + " " + penultimate + " " + previous; AddCount( PostTripletText, triplet, word ); } } if( isEnd ) { AddCount( PostText, word, END_KEY ); if( previous != WORD_KEY ) { pair = previous + " " + word; AddCount( PostPairText, pair, END_KEY ); if( penultimate != WORD_KEY ) { triplet = penultimate + " " + previous + " " + word; AddCount( PostTripletText, triplet, END_KEY ); } } previous = BEGIN_KEY; peripenultimate = WORD_KEY; penultimate = WORD_KEY; } else { peripenultimate = penultimate; penultimate = previous; previous = word; } } // cout << endl; cout << "PreText" << endl; DumpPositionTable( PreText ); cout << "PostText" << endl; DumpPositionTable( PostText ); cout << "PrePairText" << endl; DumpPositionTable( PrePairText ); cout << "PostPairText" << endl; DumpPositionTable( PostPairText ); cout << "PreTripletText" << endl; DumpPositionTable( PreTripletText ); cout << "PostTripletText" << endl; DumpPositionTable( PostTripletText ); while(1) { cin >> word; if( word == END_KEY ) break; word = Convert( word, isEnd ); // cout << endl << "create utterance for word: " << word << endl; penultimate = WORD_KEY; 
previous = WORD_KEY; BackTrack( penultimate, previous, word ); int accumulation = 0; while( word != END_KEY ) { cout << word; pair = previous + " " + word; triplet = penultimate + " " + previous + " " + word; // cout << endl << "pair='" << pair << "'" << endl; // cout << "triplet='" << triplet << "'" << endl; peripenultimate = penultimate; penultimate = previous; previous = word; WordCount wordCount; if( PostTripletText.find(triplet) != PostTripletText.end() ) { // cout << " -> call GetRandWord( PostTripletText[" << triplet << "] )" << endl; // word = GetRandWord( *PostTripletText[triplet], accumulation++ ); CopyWordCounts( wordCount, *PostTripletText[triplet], 9 ); } else if( PostPairText.find(pair) != PostPairText.end() ) { // cout << " -> call GetRandWord( PostPairText[" << pair << "] )" << endl; // word = GetRandWord( *PostPairText[pair], accumulation++ ); CopyWordCounts( wordCount, *PostPairText[pair], 3 ); } else { // cout << " -> call GetRandWord( PostText[" << word << "] )" << endl; // word = GetRandWord( *PostText[word], accumulation++ ); CopyWordCounts( wordCount, *PostText[word], 1 ); } word = GetRandWord( wordCount, accumulation++ ); if( word == END_KEY ) cout << "." << endl << endl; else cout << " "; } } }