// Merges the counts of sourceWordCount into targetWordCount, scaling each
// source count by weight before adding it.
//
//   targetWordCount  accumulator; ends up with an entry for every key present
//                    in the source (a missing key starts from zero).
//   sourceWordCount  counts to merge in; only read here.
//   weight           multiplier applied to every source count.
void CopyWordCounts( WordCount& targetWordCount, WordCount& sourceWordCount, int weight )
{
    for( wciter iwc = sourceWordCount.begin(); iwc != sourceWordCount.end(); ++iwc )
    {
        // operator[] on a map-style container inserts a value-initialized
        // (zero) count when the key is missing, so one lookup both creates and
        // updates the entry; no separate find / "= 0" step is needed.
        targetWordCount[ iwc->first ] += weight * iwc->second;
    }
}
// Debug helper: writes the whole position table to stdout. For every key in
// ptext a header line is printed, followed by one "word: count" line for each
// entry of the WordCount that key points to.
void DumpPositionTable( PositionText& ptext )
{
    ptiter position = ptext.begin();
    while( position != ptext.end() )
    {
        cout << "DumpPositionTable for key value ='" << position->first << "'" << endl;

        // Each table slot stores a pointer to its word-count table.
        WordCount* countsPtr = position->second;
        WordCount& counts = *countsPtr;
        for( wciter entry = counts.begin(); entry != counts.end(); ++entry )
        {
            cout << entry->first << ": " << entry->second << endl;
        }

        ++position;
    }
}
// Picks a word at random from wordCount, with probability proportional to the
// square of its count.
//
//   wordCount     table of word -> count to draw from.
//   accumulation  once this exceeds 5, the BEGIN_KEY / END_KEY entries get an
//                 extra weight of (accumulation - 5)^3, making them more
//                 likely to be drawn. (Presumably the length of the text
//                 generated so far -- confirm against the caller.)
//
// Returns the chosen word, or an empty string when the table is empty or the
// total weight is zero (nothing can be drawn).
//
// NOTE: weights are plain int arithmetic (count * count, distance^3); very
// large counts or accumulation values would overflow.
string GetRandWord( WordCount& wordCount, int accumulation )
{
    int terminalWeight = 0;
    if( accumulation > 5 )
    {
        int distance = accumulation - 5;
        terminalWeight = distance * distance * distance;
    }

    // Running totals, one per entry, in the iteration order of wordCount.
    // Kept in a vector (rather than a second word-keyed table) so the second
    // pass is guaranteed to see the thresholds in the same order and no keys
    // need to be copied.
    std::vector<int> cumulative;
    cumulative.reserve( wordCount.size() );

    int totalCount = 0;
    for( wciter iwc = wordCount.begin(); iwc != wordCount.end(); ++iwc )
    {
        totalCount += iwc->second * iwc->second;
        if( iwc->first == BEGIN_KEY || iwc->first == END_KEY )
        {
            totalCount += terminalWeight;
        }
        cumulative.push_back( totalCount );
    }

    // rand() % 0 is undefined behaviour; with no weight there is nothing to pick.
    if( totalCount <= 0 )
    {
        return string();
    }

    // roll is in [0, totalCount - 1].
    int roll = rand() % totalCount;

    // Entry i owns the rolls in [cumulative[i-1], cumulative[i]), so the test
    // must be strict: with "<=" the first entry would gain one outcome, the
    // last would lose one, and zero-weight entries could be selected.
    std::vector<int>::size_type index = 0;
    for( wciter iwc = wordCount.begin(); iwc != wordCount.end(); ++iwc, ++index )
    {
        if( roll < cumulative[ index ] )
        {
            return iwc->first;
        }
    }

    // Not reachable while the totals are consistent; kept as a safe fallback.
    return string();
}
// Returns the word with the largest count in wordCount. When several words
// share the largest count, the first one in iteration order wins. If no entry
// has a count greater than zero (including an empty table), the result is an
// empty string.
string GetMaxWord( WordCount& wordCount )
{
    const wciter none = wordCount.end();
    wciter best = none;
    int bestCount = 0;

    for( wciter candidate = wordCount.begin(); candidate != none; ++candidate )
    {
        // Strictly greater: an equal count never displaces an earlier winner.
        if( candidate->second > bestCount )
        {
            best = candidate;
            bestCount = candidate->second;
        }
    }

    if( best == none )
    {
        return string();
    }
    return best->first;
}
// impl by chenshuo void sort_words_by_frequencies(const WordCount& counts) { typedef std::vector<std::pair<int, WordCount::const_iterator> > FreqList; FreqList freq; freq.reserve(counts.size()); for (WordCount::const_iterator it = counts.begin(); it != counts.end(); ++it) { freq.push_back(make_pair(it->second, it)); } std::sort(freq.begin(), freq.end(), Greater()); for (FreqList::iterator itr = freq.begin(); itr!=freq.end(); ++itr) { std::cout << itr->first << '\t' << itr->second->first << '\n'; } }