void processBlank(const WordMap &words, string &originalWord, set<string> &bestWords, short &bestCount, const LetterMap &letterMap) { bestCount = 0; string word = originalWord; int pos = word.find(' '); for(char letter = 'a'; letter <= 'z'; letter++) { string temp(1, letter); word.replace(pos, 1, temp); for(int i = 0; i < MAX_PERMUTATIONS; i++) { for(unsigned int j = 1; j <= word.length(); j++) { string shortenedWord; shortenedWord.assign(word, 0, j); WordMap::const_iterator itr = words.find(shortenedWord); if(itr != words.end()) processBlankWord(words, originalWord, bestWords, bestCount, letterMap, itr, letter); } // for each word size next_permutation(word.begin(), word.end()); } // for i } // for each letter in blank } // processBlank()
// Builds a dictionary keyed by feature names with their prefix removed.
//
// Every entry of names_ is matched against prefs; the first prefix that the
// name begins with is used.  Names matching no prefix are skipped.  For a
// match, featuresAdded_ is incremented and the remainder of the name (after
// the prefix) becomes the key of a vector holding prefs.size()*numW_ values.
// numW_ consecutive slots of that vector, starting at an offset derived from
// the prefix index (and, when adjustPos is set, from the key length), are
// filled with getWeight(i-1,j) * labels_[0].
//
// Returns a newly allocated Dictionary indexed over the collected entries,
// or NULL when no name matched any prefix.  The caller receives the raw
// pointer.  NOTE(review): presumably the Dictionary takes ownership of the
// vectors allocated here -- confirm against buildIndex().
Dictionary<vector<FeatVal> > * KyteaModel::makeDictionaryFromPrefixes(const vector<KyteaString> & prefs, StringUtil* util, bool adjustPos) {
    typedef Dictionary<vector<FeatVal> > FeatDict;
    typedef FeatDict::WordMap WordMap;

    WordMap wm;
    const int numPrefs = (int)prefs.size();

    for(int i = 0; i < (int)names_.size(); i++) {
        const KyteaString & fullName = names_[i];

        // Locate the first prefix this feature name starts with
        int pos = 0;
        while(pos < numPrefs && !fullName.beginsWith(prefs[pos]))
            pos++;
        if(pos == numPrefs)
            continue;

        featuresAdded_++;
        KyteaString name = fullName.substr(prefs[pos].length());

        // Fetch the value vector for this key, creating it on first sight
        WordMap::iterator it = wm.find(name);
        if(it == wm.end())
            it = wm.insert(WordMap::value_type(name, new vector<FeatVal>(prefs.size()*numW_))).first;

        // If this is an n-gram dictionary, adjust the position according to
        // n-gram length, otherwise just use the location of the prefix
        int id = (adjustPos ? (prefs.size()-pos-name.length())*numW_ : pos*numW_ );

        vector<FeatVal> & slots = *it->second;
        for(int j = 0; j < numW_; j++)
            slots[id+j] = getWeight(i-1,j) * labels_[0];
    }

    if(!(wm.size() > 0))
        return NULL;

    FeatDict * ret = new FeatDict(util);
    ret->buildIndex(wm);
    return ret;
}
void processNormal(const WordMap &words, string word, set<string> &bestWords, short &bestCount) { bestCount = 0; for(int i = 0; i < MAX_PERMUTATIONS; i++) { for(unsigned int j = 1; j <= word.length(); j++) { string shortenedWord; shortenedWord.assign(word, 0, j); WordMap::const_iterator itr = words.find(shortenedWord); if(itr != words.end() && itr->second >= bestCount) { if(itr->second > bestCount) { bestWords.clear(); bestCount = itr->second; } // if better than those previous bestWords.insert(itr->first); } // if at least as good as previous } // for each word size next_permutation(word.begin(), word.end()); } // for i } // processNormal()
void incrWordData(int lhsInt, ECString wupper) { char temp[128]; ECString w(toLower(wupper.c_str(), temp)); numTerm[lhsInt]++; WordMap::iterator wmi = wordMap.find(w); if(wmi == wordMap.end()) { wordMap[w][lhsInt] = 1; return; } PosD& posd = (*wmi).second; PosD::iterator pdi = posd.find(lhsInt); if(pdi == posd.end()) { posd[lhsInt] = 1; } else (*pdi).second++; }