Пример #1
0
// processBlank: handles a rack that contains a blank tile (a ' ' character).
//
// For every letter 'a'..'z' the blank is replaced by that letter, and
// MAX_PERMUTATIONS successive permutations of the resulting rack are examined.
// Every prefix of every permutation is looked up in `words`; each hit is
// forwarded to processBlankWord(), which is responsible for updating
// `bestWords` and `bestCount`.
//
//   words        - dictionary to search
//   originalWord - the rack, including the blank; passed on to
//                  processBlankWord() unchanged by this function
//   bestWords    - passed through to processBlankWord()
//   bestCount    - reset to 0 here, then passed through to processBlankWord()
//   letterMap    - passed through to processBlankWord()
//
// If the rack holds no blank there is nothing to substitute, so the function
// returns with bestCount == 0 instead of indexing the string with npos.
void processBlank(const WordMap &words, string &originalWord,
                  set<string> &bestWords, short &bestCount, 
                  const LetterMap &letterMap)
{
  bestCount = 0;

  // Snapshot the rack once: originalWord is handed to processBlankWord() by
  // non-const reference, so it is not relied upon to stay untouched.
  const string rack = originalWord;
  const string::size_type pos = rack.find(' ');

  if(pos == string::npos)
    return;  // no blank tile in the rack

  for(char letter = 'a'; letter <= 'z'; letter++)
  {
    // Start every letter from the pristine rack.  The permutation loop below
    // reorders `word` in place, so after it the character at `pos` is not
    // guaranteed to be the substituted blank any more.
    string word = rack;
    word[pos] = letter;

    for(int i = 0; i <  MAX_PERMUTATIONS; i++)
    {
      for(string::size_type j = 1; j <= word.length(); j++)
      {
        string shortenedWord;
        shortenedWord.assign(word, 0, j);
        WordMap::const_iterator itr = words.find(shortenedWord);

        if(itr != words.end())
          processBlankWord(words, originalWord, bestWords, bestCount, 
                           letterMap, itr, letter);
      } // for each word size

      next_permutation(word.begin(), word.end());
    } // for i
  } // for each letter in blank
}  // processBlank()
Пример #2
0
// Collects the weights of every feature whose name begins with one of `prefs`
// into a dictionary keyed by the remainder of the name (prefix stripped).
//
// Each dictionary entry is a vector of prefs.size()*numW_ values.  The block
// of numW_ slots written for a feature starts at
//   - (prefs.size() - pos - name.length()) * numW_   when adjustPos is true
//     (n-gram dictionaries: the position is shifted by the n-gram length), or
//   - pos * numW_                                     otherwise,
// where `pos` is the index of the first matching prefix.  The value stored is
// getWeight(featureIndex-1, j) * labels_[0].  featuresAdded_ is incremented
// once per matching feature.
//
// Returns a newly allocated dictionary built with `util`, or NULL when no
// feature name matched any prefix.
Dictionary<vector<FeatVal> > * KyteaModel::makeDictionaryFromPrefixes(const vector<KyteaString> & prefs, StringUtil* util, bool adjustPos) {
    typedef Dictionary<vector<FeatVal> >::WordMap WordMap;
    const int numPrefs = (int)prefs.size();
    WordMap entries;
    for(int featIdx = 0; featIdx < (int)names_.size(); featIdx++) {
        const KyteaString & featName = names_[featIdx];
        // Find the first prefix this feature name begins with
        int pos = 0;
        while(pos < numPrefs && !featName.beginsWith(prefs[pos]))
            pos++;
        // No prefix matched: this feature does not belong in the dictionary
        if(pos == numPrefs)
            continue;
        featuresAdded_++;
        KyteaString name = featName.substr(prefs[pos].length());
        // Fetch the entry for the stripped name, creating it on first sight
        WordMap::iterator entry = entries.find(name);
        if(entry == entries.end())
            entry = entries.insert(WordMap::value_type(name,new vector<FeatVal>(prefs.size()*numW_))).first;
        // If this is an n-gram dictionary, adjust the position according to
        // n-gram length, otherwise just use the location of the prefix
        int id = (adjustPos ?
            (prefs.size()-pos-name.length())*numW_ :
            pos*numW_
        );
        for(int j = 0; j < numW_; j++)
            (*entry->second)[id+j] = getWeight(featIdx-1,j) * labels_[0];
    }
    // Nothing matched, so there is no dictionary to build
    if(entries.size() == 0)
        return NULL;
    Dictionary<vector<FeatVal> > * ret = new Dictionary<vector<FeatVal> >(util);
    ret->buildIndex(entries);
    return ret;
}
Пример #3
0
void processNormal(const WordMap &words, string word, set<string> &bestWords,
                   short &bestCount)
{
  bestCount = 0;

  for(int i = 0; i <  MAX_PERMUTATIONS; i++)
  {
    for(unsigned int j = 1; j <= word.length(); j++)
    {
      string shortenedWord;
      shortenedWord.assign(word, 0, j);
      WordMap::const_iterator itr = words.find(shortenedWord);

      if(itr != words.end() && itr->second >= bestCount)
      {

        if(itr->second > bestCount)
        {
          bestWords.clear();
          bestCount = itr->second;
        } // if better than those previous

        bestWords.insert(itr->first);
      } // if at least as good as previous
    } // for each word size

    next_permutation(word.begin(), word.end());
  } // for i
}  // processNormal()
Пример #4
0
void
incrWordData(int lhsInt, ECString wupper)
{
  char temp[128];
  ECString w(toLower(wupper.c_str(), temp));
  numTerm[lhsInt]++;
  WordMap::iterator wmi = wordMap.find(w);
  if(wmi == wordMap.end())
    {
      wordMap[w][lhsInt] = 1;
      return;
    }
  PosD& posd = (*wmi).second;
  PosD::iterator pdi = posd.find(lhsInt);
  if(pdi == posd.end())
    {
      posd[lhsInt] = 1;
    }
  else
    (*pdi).second++;
}