Пример #1
0
// Build a dictionary of weight vectors from the model features whose names
// start with one of the given prefixes.
//
// For every entry of names_ the first prefix in `prefs` (in list order) that
// the name begins with is selected; names matching no prefix are ignored.
// The dictionary key is the feature name with that prefix removed, and each
// key maps to a vector of prefs.size()*numW_ values. featuresAdded_ is
// incremented once per matching name.
//
//   prefs     - prefixes to look for
//   util      - handed to the Dictionary constructor
//   adjustPos - when true the slot inside the vector is derived from the
//               prefix index and the length of the stripped name (n-gram
//               dictionaries); when false the prefix index alone is used
//
// Returns a heap-allocated dictionary, or NULL if no feature name matched.
// NOTE(review): the caller presumably takes ownership of the result - confirm.
Dictionary<vector<FeatVal> > * KyteaModel::makeDictionaryFromPrefixes(const vector<KyteaString> & prefs, StringUtil* util, bool adjustPos) {
    typedef Dictionary<vector<FeatVal> > FeatDict;
    typedef FeatDict::WordMap WordMap;

    const int numPrefs = (int)prefs.size();
    WordMap entries;

    for(int nameIdx = 0; nameIdx < (int)names_.size(); ++nameIdx) {
        const KyteaString & fullName = names_[nameIdx];

        // Locate the first prefix this feature name starts with
        int prefIdx = 0;
        while(prefIdx < numPrefs && !fullName.beginsWith(prefs[prefIdx]))
            ++prefIdx;
        if(prefIdx == numPrefs)
            continue;

        ++featuresAdded_;

        // Fetch the weight vector stored under the stripped name, creating
        // it on first sight
        KyteaString key = fullName.substr(prefs[prefIdx].length());
        WordMap::iterator slot = entries.find(key);
        if(slot == entries.end())
            slot = entries.insert(WordMap::value_type(key, new vector<FeatVal>(prefs.size()*numW_))).first;

        // For an n-gram dictionary the position is adjusted according to the
        // n-gram length, otherwise the location of the prefix is used as is
        int base;
        if(adjustPos)
            base = (prefs.size()-prefIdx-key.length())*numW_;
        else
            base = prefIdx*numW_;

        // Weights are read at index nameIdx-1
        // NOTE(review): presumably names_ carries a leading placeholder
        // entry that has no weight - confirm.
        vector<FeatVal> & weights = *slot->second;
        for(int w = 0; w < numW_; ++w)
            weights[base+w] = getWeight(nameIdx-1,w) * labels_[0];
    }

    if(entries.size() == 0)
        return NULL;

    FeatDict * dict = new FeatDict(util);
    dict->buildIndex(entries);
    return dict;
}
Пример #2
0
// Reads `fileName` and inserts the first space-terminated token of every
// line into the file-level `wordmap`.
//
// Characters are accumulated from the start of a line until the first space;
// everything after that space is ignored up to the next '\n'. A line that
// contains no space contributes nothing. If the file cannot be opened the
// function returns without touching `wordmap`.
//
// fileName - path of the file to read
void loadFile (string fileName)
{
  ifstream fin(fileName.c_str());
  if (!fin)
    return;

  bool add = true;      // true while still inside the first token of a line
  string newWord;
  char nextChar;

  // get(char&) leaves the stream in a failed state at end-of-file, so the
  // loop only ever sees characters that were really read. The previous test
  // compared the character with the bool returned by eof(), which let the
  // EOF sentinel through as data and dropped genuine NUL bytes.
  while (fin.get(nextChar)){
    if (nextChar == ' '){
      add = false;
      // Runs on every space, so the token is offered again for each further
      // space on the same line, exactly as before.
      wordmap.insert(newWord);
    }
    else if (nextChar == '\n'){
      newWord.clear();
      add = true;
    }
    else if (add){
      newWord += nextChar;
    }
  }
}
Пример #3
0
// Loads the text file for class number `_class` of book `nce`, splits it into
// sentences and words, and records the results.
//
// The file is looked up at <applicationDir>/nce/<nce>/<NN>.txt, where NN is
// `_class` with a leading "0" when it is below 10; if no ".txt" file exists
// the ".TXT" spelling is tried instead.
//
// Returns false when the file cannot be opened for reading.
// NOTE(review): the remainder of this function is not visible in this
// excerpt, so its other return paths are not described here.
bool Nce::parseFile(const QString &nce, int _class)
{
    QString path = qApp->applicationDirPath();
#ifdef Q_OS_MAC
    // Step three directories up from the executable
    // (presumably out of the .app bundle - confirm).
    path += "/../../..";
#endif
    path += "/nce/";
    path += nce + "/";
    // File names use at least two digits: prefix a "0" for values below 10.
    QString classStr = QString::number(_class);
    if (_class < 10) classStr = "0" + QString::number(_class);
    path += classStr;
    // Prefer the lower-case extension; fall back to upper-case when the
    // lower-case file does not exist.
    QFileInfo fileInfo(path + ".txt");
    if (!fileInfo.exists())
        path += ".TXT";
    else
        path += ".txt";
    file_.setFileName(path);
    if (!file_.open(QIODevice::ReadOnly)) {
        qDebug() << "Read File Error!"+ path;
        return false;
    }
    // Read the whole file, then release the handle before processing.
    QString text = file_.readAll();
    if (file_.isOpen())
        file_.close();
    // simpleChange() is defined elsewhere; presumably it normalises the raw
    // text - TODO confirm.
    text = simpleChange(text);

    // A sentence ends at '.', '!' or '?'; empty pieces are dropped.
    QStringList sentenceList = text.split(QRegExp("[\\.!\\?]"),QString::SkipEmptyParts);
    int sentenceSize = sentenceList.size();

    // Key identifying this (book, class) pair; the processed text is stored
    // under it in data_.classContent_.
    ClassIndex index;
    index.nce = nce;
    index.class_ = _class;
    data_.classContent_.insert(index,text);
    WordMap wordMap;
    foreach (QString sentence, sentenceList) {
        // Split the sentence into words on runs of non-word characters.
        QStringList wordList;
        wordList = sentence.split(QRegExp("\\W+"), QString::SkipEmptyParts);
        NceWordInfo wordInfo;
        wordInfo.index = index;
//        wordInfo.sentences.push_back(sentence);
        foreach (QString w, wordList) {
            Word word;
            word.index = index;
            word.word = w;
            // NOTE(review): WordMap's definition is not visible here. If it
            // is a QMap/QHash, insert() replaces the value stored for an
            // existing key, which would discard the sentences collected for
            // this word so far - confirm.
            wordMap.insert(word,wordInfo);
            // Scan every entry of the map and attach the current sentence to
            // each one whose key equals this word, then drop repeated
            // sentences.
            for (WordMap::iterator it = wordMap.begin(), ie = wordMap.end();
                 it != ie; ++it){
                if (it.key() == word) {
                    NceWordInfo& info = it.value();
                    info.sentences.push_back(sentence);
                    info.sentences.removeDuplicates();
                }
            }
        }
Пример #4
0
void readWords(const LetterMap &letterMap, WordMap &words)
{
  ifstream inf("words.txt");
  string word, sortedWord;
  short value;

  while( getline(inf, word))
  {
    if(word.length() <= MAX_WORD_LENGTH
       && word.find_first_not_of("abcedefghijklmnopqrstuvwxyz") == string::npos)
    {
      value = 0;

      for(string::const_iterator itr = word.begin(); itr != word.end(); itr++)
        value += (letterMap.find(*itr))->second;

      words.insert(WordMap::value_type(word, value));
    } // if word up to MAX_WORD_LENGTH characters and no captial letters.
  } // while
}  // readWords()