Beispiel #1
0
void processBlank(const WordMap &words, string &originalWord,
                  set<string> &bestWords, short &bestCount, 
                  const LetterMap &letterMap)
{
  bestCount = 0;

  string word = originalWord;
  int pos = word.find(' ');

  for(char letter = 'a'; letter <= 'z'; letter++)
  {
    string temp(1, letter);
    word.replace(pos, 1, temp);

    for(int i = 0; i <  MAX_PERMUTATIONS; i++)
    {
      for(unsigned int j = 1; j <= word.length(); j++)
      {
        string shortenedWord;
        shortenedWord.assign(word, 0, j);
        WordMap::const_iterator itr = words.find(shortenedWord);

        if(itr != words.end())
          processBlankWord(words, originalWord, bestWords, bestCount, 
                           letterMap, itr, letter);
      } // for each word size

      next_permutation(word.begin(), word.end());
    } // for i
  } // for each letter in blank
}  // processBlank()
Beispiel #2
0
// Collects the weights of all features whose name starts with one of `prefs`
// into a dictionary keyed by the feature name with that prefix removed.
//
// Each dictionary entry is a vector of prefs.size()*numW_ values. The block of
// numW_ slots written for a feature starts at
//   (prefs.size() - prefixIndex - name.length()) * numW_   when adjustPos,
//   prefixIndex * numW_                                     otherwise.
// featuresAdded_ is incremented once per matching feature.
//
// Returns a newly allocated Dictionary built from the collected entries, or
// NULL when no feature name matched any prefix.
Dictionary<vector<FeatVal> > * KyteaModel::makeDictionaryFromPrefixes(const vector<KyteaString> & prefs, StringUtil* util, bool adjustPos) {
    typedef Dictionary<vector<FeatVal> >::WordMap WordMap;
    const int numPrefs = (int)prefs.size();
    WordMap entries;
    for(int featIdx = 0; featIdx < (int)names_.size(); featIdx++) {
        const KyteaString & featName = names_[featIdx];
        // Find the first prefix this feature name begins with
        int prefIdx = 0;
        while(prefIdx < numPrefs && !featName.beginsWith(prefs[prefIdx]))
            prefIdx++;
        if(prefIdx == numPrefs)
            continue; // no prefix applies to this feature
        featuresAdded_++;
        KyteaString name = featName.substr(prefs[prefIdx].length());
        // Look up first so a value vector is only allocated for new names
        WordMap::iterator entry = entries.find(name);
        if(entry == entries.end())
            entry = entries.insert(WordMap::value_type(name, new vector<FeatVal>(prefs.size()*numW_))).first;
        // For an n-gram dictionary the slot depends on the n-gram length,
        // otherwise it is just the index of the prefix
        int base = (adjustPos ?
            (prefs.size()-prefIdx-name.length())*numW_ :
            prefIdx*numW_
        );
        vector<FeatVal> & slots = *entry->second;
        // NOTE(review): weights are read at featIdx-1, which suggests names_
        // is offset by one relative to the weight table; confirm.
        for(int w = 0; w < numW_; w++)
            slots[base+w] = getWeight(featIdx-1,w) * labels_[0];
    }
    if(entries.size() == 0)
        return NULL;
    Dictionary<vector<FeatVal> > * ret = new Dictionary<vector<FeatVal> >(util);
    ret->buildIndex(entries);
    return ret;
}
Beispiel #3
0
void processNormal(const WordMap &words, string word, set<string> &bestWords,
                   short &bestCount)
{
  bestCount = 0;

  for(int i = 0; i <  MAX_PERMUTATIONS; i++)
  {
    for(unsigned int j = 1; j <= word.length(); j++)
    {
      string shortenedWord;
      shortenedWord.assign(word, 0, j);
      WordMap::const_iterator itr = words.find(shortenedWord);

      if(itr != words.end() && itr->second >= bestCount)
      {

        if(itr->second > bestCount)
        {
          bestWords.clear();
          bestCount = itr->second;
        } // if better than those previous

        bestWords.insert(itr->first);
      } // if at least as good as previous
    } // for each word size

    next_permutation(word.begin(), word.end());
  } // for i
}  // processNormal()
Beispiel #4
0
// Rebuilds the dictionary index from `input`.
// An empty input is reported through THROW_ERROR. Otherwise existing data is
// cleared, a fresh initial state is appended to states_, and the goto and
// failure structures are built in that order.
void Dictionary<Entry>::buildIndex(const WordMap & input) {
    if(input.size() == 0) {
        THROW_ERROR("Cannot build dictionary for no input");
    }
    clearData();
    // The first state must exist before buildGoto() starts from index 0
    DictionaryState * root = new DictionaryState();
    states_.push_back(root);
    buildGoto(input.begin(), input.end(), 0, 0);
    buildFailures();
}
Beispiel #5
0
bool Nce::parseFile(const QString &nce, int _class)
{
    QString path = qApp->applicationDirPath();
#ifdef Q_OS_MAC
    path += "/../../..";
#endif
    path += "/nce/";
    path += nce + "/";
    QString classStr = QString::number(_class);
    if (_class < 10) classStr = "0" + QString::number(_class);
    path += classStr;
    QFileInfo fileInfo(path + ".txt");
    if (!fileInfo.exists())
        path += ".TXT";
    else
        path += ".txt";
    file_.setFileName(path);
    if (!file_.open(QIODevice::ReadOnly)) {
        qDebug() << "Read File Error!"+ path;
        return false;
    }
    QString text = file_.readAll();
    if (file_.isOpen())
        file_.close();
    text = simpleChange(text);

    QStringList sentenceList = text.split(QRegExp("[\\.!\\?]"),QString::SkipEmptyParts);
    int sentenceSize = sentenceList.size();

    ClassIndex index;
    index.nce = nce;
    index.class_ = _class;
    data_.classContent_.insert(index,text);
    WordMap wordMap;
    foreach (QString sentence, sentenceList) {
        QStringList wordList;
        wordList = sentence.split(QRegExp("\\W+"), QString::SkipEmptyParts);
        NceWordInfo wordInfo;
        wordInfo.index = index;
//        wordInfo.sentences.push_back(sentence);
        foreach (QString w, wordList) {
            Word word;
            word.index = index;
            word.word = w;
            wordMap.insert(word,wordInfo);
            for (WordMap::iterator it = wordMap.begin(), ie = wordMap.end();
                 it != ie; ++it){
                if (it.key() == word) {
                    NceWordInfo& info = it.value();
                    info.sentences.push_back(sentence);
                    info.sentences.removeDuplicates();
                }
            }
        }
Beispiel #6
0
// Adds every (key, value) pair of `src` into `dst`: values of keys already
// present in `dst` are summed, keys missing from `dst` are inserted with the
// value from `src`.
void CodeAtlas::SymbolWordAttr::mergeWords( WordMap& dst, const WordMap& src )
{
	QMap<int, float>::ConstIterator srcIt = src.constBegin();
	while (srcIt != src.constEnd())
	{
		const int   wordKey    = srcIt.key();
		const float wordWeight = srcIt.value();

		// One lookup decides between accumulating and inserting.
		QMap<int, float>::Iterator dstIt = dst.find(wordKey);
		if (dstIt == dst.end())
			dst.insert(wordKey, wordWeight);
		else
			dstIt.value() += wordWeight;

		++srcIt;
	}
}
Beispiel #7
0
void Console::printWordMap(const WordMap &wordMap) {
	Common::StringArray words;
	WordMap::const_iterator verb;

	for (verb = wordMap.begin(); verb != wordMap.end(); ++verb)
		words.push_back(Common::String::format("%s: %3d", toAscii(verb->_key).c_str(), wordMap[verb->_key]));

	Common::sort(words.begin(), words.end());

	debugPrintColumns(words);
}
Beispiel #8
0
int main(int argc, char* argv[]) {
  typedef map<string, int> WordMap;
  typedef WordMap::iterator WMIter;
  const char* fname = "WordCount.cpp";
  if(argc > 1) fname = argv[1];
  ifstream in(fname);
  assure(in, fname);
  WordMap wordmap;
  string word;
  while(in >> word)
    wordmap[word]++;
  for(WMIter w = wordmap.begin(); w != wordmap.end(); w++)
    cout << w->first << ": " << w->second << endl;
} ///:~
Beispiel #9
0
// Counts how often each token produced by StreamTokenizer occurs in a file
// and prints "<token>: <count>" for every entry of the word map.
// The file is argv[1] when given, otherwise "WordCount.cpp".
int main(int argc, char* argv[]) {
  // A string literal must be bound to a pointer-to-const: the conversion to
  // plain char* is ill-formed since C++11.
  const char* fname = "WordCount.cpp";
  if(argc > 1) fname = argv[1];
  ifstream in(fname);
  assure(in, fname);
  StreamTokenizer words(in);
  WordMap wordmap;
  string word;
  // The tokenizer signals the end of input with an empty token.
  while((word = words.next()).size() != 0)
    wordmap[word]++;
  for(WMIter w = wordmap.begin(); 
      w != wordmap.end(); w++)
    cout << (*w).first << ": "
      << (*w).second.val() << endl;
} ///:~
Beispiel #10
0
int main(int argc, char* argv[])
{
	requireArgs(argc, 1);
	ifstream in(argv[1]);
	assure(in, argv[1]);
	StreamTokenizer words(in);
	WordMap wordmap;
	string word;
	while ((word = words.next()).size() != 0)
		wordmap[word]++;
	for (WMIter w = wordmap.begin();
			w != wordmap.end(); w++)
		cout << (*w).first << ": "
			<< (*w).second.val() << endl;
} ///:~
Beispiel #11
0
// Loads words from the text file `fileName` into the global `wordmap`.
//
// On each line the characters before the first space are collected as the
// word. The word is inserted into `wordmap` when that space is read, and
// again for every further space on the same line (the collected word does not
// change until the next '\n'). Lines without a space insert nothing.
// If the file cannot be opened the function returns without doing anything.
void loadFile (string fileName)
{
  ifstream fin(fileName.c_str());
  if (!fin)
    return;

  bool add = true;        // still collecting the leading word of this line
  string newWord;
  char nextChar;
  // get(char&) fails exactly when no character could be extracted, so the
  // end-of-file condition is never processed as data. (The previous check
  // compared the character with the bool returned by eof(), which let the
  // EOF sentinel be appended to the pending word.)
  while (fin.get(nextChar)){
    if (nextChar == ' '){
      add = false;
      wordmap.insert(newWord);
    }
    else if (nextChar == '\n'){
      newWord.clear();
      add = true;
    }
    else if (add){
      newWord += nextChar;
    }
  }
}
Beispiel #12
0
void
incrWordData(int lhsInt, ECString wupper)
{
  char temp[128];
  ECString w(toLower(wupper.c_str(), temp));
  numTerm[lhsInt]++;
  WordMap::iterator wmi = wordMap.find(w);
  if(wmi == wordMap.end())
    {
      wordMap[w][lhsInt] = 1;
      return;
    }
  PosD& posd = (*wmi).second;
  PosD::iterator pdi = posd.find(lhsInt);
  if(pdi == posd.end())
    {
      posd[lhsInt] = 1;
    }
  else
    (*pdi).second++;
}
Beispiel #13
0
// Recursive search step: tries to extend a partial word rectangle that
// already has `haveTall` rows by one more row taken from `rowMap`.
//
// Returns mWantArea once the rectangle has as many rows as the column word
// length, -mWantArea when the search is abandoned because mWantArea does not
// exceed the current trumping area (only checked when haveTall > 2), a
// positive area propagated from a successful deeper call, and 0 when no row
// at this level leads to a solution.
//
// wordCols[k] accumulates the letters of column k; position haveTall is
// overwritten for each candidate row.
int WordRectFinder<MapT>::findWordRectRowsMapUpper(int haveTall, const WordMap& rowMap, char wordCols[][sBufSize])
{
    mNowTall = haveTall;                    // rectangle height == stack height
    const int wantWide = mRowTrie.getWordLength();
    const int wantTall = mColTrie.getWordLength();
    if (haveTall == wantTall)
        return mWantArea;                   // the last row added completed every column word
    if (haveTall > 2 && mWantArea <= WordRectSearchMgr<MapT>::getTrumpingArea())
        return -mWantArea;                  // a rectangle at least this large is already known

    char prefix[sBufSize];

    WordMap::const_iterator itr = rowMap.begin();
    const WordMap::const_iterator end = rowMap.end();
    while (itr != end) {
        const char *word = itr->first;

        // Place the candidate row letter by letter, checking each column.
        bool rowFits = true;
        for (int k = 0; k < wantWide; k++) {
            wordCols[k][haveTall] = prefix[k] = word[k];
            if ( ! mColTrie.subTrix(wordCols[k], haveTall) ) {
                // Column k cannot be continued: every row word sharing these
                // first k+1 letters fails too, so jump past all of them.
                prefix[k+1] = '{';          // ASCII 123, the char after 'z'
                prefix[k+2] = '\0';
                itr = rowMap.upper_bound(prefix);
                rowFits = false;
                break;
            }
        }
        if (!rowFits)
            continue;                       // itr has already been advanced

        mRowWordsNow[haveTall] = word;
        const int area = findWordRectRowsMapUpper(haveTall+1, rowMap, wordCols);
        if (area > 0)
            return area;
        ++itr;
    }
    return 0;
}
Beispiel #14
0
void readWords(const LetterMap &letterMap, WordMap &words)
{
  ifstream inf("words.txt");
  string word, sortedWord;
  short value;

  while( getline(inf, word))
  {
    if(word.length() <= MAX_WORD_LENGTH
       && word.find_first_not_of("abcedefghijklmnopqrstuvwxyz") == string::npos)
    {
      value = 0;

      for(string::const_iterator itr = word.begin(); itr != word.end(); itr++)
        value += (letterMap.find(*itr))->second;

      words.insert(WordMap::value_type(word, value));
    } // if word up to MAX_WORD_LENGTH characters and no captial letters.
  } // while
}  // readWords()
Beispiel #15
0
// Interactive anagram finder.
// When exactly one argument is given it names a word file that is loaded via
// loadFile(). The program then repeatedly prompts for a string, prints every
// word returned by wordmap.getAllWordsFromString() separated by tabs, followed
// by the number of words. The loop ends when standard input is exhausted or
// fails.
int main(int argc, char** argv)
{
  if (argc == 2){
    cout << "Loading " << argv[1] << "...";
    loadFile(argv[1]);
    cout << "complete.\n";
  }
  while (true){
    string input;
    cout << "Enter a string of upper-case letters from which to find anagrams\n:> ";
    // Stop on EOF or a stream error; without this check the loop would spin
    // forever re-printing the prompt once no more input can be read.
    if (!(cin >> input))
      break;

    vector <string> wordList = wordmap.getAllWordsFromString (input);

    for (unsigned int i=0; i<wordList.size(); i++){
      cout << wordList[i] << "\t";
    }
    cout << wordList.size() << "\n\n";
  }
  return 0;
}
Beispiel #16
0
// Training driver: reads parse trees from standard input, gathers word/tag
// counts and unit-rule data, then writes the per-word tag probabilities to
// "<path>pSgT.txt", where <path> is the single command-line argument.
//
// Output file layout: a placeholder line, the number of distinct words, a
// blank line, then for each word a line
//   <word>\t<tag> <count/numTerm[tag]> ... | <total count>
// NOTE(review): the function returns 1 on normal completion; confirm whether
// callers rely on that exit status.
int
main(int argc, char *argv[])
{
  ECArgs args( argc, argv );
  assert(args.nargs() == 1);
  ECString path(args.arg(0));
  cerr << "At start of pHsgt" << endl;

  for(int nt = 0 ; nt < MAXNUMNTS ; ++nt)
    numTerm[nt] = 0;

  Term::init( path );
  readHeadInfo(path);

  ECString s1lex("^^");
  ECString s1nm("S1");
  int s1Int = Term::get(s1nm)->toInt();

  UnitRules ur;
  ur.init();

  int sentenceCount = 0;
  while(cin)
    {
      // Progress report every 10000 sentences.
      if(sentenceCount%10000 == 0) cerr << sentenceCount << endl;
      InputTree parse;
      cin >> parse;
      // Stop on a failed read or an empty tree.
      if(!cin || parse.length() == 0) break;
      EcSPairs wtList;
      parse.make(wtList);
      InputTree* par = &parse;

      addWwData(par);
      incrWordData(s1Int, s1lex);
      ur.gatherData(par);
      sentenceCount++;
    }

  ECString resultsString(path);
  resultsString += "pSgT.txt";
  ofstream resultsStream(resultsString.c_str());
  assert(resultsStream);

  resultsStream << "       \n";  // leave space for number of words
  resultsStream.precision(3);
  resultsStream << wordMap.size() << "\n\n";
  for(WordMap::iterator wmi = wordMap.begin() ; wmi != wordMap.end() ; ++wmi)
    {
      resultsStream << wmi->first << "\t";
      PosD& posd = wmi->second;
      int total = 0;
      for(PosD::iterator pdi = posd.begin() ; pdi != posd.end() ; ++pdi)
        {
          const int posInt = pdi->first;
          const int c = pdi->second;
          total += c;
          // Share of this tag's occurrences that belong to the current word.
          const float p = static_cast<float>(c)/static_cast<float>(numTerm[posInt]);
          resultsStream << posInt << " " << p << " ";
        }
      resultsStream << "| " << total << "\n";
    }
  ur.setData(path);
  return 1;
}