/**
 * Rebuilds the dictionary's internal states from a word map.
 *
 * An empty input is rejected through THROW_ERROR. Otherwise any previously
 * built data is discarded with clearData(), a single fresh DictionaryState
 * is appended to states_, and the structure is populated by buildGoto()
 * over the whole input range followed by buildFailures().
 *
 * @param input  Word map to index; must contain at least one entry.
 */
void Dictionary<Entry>::buildIndex(const WordMap & input) {
    // Nothing can be indexed from an empty map.
    if (input.size() == 0) {
        THROW_ERROR("Cannot build dictionary for no input");
    }

    // Start from a clean slate holding only the initial state.
    clearData();
    DictionaryState * initialState = new DictionaryState();
    states_.push_back(initialState);

    // Populate from the full input range, then compute the failure data.
    buildGoto(input.begin(), input.end(), 0, 0);
    buildFailures();
}
bool Nce::parseFile(const QString &nce, int _class) { QString path = qApp->applicationDirPath(); #ifdef Q_OS_MAC path += "/../../.."; #endif path += "/nce/"; path += nce + "/"; QString classStr = QString::number(_class); if (_class < 10) classStr = "0" + QString::number(_class); path += classStr; QFileInfo fileInfo(path + ".txt"); if (!fileInfo.exists()) path += ".TXT"; else path += ".txt"; file_.setFileName(path); if (!file_.open(QIODevice::ReadOnly)) { qDebug() << "Read File Error!"+ path; return false; } QString text = file_.readAll(); if (file_.isOpen()) file_.close(); text = simpleChange(text); QStringList sentenceList = text.split(QRegExp("[\\.!\\?]"),QString::SkipEmptyParts); int sentenceSize = sentenceList.size(); ClassIndex index; index.nce = nce; index.class_ = _class; data_.classContent_.insert(index,text); WordMap wordMap; foreach (QString sentence, sentenceList) { QStringList wordList; wordList = sentence.split(QRegExp("\\W+"), QString::SkipEmptyParts); NceWordInfo wordInfo; wordInfo.index = index; // wordInfo.sentences.push_back(sentence); foreach (QString w, wordList) { Word word; word.index = index; word.word = w; wordMap.insert(word,wordInfo); for (WordMap::iterator it = wordMap.begin(), ie = wordMap.end(); it != ie; ++it){ if (it.key() == word) { NceWordInfo& info = it.value(); info.sentences.push_back(sentence); info.sentences.removeDuplicates(); } } }
void Console::printWordMap(const WordMap &wordMap) { Common::StringArray words; WordMap::const_iterator verb; for (verb = wordMap.begin(); verb != wordMap.end(); ++verb) words.push_back(Common::String::format("%s: %3d", toAscii(verb->_key).c_str(), wordMap[verb->_key])); Common::sort(words.begin(), words.end()); debugPrintColumns(words); }
int main(int argc, char* argv[]) { typedef map<string, int> WordMap; typedef WordMap::iterator WMIter; const char* fname = "WordCount.cpp"; if(argc > 1) fname = argv[1]; ifstream in(fname); assure(in, fname); WordMap wordmap; string word; while(in >> word) wordmap[word]++; for(WMIter w = wordmap.begin(); w != wordmap.end(); w++) cout << w->first << ": " << w->second << endl; } ///:~
int main(int argc, char* argv[]) { char* fname = "WordCount.cpp"; if(argc > 1) fname = argv[1]; ifstream in(fname); assure(in, fname); StreamTokenizer words(in); WordMap wordmap; string word; while((word = words.next()).size() != 0) wordmap[word]++; for(WMIter w = wordmap.begin(); w != wordmap.end(); w++) cout << (*w).first << ": " << (*w).second.val() << endl; } ///:~
int main(int argc, char* argv[]) { requireArgs(argc, 1); ifstream in(argv[1]); assure(in, argv[1]); StreamTokenizer words(in); WordMap wordmap; string word; while ((word = words.next()).size() != 0) wordmap[word]++; for (WMIter w = wordmap.begin(); w != wordmap.end(); w++) cout << (*w).first << ": " << (*w).second.val() << endl; } ///:~
/**
 * Recursive row-by-row search for a word rectangle, using
 * rowMap.upper_bound() to jump past candidate rows that share a rejected
 * prefix.
 *
 * @param haveTall  Number of rows already placed (also stored in mNowTall).
 * @param rowMap    Candidate row words, iterated in key order.
 * @param wordCols  Per-column character buffers; index haveTall of each
 *                  column is overwritten while candidates are tried.
 * @return  mWantArea when haveTall equals the column trie's word length;
 *          -mWantArea when haveTall > 2 and mWantArea does not exceed
 *          WordRectSearchMgr<MapT>::getTrumpingArea(); the positive area
 *          returned by a deeper call; otherwise 0.
 *
 * NOTE(review): only a positive result from the recursive call is
 * propagated; a negative (abort) result from a deeper level makes this
 * level continue with the next candidate row.
 */
int WordRectFinder<MapT>::findWordRectRowsMapUpper(int haveTall, const WordMap& rowMap, char wordCols[][sBufSize])
{
    mNowTall = haveTall;                        // rectangle height == stack height
    const int wantWide = mRowTrie.getWordLength();
    const int wantTall = mColTrie.getWordLength();

    // Success: the row just added completed every column.
    if (wantTall == haveTall) {
        return mWantArea;
    }
    // Abort: the wanted area does not exceed the current trumping area.
    if (haveTall > 2 && mWantArea <= WordRectSearchMgr<MapT>::getTrumpingArea()) {
        return -mWantArea;
    }

    char prefix[sBufSize];
    WordMap::const_iterator rowIt = rowMap.begin();
    const WordMap::const_iterator rowEnd = rowMap.end();
    while (rowIt != rowEnd) {
        const char *candidate = rowIt->first;

        // Copy the candidate into the columns one character at a time,
        // stopping at the first column that subTrix() rejects.
        int col = 0;
        for ( ; col < wantWide; col++) {
            wordCols[col][haveTall] = prefix[col] = candidate[col];
            if ( ! mColTrie.subTrix(wordCols[col], haveTall) ) {
                break;
            }
        }

        if (col < wantWide) {
            // Rejected at column `col`: build a key that sorts after every
            // word sharing the first col+1 characters and jump straight to it.
            prefix[col+1] = '{';                // ASCII decimal 123, the char after 'z'
            prefix[col+2] = '\0';               // NULL-terminate the C-string
            rowIt = rowMap.upper_bound(prefix); // first word-node alphabetically > prefix
            continue;                           // iterator already advanced
        }

        // Every column accepted this row: record it and recurse one level deeper.
        mRowWordsNow[haveTall] = candidate;
        const int area = findWordRectRowsMapUpper(haveTall+1, rowMap, wordCols);
        if (area > 0) {
            return area;
        }
        ++rowIt;
    }
    return 0;
}
int main(int argc, char *argv[]) { ECArgs args( argc, argv ); assert(args.nargs() == 1); ECString path(args.arg(0)); cerr << "At start of pHsgt" << endl; for(int n = 0 ; n < MAXNUMNTS ; n++) numTerm[n] = 0; Term::init( path ); readHeadInfo(path); int sentenceCount = 0; ECString s1lex("^^"); ECString s1nm("S1"); int s1Int = Term::get(s1nm)->toInt(); UnitRules ur; ur.init(); while(cin) { //if(sentenceCount > 4000) break; if(sentenceCount%10000 == 0) cerr << sentenceCount << endl; InputTree parse; cin >> parse; //cerr << parse << endl; if(!cin) break; if(parse.length() == 0) break; EcSPairs wtList; parse.make(wtList); InputTree* par; par = &parse; addWwData(par); incrWordData(s1Int, s1lex); ur.gatherData(par); sentenceCount++; } ECString resultsString(path); resultsString += "pSgT.txt"; ofstream resultsStream(resultsString.c_str()); assert(resultsStream); int numWords = 0; resultsStream << " \n"; //leave space for number of words; resultsStream.precision(3); ECString lastWord; int wordFreq = 0; WordMap::iterator wmi = wordMap.begin(); resultsStream << wordMap.size() << "\n\n"; for( ; wmi != wordMap.end() ; wmi++) { ECString w = (*wmi).first; resultsStream << w << "\t"; PosD& posd = (*wmi).second; PosD::iterator pdi = posd.begin(); int count = 0; for( ; pdi != posd.end(); pdi++) { int posInt = (*pdi).first; int c = (*pdi).second; count += c; float p = (float)c/(float)numTerm[posInt]; resultsStream << posInt << " " << p << " "; } resultsStream << "| " << count << "\n"; } ur.setData(path); return 1; }