// Walks the tree rooted at `tree` and records word data for every node that
// carries a non-empty word. Children are visited before the node itself.
// For each such node, the node's term is looked up with Term::get, converted
// to its integer id, and passed to incrWordData together with the word.
void addWwData(InputTree* tree)
{
    // Recurse into all sub-trees first.
    InputTrees& children = tree->subTrees();
    for (InputTrees::iterator child = children.begin(); child != children.end(); ++child)
    {
        addWwData(*child);
    }

    // Only nodes that carry a word contribute a (term, word) observation.
    if (tree->word() != "")
    {
        const Term* term = Term::get(tree->term());
        assert(term);  // the node's term name must be known to Term
        int termId = term->toInt();
        ECString word = tree->word();
        incrWordData(termId, word);
    }
}
int main(int argc, char *argv[]) { ECArgs args( argc, argv ); assert(args.nargs() == 1); ECString path(args.arg(0)); cerr << "At start of pHsgt" << endl; for(int n = 0 ; n < MAXNUMNTS ; n++) numTerm[n] = 0; Term::init( path ); readHeadInfo(path); int sentenceCount = 0; ECString s1lex("^^"); ECString s1nm("S1"); int s1Int = Term::get(s1nm)->toInt(); UnitRules ur; ur.init(); while(cin) { //if(sentenceCount > 4000) break; if(sentenceCount%10000 == 0) cerr << sentenceCount << endl; InputTree parse; cin >> parse; //cerr << parse << endl; if(!cin) break; if(parse.length() == 0) break; EcSPairs wtList; parse.make(wtList); InputTree* par; par = &parse; addWwData(par); incrWordData(s1Int, s1lex); ur.gatherData(par); sentenceCount++; } ECString resultsString(path); resultsString += "pSgT.txt"; ofstream resultsStream(resultsString.c_str()); assert(resultsStream); int numWords = 0; resultsStream << " \n"; //leave space for number of words; resultsStream.precision(3); ECString lastWord; int wordFreq = 0; WordMap::iterator wmi = wordMap.begin(); resultsStream << wordMap.size() << "\n\n"; for( ; wmi != wordMap.end() ; wmi++) { ECString w = (*wmi).first; resultsStream << w << "\t"; PosD& posd = (*wmi).second; PosD::iterator pdi = posd.begin(); int count = 0; for( ; pdi != posd.end(); pdi++) { int posInt = (*pdi).first; int c = (*pdi).second; count += c; float p = (float)c/(float)numTerm[posInt]; resultsStream << posInt << " " << p << " "; } resultsStream << "| " << count << "\n"; } ur.setData(path); return 1; }