/*
 * Gathers counts for the first `sc` trees stored in `trainingData`.
 *
 * For each tree, in order: makeSent() is called on it, gatherFfCounts() is
 * run on it with a second argument of 0, and -- only when whichInt equals
 * TTCALC -- a synthetic "STOP" InputTree whose first two constructor
 * arguments are both the tree's finish() is built, made its own head tree,
 * wrapped in a TreeHist (hpos forced to 0) and passed to callProcG().
 *
 * The caller must guarantee that sc does not exceed the number of valid
 * entries in trainingData; no bounds check is done here.
 */
void
goThroughSents(InputTree* trainingData[1301], int sc)
{
  for(int idx = 0 ; idx < sc ; ++idx)
    {
      InputTree* sentTree = trainingData[idx];
      makeSent(sentTree);
      gatherFfCounts(sentTree, 0);

      // Only the TTCALC case needs the extra STOP event.
      if(whichInt != TTCALC) continue;

      // NOTE(review): inside this branch the "^^" arm of the ternary below
      // looks unreachable; main() runs the same code for WWCALC as well.
      // Left exactly as written -- confirm which behaviour is intended.
      list<InputTree*> noKids;
      InputTree stopInputTree(sentTree->finish(), sentTree->finish(),
			      whichInt==TTCALC ? "" : "^^",
			      "STOP", "",
			      noKids, NULL, NULL);
      stopInputTree.headTree() = &stopInputTree;

      TreeHist stopHist(&stopInputTree, 0);
      stopHist.hpos = 0;
      callProcG(&stopHist);
    }
}
/*
 * Tells whether the tree held by `treeh` finishes at an "effective end".
 *
 * With pos = treeh->tree->finish():
 *   - pos >  endPos : prints "Pos > endPos" on cout and returns 0;
 *   - pos == endPos : returns 1;
 *   - otherwise returns 1 if the term at sentence[pos] is "." or its word
 *     is ";", or if at least two positions remain (pos+2 <= endPos), the
 *     word is "," and the next word is "''";
 *   - returns 0 in every other case.
 */
int
tree_effEnd(TreeHist* treeh)
{
  const int pos = treeh->tree->finish();

  if(pos > endPos)
    {
      cout << "Pos > endPos" << endl;
      return 0;
    }
  if(pos == endPos) return 1;

  ECString wrd = sentence[pos]->word();
  ECString trm = sentence[pos]->term();

  if(trm == "." || wrd == ";") return 1;

  // Fewer than two positions left: the comma look-ahead below is skipped.
  if((pos+2) > endPos) return 0;

  // ,'' acts like end of sentence.
  // (Original note: "ans = 2 for alt version???" for the plain-comma case.)
  if(wrd == "," && sentence[pos+1]->word() == "''") return 1;

  return 0;
}
/*
 * Buckets the width of the tree held by `treeh`.
 *
 * The width is tree->finish() - tree->start().  The result is the index
 * (0..8) of the first upper bound in {1, 3, 6, 10, 15, 21, 28, 36, 999}
 * that is >= the width.
 *
 * A width above 999 is treated as a programming error: the assert fires in
 * builds where assertions are enabled; with NDEBUG the function returns -1.
 */
int
tree_size(TreeHist* treeh)
{
  static const int bucs[9] = {1, 3, 6, 10, 15, 21, 28, 36, 999};
  InputTree* tree = treeh->tree;
  const int sz = tree->finish() - tree->start();
  for(int i = 0 ; i < 9 ; i++)
    if(sz <= bucs[i]) return i;
  // The previous assert("Never get here") tested a non-null string literal,
  // which is always true, so it could never trigger.  Negating the literal
  // makes the check actually fail while keeping the message in the output.
  assert(!"Never get here");
  return -1;
}
/*
 * Returns the integer id of the terminal at a boundary position chosen from
 * the history `treeh`.
 *
 * A word position `wpos` is selected as follows (pos/hpos come from treeh,
 * n is the number of subtrees of treeh->tree):
 *   - pos < 0 or n == 0 : tree->start() - 1
 *   - pos == n          : tree->finish()
 *   - otherwise, using the pos-th subtree st:
 *       st->start() - 1  when pos < hpos, or pos == hpos and blInd != 0
 *       st->finish()     when pos > hpos, or pos == hpos and blInd == 0
 *
 * If wpos is negative or equals endPos the stop term's id is returned;
 * otherwise the id of the term of sentence[wpos].
 */
int
tree_B(TreeHist* treeh, int blInd)
{
  InputTree* tree = treeh->tree;
  const int pos = treeh->pos;
  const int hpos = treeh->hpos;
  const int numKids = tree->subTrees().size();
  assert(pos <= numKids);

  int wpos;
  if(pos < 0 || numKids == 0)
    {
      wpos = tree->start()-1;
    }
  else if(pos == numKids)
    {
      wpos = tree->finish();
    }
  else
    {
      // Step to the pos-th child (0 <= pos < numKids here).
      InputTreesIter kid = tree->subTrees().begin();
      int stepped = 0;
      while(kid != tree->subTrees().end() && stepped < pos)
	{
	  stepped++;
	  kid++;
	}
      if(kid != tree->subTrees().end())
	{
	  InputTree* st = *kid;
	  // Left edge for children before the head (and for the head itself
	  // when blInd is set); right edge otherwise.
	  const bool useLeftEdge = (pos < hpos) || (pos == hpos && blInd);
	  if(useLeftEdge) wpos = st->start()-1;
	  else wpos = st->finish();
	}
    }

  assert(wpos <= endPos);
  if(wpos < 0 || wpos == endPos) return Term::stopTerm->toInt();
  return Term::get(sentence[wpos]->term())->toInt();
}
int main(int argc, char *argv[]) { struct rlimit core_limits; core_limits.rlim_cur = 0; core_limits.rlim_max = 0; setrlimit( RLIMIT_CORE, &core_limits ); ECArgs args( argc, argv ); assert(args.nargs() == 2); if(args.isset('N')) numGram = atoi(args.value('N').c_str()); Feature::setLM(); if(args.isset('L')) Term::Language = args.value('L'); string path( args.arg( 1 ) ); if(Term::Language == "Ch") readHeadInfoCh(path); else readHeadInfo(path); string conditionedType( args.arg(0) ); cerr << "start kn3Counts " << conditionedType << endl; int minCount = 1; if(args.isset('m')) minCount = atoi(args.value('m').c_str()); Feat::Usage = KNCOUNTS; FeatureTree::minCount = minCount; Term::init(path); readHeadInfo(path); Pst pst(path); addSubFeatureFns(); Feature::assignCalc(conditionedType); FeatureTree::root() = new FeatureTree(); Feature::init(path, conditionedType); int wI = Feature::whichInt; int ceFunInt = Feature::conditionedFeatureInt[wI]; Feature::conditionedEvent = SubFeature::Funs[ceFunInt]; string trainingString( path ); int sentenceCount = 0; for( ; ; sentenceCount++) { if(sentenceCount%10000 == 1) { cerr << "rCounts " << sentenceCount << endl; } InputTree correct; cin >> correct; //if(sentenceCount > 1000) break; if(correct.length() == 0) break; //cerr <<sentenceCount << correct << endl; EcSPairs wtList; correct.make(wtList); InputTree* par; int strt = 0; par = &correct; makeSent(par); curS = par; gatherFfCounts(par, 0); if(wI == TTCALC || wI == WWCALC) { list<InputTree*> dummy2; InputTree stopInputTree(par->finish(),par->finish(), wI==TTCALC ? 
"" : "^^", "STOP","", dummy2,NULL,NULL); stopInputTree.headTree() = &stopInputTree; TreeHist treeh(&stopInputTree,0); treeh.hpos = 0; callProcG(&treeh); } } finalProbComputation(); string resS(path); resS += conditionedType; resS += ".g"; ofstream res(resS.c_str()); assert(res); FTreeMap& fts = FeatureTree::root()->subtree; FTreeMap::iterator fti = fts.begin(); for( ; fti != fts.end() ; fti++) { int asVal = (*fti).first; (*fti).second->printFTree(asVal, res); } res.close(); cout << "Tot words: " << totWords << endl; cout << "Total params for " << conditionedType << " = " << FeatureTree::totParams << endl; }