// Processes the first `sc` trees of `trainingData` in order.
//
// Each tree is passed to makeSent() and then to gatherFfCounts() with
// inHpos == 0.  When whichInt equals TTCALC, one additional callProcG() is
// issued per sentence for a childless "STOP" tree whose start and finish are
// both the sentence's finish() value.
//
// trainingData - array of tree pointers; entries [0, sc) are read.
// sc           - number of entries to process.
void
goThroughSents(InputTree* trainingData[1301], int sc)
{
  for(int idx = 0 ; idx < sc ; ++idx)
    {
      InputTree* sentTree = trainingData[idx];
      makeSent(sentTree);
      gatherFfCounts(sentTree, 0);

      // Only the TTCALC mode gets the trailing STOP event.
      if(whichInt != TTCALC) continue;

      list<InputTree*> noKids;
      // The word argument always evaluates to "" on this path (we are inside
      // the TTCALC case); the expression is kept as originally written.
      InputTree stopTree(sentTree->finish(), sentTree->finish(),
                         whichInt==TTCALC ? "" : "^^",
                         "STOP", "",
                         noKids, NULL, NULL);
      // The STOP tree acts as its own head.
      stopTree.headTree() = &stopTree;

      TreeHist stopHist(&stopTree, 0);
      stopHist.hpos = 0;
      callProcG(&stopHist);
    }
}
// Visits `tree` bottom-up and issues callProcG() events for it.
//
// All subtrees are processed recursively first; a child is called with
// inHpos == 1 when its index equals the head position returned by
// headPosFromTree(tree), and with 0 otherwise.  Afterwards the global curTree
// is set to `tree` and a TreeHist for it is passed to callProcG() zero or more
// times, depending on Feature::whichInt:
//   * terminal node (Term::terminal_p): one call under TTCALC, else none;
//   * HCALC / UCALC: one call when inHpos is zero, else none;
//   * single child carrying the same term as `tree`: no calls;
//   * otherwise: one call per child position that satisfies the mode's
//     relation to the head position, preceded by a pos == -1 call for
//     LMCALC/LCALC and followed by a pos == <child count> call for
//     RCALC/RMCALC.
//
// tree   - node to process (dereferenced unconditionally).
// inHpos - nonzero when the caller considers `tree` to be at the head
//          position of its parent.
void
gatherFfCounts(InputTree* tree, int inHpos)
{
  InputTrees& kids = tree->subTrees();

  // Head position stays 0 for childless nodes.
  int headIdx = 0;
  if(kids.size() != 0) headIdx = headPosFromTree(tree);

  // Children first, so events are produced in post-order.
  int idx = 0;
  for(InputTrees::iterator kid = kids.begin() ; kid != kids.end() ; ++kid, ++idx)
    gatherFfCounts(*kid, idx == headIdx ? 1 : 0);

  curTree = tree;
  TreeHist hist(tree, 0);
  hist.pos = idx;            // idx now equals the number of children
  hist.hpos = headIdx;

  const Term* lhs = Term::get(tree->term());
  if(lhs->terminal_p())
    {
      if(Feature::whichInt == TTCALC) callProcG(&hist);
      return;
    }

  if(Feature::whichInt == HCALC || Feature::whichInt == UCALC)
    {
      if(!inHpos) callProcG(&hist);
      return;
    }

  // A node whose only child repeats its own term produces no events.
  if(kids.size() == 1 && kids.front()->term() == tree->term()) return;

  // Event for the position before the first child.
  hist.pos = -1;
  if(Feature::whichInt == LMCALC) callProcG(&hist);
  if(Feature::whichInt == LCALC) callProcG(&hist);

  // One pass over the child positions; only the position index is used.
  idx = 0;
  for(InputTrees::iterator kid = kids.begin() ; kid != kids.end() ; ++kid, ++idx)
    {
      hist.pos = idx;
      if(Feature::whichInt == MCALC  && idx == headIdx) callProcG(&hist);
      if(Feature::whichInt == LCALC  && idx <  headIdx) callProcG(&hist);
      if(Feature::whichInt == RCALC  && idx >  headIdx) callProcG(&hist);
      if(Feature::whichInt == RUCALC && idx == headIdx) callProcG(&hist);
      if(Feature::whichInt == RMCALC && idx >= headIdx) callProcG(&hist);
      if(Feature::whichInt == LMCALC && idx <= headIdx) callProcG(&hist);
    }

  // Event for the position after the last child.
  hist.pos = idx;
  if(Feature::whichInt == RCALC) callProcG(&hist);
  if(Feature::whichInt == RMCALC) callProcG(&hist);
}
int main(int argc, char *argv[]) { struct rlimit core_limits; core_limits.rlim_cur = 0; core_limits.rlim_max = 0; setrlimit( RLIMIT_CORE, &core_limits ); ECArgs args( argc, argv ); assert(args.nargs() == 2); if(args.isset('N')) numGram = atoi(args.value('N').c_str()); Feature::setLM(); if(args.isset('L')) Term::Language = args.value('L'); string path( args.arg( 1 ) ); if(Term::Language == "Ch") readHeadInfoCh(path); else readHeadInfo(path); string conditionedType( args.arg(0) ); cerr << "start kn3Counts " << conditionedType << endl; int minCount = 1; if(args.isset('m')) minCount = atoi(args.value('m').c_str()); Feat::Usage = KNCOUNTS; FeatureTree::minCount = minCount; Term::init(path); readHeadInfo(path); Pst pst(path); addSubFeatureFns(); Feature::assignCalc(conditionedType); FeatureTree::root() = new FeatureTree(); Feature::init(path, conditionedType); int wI = Feature::whichInt; int ceFunInt = Feature::conditionedFeatureInt[wI]; Feature::conditionedEvent = SubFeature::Funs[ceFunInt]; string trainingString( path ); int sentenceCount = 0; for( ; ; sentenceCount++) { if(sentenceCount%10000 == 1) { cerr << "rCounts " << sentenceCount << endl; } InputTree correct; cin >> correct; //if(sentenceCount > 1000) break; if(correct.length() == 0) break; //cerr <<sentenceCount << correct << endl; EcSPairs wtList; correct.make(wtList); InputTree* par; int strt = 0; par = &correct; makeSent(par); curS = par; gatherFfCounts(par, 0); if(wI == TTCALC || wI == WWCALC) { list<InputTree*> dummy2; InputTree stopInputTree(par->finish(),par->finish(), wI==TTCALC ? 
"" : "^^", "STOP","", dummy2,NULL,NULL); stopInputTree.headTree() = &stopInputTree; TreeHist treeh(&stopInputTree,0); treeh.hpos = 0; callProcG(&treeh); } } finalProbComputation(); string resS(path); resS += conditionedType; resS += ".g"; ofstream res(resS.c_str()); assert(res); FTreeMap& fts = FeatureTree::root()->subtree; FTreeMap::iterator fti = fts.begin(); for( ; fti != fts.end() ; fti++) { int asVal = (*fti).first; (*fti).second->printFTree(asVal, res); } res.close(); cout << "Tot words: " << totWords << endl; cout << "Total params for " << conditionedType << " = " << FeatureTree::totParams << endl; }
// Visits `tree` bottom-up and issues callProcG() events for it.
//
// Feature::whichInt is read once on entry.  All subtrees are then processed
// recursively; a child is called with inHpos == 1 when its index equals the
// head position returned by headPosFromTree(tree), and with 0 otherwise.
// Afterwards a TreeHist for `tree` is passed to callProcG() zero or more
// times, depending on the mode read at entry:
//   * HCALC / UCALC: one call when inHpos is zero, else none (this test comes
//     before the terminal test);
//   * terminal node (Term::terminal_p): one call under TTCALC or WWCALC,
//     else none;
//   * single child carrying the same term as `tree`: no calls;
//   * otherwise: one call per child position that satisfies the mode's
//     relation to the head position, preceded by a pos == -1 call for
//     LMCALC/LCALC and followed by a pos == <child count> call for
//     RCALC/RMCALC.
//
// tree   - node to process (dereferenced unconditionally).
// inHpos - nonzero when the caller considers `tree` to be at the head
//          position of its parent.
void
gatherFfCounts(InputTree* tree, int inHpos)
{
  int wI = Feature::whichInt;
  InputTrees& kids = tree->subTrees();

  // Head position stays 0 for childless nodes.
  int headIdx = 0;
  if(kids.size() != 0) headIdx = headPosFromTree(tree);

  // Children first, so events are produced in post-order.
  int idx = 0;
  for(InputTrees::iterator kid = kids.begin() ; kid != kids.end() ; ++kid, ++idx)
    gatherFfCounts(*kid, idx == headIdx ? 1 : 0);

  TreeHist hist(tree, 0);
  hist.pos = idx;            // idx now equals the number of children
  hist.hpos = headIdx;

  const Term* lhs = Term::get(tree->term());

  if(wI == HCALC || wI == UCALC)
    {
      if(!inHpos) callProcG(&hist);
      return;
    }

  if(lhs->terminal_p())
    {
      if(wI == TTCALC || wI == WWCALC) callProcG(&hist);
      return;
    }

  // A node whose only child repeats its own term produces no events.
  if(kids.size() == 1 && kids.front()->term() == tree->term()) return;

  // Event for the position before the first child.
  hist.pos = -1;
  if(wI == LMCALC) callProcG(&hist);
  if(wI == LCALC) callProcG(&hist);

  // One pass over the child positions; only the position index is used.
  idx = 0;
  for(InputTrees::iterator kid = kids.begin() ; kid != kids.end() ; ++kid, ++idx)
    {
      hist.pos = idx;
      if(wI == MCALC  && idx == headIdx) callProcG(&hist);
      if(wI == LCALC  && idx <  headIdx) callProcG(&hist);
      if(wI == RCALC  && idx >  headIdx) callProcG(&hist);
      if(wI == RUCALC && idx == headIdx) callProcG(&hist);
      if(wI == RMCALC && idx >= headIdx) callProcG(&hist);
      if(wI == LMCALC && idx <= headIdx) callProcG(&hist);
    }

  // Event for the position after the last child.
  hist.pos = idx;
  if(wI == RCALC) callProcG(&hist);
  if(wI == RMCALC) callProcG(&hist);
}