/*
  Prepares the feature machinery for parsing.

  Sets Feat::Usage to PARSE, registers the edge and ordinary sub-feature
  functions, and then walks the first Feature::numCalcs entries of the
  calculation-name table.  For every name Feature::init(path, name) is
  called.  For every name except "s" and "t" the file <path><name>.g is
  opened and a FeatureTree is built from it.  For every name that has a
  tree, except "ww", Feature::readLam is called as well.

  path: prefix prepended to each data file name.
*/
void
MeChart::
init(ECString path)
{
  Feat::Usage = PARSE;
  addEdgeSubFeatureFns();
  addSubFeatureFns();

  // One name per calculation slot; the trailing "dummy" entries pad the table.
  // Assumes Feature::numCalcs never exceeds MAXNUMCALCS -- TODO confirm.
  ECString calcNames[MAXNUMCALCS] = {"r","h","u","m","l","lm","ru","rm","tt",
				     "s","t","ww","dummy","dummy","dummy"};

  for(int calcIdx = 0 ; calcIdx < Feature::numCalcs ; calcIdx++)
    {
      ECString calcName = calcNames[calcIdx];
      Feature::init(path, calcName);

      // "s" and "t" are initialised above but have no .g tree file.
      const bool hasTreeFile = !(calcName == "s" || calcName == "t");
      if(hasTreeFile)
	{
	  ECString treeFileName(path);
	  treeFileName += calcName;
	  treeFileName += ".g";

	  ifstream treeStream(treeFileName.c_str());
	  if(!treeStream)
	    {
	      cerr << "could not find " << treeFileName << endl;
	    }
	  assert(treeStream);

	  // The result is deliberately not kept: according to the original
	  // comment the constructor "puts it in root".
	  new FeatureTree(treeStream);

	  // "ww" has a tree but no lambdas are read for it.
	  if(!(calcName == "ww"))
	    {
	      Feature::readLam(calcIdx, calcName, path);
	    }
	}
    }

  // Larger of the TCALC and SCALC totals.  Only the disabled sanity checks
  // below consume this value.
  const int tTotal = Feature::total[TCALC];
  const int sTotal = Feature::total[SCALC];
  int requiredCntxSz = (sTotal > tTotal) ? sTotal : tTotal;
  //assert(CntxArray::sz == requiredCntxSz);
  //assert(CntxArray::sz == (Feature::total[UCALC] -1));
}
int main(int argc, char *argv[]) { struct rlimit core_limits; core_limits.rlim_cur = 0; core_limits.rlim_max = 0; setrlimit( RLIMIT_CORE, &core_limits ); ECArgs args( argc, argv ); assert(args.nargs() == 2); conditionedType = args.arg(0); cerr << "start trainRs: " << conditionedType << endl; ECString path( args.arg( 1 ) ); if(args.isset('L')) Feature::setLM(); Term::init(path); readHeadInfo(path); Pst pst(path); if(Feature::isLM) ClassRule::readCRules(path); addSubFeatureFns(); Feature::init(path, conditionedType); whichInt = Feature::whichInt; int ceFunInt = Feature::conditionedFeatureInt[Feature::whichInt]; Feature::conditionedEvent = SubFeature::Funs[ceFunInt]; Feat::Usage = PARSE; ECString ftstr(path); ftstr += conditionedType; ftstr += ".g"; ifstream fts(ftstr.c_str()); if(!fts) { cerr << "Could not find " << ftstr << endl; assert(fts); } tRoot = new FeatureTree(fts); //puts it in root; cout.precision(3); cerr.precision(3); lamInit(); InputTree* trainingData[1001]; int usedCount = 0; sentenceCount = 0; for( ; ; sentenceCount++) { if(sentenceCount%10000 == 1) { // cerr << conditionedType << ".tr " //<< sentenceCount << endl; } if(usedCount >= 1000) break; InputTree* correct = new InputTree; cin >> (*correct); if(correct->length() == 0) break; if(!cin) break; EcSPairs wtList; correct->make(wtList); InputTree* par; par = correct; trainingData[usedCount++] = par; } if(Feature::isLM) pickLogBases(trainingData,sentenceCount); procGSwitch = true; for(pass = 0 ; pass < 10 ; pass++) { if(pass%2 == 1) cout << "Pass " << pass << endl; goThroughSents(trainingData, sentenceCount); updateLambdas(); //printLambdas(cout); zeroData(); } ECString resS(path); resS += conditionedType; resS += ".lambdas"; ofstream res(resS.c_str()); res.precision(3); printLambdas(res); printLambdas(cout); cout << "Total params = " << FeatureTree::totParams << endl; cout << "Done: " << (int)sbrk(0) << endl; }
int main(int argc, char *argv[]) { struct rlimit core_limits; core_limits.rlim_cur = 0; core_limits.rlim_max = 0; setrlimit( RLIMIT_CORE, &core_limits ); ECArgs args( argc, argv ); assert(args.nargs() == 2); if(args.isset('N')) numGram = atoi(args.value('N').c_str()); Feature::setLM(); if(args.isset('L')) Term::Language = args.value('L'); string path( args.arg( 1 ) ); if(Term::Language == "Ch") readHeadInfoCh(path); else readHeadInfo(path); string conditionedType( args.arg(0) ); cerr << "start kn3Counts " << conditionedType << endl; int minCount = 1; if(args.isset('m')) minCount = atoi(args.value('m').c_str()); Feat::Usage = KNCOUNTS; FeatureTree::minCount = minCount; Term::init(path); readHeadInfo(path); Pst pst(path); addSubFeatureFns(); Feature::assignCalc(conditionedType); FeatureTree::root() = new FeatureTree(); Feature::init(path, conditionedType); int wI = Feature::whichInt; int ceFunInt = Feature::conditionedFeatureInt[wI]; Feature::conditionedEvent = SubFeature::Funs[ceFunInt]; string trainingString( path ); int sentenceCount = 0; for( ; ; sentenceCount++) { if(sentenceCount%10000 == 1) { cerr << "rCounts " << sentenceCount << endl; } InputTree correct; cin >> correct; //if(sentenceCount > 1000) break; if(correct.length() == 0) break; //cerr <<sentenceCount << correct << endl; EcSPairs wtList; correct.make(wtList); InputTree* par; int strt = 0; par = &correct; makeSent(par); curS = par; gatherFfCounts(par, 0); if(wI == TTCALC || wI == WWCALC) { list<InputTree*> dummy2; InputTree stopInputTree(par->finish(),par->finish(), wI==TTCALC ? 
"" : "^^", "STOP","", dummy2,NULL,NULL); stopInputTree.headTree() = &stopInputTree; TreeHist treeh(&stopInputTree,0); treeh.hpos = 0; callProcG(&treeh); } } finalProbComputation(); string resS(path); resS += conditionedType; resS += ".g"; ofstream res(resS.c_str()); assert(res); FTreeMap& fts = FeatureTree::root()->subtree; FTreeMap::iterator fti = fts.begin(); for( ; fti != fts.end() ; fti++) { int asVal = (*fti).first; (*fti).second->printFTree(asVal, res); } res.close(); cout << "Tot words: " << totWords << endl; cout << "Total params for " << conditionedType << " = " << FeatureTree::totParams << endl; }