void processG(int i, FeatureTree* ginfo[], TreeHist* treeh, int cVal) { Feature* feat = Feature::fromInt(i, Feature::whichInt); /* e.g., g(rtlu) starts from where g(rtl) left off (after tl)*/ int searchStartInd = feat->startPos; FeatureTree* strt = ginfo[searchStartInd]; assert(strt); SubFeature* sf = SubFeature::fromInt(feat->subFeat, Feature::whichInt); int nfeatV = (*(sf->fun))(treeh); nfeatVs[i] = nfeatV; int ufi = sf->usf; FeatureTree* histPt = strt->next(nfeatV, feat->auxCnt); assert(histPt); ginfo[i] = histPt; //cerr << "i" << i << " " << nfeatV << " " // << histPt->ind << " " << histPt->count << " " << cVal << endl; histPt->feats[cVal].cnt()++; //cerr << "HP " << histPt->feats[cVal].cnt() << endl; if(i == numGram) { incrHistPt(histPt, cVal,i); } }
int main(int argc, char *argv[]) { struct rlimit core_limits; core_limits.rlim_cur = 0; core_limits.rlim_max = 0; setrlimit( RLIMIT_CORE, &core_limits ); ECArgs args( argc, argv ); assert(args.nargs() == 3); ECString conditionedType = args.arg(0); Feat::Usage = SEL; percentDesiredFeatures = (float)atoi(args.arg(1).c_str())/100.0; cerr << "start selFeats: " << conditionedType << " " << percentDesiredFeatures << endl; ECString path( args.arg( 2 ) ); ECString fHp(path); fHp += conditionedType; fHp += ".ff"; Feature::init(path, conditionedType); ifstream fHps(fHp.c_str()); new FeatureTree(fHps); whichInt = Feature::whichInt; cerr << "Before doRanking" << endl; doRanking(); totDesiredFeatures = (int) (percentDesiredFeatures * totStates); cerr << "Before markFeats" << endl; markFeats(); ECString resS(path); resS += conditionedType; resS += ".f"; ofstream res(resS.c_str()); FeatureTree* root = FeatureTree::roots(whichInt); FTreeMap::iterator ftmIter = root->subtree.begin(); cerr << "About to print featuretree" << endl; for( ; ftmIter != root->subtree.end() ; ftmIter++) { int asVal = (*ftmIter).first; FeatureTree* subRoot = root->subtree[asVal]; subRoot->printFTree(asVal, res); } res << "\n\nSelected " << totSelectedStates << " of " << totStates << endl; return 0; }
void processG(int i, FeatureTree* ginfo[], TreeHist* treeh, int cVal) { ginfo[i] = NULL; Feature* feat = Feature::fromInt(i, whichInt); /* e.g., g(rtlu) starts from where g(rtl) left off (after tl)*/ int searchStartInd = feat->startPos; FeatureTree* strt = ginfo[searchStartInd]; bucketVals[i] = 0; lambdas[i] = 0; unsmoothedPs[i] = 0; if(!strt) { if(i == 1) cerr << "no start for i = 1\n" << *curTree << endl; return; } SubFeature* sf = SubFeature::fromInt(feat->subFeat, whichInt); int nfeatV = (*(sf->fun))(treeh); //if(procGSwitch) cerr << "pg " << i << ", " << cVal << ", " << nfeatV << endl; FeatureTree* histPt = strt->follow(nfeatV, feat->auxCnt); ginfo[i] = histPt; if(i == 1) { unsmoothedPs[0] = 0; FeatMap::iterator fti1 = histPt->feats.find(cVal); float unsmoothedVal1; if(fti1 == histPt->feats.end()) unsmoothedVal1 = 0; else unsmoothedVal1 = (*fti1).second.g(); unsmoothedPs[1] = unsmoothedVal1; lambdas[1] = 1; if(unsmoothedPs[1] == 0) { /* if(pass == 0) cerr << "Zero at level 1 " << treeh->pos << "\n" << *curTree << endl; */ unsmoothedPs[1] = .0001; } return; } if(!histPt) { return; } int b; if(Feature::isLM) { int sz = histPt->feats.size(); assert(sz > 0); float estm = ((float)histPt->count / (float)sz); b = Smoother::bucket(estm,i); } else { float estm = histPt->count * unsmoothedPs[1]; b = Smoother::bucket(estm); } FeatMap::iterator fti = histPt->feats.find(cVal); float unsmoothedVal; if(fti == histPt->feats.end()) unsmoothedVal = 0; else unsmoothedVal = (*fti).second.g(); float lam = Feature::getLambda(whichInt, i, b); lambdas[i] = lam; unsmoothedPs[i] = unsmoothedVal; bucketVals[i] = b; //if(procGSwitch) cerr << i << " " << nfeatV << " " << histPt->featureInt << " " // << estm << " " << b << " " << unsmoothedVal << endl; }
float Bchart:: meFHProb(const Term* trm, FullHist& fh, int whichInt) { Edge* edge = fh.e; int pos = 0; /* the left to right position we are working on is either the far left (0) or the far right */ if(!globalGi) {} //else if(edge->item() != globalGi->index(0)) ; else if(whichInt == RUCALC || whichInt == RMCALC || whichInt == RCALC) pos = globalGi->size()-1; fh.pos = pos; int cVal = trm->toInt(); if(printDebug() > 138) { cerr << "meP " << *trm << " " << cVal << " " << whichInt << " "; if(edge) cerr << *edge << endl; else cerr << fh.preTerm << endl; } //int subfVals[MAXNUMFS]; FeatureTree* ginfo[MAXNUMFS]; ginfo[0] = FeatureTree::roots(whichInt); assert(ginfo[0]); float smoothedPs[MAXNUMFS]; float ans = 1; for(int i = 1 ; i <= Feature::total[whichInt] ; i++) { ginfo[i] = NULL; Feature* feat = Feature::fromInt(i, whichInt); /* e.g., g(rtlu) starts from where g(rtl) left off (after tl)*/ int searchStartInd = feat->startPos; FeatureTree* strt = ginfo[searchStartInd]; if(!strt) { continue; } SubFeature* sf = SubFeature::fromInt(feat->subFeat, whichInt); int usf = sf->usf; int nfeatV = (edgeFnsArray[usf])(&fh); FeatureTree* histPt = strt->follow(nfeatV, feat->auxCnt); ginfo[i] = histPt; if(i == 1) { smoothedPs[0] = 1; assert(histPt); Feat* f =histPt->feats.find(cVal); if(!f) { return 0.0; } smoothedPs[1] = f->g(); if(printDebug() > 238) { cerr << i << " " << nfeatV << " " << smoothedPs[1] << endl; } for(int j = 2; j <= Feature::total[whichInt] ; j++) smoothedPs[j] = 0; ans = smoothedPs[1]; continue; } if(nfeatV < -1) { if(printDebug() > 128) { cerr<<"p"<<whichInt<< "(" << cVal << "|"; if(edge) cerr << *edge; else cerr << fh.preTerm; cerr << ") = " << ans << endl; } return ans; } if(!histPt) { continue; } int b; if(Feature::isLM) { /*new bucketing */ float sz = (float)histPt->feats.size(); float estm = (float)histPt->count / sz; assert(i >= 2); b = bucket(estm, whichInt,i); } else { /* old bucketing*/ float estm; //estm = histPt->count * smoothedPs[1]; estm = histPt->count * 0.1; b = bucket(estm); } Feat* ft = histPt->feats.find(cVal); float unsmoothedVal; if(!ft) unsmoothedVal = 0; else unsmoothedVal = ft->g(); float lam = Feature::getLambda(whichInt, i, b); float uspathprob = lam*unsmoothedVal; float osmoothedVal = smoothedPs[searchStartInd]; //float osmoothedVal = smoothedPs[i-1]; //for deleted interp. float smpathprob = (1-lam)*osmoothedVal; float nsmoothedVal = uspathprob+smpathprob; if(printDebug() > 238) { cerr << i << " " << nfeatV << " " << usf << " " << b <<" "<<unsmoothedVal << " " << lam << " " << nsmoothedVal << endl; } smoothedPs[i] = nsmoothedVal; ans *= (nsmoothedVal/osmoothedVal); } if(printDebug() > 128) { cerr<<"p"<<whichInt<< "(" << cVal << "|"; if(edge) cerr << *edge; else cerr << fh.preTerm; cerr << ") = " << ans << endl; } return ans; }
float MeChart:: meProb(int cVal, FullHist* h, int whichInt) { if(printDebug() > 68) { prDp(); cerr << "meP" << whichInt << "(" << cVal << " | " << *h << ")" <<endl; } FeatureTree* ginfo[MAXNUMFS]; ginfo[0] = FeatureTree::roots(whichInt); float smoothedPs[MAXNUMFS]; Feature::whichInt = whichInt; float ans = 1; for(int i = 1 ; i <= Feature::total[whichInt] ; i++) { int knp = useKn(i,whichInt); ginfo[i] = NULL; Feature* feat = Feature::fromInt(i, whichInt); /* e.g., g(rtlu) starts from where g(rtl) left off (after tl)*/ int searchStartInd = feat->startPos; if(i > 1) smoothedPs[i] = smoothedPs[i-1]; FeatureTree* strt = ginfo[searchStartInd]; if(!strt) { continue; } SubFeature* sf = SubFeature::fromInt(feat->subFeat, whichInt); int nfeatV = (*(sf->fun))(h); FeatureTree* histPt = strt->follow(nfeatV, feat->auxCnt); ginfo[i] = histPt; if(i == 1) { smoothedPs[0] = 1; if(!histPt) { cerr << cVal << " " << whichInt << " " << nfeatV << " " << searchStartInd <<" " << feat->auxCnt << endl; assert(histPt); } Feat* f =histPt->feats.find(cVal); if(!f) { if(printDebug() > 60) { prDp(); cerr << "Zero p" << feat->name << " " << nfeatV << endl; } if(whichInt == HCALC) return 0.001; return 0.0; } smoothedPs[1] = f->g(); if(printDebug() > 68) { prDp(); cerr << i << " " << nfeatV << " " << smoothedPs[1] << endl; } for(int j = 2; j <= Feature::total[whichInt] ; j++) smoothedPs[j] = 0; ans = smoothedPs[1]; continue; } if(!histPt) { continue; } int b; if(Feature::isLM) { /* this section for new bucketing */ float sz = (float)histPt->feats.size(); float estm = (float)histPt->count / sz; b = bucket(estm, whichInt,i); } else { /* this section for old bucketing */ float estm = histPt->count * smoothedPs[1]; b = bucket(estm); } Feat* ft = histPt->feats.find(cVal); float unsmoothedVal; if(!ft) unsmoothedVal = 0; else unsmoothedVal = ft->g(); float lam = 1; if(!knp) lam = Feature::getLambda(whichInt, i, b); float uspathprob = lam*unsmoothedVal; float osmoothedVal; /* First version is for parsing, second for language modeling */ if(Feature::isLM) osmoothedVal = smoothedPs[i-1]; //for deleted interp. else osmoothedVal = smoothedPs[searchStartInd]; float oneMlam = (1-lam); if(knp) { oneMlam = histPt->count/1000.0; } float smpathprob = oneMlam*osmoothedVal; float nsmoothedVal = uspathprob+smpathprob; smoothedPs[i] = nsmoothedVal; ans *= (nsmoothedVal/osmoothedVal); if(printDebug() > 68) { prDp(); cerr << i << " " << nfeatV << " " << b <<" "<<unsmoothedVal << " " << lam << " " << nsmoothedVal << endl; } } if(whichInt == HCALC) ans *= 600; if(printDebug() > 30) { prDp(); cerr<<"p"<<whichInt<< "(" << cVal << "|" << *h << ") = " << ans << endl; } return ans; }
void processG(bool getProb, int whichInt, int i, FeatureTree* ginfo[], TreeHist* treeh, int cVal) { ginfo[i] = NULL; Feature* feat = Feature::fromInt(i, whichInt); // e.g., g(rtlu) starts from where g(rtl) left off (after tl) int searchStartInd = feat->startPos; FeatureTree* strt = ginfo[searchStartInd]; bucketVals[i] = 0; lambdas[i] = 0; unsmoothedPs[i] = 0; if(!strt) { if(i == 1) cerr << "no start for i = 1\n" << *curTree << endl; //if (getProb)unsmoothedPs[i] = unsmoothedPs[i-1]; //???; return; } SubFeature* sf = SubFeature::fromInt(feat->subFeat, whichInt); int nfeatV = (*(sf->fun))(treeh); //if(procGSwitch) cerr << "pg " << i << ", " << cVal << ", " << nfeatV << endl; FeatureTree* histPt = strt->follow(nfeatV, feat->auxCnt); ginfo[i] = histPt; if(i == 1) { unsmoothedPs[0]-0; /* if (!getProb) unsmoothedPs[0] = 0; else unsmoothedPs[0] = 1; */ if (histPt == NULL) { // cerr << "histPt was null" << endl; unsmoothedPs[1] = .0001; return; } FeatMap::iterator fti1 = histPt->feats.find(cVal); float unsmoothedVal1; if(fti1 == histPt->feats.end()) unsmoothedVal1 = 0; else { /* if (getProb) unsmoothedVal1 = (*fti1).second.g()-1; else unsmoothedVal1 = (*fti1).second.g(); */ unsmoothedVal1 = (*fti1).second.g(); } unsmoothedPs[1] = unsmoothedVal1; lambdas[1] = 1; if(unsmoothedPs[1] == 0) { /*if(pass == 0) cerr << "Zero at level 1 " << treeh->pos << "\n" << *curTree << endl;*/ unsmoothedPs[1] = .0001; } /* if (getProb){ //cerr <<i<< " " << nfeatV << " " << cVal << " "<< smoothedPs[1] << endl; for(int j = 2; j <= Feature::total[whichInt] ; j++) unsmoothedPs[j] = 0; } */ return; } if(!histPt) { if (getProb){ unsmoothedPs[i] = unsmoothedPs[i-1]; //???; } return; } int b; if(Feature::isLM || getProb) { int sz = histPt->feats.size(); assert(sz > 0); float estm = ((float)histPt->count / (float)sz); b = Smoother::bucket(estm,whichInt,i); } else { float estm = histPt->count * unsmoothedPs[1]; b = Smoother::bucket(estm); } FeatMap::iterator fti = histPt->feats.find(cVal); float unsmoothedVal; if(fti == histPt->feats.end()) unsmoothedVal = 0; else unsmoothedVal = (*fti).second.g(); float lam = Feature::getLambda(whichInt, i, b); lambdas[i] = lam; if (!getProb){ unsmoothedPs[i] = unsmoothedVal; bucketVals[i] = b; //if(procGSwitch) cerr << i << " " << nfeatV << " " << histPt->featureInt << " " // << estm << " " << b << " " << unsmoothedVal << endl; } else{ float uspathprob = lam*unsmoothedVal; float osmoothedVal = unsmoothedPs[i-1]; //???; float smpathprob = (1-lam)*osmoothedVal; float nsmoothedVal = uspathprob+smpathprob; //cerr << i << " " << nfeatV << " " << histPt->featureInt << " " // << estm << " " << b << " " << unsmoothedVal // << " " << nsmoothedVal << endl; unsmoothedPs[i] = nsmoothedVal; } }