double Pst:: psutt(const ECString& shU, int t, int word_num) { //cerr << "Unknown word: " << shU << " for tag: " << t << endl; double ans = pHugt(t); //cerr << "pHugt = " << ans << endl; if(ans == 0) return 0; double phyp = pHypgt(shU,t); ans *= phyp; //cerr << "pHypgt = " << phyp << endl; double phcp = pCapgt(shU,t, word_num); ans *= phcp; ans *= .000001; if(Term::fromInt(t)->openClass()) { char temp[1024]; ECString sh(langAwareToLower(shU.c_str(),temp)); float phegt = pegt(sh,t); if(phegt == 0) phegt = .00001; //if(phegt == 0) phegt = .00005; //cerr << "pegt( " << sh << " | " << t << " ) = " << phegt << endl; ans *= phegt; } else ans *= .00000001; //cerr << "psutt( " << shU << " | " << t << " ) = " << ans << endl; return ans; }
double Pst:: psktt(const ECString& shU, int t, int word_num) { char temp[1024]; ECString sh(langAwareToLower(shU.c_str(), temp)); double ans = pHst(sh, t); double phcp = pCapgt(shU,t, word_num); ans *= phcp; double put = pHugt(t); ans *= (1-put); //cerr << "psktt( " << shU << " | " << t << " ) = " << ans << endl; return ans; }
// Returns the Bst record for (itm, h), filling it in on first visit.
//
// The record is fetched with recordedBP(); if it is already marked explored
// it is returned unchanged.  Otherwise every (pos, head) pair listed in
// itm->posAndheads() is scored:
//   posProb   = 1 when pos equals the item's own term, else meProb(pos,h,UCALC)
//               (0 is replaced by .00001)
//   headProb  = pCapgt * (1 - pHugt) * meHeadProb for word ids in
//               [0, lastKnownWord]; psutt() when the id is above
//               lastKnownWord or the first estimate came out as 0
//   jointProb = posProb * headProb
// For each pair whose bestParseGivenHead() result is non-empty, a new
// EXTRAVAL Val carrying (top sub-parse prob * jointProb) is pushed onto the
// record and the record's sum is increased by (sub sum * jointProb).
// Finally one Val is popped and, if non-null, stored with addnth().
//
// curVal / gcurVal / curDir are set from cval / gcval / cdir only around the
// calls to recordedBP(), meProb() and meHeadProb(), and reset to
// NULL / NULL / -1 right after each of them.
// Side effects on h: h->preTerm and h->hd are overwritten while iterating.
Bst&
MeChart::bestParse(Item* itm, FullHist* h, Val* cval, Val* gcval, int cdir)
{
  curVal = cval;
  gcurVal = gcval;
  curDir = cdir;
  Bst& best = recordedBP(itm, h);
  curVal = gcurVal = NULL;
  curDir = -1;

  if(best.explored())
    {
      if(printDebug() > 19)
        {
          prDp();
          cerr << "already known bestParse(" << *itm << ", ...) has p = "
               << best.prob() << endl;
        }
      return best;
    }

  if(printDebug() > 10)
    {
      prDp();
      cerr << "bestParse(" << *itm << ", ...)" << endl;
    }

  // Marked before any scoring is done (original note: "David McClosky bug").
  best.explored() = true;

  int itemTermInt = itm->term()->toInt();
  PosMap& posMap = itm->posAndheads();
  ECString bestW;   // not referenced below; retained from the original

  for(PosIter posIt = posMap.begin(); posIt != posMap.end(); ++posIt)
    {
      int posInt = posIt->first;
      if(printDebug() > 16)
        {
          prDp();
          cerr << "consider Pos(" << *itm << ") = " << posInt << endl;
        }
      HeadMap& headMap = posIt->second;

      /* we are using collected counts for p(u|t) */
      /* if we have reached a preterminal, then termInt == posInt and
         p(posInt|termInt) == 1 */
      float posProb = 1;
      if(itemTermInt != posInt)
        {
          curVal = cval;
          gcurVal = gcval;
          curDir = cdir;
          posProb = meProb(posInt, h, UCALC);
          if(posProb == 0) posProb = .00001;   // original note: "??? this can happen"
          curVal = gcurVal = NULL;
          curDir = -1;
          if(printDebug() > 16)
            {
              prDp();
              cerr << "p(pos) = " << posProb << endl;
            }
        }
      h->preTerm = posInt;

      for(HeadIter headIt = headMap.begin(); headIt != headMap.end(); ++headIt)
        {
          const Wrd& headWrd = headIt->first;
          int headInt = headWrd.toInt();
          ECString headLex = headWrd.lexeme();
          if(printDebug() > 16)
            {
              prDp();
              cerr << "consider head(" << *itm << ") = " << headLex << endl;
            }

          float headProb = 0;
          if(headInt >= 0 && headInt <= lastKnownWord)
            {
              headProb = pCapgt(&headWrd, posInt);
              headProb *= (1 - pHugt(posInt));
              curVal = cval;
              gcurVal = gcval;
              curDir = cdir;
              float meHead = meHeadProb(headInt, h);
              curVal = gcurVal = NULL;
              curDir = -1;
              headProb *= meHead;
              if(headProb < 0)
                {
                  cerr << posInt << " " << pHugt(posInt) <<" "<<meHead << endl;
                  assert(headProb >=0);
                }
            }

          // headProb can be zero if lower case NNPS.
          if(headInt > lastKnownWord || headProb == 0)
            {
              headProb = psutt(&headWrd, posInt);
            }
          if(printDebug() > 16)
            {
              prDp();
              cerr << "p(hd) = "<< headProb << endl;
            }

          float jointProb = (posProb * headProb);
          if(jointProb < 0)
            {
              cerr << posProb << " " << headProb << endl;
              assert(jointProb >= 0);
            }

          h->hd = &headWrd;
          Bst& sub = bestParseGivenHead(posInt, headWrd, itm, h,
                                        headIt->second, cval, gcval);
          if(!sub.empty())
            {
              // The new Val is handed to best.push(); it is not deleted here.
              Val* extra = new Val();
              Val* subTop = sub.nth(0);
              extra->prob() = subTop->prob()*jointProb;
              extra->bsts().push_back(&sub);
              extra->status = EXTRAVAL;
              best.push(extra);
              best.sum() += sub.sum()*jointProb;
            }
        }
    }

  Val* popped = best.pop();
  if(popped) best.addnth(popped);

  if(printDebug() > 10)
    {
      prDp();
      cerr << "Bestp for " << *itm << " = " << best.prob() <<endl;
    }
  return best;
}