/*
 * Score the surface word `shU` for tag int `t` at sentence position
 * `word_num`.
 *
 * The word is lower-cased with langAwareToLower() and that form is passed to
 * pHst().  A strictly positive pHst() value selects psktt(); any other value
 * selects psutt().  Both are called with the original-case word `shU`.
 */
double
Pst::
pstt(ECString& shU, int t, int word_num)
{
  char lowerBuf[1024];
  ECString lowered(langAwareToLower(shU.c_str(), lowerBuf));

  // The returned Term is not used here.  The call is retained in case
  // Term::fromInt() validates `t` -- TODO confirm, then drop if it is pure.
  (void)Term::fromInt(t);

  const double lowerProb = pHst(lowered, t);
  return (lowerProb > 0) ? psktt(shU, t, word_num)
                         : psutt(shU, t, word_num);
}
/*
 * Score the surface word `shU` for tag int `t` at sentence position
 * `word_num`.
 *
 * The word is lower-cased into a MAXWORDLENGTH-sized buffer via toLower()
 * and that form is passed to pHst().  When pHst() is strictly positive the
 * result comes from psktt(); otherwise it comes from psutt().  Both receive
 * the original-case word `shU`.
 */
double
Pst::
pstt(ECString& shU, int t, int word_num)
{
  char lowerBuf[MAXWORDLENGTH];
  ECString lowered(toLower(shU.c_str(), lowerBuf, MAXWORDLENGTH));

  // The returned Term is not used here.  The call is retained in case
  // Term::fromInt() validates `t` -- TODO confirm, then drop if it is pure.
  (void)Term::fromInt(t);

  const double lowerProb = pHst(lowered, t);
  if (lowerProb > 0)
    return psktt(shU, t, word_num);
  return psutt(shU, t, word_num);
}
/*
 * Build a flat list of (tag int, probability) pairs for the word `head` at
 * sentence position `word_num`.  The list alternates: tag, prob, tag, prob...
 *
 * The lower-cased word is looked up with useHeadC():
 *   - If a WordInfo is found, each of its st_ entries whose term is not
 *     greater than Term::lastTagInt() contributes (term, psktt(...)).
 *     A probability of exactly 0 is still recorded, with a warning on cerr.
 *   - If no WordInfo is found, every tag int in [0, Term::lastTagInt()]
 *     whose pHugt() is non-zero contributes (tag, psutt(...)).
 */
list<double>
Pst::
wordPlistConstruct(const ECString& head, int word_num)
{
  list<double> pairs;

  char lowerBuf[1024];
  ECString lowered(langAwareToLower(head.c_str(), lowerBuf));
  const WordInfo* info = useHeadC(lowered);

  if (!info)
    {
      // No entry for this word: walk the whole tag range instead.
      for (int tag = 0; tag <= Term::lastTagInt(); ++tag)
        {
          const double unkTagProb = pHugt(tag);
          if (unkTagProb == 0)
            continue;
          const double prob = psutt(head, tag, word_num);
          pairs.push_back(tag);
          pairs.push_back(prob);
        }
      return pairs;
    }

  // Entry found: only the tags stored with the word are considered.
  const int numEntries = info->stSize();
  for (int idx = 0; idx < numEntries; ++idx)
    {
      Phsgt& entry = info->st_[idx];
      int tag = entry.term;
      if (tag > Term::lastTagInt())
        continue;

      const double prob = psktt(head, tag, word_num);
      pairs.push_back(tag);
      pairs.push_back(prob);
      if (prob == 0)
        cerr << "Warning, prob = 0 for word = "
             << head << " and pos = "
             << tag << endl;
    }
  return pairs;
}
/*
 * Return the Bst recorded for (itm, h), filling it in on the first visit.
 *
 * cval, gcval and cdir are copied into the members curVal, gcurVal and
 * curDir immediately before each call to recordedBP(), meProb() and
 * meHeadProb(); the members are reset to NULL / NULL / -1 right after each
 * of those calls.
 *
 * If the recorded Bst is already marked explored it is returned unchanged.
 * Otherwise it is marked explored and every (pos, head) pair found in
 * itm->posAndheads() is scored:
 *   - hposprob is 1 when pos equals the item's own term int; otherwise it is
 *     meProb(pos, h, UCALC), with an exact 0 replaced by .00001.
 *   - hprob, for word ints in [0, lastKnownWord], is
 *     pCapgt(head, pos) * (1 - pHugt(pos)) * meHeadProb(word, h).
 *     When the word int is above lastKnownWord, or hprob is still 0,
 *     psutt(head, pos) is used instead.
 * For each pair whose bestParseGivenHead() result is non-empty, a new
 * EXTRAVAL Val is pushed onto the Bst with prob = (top prob of that result)
 * * hposprob * hprob, and bst.sum() grows by (result sum) * hposprob * hprob.
 * Finally one Val is popped from the Bst and, if non-null, handed to
 * addnth().
 *
 * Side effects visible here: h->preTerm and h->hd are overwritten while
 * iterating; debug traces go to cerr depending on printDebug().
 */
Bst&
MeChart::
bestParse(Item* itm, FullHist* h, Val* cval, Val* gcval, int cdir)
{
  // Expose the caller's context to recordedBP(), then clear it again.
  curVal = cval;
  gcurVal = gcval;
  curDir = cdir;
  Bst& bst = recordedBP(itm, h);
  gcurVal = NULL;
  curVal = NULL;
  curDir = -1;

  if (bst.explored())
    {
      if (printDebug() > 19)
        {
          prDp();
          cerr << "already known bestParse(" << *itm << ", ...) has p = "
               << bst.prob() << endl;
        }
      return bst;
    }

  if (printDebug() > 10)
    {
      prDp();
      cerr << "bestParse(" << *itm << ", ...)" << endl;
    }

  // Set before any of the scoring calls below (the original source tags
  // this line with the note "David McClosky bug").
  bst.explored() = true;

  int itemTermInt = itm->term()->toInt();
  PosMap& posMap = itm->posAndheads();
  ECString bestW;  // never read in this function

  for (PosIter posIt = posMap.begin(); posIt != posMap.end(); ++posIt)
    {
      int posInt = posIt->first;
      if (printDebug() > 16)
        {
          prDp();
          cerr << "consider Pos(" << *itm << ") = " << posInt << endl;
        }
      HeadMap& headMap = posIt->second;

      // Collected counts are used for p(u|t).
      float hposprob = 1;

      // At a preterminal the item's term int equals posInt, so
      // p(posInt | termInt) stays at 1 and no model call is needed.
      if (itemTermInt != posInt)
        {
          curVal = cval;
          gcurVal = gcval;
          curDir = cdir;
          hposprob = meProb(posInt, h, UCALC);
          if (hposprob == 0)
            hposprob = .00001;  // ??? original note: "this can happen"
          gcurVal = NULL;
          curVal = NULL;
          curDir = -1;
          if (printDebug() > 16)
            {
              prDp();
              cerr << "p(pos) = " << hposprob << endl;
            }
        }
      h->preTerm = posInt;

      for (HeadIter headIt = headMap.begin(); headIt != headMap.end(); ++headIt)
        {
          const Wrd& headWrd = headIt->first;
          int wrdInt = headWrd.toInt();
          ECString headLex = headWrd.lexeme();
          if (printDebug() > 16)
            {
              prDp();
              cerr << "consider head(" << *itm << ") = " << headLex << endl;
            }

          float hprob = 0;
          if (wrdInt >= 0 && wrdInt <= lastKnownWord)
            {
              hprob = pCapgt(&headWrd, posInt);
              hprob *= (1 - pHugt(posInt));
              curVal = cval;
              gcurVal = gcval;
              curDir = cdir;
              float modelHeadProb = meHeadProb(wrdInt, h);
              gcurVal = NULL;
              curVal = NULL;
              curDir = -1;
              hprob *= modelHeadProb;
              if (hprob < 0)
                {
                  cerr << posInt << " " << pHugt(posInt) << " "
                       << modelHeadProb << endl;
                  assert(hprob >=0);
                }
            }

          // Original note: hprob can be zero if lower case NNPS.
          if (wrdInt > lastKnownWord || hprob == 0)
            {
              hprob = psutt(&headWrd, posInt);
            }
          if (printDebug() > 16)
            {
              prDp();
              cerr << "p(hd) = " << hprob << endl;
            }

          float hhprob = (hposprob * hprob);
          if (hhprob < 0)
            {
              cerr << hposprob << " " << hprob << endl;
              assert(hhprob >= 0);
            }

          h->hd = &headWrd;
          Bst& sub = bestParseGivenHead(posInt, headWrd, itm, h,
                                        headIt->second, cval, gcval);
          if (sub.empty())
            continue;

          // Wrap the sub-result's top entry in a new Val scaled by hhprob.
          // The Val is handed to bst.push(); it is not released here.
          Val* extra = new Val();
          Val* subTop = sub.nth(0);
          extra->prob() = subTop->prob()*hhprob;
          extra->bsts().push_back(&sub);
          extra->status = EXTRAVAL;
          bst.push(extra);
          bst.sum() += sub.sum()*hhprob;
        }
    }

  Val* popped = bst.pop();
  if (popped)
    bst.addnth(popped);

  if (printDebug() > 10)
    {
      prDp();
      cerr << "Bestp for " << *itm << " = " << bst.prob() << endl;
    }
  return bst;
}