示例#1
0
文件: Pst.C 项目: BLLIP/bllip-parser
double
Pst::
psutt(const ECString& shU, int t, int word_num)
{
  //cerr << "Unknown word: " << shU << " for tag: " << t << endl; 
  double ans = pHugt(t);
  //cerr << "pHugt = " << ans << endl;
  if(ans == 0) return 0;
  double phyp = pHypgt(shU,t);
  ans *= phyp;
  //cerr << "pHypgt = " << phyp << endl;
  double phcp = pCapgt(shU,t, word_num);
  ans *= phcp;
  ans *= .000001;
  if(Term::fromInt(t)->openClass())
    {
      char temp[1024];
      ECString sh(langAwareToLower(shU.c_str(),temp));
      float phegt = pegt(sh,t);
      if(phegt == 0) phegt = .00001;
      //if(phegt == 0) phegt = .00005;
      //cerr << "pegt( " << sh << " | " << t << " ) = " << phegt << endl;
      ans *= phegt;
    }
  else
    ans *= .00000001;

  //cerr << "psutt( " << shU << " | " << t << " ) = " << ans << endl;
  return ans;
}
示例#2
0
void
Bchart::
makepUgT(ECString path)
{
  int i;
  int nm = Term::lastTagInt()+1;
  ECString pth(path);
  for(int f = 0 ; f < 20 ; f++)
    {
      ECString pUString(pth);
      pUString += "pUgT.txt";
      ifstream pUstream(pUString.c_str());
      if(!pUstream)
	{
	  cerr << "Could not find " << pUString << endl;
	  assert(pUstream);
	}
      for( i = 0 ; i <  nm ; i++ )
	{
	  int t;
	  pUstream >> t;
	  float p;
	  if(f == 0)
	    {
	      pUstream >> p;
	      pHugt(t) = p;
	      //cerr << "set pHugt " << t << " = " << p << endl;
	      pUstream >> p;
	      pHcapgt(t) = p;
	      pUstream >> p;
	      pHhypgt(t) = p;
	    }
	  else
	    {
	      pUstream >> p;
	      pHugt(t) += p;
	      //cerr << "set pHugt " << t << " = " << p << endl;
	      pUstream >> p;
	      pHcapgt(t) += p;
	      pUstream >> p;
	      pHhypgt(t) += p;
	    }
	}
示例#3
0
文件: Pst.C 项目: BLLIP/bllip-parser
double
Pst::
psktt(const ECString& shU, int t, int word_num)
{
  char temp[1024];
  ECString sh(langAwareToLower(shU.c_str(), temp));
  double ans = pHst(sh, t);
  double phcp = pCapgt(shU,t, word_num);
  ans *= phcp;
  double put = pHugt(t);
  ans *= (1-put);
  //cerr << "psktt( " << shU << " | " << t << " ) = " << ans << endl;
  return ans;
}
示例#4
0
文件: Pst.C 项目: BLLIP/bllip-parser
void
Pst::
readTermProbs(ECString& pTString, ECString& pUstring)
{
  ifstream pTstream(pTString.c_str());
  assert(pTstream);
  ignoreComment(pTstream);
  ifstream pUstream(pUstring.c_str());
  assert(pUstream);
  ignoreComment(pUstream);
  int i, j;
  for( i = 0 ; i <=  Term::lastNTInt() ; i++ )
    {
      int t;
      pUstream >> t;
      float p;
      pUstream >> p;
      pHugt(t) = p;
      pUstream >> p;
      if(p == 0) p = .00001;  //Anything might be capitalized;
      pHcapgt(t) = p;
      pUstream >> p;
      pHhypgt(t) = p;
    }
  int numpT;
  pTstream >> numpT;
  pHegt_ = new Phegt[numpT];
  egtSize_ = numpT;

  i = 0;
  while(pTstream)
    {
      int t;
      char e0;
      char e1;
      float p;
      pTstream >> t;
      if(!pTstream) break;
      assert(i < numpT);
      pTstream >> e0;
      pTstream >> e1;
      pTstream >> p;
      pHegt_[i].t = t;
      pHegt_[i].e[0] = e0;
      pHegt_[i].e[1] = e1;
      pHegt_[i].p = p;
      i++;
    }
}
示例#5
0
文件: Pst.C 项目: BLLIP/bllip-parser
list<double>
Pst::
wordPlistConstruct(const ECString& head, int word_num)
{
  list<double> ans;
  char temp[1024];
  ECString headL(langAwareToLower(head.c_str(), temp));
  const WordInfo* wi = useHeadC( headL );
  if( wi )
    {
      int  sz = wi->stSize();   
      for( int i = 0 ; i < sz ; i ++ )
	{
	  Phsgt& wti = wi->st_[i];
	  int    tInt = wti.term;
	  if(tInt > Term::lastTagInt()) continue;
	  double prob = psktt(head,tInt,word_num);
	  ans.push_back(tInt);
	  ans.push_back(prob);
	  if(prob == 0)
	    cerr << "Warning, prob = 0 for word = " << head
	      << " and pos = " << tInt << endl;
	  //cerr << "wordPlist: " << word << "\t" << tInt
	    // << "\t" << prob << endl;
	}
    }
  else
    {
      for(int i = 0 ; i <= Term::lastTagInt() ; i++)
	{
	  double phut = pHugt(i);
	  if(phut == 0) continue;
	  double prob = psutt(head,i,word_num);
	  ans.push_back(i);
	  ans.push_back(prob);
	}
    }
  return ans;
}
示例#6
0
Bst&
MeChart::
bestParse(Item* itm, FullHist* h, Val* cval, Val* gcval, int cdir)
{
  curVal = cval;
  gcurVal = gcval;
  curDir = cdir;
  Bst& bst = recordedBP(itm, h);
  curVal = gcurVal = NULL;
  curDir = -1;
  if(bst.explored())
    {
      if(printDebug() > 19)
	{
	  prDp();
	  cerr << "already known bestParse(" << *itm << ", ...) has p = "
	       << bst.prob() << endl;
	}
      return bst;
    }
  if(printDebug() > 10)
    {
      prDp();
      cerr << "bestParse(" << *itm << ", ...)" << endl;
    }
  bst.explored() = true;  //David McClosky bug;
  int itermInt = itm->term()->toInt();
  PosMap& pm = itm->posAndheads();
  PosIter pi = pm.begin();
  ECString bestW;
  for( ; pi != pm.end() ; pi++ )
    {
      int posInt = (*pi).first;
      if(printDebug() > 16)
	{
	  prDp();
	  cerr << "consider Pos(" << *itm << ") = " << posInt << endl;
	}
      HeadMap& hm = (*pi).second;
      /* we are using collected counts for p(u|t) */
      float hposprob = 1;
      /* if we have reached a preterminal, then termInt == posInt
	 and p(posInt|termInt) == 1 */
      if( itermInt != posInt)
	{
          curVal = cval;
          gcurVal = gcval;
          curDir = cdir;
	  hposprob = meProb(posInt, h, UCALC); 
	  if(hposprob == 0) hposprob = .00001; //??? this can happen;
          curVal = gcurVal = NULL;
          curDir = -1;
	  if(printDebug() > 16)
	    {
	      prDp();
	      cerr <<  "p(pos) = " <<  hposprob << endl;
	    }
	}
      h->preTerm = posInt;
      HeadIter hi = hm.begin(); 
      for( ;hi != hm.end();hi++)
	{
	  const Wrd& subhw = (*hi).first;
	  int wrdInt = subhw.toInt();
	  ECString subh = subhw.lexeme();
	  if(printDebug() > 16)
	    {
	      prDp();
	      cerr << "consider head(" << *itm << ") = " << subh << endl;
	    }
	  float hprob = 0;
	  if(wrdInt >= 0 && wrdInt <= lastKnownWord)
	    {
	      hprob = pCapgt(&subhw,posInt); 
	      hprob *= (1 - pHugt(posInt)); 
              curVal = cval;
              gcurVal = gcval;
              curDir = cdir;
	      float hprob2 = meHeadProb(wrdInt, h);
              curVal = gcurVal = NULL;
              curDir = -1;
	      hprob *= hprob2;
	      if(hprob < 0)
		{
		  cerr << posInt << " " << pHugt(posInt) <<" "<<hprob2 << endl;
		  assert(hprob >=0);
		}
	    }
	  //hprob can be zero if lower case NNPS.
	  if(wrdInt > lastKnownWord || hprob == 0)
	    {
	      hprob = psutt(&subhw,posInt);
	    }
	  if(printDebug() > 16)
	    {
	      prDp();
	      cerr << "p(hd) = "<< hprob << endl;
	    }
	  float hhprob = (hposprob * hprob);
	  if(hhprob < 0)
	    {
	      cerr << hposprob << " " << hprob << endl;
	      assert(hhprob >= 0);
	    }
	  h->hd = &subhw;
	  Bst&  
	    bst2 = bestParseGivenHead(posInt,subhw,itm,h,(*hi).second,cval,gcval);
          if(bst2.empty()) continue;
          Val* nval = new Val();
	  Val* oldval0 = bst2.nth(0);
          nval->prob() = oldval0->prob()*hhprob;
          nval->bsts().push_back(&bst2);
	  nval->status = EXTRAVAL;
          bst.push(nval);
          bst.sum() += bst2.sum()*hhprob;
	}
    }
  Val* nbest = bst.pop();
  if(nbest) bst.addnth(nbest);
  if(printDebug() > 10)
    {
      prDp();
      cerr << "Bestp for " << *itm << " = " << bst.prob() <<endl;
    }
  return bst;
}