Пример #1
1
/*
  Dispatches on pHst() of the lower-cased word: a positive value routes the
  original-case word shU to psktt(), anything else routes it to psutt().
  The value of the chosen function is returned unchanged.
*/
double
Pst::pstt(ECString& shU, int t, int word_num)
{
  char lowerBuf[1024];
  ECString lowered(langAwareToLower(shU.c_str(), lowerBuf));

  // The looked-up Term is not used below; only the call itself is retained.
  (void)Term::fromInt(t);

  if(pHst(lowered, t) > 0)
    return psktt(shU, t, word_num);
  return psutt(shU, t, word_num);
}
Пример #2
0
double
Pst::
psutt(const ECString& shU, int t, int word_num)
{
  //cerr << "Unknown word: " << shU << " for tag: " << t << endl; 
  double ans = pHugt(t);
  //cerr << "pHugt = " << ans << endl;
  if(ans == 0) return 0;
  double phyp = pHypgt(shU,t);
  ans *= phyp;
  //cerr << "pHypgt = " << phyp << endl;
  double phcp = pCapgt(shU,t, word_num);
  ans *= phcp;
  ans *= .000001;
  if(Term::fromInt(t)->openClass())
    {
      char temp[1024];
      ECString sh(langAwareToLower(shU.c_str(),temp));
      float phegt = pegt(sh,t);
      if(phegt == 0) phegt = .00001;
      //if(phegt == 0) phegt = .00005;
      //cerr << "pegt( " << sh << " | " << t << " ) = " << phegt << endl;
      ans *= phegt;
    }
  else
    ans *= .00000001;

  //cerr << "psutt( " << shU << " | " << t << " ) = " << ans << endl;
  return ans;
}
Пример #3
0
/*
  Returns WordInfo::toInt() for the lower-cased head() of the node two
  parent() steps above treeh->tree.  If either step yields a null node the
  result is the toInt() of the "^^" entry, which is looked up on the first
  call and cached in a function-local static.
  When the head word has no Pst entry the tree is printed to cerr and the
  assertion fires.
*/
int
tree_grandparent_head(TreeHist* treeh)
{
    InputTree* tree = treeh->tree;
    InputTree* ancestor = tree->parent();

    static int cachedTopInt = -1;
    if(cachedTopInt < 0)
    {
        ECString topMarker("^^");
        cachedTopInt = Pst::get(topMarker)->toInt();
    }

    // First step up is the parent, second step is the grandparent.
    if(!ancestor) return cachedTopInt;
    ancestor = ancestor->parent();
    if(!ancestor) return cachedTopInt;

    char lowerBuf[1024];
    ECString headLower(langAwareToLower(ancestor->head().c_str(), lowerBuf));
    const WordInfo* wi = Pst::get(headLower);
    if(!wi)
    {
        cerr << *tree << endl;
        assert(wi);
    }

    int ans = wi->toInt();
    assert(ans >= 0);
    return ans;
}
Пример #4
0
double
Pst::
pCapgt(const ECString& shU, int t, int word_num)
{
  if(word_num == 0) return 1;
  //cerr << "pCapgt = " << pcap << endl;
  if(shU.length() < 2) return 1;  //ignore words of length 1;
  char temp[1024];
  ECString sh(langAwareToLower(shU.c_str(),temp));
  bool cap = false;
  if(shU[0] != sh[0] && shU[1] == sh[1]) cap = true;
  double pcap = pHcapgt(t);  
  return cap ? pcap : (1 - pcap);
}
Пример #5
0
double
Pst::
psktt(const ECString& shU, int t, int word_num)
{
  char temp[1024];
  ECString sh(langAwareToLower(shU.c_str(), temp));
  double ans = pHst(sh, t);
  double phcp = pCapgt(shU,t, word_num);
  ans *= phcp;
  double put = pHugt(t);
  ans *= (1-put);
  //cerr << "psktt( " << shU << " | " << t << " ) = " << ans << endl;
  return ans;
}
Пример #6
0
/*
  Recursively visits `tree` and updates the per-tag counters.

  Every visited node increments totCounts for its tag.  A node whose word()
  is non-empty is treated as a leaf and additionally updates:
    posCounts     - start() >= 2, lower-cased length > 1, and the first two
                    characters are not both changed by lower-casing;
    posCapCounts  - subset of posCounts where the first character changed
                    and the second did not;
    posDenoms     - every leaf;
    posUCounts    - leaves whose WordInfo::c() is <= 2;
    posDashCounts - subset of posUCounts whose lower-cased word contains '-'.
  A node with an empty word() recurses into each of its subTrees() instead.
  A leaf word with no Pst entry is reported on cerr and then asserted on.
*/
void
addWwData(InputTree* tree)
{
  ECString tagName = tree->term();
  const Term* trm = Term::get(tagName);
  int lhsInt = trm->toInt();
  totCounts[lhsInt]++;

  const bool isLeaf = tree->word() != "";
  if(!isLeaf)
    {
      InputTrees& kids = tree->subTrees();
      for(InputTrees::iterator kid = kids.begin(); kid != kids.end(); ++kid)
        addWwData(*kid);
      return;
    }

  ECString hdLexU(tree->word());
  char lowerBuf[512];
  ECString hdLex(langAwareToLower(hdLexU.c_str(), lowerBuf));
  int len = hdLex.length();

  const WordInfo* wi = Pst::get(hdLex);
  if (!wi)
    cerr << "Couldn't find entry for word '" << hdLex <<
            "' in pSgT.txt" << endl;
  assert(wi);

  // Skip words near the sentence start and one-character words.
  if(tree->start() >= 2 && len > 1)
    {
      const bool firstChanged = hdLex[0] != hdLexU[0];
      const bool secondChanged = hdLex[1] != hdLexU[1];
      // Both characters changing makes the capitalisation ambiguous.
      if(!(firstChanged && secondChanged))
        {
          posCounts[lhsInt]++;
          if(firstChanged && !secondChanged)
            posCapCounts[lhsInt]++;
        }
    }

  posDenoms[lhsInt]++;

  if(wi->c() <= 2)
    {
      posUCounts[lhsInt]++;
      if(strpbrk(hdLex.c_str(), "-"))
        posDashCounts[lhsInt]++;
    }
}
Пример #7
0
int
tree_watpos(int pos)
{
    if(pos < 0)
    {
        return nullWordInt;
    }
    ECString wrd = sentence[pos]->head();
    char tmp[1024];
    ECString wrdl=langAwareToLower(wrd.c_str(), tmp);
    const WordInfo* wi = Pst::get(wrdl);
    assert(wi);
    int ans = wi->toInt();
    assert(ans >= 0);
    return ans;
}
Пример #8
0
/*
  Returns WordInfo::toInt() for the lower-cased head() of `tree`.
  When the word has no Pst entry: returns -1 if Feat::Usage == PARSE,
  otherwise reports the word on cerr and asserts.
*/
int
headFromTree(InputTree* tree)
{
    char lowerBuf[1024];
    string wrdStr(langAwareToLower(tree->head().c_str(), lowerBuf));
    const WordInfo* wi = Pst::get(wrdStr);

    // A missing word is tolerated only when Feat::Usage is PARSE.
    if(!wi && Feat::Usage == PARSE) return -1;
    if(!wi)
    {
        cerr << "Could not find " << wrdStr << endl;
        assert(wi);
    }

    int ans = wi->toInt();
    assert(ans >= 0);
    return ans;
}
Пример #9
0
void
incrWordData(int lhsInt, ECString wupper)
{
  char temp[1024];
  ECString w(langAwareToLower(wupper.c_str(), temp));
  numTerm[lhsInt]++;
  WordMap::iterator wmi = wordMap.find(w);
  if(wmi == wordMap.end())
    {
      wordMap[w][lhsInt] = 1;
      return;
    }
  PosD& posd = (*wmi).second;
  PosD::iterator pdi = posd.find(lhsInt);
  if(pdi == posd.end())
    {
      posd[lhsInt] = 1;
    }
  else
    (*pdi).second++;
}
Пример #10
0
/*
  Builds a flat list of (tag, value) pairs for `head`: each pair is pushed as
  two consecutive doubles, tag first.

  If useHeadC() finds the lower-cased word, one pair is produced per entry of
  its st_ array whose tag does not exceed Term::lastTagInt(); the value comes
  from psktt(), and a zero value is reported on cerr.
  Otherwise one pair is produced for every tag in 0..Term::lastTagInt() whose
  pHugt() is non-zero; the value comes from psutt().
*/
list<double>
Pst::wordPlistConstruct(const ECString& head, int word_num)
{
  list<double> pairs;
  char lowerBuf[1024];
  ECString headL(langAwareToLower(head.c_str(), lowerBuf));
  const WordInfo* wi = useHeadC( headL );

  if( !wi )
    {
      for(int tag = 0 ; tag <= Term::lastTagInt() ; ++tag)
        {
          if(pHugt(tag) == 0) continue;
          const double prob = psutt(head, tag, word_num);
          pairs.push_back(tag);
          pairs.push_back(prob);
        }
      return pairs;
    }

  const int sz = wi->stSize();
  for(int idx = 0 ; idx < sz ; ++idx)
    {
      Phsgt& entry = wi->st_[idx];
      int tInt = entry.term;
      if(tInt > Term::lastTagInt()) continue;

      const double prob = psktt(head, tInt, word_num);
      pairs.push_back(tInt);
      pairs.push_back(prob);
      if(prob == 0)
        cerr << "Warning, prob = 0 for word = " << head
             << " and pos = " << tInt << endl;
    }
  return pairs;
}
Пример #11
0
// Constructs a chart for `sentence` with identifier `id`.
// Forwards both arguments to ChartBase, initialises depth, curDir, gcurVal
// and alreadyPoppedNum, allocates the edge heap, stores the wtoInt() value of
// every lower-cased lexeme in sentence_, and zeroes the curDemerits_ table.
// The sentence length must not exceed MAXSENTLEN (asserted).
Bchart::
Bchart(SentRep & sentence, int id)
  : ChartBase( sentence,id ),
    depth(0),
    curDir(-1),
    gcurVal(NULL),
    alreadyPoppedNum( 0 )
{
  pretermNum = 0;
  // NOTE(review): allocated with new; where it is released is not visible here.
  heap = new EdgeHeap();
  int len = sentence.length();
  // NOTE(review): assigned before the wtoInt() calls below; presumably
  // wtoInt() reads or updates lastWord[id] -- confirm before reordering.
  lastWord[id]=lastKnownWord;
  int i,j;
  assert(len <= MAXSENTLEN);
  // Record, per word position, the integer wtoInt() yields for the
  // lower-cased lexeme.
  for(i = 0 ; i < len ; i++)
    {
      ECString wl = langAwareToLower(sentence[i].lexeme());
      int val = wtoInt(wl);
      sentence_[i].toInt() = val;
    }
  // Clear the whole MAXSENTLEN x MAXSENTLEN table, not just the len x len part.
  for(i = 0 ; i < MAXSENTLEN ; i++)
    for(j = 0 ; j < MAXSENTLEN ; j++) curDemerits_[i][j] = 0;
}
Пример #12
0
/*
  Recursively visits `tree` and collects word-ending counts from its leaves.

  A node whose word() is non-empty is treated as a leaf.  A leaf contributes
  only when all of the following hold:
    - its tag (Term::get(tree->term())) is open class;
    - its lower-cased word has length >= 4;
    - the word's Pst entry has c() <= 4.
  For such a leaf, incrEndData() is called with the tag id and the value
  lastCharacter() returns for the lower-cased word, and numTerm for the tag
  is incremented.  A qualifying-length word with no Pst entry trips the
  assertion.

  A node with an empty word() recurses into each of its subTrees().
*/
void
addWwData(InputTree* tree)
{
  if( tree->word() != ""  )
    {
      ECString wTagNm = tree->term();
      const Term* trm = Term::get(wTagNm);
      int lhsInt = trm->toInt();
      if(!trm->openClass()) return;

      ECString hdLexU(tree->word());
      char temp[512];
      ECString hdLex(langAwareToLower(hdLexU.c_str(),temp));
      int len = hdLex.length();
      if(len < 4) return;

      ECString e=lastCharacter(hdLex);
      const WordInfo* wi = Pst::get(hdLex);
      assert(wi);
      if(wi->c() <= 4)
        {
          incrEndData(lhsInt, e);
          numTerm[lhsInt]++;
        }
      return;
    }

  InputTrees& st = tree->subTrees();
  for(InputTrees::iterator subTreeIter = st.begin();
      subTreeIter != st.end(); ++subTreeIter)
    {
      addWwData(*subTreeIter);
    }
}