Ejemplo n.º 1
0
/*
  Walk TREE recursively and accumulate per-tag counts.

  For every node visited, the entry of totCounts indexed by the integer
  id of the node's tag is incremented.  For a node that carries a word
  (tree->word() is non-empty) the following tables, indexed the same
  way, are also updated, and the node's subtrees are not visited:

    posDenoms     - every word;
    posCounts     - words eligible for the capitalization statistic
                    (see the comment in the body);
    posCapCounts  - eligible words whose first character differs from
                    its lowercased form while the second does not;
    posUCounts    - words whose WordInfo count c() is <= 2;
    posDashCounts - words counted in posUCounts that contain a '-'.

  Nodes without a word simply recurse into their subtrees.
*/
void
addWwData(InputTree* tree)
{
  // NOTE(review): the result of Term::get() is dereferenced without a
  // null check; presumably every tag in the tree is known -- confirm.
  ECString tagName = tree->term();
  const Term* tag = Term::get(tagName);
  int tagIdx = tag->toInt();
  totCounts[tagIdx]++;

  if( tree->word() != "" )
    {
      ECString surface(tree->word());
      // NOTE(review): langAwareToLower() is handed a fixed 512-byte
      // scratch buffer; presumably words never exceed it -- confirm.
      char lowerBuf[512];
      ECString lowered(langAwareToLower(surface.c_str(), lowerBuf));
      int len = lowered.length();

      const WordInfo* wi = Pst::get(lowered);
      if(!wi)
        {
          cerr << "Couldn't find entry for word '" << lowered <<
                  "' in pSgT.txt" << endl;
          assert(wi);
          // assert() is compiled out when NDEBUG is defined; skip the
          // word instead of dereferencing a null pointer further down.
          return;
        }

      /* Ignore words very close to the start of the sentence, those
         that are of length 1, and those whose capitalization is
         ambiguous (both of the first two characters change when the
         word is lowercased). */
      if(tree->start() >= 2 && len > 1)
        {
          bool firstChanged = lowered[0] != surface[0];
          bool secondChanged = lowered[1] != surface[1];
          if(!(firstChanged && secondChanged))
            {
              posCounts[tagIdx]++;
              if(firstChanged && !secondChanged)
                posCapCounts[tagIdx]++;
            }
        }

      posDenoms[tagIdx]++;
      if(wi->c() <= 2)
        {
          posUCounts[tagIdx]++;
          if(strpbrk(lowered.c_str(), "-")) posDashCounts[tagIdx]++;
        }
      return;
    }

  // No word on this node: descend into each child.
  InputTrees& kids = tree->subTrees();
  for(InputTrees::iterator kid = kids.begin(); kid != kids.end(); ++kid)
    {
      addWwData(*kid);
    }
}
Ejemplo n.º 2
0
/*
  Walk TREE recursively and collect word-ending statistics.

  For a node that carries a word (tree->word() is non-empty): if the
  node's tag reports openClass(), the lowercased word has at least 4
  characters, and the word's WordInfo count c() is <= 4, then
  incrEndData() is called with the tag's integer id and the result of
  lastCharacter() on the lowercased word, and numTerm for that tag is
  incremented.  The subtrees of such a node are not visited.

  Nodes without a word simply recurse into their subtrees.
*/
void
addWwData(InputTree* tree)
{
  if( tree->word() != ""  )
    {
      ECString tagName = tree->term();
      const Term* tag = Term::get(tagName);
      int tagIdx = tag->toInt();
      if(!tag->openClass()) return;

      ECString surface(tree->word());
      // NOTE(review): langAwareToLower() is handed a fixed 512-byte
      // scratch buffer; presumably words never exceed it -- confirm.
      char lowerBuf[512];
      ECString lowered(langAwareToLower(surface.c_str(), lowerBuf));
      int len = lowered.length();
      if(len < 4) return;

      ECString ending = lastCharacter(lowered);
      const WordInfo* wi = Pst::get(lowered);
      if(!wi)
        {
          assert(wi);
          // assert() is compiled out when NDEBUG is defined; skip the
          // word instead of dereferencing a null pointer below.
          return;
        }

      // Only words with a count of at most 4 contribute.
      if(wi->c() <= 4)
        {
          incrEndData(tagIdx, ending);
          numTerm[tagIdx]++;
        }
      return;
    }

  // No word on this node: descend into each child.
  InputTrees& kids = tree->subTrees();
  for(InputTrees::iterator kid = kids.begin(); kid != kids.end(); ++kid)
    {
      addWwData(*kid);
    }
}