/*
  Recursively walks the tree rooted at `tree' and accumulates per-tag
  counts, indexed by the integer code of each node's term.

    totCounts     - bumped for every node visited (terminal or not).
    posDenoms     - bumped for every terminal (a node whose word() is
                    non-empty).
    posCounts     - terminals eligible for the capitalization statistic.
    posCapCounts  - eligible terminals whose first character alone is
                    altered by lower-casing.
    posUCounts    - terminals whose WordInfo::c() is <= 2
                    (presumably a rare-word count threshold; confirm).
    posDashCounts - those rare terminals that also contain a '-'.

  Terminals are not descended into; non-terminals only recurse.
*/
void addWwData(InputTree* tree)
{
  ECString tagName = tree->term();
  /* NOTE(review): the result of Term::get is dereferenced unchecked;
     presumably every term in the input trees is known -- confirm. */
  const Term* term = Term::get(tagName);
  int lhsInt = term->toInt();
  totCounts[lhsInt]++;

  const bool isTerminal = tree->word() != "";
  if( !isTerminal )
    {
      /* Interior node: nothing more to record here, visit the children. */
      InputTrees& kids = tree->subTrees();
      for( InputTrees::iterator kid = kids.begin(); kid != kids.end(); ++kid )
        {
          addWwData(*kid);
        }
      return;
    }

  /* Keep both the word as written and its lower-cased form so the two
     can be compared character by character.
     NOTE(review): the scratch buffer is a fixed 512 bytes; presumably
     langAwareToLower writes the lower-cased word into it -- confirm that
     it cannot be overrun by a very long token. */
  ECString hdLexU(tree->word());
  char scratch[512];
  ECString hdLex(langAwareToLower(hdLexU.c_str(), scratch));
  int len = hdLex.length();

  const WordInfo* wi = Pst::get(hdLex);
  if( !wi )
    {
      cerr << "Couldn't find entry for word '" << hdLex << "' in pSgT.txt" << endl;
    }
  /* With NDEBUG defined this assert disappears and a missing entry is
     dereferenced further down. */
  assert(wi);

  /* Capitalization statistic.  Skip words very close to the start of the
     sentence and words of length 1. */
  if( tree->start() >= 2 && len > 1 )
    {
      const bool firstCharChanged = hdLex[0] != hdLexU[0];
      if( !firstCharChanged )
        {
          /* Written without an initial capital. */
          posCounts[lhsInt]++;
        }
      else if( hdLex[1] == hdLexU[1] )
        {
          /* Only the first character was affected by lower-casing:
             counts as a capitalized word. */
          posCounts[lhsInt]++;
          posCapCounts[lhsInt]++;
        }
      /* Otherwise the first two characters both changed: capitalization
         is ambiguous, so the word is left out of this statistic. */
    }

  posDenoms[lhsInt]++;

  if( wi->c() <= 2 )
    {
      posUCounts[lhsInt]++;
      if( strchr(hdLex.c_str(), '-') )
        {
          posDashCounts[lhsInt]++;
        }
    }
}
/*
  Recursively walks the tree rooted at `tree' and collects word-ending
  statistics from its terminals (nodes whose word() is non-empty).

  A terminal contributes only when all of the following hold:
    - its term reports openClass();
    - its lower-cased word is at least 4 characters long;
    - the WordInfo found for the lower-cased word has c() <= 4
      (presumably a rare-word count threshold; confirm).
  For such a terminal, incrEndData(tag, ending) is called with the value
  returned by lastCharacter() for the lower-cased word, and numTerm[tag]
  is bumped.  Terminals are not descended into; non-terminals only recurse.

  An earlier, disabled variant of this function also resolved each
  non-terminal's own term (defaulting an empty term to "S1" and falling
  back to "GARBAGE" with a warning) and incremented data[k][l] from the
  head child's tag and the node's term.  That code was commented out and
  has been dropped here, together with the `okSit' flag only it used.
*/
void addWwData(InputTree* tree)
{
  if( tree->word() != "" )
    {
      ECString tagName = tree->term();
      /* NOTE(review): the result of Term::get is dereferenced unchecked;
         presumably every tag in the input trees is known -- confirm. */
      const Term* term = Term::get(tagName);
      int lhsInt = term->toInt();
      if( !term->openClass() )
        {
          return;
        }

      /* NOTE(review): the scratch buffer is a fixed 512 bytes; presumably
         langAwareToLower writes the lower-cased word into it -- confirm
         that it cannot be overrun by a very long token. */
      ECString hdLexU(tree->word());
      char scratch[512];
      ECString hdLex(langAwareToLower(hdLexU.c_str(), scratch));
      int len = hdLex.length();
      if( len < 4 )
        {
          return;
        }

      ECString ending = lastCharacter(hdLex);
      const WordInfo* wi = Pst::get(hdLex);
      /* With NDEBUG defined this assert disappears and a missing entry is
         dereferenced on the next line. */
      assert(wi);
      if( wi->c() <= 4 )
        {
          incrEndData(lhsInt, ending);
          numTerm[lhsInt]++;
        }
      return;
    }

  /* Interior node: visit every child. */
  InputTrees& kids = tree->subTrees();
  for( InputTrees::iterator kid = kids.begin(); kid != kids.end(); ++kid )
    {
      addWwData(*kid);
    }
}