/* Returns 1 or 0 depending on what follows `child` among the subtrees of
   `tree`.

   - A null `tree`, or a `tree` whose term isRoot(), yields 1.
   - If `child` is the last subtree, or the subtree after it is
     Term::stopTerm, the question is passed up: is_effEnd(tree->parent(), tree).
   - If the next subtree's term isColon() or isFinal(), the answer is 1;
     if it isComma(), the answer is 0.
   - Otherwise the answer is 1 only when the subtree after that one has
     the term "''", and 0 in every other case.

   `child` must be one of tree's subtrees; if it is not, error() is
   called and 0 is returned. */
int
is_effEnd(InputTree* tree, InputTree* child)
{
  if(!tree) return 1;
  const Term* trm = Term::get(tree->term());
  assert(trm);
  if(trm->isRoot()) return 1;

  // Locate child.  The search is bounded by end() so that a child that is
  // not present is reported instead of iterating off the container.
  InputTreesIter iti = tree->subTrees().begin();
  for( ; iti != tree->subTrees().end() ; ++iti)
    {
      assert(*iti);
      if(*iti == child) break;
    }
  if(iti == tree->subTrees().end())
    {
      error("should not get here");
      return 0;
    }

  // Look at the sibling immediately after child.
  ++iti;
  if(iti == tree->subTrees().end()) return is_effEnd(tree->parent(), tree);
  InputTree* nxt = *iti;
  const Term* ntrm = Term::get(nxt->term());
  if(ntrm == Term::stopTerm) return is_effEnd(tree->parent(), tree);
  if(ntrm->isColon() || ntrm->isFinal()) return 1;
  if(ntrm->isComma()) return 0;

  // Look one sibling further: only a "''" term there yields 1.
  ++iti;
  if(iti == tree->subTrees().end()) return 0;
  nxt = *iti;
  if(nxt->term() == "''") return 1;
  return 0;
}
/* Walks `tree` recursively and counts unary expansions in treeData_.

   Whenever a node has exactly one subtree, and that subtree's term is
   neither a terminal nor the same term as the node itself,
   treeData_[parent][child] is incremented.  Both indices are the term's
   toInt() value shifted down by Term::lastTagInt() + 1.

   A lone child that is a terminal, or that has the same term as its
   parent, is neither counted nor descended into. */
void
UnitRules::
gatherData(InputTree* tree)
{
  const Term* trm = Term::get(tree->term());
  assert(trm);
  int parInt = trm->toInt();
  int rparI = parInt - (Term::lastTagInt() + 1);
  const bool onlyChild = (tree->subTrees().size() == 1);

  InputTreesIter iti = tree->subTrees().begin();
  for( ; iti != tree->subTrees().end() ; ++iti)
    {
      InputTree* stree = *iti;
      if(onlyChild)
	{
	  const Term* strm = Term::get(stree->term());
	  // Check the lookup before the first dereference.
	  assert(strm);
	  if(strm->terminal_p()) continue;
	  int chiInt = strm->toInt();
	  if(chiInt == parInt) continue;
	  int rchiI = chiInt - (Term::lastTagInt() + 1);
	  treeData_[rparI][rchiI]++;
	}
      gatherData(stree);
    }
}
/* Returns the zero-based position of the subtree of `tree` chosen as its
   head, or -1 when `tree` has no subtrees or none qualifies.

   Each subtree is scored with headPriority(parent term, child term,
   best priority so far).  A subtree replaces the current choice when
   its score is less than or equal to the best so far, so among equal
   scores the rightmost subtree wins.  The starting threshold is 10.
   An empty parent term is treated as "S1". */
int
headPosFromTree(InputTree* tree)
{
  ECString parentLabel(tree->term());
  if(parentLabel == "") parentLabel = "S1";

  int bestPriority = 10;
  int bestPos = -1;
  int idx = 0;
  ConstInputTreesIter kidIter = tree->subTrees().begin();
  for( ; kidIter != tree->subTrees().end() ; kidIter++, idx++)
    {
      InputTree* kid = *kidIter;
      ECString kidLabel(kid->term());
      int priority = headPriority(parentLabel, kidLabel, bestPriority);
      if(priority > bestPriority) continue;
      bestPos = idx;
      bestPriority = priority;
    }
  return bestPos;
}
/* Returns an integer code for the term of `tree`, adjusted when the
   node's subtrees match one of two patterns.

   Let tint be the toInt() value of tree's term.
   - If the term is not NP, S or VP, tint is returned unchanged.
   - Otherwise the subtrees are scanned, counting those with the same
     term as `tree` (numTrm) and noting whether any subtree other than
     the first isCC(), isComma() or isColon().
   - A term named "NP" with exactly two same-term subtrees and no CC
     yields Term::lastNTInt() + 1.
   - Two or more same-term subtrees together with a comma, colon or CC
     yield tint + Term::lastNTInt().
   - Anything else yields tint. */
int
ccIndFromTree(InputTree* tree)
{
  ECString trmNm = tree->term();
  const Term* trm = Term::get(trmNm);
  assert(trm);
  int tint = trm->toInt();
  /*Change next line to indicate which non-terminals get specially
    marked to indicate that they are conjoined together */
  if(!trm->isNP() && !trm->isS() && !trm->isVP()) return tint;

  bool sawComma = false;
  bool sawColon = false;
  bool sawCC = false;
  int numTrm = 0;
  int pos = 0;
  InputTreesIter subTreeIter = tree->subTrees().begin();
  for( ; subTreeIter != tree->subTrees().end() ; ++subTreeIter, ++pos)
    {
      InputTree* subTree = *subTreeIter;
      ECString strmNm = subTree->term();
      const Term* strm = Term::get(strmNm);
      // The branches are exclusive and tried in this order; the
      // punctuation/CC tests ignore the first subtree (pos == 0).
      if(pos != 0 && strm->isCC()) sawCC = true;
      else if(strmNm == trmNm) numTrm++;
      else if(pos != 0 && strm->isComma()) sawComma = true;
      else if(pos != 0 && strm->isColon()) sawColon = true;
    }

  if(trmNm == "NP" && numTrm == 2 && !sawCC) return Term::lastNTInt() + 1;
  if((sawComma || sawColon || sawCC) && numTrm >= 2)
    return tint + Term::lastNTInt();
  return tint;
}
/* Scans the subtrees of treeh->tree from right to left and returns 0 if
   the scan ends with an unmatched flag set, 1 otherwise.

   The scan starts at the last subtree and stops when it reaches index
   treeh->pos (that subtree is not examined).  Subtrees with an index
   greater than treeh->hpos are skipped.  For each examined subtree: a
   term that isClosed() sets the flag if it is clear; otherwise a term
   that isOpen() clears the flag if it is set.

   The caller must supply a pos that the downward scan actually reaches;
   nothing else terminates the loop. */
int
tree_noopenQl(TreeHist* treeh)
{
  InputTree* tree = treeh->tree;
  const int pos = treeh->pos;
  const int hpos = treeh->hpos;

  bool unmatched = false;
  int i = tree->subTrees().size() - 1;
  InputTrees::reverse_iterator subTreeIter = tree->subTrees().rbegin();
  // i and subTreeIter move together, so i is always the index of the
  // subtree subTreeIter refers to.
  for( ; i != pos ; ++subTreeIter, --i)
    {
      if(i > hpos) continue;
      assert(i >= 0);
      const Term* trm = Term::get((*subTreeIter)->term());
      if(trm->isClosed() && !unmatched) unmatched = true;
      else if(trm->isOpen() && unmatched) unmatched = false;
    }
  return unmatched ? 0 : 1;
}
/* Returns the toInt() value of the term of the subtree at index
   m = treeh->pos + n * l of treeh->tree, or the integer for the "STOP"
   term when that index is not usable.

   "STOP" is returned when m is negative, when m is at or beyond the
   number of subtrees, or when l > 0 and m is greater than treeh->hpos.
   The integer for "STOP" is looked up once and cached in a function
   static. */
int
tree_ngram(TreeHist* treeh, int n, int l)
{
  static int stopTermInt = -1;
  if(stopTermInt < 0)
    {
      ECString stopStr("STOP");
      const Term* stopTerm = Term::get(stopStr);
      stopTermInt = stopTerm->toInt();
    }

  const int pos = treeh->pos;
  const int hp = treeh->hpos;
  const int m = pos + (n * l);
  if(m < 0) return stopTermInt;
  InputTree* tree = treeh->tree;
  // m is known to be non-negative here, so the conversion is exact.
  if(static_cast<InputTrees::size_type>(m) >= tree->subTrees().size())
    return stopTermInt;
  if(m > hp && l > 0) return stopTermInt;

  // 0 <= m < size() holds, so stepping m times from begin() stays inside
  // the container and no "not found" fall-through is needed.
  InputTreesIter subTreeIter = tree->subTrees().begin();
  for(int i = 0 ; i < m ; ++i) ++subTreeIter;
  const Term* trm = Term::get((*subTreeIter)->term());
  assert(trm);
  return trm->toInt();
}
/* Returns the toInt() value of the term of the sibling that immediately
   follows treeh->tree under its parent.

   The integer for the "STOP" term is returned when the tree has no
   parent or is its parent's last subtree.  If the tree is not found
   among its parent's subtrees, error() is called and -1 is returned.
   The "STOP" integer is cached in a function static; a cached value of
   0 is treated as "not yet looked up". */
int
tree_term_after(TreeHist* treeh)
{
  static int stopint = 0;
  if(stopint == 0)
    {
      ECString stopnm("STOP");
      stopint = Term::get(stopnm)->toInt();
    }

  InputTree* tree = treeh->tree;
  InputTree* par = tree->parent();
  if(!par) return stopint;

  // Find this tree among the parent's subtrees.
  InputTreesIter sib = par->subTrees().begin();
  while(sib != par->subTrees().end() && *sib != tree) sib++;
  if(sib == par->subTrees().end())
    {
      error("Should never get here");
      return -1;
    }

  // Step to the following sibling, if there is one.
  sib++;
  if(sib == par->subTrees().end()) return stopint;
  const Term* trm = Term::get((*sib)->term());
  assert(trm);
  return trm->toInt();
}
/* Returns the toInt() value of the term of treeh->tree.  The term
   lookup is asserted to succeed. */
int
tree_term(TreeHist* treeh)
{
  const Term* trm = Term::get(treeh->tree->term());
  assert(trm);
  return trm->toInt();
}
/* Returns an integer code for the parent of treeh->tree.

   - With no parent, the integer for the "S1" term is returned (looked
     up once and cached in a function static; a cached value of 0 is
     treated as "not yet looked up").
   - If the parent's term differs from the tree's own term, the parent
     term's toInt() value is returned.
   - If the two terms are equal, the result of ccIndFromTree(parent) is
     returned instead. */
int
tree_ccparent_term(TreeHist* treeh)
{
  static int s1int = 0;
  if(s1int == 0)
    {
      ECString s1nm("S1");
      s1int = Term::get(s1nm)->toInt();
    }

  assert(treeh);
  InputTree* node = treeh->tree;
  assert(node);
  InputTree* parent = node->parent();
  if(!parent) return s1int;

  const ECString& parentLabel = parent->term();
  const Term* parentTerm = Term::get(parentLabel);
  assert(parentTerm);
  if(parentLabel == node->term())
    {
      // Same term as the parent: defer to ccIndFromTree.
      assert(!parentTerm->terminal_p());
      return ccIndFromTree(parent);
    }
  return parentTerm->toInt();
}
/* Returns the toInt() value of the term of the parent of treeh->tree,
   or the integer for the "S1" term when the tree has no parent.

   The "S1" integer is looked up once and cached in a function static (a
   cached value of 0 is treated as "not yet looked up").  The parent's
   term is asserted to exist and to be a non-terminal. */
int
tree_parent_term(TreeHist* treeh)
{
  InputTree* node = treeh->tree;
  static int s1int = 0;
  if(s1int == 0)
    {
      ECString s1nm("S1");
      s1int = Term::get(s1nm)->toInt();
    }

  InputTree* parent = node->parent();
  if(parent)
    {
      const Term* parentTerm = Term::get(parent->term());
      assert(parentTerm);
      assert(!parentTerm->terminal_p());
      return parentTerm->toInt();
    }
  return s1int;
}
/* Scans the subtrees of treeh->tree from left to right, up to but not
   including index treeh->pos, and returns 0 if the scan ends with the
   open flag set, 1 otherwise.

   For each examined subtree, a term that isOpen() sets the flag if it
   is clear; then, as a separate test, a term that isClosed() clears the
   flag if it is set.  Because the two tests are independent, a term for
   which both hold leaves the flag clear.

   The caller must supply a pos that the upward scan actually reaches;
   nothing else terminates the loop. */
int
tree_noopenQr(TreeHist* treeh)
{
  InputTree* tree = treeh->tree;
  const int pos = treeh->pos;
  const int sz = tree->subTrees().size();

  bool sawOpen = false;
  InputTreesIter subTreeIter = tree->subTrees().begin();
  for(int i = 0 ; i != pos ; ++i, ++subTreeIter)
    {
      // Check the index before dereferencing the iterator.
      assert(i < sz);
      const Term* trm = Term::get((*subTreeIter)->term());
      if(trm->isOpen() && !sawOpen) sawOpen = true;
      if(trm->isClosed() && sawOpen) sawOpen = false;
    }
  return sawOpen ? 0 : 1;
}
/* Recursively adds the items and edges corresponding to `tree` and
   returns the Item created for the root of `tree`.

   Returns NULL when an item or edge cannot be created, or when the same
   happens for any subtree.  Items already added before the failure are
   not removed here.

   Every right-hand side is bracketed by stops[tree->start()] and
   stops[tree->finish()].  A terminal node gets a single word item as
   its right-hand side and its edge probability is set from pHst(); a
   non-terminal node gets the items of its subtrees and its edge
   probability is set by assignRProb(). */
Item*
Bchart::
edgesFromTree(InputTree* tree)
{
  int b0 = tree->num();
  const Term* trm = Term::get(tree->term());
  assert(trm);
  //cerr << "ARI " << *trm << " " << b0 << endl;
  if(printDebug() > 1005)
    cerr << "EFIE " << trm->name() << " " << b0 << endl;
  /* If this is a terminal node, the rhs will be a word; otherwise it
     will be a rule expansion consisting of several Item s.
   */
  if(trm->terminal_p())
    {
      ECString tmpW1 = tree->word();
      // NOTE(review): fixed-size scratch buffer handed to toLower();
      // presumably the lower-cased word is written into it -- confirm
      // that words cannot exceed 511 characters.
      char chars[512];
      ECString tmpW = toLower(tmpW1.c_str(), chars);
      int wInt = wtoInt(tmpW);

      // Each item is checked as soon as it is created, before any use.
      Item* lhs = add_item(b0, trm, tree->start());
      if(!lhs)
	{
	  return NULL;
	}
      lhs->start() = tree->start();
      lhs->finish() = tree->finish();

      Item* rhs = add_item2(b0, trm, wInt, tmpW);
      if(!rhs)
	{
	  return NULL;
	}
      rhs->finish() = tree->finish();
      rhs->start() = tree->start();

      Items subItems;
      subItems.push_back(stops[tree->start()]);
      subItems.push_back(rhs);
      subItems.push_back(stops[tree->finish()]);
      Edge* edg = add_edge(lhs, subItems);
      if(!edg)
	{
	  return NULL;
	}
      edg->prob() = pHst(wInt, trm->toInt());
      edg->num() = b0;
      if(printDebug() > 5)
	cerr << "LHS " << *lhs << " " << tmpW << edg->prob() << endl;
      return lhs;
    }
  else
    {
      Item* lhs = add_item(b0, trm, -1);
      assert(lhs);
      lhs->start() = tree->start();
      lhs->finish() = tree->finish();

      Items subItems;
      subItems.push_back(stops[tree->start()]);
      InputTreesIter iti = tree->subTrees().begin();
      for( ; iti != tree->subTrees().end() ; iti++)
	{
	  InputTree* stree = (*iti);
	  // NOTE(review): unlike the other traces in this function this
	  // one is not guarded by printDebug() -- confirm it is intended.
	  cerr << "WBA "<< stree->term() << *stree   << endl;
	  Item* itm = edgesFromTree(stree);
	  if(!itm)
	    {
	      return NULL;
	    }
	  subItems.push_back(itm);
	}
      subItems.push_back(stops[tree->finish()]);
      Edge* edg = add_edge(lhs, subItems);
      if(!edg)
	{
	  // Return a null pointer, not a bool, from an Item* function.
	  return NULL;
	}
      edg->num() = b0;
      assignRProb(edg);
      if (printDebug() > 5)
	{
	  cerr << "Saw edge " << *edg << ": p=" << edg->prob() << endl;
	}
      //cerr << "endeFE " << *edg << endl;
      // An rPendFactor() call used to follow this return; it could
      // never execute and has been dropped.
      return lhs;
    }
}