//"WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < PDT=target <- DT" inline const bool &predet3(const unsigned long &cons){ if (cons==PENN_CON_WHNP || cons==PENN_CON_NP){ CStateNodeList* childs=node.m_umbinarizedSubNodes; bool lastCond=false; while(childs!=0){ if (((*words)[childs->node->lexical_head].tag.code()==PENN_TAG_DT) && childs->next==0){ lastCond=true; } childs=childs->next; } if (lastCond){ childs=node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if (((*words)[targ->lexical_head].tag.code()==PENN_TAG_PDT) && !isLinked(&node,targ)){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_PREDET); if (buildStanfordLink(STANFORD_DEP_PREDET, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } childs=childs->next; } } } return false; }
/* * "CONJP < TO=target < VB" * */ inline const bool &buildAux3(const unsigned long &cons) { if (cons==PENN_CON_CONJP) { CStateNodeList* childsConjp=node.m_umbinarizedSubNodes; bool child1=false; bool child2=false; const CStateNode* tOTarg=0; while(childsConjp!=0){ if ( ((*words)[childsConjp->node->lexical_head].tag.code()==PENN_TAG_TO) && (!isLinked(&node, childsConjp->node))){ tOTarg=childsConjp->node; child1=true; } if ((*words)[childsConjp->node->lexical_head].tag.code()==PENN_TAG_VERB) { //VB child2=true; } childsConjp=childsConjp->next; } if (child1 && child2) { // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_AUX); if (buildStanfordLink(STANFORD_DEP_AUX, tOTarg->lexical_head, node.lexical_head)){ addLinked(&node,tOTarg); //std::cout<<"nSubj13"<<" (head: "<<node.lexical_head<<")"<<"(npTarg->lexical_head<<")\n"; return true; } } } return false; }
/* *"SQ|SINV < (/^(?:VB|MD|AUX)/=target $++ /^(?:VP|ADJP)/)", * * */ inline const bool &buildAux2(const unsigned long &cons){ if (cons==PENN_CON_SQ || cons==PENN_CON_SINV) { CStateNodeList* childs=node.m_umbinarizedSubNodes; while (childs!=0) { const CStateNode* targ=childs->node; if ((((*words)[targ->lexical_head].tag.code()==PENN_TAG_VERB) || ((*words)[targ->lexical_head].tag.code()==PENN_TAG_MD)) && (!isLinked(&node, targ))) { CStateNodeList* rightSisters=childs; while(rightSisters!=0){ const CStateNode* rSis=rightSisters->node; if ((CConstituent::clearTmp(rSis->constituent.code())==PENN_CON_VP) || (CConstituent::clearTmp(rSis->constituent.code())==PENN_CON_ADJP)) { // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_AUX); if (buildStanfordLink(STANFORD_DEP_AUX, targ->lexical_head, node.lexical_head)){ addLinked(&node,targ); //std::cout<<"nSubj13"<<" (head: "<<node.lexical_head<<")"<<"(npTarg->lexical_head<<")\n"; return true; } } rightSisters=rightSisters->next; } } childs=childs->next; } } return false; }
//"/^(?:VP|ADJP|JJP|WHADJP|SQ?|SBARQ?|SINV|XS|RRC|(?:WH)?NP(?:-TMP|-ADV)?)$/ < (RB|RBR|RBS|WRB|ADVP|WHADVP=target !< " + NOT_PAT + ")", inline const bool &advmod1(const unsigned long &cons){ if (cons==PENN_CON_VP || cons==PENN_CON_ADJP || cons==PENN_CON_S || //S (SBAR) is there because because SQ? (SBARQ?) matches SQ and S, being Q optional cons==PENN_CON_SQ || cons==PENN_CON_SBAR || cons==PENN_CON_SBARQ || cons==PENN_CON_WHNP || cons==PENN_CON_SBARQ || cons==PENN_CON_SINV || cons==PENN_CON_RRC || cons==PENN_CON_SBARQ){ CStateNodeList* childs = node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if (((*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB_COMPARATIVE || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB_SUPERLATIVE || (*words)[targ->lexical_head].tag.code()==PENN_TAG_WRB || CConstituent::clearTmp(targ->constituent.code())==PENN_CON_WHADVP) && !isLinked(&node,targ)){ CStateNodeList* childsT=targ->m_umbinarizedSubNodes; bool notCond=true; if (((*words)[targ->lexical_head].word==g_word_not) ||((*words)[targ->lexical_head].word==g_word_nt)){ notCond=false; } if (notCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ADVMOD); if (buildStanfordLink(STANFORD_DEP_ADVMOD, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } childs=childs->next; } } return false; }
//"/(?:WH)?PP(?:-TMP|-ADV)?$/ <# (__ $-- (RB|RBR|RBS|WRB|ADVP|WHADVP=target !< " + NOT_PAT + "))", inline const bool &advmod4(const unsigned long &cons){ if (cons==PENN_CON_PP || cons==PENN_CON_WHPP){ CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ if (childs->node->lexical_head==node.lexical_head){//<# CStateNodeList* leftSisters=childs->previous; while(leftSisters!=0){ const CStateNode* targ=childs->node; if (((*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB_COMPARATIVE || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB_SUPERLATIVE || (*words)[targ->lexical_head].tag.code()==PENN_TAG_WRB || CConstituent::clearTmp(targ->constituent.code())==PENN_CON_ADVP || CConstituent::clearTmp(targ->constituent.code())==PENN_CON_WHADVP) && !isLinked(&node,targ)){ bool notCond=true; if (((*words)[targ->lexical_head].word==g_word_not) ||((*words)[targ->lexical_head].word==g_word_nt)){ notCond=false; } if (notCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ADVMOD); if (buildStanfordLink(STANFORD_DEP_ADVMOD, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } leftSisters=leftSisters->previous; } } childs=childs->next; } } }
//"WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (PDT|DT=target $+ DT $++ (/^JJ/ !$+ /^NN/)) !$++ CC", inline const bool &predet2(const unsigned long &cons){ CStateNodeList* childsN=node.m_umbinarizedSubNodes; while(childsN!=0){ const CStateNode* head=childsN->node; if (CConstituent::clearTmp(head->constituent.code())==PENN_CON_WHNP || CConstituent::clearTmp(head->constituent.code())==PENN_CON_NP){ bool rightSisCond=true; CStateNodeList* rightSisters=childsN->next; while(rightSisters!=0){ if (((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_CC)){ rightSisCond=false; } rightSisters=rightSisters->next; } if (rightSisCond){ CStateNodeList* childs=head->m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if (((*words)[targ->lexical_head].tag.code()==PENN_TAG_DT) ||((*words)[targ->lexical_head].tag.code()==PENN_TAG_PDT) && !isLinked(&node,targ)){ CStateNodeList* rightSis=childs->next; bool firstCond=false; bool secCond=false; if (rightSis!=0){ if (((*words)[rightSis->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE) ||((*words)[rightSis->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_COMPARATIVE) ||((*words)[rightSis->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_SUPERLATIVE)){ firstCond=true; } if (firstCond){ while(rightSis!=0){ //PENN_TAG_NOUN, PENN_TAG_NOUN_PROPER, PENN_TAG_NOUN_PROPER_PLURAL, PENN_TAG_NOUN_PLURAL, if (((*words)[rightSis->node->lexical_head].tag.code()==PENN_TAG_NOUN) ||((*words)[rightSis->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER) ||((*words)[rightSis->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER_PLURAL) ||((*words)[rightSis->node->lexical_head].tag.code()==PENN_TAG_NOUN_PLURAL)){ secCond=true; } rightSis=rightSis->next; } if (firstCond && secCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_PREDET); if (buildStanfordLink(STANFORD_DEP_PREDET, targ->lexical_head, head->lexical_head)) { addLinked(&node,targ); return true; } } } } } childs=childs->next; } } } childsN=childsN->next; } return false; }
//"@NP <1 (@NP <<# /^%$/) <2 (@NP=target <<# days|month|months) !<3 __", inline const bool &npadvmod4(const unsigned long &cons){ if (cons==PENN_CON_NP){ CStateNodeList* childsNp=node.m_umbinarizedSubNodes; if (childsNp!=0){ bool firstCond=false; if (CConstituent::clearTmp(childsNp->node->constituent.code())==PENN_CON_NP){ // CStateNodeList* desc=new CStateNodeList(); listDescendants(childsNp->node->m_umbinarizedSubNodes, desc); if (desc->node==0) { desc->clear(); desc=0; } while(desc!=0){ if (((*words)[desc->node->lexical_head].word==g_word_perc) && (childsNp->node->lexical_head==desc->node->lexical_head)){ firstCond=true; } desc=desc->next; } } childsNp=childsNp->next; if (childsNp!=0 && firstCond){ bool secCond=false; const CStateNode* targ=childsNp->node; if (CConstituent::clearTmp(targ->constituent.code())==PENN_CON_NP && !isLinked(&node,targ)){ CStateNodeList* desc2=new CStateNodeList(); listDescendants(targ->m_umbinarizedSubNodes, desc2); if (desc2->node==0) { desc2->clear(); desc2=0; } while(desc2!=0){ if (((*words)[desc2->node->lexical_head].word==g_word_days || (*words)[desc2->node->lexical_head].word==g_word_month || (*words)[desc2->node->lexical_head].word==g_word_months) && (targ->lexical_head==desc2->node->lexical_head)){ secCond=true; } desc2=desc2->next; } } childsNp=childsNp->next; if (childsNp==0 && secCond){ //there is no 3rd child // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_NPADVMOD); if (buildStanfordLink(STANFORD_DEP_NPADVMOD, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } } } return false; }
//SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBG))", inline const bool &buildAux4(const unsigned long &cons) { if (cons==PENN_CON_SINV) { CStateNodeList* childsSinv=node.m_umbinarizedSubNodes; while (childsSinv!=0){ const CStateNode* vpTarg=childsSinv->node; if (CConstituent::clearTmp(vpTarg->constituent.code())==PENN_CON_VP && (isLinked(&node, vpTarg))) { CStateNodeList* childsVp=vpTarg->m_umbinarizedSubNodes; bool firstCondVp=false; bool secondCondVp=false; while(childsVp!=0) { const CStateNode* childVp=childsVp->node; if ((*words)[childVp->lexical_head].tag.code()==PENN_TAG_VERB || (*words)[childVp->lexical_head].tag.code()==PENN_TAG_POS) { //VB or POS if (compareWordToBeAuxiliaryWordRegex((*words)[childVp->lexical_head].word)) { firstCondVp=true; } } childsVp=childsVp->next; } //left sisters vp CStateNodeList* leftSistersVp=childsSinv->previous; while(leftSistersVp!=0){ const CStateNode* vpChild=leftSistersVp->node; if (CConstituent::clearTmp(vpChild->constituent.code())==PENN_CON_VP){ //child PENN_TAG_VERB_PROG? CStateNodeList* childsSecVp=vpChild->m_umbinarizedSubNodes; while (childsSecVp!=0) { if ((*words)[childsSecVp->node->lexical_head].tag.code()==PENN_TAG_VERB_PROG) { //VBG secondCondVp=true; } childsSecVp=childsSecVp->next; } } leftSistersVp=leftSistersVp->previous; } if (firstCondVp && secondCondVp) { // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_AUX); if (buildStanfordLink(STANFORD_DEP_AUX, vpTarg->lexical_head, node.lexical_head)){ addLinked(&node,vpTarg); //std::cout<<"nSubj13"<<" (head: "<<node.lexical_head<<")"<<"(npTarg->lexical_head<<")\n"; return true; } } } childsSinv=childsSinv->next; } } return false; }
inline const bool & buildXComp6(const unsigned long &cons) { if (cons==PENN_CON_VP){ CStateNodeList* childsfstVp=node.m_umbinarizedSubNodes; while(childsfstVp!=0){ const CStateNode* vpNode=childsfstVp->node; if (CConstituent::clearTmp(vpNode->constituent.code())==PENN_CON_VP){ CStateNodeList* childsVp=vpNode->m_umbinarizedSubNodes; while(childsVp!=0){ const CStateNode* sTarg=childsVp->node; if (CConstituent::clearTmp(sTarg->constituent.code())==PENN_CON_S && (!isLinked(vpNode,sTarg))) { bool firstCondition=true; //A $- B A is the immediate right sister of B if (childsVp->previous!=0){ const CStateNode* leftSisterS=childsVp->previous->node; if (((*words)[leftSisterS->lexical_head].tag.code()==PENN_TAG_NOUN)) { //CStateNodeList* childsNN=leftSisterS->m_umbinarizedSubNodes; //while(childsNN!=0){ //const CStateNode* orderChild=childsNN->node; if ((*words)[leftSisterS->lexical_head].word==g_word_order){ firstCondition=false; } //childsNN=childsNN->next; //} } } if (firstCondition){ //S=target <: NP //A <: B B is the only child of A CStateNodeList* childsS=sTarg->m_umbinarizedSubNodes; if (childsS!=0){ if ((CConstituent::clearTmp(childsS->node->constituent.code())==PENN_CON_NP) && (childsS->next ==0)){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_XCOMP); if (buildStanfordLink(STANFORD_DEP_XCOMP, sTarg->lexical_head, vpNode->lexical_head)) { //addLinked(vpNode,sTarg); addLinked(&node,sTarg); //I think this is not correct, in this specific case. return true; } } } } } childsVp=childsVp->next; } } childsfstVp=childsfstVp->next; } } return false; }
//"VP < (S=target !$- (NN < order) < (VP < TO))", // used to have !> (VP < (VB|AUX < be)) inline const bool & buildXComp1(const unsigned long &cons) { if (cons==PENN_CON_VP){ CStateNodeList* childsVp=node.m_umbinarizedSubNodes; while(childsVp!=0){ const CStateNode* sTarg=childsVp->node; if (CConstituent::clearTmp(sTarg->constituent.code())==PENN_CON_S && (!isLinked(&node,sTarg))){ //A $- B: A is the immediate right sister of B // std::cerr<<"S"<<((*words)[sTarg->lexical_head].word)<<"\n"; bool sisterCondition=false; if (childsVp->previous!=0){ const CStateNode* leftSisterS=childsVp->previous->node; if (((*words)[leftSisterS->lexical_head].tag.code()==PENN_TAG_NOUN)) { if ((*words)[leftSisterS->lexical_head].word==g_word_order){ sisterCondition=true; // std::cerr<<"NN order"<<((*words)[leftSisterS->lexical_head].word)<<"\n"; } } } // std::cerr<<"-->"<<sisterCondition<<"\n"; if (!sisterCondition){ CStateNodeList* childsS=sTarg->m_umbinarizedSubNodes; while(childsS!=0){ const CStateNode* vpChild=childsS->node; if (CConstituent::clearTmp(vpChild->constituent.code())==PENN_CON_VP){ // std::cerr<<"VP"<<((*words)[vpChild->lexical_head].word)<<"\n"; CStateNodeList* childsVps=vpChild->m_umbinarizedSubNodes; while(childsVps!=0){ if ((*words)[childsVps->node->lexical_head].tag.code()==PENN_TAG_TO) { // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_XCOMP); // std::cerr<<"TO"<<((*words)[childsVps->node->lexical_head].word)<<"\n"; if (buildStanfordLink(STANFORD_DEP_XCOMP, sTarg->lexical_head, node.lexical_head)) { addLinked(&node,sTarg); return true; } } childsVps=childsVps->next; } } childsS=childsS->next; } } } childsVp=childsVp->next; } } return false; }
//"@ADVP|ADJP|WHADJP|WHADVP|PP|WHPP <# (JJ|JJR|IN|RB|RBR !< notwithstanding $- (@NP=target !< NNP|NNPS))", inline const bool &npadvmod1(const unsigned long &cons){ if (cons==PENN_CON_ADVP||cons==PENN_CON_ADJP||cons==PENN_CON_WHADJP||cons==PENN_CON_WHADJP||cons==PENN_CON_PP||cons==PENN_CON_WHPP) { CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ if (((*words)[childs->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_COMPARATIVE || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_IN || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_ADVERB || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_ADVERB_COMPARATIVE) && childs->node->lexical_head==node.lexical_head) { bool inCond=true; //CStateNodeList* childsJ=childs->node->m_umbinarizedSubNodes; //while(childsJ!=0){ if ((*words)[childs->node->lexical_head].word==g_word_notwithstanding){ inCond=false; } // childsJ=childsJ->next; //} if (inCond){ if (childs->previous!=0){ const CStateNode* npTarg=childs->previous->node; if (CConstituent::clearTmp(npTarg->constituent.code())==PENN_CON_NP && !isLinked(&node,npTarg)){ bool lastCond=true; CStateNodeList* childsNp=npTarg->m_umbinarizedSubNodes; while(childsNp!=0){ if ((*words)[childsNp->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER || (*words)[childsNp->node->lexical_head].tag.code()==PENN_TAG_NOUN_PLURAL){ lastCond=false; } childsNp=childsNp->next; } if (lastCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_NPADVMOD); if (buildStanfordLink(STANFORD_DEP_NPADVMOD, npTarg->lexical_head, node.lexical_head)) { addLinked(&node,npTarg); return true; } } } } } } childs=childs->next; } } return false; }
//"/^NP(?:-[A-Z]+)?$/ < (SBAR=target < (S < (VP < TO)) $-- NP|NN|NNP|NNS)" inline const bool &infmod2(const unsigned long &cons){ if (cons==PENN_CON_NP){ CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if (CConstituent::clearTmp(targ->constituent.code())==PENN_CON_SBAR && !isLinked(&node,targ)){ bool sistCond=false; bool childsCond=false; CStateNodeList* leftSisters=childs->previous; while(leftSisters!=0){ if (CConstituent::clearTmp(leftSisters->node->constituent.code())==PENN_CON_NP || (*words)[leftSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN || (*words)[leftSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER || (*words)[leftSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN_PLURAL){ sistCond=true; } leftSisters=leftSisters->previous; } CStateNodeList* childsT=targ->m_umbinarizedSubNodes; while(childsT!=0){ if (CConstituent::clearTmp(childsT->node->constituent.code())==PENN_CON_S){ CStateNodeList* childsS=childsT->node->m_umbinarizedSubNodes; while(childsS!=0){ if (CConstituent::clearTmp(childsS->node->constituent.code())==PENN_CON_VP){ CStateNodeList* childsVp=childsS->node->m_umbinarizedSubNodes; while(childsVp!=0){ if ((*words)[childsVp->node->lexical_head].tag.code()==PENN_TAG_TO){ sistCond=true; } childsVp=childsVp->next; } } childsS=childsS->next; } } childsT=childsT->next; } if (sistCond && childsCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_INFMOD); if (buildStanfordLink(STANFORD_DEP_INFMOD, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } childs=childs->next; } } return false; }
//"SQ <, (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (NP=target $-- (NP !< EX))" inline const bool &buildAttr4(const unsigned long &cons) { //A <, B (B is the FIRST child of A) if (cons==PENN_CON_SQ) { if (node.m_umbinarizedSubNodes!=0) { const CStateNode* firstChild=node.m_umbinarizedSubNodes->node; bool firstCondition=false; //SQ<, (/^(?:VB|AUX)/ < " + copularWordRegex + ") if (firstChild!=0 && ((*words)[firstChild->lexical_head].tag.code()==PENN_TAG_VERB)) { if ((compareWordToCopularWordRegex((*words)[firstChild->lexical_head].word))) { firstCondition=true; } } if (firstCondition){ CStateNodeList* childsSQ=node.m_umbinarizedSubNodes; while(childsSQ!=0){ const CStateNode* npTargChildSq=childsSQ->node; if (CConstituent::clearTmp(npTargChildSq->constituent.code())==PENN_CON_NP && (!isLinked(&node,npTargChildSq))) { //A $--B (A is RIGHT sister of B) CStateNodeList* leftSistersNp=childsSQ->previous; while(leftSistersNp!=0){ const CStateNode* sisterNp=leftSistersNp->node; if (CConstituent::clearTmp(sisterNp->constituent.code())==PENN_CON_NP) { CStateNodeList* childsNpNp=sisterNp->m_umbinarizedSubNodes; bool noEx=true; while(childsNpNp!=0) { const CStateNode* exChildNp= childsNpNp->node; if ((*words)[exChildNp->lexical_head].tag.code()==PENN_TAG_EX) { noEx=false; } childsNpNp=childsNpNp->next; } if (noEx){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ATTR); if (buildStanfordLink(STANFORD_DEP_ATTR, npTargChildSq->lexical_head, node.lexical_head)) { addLinked(&node,npTargChildSq); return true; } } } leftSistersNp=leftSistersNp->previous; } } childsSQ=childsSQ->next; } } } } return false; }
//"VP < (S=target !$- (NN < order) < (NP $+ NP|ADJP))", inline const bool &buildXComp3(const unsigned long &cons) { if (cons==PENN_CON_VP){ CStateNodeList* childsVp=node.m_umbinarizedSubNodes; while(childsVp!=0){ const CStateNode* sTarg=childsVp->node; if (CConstituent::clearTmp(sTarg->constituent.code())==PENN_CON_S && (!isLinked(&node,sTarg))){ //A $- B: A is the immediate right sister of B bool sisterCondition=false; if (childsVp->previous!=0){ const CStateNode* leftSisterS=childsVp->previous->node; if (((*words)[leftSisterS->lexical_head].tag.code()==PENN_TAG_NOUN)) { //CStateNodeList* childsNN=leftSisterS->m_umbinarizedSubNodes; //while(childsNN!=0){ //const CStateNode* orderChild=childsNN->node; if ((*words)[leftSisterS->lexical_head].word==g_word_order){ sisterCondition=true; } //childsNN=childsNN->next; //} } } if (sisterCondition){ CStateNodeList* childsS=sTarg->m_umbinarizedSubNodes; while(childsS!=0){ const CStateNode* npChildS=childsS->node; if (CConstituent::clearTmp(npChildS->constituent.code())==PENN_CON_NP){ //A $+ B A is the immediate left sister of B if (childsS->next!=0){ if (CConstituent::clearTmp(childsS->next->node->constituent.code())==PENN_CON_NP ||CConstituent::clearTmp(childsS->next->node->constituent.code())==PENN_CON_ADJP){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_XCOMP); if (buildStanfordLink(STANFORD_DEP_XCOMP, sTarg->lexical_head, node.lexical_head)) { addLinked(&node,sTarg); return true; } } } } childsS=childsS->next; } } } childsVp=childsVp->next; } } return false; }
//"SBARQ <, WHADVP=target" inline const bool &advmod31(const unsigned long &cons){ if (cons==PENN_CON_SBARQ){ CStateNodeList* childs=node.m_umbinarizedSubNodes; if (childs!=0){ const CStateNode* targ=childs->node; if (CConstituent::clearTmp(targ->constituent.code())==PENN_CON_WHADVP && !isLinked(&node,targ)){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ADVMOD); if (buildStanfordLink(STANFORD_DEP_ADVMOD, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } } return false; }
//"/^(?:(?:WH)?NP(?:-TMP|-ADV)?|NX|NAC|NML)$/ < (PDT|DT=target $+ /^(?:DT|WP\\$|PRP\\$)$/ $++ /^N[NXM]/ !$++ CC)", inline const bool &predet1(const unsigned long &cons){ if (cons==PENN_CON_WHNP || cons==PENN_CON_NP || cons==PENN_CON_NAC || cons==PENN_CON_NX){ CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if ((((*words)[targ->lexical_head].tag.code()==PENN_TAG_PDT) ||((*words)[targ->lexical_head].tag.code()==PENN_TAG_DT)) && !isLinked(&node,targ)){ bool firstCond=false; bool secCond=false; bool thirdCond=true; CStateNodeList* rightSisters=childs->next; if (rightSisters!=0){ if (((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_PDT) ||((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_WP) ||((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_WP_DOLLAR) ||((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_PRP_DOLLAR) ||((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_PRP)) { firstCond=true; } } while(rightSisters!=0){ if ((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN || CConstituent::clearTmp(rightSisters->node->constituent.code())==PENN_CON_NX){ secCond=true; } if ((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_CC){ thirdCond=false; } rightSisters=rightSisters->next; } if (firstCond && secCond && thirdCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_PREDET); if (buildStanfordLink(STANFORD_DEP_PREDET, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } childs=childs->next; } } return false; }
//"__ [ < INTJ=target | < (PRN=target <1 /^(?:,|-LRB-)$/ <2 INTJ [ !<3 __ | <3 /^(?:,|-RRB-)$/ ] ) ]" inline const bool & discourse3(const unsigned long &cons){ bool cond1=false; bool cond2=false; CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if (CConstituent::clearTmp(targ->constituent.code())==PENN_CON_INTJ && !isLinked(&node,targ)){ cond1=true; } else if (CConstituent::clearTmp(targ->constituent.code())==PENN_CON_PRN && !isLinked(&node,targ)){ bool child1=false; bool child2=false; bool child3=false; CStateNodeList* childsT=targ->m_umbinarizedSubNodes; if (childsT!=0){ if ((*words)[childsT->node->lexical_head].word==g_word_comma || (*words)[childsT->node->lexical_head].tag.code()==PENN_TAG_L_BRACKET){ child1=true; } if (child1 && childsT->next!=0){ if (CConstituent::clearTmp(childsT->next->node->constituent.code())==PENN_CON_INTJ){ child2=true; } if (child2 && childsT->next->next!=0){ if ((*words)[childsT->next->next->node->lexical_head].word==g_word_comma || (*words)[childsT->next->next->node->lexical_head].tag.code()==PENN_TAG_R_BRACKET){ child3=true; } } } } cond2=child1 && child2 && child3; } if (cond1||cond2){ // CDependencyLabel* label=new CDependencyLabel(0); if (buildStanfordLink(0, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } childs=childs->next; } }
//"ADVP|WHADVP < (RB|RBR|RBS|WRB|ADVP|WHADVP|JJ=target !< " + NOT_PAT + ") !< CC !< CONJP", inline const bool &advmod2(const unsigned long &cons){ if (cons==PENN_CON_ADVP||cons==PENN_CON_WHADVP){ bool ccCond=true; bool conjpCond=true; CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ if (CConstituent::clearTmp(childs->node->constituent.code())==PENN_CON_CONJP){ conjpCond=false; } else if (((*words)[childs->node->lexical_head].tag.code()==PENN_TAG_CC)){ ccCond=false; } childs=childs->next; } if (ccCond && conjpCond){ childs=node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if ((CConstituent::clearTmp(targ->constituent.code())==PENN_CON_WHADVP || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB_COMPARATIVE || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADVERB_SUPERLATIVE || CConstituent::clearTmp(targ->constituent.code())==PENN_CON_ADVP || (*words)[targ->lexical_head].tag.code()==PENN_TAG_ADJECTIVE || (*words)[targ->lexical_head].tag.code()==PENN_TAG_WRB) && !isLinked(&node,targ)){ bool notCond=true; CStateNodeList* childsT=targ->m_umbinarizedSubNodes; if (((*words)[targ->lexical_head].word==g_word_not) ||((*words)[targ->lexical_head].word==g_word_nt)){ notCond=false; } if (notCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ADVMOD); if (buildStanfordLink(STANFORD_DEP_ADVMOD, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } childs=childs->next; } } } return false; }
//"VP < (/^(?:VB|AUX)/ $+ (VP=target < VB < NP))", inline const bool &buildXComp4(const unsigned long &cons) { if (cons==PENN_CON_VP){ bool firstCondition=false; CStateNodeList* childsVp=node.m_umbinarizedSubNodes; while(childsVp!=0){ if (((*words)[childsVp->node->lexical_head].tag.code()==PENN_TAG_VERB)){ firstCondition=true; } childsVp=childsVp->next; } if (firstCondition && childsVp!=0){ //A $+ B A is the immediate left sister of B if (childsVp->next!=0){ const CStateNode* vpTarg=childsVp->next->node; if ((CConstituent::clearTmp(vpTarg->constituent.code())==PENN_CON_VP) && (!isLinked(&node,vpTarg))){ CStateNodeList* childsVpTarg=vpTarg->m_umbinarizedSubNodes; while(childsVpTarg!=0){ const CStateNode* vbChildVp=childsVpTarg->node; if (((*words)[vbChildVp->lexical_head].tag.code()==PENN_TAG_VERB)){ CStateNodeList* childsVb=vbChildVp->m_umbinarizedSubNodes; while(childsVb!=0){ if (CConstituent::clearTmp(childsVb->node->constituent.code())==PENN_CON_NP){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_XCOMP); if (buildStanfordLink(STANFORD_DEP_XCOMP, vpTarg->lexical_head, node.lexical_head)) { addLinked(&node,vpTarg); return true; } } childsVb=childsVb->next; } } childsVpTarg=childsVpTarg->next; } } } } } return false; }
//"ADJP|ADVP < (FW $- FW=target)", inline const bool &nn3(const unsigned long &cons){ if (cons==PENN_CON_ADJP || cons==PENN_CON_ADVP){ CStateNodeList* childsAd=node.m_umbinarizedSubNodes; while(childsAd!=0){ if ((*words)[childsAd->node->lexical_head].tag.code()==PENN_TAG_FW){ if (childsAd->previous!=0){ const CStateNode* fwTarg=childsAd->previous->node; if (((*words)[fwTarg->lexical_head].tag.code()==PENN_TAG_FW)&&!(isLinked(&node,fwTarg))){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_NN); if (buildStanfordLink(STANFORD_DEP_NN, fwTarg->lexical_head, node.lexical_head)) { addLinked(&node,fwTarg); return true; } } } } childsAd=childsAd->next; } } return false; }
//"@ADJP < (NN=target $++ /^JJ/) !< CC|CONJP", inline const bool &npadvmod2(const unsigned long &cons){ if (cons==PENN_CON_ADJP){ CStateNodeList* childsAdjp=node.m_umbinarizedSubNodes; bool firstCondition=true; while(childsAdjp!=0){ if (((*words)[childsAdjp->node->lexical_head].tag.code()==PENN_TAG_CC) || CConstituent::clearTmp(childsAdjp->node->constituent.code())==PENN_CON_CONJP) { firstCondition=false; } childsAdjp=childsAdjp->next; } if (firstCondition){ childsAdjp=node.m_umbinarizedSubNodes; while(childsAdjp!=0){ const CStateNode* nnTarg=childsAdjp->node; if (((*words)[nnTarg->lexical_head].tag.code()==PENN_TAG_NOUN)&& !(isLinked(&node,nnTarg))) { CStateNodeList* rightSistersNN=childsAdjp; while(rightSistersNN!=0){ //PENN_TAG_ADJECTIVE, PENN_TAG_ADJECTIVE_COMPARATIVE, PENN_TAG_ADJECTIVE_SUPERLATIVE if (((*words)[rightSistersNN->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE)|| ((*words)[rightSistersNN->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_COMPARATIVE)|| ((*words)[rightSistersNN->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_SUPERLATIVE)) { // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_NPADVMOD); if (buildStanfordLink(STANFORD_DEP_NPADVMOD, nnTarg->lexical_head, node.lexical_head)) { addLinked(&node,nnTarg); return true; } } rightSistersNN=rightSistersNN->next; } } childsAdjp=childsAdjp->next; } } } return false; }
/* * * VP < VP < /^(?:TO|MD|VB.*|AUXG?|POS)$/=target", * * * PENN_TAG_VERB_PAST, PENN_TAG_VERB_PROG, PENN_TAG_VERB_PAST_PARTICIPATE, PENN_TAG_VERB_PRES, PENN_TAG_VERB_THIRD_SINGLE */ inline const bool &buildAux1(const unsigned long &cons){ if (cons==PENN_CON_VP) { CStateNodeList* childsConjp=node.m_umbinarizedSubNodes; bool child1=false; while(childsConjp!=0){ if (CConstituent::clearTmp(childsConjp->node->constituent.code())==PENN_CON_VP){ child1=true; } childsConjp=childsConjp->next; } if (child1){ CStateNodeList* childsConjp2=node.m_umbinarizedSubNodes; while(childsConjp2!=0){ if ((((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_VERB) || ((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_VERB_PAST) || ((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_VERB_PROG) || ((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_VERB_PAST_PARTICIPATE) || ((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_VERB_PRES) || ((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_VERB_THIRD_SINGLE) || ((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_MD) || ((*words)[childsConjp2->node->lexical_head].tag.code()==PENN_TAG_TO)) && (!isLinked(&node, childsConjp2->node))) { // const CStateNode* targ=childsConjp2->node; // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_AUX); if (buildStanfordLink(STANFORD_DEP_AUX, targ->lexical_head, node.lexical_head)){ addLinked(&node,targ); //std::cout<<"aux1 \n"; //std::cout<<"dependent:"<<(*words)[targ->lexical_head].word<<"\n"; //std::cout<<"head:"<<(*words)[node.lexical_head].word<<"\n"; return true; } } childsConjp2=childsConjp2->next; } } } return false; }
//"SBAR < (WHNP=target < WRB)", inline const bool &advmod3(const unsigned long &cons){ if (cons==PENN_CON_SBAR){ CStateNodeList* childsSbar=node.m_umbinarizedSubNodes; while(childsSbar!=0){ const CStateNode* targ=childsSbar->node; if (CConstituent::clearTmp(targ->constituent.code())==PENN_CON_WHNP && !isLinked(&node,targ)){ CStateNodeList* childsT=targ->m_umbinarizedSubNodes; while(childsT!=0){ if (((*words)[childsT->node->lexical_head].tag.code()==PENN_TAG_WRB)){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ADVMOD); if (buildStanfordLink(STANFORD_DEP_ADVMOD, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } childsT=childsT->next; } } childsSbar=childsSbar->next; } } return false; }
//"CONJP < (RB=target !< " + NOT_PAT + ")", inline const bool &advmod6(const unsigned long &cons){ if (cons==PENN_CON_CONJP){ CStateNodeList* childsConjp=node.m_umbinarizedSubNodes; while(childsConjp!=0){ const CStateNode* rbTarg=childsConjp->node; if ((*words)[rbTarg->lexical_head].tag.code()==PENN_TAG_ADVERB && !isLinked(&node, rbTarg)){ bool notCond=true; if (((*words)[rbTarg->lexical_head].word==g_word_not) ||((*words)[rbTarg->lexical_head].word==g_word_nt)){ notCond=false; } if (notCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ADVMOD); if (buildStanfordLink(STANFORD_DEP_ADVMOD, rbTarg->lexical_head, node.lexical_head)) { addLinked(&node,rbTarg); return true; } } } childsConjp=childsConjp->next; } } return false; }
//"/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < (NP|NML|NN|NNS|NNP|NNPS|FW|AFX=target $++ NN|NNS|NNP|NNPS|FW|CD !<<- POS !<<- (VBZ < /^\'s$/) !$- /^,$/ )", inline const bool &nn1(const unsigned long &cons){ //std::cout<<"nn1 \n"; //std::cout<<(*words)[node.lexical_head].word<<"\n"; //std::cout<<cons<<"\n"; if (cons==PENN_CON_WHNP || cons==PENN_CON_NP ||cons==PENN_CON_NAC || cons==PENN_CON_NX){ CStateNodeList* childs=node.m_umbinarizedSubNodes; //std::cout<<"After the constittuent test"<<(*words)[node.lexical_head].word<<"\n"; //std::cout<<(*words)[targ->lexical_head].word<<"\n"; while(childs!=0){ const CStateNode* targ=childs->node; //PENN_TAG_NOUN, PENN_TAG_NOUN_PROPER, PENN_TAG_NOUN_PROPER_PLURAL, PENN_TAG_NOUN_PLURAL, if ((CConstituent::clearTmp(targ->constituent.code())==PENN_CON_NP || (*words)[targ->lexical_head].tag.code()==PENN_TAG_NOUN || (*words)[targ->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER || (*words)[targ->lexical_head].tag.code()==PENN_TAG_NOUN_PLURAL || (*words)[targ->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER_PLURAL || (*words)[targ->lexical_head].tag.code()==PENN_TAG_FW ) && !isLinked(&node,targ)){ //std::cout<<"TARGET"<<(*words)[targ->lexical_head].word<<"\n"; bool rightsistCond=false; bool descCond1=true; bool descCond2=true; bool leftSistCond=true; //(*words)[targ->lexical_head].tag.code()==PENN_TAG_CD CStateNodeList* rightSisters=childs->next; while(rightSisters!=0){ //std::cout<<"RIGHTSIS"<<(*words)[rightSisters->node->lexical_head].word<<"\n"; if ((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN || (*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_CD || (*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER || (*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN_PLURAL || (*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER_PLURAL || (*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_FW ){ rightsistCond=true; } rightSisters=rightSisters->next; } if (rightsistCond){ CStateNodeList* leftSisters=childs->previous; if (leftSisters!=0){ if ((*words)[leftSisters->node->lexical_head].word==g_word_comma){ leftSistCond=false; } } if (leftSistCond){ CStateNodeList* descendants=new CStateNodeList(); listRightMostDescendants(targ->m_umbinarizedSubNodes,descendants); if (descendants->node==0) { descendants->clear(); descendants=0; } while(descendants!=0){ if ((*words)[descendants->node->lexical_head].tag.code()==PENN_TAG_POS){ descCond1=false; } else if ((*words)[descendants->node->lexical_head].tag.code()==PENN_TAG_VERB_THIRD_SINGLE){ CStateNodeList* childsVbz=descendants->node->m_umbinarizedSubNodes; //while(childsVbz!=0){ if ((*words)[descendants->node->lexical_head].word==g_word_aps){ descCond2=false; } //childsVbz=childsVbz->next; //} } descendants=descendants->next; } } if (rightsistCond && descCond1 && descCond2 && leftSistCond){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_NN); if (buildStanfordLink(STANFORD_DEP_NN, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } } childs=childs->next; } } return false; }
//"(VP < (S=target < (VP < VBG ) !< NP !$- (/^,$/ [$- @NP |$- (@PP $-- @NP ) |$- (@ADVP $-- @NP)]) !$-- /^:$/))", inline const bool &buildXComp7(const unsigned long &cons) { if (cons==PENN_CON_VP){ CStateNodeList* childsVp=node.m_umbinarizedSubNodes; while(childsVp!=0){ const CStateNode* sTarg=childsVp->node; if (CConstituent::clearTmp(sTarg->constituent.code())==PENN_CON_S && (!isLinked(&node,sTarg))) { bool secondCondition=true;// !<NP bool thirdCondition=true; //!$- (/^,$/ [$- @NP |$- (@PP $-- @NP ) |$- (@ADVP $-- @NP)]) bool fourthCondition=true; //!$-- /^:$/ CStateNodeList* childsS=sTarg->m_umbinarizedSubNodes; while(childsS!=0){ if (CConstituent::clearTmp(childsS->node->constituent.code())==PENN_CON_NP) { secondCondition=false; } childsS=childsS->next; } if (secondCondition){ //A $-- B A is a right sister of B CStateNodeList* leftSistersS=childsVp->previous; while(leftSistersS!=0){ if ((*words)[leftSistersS->node->lexical_head].word==g_word_two_dots){ fourthCondition=false; } leftSistersS=leftSistersS->previous; } if (fourthCondition){ if (childsVp->previous!=0){ const CStateNode* commaLeftSisterS=childsVp->previous->node; if ((*words)[commaLeftSisterS->lexical_head].word==g_word_comma){ //and now, ///^,$/ $- @NP //or ///^,$/ $- (@PP $-- @NP ) //or ///^,$/ $- @NP $- (@ADVP $-- @NP) bool firstInCondition=false; bool secondInCondition=false; bool thirdInCondition=false; //A $- B. A is the immediate right sister of B //In this part of the code I matched the @NP as NP, what is the exact difference, what is the head category? if (childsVp->previous->previous!=0){ const CStateNode* leftSisterOfComma=childsVp->previous->previous->node; if (CConstituent::clearTmp(leftSisterOfComma->constituent.code())==PENN_CON_NP){ firstInCondition=true; // I don't see the difference between matching just one @NP to matching @NP $- (@ADVP $-- @NP), but anyway, let's make the code. if (childsVp->previous->previous->previous!=0){ const CStateNode* leftSisterOfNp=childsVp->previous->previous->previous->node; if (CConstituent::clearTmp(leftSisterOfNp->constituent.code())==PENN_CON_ADVP){ CStateNodeList* leftSistersADVP=childsVp->previous->previous->previous->previous; while(leftSistersADVP!=0){ if (CConstituent::clearTmp(leftSistersADVP->node->constituent.code())==PENN_CON_NP){ thirdInCondition=true; } leftSistersADVP=leftSistersADVP->next; } } } } if (CConstituent::clearTmp(leftSisterOfComma->constituent.code())==PENN_CON_PP){ CStateNodeList* leftSistersPP=childsVp->previous->previous->previous; while(leftSistersPP!=0){ if (CConstituent::clearTmp(leftSistersPP->node->constituent.code())==PENN_CON_NP){ secondInCondition=true; } leftSistersPP=leftSistersPP->next; } } } if (firstInCondition||secondInCondition||thirdInCondition){ thirdCondition=false; } } } if (thirdCondition){ childsS=sTarg->m_umbinarizedSubNodes; while(childsS!=0){ const CStateNode* vpChildS=childsS->node; if (CConstituent::clearTmp(vpChildS->constituent.code())==PENN_CON_VP){ CStateNodeList* childsVp=vpChildS->m_umbinarizedSubNodes; while(childsVp!=0){ if (((*words)[childsVp->node->lexical_head].tag.code()==PENN_TAG_VERB_PROG)) { // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_XCOMP); if (buildStanfordLink(STANFORD_DEP_XCOMP, sTarg->lexical_head, node.lexical_head)) { //addLinked(vpNode,sTarg); addLinked(&node,sTarg); return true; } } childsVp=childsVp->next; } } childsS=childsS->next; } } } } } childsVp=childsVp->next; } } return false; }
//"VP < (SBAR=target < (S !$- (NN < order) < (VP < TO))) !> (VP < (VB|AUX < be)) ", inline const bool & buildXComp5(const unsigned long &cons) { bool fstCond=true; if (cons==PENN_CON_VP){ CStateNodeList* childsFstVp=node.m_umbinarizedSubNodes; while(childsFstVp!=0){ if (((*words)[childsFstVp->node->lexical_head].tag.code()==PENN_TAG_VERB) && ((*words)[childsFstVp->node->lexical_head].word==g_word_be)) { fstCond=false; } childsFstVp=childsFstVp->next; } } if (fstCond){ CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* head=childs->node; if (CConstituent::clearTmp(head->constituent.code())==PENN_CON_VP){ CStateNodeList* childsHead=head->m_umbinarizedSubNodes; while(childsHead!=0){ const CStateNode* targ=childsHead->node; if (CConstituent::clearTmp(targ->constituent.code())==PENN_CON_SBAR && !isLinked(&node,targ)){ CStateNodeList* childsTarg=targ->m_umbinarizedSubNodes; while(childsTarg!=0){ if (CConstituent::clearTmp(childsTarg->node->constituent.code())==PENN_CON_S){ bool firstCondition=true; //!$- (NN < order) bool secCondition=false; //< (VP < TO) if (childsTarg->previous!=0){ if ((*words)[childsTarg->previous->node->lexical_head].tag.code()==PENN_TAG_NOUN && ((*words)[childsTarg->previous->node->lexical_head].word==g_word_order)){ firstCondition=false; } } if (firstCondition){ CStateNodeList* childsS=childsTarg->node->m_umbinarizedSubNodes; while(childsS!=0){ if (CConstituent::clearTmp(childsS->node->constituent.code())==PENN_CON_VP){ CStateNodeList* childsVpS=childsS->node->m_umbinarizedSubNodes; while(childsVpS!=0){ if ((*words)[childsVpS->node->lexical_head].tag.code()==PENN_TAG_TO){ secCondition=true; } childsVpS=childsVpS->next; } } childsS=childsS->next; } } if (firstCondition && secCondition){ if (buildStanfordLink(STANFORD_DEP_XCOMP, targ->lexical_head, head->lexical_head)) { //addLinked(vpNode,sTarg); addLinked(&node,targ); //I think this is not correct, in this specific case. return true; } } } childsTarg=childsTarg->next; } } childsHead=childsHead->next; } } childs=childs->next; } } return false; }
//"SBARQ < (WHNP|WHADJP=target $+ (SQ < (/^(?:VB|AUX)/ < " + copularWordRegex + " !$++ VP) !< (VP <- (PP <:IN)) !<- (PP <: IN))) inline const bool &buildAttr2(const unsigned long &cons) { // A<:B (B is the only child of A) //A <- B (B is the last child of A) // A $+ B (A is the immediate left sister of B) if (cons==PENN_CON_SBARQ){ CStateNodeList* childsSbarq=node.m_umbinarizedSubNodes; while(childsSbarq!=0){ const CStateNode* whTarg=childsSbarq->node; if ((CConstituent::clearTmp(whTarg->constituent.code())==PENN_CON_WHNP || CConstituent::clearTmp(whTarg->constituent.code())==PENN_CON_WHADJP) && (!isLinked(&node,whTarg))) { if (childsSbarq->previous!=0) { const CStateNode* sqSisterWh=childsSbarq->previous->node; if (CConstituent::clearTmp(sqSisterWh->constituent.code())==PENN_CON_SQ) { bool secondCondition=true; bool thirdCondition=true; CStateNodeList* childsSq=sqSisterWh->m_umbinarizedSubNodes; while(childsSq!=0){ const CStateNode* vpChildSq=childsSq->node; if (CConstituent::clearTmp(vpChildSq->constituent.code())==PENN_CON_VP) { CStateNodeList* childsVp=vpChildSq->m_umbinarizedSubNodes; while(childsVp!=0){ const CStateNode* ppChildVp=childsVp->node; if (childsVp->next==0 && CConstituent::clearTmp(ppChildVp->constituent.code())==PENN_CON_PP) { //it is the last child of CStateNodeList* childsPP=ppChildVp->m_umbinarizedSubNodes; if (childsPP->next==0 && ((*words)[childsPP->node->lexical_head].tag.code()==PENN_TAG_IN)){ secondCondition=false; } } childsVp=childsVp->next; } } childsSq=childsSq->next; } if (secondCondition) { childsSq=sqSisterWh->m_umbinarizedSubNodes; while(childsSq!=0) { const CStateNode* ppChildSq=childsSq->node; if (childsSq->next==0 && CConstituent::clearTmp(ppChildSq->constituent.code())==PENN_CON_PP){ CStateNodeList* childsPP=ppChildSq->m_umbinarizedSubNodes; if (childsPP->next==0 && ((*words)[childsPP->node->lexical_head].tag.code()==PENN_TAG_IN)){ thirdCondition=false; } } childsSq=childsSq->next; } } if (secondCondition && thirdCondition){ //(SQ < (/^(?:VB|AUX)/ < " + copularWordRegex + " !$++ VP) CStateNodeList* childsSq=sqSisterWh->m_umbinarizedSubNodes; while(childsSq!=0){ const CStateNode* vbChildSq=childsSq->node; if ((*words)[vbChildSq->lexical_head].tag.code()==PENN_TAG_VERB) { //const CStateNodeList* childsVb=vbChildSq->m_umbinarizedSubNodes; //while(childsVb!=0){ if ((compareWordToCopularWordRegex((*words)[vbChildSq->lexical_head].word))) { //A $++ B (A is a left sister of B) bool insideCondition=true; CStateNodeList* sistersVb=childsSq->next; while(sistersVb!=0){ const CStateNode* vpSisterVb=sistersVb->node; if (CConstituent::clearTmp(vpSisterVb->constituent.code())==PENN_CON_VP){ insideCondition=false; } sistersVb=sistersVb->next; } if (insideCondition){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ATTR); if (buildStanfordLink(STANFORD_DEP_ATTR, whTarg->lexical_head, node.lexical_head)) { addLinked(&node,whTarg); return true; } } } //childsVb=childsVb->next; //} } childsSq=childsSq->next; } } } } } childsSbarq=childsSbarq->next; } } return false; }
//"/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < JJ|JJR|JJS=sister < (NP|NML|NN|NNS|NNP|NNPS|FW=target !<<- POS !<<- (VBZ < /^\'s$/) $+ =sister) <# NN|NNS|NNP|NNPS !<<- POS !<<- (VBZ < /^\'s$/) ", inline const bool &nn2(const unsigned long &cons){ if (cons==PENN_CON_WHNP || cons==PENN_CON_NP || cons==PENN_CON_NAC || cons==PENN_CON_NX){ bool firstCond=false; //< JJ|JJR|JJS=sister bool thirdCond=false; //<# NN|NNS|NNP|NNPS bool fourthCond=true;//!<<- POS bool fifthCond=true; //!<<- (VBZ < /^\'s$/) const CStateNode* sister=0; const CStateNodeList* childs=node.m_umbinarizedSubNodes; while(childs!=0){ if ((*words)[childs->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_COMPARATIVE || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_SUPERLATIVE){ firstCond=true; sister=childs->node; } //PENN_TAG_NOUN, PENN_TAG_NOUN_PROPER, PENN_TAG_NOUN_PROPER_PLURAL, PENN_TAG_NOUN_PLURAL, else if (((*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER_PLURAL || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN_PLURAL) && childs->node->lexical_head==node.lexical_head){ thirdCond=true; } childs=childs->next; } if (firstCond && thirdCond){ CStateNodeList* descendants=new CStateNodeList(); listRightMostDescendants(node.m_umbinarizedSubNodes,descendants); if (descendants->node==0) { descendants->clear(); descendants=0; } while(descendants!=0){ if ((*words)[descendants->node->lexical_head].tag.code()==PENN_TAG_POS){ fourthCond=false; } else if ((*words)[descendants->node->lexical_head].tag.code()==PENN_TAG_VERB_THIRD_SINGLE){ //CStateNodeList* childsVbz=descendants->node->m_umbinarizedSubNodes; //while(childsVbz!=0){ if ((*words)[descendants->node->lexical_head].word==g_word_aps){ fifthCond=false; } // childsVbz=childsVbz->next; //} } descendants=descendants->next; } } if (firstCond && thirdCond && fourthCond && fifthCond){ //< (NP|NML|NN|NNS|NNP|NNPS|FW=target !<<- POS !<<- (VBZ < /^\'s$/) $+ =sister) childs=node.m_umbinarizedSubNodes; while(childs!=0){ const CStateNode* targ=childs->node; if (((*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER || CConstituent::clearTmp(childs->node->constituent.code())==PENN_CON_NP || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN_PROPER_PLURAL || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_FW || (*words)[childs->node->lexical_head].tag.code()==PENN_TAG_NOUN_PLURAL) && !isLinked(&node,targ)){ CStateNodeList* descendants2=new CStateNodeList(); listRightMostDescendants(targ->m_umbinarizedSubNodes,descendants2); if (descendants2->node==0) { descendants2->clear(); descendants2=0; } while(descendants2!=0){ if ((*words)[descendants2->node->lexical_head].tag.code()==PENN_TAG_POS){ fourthCond=false; } else if ((*words)[descendants2->node->lexical_head].tag.code()==PENN_TAG_VERB_THIRD_SINGLE){ CStateNodeList* childsVbz=descendants2->node->m_umbinarizedSubNodes; //while(childsVbz!=0){ if ((*words)[descendants2->node->lexical_head].word==g_word_aps){ CStateNodeList* rightSisters=childs->next; if (rightSisters!=0 && sister!=0){ if ((*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE || (*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_COMPARATIVE || (*words)[rightSisters->node->lexical_head].tag.code()==PENN_TAG_ADJECTIVE_SUPERLATIVE){ // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_NN); if (buildStanfordLink(STANFORD_DEP_NN, targ->lexical_head, node.lexical_head)) { addLinked(&node,targ); return true; } } } } // childsVbz=childsVbz->next; //} } descendants2=descendants2->next; } } childs=childs->next; } } } return false; }
//"VP < NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + ") !$ (NP < EX)", inline const bool &buildAttr1(const unsigned long &cons) { if (cons==PENN_CON_VP){ CStateNodeList* childsVp=node.m_umbinarizedSubNodes; while (childsVp!=0){ const CStateNode* npTarg=childsVp->node; if (CConstituent::clearTmp(npTarg->constituent.code())==PENN_CON_NP && !(isLinked(&node, npTarg))){ //1st check whether there is no sister that looks like (NP < EX) bool secondCondition=true; CStateNodeList* leftSisters=childsVp->previous; while(leftSisters!=0){ const CStateNode* npLeftSister=leftSisters->node; if (CConstituent::clearTmp(npLeftSister->constituent.code())==PENN_CON_NP){ CStateNodeList* childsExofNp=npLeftSister->m_umbinarizedSubNodes; while(childsExofNp!=0){ //Big doubt: do I have to check the childs of NP, or the EX is the lexical head of the NP?????? Yue? const CStateNode* childEx=childsExofNp->node; if ((*words)[childEx->lexical_head].tag.code()==PENN_TAG_EX) { secondCondition=false; } childsExofNp=childsExofNp->next; } } leftSisters=leftSisters->previous; } CStateNodeList* rightSisters=childsVp->next; while(rightSisters!=0){ const CStateNode* npRightSister=rightSisters->node; if (CConstituent::clearTmp(npRightSister->constituent.code())==PENN_CON_NP){ CStateNodeList* childsExofNp=npRightSister->m_umbinarizedSubNodes; while(childsExofNp!=0){ const CStateNode* childEx=childsExofNp->node; if ((*words)[childEx->lexical_head].tag.code()==PENN_TAG_EX) { secondCondition=false; } childsExofNp=childsExofNp->next; } } rightSisters=rightSisters->next; } if (secondCondition){ //now, check whether NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + ") CStateNodeList* childsNp=npTarg->m_umbinarizedSubNodes; while(childsNp!=0){ const CStateNode* vb=childsNp->node; if (((*words)[vb->lexical_head].tag.code()==PENN_TAG_VERB)) { if ((compareWordToCopularWordRegex((*words)[vb->lexical_head].word))) { // CDependencyLabel* label=new CDependencyLabel(STANFORD_DEP_ATTR); if (buildStanfordLink(STANFORD_DEP_ATTR, npTarg->lexical_head, node.lexical_head)) { addLinked(&node,npTarg); return true; } } } childsNp=childsNp->next; } } } childsVp=childsVp->next; } } return false; }