/**
 * Fill in ctxt->wordtype[] for every word of the linkage.
 *
 * Every entry is first reset to NONE.  Each link is then examined in
 * index order and the word at its RIGHT end is classified:
 *
 *  - the link label compares equal (uppercompare) to S, SX or SF:
 *    the word becomes STYPE; it is upgraded to PTYPE when the same word
 *    is also the LEFT end of a link matching Pg#b or Pv, or comparing
 *    equal to I or PP.
 *  - the link label matches QI#d (question word used in an indirect
 *    question): the word becomes QTYPE; it is upgraded to QDTYPE
 *    (question-word determiner) when the same word is also the LEFT end
 *    of a link matching D##w.
 *  - the link label matches Mr or MX#d: the word becomes QDTYPE.
 *
 * The rules are applied in the order listed, link by link, so when more
 * than one rule touches the same word the last assignment wins.
 * (This function is called once for each sublinkage.)
 */
static void generate_misc_word_info(con_context_t * ctxt, Linkage linkage)
{
	int word, rlink, llink;

	/* Start from a clean slate. */
	for (word = 0; word < linkage->num_words; word++)
	{
		ctxt->wordtype[word] = NONE;
	}

	for (rlink = 0; rlink < linkage_get_num_links(linkage); rlink++)
	{
		const char *rlabel;
		int is_subject_link;

		word = linkage_get_link_rword(linkage, rlink);
		rlabel = linkage_get_link_label(linkage, rlink);

		is_subject_link = (uppercompare(rlabel, "S") == 0) ||
		                  (uppercompare(rlabel, "SX") == 0) ||
		                  (uppercompare(rlabel, "SF") == 0);

		if (is_subject_link)
		{
			ctxt->wordtype[word] = STYPE;

			/* Does this word also start a verb-ish link to its right? */
			for (llink = 0; llink < linkage_get_num_links(linkage); llink++)
			{
				int left_word = linkage_get_link_lword(linkage, llink);
				const char *llabel = linkage_get_link_label(linkage, llink);

				if (left_word != word) continue;

				if ((post_process_match("Pg#b", llabel) == 1) ||
				    (uppercompare(llabel, "I") == 0) ||
				    (uppercompare(llabel, "PP") == 0) ||
				    (post_process_match("Pv", llabel) == 1))
				{
					/* Pvf, Pgf? */
					ctxt->wordtype[word] = PTYPE;
				}
			}
		}

		if (post_process_match("QI#d", rlabel) == 1)
		{
			ctxt->wordtype[word] = QTYPE;

			/* A D##w link leaving this word makes it a determiner. */
			for (llink = 0; llink < linkage_get_num_links(linkage); llink++)
			{
				int left_word = linkage_get_link_lword(linkage, llink);
				const char *llabel = linkage_get_link_label(linkage, llink);

				if (left_word != word) continue;

				if (post_process_match("D##w", llabel) == 1)
				{
					ctxt->wordtype[word] = QDTYPE;
				}
			}
		}

		if ((post_process_match("Mr", rlabel) == 1) ||
		    (post_process_match("MX#d", rlabel) == 1))
		{
			ctxt->wordtype[word] = QDTYPE;
		}
	}
}
/**
 * Apply the final clean-up rules to the first numcon_total entries of
 * ctxt->constituent[].
 *
 * The function:
 *  1. invalidates, relabels or resizes individual constituents according
 *     to their start_link, domain_type and aux fields;
 *  2. extends a global S that stops just short of a final "." so that it
 *     covers the period as well;
 *  3. appends a new S spanning the whole sentence when no valid S touches
 *     the left end or no valid S touches the right end;
 *  4. repairs crossing (non-nested) valid constituents by moving the left
 *     edge of the earlier one, repeating until a full pass makes no change.
 *
 * @param ctxt          constituent context; constituent[] is modified in
 *                      place and may gain one entry at index numcon_total.
 * @param linkage       the linkage the constituents were derived from.
 * @param numcon_total  number of constituents currently in the array.
 * @return the updated number of constituents (numcon_total, or
 *         numcon_total + 1 when a global S was appended).
 */
static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	int c, c2, global_leftend_found, adjustment_made,
		global_rightend_found, lastword;
	Sentence sent;

	sent = linkage_get_sentence(linkage);

	for (c = 0; c < numcon_total; c++)
	{
		/* In a paraphrase construction ("John ran, he said"),
		   the paraphrasing clause doesn't get
		   an S. (This is true in Treebank II, not Treebank I) */
		if (uppercompare(ctxt->constituent[c].start_link, "CP") == 0)
		{
			ctxt->constituent[c].valid = 0;
		}

		/* If it's a possessive with an "'s", the NP on the left
		   should be extended to include the "'s". */
		if ((uppercompare(ctxt->constituent[c].start_link, "YS") == 0) ||
			(uppercompare(ctxt->constituent[c].start_link, "YP") == 0))
		{
			ctxt->constituent[c].right++;
		}

		/* If a constituent has starting link MVpn, it's a time
		   expression like "last week"; label it as a noun phrase
		   (incorrectly) */
		if (strcmp(ctxt->constituent[c].start_link, "MVpn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}
		if (strcmp(ctxt->constituent[c].start_link, "COn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}
		if (strcmp(ctxt->constituent[c].start_link, "Mpn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}

		/* If the constituent is an S started by "but" or "and" at
		   the beginning of the sentence, it should be ignored. */
		if ((strcmp(ctxt->constituent[c].start_link, "Wdc") == 0) &&
			(ctxt->constituent[c].left == 2))
		{
			ctxt->constituent[c].valid = 0;
		}

		/* For prenominal adjectives, an ADJP constituent is assigned
		   if it's a hyphenated (Ah) or comparative (Am) adjective;
		   otherwise no ADJP is assigned, unless the phrase is more
		   than one word long (e.g. "very big"). The same with certain
		   types of adverbs. */
		/* That was for Treebank I. For Treebank II, the rule only
		   seems to apply to prenominal adjectives (of all kinds).
		   However, it also applies to number expressions ("QP"). */
		if ((post_process_match("A", ctxt->constituent[c].start_link) == 1) ||
			(ctxt->constituent[c].domain_type == 'd') ||
			(ctxt->constituent[c].domain_type == 'h'))
		{
			/* Single-word phrase: drop it. */
			if (ctxt->constituent[c].right - ctxt->constituent[c].left == 0)
			{
				ctxt->constituent[c].valid = 0;
			}
		}

		/* An 'h' domain constituent directly preceded by "$" is
		   widened to the left so that it includes the "$". */
		if ((ctxt->constituent[c].domain_type == 'h') &&
			(strcmp(linkage->word[ctxt->constituent[c].left - 1], "$") == 0))
		{
			ctxt->constituent[c].left--;
		}

		/* If a constituent has type VP and its aux value is 2,
		   this means it's an aux that should be printed; change its
		   type to "X". If its aux value is 1, set "valid" to 0. (This
		   applies to Treebank I only) */
		if (ctxt->constituent[c].aux == 2)
		{
			ctxt->constituent[c].type = string_set_add("X", ctxt->phrase_ss);
		}
		if (ctxt->constituent[c].aux == 1)
		{
			ctxt->constituent[c].valid = 0;
		}
	}

	/* If there's a global S constituent that includes everything
	   except a final period or question mark, extend it by one word.
	   (Only "." is actually tested for here.) */
	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].right == (linkage->num_words) - 3) &&
			(ctxt->constituent[c].left == 1) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0) &&
			(strcmp(sent->word[(linkage->num_words) - 2].string, ".") == 0))
		{
			ctxt->constituent[c].right++;
		}
	}

	/* If there's no S boundary at the very left end of the sentence,
	   or the very right end, create a new S spanning the entire sentence */
	lastword = (linkage->num_words) - 2;
	global_leftend_found = 0;
	global_rightend_found = 0;
	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].left == 1) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0) &&
			(ctxt->constituent[c].valid == 1))
		{
			global_leftend_found = 1;
		}
	}
	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].right >= lastword) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0) &&
			(ctxt->constituent[c].valid == 1))
		{
			global_rightend_found = 1;
		}
	}
	if ((global_leftend_found == 0) || (global_rightend_found == 0))
	{
		c = numcon_total;
		ctxt->constituent[c].left = 1;
		ctxt->constituent[c].right = linkage->num_words - 1;
		ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
		ctxt->constituent[c].valid = 1;
		ctxt->constituent[c].domain_type = 'x';
		numcon_total++;
		if (verbosity >= 2)
		{
			printf("Adding global sentence constituent:\n");
		}
		/* Called regardless of the verbosity test above. */
		print_constituent(ctxt, linkage, c);
	}

	/* Check once more to see if constituents are nested (checking
	   BETWEEN sublinkages this time).  Moving one left edge can create a
	   new crossing with a constituent that was already examined, so keep
	   making passes until one of them changes nothing.  This terminates:
	   every adjustment strictly increases some constituent's left edge,
	   and that edge never moves past the constituent's own right edge. */
	while (1)
	{
		adjustment_made = 0;
		for (c = 0; c < numcon_total; c++)
		{
			if (ctxt->constituent[c].valid == 0) continue;
			for (c2 = 0; c2 < numcon_total; c2++)
			{
				if (ctxt->constituent[c2].valid == 0) continue;
				/* c starts before c2, ends inside c2: they cross. */
				if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
					(ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
					(ctxt->constituent[c].right >= ctxt->constituent[c2].left))
				{
					if (verbosity >= 2)
					{
						printf("WARNING: the constituents aren't nested! Adjusting them."
						       "(%d, %d)\n", c, c2);
					}
					ctxt->constituent[c].left = ctxt->constituent[c2].left;
					adjustment_made = 1;
				}
			}
		}
		if (adjustment_made == 0) break;
	}

	return numcon_total;
}
/**
 * Final clean-up pass over the first numcon_total entries of
 * ctxt->constituent[].
 *
 * Per-constituent rules are applied first (invalidate, relabel as NP,
 * widen by one word).  Next, a global S that stops one word short of the
 * sentence-final token is stretched over that token when the token is
 * linked to the last word.  Finally, if no valid S starts at word 1, or
 * no valid S reaches the end, a fresh S covering the whole sentence is
 * appended at index numcon_total.
 *
 * @param ctxt          constituent context, modified in place.
 * @param linkage       linkage the constituents belong to.
 * @param numcon_total  current number of constituents.
 * @return the constituent count after the pass (one more than the input
 *         when a global S was appended, otherwise unchanged).
 */
static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	bool s_at_left = false;
	bool s_at_right = false;
	size_t lastword;
	int c;

	for (c = 0; c < numcon_total; c++)
	{
		const char *start = ctxt->constituent[c].start_link;

		/* The paraphrasing clause of a paraphrase construction
		   ("John ran, he said") gets no S of its own.
		   (Treebank II behaviour, not Treebank I.) */
		if (uppercompare(start, "CP") == 0)
		{
			ctxt->constituent[c].valid = false;
		}

		/* Possessive "'s": stretch the NP on the left so that it
		   swallows the "'s". */
		if ((uppercompare(start, "YS") == 0) ||
		    (uppercompare(start, "YP") == 0))
		{
			ctxt->constituent[c].right++;
		}

		/* MVpn marks a time expression such as "last week"; it is
		   labelled as a noun phrase (incorrectly).  COn and Mpn
		   starting links receive the same NP label. */
		if ((strcmp(start, "MVpn") == 0) ||
		    (strcmp(start, "COn") == 0) ||
		    (strcmp(start, "Mpn") == 0))
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}

		/* An S opened by a sentence-initial "but" or "and" is ignored. */
		if ((ctxt->constituent[c].left == 2) && (strcmp(start, "Wdc") == 0))
		{
			ctxt->constituent[c].valid = false;
		}

		/* Treebank I gave prenominal adjectives an ADJP only when
		   hyphenated (Ah) or comparative (Am), or when the phrase was
		   longer than one word (e.g. "very big"); likewise for some
		   adverbs.  Under Treebank II the rule seems to cover
		   prenominal adjectives of every kind, plus number
		   expressions ("QP"): one-word phrases of these kinds are
		   dropped. */
		if ((post_process_match("A", start) == 1) ||
		    (ctxt->constituent[c].domain_type == 'd') ||
		    (ctxt->constituent[c].domain_type == 'h'))
		{
			if (ctxt->constituent[c].left == ctxt->constituent[c].right)
			{
				ctxt->constituent[c].valid = false;
			}
		}

		/* An 'h' domain phrase directly preceded by "$" is widened
		   to the left to take in the "$". */
		if ((ctxt->constituent[c].domain_type == 'h') &&
		    (strcmp(linkage->word[ctxt->constituent[c].left - 1], "$") == 0))
		{
			ctxt->constituent[c].left--;
		}
	}

	/* A global S that covers everything but the final terminating
	   punctuation (period or question mark) is extended by one word.
	   The test is structural: the next-to-last word must be linked
	   directly to the last word (per the original note, the right wall
	   via an RW link); without such a link nothing is extended. */
	for (c = 0; c < numcon_total; c++)
	{
		size_t ln;

		if (ctxt->constituent[c].right != linkage->num_words - 3) continue;
		if (ctxt->constituent[c].left != 1) continue;
		if (strcmp(ctxt->constituent[c].type, "S") != 0) continue;

		for (ln = 0; ln < linkage->num_links; ln++)
		{
			if ((linkage->link_array[ln].lw == linkage->num_words - 2) &&
			    (linkage->link_array[ln].rw == linkage->num_words - 1))
			{
				ctxt->constituent[c].right++;
				break;
			}
		}
	}

	/* Look for a valid S anchored at the very left end of the
	   sentence, and for one reaching the very right end. */
	lastword = linkage->num_words - 2;
	for (c = 0; c < numcon_total; c++)
	{
		if (strcmp(ctxt->constituent[c].type, "S") != 0) continue;
		if (!ctxt->constituent[c].valid) continue;

		if (ctxt->constituent[c].left == 1)
		{
			s_at_left = true;
		}
		if (ctxt->constituent[c].right >= lastword)
		{
			s_at_right = true;
		}
	}

	if (s_at_left && s_at_right)
	{
		return numcon_total;
	}

	/* One of the boundaries is missing: add an S spanning the whole
	   sentence. */
	c = numcon_total++;
	ctxt->constituent[c].left = 1;
	ctxt->constituent[c].right = linkage->num_words-1;
	ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
	ctxt->constituent[c].valid = true;
	ctxt->constituent[c].domain_type = 'x';

	if (verbosity >= 2)
	{
		printf("Adding global sentence constituent:\n");
	}
	/* Called regardless of the verbosity test above. */
	print_constituent(ctxt, linkage, c);

	return numcon_total;
}