コード例 #1
0
ファイル: constituents.c プロジェクト: arv100kri/linkparser
/**
 * Classify every word of the linkage according to the links attached
 * to it, storing the result in ctxt->wordtype[]:
 *
 *   STYPE   the word is the right end of an S, SX or SF link.
 *   PTYPE   as STYPE, and the word is also the left end of a link
 *           matching "Pg#b", I, PP or "Pv".
 *   QTYPE   the word is the right end of a link matching "QI#d"
 *           (a question-word used in an indirect question).
 *   QDTYPE  as QTYPE, and the word is also the left end of a link
 *           matching "D##w" (a question-word determiner); also given
 *           to the right end of any link matching "Mr" or "MX#d".
 *   NONE    anything else.
 *
 * Later rules overwrite earlier ones when several apply to one word.
 * (This function is called once for each sublinkage.)
 */
static void generate_misc_word_info(con_context_t * ctxt, Linkage linkage)
{
	int word, link, other;

	/* Every word starts out untyped. */
	for (word = 0; word < linkage->num_words; word++)
	{
		ctxt->wordtype[word] = NONE;
	}

	for (link = 0; link < linkage_get_num_links(linkage); link++)
	{
		const int rword = linkage_get_link_rword(linkage, link);
		const char * const label = linkage_get_link_label(linkage, link);

		/* Subject links: the right-hand word is the verb. */
		if ((uppercompare(label, "S") == 0) ||
		    (uppercompare(label, "SX") == 0) ||
		    (uppercompare(label, "SF") == 0))
		{
			ctxt->wordtype[rword] = STYPE;

			/* Promote to PTYPE if the same word starts a
			   participle/infinitive style link. */
			for (other = 0; other < linkage_get_num_links(linkage); other++)
			{
				const int lword = linkage_get_link_lword(linkage, other);
				const char * const other_label =
					linkage_get_link_label(linkage, other);

				if (lword != rword) continue;

				/* Pvf, Pgf? */
				if ((post_process_match("Pg#b", other_label) == 1) ||
				    (uppercompare(other_label, "I") == 0) ||
				    (uppercompare(other_label, "PP") == 0) ||
				    (post_process_match("Pv", other_label) == 1))
				{
					ctxt->wordtype[rword] = PTYPE;
				}
			}
		}

		/* Indirect-question links: the right-hand word is the
		   question word. */
		if (post_process_match("QI#d", label) == 1)
		{
			ctxt->wordtype[rword] = QTYPE;

			/* Promote to QDTYPE if the question word is also
			   acting as a determiner. */
			for (other = 0; other < linkage_get_num_links(linkage); other++)
			{
				const int lword = linkage_get_link_lword(linkage, other);
				const char * const other_label =
					linkage_get_link_label(linkage, other);

				if (lword != rword) continue;

				if (post_process_match("D##w", other_label) == 1)
				{
					ctxt->wordtype[rword] = QDTYPE;
				}
			}
		}

		/* These two link types mark the right-hand word as QDTYPE
		   unconditionally. */
		if (post_process_match("Mr", label) == 1)
		{
			ctxt->wordtype[rword] = QDTYPE;
		}
		if (post_process_match("MX#d", label) == 1)
		{
			ctxt->wordtype[rword] = QDTYPE;
		}
	}
}
コード例 #2
0
ファイル: constituents.c プロジェクト: arv100kri/linkparser
/**
 * Apply the final batch of ad-hoc corrections to the constituents
 * stored in ctxt->constituent[0 .. numcon_total-1].
 *
 * The steps, in order:
 *   1. Per-constituent fixes keyed on the starting link, the domain
 *      type and the aux value (invalidate, relabel, or move an edge).
 *   2. Extend a global S that stops just before a final "." so that
 *      it covers the period as well.
 *   3. If no valid S starts at word 1, or none reaches the last word,
 *      append a new S that spans the whole sentence.
 *   4. Repeatedly repair pairs of valid constituents that overlap
 *      without nesting, until a full pass makes no change.
 *
 * ctxt->constituent[] is modified in place.  Returns the number of
 * entries in use afterwards: numcon_total, plus one if step 3 appended
 * a constituent.
 *
 * NOTE(review): step 3 writes to ctxt->constituent[numcon_total]
 * without a capacity check; the array size is not visible from this
 * function, so the caller must guarantee there is room for one more.
 */
static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	int c, c2, global_leftend_found, global_rightend_found,
		adjustment_made, lastword;
	Sentence sent;

	sent = linkage_get_sentence(linkage);

	for (c=0; c<numcon_total; c++)
	{
		/* In a paraphrase construction ("John ran, he said"),
		   the paraphrasing clause doesn't get
		   an S. (This is true in Treebank II, not Treebank I) */
		if (uppercompare(ctxt->constituent[c].start_link, "CP") == 0)
		{
			ctxt->constituent[c].valid = 0;
		}

		/* If it's a possessive with an "'s", the NP on the left
		   should be extended to include the "'s". */
		if ((uppercompare(ctxt->constituent[c].start_link, "YS") == 0) ||
			(uppercompare(ctxt->constituent[c].start_link, "YP") == 0))
		{
			ctxt->constituent[c].right++;
		}

		/* If a constituent has starting link MVpn, it's a time
		   expression like "last week"; label it as a noun phrase
		   (incorrectly).  COn and Mpn get the same treatment. */
		if ((strcmp(ctxt->constituent[c].start_link, "MVpn") == 0) ||
			(strcmp(ctxt->constituent[c].start_link, "COn") == 0) ||
			(strcmp(ctxt->constituent[c].start_link, "Mpn") == 0))
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}

		/* If the constituent is an S started by "but" or "and" at
		   the beginning of the sentence, it should be ignored. */
		if ((strcmp(ctxt->constituent[c].start_link, "Wdc") == 0) &&
			(ctxt->constituent[c].left == 2))
		{
			ctxt->constituent[c].valid = 0;
		}

		/* For prenominal adjectives, an ADJP constituent is assigned
		   if it's a hyphenated (Ah) or comparative (Am) adjective;
		   otherwise no ADJP is assigned, unless the phrase is more
		   than one word long (e.g. "very big"). The same with certain
		   types of adverbs. */
		/* That was for Treebank I. For Treebank II, the rule only
		   seems to apply to prenominal adjectives (of all kinds).
		   However, it also applies to number expressions ("QP"). */
		if ((post_process_match("A", ctxt->constituent[c].start_link)==1) ||
			(ctxt->constituent[c].domain_type=='d') ||
			(ctxt->constituent[c].domain_type=='h'))
		{
			/* Single-word constituents of these kinds are dropped. */
			if (ctxt->constituent[c].right-ctxt->constituent[c].left==0)
			{
				ctxt->constituent[c].valid=0;
			}
		}

		/* An 'h' constituent directly preceded by "$" absorbs the "$".
		   The left > 0 test keeps the word[left-1] lookup inside the
		   array when the constituent starts at word 0. */
		if ((ctxt->constituent[c].domain_type=='h') &&
			(ctxt->constituent[c].left > 0) &&
			(strcmp(linkage->word[ctxt->constituent[c].left-1], "$")==0))
		{
			ctxt->constituent[c].left--;
		}

		/* If a constituent has type VP and its aux value is 2,
		   this means it's an aux that should be printed; change its
		   type to "X". If its aux value is 1, set "valid" to 0. (This
		   applies to Treebank I only) */
		if (ctxt->constituent[c].aux == 2)
		{
			ctxt->constituent[c].type = string_set_add("X", ctxt->phrase_ss);
		}
		if (ctxt->constituent[c].aux == 1)
		{
			ctxt->constituent[c].valid = 0;
		}
	}

	/* If there's a global S constituent that includes everything
	   except a final period, extend it by one word */
	for (c=0; c<numcon_total; c++)
	{
		if ((ctxt->constituent[c].right==(linkage->num_words)-3) &&
			(ctxt->constituent[c].left==1) &&
			(strcmp(ctxt->constituent[c].type, "S")==0) &&
			(strcmp(sent->word[(linkage->num_words)-2].string, ".")==0))
		{
			ctxt->constituent[c].right++;
		}
	}

	/* If there's no S boundary at the very left end of the sentence,
	   or the very right end, create a new S spanning the entire sentence */
	lastword=(linkage->num_words)-2;
	global_leftend_found = 0;
	global_rightend_found = 0;
	for (c=0; c<numcon_total; c++)
	{
		if ((ctxt->constituent[c].left==1) &&
			(strcmp(ctxt->constituent[c].type, "S")==0) &&
			(ctxt->constituent[c].valid==1))
		{
			global_leftend_found=1;
		}
		if ((ctxt->constituent[c].right>=lastword) &&
			(strcmp(ctxt->constituent[c].type, "S")==0) &&
			(ctxt->constituent[c].valid==1))
		{
			global_rightend_found=1;
		}
	}
	if ((global_leftend_found==0) || (global_rightend_found==0))
	{
		c = numcon_total;
		ctxt->constituent[c].left = 1;
		ctxt->constituent[c].right = linkage->num_words-1;
		ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
		ctxt->constituent[c].valid = 1;
		ctxt->constituent[c].domain_type = 'x';
		numcon_total++;
		/* Both debug lines belong under the verbosity check; the
		   dump used to sit outside it despite its indentation. */
		if (verbosity >= 2)
		{
			printf("Adding global sentence constituent:\n");
			print_constituent(ctxt, linkage, c);
		}
	}

	/* Check once more to see if constituents are nested (checking BETWEEN sublinkages
	   this time).  Whenever constituent c starts before c2 and ends
	   inside it, c's left edge is moved up to c2's left edge.  Passes
	   are repeated until one makes no change; every adjustment strictly
	   increases a left edge that can never pass its own right edge, so
	   the loop terminates. */
	while (1)
	{
		adjustment_made=0;
		for (c=0; c<numcon_total; c++)
		{
			if (ctxt->constituent[c].valid==0) continue;
			for (c2=0; c2<numcon_total; c2++)
			{
				if (ctxt->constituent[c2].valid==0) continue;
				if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
					(ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
					(ctxt->constituent[c].right >= ctxt->constituent[c2].left))
				{
					if (verbosity>=2)
					{
						printf("WARNING: the constituents aren't nested! Adjusting them."
							   "(%d, %d)\n", c, c2);
					}
					ctxt->constituent[c].left = ctxt->constituent[c2].left;
					/* Record the change so that another pass is made. */
					adjustment_made=1;
				}
			}
		}
		if (adjustment_made==0) break;
	}
	return numcon_total;
}
コード例 #3
0
ファイル: constituents.c プロジェクト: virneo/link-grammar
/**
 * Apply the final batch of ad-hoc corrections to the constituents
 * stored in ctxt->constituent[0 .. numcon_total-1].
 *
 * The steps, in order:
 *   1. Per-constituent fixes keyed on the starting link and the
 *      domain type (invalidate, relabel, or move an edge).
 *   2. Extend a global S that stops just before the final word when
 *      that word is linked to the last word of the linkage.
 *   3. If no valid S starts at word 1, or none reaches the last word,
 *      append a new S that spans the whole sentence.
 *
 * ctxt->constituent[] is modified in place.  Returns the number of
 * entries in use afterwards: numcon_total, plus one if step 3 appended
 * a constituent.
 *
 * NOTE(review): step 3 writes to ctxt->constituent[numcon_total]
 * without a capacity check; the array size is not visible from this
 * function, so the caller must guarantee there is room for one more.
 */
static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	int c;
	bool global_leftend_found, global_rightend_found;
	size_t lastword;

	for (c = 0; c < numcon_total; c++)
	{
		/* In a paraphrase construction ("John ran, he said"),
		   the paraphrasing clause doesn't get
		   an S. (This is true in Treebank II, not Treebank I) */
		if (uppercompare(ctxt->constituent[c].start_link, "CP") == 0)
		{
			ctxt->constituent[c].valid = false;
		}

		/* If it's a possessive with an "'s", the NP on the left
		   should be extended to include the "'s". */
		if ((uppercompare(ctxt->constituent[c].start_link, "YS") == 0) ||
			(uppercompare(ctxt->constituent[c].start_link, "YP") == 0))
		{
			ctxt->constituent[c].right++;
		}

		/* If a constituent has starting link MVpn, it's a time
		   expression like "last week"; label it as a noun phrase
		   (incorrectly).  COn and Mpn get the same treatment. */
		if ((strcmp(ctxt->constituent[c].start_link, "MVpn") == 0) ||
			(strcmp(ctxt->constituent[c].start_link, "COn") == 0) ||
			(strcmp(ctxt->constituent[c].start_link, "Mpn") == 0))
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}

		/* If the constituent is an S started by "but" or "and" at
		   the beginning of the sentence, it should be ignored. */
		if ((strcmp(ctxt->constituent[c].start_link, "Wdc") == 0) &&
			(ctxt->constituent[c].left == 2))
		{
			ctxt->constituent[c].valid = false;
		}

		/* For prenominal adjectives, an ADJP constituent is assigned
		   if it's a hyphenated (Ah) or comparative (Am) adjective;
		   otherwise no ADJP is assigned, unless the phrase is more
		   than one word long (e.g. "very big"). The same with certain
		   types of adverbs. */
		/* That was for Treebank I. For Treebank II, the rule only
		   seems to apply to prenominal adjectives (of all kinds).
		   However, it also applies to number expressions ("QP"). */
		if ((post_process_match("A", ctxt->constituent[c].start_link) == 1) ||
			(ctxt->constituent[c].domain_type == 'd') ||
			(ctxt->constituent[c].domain_type == 'h'))
		{
			/* Single-word constituents of these kinds are dropped. */
			if (ctxt->constituent[c].right - ctxt->constituent[c].left == 0)
			{
				ctxt->constituent[c].valid = false;
			}
		}

		/* An 'h' constituent directly preceded by "$" absorbs the "$".
		   The left > 0 test keeps the word[left - 1] lookup inside the
		   array when the constituent starts at word 0. */
		if ((ctxt->constituent[c].domain_type == 'h') &&
			(ctxt->constituent[c].left > 0) &&
			(strcmp(linkage->word[ctxt->constituent[c].left - 1], "$") == 0))
		{
			ctxt->constituent[c].left--;
		}
	}

	/* If there's a global S constituent that includes everything
	   except a final terminating punctuation (period or question mark),
	   extend it by one word. We know it's the terminating punctuation
	   because it links to the right wall with an RW link.  If it's
	   not, then that final link is not there...
	 */
	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].right == linkage->num_words - 3) &&
			(ctxt->constituent[c].left == 1) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0))
		{
			size_t ln;

			/* Look for a link joining the second-to-last word to
			   the last word of the linkage. */
			for (ln = 0; ln < linkage->num_links; ln++)
			{
				if ((linkage->link_array[ln].lw == linkage->num_words - 2) &&
				    (linkage->link_array[ln].rw == linkage->num_words - 1))
				{
					ctxt->constituent[c].right++;
					break;
				}
			}
		}
	}

	/* If there's no S boundary at the very left end of the sentence,
	   or the very right end, create a new S spanning the entire sentence */
	lastword = linkage->num_words - 2;
	global_leftend_found = false;
	global_rightend_found = false;
	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].left == 1) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0) &&
			ctxt->constituent[c].valid)
		{
			global_leftend_found = true;
		}
		if ((ctxt->constituent[c].right >= lastword) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0) &&
			ctxt->constituent[c].valid)
		{
			global_rightend_found = true;
		}
	}

	if (!global_leftend_found || !global_rightend_found)
	{
		c = numcon_total;
		ctxt->constituent[c].left = 1;
		ctxt->constituent[c].right = linkage->num_words - 1;
		ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
		ctxt->constituent[c].valid = true;
		ctxt->constituent[c].domain_type = 'x';
		numcon_total++;
		/* Both debug lines belong under the verbosity check; the
		   dump used to sit outside it despite its indentation. */
		if (verbosity >= 2)
		{
			printf("Adding global sentence constituent:\n");
			print_constituent(ctxt, linkage, c);
		}
	}

	return numcon_total;
}