Example #1
0
/**
 * This function looks for constituents of type ctype1. Say it finds
 * one, call it c1. It searches for the next larger constituent of
 * type ctype2, call it c2. It then generates a new constituent of
 * ctype3, containing all the words in c2 but not c1.
 */
static int gen_comp(con_context_t *ctxt, Linkage linkage, int numcon_total, int numcon_subl,
					const char * ctype1, const char * ctype2, const char * ctype3, int x)
{
	int w, w2, w3, c, c1, c2, done;
	c = numcon_total + numcon_subl;

	for (c1=numcon_total; c1<numcon_total + numcon_subl; c1++) {

		/* If ctype1 is NP, it has to be an appositive to continue */
		if ((x==4) && (post_process_match("MX#*", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is X, and domain_type is t, it's an infinitive - skip it */
		if ((x==2) && (ctxt->constituent[c1].domain_type=='t'))
			continue;

		/* If it's domain-type z, it's a subject-relative clause;
		   the VP doesn't need an NP */
		if (ctxt->constituent[c1].domain_type=='z')
			continue;

		/* If ctype1 is X or VP, and it's not started by an S, don't generate an NP
		 (Neither of the two previous checks are necessary now, right?) */
		if ((x==1 || x==2) &&
			(((post_process_match("S", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SX", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SF", ctxt->constituent[c1].start_link) == 0)) ||
			 (post_process_match("S##w", ctxt->constituent[c1].start_link) != 0)))
			continue;

		/* If it's an SBAR (relative clause case), it has to be a relative clause */
		if ((x==3) &&
			((post_process_match("Rn", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("R*", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#r", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("Mr", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#d", ctxt->constituent[c1].start_link) == 0)))
			continue;

		/* If ctype1 is SBAR (clause opener case), it has to be an f domain */
		if ((x==5) && (ctxt->constituent[c1].domain_type!='f'))
			continue;

		/* If ctype1 is SBAR (pp opener case), it has to be a g domain */
		if ((x==6) && (ctxt->constituent[c1].domain_type!='g'))
			continue;

		/* If ctype1 is NP (paraphrase case), it has to be started by an SI */
		if ((x==7) && (post_process_match("SI", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is VP (participle modifier case), it has to be
		   started by an Mv or Mg */
		if ((x==8) && (post_process_match("M", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is VP (participle opener case), it has
		   to be started by a COp */
		if ((x==9) && (post_process_match("COp", ctxt->constituent[c1].start_link)==0))
			continue;

		/* Now start at the bounds of c1, and work outwards until you
		   find a larger constituent of type ctype2 */
		if (!(strcmp(ctxt->constituent[c1].type, ctype1)==0))
			continue;

		if (verbosity>=2)
			printf("Generating complement constituent for c %d of type %s\n",
				   c1, ctype1);
		done=0;
		for (w2=ctxt->constituent[c1].left; (done==0) && (w2>=0); w2--) {
			for (w3=ctxt->constituent[c1].right; w3<linkage->num_words; w3++) {
				for (c2=numcon_total; (done==0) &&
						 (c2 < numcon_total + numcon_subl); c2++) {
					if (!((ctxt->constituent[c2].left==w2) &&
						  (ctxt->constituent[c2].right==w3)) || (c2==c1))
						continue;
					if (!(strcmp(ctxt->constituent[c2].type, ctype2)==0))
						continue;

					/* if the new constituent (c) is to the left
					   of c1, its right edge should be adjacent to the
					   left edge of c1 - or as close as possible
					   without going outside the current sublinkage.
					   (Or substituting right and left as necessary.) */

					if ((x==5) || (x==6) || (x==9)) {
								/* This is the case where c is to the
								   RIGHT of c1 */
						w = ctxt->constituent[c1].right+1;
						while(1) {
							if (ctxt->word_used[linkage->current][w]==1)
								break;
							w++;
						}
						if (w > ctxt->constituent[c2].right)
						{
							done=1;
							continue;
						}
						ctxt->constituent[c].left = w;
						ctxt->constituent[c].right = ctxt->constituent[c2].right;
					}
					else {
						w = ctxt->constituent[c1].left-1;
						while(1) {
							if (ctxt->word_used[linkage->current][w] == 1)
								break;
							w--;
						}
						if (w < ctxt->constituent[c2].left) {
							done=1;
							continue;
						}
						ctxt->constituent[c].right = w;
						ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}

					adjust_for_left_comma(ctxt, linkage, c1);
					adjust_for_right_comma(ctxt, linkage, c1);

					ctxt->constituent[c].type =
						string_set_add(ctype3, ctxt->phrase_ss);
					ctxt->constituent[c].domain_type = 'x';
					ctxt->constituent[c].start_link =
						string_set_add("XX", ctxt->phrase_ss);
					ctxt->constituent[c].start_num =
						ctxt->constituent[c1].start_num; /* bogus */
					if (verbosity >= 2)
					{
						printf("Larger c found: c %d (%s); ",
							   c2, ctype2);
						printf("Adding constituent:\n");
						print_constituent(ctxt, linkage, c);
					}
					c++;
					assert(c < MAXCONSTITUENTS, "Too many constituents");
					done = 1;
				}
			}
		}
		if (verbosity>=2) {
		  if (done==0)
			printf("No constituent added, because no larger %s " \
				   " was found\n", ctype2);
		}
	}
	numcon_subl = c - numcon_total;
	return numcon_subl;
}
Example #2
0
static int read_constituents_from_domains(con_context_t *ctxt, Linkage linkage,
                                          int numcon_total)
{
	size_t d, l, w2;
	int c, w, c2, numcon_subl = 0;

	for (d = 0, c = numcon_total; d < linkage->hpsg_pp_data.N_domains; d++, c++)
	{
		size_t leftmost, rightmost, leftlimit;
		int rootleft;
		List_o_links * dlink;

		Domain domain = linkage->hpsg_pp_data.domain_array[d];

		// rootright = linkage_get_link_rword(linkage, domain.start_link);
		rootleft =  linkage_get_link_lword(linkage, domain.start_link);

		if ((domain.type=='c') ||
			(domain.type=='d') ||
			(domain.type=='e') ||
			(domain.type=='f') ||
			(domain.type=='g') ||
			(domain.type=='u') ||
			(domain.type=='y'))
		{
			leftlimit = 0;
			leftmost = linkage_get_link_lword(linkage, domain.start_link);
			rightmost = linkage_get_link_lword(linkage, domain.start_link);
		}
		else
		{
			leftlimit = linkage_get_link_lword(linkage, domain.start_link) + 1;
			leftmost = linkage_get_link_rword(linkage, domain.start_link);
			rightmost = linkage_get_link_rword(linkage, domain.start_link);
		}

		/* Start by assigning both left and right limits to the
		 * right word of the start link. This will always be contained
		 * in the constituent. This will also handle the case
		 * where the domain contains no links.
		 */
		for (dlink = domain.lol; dlink != NULL; dlink = dlink->next)
		{
			l = dlink->link;

			if ((linkage_get_link_lword(linkage, l) < leftmost) &&
				(linkage_get_link_lword(linkage, l) >= leftlimit))
			{
				leftmost = linkage_get_link_lword(linkage, l);
			}

			if (linkage_get_link_rword(linkage, l) > rightmost)
			{
				rightmost = linkage_get_link_rword(linkage, l);
			}
		}

		c--;
		c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost,
						cons_of_domain(linkage, domain.type));

		if (domain.type == 'z')
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
		}
		if (domain.type=='c')
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("Ce*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("Rn", ctxt->constituent[c].start_link)==1))
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "SBAR");
		}
		if ((post_process_match("R*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("MX#r", ctxt->constituent[c].start_link)==1))
		{
			w = leftmost;
			if (strcmp(linkage->word[w], ",") == 0) w++;
			c = add_constituent(ctxt, c, linkage, &domain, w, w, "WHNP");
		}
		if (post_process_match("Mj", ctxt->constituent[c].start_link) == 1)
		{
			w = leftmost;
			if (strcmp(linkage->word[w], ",") == 0) w++;
			c = add_constituent(ctxt, c, linkage, &domain, w, w+1, "WHPP");
			c = add_constituent(ctxt, c, linkage, &domain, w+1, w+1, "WHNP");
		}
		if ((post_process_match("Ss#d", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("B#d", ctxt->constituent[c].start_link)==1))
		{
			c = add_constituent(ctxt, c, linkage, &domain, rootleft, rootleft, "WHNP");
			c = add_constituent(ctxt, c, linkage, &domain,
							rootleft, ctxt->constituent[c-1].right, "SBAR");
		}
		if (post_process_match("CP", ctxt->constituent[c].start_link)==1)
		{
			if (strcmp(linkage->word[leftmost], ",") == 0)
				ctxt->constituent[c].left++;
			c = add_constituent(ctxt, c, linkage, &domain, 1, linkage->num_words-1, "S");
		}
		if ((post_process_match("MVs", ctxt->constituent[c].start_link)==1) ||
			(domain.type=='f'))
		{
			w = ctxt->constituent[c].left;
			if (strcmp(linkage->word[w], ",") == 0)
				w++;
			if (strcmp(linkage->word[w], "when") == 0)
			{
				c = add_constituent(ctxt, c, linkage, &domain, w, w, "WHADVP");
			}
		}
		if (domain.type=='t')
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("QI", ctxt->constituent[c].start_link) == 1) ||
			(post_process_match("Mr", ctxt->constituent[c].start_link) == 1) ||
			(post_process_match("MX#d", ctxt->constituent[c].start_link) == 1))
		{
			const char * name = "";
			w = leftmost;
			if (strcmp(linkage->word[w], ",") == 0) w++;
			if (ctxt->wordtype[w] == NONE)
				name = "WHADVP";
			else if (ctxt->wordtype[w] == QTYPE)
				name = "WHNP";
			else if (ctxt->wordtype[w] == QDTYPE)
				name = "WHNP";
			else
				assert(0, "Unexpected word type");
			c = add_constituent(ctxt, c, linkage, &domain, w, w, name);

			if (ctxt->wordtype[w] == QDTYPE)
			{
				/* Now find the finite verb to the right, start an S */
				/* Limit w2 to sentence length. */
				// for( w2=w+1; w2 < ctxt->r_limit-1; w2++ )
				for (w2 = w+1; w2 < rightmost; w2++)
				  if ((ctxt->wordtype[w2] == STYPE) || (ctxt->wordtype[w2] == PTYPE)) break;

				/* Adjust the right boundary of previous constituent */
				ctxt->constituent[c].right = w2 - 1;
				c = add_constituent(ctxt, c, linkage, &domain, w2, rightmost, "S");
			}
		}

		if (ctxt->constituent[c].domain_type == '\0')
		{
			err_ctxt ec;
			err_msg(&ec, Error, "Error: no domain type assigned to constituent\n");
		}
		if (ctxt->constituent[c].start_link == NULL)
		{
			err_ctxt ec;
			err_msg(&ec, Error, "Error: no type assigned to constituent\n");
		}
	}

	numcon_subl = c - numcon_total;
	/* numcon_subl = handle_islands(linkage, numcon_total, numcon_subl);  */

	if (verbosity >= 2)
		printf("Constituents added at first stage:\n");

	for (c = numcon_total; c < numcon_total + numcon_subl; c++)
	{
		print_constituent(ctxt, linkage, c);
	}

	/* Opener case - generates S around main clause.
	   (This must be done first; the S generated will be needed for
	   later cases.) */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "S", "S", CASE_OPENER);

	/* pp opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "S", "S", CASE_PPOPEN);

	/* participle opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "S", "S", "S", CASE_PART_OPEN);

	/* Subject-phrase case; every main VP generates an S */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "S", "NP", CASE_S);

	/* Relative clause case; an SBAR generates a complement NP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "NP", "NP", CASE_REL_CLAUSE);

	/* Participle modifier case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "NP", "NP", CASE_PART_MOD);

	/* PP modifying NP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "NP", "NP", CASE_PART_MOD);

	/* Appositive case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "NP", "NP", CASE_APPOS);

	/* S-V inversion case; an NP generates a complement VP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "SINV", "VP", CASE_SVINV);

	adjust_subordinate_clauses(ctxt, linkage, numcon_total, numcon_subl);
	for (c = numcon_total; c < numcon_total + numcon_subl; c++)
	{
		if ((ctxt->constituent[c].domain_type=='p') &&
			(strcmp(linkage->word[ctxt->constituent[c].left], ",")==0))
		{
			ctxt->constituent[c].left++;
		}
	}

	/* Make sure the constituents are nested. If two constituents
	 * are not nested: whichever constituent has the furthest left
	 * boundary, shift that boundary rightwards to the left boundary
	 * of the other one.
	 */
	while (true)
	{
		bool adjustment_made = false;
		for (c = numcon_total; c < numcon_total + numcon_subl; c++)
		{
			for (c2 = numcon_total; c2 < numcon_total + numcon_subl; c2++)
			{
				if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
					(ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
					(ctxt->constituent[c].right >= ctxt->constituent[c2].left))
				{
					/* We've found two overlapping constituents.
					   If one is larger, except the smaller one
					   includes an extra comma, adjust the smaller one
					   to exclude the comma */

					if ((strcmp(linkage->word[ctxt->constituent[c2].right], ",") == 0) ||
						(strcmp(linkage->word[ctxt->constituent[c2].right],
								"RIGHT-WALL") == 0))
					{
						if (verbosity >= 2)
							printf("Adjusting %d to fix comma overlap\n", c2);
						adjust_for_right_comma(ctxt, linkage, c2);
						adjustment_made = true;
					}
					else if (strcmp(linkage->word[ctxt->constituent[c].left], ",") == 0)
					{
						if (verbosity >= 2)
							printf("Adjusting c %d to fix comma overlap\n", c);
						adjust_for_left_comma(ctxt, linkage, c);
						adjustment_made = true;
					}
					else
					{
						if (verbosity >= 2)
						{
							err_ctxt ec;
							err_msg(&ec, Warn,
							      "Warning: the constituents aren't nested! "
							      "Adjusting them. (%d, %d)\n", c, c2);
					  }
					  ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}
				}
			}
		}
		if (adjustment_made == false) break;
	}

	assert (numcon_total + numcon_subl < ctxt->conlen, "Too many constituents");
	return numcon_subl;
}
Example #3
0
static int read_constituents_from_domains(con_context_t *ctxt, Linkage linkage,
                                          int numcon_total, int s)
{
	int d, c, leftlimit, l, leftmost, rightmost, w, c2, numcon_subl=0, w2;
	List_o_links * dlink;
	int rootright, rootleft, adjustment_made;
	Sublinkage * subl;
	const char * name;
	Domain domain;

	r_limit = linkage->num_words-2; /**PV**/

	subl = &linkage->sublinkage[s];

	for (d=0, c=numcon_total; d<subl->pp_data.N_domains; d++, c++) {
		domain = subl->pp_data.domain_array[d];
		rootright = linkage_get_link_rword(linkage, domain.start_link);
		rootleft =  linkage_get_link_lword(linkage, domain.start_link);

		if ((domain.type=='c') ||
			(domain.type=='d') ||
			(domain.type=='e') ||
			(domain.type=='f') ||
			(domain.type=='g') ||
			(domain.type=='u') ||
			(domain.type=='y')) {
			leftlimit = 0;
			leftmost = linkage_get_link_lword(linkage, domain.start_link);
			rightmost = linkage_get_link_lword(linkage, domain.start_link);
		}
		else {
			leftlimit = linkage_get_link_lword(linkage, domain.start_link)+1;
			leftmost = linkage_get_link_rword(linkage, domain.start_link);
			rightmost = linkage_get_link_rword(linkage, domain.start_link);
		}

		/* Start by assigning both left and right limits to the
		   right word of the start link. This will always be contained
		   in the constituent. This will also handle the case
		   where the domain contains no links. */

		for (dlink = domain.lol; dlink!=NULL; dlink=dlink->next) {
			l=dlink->link;

			if ((linkage_get_link_lword(linkage, l) < leftmost) &&
				(linkage_get_link_lword(linkage, l) >= leftlimit))
				leftmost = linkage_get_link_lword(linkage, l);

			if (linkage_get_link_rword(linkage, l) > rightmost)
				rightmost = linkage_get_link_rword(linkage, l);
		}

		c--;
		c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost,
						cons_of_domain(domain.type));

		if (domain.type=='z') {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
		}
		if (domain.type=='c') {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("Ce*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("Rn", ctxt->constituent[c].start_link)==1)) {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "SBAR");
		}
		if ((post_process_match("R*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("MX#r", ctxt->constituent[c].start_link)==1)) {
			w=leftmost;
			if (strcmp(linkage->word[w], ",")==0) w++;
			c = add_constituent(ctxt, c, linkage, domain, w, w, "WHNP");
		}
		if (post_process_match("Mj", ctxt->constituent[c].start_link)==1) {
			w=leftmost;
			if (strcmp(linkage->word[w], ",")==0) w++;
			c = add_constituent(ctxt, c, linkage, domain, w, w+1, "WHPP");
			c = add_constituent(ctxt, c, linkage, domain, w+1, w+1, "WHNP");
		}
		if ((post_process_match("Ss#d", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("B#d", ctxt->constituent[c].start_link)==1)) {
			c = add_constituent(ctxt, c, linkage, domain, rootleft, rootleft, "WHNP");
			c = add_constituent(ctxt, c, linkage, domain,
							rootleft, ctxt->constituent[c-1].right, "SBAR");
		}
		if (post_process_match("CP", ctxt->constituent[c].start_link)==1) {
			if (strcmp(linkage->word[leftmost], ",")==0)
				ctxt->constituent[c].left++;
			c = add_constituent(ctxt, c, linkage, domain, 1, linkage->num_words-1, "S");
		}
		if ((post_process_match("MVs", ctxt->constituent[c].start_link)==1) ||
			(domain.type=='f')) {
			w=ctxt->constituent[c].left;
			if (strcmp(linkage->word[w], ",")==0)
				w++;
			if (strcmp(linkage->word[w], "when")==0) {
				c = add_constituent(ctxt, c, linkage, domain, w, w, "WHADVP");
			}
		}
		if (domain.type=='t') {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("QI", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("Mr", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("MX#d", ctxt->constituent[c].start_link)==1)) {
			w = leftmost;
			if (strcmp(linkage->word[w], ",")==0) w++;
			if (ctxt->wordtype[w] == NONE)
				name = "WHADVP";
			else if (ctxt->wordtype[w] == QTYPE)
				name = "WHNP";
			else if (ctxt->wordtype[w] == QDTYPE)
				name = "WHNP";
			else
				assert(0, "Unexpected word type");
			c = add_constituent(ctxt, c, linkage, domain, w, w, name);

			if (ctxt->wordtype[w] == QDTYPE) {
				/* Now find the finite verb to the right, start an S */
				/* Limit w2 to sentence length. */
				// for( w2=w+1; w2 < r_limit-1; w2++ )
				for (w2 = w+1; w2 < rightmost; w2++)
				  if ((ctxt->wordtype[w2] == STYPE) || (ctxt->wordtype[w2] == PTYPE)) break;

				/* Adjust the right boundary of previous constituent */
				ctxt->constituent[c].right = w2-1;
				c = add_constituent(ctxt, c, linkage, domain, w2, rightmost, "S");
			  }
		}

		if (ctxt->constituent[c].domain_type=='\0') {
			error("Error: no domain type assigned to constituent\n");
		}
		if (ctxt->constituent[c].start_link==NULL) {
			error("Error: no type assigned to constituent\n");
		}
	}

	numcon_subl = c - numcon_total;
	/* numcon_subl = handle_islands(linkage, numcon_total, numcon_subl);  */

	if (verbosity >= 2)
		printf("Constituents added at first stage for subl %d:\n",
			   linkage->current);
	for (c = numcon_total; c < numcon_total + numcon_subl; c++)
	{
		print_constituent(ctxt, linkage, c);
	}

	/* Opener case - generates S around main clause.
	   (This must be done first; the S generated will be needed for
	   later cases.) */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "S", "S", 5);

	/* pp opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "S", "S", 6);

	/* participle opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "S", "S", "S", 9);

	/* Subject-phrase case; every main VP generates an S */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "S", "NP", 1);

	/* Relative clause case; an SBAR generates a complement NP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "NP", "NP", 3);

	/* Participle modifier case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "NP", "NP", 8);

	/* PP modifying NP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "NP", "NP", 8);

	/* Appositive case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "NP", "NP", 4);

	/* S-V inversion case; an NP generates a complement VP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "SINV", "VP", 7);

	adjust_subordinate_clauses(ctxt, linkage, numcon_total, numcon_subl);
	for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
		if ((ctxt->constituent[c].domain_type=='p') &&
			(strcmp(linkage->word[ctxt->constituent[c].left], ",")==0)) {
			ctxt->constituent[c].left++;
		}
	}

	/* Make sure the constituents are nested. If two constituents
	 * are not nested: whichever constituent has the furthest left
	 * boundary, shift that boundary rightwards to the left boundary
	 * of the other one.
	 */
	while (1) {
		adjustment_made=0;
		for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
			for (c2=numcon_total; c2<numcon_total + numcon_subl; c2++) {
				if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
					(ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
					(ctxt->constituent[c].right >= ctxt->constituent[c2].left)) {

					/* We've found two overlapping constituents.
					   If one is larger, except the smaller one
					   includes an extra comma, adjust the smaller one
					   to exclude the comma */

					if ((strcmp(linkage->word[ctxt->constituent[c2].right], ",")==0) ||
						(strcmp(linkage->word[ctxt->constituent[c2].right],
								"RIGHT-WALL")==0)) {
						if (verbosity>=2)
							printf("Adjusting %d to fix comma overlap\n", c2);
						adjust_for_right_comma(ctxt, linkage, c2);
						adjustment_made=1;
					}
					else if (strcmp(linkage->word[ctxt->constituent[c].left], ",")==0) {
						if (verbosity>=2)
							printf("Adjusting c %d to fix comma overlap\n", c);
						adjust_for_left_comma(ctxt, linkage, c);
						adjustment_made=1;
					}
					else {
					  if (verbosity>=2) {
						printf("WARNING: the constituents aren't nested! Adjusting them." \
							   "(%d, %d)\n", c, c2);
					  }
					  ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}
				}
			}
		}
		if (adjustment_made==0) break;
	}

	/* This labels certain words as auxiliaries (such as forms of "be"
	   with passives, forms of "have" wth past participles,
	   "to" with infinitives). These words start VP's which include
	   them. In Treebank I, these don't get printed unless they're part of an
	   andlist, in which case they get labeled "X". (this is why we need to
	   label them as "aux".) In Treebank II, however, they seem to be treated
	   just like other verbs, so the "aux" stuff isn't needed. */


	for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
		ctxt->constituent[c].subl = linkage->current;
		if (((ctxt->constituent[c].domain_type == 'v') &&
			(ctxt->wordtype[linkage_get_link_rword(linkage,
											 ctxt->constituent[c].start_num)]==PTYPE))
		   ||
		   ((ctxt->constituent[c].domain_type == 't') &&
			(strcmp(ctxt->constituent[c].type, "VP")==0))) {
			ctxt->constituent[c].aux=1;
		}
		else ctxt->constituent[c].aux=0;
	}

	for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
		ctxt->constituent[c].subl = linkage->current;
		ctxt->constituent[c].aux=0;
	}

	return numcon_subl;
}
Example #4
0
/**
 * This function looks for constituents of type ctype1. Say it finds
 * one, call it c1. It searches for the next larger constituent of
 * type ctype2, call it c2. It then generates a new constituent of
 * ctype3, containing all the words in c2 but not c1.
 */
static int gen_comp(con_context_t *ctxt, Linkage linkage,
                    int numcon_total, int numcon_subl,
					     const char * ctype1, const char * ctype2,
                    const char * ctype3, case_type x)
{
	size_t w, w2, w3;
	int c, c1, c2;
	bool done;
	c = numcon_total + numcon_subl;

	for (c1=numcon_total; c1<numcon_total + numcon_subl; c1++)
	{
		/* If ctype1 is NP, it has to be an appositive to continue */
		if ((x==CASE_APPOS) && (post_process_match("MX#*", ctxt->constituent[c1].start_link)==0))
			continue;

#ifdef REVIVE_DEAD_CODE
		/* If ctype1 is X, and domain_type is t, it's an infinitive - skip it */
		if ((x==CASE_UNUSED) && (ctxt->constituent[c1].domain_type=='t'))
			continue;
#endif /* REVIVE_DEAD_CODE */

		/* If it's domain-type z, it's a subject-relative clause;
		   the VP doesn't need an NP */
		if (ctxt->constituent[c1].domain_type=='z')
			continue;

		/* If ctype1 is X or VP, and it's not started by an S, don't generate an NP
		 (Neither of the two previous checks are necessary now, right?) */
#ifdef REVIVE_DEAD_CODE
		/* use this ... if ((x==CASE_S || x==CASE_UNUSED) && */
#endif /* REVIVE_DEAD_CODE */
		if ((x==CASE_S) &&
			(((post_process_match("S", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SX", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SF", ctxt->constituent[c1].start_link) == 0)) ||
			 (post_process_match("S##w", ctxt->constituent[c1].start_link) != 0)))
			continue;

		/* If it's an SBAR (relative clause case), it has to be a relative clause */
		if ((x==CASE_REL_CLAUSE) &&
			((post_process_match("Rn", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("R*", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#r", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("Mr", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#d", ctxt->constituent[c1].start_link) == 0)))
			continue;

		/* If ctype1 is SBAR (clause opener case), it has to be an f domain */
		if ((x==CASE_OPENER) && (ctxt->constituent[c1].domain_type!='f'))
			continue;

		/* If ctype1 is SBAR (pp opener case), it has to be a g domain */
		if ((x==CASE_PPOPEN) && (ctxt->constituent[c1].domain_type!='g'))
			continue;

		/* If ctype1 is NP (paraphrase case), it has to be started by an SI */
		if ((x==CASE_SVINV) && (post_process_match("SI", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is VP (participle modifier case), it has to be
		   started by an Mv or Mg */
		if ((x==CASE_PART_MOD) && (post_process_match("M", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is VP (participle opener case), it has
		   to be started by a COp */
		if ((x==CASE_PART_OPEN) && (post_process_match("COp", ctxt->constituent[c1].start_link)==0))
			continue;

		/* Now start at the bounds of c1, and work outwards until you
		   find a larger constituent of type ctype2 */
		if (!(strcmp(ctxt->constituent[c1].type, ctype1)==0))
			continue;

		if (verbosity >= 2)
			printf("Generating complement constituent for c %d of type %s\n",
				   c1, ctype1);
		done = false;
		for (w2 = ctxt->constituent[c1].left; (done == false) && (w2 != (size_t)-1); w2--)
		{
			for (w3 = ctxt->constituent[c1].right; w3<linkage->num_words; w3++)
			{
				for (c2 = numcon_total; (done == false) &&
						 (c2 < numcon_total + numcon_subl); c2++) {
					if (!((ctxt->constituent[c2].left == w2) &&
						  (ctxt->constituent[c2].right == w3)) || (c2==c1))
						continue;
					if (!(strcmp(ctxt->constituent[c2].type, ctype2)==0))
						continue;

					/* if the new constituent (c) is to the left
					   of c1, its right edge should be adjacent to the
					   left edge of c1 - or as close as possible. */
					if ((x==CASE_OPENER) || (x==CASE_PPOPEN) || (x==CASE_PART_OPEN))
					{
								/* This is the case where c is to the
								   RIGHT of c1 */
						w = ctxt->constituent[c1].right + 1;
						if (w > ctxt->constituent[c2].right)
						{
							done = true;
							continue;
						}
						ctxt->constituent[c].left = w;
						ctxt->constituent[c].right = ctxt->constituent[c2].right;
					}
					else
					{
						w = ctxt->constituent[c1].left - 1;
						if (w < ctxt->constituent[c2].left) {
							done = true;
							continue;
						}
						ctxt->constituent[c].right = w;
						ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}

					adjust_for_left_comma(ctxt, linkage, c1);
					adjust_for_right_comma(ctxt, linkage, c1);

					ctxt->constituent[c].type =
						string_set_add(ctype3, ctxt->phrase_ss);
					ctxt->constituent[c].domain_type = 'x';
					ctxt->constituent[c].start_link =
						string_set_add("XX", ctxt->phrase_ss);
					if (verbosity >= 2)
					{
						printf("Larger c found: c %d (%s); ",
							   c2, ctype2);
						printf("Adding constituent:\n");
						print_constituent(ctxt, linkage, c);
					}
					c++;
					assert (c < ctxt->conlen, "Too many constituents");
					done = true;
				}
			}
		}
		if (verbosity >= 2)
		{
			if (done == false)
				printf("No constituent added, because no larger %s " \
					   " was found\n", ctype2);
		}
	}
	numcon_subl = c - numcon_total;
	return numcon_subl;
}