Example #1
0
/**
 * Check a "contains one" rule against every domain of the linkage.
 *
 * A domain is relevant when at least one of its links is accepted by
 * post_process_match() against rule->selector. Each relevant domain
 * must also hold at least one link accepted by string_in_list()
 * against rule->link_array.
 *
 * Returns true when every relevant domain satisfies the rule (this
 * includes the case where no domain is relevant), and false as soon
 * as one relevant domain has no link from the required list.
 */
static bool
apply_contains_one(Postprocessor *pp, Linkage sublinkage, pp_rule *rule)
{
	size_t dom;

	for (dom = 0; dom < pp->pp_data.N_domains; dom++)
	{
		DTreeLeaf *leaf;
		bool has_selector = false;
		bool has_required = false;

		/* Does the selector link of the rule appear in this domain? */
		leaf = pp->pp_data.domain_array[dom].child;
		while (leaf != NULL)
		{
			if (post_process_match(rule->selector,
			        sublinkage->link_array[leaf->link].link_name))
			{
				has_selector = true;
				break;
			}
			leaf = leaf->next;
		}
		if (!has_selector) continue;

		/* It does; the domain must then offer one of the required links. */
		leaf = pp->pp_data.domain_array[dom].child;
		while (leaf != NULL)
		{
			if (string_in_list(sublinkage->link_array[leaf->link].link_name,
			                   rule->link_array))
			{
				has_required = true;
				break;
			}
			leaf = leaf->next;
		}
		if (!has_required) return false;
	}
	return true;
}
Example #2
0
/**
 * Walk the NULL-terminated array a of post-processing symbols and
 * report whether any element matches s. Each element is handed to
 * post_process_match() as the first argument, with s as the second.
 * Returns true on the first match, false if the end of the array is
 * reached without one.
 */
static int string_in_list(const char * s, const char * a[])
{
	const char **sym = a;

	while (*sym != NULL)
	{
		if (post_process_match(*sym, s)) return true;
		sym++;
	}
	return false;
}
Example #3
0
static int 
apply_contains_one_globally(Postprocessor *pp,Sublinkage *sublinkage,pp_rule *rule)
{
  /* Sentence-wide "contains one" check. Links whose l field is -1 are
     skipped in both scans. Returns TRUE when
     (1) no remaining link matches the rule's selector, or
     (2) one does, and some remaining link is accepted by
         string_in_list() against the rule's link set.
     Returns FALSE otherwise. */
  int sel, k;

  /* Scan for the selector link. */
  for (sel = 0; sel < sublinkage->num_links; sel++) {
    if (sublinkage->link[sel]->l != -1 &&
        post_process_match(rule->selector, sublinkage->link[sel]->name))
      break;
  }
  if (sel == sublinkage->num_links) return TRUE;

  /* Selector is present: look for any link from the rule's link set. */
  for (k = 0; k < sublinkage->num_links; k++) {
    if (sublinkage->link[k]->l == -1) continue;
    if (string_in_list(sublinkage->link[k]->name, rule->link_array))
      return TRUE;
  }
  return FALSE;
}
Example #4
0
/**
 * Check a "contains none" rule against every domain of the linkage.
 *
 * A domain is relevant when one of its links is accepted by
 * post_process_match() against rule->selector. A relevant domain
 * violates the rule if any of its links is accepted by
 * string_in_list() against rule->link_array.
 *
 * Returns false on the first violation, true if there is none.
 */
static bool
apply_contains_none(PP_data *pp_data, Linkage sublinkage, pp_rule *rule)
{
	size_t dom;

	for (dom = 0; dom < pp_data->N_domains; dom++)
	{
		DTreeLeaf *leaf = pp_data->domain_array[dom].child;
		bool has_selector = false;

		/* Does the selector link of the rule appear in this domain? */
		while (leaf != NULL)
		{
			if (post_process_match(rule->selector,
			        sublinkage->link_array[leaf->link].link_name))
			{
				has_selector = true;
				break;
			}
			leaf = leaf->next;
		}
		if (!has_selector) continue;

		/* It does; none of the domain's links may be in the forbidden list. */
		leaf = pp_data->domain_array[dom].child;
		while (leaf != NULL)
		{
			if (string_in_list(sublinkage->link_array[leaf->link].link_name,
			                   rule->link_array))
			{
				return false;
			}
			leaf = leaf->next;
		}
	}
	return true;
}
Example #5
0
static int 
apply_contains_none(Postprocessor *pp,Sublinkage *sublinkage,pp_rule *rule) 
{
  /* "Contains none" check over all domains. A domain is relevant when
     one of its links matches rule->selector via post_process_match().
     Returns FALSE as soon as a relevant domain holds a link accepted by
     string_in_list() against rule->link_array; TRUE otherwise. */
  int dom;

  for (dom = 0; dom < pp->pp_data.N_domains; dom++)
    {
      DTreeLeaf *leaf;
      int has_selector = 0;

      /* Does the selector link of the rule appear in this domain? */
      for (leaf = pp->pp_data.domain_array[dom].child; leaf != NULL; leaf = leaf->next)
        {
          if (post_process_match(rule->selector,
                                 sublinkage->link[leaf->link]->name))
            {
              has_selector = 1;
              break;
            }
        }
      if (!has_selector) continue;

      /* It does; any link from the rule's list is a violation. */
      for (leaf = pp->pp_data.domain_array[dom].child; leaf != NULL; leaf = leaf->next)
        {
          if (string_in_list(sublinkage->link[leaf->link]->name,
                             rule->link_array))
            return FALSE;
        }
    }
  return TRUE;
}
Example #6
0
static int 
apply_contains_one(Postprocessor *pp, Sublinkage *sublinkage, pp_rule *rule) 
{
  /* "Contains one" check over all domains. A domain is relevant when
     one of its links matches rule->selector via post_process_match().
     Returns FALSE as soon as a relevant domain has no link accepted by
     string_in_list() against rule->link_array; TRUE otherwise. */
  int dom;

  for (dom = 0; dom < pp->pp_data.N_domains; dom++)
    {
      DTreeLeaf *leaf;
      int has_selector = 0;
      int has_required = 0;

      /* Does the selector link of the rule appear in this domain? */
      for (leaf = pp->pp_data.domain_array[dom].child; leaf != NULL; leaf = leaf->next)
        {
          if (post_process_match(rule->selector,
                                 sublinkage->link[leaf->link]->name))
            {
              has_selector = 1;
              break;
            }
        }
      if (!has_selector) continue;

      /* It does; the domain needs at least one link from the list. */
      for (leaf = pp->pp_data.domain_array[dom].child; leaf != NULL; leaf = leaf->next)
        {
          if (string_in_list(sublinkage->link[leaf->link]->name,
                             rule->link_array))
            {
              has_required = 1;
              break;
            }
        }
      if (!has_required) return FALSE;
    }
  return TRUE;
}
Example #7
0
static int string_in_list(const char * s, const char * a[])
{
	/* The array a holds post-processing symbols and ends with a NULL
	 * entry. Returns true as soon as post_process_match(symbol, s)
	 * succeeds for some symbol, and false if none does. */
	const char **sym;

	for (sym = a; *sym != NULL; sym++)
	{
		if (post_process_match(*sym, s)) return true;
	}
	return false;
}
Example #8
0
static int string_in_list(char * s, char * a[]) 
{
  /* The array a holds post-processing symbols and ends with a NULL
     entry. Returns TRUE as soon as post_process_match(symbol, s)
     succeeds for some symbol, and FALSE if none does. */
  char **sym = a;

  while (*sym != NULL)
    {
      if (post_process_match(*sym, s)) return TRUE;
      sym++;
    }
  return FALSE;
}
Example #9
0
/**
 * Search the bucket of the table selected by cms_hash(pp_match_name)
 * for an entry whose name C satisfies
 * post_process_match(pp_match_name, C).
 * Returns true if such an entry exists in that bucket, false otherwise.
 */
static bool match_in_cms_table(multiset_table *cmt, const char * pp_match_name)
{
	Cms *entry = cmt->cms_table[cms_hash(pp_match_name)];

	while (entry != NULL)
	{
		if (post_process_match(pp_match_name, entry->name)) return true;
		entry = entry->next;
	}
	return false;
}
Example #10
0
/**
 * Trim constituents that wrap a subordinate clause.
 *
 * For every constituent of this sublinkage whose start link matches
 * "MVs" or "MVg", walk leftwards from the word just before it. At each
 * position, examine the constituents that start there and reach at
 * least as far right as the clause:
 *  - an "S" or "NP" stops the whole leftward walk;
 *  - one with domain type 'v' or 'a' has its right edge pulled back to
 *    the word just before the clause.
 * Finally, if the clause itself begins with a comma, its left edge is
 * moved one word to the right.
 *
 * Only constituents with index in
 * [numcon_total, numcon_total + numcon_subl) are considered.
 */
static void adjust_subordinate_clauses(con_context_t *ctxt, Linkage linkage,
                                       int numcon_total,
                                       int numcon_subl)
{
	const int first = numcon_total;
	const int limit = numcon_total + numcon_subl;
	int sub;

	for (sub = first; sub < limit; sub++)
	{
		size_t pos;
		bool blocked = false;

		/* Only clauses started by MVs or MVg are of interest. */
		if ((post_process_match("MVs", ctxt->constituent[sub].start_link) != 1) &&
		    (post_process_match("MVg", ctxt->constituent[sub].start_link) != 1))
		{
			continue;
		}

		/* pos wraps around to (size_t)-1 once it steps below word 0. */
		pos = ctxt->constituent[sub].left - 1;
		while ((false == blocked) && pos != (size_t) -1)
		{
			int outer;

			for (outer = first; outer < limit; outer++)
			{
				/* Must start at pos and enclose the clause on the right. */
				if (ctxt->constituent[outer].left != pos) continue;
				if (ctxt->constituent[outer].right < ctxt->constituent[sub].right)
					continue;

				if ((strcmp(ctxt->constituent[outer].type, "S") == 0) ||
				    (strcmp(ctxt->constituent[outer].type, "NP") == 0))
				{
					/* Do not look beyond a larger S or NP. */
					blocked = true;
					break;
				}

				if ((ctxt->constituent[outer].domain_type == 'v') ||
				    (ctxt->constituent[outer].domain_type == 'a'))
				{
					size_t new_right = ctxt->constituent[sub].left - 1;
					ctxt->constituent[outer].right = new_right;

					if (verbosity >= 2)
					{
						printf("Adjusting constituent %d:\n", outer);
					}
					print_constituent(ctxt, linkage, outer);
				}
			}
			pos--;
		}

		if (strcmp(linkage->word[ctxt->constituent[sub].left], ",") == 0)
		{
			ctxt->constituent[sub].left++;
		}
	}
}
Example #11
0
/**
 * Look up the domain associated with a starting link.
 *
 * The knowledge's starting_link_lookup_table is scanned in order; the
 * table ends with an entry whose domain equals SIZE_MAX. The domain of
 * the first entry whose starting_link is accepted by
 * post_process_match(entry, link) is returned. If the sentinel is
 * reached first, SIZE_MAX is returned.
 */
static size_t find_domain_name(Postprocessor *pp, const char *link)
{
	StartingLinkAndDomain *entry;

	for (entry = pp->knowledge->starting_link_lookup_table; ; entry++)
	{
		size_t domain = entry->domain;

		/* End-of-list sentinel: the link starts no domain. */
		if (domain == SIZE_MAX) break;
		if (post_process_match(entry->starting_link, link)) return domain;
	}
	return SIZE_MAX;
}
Example #12
0
/**
 * Trim constituents that wrap a subordinate clause.
 *
 * For every constituent of this sublinkage whose start link matches
 * "MVs" or "MVg", walk leftwards from the word just before it down to
 * word 0. At each position, examine the constituents that start there
 * and reach at least as far right as the clause:
 *  - an "S" or "NP" stops the whole leftward walk;
 *  - one with domain type 'v' or 'a' has its right edge pulled back to
 *    the nearest word before the clause that is flagged in
 *    ctxt->word_used for the current sublinkage.
 * Finally, if the clause itself begins with a comma, its left edge is
 * moved one word to the right.
 *
 * Only constituents with index in
 * [numcon_total, numcon_total + numcon_subl) are considered.
 */
static void adjust_subordinate_clauses(con_context_t *ctxt, Linkage linkage,
                                       int numcon_total,
                                       int numcon_subl)
{
	const int first = numcon_total;
	const int limit = numcon_total + numcon_subl;
	int sub;

	for (sub = first; sub < limit; sub++)
	{
		int pos;
		int blocked = 0;

		/* Only clauses started by MVs or MVg are of interest. */
		if ((post_process_match("MVs", ctxt->constituent[sub].start_link) != 1) &&
		    (post_process_match("MVg", ctxt->constituent[sub].start_link) != 1))
		{
			continue;
		}

		pos = ctxt->constituent[sub].left - 1;
		while ((blocked == 0) && pos >= 0)
		{
			int outer;

			for (outer = first; outer < limit; outer++)
			{
				/* Must start at pos and enclose the clause on the right. */
				if (ctxt->constituent[outer].left != pos) continue;
				if (ctxt->constituent[outer].right < ctxt->constituent[sub].right)
					continue;

				if ((strcmp(ctxt->constituent[outer].type, "S") == 0) ||
				    (strcmp(ctxt->constituent[outer].type, "NP") == 0))
				{
					/* Do not look beyond a larger S or NP. */
					blocked = 1;
					break;
				}

				if ((ctxt->constituent[outer].domain_type == 'v') ||
				    (ctxt->constituent[outer].domain_type == 'a'))
				{
					int end = ctxt->constituent[sub].left - 1;

					/* NOTE(review): this backward scan has no lower bound;
					 * it relies on some word at or before `end` being
					 * flagged as used - confirm against callers. */
					while (ctxt->word_used[linkage->current][end] != 1)
					{
						end--;
					}
					ctxt->constituent[outer].right = end;

					if (verbosity >= 2)
					{
						printf("Adjusting constituent %d:\n", outer);
					}
					print_constituent(ctxt, linkage, outer);
				}
			}
			pos--;
		}

		if (strcmp(linkage->word[ctxt->constituent[sub].left], ",") == 0)
		{
			ctxt->constituent[sub].left++;
		}
	}
}
Example #13
0
int pp_linkset_match_bw(pp_linkset *ls, wchar_t *str) 
{
  /* Set query with the match arguments swapped relative to the stored
     entries: str is passed as the FIRST argument of post_process_match()
     and each stored string as the second. Only the hash bucket selected
     by compute_hash(ls, str) is searched.
     Returns 1 on a match, 0 otherwise (including when ls is NULL). */
  int hashval;
  pp_linkset_node *node;

  if (ls == NULL) return 0;

  hashval = compute_hash(ls, str);
  for (node = ls->hash_table[hashval]; node != 0; node = node->next) {
    if (post_process_match(str, node->str)) return 1;
  }
  return 0;
}
Example #14
0
static int find_domain_name(Postprocessor *pp, char *link) 
{
  /* Scan the knowledge's starting_link_lookup_table in order and return
     the domain of the first entry whose starting_link is accepted by
     post_process_match(entry, link). The table ends with an entry whose
     domain is -1; reaching it means the link starts no domain, and -1
     is returned. */
  StartingLinkAndDomain *entry;

  for (entry = pp->knowledge->starting_link_lookup_table; ; entry++)
    {
      int domain = entry->domain;

      if (domain == -1) break;            /* end-of-list sentinel */
      if (post_process_match(entry->starting_link, link)) return domain;
    }
  return -1;
}
Example #15
0
int pp_linkset_match(pp_linkset *ls, wchar_t *str) 
{
  /* Set query. Each stored string in the hash bucket selected by
     compute_hash(ls, str) is passed as the first argument of
     post_process_match(), with str as the second.
     Returns 1 if some stored string matches, 0 otherwise (including
     when ls is NULL). */
  int hashval;
  pp_linkset_node *node;

  if (ls == NULL) return 0;

  hashval = compute_hash(ls, str);
  for (node = ls->hash_table[hashval]; node != 0; node = node->next) {
    if (post_process_match(node->str, str)) return 1;
  }
  return 0;
}
Example #16
0
/**
 * Set query with the match arguments swapped relative to the stored
 * entries: str is passed as the FIRST argument of post_process_match()
 * and each stored string as the second. Only the hash bucket selected
 * by compute_hash(ls, str) is searched.
 * Returns true on a match, false otherwise (including when ls is NULL).
 */
bool pp_linkset_match_bw(pp_linkset *ls, const char *str)
{
	unsigned int hashval;
	pp_linkset_node *node;

	if (ls == NULL) return false;

	hashval = compute_hash(ls, str);
	for (node = ls->hash_table[hashval]; node != 0; node = node->next)
	{
		if (post_process_match(str, node->str)) return true;
	}
	return false;
}
Example #17
0
/**
 * Sentence-wide "contains one" check.
 *
 * Returns true when
 * (1) no link of the sublinkage matches the rule's selector, or
 * (2) one does, and some link is accepted by string_in_list() against
 *     the rule's link set.
 * Returns false when the selector is present but no link from the set
 * is. Every link visited is asserted to have lw != SIZE_MAX.
 * The pp_data argument is not used here.
 */
static bool
apply_contains_one_globally(PP_data *pp_data, Linkage sublinkage, pp_rule *rule)
{
	size_t n;
	bool selector_seen = false;

	/* Scan for the selector link. */
	for (n = 0; n < sublinkage->num_links; n++)
	{
		assert(sublinkage->link_array[n].lw != SIZE_MAX);
		if (post_process_match(rule->selector, sublinkage->link_array[n].link_name))
		{
			selector_seen = true;
			break;
		}
	}
	if (!selector_seen) return true;

	/* Selector is present: look for any link from the rule's link set. */
	for (n = 0; n < sublinkage->num_links; n++)
	{
		assert(sublinkage->link_array[n].lw != SIZE_MAX);
		if (string_in_list(sublinkage->link_array[n].link_name, rule->link_array))
		{
			return true;
		}
	}
	return false;
}
Example #18
0
/**
 * Fill ctxt->wordtype[] for every word of the linkage.
 *
 * All words start as NONE. Then, for each link, the word on its right
 * end is classified from the link label:
 *  - S, SX or SF (upper-case part): STYPE; upgraded to PTYPE if the
 *    same word is also the left end of a Pg*b, I, PP or Pv link.
 *  - QI#d: QTYPE; upgraded to QDTYPE if the same word is also the left
 *    end of a D##w link.
 *  - Mr or MX#d: QDTYPE.
 * The checks are applied in that order, so a later one overrides an
 * earlier one for the same link.
 * (This function is called once for each sublinkage.)
 */
static void generate_misc_word_info(con_context_t * ctxt, Linkage linkage)
{
	int word, lnk, other;

	for (word = 0; word < linkage->num_words; word++)
	{
		ctxt->wordtype[word] = NONE;
	}

	for (lnk = 0; lnk < linkage_get_num_links(linkage); lnk++)
	{
		const int rword = linkage_get_link_rword(linkage, lnk);
		const char *rlabel = linkage_get_link_label(linkage, lnk);

		if ((uppercompare(rlabel, "S") == 0) ||
		    (uppercompare(rlabel, "SX") == 0) ||
		    (uppercompare(rlabel, "SF") == 0))
		{
			ctxt->wordtype[rword] = STYPE;

			for (other = 0; other < linkage_get_num_links(linkage); other++)
			{
				const int lword = linkage_get_link_lword(linkage, other);
				const char *llabel = linkage_get_link_label(linkage, other);

				if (rword != lword) continue;

				/* Pvf, Pgf? */
				if ((post_process_match("Pg#b", llabel) == 1) ||
				    (uppercompare(llabel, "I") == 0) ||
				    (uppercompare(llabel, "PP") == 0) ||
				    (post_process_match("Pv", llabel) == 1))
				{
					ctxt->wordtype[rword] = PTYPE;
				}
			}
		}

		if (post_process_match("QI#d", rlabel) == 1)
		{
			ctxt->wordtype[rword] = QTYPE;

			for (other = 0; other < linkage_get_num_links(linkage); other++)
			{
				const int lword = linkage_get_link_lword(linkage, other);
				const char *llabel = linkage_get_link_label(linkage, other);

				if (rword != lword) continue;

				if (post_process_match("D##w", llabel) == 1)
				{
					ctxt->wordtype[rword] = QDTYPE;
				}
			}
		}

		if (post_process_match("Mr", rlabel) == 1)
		{
			ctxt->wordtype[rword] = QDTYPE;
		}
		if (post_process_match("MX#d", rlabel) == 1)
		{
			ctxt->wordtype[rword] = QDTYPE;
		}
	}
}
Example #19
0
/**
 * Build the constituents of sublinkage number s from its post-processing
 * domains and then refine them.
 *
 * Steps, in order:
 *  1. For each domain, compute its leftmost/rightmost word and add one
 *     constituent for it, plus extra constituents (S, SBAR, WHNP, WHPP,
 *     WHADVP) depending on the domain type and the start link.
 *  2. Generate complement constituents through a fixed series of
 *     gen_comp() calls.
 *  3. Adjust subordinate clauses and strip a leading comma from
 *     'p'-domain constituents.
 *  4. Repeatedly fix up overlapping (non-nested) constituents.
 *  5. Stamp every constituent with the current sublinkage and set aux.
 *
 * New constituents are stored in ctxt->constituent[] starting at index
 * numcon_total.
 *
 * Returns the number of constituents now belonging to this sublinkage.
 */
static int read_constituents_from_domains(con_context_t *ctxt, Linkage linkage,
                                          int numcon_total, int s)
{
	int d, c, leftlimit, l, leftmost, rightmost, w, c2, numcon_subl=0, w2;
	List_o_links * dlink;
	int rootright, rootleft, adjustment_made;
	Sublinkage * subl;
	const char * name;
	Domain domain;

	/* r_limit is not declared in this function; presumably a file-scope
	   variable - TODO confirm. */
	r_limit = linkage->num_words-2; /**PV**/

	subl = &linkage->sublinkage[s];

	/* One pass per domain. c is the index of the most recently added
	   constituent; see the c-- note below. */
	for (d=0, c=numcon_total; d<subl->pp_data.N_domains; d++, c++) {
		domain = subl->pp_data.domain_array[d];
		rootright = linkage_get_link_rword(linkage, domain.start_link);
		rootleft =  linkage_get_link_lword(linkage, domain.start_link);

		/* For these domain types the span is seeded from the LEFT word of
		   the start link and may extend leftwards without limit. */
		if ((domain.type=='c') ||
			(domain.type=='d') ||
			(domain.type=='e') ||
			(domain.type=='f') ||
			(domain.type=='g') ||
			(domain.type=='u') ||
			(domain.type=='y')) {
			leftlimit = 0;
			leftmost = linkage_get_link_lword(linkage, domain.start_link);
			rightmost = linkage_get_link_lword(linkage, domain.start_link);
		}
		else {
			/* Otherwise the span is seeded from the RIGHT word of the start
			   link and may not extend left past the word after the start
			   link's left word. */
			leftlimit = linkage_get_link_lword(linkage, domain.start_link)+1;
			leftmost = linkage_get_link_rword(linkage, domain.start_link);
			rightmost = linkage_get_link_rword(linkage, domain.start_link);
		}

		/* Start by assigning both left and right limits to the
		   right word of the start link. This will always be contained
		   in the constituent. This will also handle the case
		   where the domain contains no links. */

		/* Widen the span over every link in the domain. */
		for (dlink = domain.lol; dlink!=NULL; dlink=dlink->next) {
			l=dlink->link;

			if ((linkage_get_link_lword(linkage, l) < leftmost) &&
				(linkage_get_link_lword(linkage, l) >= leftlimit))
				leftmost = linkage_get_link_lword(linkage, l);

			if (linkage_get_link_rword(linkage, l) > rightmost)
				rightmost = linkage_get_link_rword(linkage, l);
		}

		/* The loop header already advanced c; step back so that
		   add_constituent() works from the previous index. Presumably
		   add_constituent() returns the index of the constituent it just
		   filled - TODO confirm. */
		c--;
		c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost,
						cons_of_domain(domain.type));

		/* Extra constituents over the same domain, chosen by domain type
		   and by the start link of the constituent added last. */
		if (domain.type=='z') {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
		}
		if (domain.type=='c') {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("Ce*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("Rn", ctxt->constituent[c].start_link)==1)) {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "SBAR");
		}
		if ((post_process_match("R*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("MX#r", ctxt->constituent[c].start_link)==1)) {
			/* One-word WHNP at the start of the span, skipping a comma. */
			w=leftmost;
			if (strcmp(linkage->word[w], ",")==0) w++;
			c = add_constituent(ctxt, c, linkage, domain, w, w, "WHNP");
		}
		if (post_process_match("Mj", ctxt->constituent[c].start_link)==1) {
			/* Two-word WHPP with a one-word WHNP on its second word. */
			w=leftmost;
			if (strcmp(linkage->word[w], ",")==0) w++;
			c = add_constituent(ctxt, c, linkage, domain, w, w+1, "WHPP");
			c = add_constituent(ctxt, c, linkage, domain, w+1, w+1, "WHNP");
		}
		if ((post_process_match("Ss#d", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("B#d", ctxt->constituent[c].start_link)==1)) {
			c = add_constituent(ctxt, c, linkage, domain, rootleft, rootleft, "WHNP");
			c = add_constituent(ctxt, c, linkage, domain,
							rootleft, ctxt->constituent[c-1].right, "SBAR");
		}
		if (post_process_match("CP", ctxt->constituent[c].start_link)==1) {
			if (strcmp(linkage->word[leftmost], ",")==0)
				ctxt->constituent[c].left++;
			/* S from word 1 through the last word index. */
			c = add_constituent(ctxt, c, linkage, domain, 1, linkage->num_words-1, "S");
		}
		if ((post_process_match("MVs", ctxt->constituent[c].start_link)==1) ||
			(domain.type=='f')) {
			/* A leading "when" (after an optional comma) gets a WHADVP. */
			w=ctxt->constituent[c].left;
			if (strcmp(linkage->word[w], ",")==0)
				w++;
			if (strcmp(linkage->word[w], "when")==0) {
				c = add_constituent(ctxt, c, linkage, domain, w, w, "WHADVP");
			}
		}
		if (domain.type=='t') {
			c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("QI", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("Mr", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("MX#d", ctxt->constituent[c].start_link)==1)) {
			/* Pick the WH phrase label from the word type of the first
			   word (after an optional comma). */
			w = leftmost;
			if (strcmp(linkage->word[w], ",")==0) w++;
			if (ctxt->wordtype[w] == NONE)
				name = "WHADVP";
			else if (ctxt->wordtype[w] == QTYPE)
				name = "WHNP";
			else if (ctxt->wordtype[w] == QDTYPE)
				name = "WHNP";
			else
				/* NOTE(review): name is left unset on this path; this relies
				   on the assert not returning - confirm. */
				assert(0, "Unexpected word type");
			c = add_constituent(ctxt, c, linkage, domain, w, w, name);

			if (ctxt->wordtype[w] == QDTYPE) {
				/* Now find the finite verb to the right, start an S */
				/* Limit w2 to sentence length. */
				// for( w2=w+1; w2 < r_limit-1; w2++ )
				for (w2 = w+1; w2 < rightmost; w2++)
				  if ((ctxt->wordtype[w2] == STYPE) || (ctxt->wordtype[w2] == PTYPE)) break;

				/* Adjust the right boundary of previous constituent */
				ctxt->constituent[c].right = w2-1;
				c = add_constituent(ctxt, c, linkage, domain, w2, rightmost, "S");
			  }
		}

		/* Sanity checks on the constituent added last for this domain. */
		if (ctxt->constituent[c].domain_type=='\0') {
			error("Error: no domain type assigned to constituent\n");
		}
		if (ctxt->constituent[c].start_link==NULL) {
			error("Error: no type assigned to constituent\n");
		}
	}

	/* Number of constituents created so far for this sublinkage. */
	numcon_subl = c - numcon_total;
	/* numcon_subl = handle_islands(linkage, numcon_total, numcon_subl);  */

	/* NOTE: only the printf is guarded by the verbosity test; the
	   print_constituent() loop below runs unconditionally. */
	if (verbosity >= 2)
		printf("Constituents added at first stage for subl %d:\n",
			   linkage->current);
	for (c = numcon_total; c < numcon_total + numcon_subl; c++)
	{
		print_constituent(ctxt, linkage, c);
	}

	/* Each gen_comp() call below may append constituents and returns the
	   updated count. The last argument is a numeric case code. */

	/* Opener case - generates S around main clause.
	   (This must be done first; the S generated will be needed for
	   later cases.) */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "S", "S", 5);

	/* pp opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "S", "S", 6);

	/* participle opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "S", "S", "S", 9);

	/* Subject-phrase case; every main VP generates an S */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "S", "NP", 1);

	/* Relative clause case; an SBAR generates a complement NP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "NP", "NP", 3);

	/* Participle modifier case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "NP", "NP", 8);

	/* PP modifying NP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "NP", "NP", 8);

	/* Appositive case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "NP", "NP", 4);

	/* S-V inversion case; an NP generates a complement VP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "SINV", "VP", 7);

	adjust_subordinate_clauses(ctxt, linkage, numcon_total, numcon_subl);
	/* A 'p'-domain constituent must not start on a comma. */
	for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
		if ((ctxt->constituent[c].domain_type=='p') &&
			(strcmp(linkage->word[ctxt->constituent[c].left], ",")==0)) {
			ctxt->constituent[c].left++;
		}
	}

	/* Make sure the constituents are nested. If two constituents
	 * are not nested: whichever constituent has the furthest left
	 * boundary, shift that boundary rightwards to the left boundary
	 * of the other one.
	 */
	/* The sweep repeats until a full pass makes no comma adjustment.
	   The plain boundary shift in the final else branch does not set
	   adjustment_made, so it does not by itself trigger another pass. */
	while (1) {
		adjustment_made=0;
		for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
			for (c2=numcon_total; c2<numcon_total + numcon_subl; c2++) {
				/* c starts before c2, ends inside c2: a crossing overlap. */
				if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
					(ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
					(ctxt->constituent[c].right >= ctxt->constituent[c2].left)) {

					/* We've found two overlapping constituents.
					   If one is larger, except the smaller one
					   includes an extra comma, adjust the smaller one
					   to exclude the comma */

					if ((strcmp(linkage->word[ctxt->constituent[c2].right], ",")==0) ||
						(strcmp(linkage->word[ctxt->constituent[c2].right],
								"RIGHT-WALL")==0)) {
						if (verbosity>=2)
							printf("Adjusting %d to fix comma overlap\n", c2);
						adjust_for_right_comma(ctxt, linkage, c2);
						adjustment_made=1;
					}
					else if (strcmp(linkage->word[ctxt->constituent[c].left], ",")==0) {
						if (verbosity>=2)
							printf("Adjusting c %d to fix comma overlap\n", c);
						adjust_for_left_comma(ctxt, linkage, c);
						adjustment_made=1;
					}
					else {
					  if (verbosity>=2) {
						printf("WARNING: the constituents aren't nested! Adjusting them." \
							   "(%d, %d)\n", c, c2);
					  }
					  ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}
				}
			}
		}
		if (adjustment_made==0) break;
	}

	/* This labels certain words as auxiliaries (such as forms of "be"
	   with passives, forms of "have" with past participles,
	   "to" with infinitives). These words start VP's which include
	   them. In Treebank I, these don't get printed unless they're part of an
	   andlist, in which case they get labeled "X". (this is why we need to
	   label them as "aux".) In Treebank II, however, they seem to be treated
	   just like other verbs, so the "aux" stuff isn't needed. */


	for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
		ctxt->constituent[c].subl = linkage->current;
		if (((ctxt->constituent[c].domain_type == 'v') &&
			(ctxt->wordtype[linkage_get_link_rword(linkage,
											 ctxt->constituent[c].start_num)]==PTYPE))
		   ||
		   ((ctxt->constituent[c].domain_type == 't') &&
			(strcmp(ctxt->constituent[c].type, "VP")==0))) {
			ctxt->constituent[c].aux=1;
		}
		else ctxt->constituent[c].aux=0;
	}

	/* This second pass resets aux to 0 for every constituent, overriding
	   the values computed just above (consistent with the Treebank II
	   remark in the comment before the previous loop). */
	for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
		ctxt->constituent[c].subl = linkage->current;
		ctxt->constituent[c].aux=0;
	}

	return numcon_subl;
}
Example #20
0
/**
 * This function looks for constituents of type ctype1. Say it finds
 * one, call it c1. It searches for the next larger constituent of
 * type ctype2, call it c2. It then generates a new constituent of
 * ctype3, containing all the words in c2 but not c1.
 *
 * Only constituents with index in
 * [numcon_total, numcon_total + numcon_subl) are examined; new ones
 * are appended after them in ctxt->constituent[].
 *
 * The case selector x adds a per-case filter on c1 (start link or
 * domain type) and decides on which side of c1 the new constituent
 * lies: to the right of c1 for CASE_OPENER, CASE_PPOPEN and
 * CASE_PART_OPEN, to the left of c1 for all other cases.
 *
 * Returns the updated number of constituents for this sublinkage.
 */
static int gen_comp(con_context_t *ctxt, Linkage linkage,
                    int numcon_total, int numcon_subl,
					     const char * ctype1, const char * ctype2,
                    const char * ctype3, case_type x)
{
	size_t w, w2, w3;
	int c, c1, c2;
	bool done;
	/* c is the index of the next free constituent slot. */
	c = numcon_total + numcon_subl;

	for (c1=numcon_total; c1<numcon_total + numcon_subl; c1++)
	{
		/* If ctype1 is NP, it has to be an appositive to continue */
		if ((x==CASE_APPOS) && (post_process_match("MX#*", ctxt->constituent[c1].start_link)==0))
			continue;

#ifdef REVIVE_DEAD_CODE
		/* If ctype1 is X, and domain_type is t, it's an infinitive - skip it */
		if ((x==CASE_UNUSED) && (ctxt->constituent[c1].domain_type=='t'))
			continue;
#endif /* REVIVE_DEAD_CODE */

		/* If it's domain-type z, it's a subject-relative clause;
		   the VP doesn't need an NP */
		if (ctxt->constituent[c1].domain_type=='z')
			continue;

		/* If ctype1 is X or VP, and it's not started by an S, don't generate an NP
		 (Neither of the two previous checks are necessary now, right?) */
#ifdef REVIVE_DEAD_CODE
		/* use this ... if ((x==CASE_S || x==CASE_UNUSED) && */
#endif /* REVIVE_DEAD_CODE */
		/* CASE_S: skip unless the start link matches S, SX or SF, and
		   skip anyway if it matches S##w. */
		if ((x==CASE_S) &&
			(((post_process_match("S", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SX", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SF", ctxt->constituent[c1].start_link) == 0)) ||
			 (post_process_match("S##w", ctxt->constituent[c1].start_link) != 0)))
			continue;

		/* If it's an SBAR (relative clause case), it has to be a relative clause */
		if ((x==CASE_REL_CLAUSE) &&
			((post_process_match("Rn", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("R*", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#r", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("Mr", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#d", ctxt->constituent[c1].start_link) == 0)))
			continue;

		/* If ctype1 is SBAR (clause opener case), it has to be an f domain */
		if ((x==CASE_OPENER) && (ctxt->constituent[c1].domain_type!='f'))
			continue;

		/* pp opener case: c1 has to be a g domain */
		if ((x==CASE_PPOPEN) && (ctxt->constituent[c1].domain_type!='g'))
			continue;

		/* S-V inversion case: c1 has to be started by an SI */
		if ((x==CASE_SVINV) && (post_process_match("SI", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is VP (participle modifier case), it has to be
		   started by an Mv or Mg (the check below matches against "M") */
		if ((x==CASE_PART_MOD) && (post_process_match("M", ctxt->constituent[c1].start_link)==0))
			continue;

		/* Participle opener case: c1 has
		   to be started by a COp */
		if ((x==CASE_PART_OPEN) && (post_process_match("COp", ctxt->constituent[c1].start_link)==0))
			continue;

		/* Now start at the bounds of c1, and work outwards until you
		   find a larger constituent of type ctype2 */
		if (!(strcmp(ctxt->constituent[c1].type, ctype1)==0))
			continue;

		if (verbosity >= 2)
			printf("Generating complement constituent for c %d of type %s\n",
				   c1, ctype1);
		done = false;
		/* w2 walks the candidate left edge leftwards from c1's left edge;
		   being a size_t, it wraps to (size_t)-1 after word 0, which ends
		   the loop. w3 walks the candidate right edge rightwards. The w3
		   loop itself does not test done, but the c2 loop inside it does,
		   so nothing more is examined once done is set. */
		for (w2 = ctxt->constituent[c1].left; (done == false) && (w2 != (size_t)-1); w2--)
		{
			for (w3 = ctxt->constituent[c1].right; w3<linkage->num_words; w3++)
			{
				for (c2 = numcon_total; (done == false) &&
						 (c2 < numcon_total + numcon_subl); c2++) {
					/* c2 must span exactly [w2, w3], differ from c1 ... */
					if (!((ctxt->constituent[c2].left == w2) &&
						  (ctxt->constituent[c2].right == w3)) || (c2==c1))
						continue;
					/* ... and be of type ctype2. */
					if (!(strcmp(ctxt->constituent[c2].type, ctype2)==0))
						continue;

					/* The new constituent (c) covers the part of c2 on
					   one side of c1, with its inner edge adjacent to
					   c1. If nothing of c2 is left on that side, give
					   up on this c1 without adding anything. */
					if ((x==CASE_OPENER) || (x==CASE_PPOPEN) || (x==CASE_PART_OPEN))
					{
								/* This is the case where c is to the
								   RIGHT of c1 */
						w = ctxt->constituent[c1].right + 1;
						if (w > ctxt->constituent[c2].right)
						{
							done = true;
							continue;
						}
						ctxt->constituent[c].left = w;
						ctxt->constituent[c].right = ctxt->constituent[c2].right;
					}
					else
					{
						/* This is the case where c is to the LEFT of c1.
						   NOTE(review): w is a size_t, so if c1 starts at
						   word 0 this subtraction wraps around - confirm
						   that cannot happen here. */
						w = ctxt->constituent[c1].left - 1;
						if (w < ctxt->constituent[c2].left) {
							done = true;
							continue;
						}
						ctxt->constituent[c].right = w;
						ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}

					/* Comma adjustment is applied to c1, not to the new c. */
					adjust_for_left_comma(ctxt, linkage, c1);
					adjust_for_right_comma(ctxt, linkage, c1);

					/* Fill in the remaining fields of the new constituent;
					   it gets domain type 'x' and start link "XX". */
					ctxt->constituent[c].type =
						string_set_add(ctype3, ctxt->phrase_ss);
					ctxt->constituent[c].domain_type = 'x';
					ctxt->constituent[c].start_link =
						string_set_add("XX", ctxt->phrase_ss);
					if (verbosity >= 2)
					{
						printf("Larger c found: c %d (%s); ",
							   c2, ctype2);
						printf("Adding constituent:\n");
						print_constituent(ctxt, linkage, c);
					}
					/* Advance to the next free slot and make sure it is
					   still inside the constituent array. */
					c++;
					assert (c < ctxt->conlen, "Too many constituents");
					done = true;
				}
			}
		}
		if (verbosity >= 2)
		{
			if (done == false)
				printf("No constituent added, because no larger %s " \
					   " was found\n", ctype2);
		}
	}
	numcon_subl = c - numcon_total;
	return numcon_subl;
}
Example #21
0
/**
 * Apply a final set of fix-ups to the first numcon_total constituents.
 *
 * Per constituent:
 *  - start link CP (upper-case part): marked invalid;
 *  - start link YS or YP (upper-case part): right edge extended by one;
 *  - start link exactly "MVpn", "COn" or "Mpn": relabelled "NP";
 *  - start link exactly "Wdc" with left edge at word 2: marked invalid;
 *  - start link matching "A", or domain type 'd' or 'h', and only one
 *    word long: marked invalid;
 *  - domain type 'h' preceded by a "$" word: left edge extended to
 *    include the "$".
 *
 * Then a global S that stops just short of the final punctuation is
 * extended by one word, and, if no valid S touches both ends of the
 * sentence, a new S spanning the whole sentence is appended at index
 * numcon_total.
 *
 * Returns the (possibly incremented) total number of constituents.
 */
static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	int c;
	bool global_leftend_found, global_rightend_found;
	size_t lastword;

	for (c = 0; c < numcon_total; c++)
	{
		/* In a paraphrase construction ("John ran, he said"),
		   the paraphrasing clause doesn't get
		   an S. (This is true in Treebank II, not Treebank I) */

		if (uppercompare(ctxt->constituent[c].start_link, "CP") == 0)
		{
			ctxt->constituent[c].valid = false;
		}

		/* If it's a possessive with an "'s", the NP on the left
		   should be extended to include the "'s". */
		if ((uppercompare(ctxt->constituent[c].start_link, "YS") == 0) ||
			(uppercompare(ctxt->constituent[c].start_link, "YP") == 0))
		{
			ctxt->constituent[c].right++;
		}

		/* If a constituent has starting link MVpn, it's a time
		   expression like "last week"; label it as a noun phrase
		   (incorrectly) */

		if (strcmp(ctxt->constituent[c].start_link, "MVpn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}
		if (strcmp(ctxt->constituent[c].start_link, "COn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}
		if (strcmp(ctxt->constituent[c].start_link, "Mpn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}

		/* If the constituent is an S started by "but" or "and" at
		   the beginning of the sentence, it should be ignored. */

		if ((strcmp(ctxt->constituent[c].start_link, "Wdc") == 0) &&
			(ctxt->constituent[c].left == 2))
		{
			ctxt->constituent[c].valid = false;
		}

		/* For prenominal adjectives, an ADJP constituent is assigned
		   if it's a hyphenated (Ah) or comparative (Am) adjective;
		   otherwise no ADJP is assigned, unless the phrase is more
		   than one word long (e.g. "very big"). The same with certain
		   types of adverbs. */
		/* That was for Treebank I. For Treebank II, the rule only
		   seems to apply to prenominal adjectives (of all kinds).
		   However, it also applies to number expressions ("QP"). */

		if ((post_process_match("A", ctxt->constituent[c].start_link) == 1) ||
			(ctxt->constituent[c].domain_type == 'd') ||
			(ctxt->constituent[c].domain_type == 'h')) {
			if (ctxt->constituent[c].right-ctxt->constituent[c].left == 0)
			{
				ctxt->constituent[c].valid = false;
			}
		}

		/* Pull a preceding "$" into an 'h'-domain constituent. The
		   left > 0 guard keeps the word[left - 1] lookup inside the
		   word array when the constituent starts at word 0. */
		if ((ctxt->constituent[c].domain_type == 'h') &&
			(ctxt->constituent[c].left > 0) &&
			(strcmp(linkage->word[ctxt->constituent[c].left - 1], "$") == 0))
		{
			ctxt->constituent[c].left--;
		}
	}

	/* If there's a global S constituent that includes everything
	   except a final terminating punctuation (period or question mark),
	   extend it by one word. We know it's the terminating punctuation,
	   because it links to the right wall with an RW link.  If it's
	   not, then that final link is not there...
	 */
	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].right == linkage->num_words - 3) &&
			(ctxt->constituent[c].left == 1) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0))
		{
			size_t ln;
			/* Look for a link from the second-to-last word to the
			   last word. */
			for (ln = 0; ln < linkage->num_links; ln++)
			{
				if ((linkage->link_array[ln].lw == linkage->num_words - 2) &&
				    (linkage->link_array[ln].rw == linkage->num_words - 1))
				{
					ctxt->constituent[c].right++;
					break;
				}
			}
		}
	}

	/* If there's no S boundary at the very left end of the sentence,
	   or the very right end, create a new S spanning the entire sentence */

	lastword = linkage->num_words - 2;
	global_leftend_found = false;
	global_rightend_found = false;
	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].left == 1) &&
		   (strcmp(ctxt->constituent[c].type, "S") == 0) &&
			ctxt->constituent[c].valid)
		{
			global_leftend_found = true;
		}
	}

	for (c = 0; c < numcon_total; c++)
	{
		if ((ctxt->constituent[c].right >= lastword) &&
			(strcmp(ctxt->constituent[c].type, "S") == 0) &&
		   ctxt->constituent[c].valid)
		{
			global_rightend_found = true;
		}
	}

	if ((global_leftend_found == false) || (global_rightend_found == false))
	{
		/* Append the sentence-wide S in the slot just past the
		   existing constituents. */
		c = numcon_total;
		ctxt->constituent[c].left = 1;
		ctxt->constituent[c].right = linkage->num_words-1;
		ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
		ctxt->constituent[c].valid = true;
		ctxt->constituent[c].domain_type = 'x';
		numcon_total++;
		/* Only the printf is guarded by the verbosity test;
		   print_constituent() is always called. */
		if (verbosity >= 2)
			printf("Adding global sentence constituent:\n");
		print_constituent(ctxt, linkage, c);
	}

	return numcon_total;
}
Example #22
0
/**
 * Build the constituents for one linkage from its post-processing
 * domains, writing them into ctxt->constituent[] starting at index
 * numcon_total.
 *
 * Stages, in order:
 *  1. For every domain in linkage->hpsg_pp_data, compute the word span
 *     covered by the domain's links and add a constituent for it, plus
 *     any extra constituents (S, SBAR, WHNP, WHPP, WHADVP) selected by
 *     the domain type or by the name of the constituent's start link.
 *  2. Run the gen_comp() passes that generate complement constituents.
 *  3. Call adjust_subordinate_clauses(), then step the left boundary of
 *     'p'-domain constituents past a leading comma.
 *  4. Run a pairwise overlap check, adjusting boundaries, and repeat it
 *     until a pass makes no comma adjustment.
 *
 * @param ctxt          Constituent context; its constituent[] array is
 *                      written and its wordtype[] array is read.
 * @param linkage       Linkage whose domains, links and words are read.
 * @param numcon_total  Index of the first slot to fill in
 *                      ctxt->constituent[].
 * @return The number of constituents added by this call.
 */
static int read_constituents_from_domains(con_context_t *ctxt, Linkage linkage,
                                          int numcon_total)
{
	size_t d, l, w2;
	int c, w, c2, numcon_subl = 0;

	/* c tracks the current slot in ctxt->constituent[]. After each
	 * add_constituent() call below, c is used as the index of the
	 * constituent that was just added (see the start_link reads), and
	 * the loop's c++ then steps past it. */
	for (d = 0, c = numcon_total; d < linkage->hpsg_pp_data.N_domains; d++, c++)
	{
		size_t leftmost, rightmost, leftlimit;
		int rootleft;
		List_o_links * dlink;

		Domain domain = linkage->hpsg_pp_data.domain_array[d];

		// rootright = linkage_get_link_rword(linkage, domain.start_link);
		rootleft =  linkage_get_link_lword(linkage, domain.start_link);

		/* For these domain types the span starts at the LEFT word of
		 * the start link and may extend leftwards without limit; for
		 * all other types it starts at the RIGHT word of the start
		 * link and may not extend left of (start link's left word + 1). */
		if ((domain.type=='c') ||
			(domain.type=='d') ||
			(domain.type=='e') ||
			(domain.type=='f') ||
			(domain.type=='g') ||
			(domain.type=='u') ||
			(domain.type=='y'))
		{
			leftlimit = 0;
			leftmost = linkage_get_link_lword(linkage, domain.start_link);
			rightmost = linkage_get_link_lword(linkage, domain.start_link);
		}
		else
		{
			leftlimit = linkage_get_link_lword(linkage, domain.start_link) + 1;
			leftmost = linkage_get_link_rword(linkage, domain.start_link);
			rightmost = linkage_get_link_rword(linkage, domain.start_link);
		}

		/* leftmost and rightmost were both initialised above to a single
		 * word of the start link, so a domain that contains no links
		 * still yields a one-word span. Now widen the span over every
		 * link in the domain: leftwards only down to leftlimit,
		 * rightwards without limit.
		 */
		for (dlink = domain.lol; dlink != NULL; dlink = dlink->next)
		{
			l = dlink->link;

			if ((linkage_get_link_lword(linkage, l) < leftmost) &&
				(linkage_get_link_lword(linkage, l) >= leftlimit))
			{
				leftmost = linkage_get_link_lword(linkage, l);
			}

			if (linkage_get_link_rword(linkage, l) > rightmost)
			{
				rightmost = linkage_get_link_rword(linkage, l);
			}
		}

		/* Step c back by one before the first add_constituent() call.
		 * NOTE(review): presumably add_constituent() advances the index
		 * it is given and returns the slot it filled - confirm against
		 * its definition. */
		c--;
		c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost,
						cons_of_domain(linkage, domain.type));

		/* Extra constituents keyed on the domain type or on the name of
		 * the start link of the most recently added constituent. Each
		 * test below reads constituent[c] as left by the previous add. */
		if (domain.type == 'z')
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
		}
		if (domain.type=='c')
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("Ce*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("Rn", ctxt->constituent[c].start_link)==1))
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "SBAR");
		}
		if ((post_process_match("R*", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("MX#r", ctxt->constituent[c].start_link)==1))
		{
			/* One-word WHNP at the left edge, skipping a leading comma. */
			w = leftmost;
			if (strcmp(linkage->word[w], ",") == 0) w++;
			c = add_constituent(ctxt, c, linkage, &domain, w, w, "WHNP");
		}
		if (post_process_match("Mj", ctxt->constituent[c].start_link) == 1)
		{
			/* Two-word WHPP at the left edge (after any comma), with a
			 * one-word WHNP on its second word. */
			w = leftmost;
			if (strcmp(linkage->word[w], ",") == 0) w++;
			c = add_constituent(ctxt, c, linkage, &domain, w, w+1, "WHPP");
			c = add_constituent(ctxt, c, linkage, &domain, w+1, w+1, "WHNP");
		}
		if ((post_process_match("Ss#d", ctxt->constituent[c].start_link)==1) ||
			(post_process_match("B#d", ctxt->constituent[c].start_link)==1))
		{
			/* WHNP on the start link's left word, then an SBAR running
			 * from that word to the right edge of the constituent at
			 * index c-1 (the one preceding the WHNP just added). */
			c = add_constituent(ctxt, c, linkage, &domain, rootleft, rootleft, "WHNP");
			c = add_constituent(ctxt, c, linkage, &domain,
							rootleft, ctxt->constituent[c-1].right, "SBAR");
		}
		if (post_process_match("CP", ctxt->constituent[c].start_link)==1)
		{
			/* Move the current constituent past a leading comma, then
			 * add an S from word 1 to the last word. */
			if (strcmp(linkage->word[leftmost], ",") == 0)
				ctxt->constituent[c].left++;
			c = add_constituent(ctxt, c, linkage, &domain, 1, linkage->num_words-1, "S");
		}
		if ((post_process_match("MVs", ctxt->constituent[c].start_link)==1) ||
			(domain.type=='f'))
		{
			/* A clause opened by "when" (after any comma) gets a
			 * one-word WHADVP. */
			w = ctxt->constituent[c].left;
			if (strcmp(linkage->word[w], ",") == 0)
				w++;
			if (strcmp(linkage->word[w], "when") == 0)
			{
				c = add_constituent(ctxt, c, linkage, &domain, w, w, "WHADVP");
			}
		}
		if (domain.type=='t')
		{
			c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
		}
		if ((post_process_match("QI", ctxt->constituent[c].start_link) == 1) ||
			(post_process_match("Mr", ctxt->constituent[c].start_link) == 1) ||
			(post_process_match("MX#d", ctxt->constituent[c].start_link) == 1))
		{
			/* The label of the one-word constituent depends on the
			 * word type recorded in ctxt->wordtype[] for that word. */
			const char * name = "";
			w = leftmost;
			if (strcmp(linkage->word[w], ",") == 0) w++;
			if (ctxt->wordtype[w] == NONE)
				name = "WHADVP";
			else if (ctxt->wordtype[w] == QTYPE)
				name = "WHNP";
			else if (ctxt->wordtype[w] == QDTYPE)
				name = "WHNP";
			else
				assert(0, "Unexpected word type");
			c = add_constituent(ctxt, c, linkage, &domain, w, w, name);

			if (ctxt->wordtype[w] == QDTYPE)
			{
				/* Now find the finite verb to the right, start an S */
				/* Limit w2 to sentence length. */
				// for( w2=w+1; w2 < ctxt->r_limit-1; w2++ )
				/* If no STYPE/PTYPE word is found, w2 ends up equal
				 * to rightmost. */
				for (w2 = w+1; w2 < rightmost; w2++)
				  if ((ctxt->wordtype[w2] == STYPE) || (ctxt->wordtype[w2] == PTYPE)) break;

				/* Adjust the right boundary of previous constituent */
				ctxt->constituent[c].right = w2 - 1;
				c = add_constituent(ctxt, c, linkage, &domain, w2, rightmost, "S");
			}
		}

		/* Sanity checks on the last constituent added for this domain;
		 * both only report an error and processing continues. */
		if (ctxt->constituent[c].domain_type == '\0')
		{
			err_ctxt ec;
			err_msg(&ec, Error, "Error: no domain type assigned to constituent\n");
		}
		if (ctxt->constituent[c].start_link == NULL)
		{
			err_ctxt ec;
			err_msg(&ec, Error, "Error: no type assigned to constituent\n");
		}
	}

	/* After the loop, c is one past the last constituent written. */
	numcon_subl = c - numcon_total;
	/* numcon_subl = handle_islands(linkage, numcon_total, numcon_subl);  */

	if (verbosity >= 2)
		printf("Constituents added at first stage:\n");

	/* Called for every new constituent regardless of verbosity. */
	for (c = numcon_total; c < numcon_total + numcon_subl; c++)
	{
		print_constituent(ctxt, linkage, c);
	}

	/* Each gen_comp() pass below returns the updated count of
	 * constituents added by this function, which feeds the next pass. */

	/* Opener case - generates S around main clause.
	   (This must be done first; the S generated will be needed for
	   later cases.) */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "S", "S", CASE_OPENER);

	/* pp opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "S", "S", CASE_PPOPEN);

	/* participle opener case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "S", "S", "S", CASE_PART_OPEN);

	/* Subject-phrase case; every main VP generates an S */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "S", "NP", CASE_S);

	/* Relative clause case; an SBAR generates a complement NP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "NP", "NP", CASE_REL_CLAUSE);

	/* Participle modifier case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "NP", "NP", CASE_PART_MOD);

	/* PP modifying NP (uses the same case code as the participle
	   modifier pass above) */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "NP", "NP", CASE_PART_MOD);

	/* Appositive case */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "NP", "NP", CASE_APPOS);

	/* S-V inversion case; an NP generates a complement VP */
	numcon_subl =
		gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "SINV", "VP", CASE_SVINV);

	adjust_subordinate_clauses(ctxt, linkage, numcon_total, numcon_subl);

	/* A 'p'-domain constituent that begins on a comma starts one word
	 * later instead. */
	for (c = numcon_total; c < numcon_total + numcon_subl; c++)
	{
		if ((ctxt->constituent[c].domain_type=='p') &&
			(strcmp(linkage->word[ctxt->constituent[c].left], ",")==0))
		{
			ctxt->constituent[c].left++;
		}
	}

	/* Make sure the constituents are nested. If two constituents
	 * are not nested: whichever constituent has the furthest left
	 * boundary, shift that boundary rightwards to the left boundary
	 * of the other one.
	 */
	while (true)
	{
		bool adjustment_made = false;
		for (c = numcon_total; c < numcon_total + numcon_subl; c++)
		{
			for (c2 = numcon_total; c2 < numcon_total + numcon_subl; c2++)
			{
				/* c starts and ends before c2 does, yet reaches into
				 * c2: the two overlap without nesting. */
				if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
					(ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
					(ctxt->constituent[c].right >= ctxt->constituent[c2].left))
				{
					/* We've found two overlapping constituents.
					   If one is larger, except the smaller one
					   includes an extra comma, adjust the smaller one
					   to exclude the comma */

					if ((strcmp(linkage->word[ctxt->constituent[c2].right], ",") == 0) ||
						(strcmp(linkage->word[ctxt->constituent[c2].right],
								"RIGHT-WALL") == 0))
					{
						if (verbosity >= 2)
							printf("Adjusting %d to fix comma overlap\n", c2);
						adjust_for_right_comma(ctxt, linkage, c2);
						adjustment_made = true;
					}
					else if (strcmp(linkage->word[ctxt->constituent[c].left], ",") == 0)
					{
						if (verbosity >= 2)
							printf("Adjusting c %d to fix comma overlap\n", c);
						adjust_for_left_comma(ctxt, linkage, c);
						adjustment_made = true;
					}
					else
					{
						/* No comma involved: force nesting by moving
						 * c's left boundary onto c2's. This branch
						 * does not set adjustment_made, so it does
						 * not by itself trigger another pass. */
						if (verbosity >= 2)
						{
							err_ctxt ec;
							err_msg(&ec, Warn,
							      "Warning: the constituents aren't nested! "
							      "Adjusting them. (%d, %d)\n", c, c2);
					  }
					  ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}
				}
			}
		}
		if (adjustment_made == false) break;
	}

	/* Capacity is checked only here, after all constituents have
	 * already been written. */
	assert (numcon_total + numcon_subl < ctxt->conlen, "Too many constituents");
	return numcon_subl;
}
Example #23
0
/**
 * Remove disjuncts that would trigger a "contains one" post-processing
 * rule which cannot be satisfied by any connector left in the sentence.
 *
 * A multiset of all connector strings in the sentence is built first.
 * A disjunct is dropped when one of its connectors matches the selector
 * of a (non-wildcard) rule and rule_satisfiable() reports that the
 * rule's link set cannot be met from that multiset. The dropped
 * disjunct's connectors are then taken out of the multiset, and the
 * whole scan is repeated for as long as some removal reports a change.
 *
 * @param sent  Sentence whose disjuncts are examined and pruned.
 * @param opts  Parse options; pruning is skipped unless
 *              perform_pp_prune is set.
 * @return Total number of disjuncts deleted (0 when pruning is skipped).
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	if (sent->postprocessor == NULL) return 0;
	if (!opts->perform_pp_prune) return 0;

	pp_knowledge *knowledge = sent->postprocessor->knowledge;
	multiset_table *cmt = cms_table_new();

	/* Mark every disjunct as live and record all of its connectors:
	 * right-going ones first, then left-going ones. */
	for (size_t w = 0; w < sent->length; w++)
	{
		for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
		{
			d->marked = true;
			for (int dir = 0; dir < 2; dir++)
			{
				Connector *c = (dir == 0) ? d->right : d->left;
				for (; c != NULL; c = c->next)
				{
					insert_in_cms_table(cmt, connector_string(c));
				}
			}
		}
	}

	int total_deleted = 0;
	bool change;
	do
	{
		int pass_deleted = 0;
		change = false;

		for (size_t w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				if (!d->marked) continue;

				/* Look for the first connector that triggers a rule
				 * which cannot be satisfied. All three loops stop as
				 * soon as one is found. */
				bool doomed = false;
				for (size_t r = 0;
				     !doomed && (r < knowledge->n_contains_one_rules); r++)
				{
					pp_rule *rule = &knowledge->contains_one_rules[r];

					/* Rules whose selector has a wildcard are ignored. */
					if (rule->selector_has_wildcard) continue;

					for (int dir = 0; !doomed && (dir < 2); dir++)
					{
						Connector *c = (dir == 0) ? d->right : d->left;
						for (; !doomed && (c != NULL); c = c->next)
						{
							/* Only connectors matching the rule's
							 * trigger (selector) are of interest. */
							if (!post_process_match(rule->selector,
							                        connector_string(c)))
							{
								continue;
							}

							/* Trigger matched; check the criterion links. */
							if (!rule_satisfiable(cmt, rule->link_set))
							{
								rule->use_count++;
								doomed = true;
							}
						}
					}
				}
				if (!doomed) continue;

				/* Unmark the disjunct (actual deletion happens at the
				 * end) and withdraw its connectors from the multiset. */
				pass_deleted++;
				total_deleted++;
				d->marked = false;
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *c = (dir == 0) ? d->right : d->left;
					for (; c != NULL; c = c->next)
					{
						if (delete_from_cms_table(cmt, connector_string(c)))
						{
							change = true;
						}
					}
				}
			}
		}

		lgdebug(D_PRUNE, "Debug: pp_prune pass deleted %d\n", pass_deleted);
	}
	while (change);

	cms_table_delete(cmt);

	if (total_deleted > 0)
	{
		delete_unmarked_disjuncts(sent);
		if (verbosity_level(D_PRUNE))
		{
			prt_error("\n\\");
			prt_error("Debug: After pp_prune:\n\\");
			print_disjunct_counts(sent);
		}
	}

	print_time(opts, "pp pruning");

	return total_deleted;
}
Example #24
0
/**
 * Apply final clean-ups to the constituents of a linkage.
 *
 * For each of the first numcon_total constituents this invalidates,
 * relabels or resizes it according to its start link, domain type and
 * aux value. Afterwards it extends a global S over a final ".", adds a
 * sentence-wide S if no valid S touches both ends of the sentence, and
 * finally forces overlapping valid constituents to nest.
 *
 * @param ctxt          Constituent context; ctxt->constituent[] is
 *                      modified in place and may gain one entry.
 * @param linkage       Linkage providing the word array and word count.
 * @param numcon_total  Number of constituents currently in use.
 * @return The (possibly incremented) number of constituents.
 */
static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	int c, c2, global_leftend_found, adjustment_made,
		global_rightend_found, lastword;
	Sentence sent;
	sent = linkage_get_sentence(linkage);

	for (c=0; c<numcon_total; c++)
	{
		/* In a paraphrase construction ("John ran, he said"),
		   the paraphrasing clause doesn't get
		   an S. (This is true in Treebank II, not Treebank I) */

		if (uppercompare(ctxt->constituent[c].start_link, "CP") == 0)
		{
			ctxt->constituent[c].valid = 0;
		}

		/* If it's a possessive with an "'s", the NP on the left
		   should be extended to include the "'s". */
		if ((uppercompare(ctxt->constituent[c].start_link, "YS") == 0) ||
			(uppercompare(ctxt->constituent[c].start_link, "YP") == 0))
		{
			ctxt->constituent[c].right++;
		}

		/* If a constituent has starting link MVpn, it's a time
		   expression like "last week"; label it as a noun phrase
		   (incorrectly). COn and Mpn start links are relabelled
		   the same way. */

		if (strcmp(ctxt->constituent[c].start_link, "MVpn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}
		if (strcmp(ctxt->constituent[c].start_link, "COn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}
		if (strcmp(ctxt->constituent[c].start_link, "Mpn") == 0)
		{
			ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
		}

		/* If the constituent is an S started by "but" or "and" at
		   the beginning of the sentence, it should be ignored. */

		if ((strcmp(ctxt->constituent[c].start_link, "Wdc") == 0) &&
			(ctxt->constituent[c].left == 2))
		{
			ctxt->constituent[c].valid = 0;
		}

		/* For prenominal adjectives, an ADJP constituent is assigned
		   if it's a hyphenated (Ah) or comparative (Am) adjective;
		   otherwise no ADJP is assigned, unless the phrase is more
		   than one word long (e.g. "very big"). The same with certain
		   types of adverbs. */
		/* That was for Treebank I. For Treebank II, the rule only
		   seems to apply to prenominal adjectives (of all kinds).
		   However, it also applies to number expressions ("QP"). */

		if ((post_process_match("A", ctxt->constituent[c].start_link)==1) ||
			(ctxt->constituent[c].domain_type=='d') ||
			(ctxt->constituent[c].domain_type=='h'))
		{
			/* One-word constituents of these kinds are dropped. */
			if (ctxt->constituent[c].right-ctxt->constituent[c].left==0)
			{
				ctxt->constituent[c].valid=0;
			}
		}

		/* An 'h'-domain constituent directly preceded by "$" is
		   extended leftwards to include it. The left > 0 test keeps
		   the word[left-1] lookup inside the word array. */
		if ((ctxt->constituent[c].domain_type=='h') &&
			(ctxt->constituent[c].left > 0) &&
			(strcmp(linkage->word[ctxt->constituent[c].left-1], "$")==0))
		{
			ctxt->constituent[c].left--;
		}

		/* If a constituent has type VP and its aux value is 2,
		   this means it's an aux that should be printed; change its
		   type to "X". If its aux value is 1, set "valid" to 0. (This
		   applies to Treebank I only) */

		if (ctxt->constituent[c].aux == 2)
		{
			ctxt->constituent[c].type = string_set_add("X", ctxt->phrase_ss);
		}
		if (ctxt->constituent[c].aux == 1)
		{
			ctxt->constituent[c].valid = 0;
		}
	}

	/* If there's a global S constituent that includes everything
	   except a final period, extend it by one word.
	   NOTE(review): this comment used to say "period or question
	   mark", but only "." is tested here; confirm whether "?" should
	   be handled as well. */

	for (c=0; c<numcon_total; c++)
	{
		if ((ctxt->constituent[c].right==(linkage->num_words)-3) &&
			(ctxt->constituent[c].left==1) &&
			(strcmp(ctxt->constituent[c].type, "S")==0) &&
			(strcmp(sent->word[(linkage->num_words)-2].string, ".")==0))
		{
			ctxt->constituent[c].right++;
		}
	}

	/* If there's no S boundary at the very left end of the sentence,
	   or the very right end, create a new S spanning the entire sentence */

	lastword=(linkage->num_words)-2;
	global_leftend_found = 0;
	global_rightend_found = 0;
	for (c=0; c<numcon_total; c++)
	{
		if ((ctxt->constituent[c].left==1) &&
			(strcmp(ctxt->constituent[c].type, "S")==0) &&
			(ctxt->constituent[c].valid==1))
		{
			global_leftend_found=1;
		}
	}
	for (c=0; c<numcon_total; c++)
	{
		if ((ctxt->constituent[c].right>=lastword) &&
			(strcmp(ctxt->constituent[c].type, "S")==0) &&
			(ctxt->constituent[c].valid==1))
		{
			global_rightend_found=1;
		}
	}
	if ((global_leftend_found==0) || (global_rightend_found==0))
	{
		c = numcon_total;
		ctxt->constituent[c].left = 1;
		ctxt->constituent[c].right = linkage->num_words-1;
		ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
		ctxt->constituent[c].valid = 1;
		ctxt->constituent[c].domain_type = 'x';
		numcon_total++;
		if (verbosity >= 2)
		{
			printf("Adding global sentence constituent:\n");
		}
		/* Called regardless of verbosity, as before. */
		print_constituent(ctxt, linkage, c);
	}

	/* Check once more to see if constituents are nested (checking BETWEEN sublinkages
	   this time). Repeat until a full pass makes no adjustment: moving
	   one left boundary can create a new overlap with a pair that was
	   already examined. The loop terminates because every adjustment
	   moves some left boundary strictly rightwards, onto a left
	   boundary value that already exists. */

	while (1)
	{
		adjustment_made=0;
		for (c=0; c<numcon_total; c++)
		{
			if (ctxt->constituent[c].valid==0) continue;
			for (c2=0; c2<numcon_total; c2++)
			{
				if (ctxt->constituent[c2].valid==0) continue;

				/* c starts and ends before c2 does, yet reaches
				   into c2: overlapping but not nested. */
				if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
					(ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
					(ctxt->constituent[c].right >= ctxt->constituent[c2].left))
				{
					if (verbosity>=2)
					{
						printf("WARNING: the constituents aren't nested! Adjusting them."
						       "(%d, %d)\n", c, c2);
					}
					ctxt->constituent[c].left = ctxt->constituent[c2].left;
					adjustment_made=1;
				}
			}
		}
		if (adjustment_made==0) break;
	}
	return numcon_total;
}
Example #25
0
/**
 * This function looks for constituents of type ctype1. Say it finds
 * one, call it c1. It searches for the next larger constituent of
 * type ctype2, call it c2. It then generates a new constituent of
 * ctype3, containing all the words in c2 but not c1.
 *
 * Only the constituents with indices numcon_total ..
 * numcon_total+numcon_subl-1 are examined; new constituents are
 * appended after them.
 *
 * @param ctxt          Constituent context; constituent[] is read and
 *                      extended, word_used[][] is read.
 * @param linkage       Linkage providing num_words and the index
 *                      (linkage->current) used to select a word_used row.
 * @param numcon_total  Index of the first constituent to consider.
 * @param numcon_subl   Number of constituents to consider.
 * @param ctype1        Type label of the constituent to complement.
 * @param ctype2        Type label of the enclosing constituent.
 * @param ctype3        Type label given to the generated constituent.
 * @param x             Case selector; it enables extra filters on c1
 *                      and decides on which side of c1 the new
 *                      constituent lies (right for 5, 6 and 9, left
 *                      otherwise). NOTE(review): callers pass CASE_*
 *                      constants; presumably these equal the numeric
 *                      values tested here - confirm.
 * @return The updated count of constituents from numcon_total onwards.
 */
static int gen_comp(con_context_t *ctxt, Linkage linkage, int numcon_total, int numcon_subl,
					const char * ctype1, const char * ctype2, const char * ctype3, int x)
{
	int w, w2, w3, c, c1, c2, done;
	c = numcon_total + numcon_subl;

	for (c1=numcon_total; c1<numcon_total + numcon_subl; c1++) {

		/* If ctype1 is NP, it has to be an appositive to continue */
		if ((x==4) && (post_process_match("MX#*", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is X, and domain_type is t, it's an infinitive - skip it */
		if ((x==2) && (ctxt->constituent[c1].domain_type=='t'))
			continue;

		/* If it's domain-type z, it's a subject-relative clause;
		   the VP doesn't need an NP */
		if (ctxt->constituent[c1].domain_type=='z')
			continue;

		/* If ctype1 is X or VP, and it's not started by an S, don't generate an NP
		 (Neither of the two previous checks are necessary now, right?) */
		if ((x==1 || x==2) &&
			(((post_process_match("S", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SX", ctxt->constituent[c1].start_link) == 0) &&
			  (post_process_match("SF", ctxt->constituent[c1].start_link) == 0)) ||
			 (post_process_match("S##w", ctxt->constituent[c1].start_link) != 0)))
			continue;

		/* If it's an SBAR (relative clause case), it has to be a relative clause */
		if ((x==3) &&
			((post_process_match("Rn", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("R*", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#r", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("Mr", ctxt->constituent[c1].start_link) == 0) &&
			 (post_process_match("MX#d", ctxt->constituent[c1].start_link) == 0)))
			continue;

		/* If ctype1 is SBAR (clause opener case), it has to be an f domain */
		if ((x==5) && (ctxt->constituent[c1].domain_type!='f'))
			continue;

		/* If ctype1 is SBAR (pp opener case), it has to be a g domain */
		if ((x==6) && (ctxt->constituent[c1].domain_type!='g'))
			continue;

		/* If ctype1 is NP (paraphrase case), it has to be started by an SI */
		if ((x==7) && (post_process_match("SI", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is VP (participle modifier case), it has to be
		   started by an Mv or Mg */
		if ((x==8) && (post_process_match("M", ctxt->constituent[c1].start_link)==0))
			continue;

		/* If ctype1 is VP (participle opener case), it has
		   to be started by a COp */
		if ((x==9) && (post_process_match("COp", ctxt->constituent[c1].start_link)==0))
			continue;

		/* Now start at the bounds of c1, and work outwards until you
		   find a larger constituent of type ctype2 */
		if (!(strcmp(ctxt->constituent[c1].type, ctype1)==0))
			continue;

		if (verbosity>=2)
			printf("Generating complement constituent for c %d of type %s\n",
				   c1, ctype1);
		done=0;
		/* Candidate spans [w2, w3] grow leftwards in the outer loop and
		   rightwards in the middle loop. All three loops stop once
		   done is set. */
		for (w2=ctxt->constituent[c1].left; (done==0) && (w2>=0); w2--) {
			for (w3=ctxt->constituent[c1].right;
				 (done==0) && (w3<linkage->num_words); w3++) {
				for (c2=numcon_total; (done==0) &&
						 (c2 < numcon_total + numcon_subl); c2++) {
					/* c2 must span exactly [w2, w3] and must not be c1 */
					if (!((ctxt->constituent[c2].left==w2) &&
						  (ctxt->constituent[c2].right==w3)) || (c2==c1))
						continue;
					if (!(strcmp(ctxt->constituent[c2].type, ctype2)==0))
						continue;

					/* if the new constituent (c) is to the left
					   of c1, its right edge should be adjacent to the
					   left edge of c1 - or as close as possible
					   without going outside the current sublinkage.
					   (Or substituting right and left as necessary.) */

					if ((x==5) || (x==6) || (x==9)) {
								/* This is the case where c is to the
								   RIGHT of c1 */
						/* Find the first used word right of c1. The
						   scan stops at c2's right edge (== w3), so
						   it cannot run past the end of word_used;
						   a word beyond that edge is rejected just
						   below anyway. */
						w = ctxt->constituent[c1].right+1;
						while ((w <= w3) &&
							   (ctxt->word_used[linkage->current][w] != 1)) {
							w++;
						}
						if (w > ctxt->constituent[c2].right)
						{
							done=1;
							continue;
						}
						ctxt->constituent[c].left = w;
						ctxt->constituent[c].right = ctxt->constituent[c2].right;
					}
					else {
						/* Find the first used word left of c1. The
						   scan stops at c2's left edge (== w2 >= 0),
						   so w never indexes below zero. */
						w = ctxt->constituent[c1].left-1;
						while ((w >= w2) &&
							   (ctxt->word_used[linkage->current][w] != 1)) {
							w--;
						}
						if (w < ctxt->constituent[c2].left) {
							done=1;
							continue;
						}
						ctxt->constituent[c].right = w;
						ctxt->constituent[c].left = ctxt->constituent[c2].left;
					}

					/* Note: the comma adjustments are applied to c1,
					   not to the newly created constituent c. */
					adjust_for_left_comma(ctxt, linkage, c1);
					adjust_for_right_comma(ctxt, linkage, c1);

					ctxt->constituent[c].type =
						string_set_add(ctype3, ctxt->phrase_ss);
					ctxt->constituent[c].domain_type = 'x';
					ctxt->constituent[c].start_link =
						string_set_add("XX", ctxt->phrase_ss);
					ctxt->constituent[c].start_num =
						ctxt->constituent[c1].start_num; /* bogus */
					if (verbosity >= 2)
					{
						printf("Larger c found: c %d (%s); ",
							   c2, ctype2);
						printf("Adding constituent:\n");
						print_constituent(ctxt, linkage, c);
					}
					c++;
					assert(c < MAXCONSTITUENTS, "Too many constituents");
					done = 1;
				}
			}
		}
		if (verbosity>=2) {
		  if (done==0)
			printf("No constituent added, because no larger %s "
				   " was found\n", ctype2);
		}
	}
	numcon_subl = c - numcon_total;
	return numcon_subl;
}
Example #26
0
/**
 * Mark connectors that can only trigger unsatisfiable "contains one"
 * post-processing rules.
 *
 * Steps:
 *  1. Insert every connector of the sentence into a connector multiset
 *     table, read from the jet-sharing table when it is populated and
 *     from the per-word disjunct lists otherwise.
 *  2. For every rule whose selector has no wildcard, walk the table
 *     bucket for the selector. Each connector matching the selector
 *     gets nearest_word = BAD_WORD if rule_satisfiable() rejects the
 *     rule's link set. The first satisfiable result ends the walk for
 *     that rule.
 *  3. Walk all connectors again and call mark_bad_connectors() on each,
 *     counting the calls that return true.
 *
 * @param sent  Sentence whose connectors are examined and marked.
 * @param opts  Parse options; pruning is skipped unless
 *              perform_pp_prune is set.
 * @return The count accumulated in step 3 (0 when pruning is skipped).
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	pp_knowledge *knowledge;
	multiset_table *cmt;

	if (sent->postprocessor == NULL) return 0;
	if (!opts->perform_pp_prune) return 0;

	knowledge = sent->postprocessor->knowledge;
	cmt = cms_table_new();

	jet_sharing_t *js = &sent->jet_sharing;
	if (js->table[0] != NULL)
	{
		/* Jet-sharing table present: entries are visited from id 1
		 * through entries[dir] inclusive, skipping connectors whose
		 * refcount is zero. */
		for (int dir = 0; dir < 2; dir++)
		{
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					insert_in_cms_table(cmt, c);
				}
			}
		}
	}
	else
	{
		/* No jet-sharing table: take the connectors straight from
		 * each disjunct, right-going list first, then left-going. */
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						insert_in_cms_table(cmt, c);
					}
				}
			}
		}
	}

	int D_deleted = 0;       /* Number of deleted disjuncts */
	int Cname_deleted = 0;   /* Number of deleted connector names */

	for (size_t i = 0; i < knowledge->n_contains_one_rules; i++)
	{
		pp_rule* rule = &knowledge->contains_one_rules[i]; /* The ith rule */
		const char *selector = rule->selector;  /* Selector string for this rule */
		pp_linkset *link_set = rule->link_set;  /* The set of criterion links */
		unsigned int hash = cms_hash(selector);

		if (rule->selector_has_wildcard) continue;  /* If it has a * forget it */

		/* The cms table does not change inside this loop, so once
		 * rule_satisfiable() has rejected this rule the answer is
		 * remembered here and the call is not repeated for the other
		 * connectors that match the selector. A per-rule flag is
		 * enough because a verdict is never consulted by another rule. */
		bool rule_unsatisfiable = false;

		for (Cms *cms = cmt->cms_table[hash]; cms != NULL; cms = cms->next)
		{
			Connector *c = cms->c;
			if (!post_process_match(selector, connector_string(c))) continue;

			ppdebug("Rule %zu: Selector %s, Connector %s\n",
			        i, selector, connector_string(c));
			/* We know c matches the trigger link of the rule. */
			/* Now check the criterion links */
			if (!rule_unsatisfiable && rule_satisfiable(cmt, link_set))
			{
				/* Rule can be satisfied: nothing to mark for it. */
				break;
			}

			rule_unsatisfiable = true;
			ppdebug("DELETE %s refcount %d\n", connector_string(c), c->refcount);
			c->nearest_word = BAD_WORD;
			Cname_deleted++;
			rule->use_count++;
		}
	}

	/* Iterate over all connectors and mark the bad trigger connectors.
	 * If the marked connector is not the shallow one, note that the
	 * shallow one on the same disjunct cannot be marked too (this could
	 * facilitate faster detection by power_prune()) because this would be
	 * wrongly reflected through the cms table. */

	if (js->table[0] != NULL)
	{
		for (int dir = 0; dir < 2; dir++)
		{
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					if (mark_bad_connectors(cmt, c))
					{
						/* Stop at the first hit in this chain. */
						D_deleted++;
						break;
					}
				}
			}
		}
	}
	else
	{
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						if (mark_bad_connectors(cmt, c))
						{
							/* Stops only this direction's list; the
							 * other direction of the same disjunct is
							 * still scanned and may count again. */
							D_deleted++;
							break;
						}
					}
				}

			}
		}
	}

	lgdebug(+D_PRUNE, "Deleted %d (%d connector names)\n",
	        D_deleted, Cname_deleted);

	cms_table_delete(cmt);

	print_time(opts, "pp pruning");

	return D_deleted;
}