Пример #1
0
/*
 * word_has_connector() - test whether a dictionary entry uses a connector.
 *
 * dn:        dictionary node to inspect.
 * cs:        connector string to look for.
 * direction: 0 selects the right-pointing connectors of each disjunct,
 *            1 selects the left-pointing ones.  Any other value selects
 *            nothing, so the result is then 0.
 *
 * Returns -1 when dn is NULL, 1 when some connector on the selected side of
 * some disjunct built for dn satisfies easy_match() against cs, 0 otherwise.
 * The disjuncts built for the check are released before returning.
 *
 * This can be used to see whether a word belongs to a category (by probing
 * for a category connector in a table) or whether a word carries a connector
 * in a normal dictionary.
 */
int word_has_connector(Dict_node * dn, char * cs, int direction) {
    Disjunct *all, *cur;
    Connector *conn;
    int found = 0;

    if (dn == NULL) return -1;

    all = build_disjuncts_for_dict_node(dn);
    if (all == NULL) return 0;

    for (cur = all; cur != NULL && !found; cur = cur->next) {
        /* Pick the connector chain on the requested side. */
        switch (direction) {
        case 0:
            conn = cur->right;
            break;
        case 1:
            conn = cur->left;
            break;
        default:
            conn = NULL;
            break;
        }

        while (conn != NULL) {
            if (easy_match(conn->string, cs) == 1) {
                found = 1;
                break;
            }
            conn = conn->next;
        }
    }

    free_disjuncts(all);
    return found;
}
Пример #2
0
/**
 * dict_display_word_info() - print a summary of the dictionary entries
 * that match the word s.
 *
 * For every node returned by dictionary_lookup_list() one line is written
 * to stdout: the entry string, the number of disjuncts built for it, and,
 * when the node has a file record, the file name in angle brackets.
 * If nothing matches, a single "matches nothing" line is printed instead.
 */
void dict_display_word_info(Dictionary dict, const char * s)
{
	Dict_node *matches = dictionary_lookup_list(dict, s);
	Dict_node *entry;

	if (NULL == matches)
	{
		printf("	\"%s\" matches nothing in the dictionary.\n", s);
		return;
	}

	printf("Matches:\n");
	entry = matches;
	while (entry != NULL)
	{
		/* The disjuncts are built only to be counted, then discarded. */
		Disjunct *built = build_disjuncts_for_dict_node(entry);
		Disjunct *walk = built;
		int ndisjuncts = 0;

		while (walk != NULL)
		{
			ndisjuncts += 1;
			walk = walk->next;
		}
		free_disjuncts(built);

		printf("    ");
		left_print_string(stdout, entry->string,
			"                         ");
		printf(" %5d  disjuncts ", ndisjuncts);
		if (entry->file != NULL)
		{
			printf("<%s>", entry->file->file);
		}
		printf("\n");

		entry = entry->right;
	}

	free_lookup_list(matches);
}
Пример #3
0
/**
 * Pack all disjuncts and connectors of a sentence into one big memory block.
 * This facilitates better memory caching for long sentences
 * (a performance gain of a few percent).
 *
 * The current Connector struct size is 32 (the original note said "32 bit";
 * presumably bytes -- TODO confirm), and future ones may be smaller, but
 * still with a power-of-2 size.
 * The idea is to put an integral number of connectors in each cache line
 * (assumed to be >= Connector struct size, e.g. 64 bytes),
 * so that one connector will not need 2 cache lines.
 *
 * The allocated memory includes 3 sections, in this order:
 * 1. A block for disjuncts, whose start is not specially aligned (the
 *    disjunct size is currently 56 bytes and cannot be reduced much).
 * 2. A small alignment gap, so that the next section starts on a
 *    CONN_ALIGNMENT boundary relative to the block start.
 * 3. A block of connectors.
 *
 * Sentences shorter than SHORTEST_SENTENCE_TO_PACK are left untouched.
 * Otherwise sent->disjuncts_connectors_memblock receives the result of
 * malloc(); each word's disjunct list is replaced by the copy produced by
 * pack_disjuncts_dup() and the old list is released with free_disjuncts().
 *
 * If the allocation fails, the memblock field is set to NULL and the
 * per-word disjunct lists are left exactly as they were (unpacked), instead
 * of writing through a NULL block.
 *
 * FIXME: 1. Find the "best" value for SHORTEST_SENTENCE_TO_PACK.
 * 2. Maybe this check should be done in two stages, the second one
 * using thresholds on the number of disjuncts and connectors.
 */
static void pack_sentence(Sentence sent)
{
	size_t dcnt = 0;
	size_t ccnt = 0;

	if (sent->length < SHORTEST_SENTENCE_TO_PACK) return;

	/* First pass: count how many disjuncts and connectors must fit. */
	for (size_t w = 0; w < sent->length; w++)
	{
		for (Disjunct *d = sent->word[w].d; NULL != d; d = d->next)
		{
			dcnt++;
			for (Connector *c = d->right; c != NULL; c = c->next) ccnt++;
			for (Connector *c = d->left; c != NULL; c = c->next) ccnt++;
		}
	}

#define CONN_ALIGNMENT sizeof(Connector)
	size_t dsize = dcnt * sizeof(Disjunct);
	dsize = ALIGN(dsize, CONN_ALIGNMENT); /* Align connector block. */
	size_t csize = ccnt * sizeof(Connector);

	void *memblock = malloc(dsize + csize);
	sent->disjuncts_connectors_memblock = memblock;
	if (NULL == memblock)
	{
		/* Out of memory (or a zero-sized request that returned NULL):
		 * keep the existing, individually allocated lists. */
		return;
	}

	Disjunct *dblock = memblock;
	Connector *cblock = (Connector *)((char *)memblock + dsize);

	/* Second pass: copy each word's list into the block, then drop the
	 * original list. */
	for (size_t i = 0; i < sent->length; i++)
	{
		Disjunct *word_disjuncts = sent->word[i].d;

		sent->word[i].d = pack_disjuncts_dup(word_disjuncts, &dblock, &cblock);
		free_disjuncts(word_disjuncts);
	}
}
Пример #4
0
/*
 * construct_neither() - add the special "neither ... nor" machinery.
 *
 * Does nothing unless the sentence contains the word "neither".
 * When it does:
 *   - every "neither" gets a special disjunct labelled NEITHER_LABEL
 *     catenated in front of its existing disjunct list;
 *   - every "nor" has its disjunct list passed through
 *     glom_aux_connector() with NEITHER_LABEL.
 *
 * Historical note (DS, 3/98): an earlier version removed all disjuncts of a
 * "nor" that had no "neither"; that was disabled because there seemed to be
 * no problem with keeping what the dictionary explicitly defines for "nor".
 */
static void construct_neither(Sentence sent) {
    int idx;

    if (sentence_contains(sent, L"neither")) {
        /* Pass 1: decorate each "neither". */
        for (idx = 0; idx < sent->length; idx++) {
            if (0 == wcscmp(sent->word[idx].string, L"neither")) {
                sent->word[idx].d = catenate_disjuncts(
                        special_disjunct(NEITHER_LABEL, L'+', L"", L"neither"),
                        sent->word[idx].d);
            }
        }

        /* Pass 2: attach the auxiliary connector to each "nor". */
        for (idx = 0; idx < sent->length; idx++) {
            if (0 == wcscmp(sent->word[idx].string, L"nor")) {
                sent->word[idx].d =
                    glom_aux_connector(sent->word[idx].d, NEITHER_LABEL, TRUE);
            }
        }
    }
}
Пример #5
0
/**
 * Set up the distance fields of every connector via set_dist_fields(), and
 * drop the disjuncts whose connectors would have to reach past either end
 * of the sentence.
 *
 * A disjunct is dropped when set_dist_fields() on its left chain returns a
 * negative value, or (only checked if the left side passed) when
 * set_dist_fields() on its right chain returns a value >= sent->length.
 * Surviving disjuncts are pushed onto a new list head, so each word's list
 * ends up in reverse order.
 */
static void setup_connectors(Sentence sent)
{
	size_t wordno = 0;

	while (wordno < sent->length)
	{
		Disjunct *kept = NULL;
		Disjunct *cur = sent->word[wordno].d;

		while (cur != NULL)
		{
			Disjunct *rest = cur->next;
			int out_of_range = (set_dist_fields(cur->left, wordno, -1) < 0);

			/* The right side is examined only when the left side fits. */
			if (!out_of_range)
			{
				out_of_range =
					(set_dist_fields(cur->right, wordno, 1) >= (int) sent->length);
			}

			if (out_of_range)
			{
				/* Detach before freeing so only this disjunct goes. */
				cur->next = NULL;
				free_disjuncts(cur);
			}
			else
			{
				cur->next = kept;
				kept = cur;
			}
			cur = rest;
		}

		sent->word[wordno].d = kept;
		wordno++;
	}
}
Пример #6
0
/*
 * free_sentence_disjuncts() - release the disjunct list of every word in
 * the sentence and reset each list pointer to NULL.  If the sentence
 * contains a conjunction, the AND tables are released as well.
 */
void free_sentence_disjuncts(Sentence sent) {
    int w = 0;

    while (w < sent->length) {
        free_disjuncts(sent->word[w].d);
        sent->word[w].d = NULL;
        ++w;
    }

    if (sentence_contains_conjunction(sent)) {
        free_AND_tables(sent);
    }
}
Пример #7
0
/**
 * Release the disjunct list of every word in the sentence and reset
 * each word's list pointer to NULL.
 */
static void free_sentence_disjuncts(Sentence sent)
{
	size_t w = 0;

	while (w < sent->length)
	{
		free_disjuncts(sent->word[w].d);
		sent->word[w].d = NULL;
		++w;
	}
}
Пример #8
0
/**
 * Release the per-link connectors and the per-word chosen disjuncts
 * of one linkage.
 *
 * For each of the lkg->num_links links, the right connector (rc) and then
 * the left connector (lc) are passed to free().  For each of the
 * lkg->num_words words, the chosen disjunct is passed to free_disjuncts().
 * The pointers themselves are not reset.
 */
void free_linkage_connectors_and_disjuncts(Linkage lkg)
{
  size_t link_no = 0;
  size_t word_no = 0;

  /* Connectors first. */
  while (link_no < lkg->num_links) {
    free(lkg->link_array[link_no].rc);
    free(lkg->link_array[link_no].lc);
    link_no++;
  }

  /* Then the disjuncts. */
  while (word_no < lkg->num_words) {
    free_disjuncts(lkg->chosen_disjuncts[word_no]);
    word_no++;
  }
}
Пример #9
0
/**
 * Release everything hanging off the sentence's word array: for each word
 * its X-node list, its disjunct list and its alternatives array; then the
 * word array itself, whose pointer is reset to NULL.
 */
static void free_sentence_words(Sentence sent)
{
	size_t w = 0;

	while (w < sent->length)
	{
		free_X_nodes(sent->word[w].x);
		free_disjuncts(sent->word[w].d);
		free(sent->word[w].alternatives);
		w++;
	}

	free((void *) sent->word);
	sent->word = NULL;
}
Пример #10
0
/**
 * For every word of the sentence, keep only the disjuncts whose `marked`
 * field is set and free all the others.  Kept disjuncts are pushed onto a
 * new list head, so each word's list ends up in reverse order.
 */
void delete_unmarked_disjuncts(Sentence sent)
{
	int word_no;

	for (word_no = 0; word_no < sent->length; word_no++) {
		Disjunct *survivors = NULL;
		Disjunct *cur = sent->word[word_no].d;

		while (cur != NULL) {
			Disjunct *following = cur->next;

			if (!cur->marked) {
				/* Detach first so that only this one disjunct is freed. */
				cur->next = NULL;
				free_disjuncts(cur);
			} else {
				cur->next = survivors;
				survivors = cur;
			}
			cur = following;
		}
		sent->word[word_no].d = survivors;
	}
}
Пример #11
0
/**
 * Takes the list of disjuncts pointed to by d, eliminates all
 * duplicates, and returns a pointer to a new list.
 * It frees the disjuncts that are eliminated.
 *
 * How it works: a temporary hash table is created whose size is
 * next_power_of_two_up(2 * count_disjuncts(d)).  Each input disjunct is
 * hashed with hash_disjunct() and compared, using disjunct_matches_alam(),
 * against the disjuncts already stored in its bucket.  Whichever of the
 * pair the comparison says should go is freed.  At the end all buckets
 * are relinked into one list, which is returned; the table is deleted.
 *
 * The input list is consumed (its nodes are relinked or freed), so the
 * caller must use only the returned pointer.  The order of the returned
 * list is determined by bucket order, not by the input order.
 *
 * When verbosity > 2 and at least one disjunct was freed, the number of
 * freed disjuncts is printed to stdout.
 */
Disjunct * eliminate_duplicate_disjuncts(Disjunct * d)
{
	int i, h, count;
	Disjunct *dn, *dx, *dxn, *front;
	count = 0; /* number of disjuncts freed so far */
	disjunct_dup_table *dt;

	dt = disjunct_dup_table_new(next_power_of_two_up(2 * count_disjuncts(d)));

	/* Insert the input disjuncts into the table one at a time. */
	for (;d!=NULL; d = dn)
	{
		dn = d->next; /* saved now: d->next is overwritten below */
		h = hash_disjunct(d);

		/* `front` accumulates the rebuilt bucket h. */
		front = NULL;
		for (dx = dt->dup_table[h]; dx != NULL; dx = dxn)
		{
			dxn = dx->next;
			if (disjunct_matches_alam(dx,d))
			{
				/* we know that d should be killed */
				d->next = NULL;
				free_disjuncts(d);
				count++;
				/* Re-attach dx and everything after it (not yet visited)
				 * to the rebuilt bucket.  Leaving the loop with
				 * dx != NULL signals below that d was not stored. */
				front = catenate_disjuncts(front, dx);
				break;
			} else if (disjunct_matches_alam(d,dx)) {
				/* we know that dx should be killed off */
				dx->next = NULL;
				free_disjuncts(dx);
				count++;
			} else {
				/* neither should be killed off */
				dx->next = front;
				front = dx;
			}
		}
		/* dx is NULL only if the whole bucket was scanned without
		 * freeing d. */
		if (dx == NULL) {
			/* we put d in the table */
			d->next = front;
			front = d;
		}
		dt->dup_table[h] = front;
	}

	/* d is now NULL */
	/* Collect the surviving disjuncts of every bucket into one list,
	 * prepending each node to d. */
	for (i = 0; i < dt->dup_table_size; i++)
	{
		for (dx = dt->dup_table[i]; dx != NULL; dx = dxn)
		{
			dxn = dx->next;
			dx->next = d;
			d = dx;
		}
	}

	if ((verbosity > 2) && (count != 0)) printf("killed %d duplicates\n", count);

	disjunct_dup_table_delete(dt);
	return d;
}
Пример #12
0
/**
 * classic_parse() -- parse the given sentence.
 * Perform parsing, using the original link-grammar parsing algorithm
 * given in the original link-grammar papers.
 *
 * Do the parse with the minimum number of null-links within the range
 * specified by opts->min_null_count and opts->max_null_count.
 *
 * To that end, call do_parse() with an increasing null_count, from
 * opts->min_null_count up to (including) opts->max_null_count, until a
 * parse is found.  The upper bound is additionally limited to the
 * sentence length.
 *
 * A note about the disjuncts save/restore that is done here:
 * To increase the parsing speed, before invoking do_parse(),
 * pp_and_power_prune() is invoked to remove connectors which have no
 * possibility to connect. It includes a significant optimization when
 * null_count==0 that makes a more aggressive removal, but this
 * optimization is not appropriate when null_count>0.
 *
 * So in case this optimization has been done and a complete parse (i.e.
 * a parse when null_count==0) is not found, we are left with sentence
 * disjuncts which are not appropriate to continue do_parse() tries with
 * null_count>0. To solve that, we need to restore the original
 * disjuncts of the sentence and call pp_and_power_prune() once again.
 *
 * Results are left in the sentence (sent->null_count,
 * sent->num_linkages_found and whatever the linkage helpers called here
 * store); the function itself returns nothing.  It returns early, before
 * any parsing, if resources are already exhausted after
 * prepare_to_parse().
 */
void classic_parse(Sentence sent, Parse_Options opts)
{
	fast_matcher_t * mchxt = NULL;
	count_context_t * ctxt = NULL;
	bool pp_and_power_prune_done = false;
	Disjunct **disjuncts_copy = NULL;
	bool is_null_count_0 = (0 == opts->min_null_count);
	int max_null_count = MIN((int)sent->length, opts->max_null_count);

	/* Build lists of disjuncts */
	prepare_to_parse(sent, opts);
	if (resources_exhausted(opts->resources)) return;

	if (is_null_count_0 && (0 < max_null_count))
	{
		/* Save the disjuncts in case we need to parse with null_count>0. */
		/* The pointer array itself lives on the stack (alloca), so only the
		 * duplicated lists it points to ever need freeing. */
		disjuncts_copy = alloca(sent->length * sizeof(Disjunct *));
		for (size_t i = 0; i < sent->length; i++)
			disjuncts_copy[i] = disjuncts_dup(sent->word[i].d);
	}

	for (int nl = opts->min_null_count; nl <= max_null_count; nl++)
	{
		Count_bin hist;
		s64 total;

		/* Pruning and matcher setup run for the first null count tried and,
		 * if that one was 0, once more for the first null count > 0. */
		if (!pp_and_power_prune_done)
		{
			if (0 != nl)
			{
				pp_and_power_prune_done = true;
				/* Temporarily changed; restored right after the
				 * pp_and_power_prune() call below. */
				if (is_null_count_0)
					opts->min_null_count = 1; /* Don't optimize for null_count==0. */

				/* We are parsing now with null_count>0, when previously we
				 * parsed with null_count==0. Restore the saved disjuncts. */
				if (NULL != disjuncts_copy)
				{
					free_sentence_disjuncts(sent);
					for (size_t i = 0; i < sent->length; i++)
						sent->word[i].d = disjuncts_copy[i];
					/* The saved lists now belong to the sentence; clearing
					 * the pointer skips the cleanup at the end. */
					disjuncts_copy = NULL;
				}
			}
			pp_and_power_prune(sent, opts);
			if (is_null_count_0) opts->min_null_count = 0;
			if (resources_exhausted(opts->resources)) break;

			/* Discard any context/matcher from the previous setup before
			 * building new ones for the freshly pruned sentence. */
			free_count_context(ctxt, sent);
			free_fast_matcher(sent, mchxt);
			pack_sentence(sent);
			ctxt = alloc_count_context(sent);
			mchxt = alloc_fast_matcher(sent);
			print_time(opts, "Initialized fast matcher");
		}

		if (resources_exhausted(opts->resources)) break;
		free_linkages(sent);

		sent->null_count = nl;
		hist = do_parse(sent, mchxt, ctxt, sent->null_count, opts);
		total = hist_total(&hist);

		lgdebug(D_PARSE, "Info: Total count with %zu null links:   %lld\n",
		        sent->null_count, total);

		/* total is 64-bit, num_linkages_found is 32-bit. Clamp */
		/* A negative total is also mapped to INT_MAX. */
		total = (total > INT_MAX) ? INT_MAX : total;
		total = (total < 0) ? INT_MAX : total;

		sent->num_linkages_found = (int) total;
		print_time(opts, "Counted parses");

		extractor_t * pex = extractor_new(sent->length, sent->rand_state);
		bool ovfl = setup_linkages(sent, pex, mchxt, ctxt, opts);
		process_linkages(sent, pex, ovfl, opts);
		free_extractor(pex);

		post_process_lkgs(sent, opts);

		/* Stop at the first null count that yields a valid linkage. */
		if (sent->num_valid_linkages > 0) break;
		if ((0 == nl) && (0 < max_null_count) && verbosity > 0)
			prt_error("No complete linkages found.\n");

		/* If we are here, then no valid linkages were found.
		 * If there was a parse overflow, give up now. */
		if (PARSE_NUM_OVERFLOW < total) break;
		//if (sent->num_linkages_found > 0 && nl>0) printf("NUM_LINKAGES_FOUND %d\n", sent->num_linkages_found);
	}
	sort_linkages(sent, opts);

	/* Still non-NULL only if the saved lists were never handed back to
	 * the sentence, so they must be freed here. */
	if (NULL != disjuncts_copy)
	{
		for (size_t i = 0; i < sent->length; i++)
			free_disjuncts(disjuncts_copy[i]);
	}
	free_count_context(ctxt, sent);
	free_fast_matcher(sent, mchxt);
}
Пример #13
0
/*
 * position_words() - recursively assign positions to the words linked to
 * `currentword`.
 *
 * dict:        dictionary used to look up the word's string.
 * alink:       list of links to follow; entries with ignore == 1 are skipped.
 * currentword: index (into the file-level tword / word_position arrays) of
 *              the word whose neighbours are to be positioned.
 * direction:   0 = follow the left connectors, 1 = follow the right ones.
 *              Any other value is treated as "no connectors to follow".
 * leftend, rightend: the interval inside which neighbours are placed.
 *
 * The first disjunct of the word's first dictionary node accepted by
 * evaluate_disjunct() is used.  For each connector on the chosen side, each
 * link that has currentword on the matching end, whose connector satisfies
 * easy_match(), and whose other word is still unpositioned (-1.0) gets a
 * position inside [leftend, rightend]; the function then recurses on that
 * word in both directions with a narrowed interval.
 *
 * Returns 0 if no usable disjunct exists for the word (including the case
 * where the dictionary lookup yields nothing) or if any recursive call
 * returned 0; returns 1 otherwise.
 *
 * Right now the disjunct is chosen twice per word - once for the right and
 * once for the left call. This seems unnecessary.
 */
int position_words(Dictionary dict, Alink * alink, int currentword, int direction, double leftend, double rightend) {
    Disjunct * d, * d0;
    Connector * c;
    Dict_node * dn;
    wchar_t * s;
    wchar_t * ds;
    int numcon, n, ok;
    int linkage_found = 1;
    wchar_t * ws;
    Alink * al;
    double position;
    double range;
    double newleftend, newrightend;

    ds = tword[currentword].gstring;

    if(localv == 2) wprintf_s(L"  Tracing word '%s', word %d, direction %d\n", ds, currentword, direction);

    /* With a conjunction: you could submit a string from one of the andlist
       element words here as ds, instead of the conjunction itself -
       "currentword" would still be the conjunction, though. (But what about
       the XL and XR connectors?) */

    dn = dictionary_lookup(dict, ds);

    /* Should we go through the dict_nodes here, or is it okay to just take
       the first one? */
    /* A failed lookup is handled like "no disjunct found" rather than being
       passed on to build_disjuncts_for_dict_node(). */
    d0 = d = (dn != NULL) ? build_disjuncts_for_dict_node(dn) : NULL;
    for(; d!=NULL; d=d->next) {
        ok = evaluate_disjunct(d, alink, currentword);
        if(ok==1) break;
    }

    if(d == NULL) {
        if(localv == 2) wprintf_s(L"No disjunct found for word '%s'\n", ds);
        free_disjuncts(d0);
        return 0;
    }

    /* We've found a disjunct to use for the current word. Now we go through
       all the connectors on the disjunct; for each one, we look through the
       links to find a link of the right type with the current word on one
       end; then we position the word on the other end and repeat this
       process recursively. */

    range = rightend - leftend;

    /* Always give c a defined value: an unexpected direction previously left
       it uninitialized before the loop below read it. */
    if(direction == 0) c = d->left;
    else if(direction == 1) c = d->right;
    else c = NULL;

    /* Fixed number of slots the interval is divided into.  Counting the
       connectors on the chosen side would be a better way. */
    numcon = 5;

    n = 1;
    ok = 1;
    while(c != NULL) {
        s = c->string;
        if(localv == 2) wprintf_s(L"    String from disjunct for '%s': %s\n", ds, s);
        for(al = alink; al != NULL; al = al->next) {
            if(al->ignore == 1) continue;

            /* Does the link have the current word on the right end? */
            if(direction == 0 && al->rightsub == currentword) {
                if (wcscmp(s, L"XR") == 0) continue;
                if (easy_match (s, al->connector) == 1 && word_position[al->leftsub] == -1.0) {
                    ws = al->left;
                    position = rightend - (range * ((numcon+1.0 - n) / (numcon+1.0)));
                    word_position[al->leftsub] = position;
                    if(localv == 2) wprintf_s(L"  Word '%s' has position %6.6f\n", ws, position);
                    /* The new interval runs halfway to the neighbouring slots. */
                    newleftend = ( position + (rightend - (range * ((numcon+1.0 - (n-1.0)) / (numcon+1.0)))) ) / 2.0;
                    newrightend = ( position + (rightend - (range * ((numcon+1.0 - (n+1.0)) / (numcon+1.0)))) ) / 2.0;
                    linkage_found = position_words(dict, alink, al->leftsub, 0, newleftend, position);
                    if(linkage_found == 0) ok = 0;
                    linkage_found = position_words(dict, alink, al->leftsub, 1, position, newrightend);
                    if(linkage_found == 0) ok = 0;
                    n++;
                }
            }

            /* Does the link have the current word on the left end? */
            if(direction == 1 && al->leftsub == currentword) {
                if (wcscmp(s, L"XL") == 0) continue;
                if (easy_match (s, al->connector) == 1 && word_position[al->rightsub] == -1.0) {
                    ws = al->right;
                    position = leftend + (range * ((numcon+1.0 - n) / (numcon+1.0)));
                    word_position[al->rightsub] = position;
                    if(localv == 2) wprintf_s(L"  Word '%s' has position %6.6f\n", ws, position);
                    newrightend = ( position + (leftend + (range * ((numcon+1.0 - (n-1.0)) / (numcon+1.0)))) ) / 2.0;
                    newleftend = ( position + (leftend + (range * ((numcon+1.0 - (n+1.0)) / (numcon+1.0)))) ) / 2.0;
                    linkage_found = position_words(dict, alink, al->rightsub, 0, newleftend, position);
                    if(linkage_found == 0) ok = 0;
                    linkage_found = position_words(dict, alink, al->rightsub, 1, position, newrightend);
                    if(linkage_found == 0) ok = 0;
                    n++;
                }
            }
        }
        c = c->next;
    }

    free_disjuncts(d0);
    /* ok is 1 unless some recursive call reported failure. */
    return ok;
}