Exemplo n.º 1
0
/**
 * conjunction_prune() -- discard disjuncts that cannot take part in any
 * linkage of a sentence containing a conjunction.
 *
 * Preconditions (as stated by the original author): the sentence
 * disjuncts have already been built, pruned and power_pruned(GENTLE),
 * and deletable[][] describes the ranges that may be deleted in the
 * final linkage.
 *
 * Strategy: clear the "marked" flag on every disjunct, mark the ones
 * that might be usable, then remove whatever is still unmarked.
 *
 * @param sent  Sentence whose per-word disjunct lists are pruned in place.
 * @param opts  Parse options; supplies the resources handle and
 *              min_null_count.
 */
void conjunction_prune(Sentence sent, Parse_Options opts)
{
	count_context_t *ctxt = sent->count_ctxt;
	Disjunct *dj;
	int pos;

	/* Point the counting context at this sentence for the duration
	 * of the call. */
	ctxt->current_resources = opts->resources;
	ctxt->deletable = sent->deletable;
	count_set_effective_distance(sent);

	/* Start from a clean slate: nothing is marked.  This pass would be
	 * unnecessary if every disjunct were created with marked cleared.
	 * XXX FIXME, someday ... */
	pos = 0;
	while (pos < sent->length) {
		dj = sent->word[pos].d;
		while (dj != NULL) {
			dj->marked = FALSE;
			dj = dj->next;
		}
		pos++;
	}

	init_fast_matcher(sent);
	ctxt->local_sent = sent->word;
	ctxt->null_links = (opts->min_null_count > 0);

	if (!ctxt->null_links) {
		for (pos = 0; pos < sent->length; pos++) {
			/* Consider removing the words [0,pos-1] from the
			 * beginning of the sentence; skip positions where
			 * that prefix is not deletable. */
			if (!ctxt->deletable[-1][pos]) continue;

			for (dj = sent->word[pos].d; dj != NULL; dj = dj->next) {
				/* Only a disjunct with no left connectors can
				 * start the remaining region. */
				if (dj->left != NULL) continue;
				if (!region_valid(sent, pos, sent->length, dj->right, NULL)) continue;

				mark_region(sent, pos, sent->length, dj->right, NULL);
				dj->marked = TRUE;
			}
		}
	} else {
		/* Null links are in use: mark over the whole sentence in one go. */
		mark_region(sent, -1, sent->length, NULL, NULL);
	}

	delete_unmarked_disjuncts(sent);

	free_fast_matcher(sent);

	/* Detach the counting context from this sentence again. */
	ctxt->local_sent = NULL;
	ctxt->current_resources = NULL;
	ctxt->deletable = NULL;
	count_unset_effective_distance(sent);
}
Exemplo n.º 2
0
Arquivo: api.c Projeto: mclumd/Alfred
/**
 * sentence_parse() -- parse a sentence, trying successive null counts.
 *
 * Tries each null count from opts->min_null_count up to and including
 * opts->max_null_count, stopping as soon as a pass yields at least one
 * valid linkage or the resources are exhausted.
 *
 * @param sent  Sentence to parse; its null_count, num_linkages_found and
 *              num_valid_linkages fields are updated.
 * @param opts  Parse options (verbosity, resources, null-count range).
 * @return      sent->num_valid_linkages, or 0 if resources were already
 *              exhausted before any work was done.
 */
int sentence_parse(Sentence sent, Parse_Options opts) {
    int null_count;

    verbosity = opts->verbosity;

    free_sentence_disjuncts(sent);
    resources_reset_space(opts->resources);

    /* Nothing can be done without resources: report zero linkages. */
    if (resources_exhausted(opts->resources)) {
        sent->num_valid_linkages = 0;
        return 0;
    }

    expression_prune(sent);
    print_time(opts, "Finished expression pruning");
    prepare_to_parse(sent, opts);

    init_fast_matcher(sent);
    init_table(sent);

    /* If the sentence was parsed before, a parse set may already exist.
       Release it before building a new one. */
    free_parse_set(sent);
    init_x_table(sent);

    null_count = opts->min_null_count;
    while ((null_count <= opts->max_null_count) &&
           !resources_exhausted(opts->resources)) {
        sent->null_count = null_count;
        sent->num_linkages_found = parse(sent, sent->null_count, opts);
        print_time(opts, "Counted parses");
        post_process_linkages(sent, opts);

        /* Stop at the first null count that gives a valid linkage. */
        if (sent->num_valid_linkages > 0) {
            break;
        }
        ++null_count;
    }

    free_table(sent);
    free_fast_matcher(sent);
    print_time(opts, "Finished parse");

    return sent->num_valid_linkages;
}
Exemplo n.º 3
0
/**
 * chart_parse() -- parse the sentence with an increasing null count.
 *
 * Misnamed: this has nothing to do with chart parsing.
 *
 * Starting at opts->min_null_count, call do_parse() and post-process
 * the result; stop at the first null count that yields a valid linkage,
 * on parse-count overflow, on resource exhaustion, or once
 * opts->max_null_count has been reached.  At least one attempt is made
 * provided resources are available.
 *
 * @param sent  Sentence to parse; null_count, num_linkages_found and
 *              parse_info are updated.
 * @param opts  Parse options (resources, verbosity, null-count range).
 */
static void chart_parse(Sentence sent, Parse_Options opts)
{
	int nl;
	fast_matcher_t * mchxt;
	count_context_t * ctxt;

	/* Build lists of disjuncts */
	prepare_to_parse(sent, opts);
	if (resources_exhausted(opts->resources)) return;

	mchxt = alloc_fast_matcher(sent);
	ctxt = alloc_count_context(sent->length);
	print_time(opts, "Initialized fast matcher");
	if (resources_exhausted(opts->resources))
	{
		/* Release what was just allocated before bailing out. */
		free_count_context(ctxt);
		free_fast_matcher(mchxt);
		return;
	}

	/* A parse set may have been already been built for this sentence,
	 * if it was previously parsed.  If so we free it up before
	 * building another.  Huh ?? How could that happen? */
	free_parse_info(sent->parse_info);
	sent->parse_info = parse_info_new(sent->length);

	nl = opts->min_null_count;
	while (true)
	{
		Count_bin hist;
		s64 total;
		if (resources_exhausted(opts->resources)) break;
		sent->null_count = nl;
		hist = do_parse(sent, mchxt, ctxt, sent->null_count, opts);
		total = hist_total(&hist);

		if (opts->verbosity > 1)
		{
			prt_error("Info: Total count with %zu null links:   %lld\n",
			          sent->null_count, total);
		}

		/* total is 64-bit, num_linkages_found is 32-bit. Clamp.
		 * A negative total is treated as an overflow as well. */
		total = (total > INT_MAX) ? INT_MAX : total;
		total = (total < 0) ? INT_MAX : total;

		sent->num_linkages_found = (int) total;
		print_time(opts, "Counted parses");

		select_linkages(sent, mchxt, ctxt, opts);
		compute_chosen_disjuncts(sent);
		sane_morphism(sent, opts);
		post_process_linkages(sent, opts);
		if (sent->num_valid_linkages > 0) break;

		/* If we are here, then no valid linkages were found.
		 * If there was a parse overflow, give up now. */
		if (PARSE_NUM_OVERFLOW < total) break;

		/* Loop termination.  Use >= rather than ==: if the caller
		 * supplied min_null_count > max_null_count, an equality test
		 * would never fire and nl would grow without bound. */
		if (nl >= opts->max_null_count) break;

		/* If we are here, we are going round again. Free stuff. */
		free_linkages(sent);
		nl++;
	}
	sort_linkages(sent, opts);

	free_count_context(ctxt);
	free_fast_matcher(mchxt);
}
Exemplo n.º 4
0
/**
 * classic_parse() -- parse the given sentence.
 * Perform parsing, using the original link-grammar parsing algorithm
 * given in the original link-grammar papers.
 *
 * Do the parse with the minimum number of null-links within the range
 * specified by opts->min_null_count and opts->max_null_count.
 *
 * To that end, call do_parse() with an increasing null_count, from
 * opts->min_null_count up to (including) opts->max_null_count, until a
 * parse is found.
 *
 * A note about the disjuncts save/restore that is done here:
 * To increase the parsing speed, before invoking do_parse(),
 * pp_and_power_prune() is invoked to remove connectors which have no
 * possibility to connect. It includes a significant optimization when
 * null_count==0 that makes a more aggressive removal, but this
 * optimization is not appropriate when null_count>0.
 *
 * So in case this optimization has been done and a complete parse (i.e.
 * a parse when null_count==0) is not found, we are left with sentence
 * disjuncts which are not appropriate to continue do_parse() tries with
 * null_count>0. To solve that, we need to restore the original
 * disjuncts of the sentence and call pp_and_power_prune() once again.
 *
 * @param sent  Sentence to parse. Its disjunct lists, null_count and
 *              num_linkages_found are updated here.
 * @param opts  Parse options. opts->min_null_count is temporarily set
 *              to 1 around one pp_and_power_prune() call and then put
 *              back to 0 (see below).
 */
void classic_parse(Sentence sent, Parse_Options opts)
{
	fast_matcher_t * mchxt = NULL;
	count_context_t * ctxt = NULL;
	/* Becomes true only once pruning has been done for null_count>0;
	 * after a null_count==0 pass it stays false so that the next
	 * iteration prunes again. */
	bool pp_and_power_prune_done = false;
	Disjunct **disjuncts_copy = NULL;
	bool is_null_count_0 = (0 == opts->min_null_count);
	/* The upper bound is capped at the sentence length. */
	int max_null_count = MIN((int)sent->length, opts->max_null_count);

	/* Build lists of disjuncts */
	prepare_to_parse(sent, opts);
	if (resources_exhausted(opts->resources)) return;

	if (is_null_count_0 && (0 < max_null_count))
	{
		/* Save the disjuncts in case we need to parse with null_count>0.
		 * The pointer array itself comes from alloca(); only the
		 * duplicated lists are freed explicitly at the end. */
		disjuncts_copy = alloca(sent->length * sizeof(Disjunct *));
		for (size_t i = 0; i < sent->length; i++)
			disjuncts_copy[i] = disjuncts_dup(sent->word[i].d);
	}

	for (int nl = opts->min_null_count; nl <= max_null_count; nl++)
	{
		Count_bin hist;
		s64 total;

		if (!pp_and_power_prune_done)
		{
			if (0 != nl)
			{
				pp_and_power_prune_done = true;
				if (is_null_count_0)
					opts->min_null_count = 1; /* Don't optimize for null_count==0. */

				/* We are parsing now with null_count>0, when previously we
				 * parsed with null_count==0. Restore the saved disjuncts.
				 * The saved lists are handed back to the sentence, and
				 * the pointer is cleared so that the cleanup at the end
				 * of the function does not free them. */
				if (NULL != disjuncts_copy)
				{
					free_sentence_disjuncts(sent);
					for (size_t i = 0; i < sent->length; i++)
						sent->word[i].d = disjuncts_copy[i];
					disjuncts_copy = NULL;
				}
			}
			pp_and_power_prune(sent, opts);
			/* Undo the temporary min_null_count override made above. */
			if (is_null_count_0) opts->min_null_count = 0;
			if (resources_exhausted(opts->resources)) break;

			/* The disjuncts changed, so drop the previous count context
			 * and fast matcher (both NULL on the first pass) and
			 * allocate new ones after packing the sentence. */
			free_count_context(ctxt, sent);
			free_fast_matcher(sent, mchxt);
			pack_sentence(sent);
			ctxt = alloc_count_context(sent);
			mchxt = alloc_fast_matcher(sent);
			print_time(opts, "Initialized fast matcher");
		}

		if (resources_exhausted(opts->resources)) break;
		/* Discard linkages left over from a previous attempt. */
		free_linkages(sent);

		sent->null_count = nl;
		hist = do_parse(sent, mchxt, ctxt, sent->null_count, opts);
		total = hist_total(&hist);

		lgdebug(D_PARSE, "Info: Total count with %zu null links:   %lld\n",
		        sent->null_count, total);

		/* total is 64-bit, num_linkages_found is 32-bit. Clamp.
		 * A negative total is also mapped to INT_MAX. */
		total = (total > INT_MAX) ? INT_MAX : total;
		total = (total < 0) ? INT_MAX : total;

		sent->num_linkages_found = (int) total;
		print_time(opts, "Counted parses");

		/* The extractor lives only for this attempt. */
		extractor_t * pex = extractor_new(sent->length, sent->rand_state);
		bool ovfl = setup_linkages(sent, pex, mchxt, ctxt, opts);
		process_linkages(sent, pex, ovfl, opts);
		free_extractor(pex);

		post_process_lkgs(sent, opts);

		/* Stop at the first null count that yields a valid linkage. */
		if (sent->num_valid_linkages > 0) break;
		if ((0 == nl) && (0 < max_null_count) && verbosity > 0)
			prt_error("No complete linkages found.\n");

		/* If we are here, then no valid linkages were found.
		 * If there was a parse overflow, give up now. */
		if (PARSE_NUM_OVERFLOW < total) break;
		//if (sent->num_linkages_found > 0 && nl>0) printf("NUM_LINKAGES_FOUND %d\n", sent->num_linkages_found);
	}
	sort_linkages(sent, opts);

	/* Still non-NULL only if the saved disjuncts were never restored
	 * into the sentence; in that case they are ours to free. */
	if (NULL != disjuncts_copy)
	{
		for (size_t i = 0; i < sent->length; i++)
			free_disjuncts(disjuncts_copy[i]);
	}
	free_count_context(ctxt, sent);
	free_fast_matcher(sent, mchxt);
}