예제 #1
0
파일: api.c 프로젝트: hckiang/link-grammar
int sentence_parse(Sentence sent, Parse_Options opts)
{
	int rc;

	sent->num_valid_linkages = 0;

	/* If the sentence has not yet been split, do so now.
	 * This is for backwards compatibility, for existing programs
	 * that do not explicitly call the splitter.
	 */
	if (0 == sent->length)
	{
		rc = sentence_split(sent, opts);
		if (rc) return -1;
	}
	else
	{
		/* During a panic parse, we enter here a second time, with leftover
		 * garbage. Free it. We really should make the code that is panicking
		 * do this free, but right now, they have no API for it, so we do it
		 * as a favor. XXX FIXME someday. */
		free_sentence_disjuncts(sent);
	}

	/* Check for bad sentence length */
	if (MAX_SENTENCE <= sent->length)
	{
		prt_error("Error: sentence too long, contains more than %d words\n",
			MAX_SENTENCE);
		return -2;
	}

	resources_reset(opts->resources);

	/* Expressions were set up during the tokenize stage.
	 * Prune them, and then parse.
	 */
	expression_prune(sent, opts);
	print_time(opts, "Finished expression pruning");
	if (opts->use_sat_solver)
	{
		sat_parse(sent, opts);
	}
	else
	{
		classic_parse(sent, opts);
	}
	print_time(opts, "Finished parse");

	if ((verbosity > 0) &&
	   (PARSE_NUM_OVERFLOW < sent->num_linkages_found))
	{
		prt_error("Warning: Combinatorial explosion! nulls=%zu cnt=%d\n"
			"Consider retrying the parse with the max allowed disjunct cost set lower.\n"
			"At the command line, use !cost-max\n",
			sent->null_count, sent->num_linkages_found);
	}
	return sent->num_valid_linkages;
}
예제 #2
0
int sentence_parse(Sentence sent, Parse_Options opts)
{
	int rc;

	verbosity = opts->verbosity;
	debug = opts->debug;
	test = opts->test;

	sent->num_valid_linkages = 0;

	/* If the sentence has not yet been split, do so now.
	 * This is for backwards compatibility, for existing programs
	 * that do not explicitly call the splitter.
	 */
	if (0 == sent->length)
	{
		rc = sentence_split(sent, opts);
		if (rc) return -1;
	}

	/* Check for bad sentence length */
	if (MAX_SENTENCE <= sent->length)
	{
		prt_error("Error: sentence too long, contains more than %d words\n",
			MAX_SENTENCE);
		return -2;
	}

	/* Initialize/free any leftover garbage */
	free_sentence_disjuncts(sent);  /* Is this really needed ??? */
	resources_reset_space(opts->resources);

	if (resources_exhausted(opts->resources))
		return 0;

	/* Expressions were previously set up during the tokenize stage. */
	expression_prune(sent);
	print_time(opts, "Finished expression pruning");
	if (opts->use_sat_solver)
	{
		sat_parse(sent, opts);
	}
	else
	{
		chart_parse(sent, opts);
	}
	print_time(opts, "Finished parse");

	if ((verbosity > 0) &&
	   (PARSE_NUM_OVERFLOW < sent->num_linkages_found))
	{
		prt_error("WARNING: Combinatorial explosion! nulls=%zu cnt=%d\n"
			"Consider retrying the parse with the max allowed disjunct cost set lower.\n"
			"At the command line, use !cost-max\n",
			sent->null_count, sent->num_linkages_found);
	}
	return sent->num_valid_linkages;
}
예제 #3
0
파일: api.c 프로젝트: hckiang/link-grammar
static void free_sentence_words(Sentence sent)
{
	for (WordIdx i = 0; i < sent->length; i++)
	{
		free_X_nodes(sent->word[i].x);
		free(sent->word[i].alternatives);
	}
	free_sentence_disjuncts(sent);
	free((void *) sent->word);
	sent->word = NULL;
}
예제 #4
0
파일: api.c 프로젝트: mclumd/Alfred
void sentence_delete(Sentence sent) {

  /*free_andlists(sent); */
    free_sentence_disjuncts(sent);      
    free_sentence_expressions(sent);
    string_set_delete(sent->string_set);
    free_parse_set(sent);
    free_post_processing(sent);
    post_process_close_sentence(sent->dict->postprocessor);
    free_lookup_list();
    free_deletable(sent);
    free_effective_dist(sent);
    xfree(sent->is_conjunction, sizeof(char)*sent->length);
    xfree((char *) sent, sizeof(struct Sentence_s));
}
예제 #5
0
파일: api.c 프로젝트: mclumd/Alfred
int sentence_parse(Sentence sent, Parse_Options opts) {
    int nl;

    verbosity = opts->verbosity;
 
    free_sentence_disjuncts(sent);
    resources_reset_space(opts->resources);

    if (resources_exhausted(opts->resources)) {
	sent->num_valid_linkages = 0;
	return 0;
    }

    expression_prune(sent); 
    print_time(opts, "Finished expression pruning");
    prepare_to_parse(sent, opts);

    init_fast_matcher(sent);
    init_table(sent);

    /* A parse set may have been already been built for this sentence,
       if it was previously parsed.  If so we free it up before building another.  */
    free_parse_set(sent);
    init_x_table(sent);

    for (nl = opts->min_null_count; 
	 (nl<=opts->max_null_count) && (!resources_exhausted(opts->resources)); ++nl) {
	sent->null_count = nl;
	sent->num_linkages_found = parse(sent, sent->null_count, opts);
	print_time(opts, "Counted parses");
	post_process_linkages(sent, opts);
	if (sent->num_valid_linkages > 0) break;
    }

    free_table(sent);
    free_fast_matcher(sent);
    print_time(opts, "Finished parse");

    return sent->num_valid_linkages;
}
예제 #6
0
/**
 * classic_parse() -- parse the given sentence.
 * Perform parsing, using the original link-grammar parsing algorithm
 * given in the original link-grammar papers.
 *
 * Do the parse with the minimum number of null-links within the range
 * specified by opts->min_null_count and opts->max_null_count.
 *
 * To that end, call do_parse() with an increasing null_count, from
 * opts->min_null_count up to (including) opts->max_null_count, until a
 * parse is found.
 *
 * A note about the disjuncts save/restore that is done here:
 * To increase the parsing speed, before invoking do_parse(),
 * pp_and_power_prune() is invoked to remove connectors which have no
 * possibility to connect. It includes a significant optimization when
 * null_count==0 that makes a more aggressive removal, but this
 * optimization is not appropriate when null_count>0.
 *
 * So in case this optimization has been done and a complete parse (i.e.
 * a parse when null_count==0) is not found, we are left with sentence
 * disjuncts which are not appropriate to continue do_parse() tries with
 * null_count>0. To solve that, we need to restore the original
 * disjuncts of the sentence and call pp_and_power_prune() once again.
 */
void classic_parse(Sentence sent, Parse_Options opts)
{
	fast_matcher_t * mchxt = NULL;
	count_context_t * ctxt = NULL;
	bool pp_and_power_prune_done = false;
	Disjunct **disjuncts_copy = NULL;
	bool is_null_count_0 = (0 == opts->min_null_count);
	int max_null_count = MIN((int)sent->length, opts->max_null_count);

	/* Build lists of disjuncts */
	prepare_to_parse(sent, opts);
	if (resources_exhausted(opts->resources)) return;

	if (is_null_count_0 && (0 < max_null_count))
	{
		/* Save the disjuncts in case we need to parse with null_count>0. */
		disjuncts_copy = alloca(sent->length * sizeof(Disjunct *));
		for (size_t i = 0; i < sent->length; i++)
			disjuncts_copy[i] = disjuncts_dup(sent->word[i].d);
	}

	for (int nl = opts->min_null_count; nl <= max_null_count; nl++)
	{
		Count_bin hist;
		s64 total;

		if (!pp_and_power_prune_done)
		{
			if (0 != nl)
			{
				pp_and_power_prune_done = true;
				if (is_null_count_0)
					opts->min_null_count = 1; /* Don't optimize for null_count==0. */

				/* We are parsing now with null_count>0, when previously we
				 * parsed with null_count==0. Restore the save disjuncts. */
				if (NULL != disjuncts_copy)
				{
					free_sentence_disjuncts(sent);
					for (size_t i = 0; i < sent->length; i++)
						sent->word[i].d = disjuncts_copy[i];
					disjuncts_copy = NULL;
				}
			}
			pp_and_power_prune(sent, opts);
			if (is_null_count_0) opts->min_null_count = 0;
			if (resources_exhausted(opts->resources)) break;

			free_count_context(ctxt, sent);
			free_fast_matcher(sent, mchxt);
			pack_sentence(sent);
			ctxt = alloc_count_context(sent);
			mchxt = alloc_fast_matcher(sent);
			print_time(opts, "Initialized fast matcher");
		}

		if (resources_exhausted(opts->resources)) break;
		free_linkages(sent);

		sent->null_count = nl;
		hist = do_parse(sent, mchxt, ctxt, sent->null_count, opts);
		total = hist_total(&hist);

		lgdebug(D_PARSE, "Info: Total count with %zu null links:   %lld\n",
		        sent->null_count, total);

		/* total is 64-bit, num_linkages_found is 32-bit. Clamp */
		total = (total > INT_MAX) ? INT_MAX : total;
		total = (total < 0) ? INT_MAX : total;

		sent->num_linkages_found = (int) total;
		print_time(opts, "Counted parses");

		extractor_t * pex = extractor_new(sent->length, sent->rand_state);
		bool ovfl = setup_linkages(sent, pex, mchxt, ctxt, opts);
		process_linkages(sent, pex, ovfl, opts);
		free_extractor(pex);

		post_process_lkgs(sent, opts);

		if (sent->num_valid_linkages > 0) break;
		if ((0 == nl) && (0 < max_null_count) && verbosity > 0)
			prt_error("No complete linkages found.\n");

		/* If we are here, then no valid linkages were found.
		 * If there was a parse overflow, give up now. */
		if (PARSE_NUM_OVERFLOW < total) break;
		//if (sent->num_linkages_found > 0 && nl>0) printf("NUM_LINKAGES_FOUND %d\n", sent->num_linkages_found);
	}
	sort_linkages(sent, opts);

	if (NULL != disjuncts_copy)
	{
		for (size_t i = 0; i < sent->length; i++)
			free_disjuncts(disjuncts_copy[i]);
	}
	free_count_context(ctxt, sent);
	free_fast_matcher(sent, mchxt);
}