Exemple #1
0
/**
 * Assumes that the sentence expression lists have been generated.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	size_t i;

	build_sentence_disjuncts(sent, opts->disjunct_cost);
	if (verbosity > 2) {
		printf("After expanding expressions into disjuncts:");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	for (i=0; i<sent->length; i++) {
		sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d);

		/* Some long Russian sentences can really blow up, here. */
		if (resources_exhausted(opts->resources))
			return;
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity > 2) {
		printf("\nAfter expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	set_connector_length_limits(sent, opts);
	pp_and_power_prune(sent, opts);
}
Exemple #2
0
void my_prepare_to_parse(Sentence sent, Parse_Options opts) {
/* assumes that the sentence expression lists have been generated     */
/* this does all the necessary pruning and building of and            */
/* structures.                                                        */
    int i, has_conjunction;

  //  build_sentence_disjuncts(sent, opts->disjunct_cost);
  //  if (verbosity > 2) {
	//printf("After expanding expressions into disjuncts:") ;
	//print_disjunct_counts(sent);
   // }
    print_time(opts, "Built disjuncts");
    
    for (i=0; i<sent->length; i++) {
	sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d);
    }
    print_time(opts, "Eliminated duplicate disjuncts");

    if (verbosity > 2) {
	printf("\nAfter expression pruning and duplicate elimination:\n");
	print_disjunct_counts(sent);
    }

    null_links = (opts->min_null_count > 0);

    has_conjunction = sentence_contains_conjunction(sent);
    set_connector_length_limits(sent, opts);
    build_deletable(sent, has_conjunction);
    build_effective_dist(sent, has_conjunction);  
    /* why do we do these here instead of in
       first_prepare_to_parse() only?  The
       reason is that the deletable region
       depends on if null links are in use.
       with null_links everything is deletable */

    if (!has_conjunction) {
	pp_and_power_prune(sent, RUTHLESS, opts);
    } else {
	pp_and_power_prune(sent, GENTLE, opts);
	
	/*if (verbosity > 2) {
	    printf("\nAfter Gentle power pruning:\n");
	    print_disjunct_counts(sent);
	}
	*/
	/*print_time(opts, "Finished gentle power pruning"); */
	conjunction_prune(sent, opts);
	if (verbosity > 2) {
	    printf("\nAfter conjunction pruning:\n");
	    print_disjunct_counts(sent);
	    print_statistics();
	}
	print_time(opts, "Done conjunction pruning");
	build_conjunction_tables(sent);
	install_fat_connectors(sent);
	install_special_conjunctive_connectors(sent);
	if (verbosity > 2) {
	    printf("After conjunctions, disjuncts counts:\n");
	    print_disjunct_counts(sent);
	}
	set_connector_length_limits(sent, opts);
              /* have to do this again cause of the
	         new fat connectors and disjuncts */

	print_time(opts, "Constructed fat disjuncts");
	
	prune(sent);
	print_time(opts, "Pruned fat disjuncts");
	
	for (i=0; i<sent->length; i++) {
	    sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d);
	}
	if (verbosity > 2) {
	    printf("After pruning and duplicate elimination:\n");
	    print_disjunct_counts(sent);
	}
	print_time(opts, "Eliminated duplicate disjuncts (again)");
	
	if (verbosity > 2) print_AND_statistics(sent);

	power_prune(sent, RUTHLESS, opts);
    }

    /*
    if (verbosity > 2) {
	printf("\nAfter RUTHLESS power-pruning:\n");
	print_disjunct_counts(sent);
    }
    */
    /* print time for power pruning used to be here */
    /* now done in power_prune itself */
    print_time(opts, "Initialized fast matcher and hash table");
}
Exemple #3
0
/**
 * classic_parse() -- parse the given sentence.
 * Perform parsing, using the original link-grammar parsing algorithm
 * given in the original link-grammar papers.
 *
 * Do the parse with the minimum number of null-links within the range
 * specified by opts->min_null_count and opts->max_null_count.
 *
 * To that end, call do_parse() with an increasing null_count, from
 * opts->min_null_count up to (including) opts->max_null_count, until a
 * parse is found.
 *
 * A note about the disjuncts save/restore that is done here:
 * To increase the parsing speed, before invoking do_parse(),
 * pp_and_power_prune() is invoked to remove connectors which have no
 * possibility to connect. It includes a significant optimization when
 * null_count==0 that makes a more aggressive removal, but this
 * optimization is not appropriate when null_count>0.
 *
 * So in case this optimization has been done and a complete parse (i.e.
 * a parse when null_count==0) is not found, we are left with sentence
 * disjuncts which are not appropriate to continue do_parse() tries with
 * null_count>0. To solve that, we need to restore the original
 * disjuncts of the sentence and call pp_and_power_prune() once again.
 */
void classic_parse(Sentence sent, Parse_Options opts)
{
	fast_matcher_t * mchxt = NULL;
	count_context_t * ctxt = NULL;
	bool pp_and_power_prune_done = false;
	Disjunct **disjuncts_copy = NULL;
	bool is_null_count_0 = (0 == opts->min_null_count);
	int max_null_count = MIN((int)sent->length, opts->max_null_count);

	/* Build lists of disjuncts */
	prepare_to_parse(sent, opts);
	if (resources_exhausted(opts->resources)) return;

	if (is_null_count_0 && (0 < max_null_count))
	{
		/* Save the disjuncts in case we need to parse with null_count>0. */
		disjuncts_copy = alloca(sent->length * sizeof(Disjunct *));
		for (size_t i = 0; i < sent->length; i++)
			disjuncts_copy[i] = disjuncts_dup(sent->word[i].d);
	}

	for (int nl = opts->min_null_count; nl <= max_null_count; nl++)
	{
		Count_bin hist;
		s64 total;

		if (!pp_and_power_prune_done)
		{
			if (0 != nl)
			{
				pp_and_power_prune_done = true;
				if (is_null_count_0)
					opts->min_null_count = 1; /* Don't optimize for null_count==0. */

				/* We are parsing now with null_count>0, when previously we
				 * parsed with null_count==0. Restore the save disjuncts. */
				if (NULL != disjuncts_copy)
				{
					free_sentence_disjuncts(sent);
					for (size_t i = 0; i < sent->length; i++)
						sent->word[i].d = disjuncts_copy[i];
					disjuncts_copy = NULL;
				}
			}
			pp_and_power_prune(sent, opts);
			if (is_null_count_0) opts->min_null_count = 0;
			if (resources_exhausted(opts->resources)) break;

			free_count_context(ctxt, sent);
			free_fast_matcher(sent, mchxt);
			pack_sentence(sent);
			ctxt = alloc_count_context(sent);
			mchxt = alloc_fast_matcher(sent);
			print_time(opts, "Initialized fast matcher");
		}

		if (resources_exhausted(opts->resources)) break;
		free_linkages(sent);

		sent->null_count = nl;
		hist = do_parse(sent, mchxt, ctxt, sent->null_count, opts);
		total = hist_total(&hist);

		lgdebug(D_PARSE, "Info: Total count with %zu null links:   %lld\n",
		        sent->null_count, total);

		/* total is 64-bit, num_linkages_found is 32-bit. Clamp */
		total = (total > INT_MAX) ? INT_MAX : total;
		total = (total < 0) ? INT_MAX : total;

		sent->num_linkages_found = (int) total;
		print_time(opts, "Counted parses");

		extractor_t * pex = extractor_new(sent->length, sent->rand_state);
		bool ovfl = setup_linkages(sent, pex, mchxt, ctxt, opts);
		process_linkages(sent, pex, ovfl, opts);
		free_extractor(pex);

		post_process_lkgs(sent, opts);

		if (sent->num_valid_linkages > 0) break;
		if ((0 == nl) && (0 < max_null_count) && verbosity > 0)
			prt_error("No complete linkages found.\n");

		/* If we are here, then no valid linkages were found.
		 * If there was a parse overflow, give up now. */
		if (PARSE_NUM_OVERFLOW < total) break;
		//if (sent->num_linkages_found > 0 && nl>0) printf("NUM_LINKAGES_FOUND %d\n", sent->num_linkages_found);
	}
	sort_linkages(sent, opts);

	if (NULL != disjuncts_copy)
	{
		for (size_t i = 0; i < sent->length; i++)
			free_disjuncts(disjuncts_copy[i]);
	}
	free_count_context(ctxt, sent);
	free_fast_matcher(sent, mchxt);
}