Exemple #1
0
/**
 * Convert the sentence's expressions into disjuncts and prune them.
 *
 * Precondition: the sentence expression lists have already been generated.
 *
 * Order of work:
 *   1. build the disjuncts, using opts->disjunct_cost;
 *   2. eliminate duplicate disjuncts, one word at a time;
 *   3. set the connector length limits and call pp_and_power_prune().
 *
 * If resources_exhausted() reports true during step 2, the function
 * returns right away: later words are not de-duplicated and step 3 is
 * not run.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	size_t w = 0;

	build_sentence_disjuncts(sent, opts->disjunct_cost);
	if (verbosity > 2)
	{
		printf("After expanding expressions into disjuncts:");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	while (w < sent->length)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);

		/* Long sentences (Russian ones in particular) can blow up
		 * at this point, so check the resource budget after each word. */
		if (resources_exhausted(opts->resources))
		{
			return;
		}
		w++;
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity > 2)
	{
		printf("\nAfter expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	set_connector_length_limits(sent, opts);
	pp_and_power_prune(sent, opts);
}
Exemple #2
0
/**
 * Try to enlarge every word's disjunct list by means of word-clusters.
 *
 * For each X_node attached to a word, build_expansion_disjuncts() is
 * asked for additional disjuncts. Whatever it returns is catenated with
 * the word's current list (new disjuncts passed first) and the result is
 * then stripped of duplicates.
 *
 * Returns true if, after any such merge, a list holds more disjuncts
 * than it did just before the merge; returns false otherwise.
 */
bool lg_expand_disjunct_list(Sentence sent)
{
	Cluster *cluster = lg_cluster_new();
	bool grew = false;

	for (size_t wi = 0; wi < sent->length; wi++)
	{
		Disjunct *dlist = sent->word[wi].d;
		X_node *xn = sent->word[wi].x;

		while (xn != NULL)
		{
			Disjunct *extra = build_expansion_disjuncts(cluster, xn);
			if (extra != NULL)
			{
				/* Count before merging so growth can be detected
				 * once the duplicates have been removed again. */
				unsigned int before = count_disjuncts(dlist);
				dlist = eliminate_duplicate_disjuncts(
					catenate_disjuncts(extra, dlist));
				if (count_disjuncts(dlist) > before)
				{
					grew = true;
				}
			}
			xn = xn->next;
		}

		/* Store the (possibly new) list head back on the word. */
		sent->word[wi].d = dlist;
	}

	lg_cluster_delete(cluster);
	return grew;
}
Exemple #3
0
/**
 * Build and de-duplicate the sentence's disjuncts, then set up its
 * connectors.
 *
 * Precondition: the sentence expression lists have already been generated.
 *
 * The function stops early (without calling gword_record_in_connector()
 * or setup_connectors()) as soon as resources_exhausted() reports true
 * while duplicates are being eliminated.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	build_sentence_disjuncts(sent, opts->disjunct_cost, opts);
	if (verbosity_level(5))
	{
		prt_error("Debug: After expanding expressions into disjuncts:\n");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	for (size_t w = 0; w < sent->length; w++)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);

		/* Some long sentences (notably Russian ones) can really blow
		 * up here, so the resource budget is re-checked per word. */
		if (resources_exhausted(opts->resources))
		{
			return;
		}
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity_level(5))
	{
		prt_error("Debug: After expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	gword_record_in_connector(sent);
	setup_connectors(sent);
}
Exemple #4
0
/**
 * Prune the sentence's disjuncts and build the conjunction ("and")
 * structures required before parsing.
 *
 * Assumes the sentence expression lists have been generated. This
 * variant does not call build_sentence_disjuncts() itself, so the
 * per-word disjunct lists are presumably filled in by the caller
 * beforehand -- TODO confirm. The "Built disjuncts" timing line is
 * still emitted.
 *
 * Side effect: assigns null_links from opts->min_null_count.
 */
void my_prepare_to_parse(Sentence sent, Parse_Options opts) {
    int w;
    int has_conj;

    print_time(opts, "Built disjuncts");

    for (w = 0; w < sent->length; w++) {
        sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);
    }
    print_time(opts, "Eliminated duplicate disjuncts");

    if (verbosity > 2) {
        printf("\nAfter expression pruning and duplicate elimination:\n");
        print_disjunct_counts(sent);
    }

    null_links = (opts->min_null_count > 0);

    /* These are done here, rather than only in first_prepare_to_parse(),
       because the deletable region depends on whether null links are in
       use: with null_links everything is deletable. */
    has_conj = sentence_contains_conjunction(sent);
    set_connector_length_limits(sent, opts);
    build_deletable(sent, has_conj);
    build_effective_dist(sent, has_conj);

    if (has_conj) {
        pp_and_power_prune(sent, GENTLE, opts);

        conjunction_prune(sent, opts);
        if (verbosity > 2) {
            printf("\nAfter conjunction pruning:\n");
            print_disjunct_counts(sent);
            print_statistics();
        }
        print_time(opts, "Done conjunction pruning");

        build_conjunction_tables(sent);
        install_fat_connectors(sent);
        install_special_conjunctive_connectors(sent);
        if (verbosity > 2) {
            printf("After conjunctions, disjuncts counts:\n");
            print_disjunct_counts(sent);
        }

        /* The length limits must be set again because of the new fat
           connectors and disjuncts. */
        set_connector_length_limits(sent, opts);
        print_time(opts, "Constructed fat disjuncts");

        prune(sent);
        print_time(opts, "Pruned fat disjuncts");

        for (w = 0; w < sent->length; w++) {
            sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);
        }
        if (verbosity > 2) {
            printf("After pruning and duplicate elimination:\n");
            print_disjunct_counts(sent);
        }
        print_time(opts, "Eliminated duplicate disjuncts (again)");

        if (verbosity > 2) {
            print_AND_statistics(sent);
        }

        power_prune(sent, RUTHLESS, opts);
    } else {
        /* No conjunction in the sentence: a single ruthless pass. */
        pp_and_power_prune(sent, RUTHLESS, opts);
    }

    /* The timing for power pruning used to be printed here; it is now
       done in power_prune itself. */
    print_time(opts, "Initialized fast matcher and hash table");
}