Exemple #1
0
/**
 * Expand the sentence's expressions into disjuncts, drop duplicate
 * disjuncts word by word, then set connector length limits and run
 * pp_and_power_prune().
 *
 * Precondition: the sentence expression lists have been generated.
 *
 * If resources_exhausted() reports true after de-duplicating any word,
 * the function returns immediately; the remaining words and the pruning
 * stages are skipped.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	size_t w = 0;

	build_sentence_disjuncts(sent, opts->disjunct_cost);
	if (verbosity > 2)
	{
		printf("After expanding expressions into disjuncts:");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	while (w < sent->length)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);

		/* Long sentences (some Russian ones in particular) can blow
		 * up at this stage, so the resource budget is re-checked
		 * after every word. */
		if (resources_exhausted(opts->resources))
		{
			return;
		}
		w++;
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity > 2)
	{
		printf("\nAfter expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	set_connector_length_limits(sent, opts);
	pp_and_power_prune(sent, opts);
}
Exemple #2
0
/**
 * Expand the sentence's expressions into disjuncts, drop duplicate
 * disjuncts word by word, then call gword_record_in_connector() and
 * setup_connectors() on the sentence.
 *
 * Precondition: the sentence expression lists have been generated.
 *
 * If resources_exhausted() reports true after de-duplicating any word,
 * the function returns immediately and the later stages are skipped.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	build_sentence_disjuncts(sent, opts->disjunct_cost, opts);
	if (verbosity_level(5)) {
		prt_error("Debug: After expanding expressions into disjuncts:\n");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	for (size_t w = 0; w < sent->length; w++) {
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);

		/* Long sentences (some Russian ones in particular) can blow
		 * up at this stage, so the resource budget is re-checked
		 * after every word. */
		if (resources_exhausted(opts->resources)) {
			return;
		}
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity_level(5)) {
		prt_error("Debug: After expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	gword_record_in_connector(sent);
	setup_connectors(sent);
}
Exemple #3
0
/*
 * Replace the disjunct list of every word in the sentence by the result
 * of eliminate_duplicate_disjuncts().
 */
static void dedup_all_word_disjuncts(Sentence sent)
{
    int w;

    for (w = 0; w < sent->length; w++) {
        sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);
    }
}

/*
 * Perform the pruning and build the conjunction ("and") structures that
 * are needed before parsing.
 *
 * Precondition: the sentence expression lists have been generated.
 * The call that builds the disjuncts is disabled in this variant, so the
 * disjuncts are presumably built by the caller -- TODO confirm.
 *
 * Side effect: sets the null_links variable from opts->min_null_count.
 */
void my_prepare_to_parse(Sentence sent, Parse_Options opts)
{
    int has_conj;

    /* Disjunct building (and its verbose report) is switched off here;
     * only the timing checkpoint is still emitted. */
    print_time(opts, "Built disjuncts");

    dedup_all_word_disjuncts(sent);
    print_time(opts, "Eliminated duplicate disjuncts");

    if (verbosity > 2) {
        printf("\nAfter expression pruning and duplicate elimination:\n");
        print_disjunct_counts(sent);
    }

    null_links = (opts->min_null_count > 0);

    has_conj = sentence_contains_conjunction(sent);
    set_connector_length_limits(sent, opts);

    /* These two are done here rather than only in
     * first_prepare_to_parse() because the deletable region depends on
     * whether null links are in use: with null_links everything is
     * deletable. */
    build_deletable(sent, has_conj);
    build_effective_dist(sent, has_conj);

    if (has_conj) {
        pp_and_power_prune(sent, GENTLE, opts);

        conjunction_prune(sent, opts);
        if (verbosity > 2) {
            printf("\nAfter conjunction pruning:\n");
            print_disjunct_counts(sent);
            print_statistics();
        }
        print_time(opts, "Done conjunction pruning");

        build_conjunction_tables(sent);
        install_fat_connectors(sent);
        install_special_conjunctive_connectors(sent);
        if (verbosity > 2) {
            printf("After conjunctions, disjuncts counts:\n");
            print_disjunct_counts(sent);
        }

        /* Has to be redone because of the new fat connectors and
         * disjuncts. */
        set_connector_length_limits(sent, opts);
        print_time(opts, "Constructed fat disjuncts");

        prune(sent);
        print_time(opts, "Pruned fat disjuncts");

        dedup_all_word_disjuncts(sent);
        if (verbosity > 2) {
            printf("After pruning and duplicate elimination:\n");
            print_disjunct_counts(sent);
        }
        print_time(opts, "Eliminated duplicate disjuncts (again)");

        if (verbosity > 2) {
            print_AND_statistics(sent);
        }

        power_prune(sent, RUTHLESS, opts);
    } else {
        pp_and_power_prune(sent, RUTHLESS, opts);
    }

    /* The timing of power pruning is reported by power_prune itself. */
    print_time(opts, "Initialized fast matcher and hash table");
}
Exemple #4
0
/**
 * Repeatedly sweep the sentence left-to-right and right-to-left, unlinking
 * every disjunct whose connector list fails the update check, until a full
 * sweep neither changes nor deletes anything.
 *
 * @param sent  Sentence whose per-word disjunct lists are pruned in place.
 * @param opts  Parse options; min_null_count decides pc.null_links, and
 *              opts is also handed to print_time().
 * @return The number of disjuncts deleted, summed over all passes.
 *
 *  Implementation notes:
 *  Normally all the identical disjunct-jets are memory shared.
 *  The suffix_id of each connector serves as its reference count
 *  in the power table. Each time when a connector that cannot match
 *  is discovered, its reference count is decreased, and its
 *  nearest_word field is assigned BAD_WORD. Due to the memory sharing,
 *  each such an assignment affects immediately all the identical
 *  disjunct-jets.
 */
static int power_prune(Sentence sent, Parse_Options opts)
{
	power_table pt;
	prune_context pc;
	int N_deleted[2] = {0}; /* [0] counts first deletions, [1] counts dups. */
	int total_deleted = 0;

	power_table_alloc(sent, &pt);
	power_table_init(sent, &pt);

	pc.pt = &pt;
	pc.power_cost = 0;
	pc.null_links = (opts->min_null_count > 0);
	pc.N_changed = 1;  /* forces it always to make at least two passes */
	pc.sent = sent;

	/* Alternate passes; each pass ends with the same termination test. */
	while (1)
	{
		/* left-to-right pass */
		for (WordIdx w = 0; w < sent->length; w++)
		{
			/* dd points at the link that holds the current disjunct, so
			 * a disjunct can be unlinked by overwriting *dd. */
			for (Disjunct **dd = &sent->word[w].d; *dd != NULL; /* See: NEXT */)
			{
				Disjunct *d = *dd; /* just for convenience */
				/* No left connectors: nothing to check in this pass. */
				if (d->left == NULL)
				{
					dd = &d->next;  /* NEXT */
					continue;
				}

				/* The first left connector is already flagged BAD_WORD
				 * (presumably via a shared copy, see the notes above);
				 * such deletions are tallied in N_deleted[1]. */
				bool is_bad = d->left->nearest_word == BAD_WORD;

				/* A negative result from the update is treated as failure. */
				if (is_bad || left_connector_list_update(&pc, d->left, w, true) < 0)
				{
					mark_connector_sequence_for_dequeue(d->left, true);
					mark_connector_sequence_for_dequeue(d->right, false);

					/* discard the current disjunct */
					*dd = d->next; /* NEXT - set current disjunct to the next one */
					N_deleted[(int)is_bad]++;
					continue;
				}

				dd = &d->next; /* NEXT */
			}

			/* The left-to-right pass cleans the right table of word w. */
			clean_table(pt.r_table_size[w], pt.r_table[w]);
		}

		total_deleted += N_deleted[0] + N_deleted[1];
		lgdebug(D_PRUNE, "Debug: l->r pass changed %d and deleted %d (%d+%d)\n",
		        pc.N_changed, N_deleted[0]+N_deleted[1], N_deleted[0], N_deleted[1]);

		/* Stop once a pass changed nothing and deleted nothing;
		 * otherwise reset the per-pass counters. */
		if (pc.N_changed == 0 && N_deleted[0] == 0 && N_deleted[1] == 0) break;
		pc.N_changed = N_deleted[0] = N_deleted[1] = 0;

		/* right-to-left pass */
		/* The loop ends when w steps below 0 and compares equal to
		 * (WordIdx) -1 -- assumes WordIdx is an unsigned type; TODO confirm. */
		for (WordIdx w = sent->length-1; w != (WordIdx) -1; w--)
		{
			for (Disjunct **dd = &sent->word[w].d; *dd != NULL; /* See: NEXT */)
			{
				Disjunct *d = *dd; /* just for convenience */
				/* No right connectors: nothing to check in this pass. */
				if (d->right == NULL)
				{
					dd = &d->next;  /* NEXT */
					continue;
				}

				/* Same as in the left-to-right pass, for the first
				 * right connector. */
				bool is_bad = d->right->nearest_word == BAD_WORD;

				/* A result at or beyond the sentence length is treated
				 * as failure. */
				if (is_bad || right_connector_list_update(&pc, d->right, w, true) >= sent->length)
				{
					mark_connector_sequence_for_dequeue(d->right, true);
					mark_connector_sequence_for_dequeue(d->left, false);

					/* Discard the current disjunct. */
					*dd = d->next; /* NEXT - set current disjunct to the next one */
					N_deleted[(int)is_bad]++;
					continue;
				}

				dd = &d->next; /* NEXT */
			}

			/* The right-to-left pass cleans the left table of word w. */
			clean_table(pt.l_table_size[w], pt.l_table[w]);
		}

		total_deleted += N_deleted[0] + N_deleted[1];
		lgdebug(D_PRUNE, "Debug: r->l pass changed %d and deleted %d (%d+%d)\n",
		        pc.N_changed, N_deleted[0]+N_deleted[1], N_deleted[0], N_deleted[1]);

		/* Same termination test as after the left-to-right pass. */
		if (pc.N_changed == 0 && N_deleted[0] == 0 && N_deleted[1] == 0) break;
		pc.N_changed = N_deleted[0] = N_deleted[1] = 0;
	}
	power_table_delete(&pt);

	lgdebug(D_PRUNE, "Debug: power prune cost: %d\n", pc.power_cost);

	print_time(opts, "power pruned");
	if (verbosity_level(D_PRUNE))
	{
		prt_error("\n\\");
		prt_error("Debug: After power_pruning:\n\\");
		print_disjunct_counts(sent);
	}

#ifdef DEBUG
	/* Sanity check: no connector of a surviving disjunct may still be
	 * flagged BAD_WORD. */
	for (WordIdx w = 0; w < sent->length; w++)
	{
		for (Disjunct *d = sent->word[w].d; NULL != d; d = d->next)
		{
			for (Connector *c = d->left; NULL != c; c = c->next)
				assert(c->nearest_word != BAD_WORD);
			for (Connector *c = d->right; NULL != c; c = c->next)
				assert(c->nearest_word != BAD_WORD);
		}
	}
#endif

	return total_deleted;
}
Exemple #5
0
/**
 * Prune disjuncts using the post-processor's "contains one" rules.
 *
 * Every connector string of every disjunct is first entered into a
 * multiset table and every disjunct is marked. A marked disjunct is then
 * unmarked when one of its connectors matches the selector of a rule
 * (rules whose selector has a wildcard are skipped) and
 * rule_satisfiable() reports that the rule's link set cannot be
 * satisfied from the table. The connectors of an unmarked disjunct are
 * removed from the table, and the scan is repeated for as long as a
 * removal reports a change. Unmarked disjuncts are finally dropped by
 * delete_unmarked_disjuncts().
 *
 * @param sent  Sentence whose disjuncts are pruned.
 * @param opts  Parse options; pruning is skipped unless perform_pp_prune
 *              is set.
 * @return The number of disjuncts deleted; 0 when the sentence has no
 *         postprocessor or pp pruning is disabled.
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	if (NULL == sent->postprocessor || !opts->perform_pp_prune) return 0;

	pp_knowledge *kb = sent->postprocessor->knowledge;
	multiset_table *table = cms_table_new();
	int total_deleted = 0;
	bool changed;

	/* Mark every disjunct and count all of its connectors, the right
	 * list before the left list. */
	for (size_t w = 0; w < sent->length; w++)
	{
		for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
		{
			d->marked = true;
			for (int side = 0; side < 2; side++)
			{
				Connector *c = (side == 0) ? d->right : d->left;
				for (; c != NULL; c = c->next)
				{
					insert_in_cms_table(table, connector_string(c));
				}
			}
		}
	}

	do
	{
		int pass_deleted = 0;

		changed = false;
		for (size_t w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				bool doomed = false;

				if (!d->marked) continue;

				for (size_t r = 0; !doomed && r < kb->n_contains_one_rules; r++)
				{
					pp_rule *rule = &kb->contains_one_rules[r];

					/* Rules with a wildcard selector are not used here. */
					if (rule->selector_has_wildcard) continue;

					for (int side = 0; !doomed && side < 2; side++)
					{
						Connector *c = (side == 0) ? d->right : d->left;
						for (; c != NULL; c = c->next)
						{
							/* Only a connector that matches the rule's
							 * trigger (selector) is of interest. */
							if (!post_process_match(rule->selector, connector_string(c)))
								continue;

							/* The trigger matched: now the criterion
							 * links must be available. */
							if (rule_satisfiable(table, rule->link_set))
								continue;

							rule->use_count++;
							doomed = true;
							break;
						}
					}
				}

				if (!doomed) continue;

				pass_deleted++;
				total_deleted++;
				d->marked = false; /* The actual deletion is done later. */
				for (int side = 0; side < 2; side++)
				{
					Connector *c = (side == 0) ? d->right : d->left;
					for (; c != NULL; c = c->next)
					{
						changed |= delete_from_cms_table(table, connector_string(c));
					}
				}
			}
		}

		lgdebug(D_PRUNE, "Debug: pp_prune pass deleted %d\n", pass_deleted);
	} while (changed);

	cms_table_delete(table);

	if (total_deleted > 0)
	{
		delete_unmarked_disjuncts(sent);
		if (verbosity_level(D_PRUNE))
		{
			prt_error("\n\\");
			prt_error("Debug: After pp_prune:\n\\");
			print_disjunct_counts(sent);
		}
	}

	print_time(opts, "pp pruning");

	return total_deleted;
}