コード例 #1
0
ファイル: api.c プロジェクト: suriyadeepan/link-grammar
/**
 * Drop the empty words from a linkage, compacting the chosen-disjunct
 * array and the link array in place.
 *
 * A word is treated as empty when it has a chosen disjunct whose first
 * word entry has morpheme type MT_EMPTY; words without a chosen disjunct
 * are kept. Links having an endpoint on a dropped word are discarded, and
 * the endpoints of the surviving links are renumbered to the new word
 * positions. lkg->num_words and lkg->num_links are set to the new counts.
 *
 * XXX Should the dict-cap tokens be removed here as well? For now they
 * are left in place for debugging.
 */
static void remove_empty_words(Linkage lkg)
{
	Disjunct **chosen = lkg->chosen_disjuncts;
	/* new_pos[old word index] is the new word index, or -1 if dropped. */
	int *new_pos = alloca(lkg->num_words * sizeof(*new_pos));
	size_t rd, wr;

	if (verbosity >= 4)
	{
		lgdebug(0, "Info: chosen_disjuncts before removing empty words:\n");
		print_chosen_disjuncts_words(lkg);
	}

	/* Pass 1: slide the kept disjuncts down and record where each went. */
	wr = 0;
	for (rd = 0; rd < lkg->num_words; rd++)
	{
		Disjunct *dj = chosen[rd];

		if ((NULL == dj) || (MT_EMPTY != dj->word[0]->morpheme_type))
		{
			chosen[wr] = dj;
			new_pos[rd] = wr;
			wr++;
		}
		else
		{
			new_pos[rd] = -1;
		}
	}
	/* The tail of the array is left allocated; per the original note,
	 * all of it gets freed in free_linkages(). */
	lkg->num_words = wr;

	if (verbosity >= 4)
	{
		lgdebug(0, "Info: chosen_disjuncts after removing empty words:\n");
		print_chosen_disjuncts_words(lkg);
	}

	/* Pass 2: keep only the links whose two ends both survived. */
	wr = 0;
	for (rd = 0; rd < lkg->num_links; rd++)
	{
		const Link *src = &(lkg->link_array[rd]);
		Link *dst;

		if ((-1 == new_pos[src->rw]) || (-1 == new_pos[src->lw])) continue;

		/* The write position never gets ahead of the read position, so
		 * copying within the same array is safe. The word numbers are
		 * translated; everything else is copied unchanged. */
		dst = &(lkg->link_array[wr]);
		wr++;

		dst->lw = new_pos[src->lw];
		dst->rw = new_pos[src->rw];
		dst->lc = src->lc;
		dst->rc = src->rc;
		dst->link_name = src->link_name;
	}
	/* As above, the unused tail is not freed here. */
	lkg->num_links = wr;
}
コード例 #2
0
ファイル: parse.c プロジェクト: hckiang/link-grammar
/**
 * Fill the sentence's linkage array with the linkages that pass the
 * morphology sanity check.
 *
 * Linkages are extracted one at a time via `pex` into the next free slot
 * of sent->lnkages. A linkage accepted by sane_linkage_morphism() has its
 * empty words removed and keeps its slot; a rejected one has its slot
 * reset and reused for the next attempt. On return,
 * sent->num_valid_linkages holds the number of accepted linkages and
 * sent->num_linkages_alloced is shrunk to that same number.
 *
 * @param sent        Sentence whose linkage array is to be filled.
 * @param pex         Extractor handed to extract_links().
 * @param overflowed  When true, linkages are picked randomly.
 * @param opts        Parse options handed to sane_linkage_morphism().
 */
static void process_linkages(Sentence sent, extractor_t* pex,
                             bool overflowed, Parse_Options opts)
{
	/* Nothing to do when no linkages were found; the check on the
	 * allocation count additionally avoids a later crash. */
	if ((0 == sent->num_linkages_found) || (0 == sent->num_linkages_alloced))
		return;

	/* Sample randomly whenever there are more linkages than requested. */
	const bool pick_randomly = overflowed ||
	    (sent->num_linkages_found > (int) sent->num_linkages_alloced);

	/* Overflow happens for some long sentences, and is especially common
	 * with the amy/ady random splitters. Because of a bug/feature in how
	 * the word-graph and the parser interact, morphologically valid
	 * linkages may be one in a thousand, or rarer. We want as many of
	 * them as we can get, yet without burning CPU, so the search is
	 * capped. (The alternatives-compatibility check in the fast matcher,
	 * alt_connection_possible(), has recently alleviated this problem.)
	 */
#define MAX_TRIES 250000

	/* When sampling, try many more linkages than there are slots, but
	 * never more than actually exist; otherwise one try per slot. */
	const int maxtries = pick_randomly
	    ? MIN((int) sent->num_linkages_alloced + MAX_TRIES,
	          sent->num_linkages_found)
	    : (int) sent->num_linkages_alloced;

	sent->num_valid_linkages = 0;

	size_t n_accepted = 0;      /* Also the index of the slot being filled. */
	size_t n_rejected = 0;
	bool slot_is_fresh = true;  /* Current slot still needs initialization. */
	int attempt;

	for (attempt = 0; attempt < maxtries; attempt++)
	{
		Linkage lkg = &sent->lnkages[n_accepted];

		/* A negative index asks extract-links for a random pick; with
		 * reproducible-rand its value serves as the rand seed. */
		lkg->lifo.index = pick_randomly ? -(attempt+1) : attempt;

		if (slot_is_fresh)
		{
			partial_init_linkage(sent, lkg, sent->length);
			slot_is_fresh = false;
		}
		extract_links(pex, lkg);
		compute_link_names(lkg, sent->string_set);

		if (verbosity_level(+D_PL))
		{
			err_msg(lg_Debug, "chosen_disjuncts before:\n\\");
			print_chosen_disjuncts_words(lkg, /*prt_opt*/true);
		}

		if (!sane_linkage_morphism(sent, lkg, opts))
		{
			/* Rejected: reset the slot so the next attempt can reuse it.
			 * Only the counts and the chosen disjuncts are cleared; the
			 * link array contents are left as they are. */
			n_rejected++;
			lkg->num_links = 0;
			lkg->num_words = sent->length;
			memset(lkg->chosen_disjuncts, 0, sent->length * sizeof(Disjunct *));
			continue;
		}

		remove_empty_words(lkg);

		if (verbosity_level(+D_PL))
		{
			err_msg(lg_Debug, "chosen_disjuncts after:\n\\");
			print_chosen_disjuncts_words(lkg, /*prt_opt*/false);
		}

		/* Accepted: advance to the next slot, stopping once all are used. */
		slot_is_fresh = true;
		n_accepted++;
		if (n_accepted >= sent->num_linkages_alloced) break;
	}

	/* The current slot was initialized but holds no accepted linkage:
	 * release it. */
	if (!slot_is_fresh) free_linkage(&sent->lnkages[n_accepted]);

	sent->num_valid_linkages = n_accepted;

	/* Whatever lies past the accepted linkages was never filled in, so
	 * pretend the array is just that short. */
	sent->num_linkages_alloced = sent->num_valid_linkages;

	/* Leaving the loop through `break` skips the final increment, hence
	 * the +1 correction in the number of tries reported. */
	lgdebug(D_PARSE, "Info: sane_morphism(): %zu of %d linkages had "
	        "invalid morphology construction\n", n_rejected,
	        attempt + (attempt != maxtries));
}