Exemplo n.º 1
0
/**
 * Queue the gword "p" in the path queue *wp, keeping the queue sorted in
 * reverse order of hier_depth.
 *
 * The deepest wordgraph alternatives must be scanned first.
 * Otherwise, this sentence fails: "T" to let there a notice
 * (It depends on "T" matching EMOTICON.)
 *
 * The new entry is placed just before the first queued word whose hier_depth
 * is not greater than the hier_depth of "p", or at the end of the queue if
 * there is no such word.
 *
 * Parameters:
 *   wp: address of the path queue pointer. It is replaced by the pointer
 *       returned from wordgraph_pathpos_resize().
 *   p: the word to queue (must not be NULL).
 *   same_word: mark that the same word is queued again.
 * For validation code only (until the wordgraph version is mature):
 *   used: mark that the word has already been issued into the 2D-array.
 *   diff_alternative: validate we don't queue words from the same alternative.
 *
 * Return value: false if "p" is already queued (the queue is not modified),
 * true if a new entry has been inserted for it.
 */
bool wordgraph_pathpos_add(Wordgraph_pathpos **wp, Gword *p, bool used,
                              bool same_word, bool diff_alternative)
{
	const size_t queue_len = wordgraph_pathpos_len(*wp);
	size_t slot = queue_len; /* Append, unless an earlier slot is found. */
	Wordgraph_pathpos *wpt;
	Wordgraph_pathpos *entry;

	assert(NULL != p);
	wordgraph_hier_position(p); /* in case it is not set yet */

#ifdef DEBUG
	if (7 <= verbosity) { printf("\n"); print_hier_position(p); }
#endif

	/* Scan the queued words up to the entry whose word is NULL. */
	wpt = *wp;
	while ((NULL != wpt) && (NULL != wpt->word))
	{
		/* The word is already queued - nothing to do. */
		if (wpt->word == p) return false;

		/* Only the first entry that is not deeper than "p" determines the
		 * insertion slot; once it is found the slot is not changed again. */
		if ((slot == queue_len) && (wpt->word->hier_depth <= p->hier_depth))
			slot = (size_t)(wpt - *wp);

		/* Validate that there are no words in the pathpos queue from the same
		 * alternative. This can be commented out when the wordgraph code is
		 * mature. FIXME */
		if (diff_alternative)
		{
			assert(same_word||wpt->same_word||!in_same_alternative(p,wpt->word),
			       "wordgraph_pathpos_add(): "
			       "Word%zu '%s' is from same alternative of word%zu '%s'",
			       p->node_num, p->subword,
			       wpt->word->node_num, wpt->word->subword);
		}

		wpt++;
	}

	/* The resize may return a different pointer, so the slot address is
	 * taken only after it. */
	*wp = wordgraph_pathpos_resize(*wp, queue_len);
	entry = &(*wp)[slot];

	if (slot < queue_len)
	{
		/* Shift the entries from the slot onward by one position. */
		memmove(entry + 1, entry, (queue_len - slot) * sizeof(*entry));
	}

	entry->word = p;
	entry->same_word = same_word;
	entry->used = used;
	entry->next_ok = false;

	return true;
}
Exemplo n.º 2
0
/**
 * Construct word paths (one or more) through the Wordgraph.
 *
 * Add "current_word" to the potential path.
 * Add "p" to the path queue, which defines the start of the next potential
 * paths to be checked. If "p" is already in the path queue, nothing is done.
 *
 * Each path is up to the current word (not including). It doesn't actually
 * construct a full path if there are null words - they break it. The final path
 * is constructed when the Wordgraph termination word is encountered.
 *
 * Note: The final path doesn't match the linkage word indexing if the linkage
 * contains empty words, at least until empty words are eliminated from the
 * linkage (in compute_chosen_words()). Further processing of the path is done
 * there in case morphology splits are to be hidden or there are morphemes with
 * null linkage.
 *
 * Parameters:
 *   nwp: address of the path queue pointer. It is replaced by the pointer
 *        returned from wordgraph_pathpos_resize().
 *   path: the current path; it is duplicated for the new queue entry unless
 *         the first previous word of "p" is of type MT_INFRASTRUCTURE.
 *   current_word: the word to append to the path of the new queue entry.
 *   p: the word to add to the path queue (must not be NULL).
 */
static void wordgraph_path_append(Wordgraph_pathpos **nwp, const Gword **path,
                                  Gword *current_word, /* add to the path */
                                  Gword *p)      /* add to the path queue */
{
	size_t queue_len = wordgraph_pathpos_len(*nwp);
	Wordgraph_pathpos *tail;

	assert(NULL != p, "Tried to add a NULL word to the word queue");

	/* Check if the path queue already contains the word to be added to it. */
	if (NULL != *nwp)
	{
		size_t i;

		for (i = 0; NULL != (*nwp)[i].word; i++)
		{
			/* If the word is found, there are 2 or more paths leading to this
			 * word (p) that end with the same number of consecutive null words
			 * that consist an entire alternative. These null words represent
			 * different ways to split the subword upward in the hierarchy, but
			 * since they don't have linkage we don't care which of these
			 * paths is used. */
			if ((*nwp)[i].word == p) return; /* The word is already in the queue */
		}
	}

	/* Not already in the path queue - add it as the last entry. */
	*nwp = wordgraph_pathpos_resize(*nwp, queue_len);
	tail = &(*nwp)[queue_len];
	tail->word = p;

	if (MT_INFRASTRUCTURE != p->prev[0]->morpheme_type)
	{
		/* We branch to another path. Duplicate it from the current path
		 * (gwordlist_len(path)+1 array elements are copied) and add the
		 * current word to it. */
		size_t path_bytes = (gwordlist_len(path)+1)*sizeof(*path);

		tail->path = malloc(path_bytes);
		memcpy(tail->path, path, path_bytes);
	}
	else
	{
		/* Previous word is the Wordgraph dummy word. Initialize the path. */
		tail->path = NULL;
	}

	/* FIXME (cast) but anyway gwordlist_append() doesn't modify Gword. */
	gwordlist_append((Gword ***)&tail->path, current_word);
}