Exemplo n.º 1
0
/**
 * Print the first `top` elements of an offset vector, one element per line.
 *
 * For each element the output contains, in order:
 *  - the element index;
 *  - for a non-PCRE OV with a recorded capture_level array, the capture
 *    level in parentheses (0 if the element's end offset is negative);
 *  - " <unset>" if the start offset is negative, " END<0 (s,e)" if only the
 *    end offset is negative, otherwise the matched substring of `str`
 *    followed by its "(start,end)" offsets;
 *  - if capture group info was found for the element, " [<p><name><a>]",
 *    where the group's lookup mark is printed either before or after
 *    the name, depending on lookup_mark_pos.
 *
 * @param str        Subject string that the offsets in `pov` refer to.
 * @param pov        Offset vector; elements [0, top) are read.
 * @param top        Number of elements to print.
 * @param cd         Callout data providing capture_level and cgnum; may be
 *                   NULL, in which case no capture info is printed.
 * @param is_pcreov  true if `pov` is a PCRE OV (the element index is the
 *                   capture group), false for the tokenizer OV (the
 *                   recorded capture level is the capture group).
 */
static void printov(const char *str, ov_t *pov, int top, callout_data_t *cd, bool is_pcreov)
{
	int i;

	for (i = 0; i < top; i++)
	{
		/* Looked up afresh for every element, so an element that has no
		 * capture group info never shows the name found for an earlier one. */
		const cgnum_t *cgnump = NULL;

		printf("%2d", i);
		if (!is_pcreov && (NULL != cd) && (NULL != cd->capture_level))
			printf(" (%d)", (pov[i].e < 0) ? 0 : cd->capture_level[i]);
		printf(": ");
		if (pov[i].s < 0)
		{
			printf(" <unset>");
		} else
		{
			if (pov[i].e < 0)
				printf(" END<0 (%d,%d)", pov[i].s, pov[i].e);
			else
				printf(" %.*s (%d,%d)", pov[i].e - pov[i].s, str + pov[i].s, pov[i].s, pov[i].e);

		}

		/* Find the tokenizer capture group info for the current OV element:
		 * - For PCRE OV, use its index (if > 0) as capture group.
		 * - For the tokenizer OV, use the recorded capture level.
		 *  Since the cgnum array is 0-based and the first parenthesized capture
		 *  group is 1, subtract 1 to get the actual index. */
		if ((NULL != cd) && (NULL != cd->capture_level) && (NULL != cd->cgnum) &&
				(!is_pcreov || (i > 0)) && pov[i].e >= 0)
		{
			int cgidx = (is_pcreov ? i : cd->capture_level[i]) - 1;

			/* A capture level of 0 has no cgnum slot; never index at -1. */
			if (cgidx >= 0)
				cgnump = cd->cgnum[cgidx];
		}

		if (NULL != cgnump)
		{
			const char *a = "", *p = "";
			char lookup_mark[10];
			char *sm;

			if (NULL != cgnump->lookup_mark)
			{
				/* NOTE(review): 'a' presumably means "after the name" - confirm. */
				if ('a' == cgnump->lookup_mark_pos)
				{
					/* Work on a local copy so that the last SUBSCRIPT_MARK
					 * can be shown as '.' without modifying the original. */
					lg_strlcpy(lookup_mark, cgnump->lookup_mark, sizeof(lookup_mark));
					sm = strrchr(lookup_mark, SUBSCRIPT_MARK);
					if (NULL != sm) *sm = '.';
					a = lookup_mark;
				}
				else
				{
					p = cgnump->lookup_mark;
				}
			}
			printf(" [%s%s%s]", p, cgnump->name, a);
		}

		printf("\n");
	}
}
Exemplo n.º 2
0
/**
 * Print connector list to string.
 * This reverses the order of the connectors in the connector list,
 * so that the resulting list is in the same order as it would appear
 * in the dictionary. The character 'dir' is appended to each connector.
 *
 * The list is walked recursively: the tail of the list is printed first,
 * and the current connector is then appended after it.
 *
 * @param c    Head of the connector list; NULL prints nothing.
 * @param dir  Direction character appended after each connector string.
 * @param buf  Start of the output buffer.
 * @param sz   Total size of `buf` in bytes.
 * @return Pointer just past the last character written into `buf`
 *         (`buf` itself if the list is empty).
 */
static char * reversed_conlist_str(Connector* c, char dir, char* buf, size_t sz)
{
	char* p;
	size_t used;
	size_t avail;
	size_t len = 0;

	if (NULL == c) return buf;
	p = reversed_conlist_str(c->next, dir, buf, sz);

	/* Bytes still free at p. Stop here if there are none, instead of
	 * letting the unsigned subtractions below wrap around. */
	used = (size_t)(p - buf);
	if (sz <= used) return p;
	avail = sz - used;

	/* Only emit '@' if a terminating NUL still fits after it. */
	if (c->multi && (1 < avail))
		p[len++] = '@';

	len += lg_strlcpy(p+len, c->string, avail-len);

	/* lg_strlcpy is defined elsewhere; in case it reports the untruncated
	 * source length, keep len inside the buffer (NOTE(review): confirm its
	 * return value; if it returns the copied count this never triggers). */
	if (avail <= len) len = avail - 1;

	if (3 < avail-len)
	{
		p[len++] = dir;
		p[len++] = ' ';
		p[len] = 0x0;
	}
	return p+len;
}
Exemplo n.º 3
0
/**
 * lg_compute_disjunct_strings -- Given sentence, compute disjuncts.
 *
 * This routine will compute the string representation of the disjunct
 * used for each word in parsing the given sentence. A string
 * representation of the disjunct is needed for most of the corpus
 * statistics functions: this string, together with the "inflected"
 * word, is used as a key to index the statistics information in the
 * database.
 *
 * On return, lifo->nwords holds the sentence length and
 * lifo->disjunct_list_str[w] holds a strdup'ed string for every word
 * except word 0 (LEFT-WALL) and the last word (RIGHT-WALL), whose entries
 * stay NULL. Each string is the list of link names attached to the word,
 * ordered by the position of the word at the other end of the link; each
 * name is followed by '-' (other word is to the left) or '+' (otherwise)
 * and a space. A word without links gets an empty string.
 *
 * Does nothing if lifo->disjunct_list_str is already set. If memory
 * cannot be allocated, lifo->disjunct_list_str is left NULL.
 */
void lg_compute_disjunct_strings(Sentence sent, Linkage_info *lifo)
{
	char djstr[MAX_TOKEN_LENGTH*20]; /* no word will have more than 20 links */
	size_t copied, left;
	int i, w;
	int nwords = sent->length;
	Parse_info pi = sent->parse_info;
	int nlinks = pi->N_links;
	int *djlist, *djloco, *djcount;

	if (lifo->disjunct_list_str) return;
	lifo->nwords = nwords;
	lifo->disjunct_list_str = calloc(nwords, sizeof(char *));
	if (NULL == lifo->disjunct_list_str) return;

	/* One allocation holds three tables:
	 *   djcount[w]          - number of links attached to word w
	 *   djlist[w*nlinks+k]  - index of the k'th link attached to word w
	 *   djloco[w*nlinks+k]  - word at the other end of that link
	 * calloc zeroes it, so every djcount entry starts at 0. */
	djcount = calloc((size_t)nwords * (1 + 2 * (size_t)nlinks), sizeof(int));
	if (NULL == djcount)
	{
		free(lifo->disjunct_list_str);
		lifo->disjunct_list_str = NULL;
		return;
	}
	djlist = djcount + nwords;
	djloco = djlist + nwords*nlinks;

	/* Decrement nwords, so as to ignore the RIGHT-WALL */
	nwords --;

	/* Create a table of disjuncts for each word. */
	for (i=0; i<nlinks; i++)
	{
		int lword = pi->link_array[i].l;
		int rword = pi->link_array[i].r;
		int slot;

		/* Skip over RW link to the right wall */
		if (nwords <= rword) continue;

		slot = djcount[lword];
		djlist[lword*nlinks + slot] = i;
		djloco[lword*nlinks + slot] = rword;
		djcount[lword] ++;

		slot = djcount[rword];
		djlist[rword*nlinks + slot] = i;
		djloco[rword*nlinks + slot] = lword;
		djcount[rword] ++;

#ifdef DEBUG
		printf("Link: %d is %s--%s--%s\n", i, 
			sent->word[lword].string, pi->link_array[i].name,
			sent->word[rword].string);
#endif
	}

	/* Process each word in the sentence (skipping LEFT-WALL, which is
	 * word 0). */
	for (w=1; w<nwords; w++)
	{
		/* Sort the disjuncts for this word by the position of the word at
		 * the other end of the link. -- simple exchange sort */
		int slot = djcount[w];
		for (i=0; i<slot; i++)
		{
			int j;
			for (j=i+1; j<slot; j++)
			{
				if (djloco[w*nlinks + i] > djloco[w*nlinks + j])
				{
					int tmp = djloco[w*nlinks + i];
					djloco[w*nlinks + i] = djloco[w*nlinks + j];
					djloco[w*nlinks + j] = tmp;
					tmp = djlist[w*nlinks + i];
					djlist[w*nlinks + i] = djlist[w*nlinks + j];
					djlist[w*nlinks + j] = tmp;
				}
			}
		}

		/* Create the disjunct string. Start from an empty string so that
		 * a word with no links does not strdup an uninitialized buffer. */
		djstr[0] = '\0';
		copied = 0;
		for (i=0; i<slot; i++)
		{
			int dj = djlist[w*nlinks + i];
			const char *dirmark = (djloco[w*nlinks + i] < w) ? "- " : "+ ";

			/* The remaining space is recomputed from `copied` before every
			 * copy, so it can never be decremented below zero and wrap.
			 * lg_strlcpy is defined elsewhere; should it ever report more
			 * than fits, stop appending (the string is already truncated). */
			left = sizeof(djstr) - copied;
			copied += lg_strlcpy(djstr+copied, pi->link_array[dj].name, left);
			if (sizeof(djstr) <= copied) break;

			left = sizeof(djstr) - copied;
			copied += lg_strlcpy(djstr+copied, dirmark, left);
			if (sizeof(djstr) <= copied) break;
		}

		lifo->disjunct_list_str[w] = strdup(djstr);
	}

	free (djcount);
}