/**
 * Debug-print the first `top` elements of the offset vector `pov`.
 *
 * One line is printed per element: its index, optionally the recorded
 * capture level (tokenizer OV only), the matched text with its (start,end)
 * offsets, and, when capture-group info can be found for this element,
 * the group name with its lookup mark in square brackets.
 *
 * @param str       Subject string that the offsets in `pov` index into.
 * @param pov       Offset vector; `s`/`e` are used as start/end offsets,
 *                  a negative `s` is printed as unset.
 * @param top       Number of elements of `pov` to print.
 * @param cd        Callout data supplying `capture_level` and `cgnum`;
 *                  may be NULL, in which case no group info is printed.
 * @param is_pcreov True if `pov` is the PCRE OV (element 0 then has no
 *                  capture group), false for the tokenizer OV.
 */
static void printov(const char *str, ov_t *pov, int top, callout_data_t *cd, bool is_pcreov)
{
	int i;

	for (i = 0; i < top; i++)
	{
		/* Looked up afresh for every element: an element that has no
		 * capture-group info must not inherit the info that was found
		 * for an earlier element. */
		const cgnum_t *cgnump = NULL;

		printf("%2d", i);
		if (!is_pcreov && (NULL != cd) && (NULL != cd->capture_level))
			printf(" (%d)", (pov[i].e < 0) ? 0 : cd->capture_level[i]);
		printf(": ");

		if (pov[i].s < 0)
		{
			printf(" <unset>");
		}
		else if (pov[i].e < 0)
		{
			printf(" END<0 (%d,%d)", pov[i].s, pov[i].e);
		}
		else
		{
			printf(" %.*s (%d,%d)", pov[i].e - pov[i].s, str + pov[i].s, pov[i].s, pov[i].e);
		}

		/* Find the tokenizer capture group info for the current OV element:
		 * - For PCRE OV, use its index (if > 0) as capture group.
		 * - For the tokenizer OV, use the recorded capture level.
		 * Since the cgnum array is 0-based and the first parenthesized capture
		 * group is 1, subtract 1 to get the actual index. */
		if ((NULL != cd) && (NULL != cd->capture_level) && (NULL != cd->cgnum) &&
		    (!is_pcreov || (i > 0)) && (pov[i].e >= 0))
		{
			int cgidx = (is_pcreov ? i : cd->capture_level[i]) - 1;

			/* A level of 0 (or less) does not name a capture group;
			 * never index before the start of the array. */
			if (cgidx >= 0) cgnump = cd->cgnum[cgidx];
		}

		if (NULL != cgnump)
		{
			const char *a = "", *p = "";
			char lookup_mark[10];

			if (NULL != cgnump->lookup_mark)
			{
				if ('a' == cgnump->lookup_mark_pos)
				{
					/* The mark is printed after the name, with the last
					 * SUBSCRIPT_MARK shown as '.'; edit a local copy so
					 * the original mark is left untouched. */
					char *sm;

					lg_strlcpy(lookup_mark, cgnump->lookup_mark, sizeof(lookup_mark));
					sm = strrchr(lookup_mark, SUBSCRIPT_MARK);
					if (NULL != sm) *sm = '.';
					a = lookup_mark;
				}
				else
				{
					/* Any other position: the mark is printed before the name. */
					p = cgnump->lookup_mark;
				}
			}
			printf(" [%s%s%s]", p, cgnump->name, a);
		}

		printf("\n");
	}
}
/**
 * Print connector list to string.
 * This reverses the order of the connectors in the connector list,
 * so that the resulting list is in the same order as it would appear
 * in the dictionary. The character 'dir' is appended to each connector,
 * followed by a space; a multi-connector is prefixed with '@'.
 *
 * @param c    Head of the connector list; NULL prints nothing.
 * @param dir  Direction character appended to each connector.
 * @param buf  Output buffer.
 * @param sz   Size of `buf` in bytes.
 * @return     Pointer to the position in `buf` just after the text written
 *             (i.e. where a following list can be appended). If `c` is
 *             NULL, `buf` is returned and nothing is written to it.
 *
 * Output that does not fit is truncated; the returned pointer never goes
 * past the end of `buf`.
 */
static char * reversed_conlist_str(Connector* c, char dir, char* buf, size_t sz)
{
	char *p;
	size_t used, room, copied;
	size_t len = 0;

	if (NULL == c) return buf;

	/* Emit the tail of the list first, so that the order is reversed. */
	p = reversed_conlist_str(c->next, dir, buf, sz);

	/* All size arithmetic is unsigned: stop here once the buffer is used
	 * up, instead of letting the remaining size wrap around. */
	used = (size_t)(p - buf);
	if (sz <= used) return p;
	room = sz - used;

	/* Write '@' only if a terminating NUL still fits after it. */
	if (c->multi && (1 < room))
		p[len++] = '@';

	/* NOTE(review): lg_strlcpy() is defined elsewhere; on truncation it
	 * may report the size it was given or the full source length. In
	 * either case at most room-len-1 characters were stored before the
	 * NUL, so clamp to that. */
	copied = lg_strlcpy(p+len, c->string, room-len);
	if (copied >= room-len) copied = room-len-1;
	len += copied;

	if (3 < room-len)
	{
		p[len++] = dir;
		p[len++] = ' ';
		p[len] = 0x0;
	}
	return p+len;
}
/**
 * lg_compute_disjunct_strings -- Given sentence, compute disjuncts.
 *
 * This routine will compute the string representation of the disjunct
 * used for each word in parsing the given sentence. A string
 * representation of the disjunct is needed for most of the corpus
 * statistics functions: this string, together with the "inflected"
 * word, is used as a key to index the statistics information in the
 * database.
 *
 * For each word, the names of the links attached to it are listed in
 * order of increasing index of the word at the other end of the link.
 * Each name is followed by '-' if that other word is to the left, or
 * '+' otherwise, and then by a space. A word with no links gets an
 * empty string.
 *
 * On return, lifo->nwords is the sentence length and
 * lifo->disjunct_list_str is an array of that many entries. The entries
 * for the first word (LEFT-WALL) and the last word (RIGHT-WALL) are left
 * NULL; all others are strdup()'ed strings.
 * NOTE(review): releasing them is presumably up to the owner of `lifo`
 * -- confirm against the code that frees Linkage_info.
 *
 * Nothing is done if lifo->disjunct_list_str is already set, or if
 * memory cannot be allocated (in which case `lifo` is left unchanged).
 */
void lg_compute_disjunct_strings(Sentence sent, Linkage_info *lifo)
{
	char djstr[MAX_TOKEN_LENGTH*20]; /* no word will have more than 20 links */
	size_t copied;
	int i, w;
	int nwords = sent->length;
	Parse_info pi = sent->parse_info;
	int nlinks = pi->N_links;
	char **djstrings;
	int *djlist, *djloco, *djcount;

	/* Already computed. */
	if (lifo->disjunct_list_str) return;

	/* One zero-filled scratch area holds three tables:
	 *   djcount[w]            number of links seen so far for word w
	 *   djlist[w*nlinks + k]  index of the k'th link of word w
	 *   djloco[w*nlinks + k]  word at the other end of that link
	 * Sizes are computed in size_t to avoid int overflow. */
	djstrings = calloc((size_t) nwords, sizeof(char *));
	djcount = calloc((size_t) nwords * (1 + 2 * (size_t) nlinks), sizeof(int));
	if ((NULL == djstrings) || (NULL == djcount))
	{
		free(djstrings);
		free(djcount);
		return;
	}
	djlist = djcount + nwords;
	djloco = djlist + nwords*nlinks;

	lifo->nwords = nwords;
	lifo->disjunct_list_str = djstrings;

	/* Decrement nwords, so as to ignore the RIGHT-WALL */
	nwords--;

	/* Create a table of disjuncts for each word. */
	for (i=0; i<nlinks; i++)
	{
		int lword = pi->link_array[i].l;
		int rword = pi->link_array[i].r;
		int slot;

		/* Skip over RW link to the right wall */
		if (nwords <= rword) continue;

		slot = djcount[lword]++;
		djlist[lword*nlinks + slot] = i;
		djloco[lword*nlinks + slot] = rword;

		slot = djcount[rword]++;
		djlist[rword*nlinks + slot] = i;
		djloco[rword*nlinks + slot] = lword;

#ifdef DEBUG
		printf("Link: %d is %s--%s--%s\n", i, sent->word[lword].string, pi->link_array[i].name, sent->word[rword].string);
#endif
	}

	/* Process each word in the sentence (skipping LEFT-WALL, which is
	 * word 0). */
	for (w=1; w<nwords; w++)
	{
		int *wlist = djlist + w*nlinks;
		int *wloco = djloco + w*nlinks;
		int slot = djcount[w];
		int j;

		/* Sort the links of this word by the position of the word at
		 * the other end -- simple exchange sort; the lists are short. */
		for (i=0; i<slot; i++)
		{
			for (j=i+1; j<slot; j++)
			{
				if (wloco[i] > wloco[j])
				{
					int tmp = wloco[i];
					wloco[i] = wloco[j];
					wloco[j] = tmp;

					tmp = wlist[i];
					wlist[i] = wlist[j];
					wlist[j] = tmp;
				}
			}
		}

		/* Create the disjunct string. Start from an empty string for
		 * every word, so that a word without links does not pick up
		 * stale (or uninitialized) buffer contents. */
		djstr[0] = 0x0;
		copied = 0;
		for (i=0; i<slot; i++)
		{
			/* copied < sizeof(djstr) always holds here, so left >= 1. */
			size_t left = sizeof(djstr) - copied;
			char dirch = (wloco[i] < w) ? '-' : '+';
			int n = snprintf(djstr+copied, left, "%s%c ",
			                 pi->link_array[wlist[i]].name, dirch);

			if (n < 0)
			{
				/* Output error: keep what was built so far. */
				djstr[copied] = 0x0;
				break;
			}

			/* Truncated: snprintf has NUL-terminated what fits;
			 * nothing more can be added. */
			if ((size_t) n >= left) break;

			copied += (size_t) n;
		}
		lifo->disjunct_list_str[w] = strdup(djstr);
	}

	free (djcount);
}