/**
 * Build a Linkage for the k-th entry of sent->link_info.
 *
 * k must satisfy 0 <= k < sent->num_linkages_post_processed (asserted).
 * The new object records the sentence and the parse options, gets its
 * links via extract_links(), its words via compute_chosen_words(), and
 * its sublinkages via extract_fat_linkage() or extract_thin_linkage(),
 * depending on set_has_fat_down(). If the dictionary has a postprocessor,
 * linkage_post_process() is run on the result.
 *
 * @return the newly exalloc'ed Linkage.
 */
Linkage linkage_create(int k, Sentence sent, Parse_Options opts)
{
	Linkage lkg;

	assert((k < sent->num_linkages_post_processed) && (k >= 0), "index out of range");

	/* exalloc is used because this object is external to the parser itself. */
	lkg = (Linkage) exalloc(sizeof(struct Linkage_s));

	lkg->num_words = sent->length;
	lkg->word = (char **) exalloc(lkg->num_words*sizeof(char *));
	lkg->current = 0;
	lkg->num_sublinkages = 0;
	lkg->sublinkage = NULL;
	lkg->unionized = FALSE;
	lkg->sent = sent;
	lkg->opts = opts;
	lkg->info = sent->link_info[k];

	extract_links(sent->link_info[k].index, sent->null_count, sent->parse_info);
	compute_chosen_words(sent, lkg);

	if (set_has_fat_down(sent))
		extract_fat_linkage(sent, opts, lkg);
	else
		extract_thin_linkage(sent, opts, lkg);

	if (NULL != sent->dict->postprocessor)
		linkage_post_process(lkg, sent->dict->postprocessor);

	return lkg;
}
/**
 * Allocate a CNode whose label is a private copy of q.
 * The node starts with NULL child and next pointers, and with
 * start and end both set to -1.
 */
static CNode * make_CNode(char *q)
{
	size_t label_size = strlen(q) + 1;
	CNode *node = exalloc(sizeof(CNode));

	node->label = (char *) exalloc(sizeof(char) * label_size);
	strcpy(node->label, q);

	node->child = (CNode *) NULL;
	node->next = (CNode *) NULL;
	node->start = -1;
	node->end = -1;

	return node;
}
String * string_new(void) { #define INITSZ 30 String * string; string = (String *) exalloc(sizeof(String)); string->allocated = INITSZ; string->p = (char *) exalloc(INITSZ*sizeof(char)); string->p[0] = '\0'; string->eos = 0; return string; }
/**
 * Allocate (with exalloc) a Sublinkage holding room for pi->N_links links,
 * then pass it to zero_sublinkage().
 *
 * The link count is validated *before* anything is allocated or touched,
 * so an over-large N_links is reported before any work sized by it is done.
 */
static Sublinkage * ex_create_sublinkage(Parse_info pi)
{
	Sublinkage *s;

	assert(pi->N_links < MAX_LINKS, "Too many links");

	s = (Sublinkage *) exalloc(sizeof(Sublinkage));
	s->link = (Link *) exalloc(pi->N_links*sizeof(Link));
	s->num_links = pi->N_links;
	zero_sublinkage(s);

	return s;
}
/**
 * Return a deep copy of ppi: the domain_name array and every string in it
 * are duplicated with exalloc.
 *
 * The copy is built in an automatic (not static) variable, so the function
 * keeps no state between calls. It is zeroed first so that any member not
 * explicitly set below is zero, as it was with the former static object.
 */
PP_info excopy_pp_info(PP_info ppi)
{
	PP_info newppi;
	int i;

	memset(&newppi, 0, sizeof(newppi));

	newppi.num_domains = ppi.num_domains;
	newppi.domain_name = (char **) exalloc(sizeof(char *)*ppi.num_domains);
	for (i=0; i<newppi.num_domains; ++i)
	{
		newppi.domain_name[i] = (char *) exalloc(sizeof(char)*(strlen(ppi.domain_name[i])+1));
		strcpy(newppi.domain_name[i], ppi.domain_name[i]);
	}
	return newppi;
}
/**
 * Deep-copy a NULL-terminated list of connectors.
 * Every node and every connector string is duplicated with exalloc;
 * the copies are chained in the same order as the originals.
 *
 * @return head of the copied list, or NULL if c is NULL.
 */
Connector * excopy_connectors(Connector * c)
{
	Connector *head = NULL;
	Connector **link_here = &head; /* where the next copy gets attached */

	for (; c != NULL; c = c->next)
	{
		Connector *dup = init_connector((Connector *) exalloc(sizeof(Connector)));

		*dup = *c;
		dup->string = (char *) exalloc(sizeof(char)*(strlen(c->string)+1));
		strcpy(dup->string, c->string);
		dup->next = NULL;

		*link_here = dup;
		link_here = &dup->next;
	}
	return head;
}
/**
 * Deep-copy a link: the struct, its name string, and both connector
 * lists (via excopy_connectors) are duplicated with exalloc.
 *
 * @return the copy, or NULL if l is NULL.
 */
Link excopy_link(Link l)
{
	Link dup;

	if (NULL == l) return NULL;

	dup = (Link) exalloc(sizeof(struct Link_s));
	dup->l = l->l;
	dup->r = l->r;

	dup->name = (char *) exalloc(sizeof(char)*(strlen(l->name)+1));
	strcpy(dup->name, l->name);

	dup->lc = excopy_connectors(l->lc);
	dup->rc = excopy_connectors(l->rc);

	return dup;
}
/**
 * Append printf-style formatted text to a String, growing it as needed.
 *
 * The text is first formatted into a TMPLEN-byte scratch buffer. The
 * return value of vsnprintf() is checked:
 *  - negative (formatting error): the String is left unchanged;
 *  - >= TMPLEN (text did not fit in the scratch buffer): the text is
 *    formatted a second time, directly into the String, so that nothing
 *    is truncated and eos always matches the terminating NUL.
 */
void append_string(String * string, const char *fmt, ...)
{
#define TMPLEN 1024
	char temp_string[TMPLEN];
	size_t templen;
	int formatted;
	va_list args;
	va_list args_again; /* second copy, for the direct-format path */

	va_start(args, fmt);
	va_copy(args_again, args);
	formatted = vsnprintf(temp_string, TMPLEN, fmt, args);
	va_end(args);

	if (formatted < 0)
	{
		va_end(args_again);
		return;
	}
	templen = (size_t) formatted;

	if (string->allocated <= string->eos + templen)
	{
		/* Not enough room for the text plus its NUL; reallocate. */
		size_t new_size = 2 * string->allocated + templen + 1;
		char *p = (char *) exalloc(sizeof(char)*new_size);

		strcpy(p, string->p);
		exfree(string->p, sizeof(char)*string->allocated);
		string->p = p;
		string->allocated = new_size;
	}

	if (templen < TMPLEN)
	{
		strcpy(string->p + string->eos, temp_string);
	}
	else
	{
		/* The scratch copy is truncated; the String now has room for all
		 * templen characters plus the NUL, so format in place. */
		vsnprintf(string->p + string->eos, templen + 1, fmt, args_again);
	}
	va_end(args_again);

	string->eos += templen;
}
/**
 * Store the domain names in the linkage.
 * This is an utter waste of CPU time if one is not interested
 * in printing the domain names.
 *
 * Does nothing if either argument is NULL. Otherwise a pp_info array with
 * one entry per link is allocated and cleared; if the post-processing node
 * carries a violation the entries are left empty.
 *
 * XXX TODO: refactor, so that this does not need to be called except
 * when printing the domain names.
 */
void linkage_set_domain_names(Postprocessor * postprocessor, Linkage linkage)
{
	PP_node *node;
	size_t lnk;

	if ((NULL == linkage) || (NULL == postprocessor)) return;

	/* The only reason to build the type array is for this function. */
	build_type_array(postprocessor);

	linkage->pp_info = (PP_info *) exalloc(sizeof(PP_info) * linkage->num_links);
	for (lnk = 0; lnk < linkage->num_links; lnk++)
	{
		PP_info *info = &linkage->pp_info[lnk];

		info->num_domains = 0;
		info->domain_name = NULL;
	}

	/* Copy the post-processing results over into the linkage */
	node = postprocessor->pp_node;
	if (NULL != node->violation) return;

	for (lnk = 0; lnk < linkage->num_links; lnk++)
	{
		PP_info *info = &linkage->pp_info[lnk];
		D_type_list *d;
		size_t count = 0;
		size_t slot = 0;

		/* First pass: how many domain types does this link have? */
		for (d = node->d_type_array[lnk]; d != NULL; d = d->next) count++;

		info->num_domains = count;
		if (count > 0)
		{
			info->domain_name = (const char **) exalloc(sizeof(const char *)*count);
		}

		/* Second pass: one single-character name per domain type. */
		for (d = node->d_type_array[lnk]; d != NULL; d = d->next)
		{
			char buff[5];

			snprintf(buff, 5, "%c", d->type);
			info->domain_name[slot] = string_set_add(buff, postprocessor->string_set);
			slot++;
		}
	}
}
/**
 * Replace the name of link l with a freshly exalloc'ed copy of s.
 *
 * The copy is made before the old name is released, so the call is safe
 * even when s points at (or into) the current l->name.
 */
static void replace_link_name(Link l, const char *s)
{
	char * t;

	t = (char *) exalloc(sizeof(char)*(strlen(s)+1));
	strcpy(t, s);

	exfree((char *) l->name, sizeof(char)*(strlen(l->name)+1));
	l->name = t;
}
/**
 * Store the domain names in the linkage. These are not needed
 * unless the user asks the domain names to be printed!
 *
 * Does nothing if either argument is NULL. Otherwise a zero-filled pp_info
 * array with one entry per link is allocated; if the post-processing node
 * carries a violation the entries are left zeroed.
 */
void linkage_set_domain_names(Postprocessor *postprocessor, Linkage linkage)
{
	PP_node *node;
	size_t idx;
	size_t info_bytes;

	if (NULL == linkage) return;
	if (NULL == postprocessor) return;

	info_bytes = sizeof(PP_info) * linkage->num_links;
	linkage->pp_info = (PP_info *) exalloc(info_bytes);
	memset(linkage->pp_info, 0, info_bytes);

	/* Copy the post-processing results over into the linkage */
	node = postprocessor->pp_node;
	if (NULL != node->violation) return;

	for (idx = 0; idx < linkage->num_links; idx++)
	{
		PP_info *info = &linkage->pp_info[idx];
		D_type_list *d;
		size_t n_types = 0;
		size_t out = 0;

		for (d = node->d_type_array[idx]; d != NULL; d = d->next) n_types++;

		info->num_domains = n_types;
		if (n_types > 0)
		{
			info->domain_name = (const char **) exalloc(sizeof(const char *)*n_types);
		}

		for (d = node->d_type_array[idx]; d != NULL; d = d->next)
		{
			/* Each domain name is the one-character string of its type. */
			char buff[] = {d->type, '\0'};

			info->domain_name[out] = string_set_add(buff, postprocessor->string_set);
			out++;
		}
	}
}
/**
 * Build a Sublinkage holding a copy of every link, over all sublinkages
 * of the linkage, for which link_already_appears() reports false.
 *
 * The link and its pp_info are deep-copied. The violation string is copied
 * from the first sublinkage that contributes a link and has one. Only the
 * num_links, link, pp_info and violation members of the result are set here.
 */
Sublinkage unionize_linkage(Linkage linkage)
{
	Sublinkage u;
	int sub, pos;
	int count = 0;

	/* Pass 1: count the links that will go into the union. */
	for (sub = 0; sub < linkage->num_sublinkages; ++sub)
	{
		Sublinkage *src = &linkage->sublinkage[sub];

		for (pos = 0; pos < src->num_links; ++pos)
		{
			if (link_already_appears(linkage, src->link[pos], sub)) continue;
			count++;
		}
	}

	u.num_links = count;
	u.link = (Link *) exalloc(sizeof(Link)*count);
	u.pp_info = (PP_info *) exalloc(sizeof(PP_info)*count);
	u.violation = NULL;

	/* Pass 2: copy them. */
	count = 0;
	for (sub = 0; sub < linkage->num_sublinkages; ++sub)
	{
		Sublinkage *src = &linkage->sublinkage[sub];

		for (pos = 0; pos < src->num_links; ++pos)
		{
			Link link = src->link[pos];
			char *viol;

			if (link_already_appears(linkage, link, sub)) continue;

			u.link[count] = excopy_link(link);
			u.pp_info[count] = excopy_pp_info(src->pp_info[pos]);

			viol = src->violation;
			if ((viol != NULL) && (u.violation == NULL))
			{
				u.violation = (char *) exalloc((strlen(viol)+1)*sizeof(char));
				strcpy(u.violation, viol);
			}
			count++;
		}
	}

	return u;
}
/**
 * Partial, but not full, initialization of the linkage struct.
 *
 * Sets up an empty, zero-filled link array with room for 2 * N_words
 * links, and a zero-filled chosen_disjuncts array with one slot per word.
 * disjunct_list_str, pp_info (and sense_list, with USE_CORPUS) are set to
 * NULL. No other members are touched.
 */
void partial_init_linkage(Linkage lkg, unsigned int N_words)
{
	lkg->num_words = N_words;

	/* Link array: allocated with malloc, not exalloc. */
	lkg->num_links = 0;
	lkg->lasz = 2 * N_words;
	lkg->link_array = (Link *) malloc(lkg->lasz * sizeof(Link));
	memset(lkg->link_array, 0, lkg->lasz * sizeof(Link));

	/* One chosen disjunct slot per word. */
	lkg->cdsz = N_words;
	lkg->chosen_disjuncts = (Disjunct **) exalloc(lkg->cdsz * sizeof(Disjunct *));
	memset(lkg->chosen_disjuncts, 0, N_words * sizeof(Disjunct *));

	lkg->disjunct_list_str = NULL;
#ifdef USE_CORPUS
	lkg->sense_list = NULL;
#endif
	lkg->pp_info = NULL;
}
/**
 * Append printf-style formatted text to a String, growing it as needed.
 *
 * With _MSC_VER defined, a copy of the format is made in which every
 * "%zu" is rewritten to "%Iu" before formatting.
 *
 * The text is first formatted into a TMPLEN-byte scratch buffer. The
 * return value of vsnprintf() is checked:
 *  - negative (formatting error): the String is left unchanged;
 *  - >= TMPLEN (text did not fit in the scratch buffer): the text is
 *    formatted a second time, directly into the String, so that nothing
 *    is truncated and eos always matches the terminating NUL.
 */
void append_string(String * string, const char *fmt, ...)
{
#define TMPLEN 1024
	char temp_string[TMPLEN];
	size_t templen;
	int formatted;
	va_list args;
	va_list args_again; /* second copy, for the direct-format path */

#ifdef _MSC_VER
	char * tmp = alloca(strlen(fmt)+1);
	char * tok = tmp;

	strcpy(tmp, fmt);
	while ((tok = strstr(tok, "%zu"))) { tok[1] = 'I'; tok++;}

	fmt = tmp;
#endif

	va_start(args, fmt);
	va_copy(args_again, args);
	formatted = vsnprintf(temp_string, TMPLEN, fmt, args);
	va_end(args);

	if (formatted < 0)
	{
		va_end(args_again);
		return;
	}
	templen = (size_t) formatted;

	if (string->allocated <= string->eos + templen)
	{
		/* Not enough room for the text plus its NUL; reallocate. */
		size_t new_size = 2 * string->allocated + templen + 1;
		char *p = (char *) exalloc(sizeof(char)*new_size);

		strcpy(p, string->p);
		exfree(string->p, sizeof(char)*string->allocated);
		string->p = p;
		string->allocated = new_size;
	}

	if (templen < TMPLEN)
	{
		strcpy(string->p + string->eos, temp_string);
	}
	else
	{
		/* The scratch copy is truncated; the String now has room for all
		 * templen characters plus the NUL, so format in place. */
		vsnprintf(string->p + string->eos, templen + 1, fmt, args_again);
	}
	va_end(args_again);

	string->eos += templen;
}
/**
 * Append to the linkage one extra sublinkage produced by
 * unionize_linkage(), and make it the current one.
 *
 * @return 0 if the linkage was already unionized (only `current` is
 *         reset to the last sublinkage); 1 otherwise. When there is
 *         exactly one sublinkage nothing is appended; the linkage is just
 *         flagged as unionized.
 */
int linkage_compute_union(Linkage linkage)
{
	int idx;
	int old_count = linkage->num_sublinkages;
	Sublinkage *grown;
	Sublinkage *uni;

	if (linkage->unionized)
	{
		linkage->current = linkage->num_sublinkages-1;
		return 0;
	}
	if (1 == old_count)
	{
		linkage->unionized = TRUE;
		return 1;
	}

	/* Move the existing sublinkages into an array with one more slot. */
	grown = (Sublinkage *) exalloc(sizeof(Sublinkage)*(old_count+1));
	for (idx = 0; idx < old_count; ++idx)
	{
		grown[idx] = linkage->sublinkage[idx];
	}
	exfree(linkage->sublinkage, sizeof(Sublinkage)*old_count);
	linkage->sublinkage = grown;

	uni = &linkage->sublinkage[old_count];
	*uni = unionize_linkage(linkage);

	/* The domain data will not be needed for the union -- zero it out */
	uni->pp_data.N_domains = 0;
	uni->pp_data.length = 0;
	uni->pp_data.links_to_ignore = NULL;
	for (idx = 0; idx < MAX_SENTENCE; ++idx)
	{
		uni->pp_data.word_links[idx] = NULL;
	}

	linkage->num_sublinkages++;
	linkage->unionized = TRUE;
	linkage->current = linkage->num_sublinkages-1;
	return 1;
}
/**
 * Print out the constituent tree.
 * mode 1: treebank-style constituent tree
 * mode 2: flat, bracketed tree [A like [B this B] A]
 * mode 3: flat, treebank-style tree (A like (B this) )
 *
 * @return a newly allocated string, or NULL when mode is 0, when the
 *         dictionary has no constituent_pp, or when the working context
 *         needed for mode 2 cannot be allocated. For modes 1 and 3 the
 *         string is exalloc'ed here; for mode 2 it is whatever
 *         print_flat_constituents() returns. Any other mode trips the
 *         assert.
 */
char * linkage_print_constituent_tree(Linkage linkage, int mode)
{
	if ((mode == 0) || (linkage->sent->dict->constituent_pp == NULL))
	{
		return NULL;
	}

	if (mode == 1 || mode == 3)
	{
		String * cs = String_create();
		CNode * root = linkage_constituent_tree(linkage);
		char * p;

		print_tree(cs, (mode==1), root, 0, 0);
		linkage_free_constituent_tree(root);
		append_string(cs, "\n");

		/* Hand back a tight copy and release the String itself. */
		p = exalloc(strlen(cs->p)+1);
		strcpy(p, cs->p);
		exfree(cs->p, sizeof(char)*cs->allocated);
		exfree(cs, sizeof(String));
		return p;
	}

	if (mode == 2)
	{
		char * str;
		con_context_t *ctxt;

		ctxt = (con_context_t *) malloc(sizeof(con_context_t));
		if (NULL == ctxt) return NULL; /* out of memory */

		str = print_flat_constituents(ctxt, linkage);
		free(ctxt);
		return str;
	}

	assert(0, "Illegal mode in linkage_print_constituent_tree");
	return NULL;
}
static Linkage linkage_array_new(int num_to_alloc) { Linkage lkgs = (Linkage) exalloc(num_to_alloc * sizeof(struct Linkage_s)); memset(lkgs, 0, num_to_alloc * sizeof(struct Linkage_s)); return lkgs; }
/**
 * Return an exalloc'ed copy of the text held in s.
 * The buffer is sized from s->eos (plus one for the NUL).
 */
char * string_copy(String *s)
{
	char *copy;

	copy = (char *) exalloc(s->eos + 1);
	strcpy(copy, s->p);
	return copy;
}
/**
 * Run the postprocessor over every sublinkage of the linkage and store
 * the results (per-link domain names, pp_data, violation string) in each
 * sublinkage. Results of an earlier run are released first.
 *
 * A sublinkage is left with empty pp_info entries when the linkage is
 * flagged improper_fat_linkage or when post_process() returns NULL.
 * post_process_close_sentence() is called once at the end.
 */
void linkage_post_process(Linkage linkage, Postprocessor * postprocessor)
{
	int n_subs = linkage_get_num_sublinkages(linkage);
	Parse_Options opts = linkage->opts;
	Sentence sent = linkage->sent;
	int si;

	for (si = 0; si < n_subs; ++si)
	{
		Sublinkage *subl = &linkage->sublinkage[si];
		PP_node *pp;
		int li;

		/* Drop whatever a previous post-processing pass left behind. */
		if (subl->pp_info != NULL)
		{
			for (li = 0; li < subl->num_links; ++li)
			{
				exfree_pp_info(subl->pp_info[li]);
			}
			post_process_free_data(&subl->pp_data);
			exfree(subl->pp_info, sizeof(PP_info)*subl->num_links);
		}

		subl->pp_info = (PP_info *) exalloc(sizeof(PP_info)*subl->num_links);
		for (li = 0; li < subl->num_links; ++li)
		{
			subl->pp_info[li].num_domains = 0;
			subl->pp_info[li].domain_name = NULL;
		}

		if (subl->violation != NULL)
		{
			exfree(subl->violation, sizeof(char)*(strlen(subl->violation)+1));
			subl->violation = NULL;
		}

		if (linkage->info.improper_fat_linkage)
		{
			pp = NULL;
		}
		else
		{
			/* This can return NULL, for example if there is no post-processor */
			pp = post_process(postprocessor, opts, sent, subl, FALSE);
		}

		if (NULL == pp)
		{
			for (li = 0; li < subl->num_links; ++li)
			{
				subl->pp_info[li].num_domains = 0;
				subl->pp_info[li].domain_name = NULL;
			}
			continue;
		}

		for (li = 0; li < subl->num_links; ++li)
		{
			PP_info *info = &subl->pp_info[li];
			D_type_list *d;
			int n = 0;

			for (d = pp->d_type_array[li]; d != NULL; d = d->next) n++;

			info->num_domains = n;
			if (n > 0)
			{
				info->domain_name = (char **) exalloc(sizeof(char *)*n);
			}

			/* Each domain name is a one-character string. */
			n = 0;
			for (d = pp->d_type_array[li]; d != NULL; d = d->next)
			{
				info->domain_name[n] = (char *) exalloc(sizeof(char)*2);
				sprintf(info->domain_name[n], "%c", d->type);
				n++;
			}
		}

		subl->pp_data = postprocessor->pp_data;

		if (pp->violation != NULL)
		{
			subl->violation = (char *) exalloc(sizeof(char)*(strlen(pp->violation)+1));
			strcpy(subl->violation, pp->violation);
		}
	}

	post_process_close_sentence(postprocessor);
}
/**
 * Render the valid, non-aux constituents in ctxt->constituent[] as a flat
 * bracketed string: for each word (the left wall is skipped, the right
 * wall is not printed) the opening brackets of constituents starting
 * there, the word itself, then the closing brackets of constituents
 * ending there. Among constituents opening at the same word, the one
 * reaching furthest right is emitted first; among those closing at the
 * same word, the one starting furthest right is emitted first.
 *
 * The per-word scratch buffer is sized from MAX_WORD (never below the
 * historical 100 bytes) and filled with a bounded copy, so neither the
 * copy nor upcase_utf8_str(), which is told the buffer holds MAX_WORD
 * bytes, can run past its end.
 *
 * @return an exalloc'ed string, terminated by a newline.
 */
static char * exprint_constituent_structure(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	enum { WORD_BUF_SIZE = ((MAX_WORD + 1) > 100) ? (MAX_WORD + 1) : 100 };
	int c, w;
	int leftdone[MAXCONSTITUENTS];
	int rightdone[MAXCONSTITUENTS];
	int best, bestright, bestleft;
	Sentence sent;
	char s[WORD_BUF_SIZE], * p;
	String * cs = String_create();

	assert (numcon_total < MAXCONSTITUENTS, "Too many constituents");
	sent = linkage_get_sentence(linkage);

	for(c=0; c<numcon_total; c++)
	{
		leftdone[c]=0;
		rightdone[c]=0;
	}

	if(verbosity>=2) printf("\n");

	/* Skip left wall; don't skip right wall, since it may
	 * have constituent boundaries */
	for(w=1; w<linkage->num_words; w++)
	{
		/* Opening brackets of the constituents that start at w. */
		while(1)
		{
			best = -1;
			bestright = -1;
			for(c=0; c<numcon_total; c++)
			{
				if ((ctxt->constituent[c].left==w) &&
				    (leftdone[c]==0) &&
				    (ctxt->constituent[c].valid==1) &&
				    (ctxt->constituent[c].right >= bestright))
				{
					best = c;
					bestright = ctxt->constituent[c].right;
				}
			}
			if (best==-1) break;

			leftdone[best]=1;
			if(ctxt->constituent[best].aux==1) continue;
			append_string(cs, "%c%s ", OPEN_BRACKET, ctxt->constituent[best].type);
		}

		/* Don't print out right wall */
		if (w<linkage->num_words-1)
		{
			/* Bounded copy: never writes more than sizeof(s) bytes. */
			snprintf(s, sizeof(s), "%s", sent->word[w].string);

			/* Now, if the first character of the word was
			 * originally uppercase, we put it back that way */
			if (sent->word[w].firstupper ==1 )
				upcase_utf8_str(s, s, MAX_WORD);
			append_string(cs, "%s ", s);
		}

		/* Closing brackets of the constituents that end at w. */
		while(1)
		{
			best = -1;
			bestleft = -1;
			for(c=0; c<numcon_total; c++)
			{
				if ((ctxt->constituent[c].right==w) &&
				    (rightdone[c]==0) &&
				    (ctxt->constituent[c].valid==1) &&
				    (ctxt->constituent[c].left > bestleft))
				{
					best = c;
					bestleft = ctxt->constituent[c].left;
				}
			}
			if (best==-1) break;

			rightdone[best]=1;
			if (ctxt->constituent[best].aux==1) continue;
			append_string(cs, "%s%c ", ctxt->constituent[best].type, CLOSE_BRACKET);
		}
	}

	append_string(cs, "\n");

	/* Hand back a tight copy and release the String itself. */
	p = exalloc(strlen(cs->p)+1);
	strcpy(p, cs->p);
	exfree(cs->p, sizeof(char)*cs->allocated);
	exfree(cs, sizeof(String));
	return p;
}
/**
 * This procedure mimics analyze_fat_linkage in order to
 * extract the sublinkages and copy them to the Linkage
 * data structure passed in.
 *
 * On return linkage->num_sublinkages and linkage->sublinkage are set.
 * If a structure violation is flagged while building the DIS/CON tree,
 * a single sublinkage containing a copy of every link (fat links
 * included) is stored. Otherwise one sublinkage is stored per state of
 * the DIS tree (stepped with advance_DIS()), each holding copies of only
 * those links whose left end was not set to -1.
 *
 * Note: word_links, structure_violation, patch_array and dfs_root_word
 * are not declared in this function; they are defined elsewhere.
 * The opts parameter is not used in this function.
 */
void extract_fat_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
{
	int i, j, N_thin_links;
	DIS_node *d_root;
	int num_sublinkages;
	Sublinkage * sublinkage;
	Parse_info pi = sent->parse_info;

	/* Scratch sublinkage; released with free_sublinkage() on every path. */
	sublinkage = x_create_sublinkage(pi);
	build_digraph(pi, word_links);
	/* Reset the flag, then test it after the tree has been built. */
	structure_violation = FALSE;
	d_root = build_DIS_CON_tree(pi, word_links);

	if (structure_violation)
	{
		compute_link_names(sent);
		for (i=0; i<pi->N_links; i++)
		{
			copy_full_link(&sublinkage->link[i],&(pi->link_array[i]));
		}

		linkage->num_sublinkages=1;
		linkage->sublinkage = ex_create_sublinkage(pi);

		/* This will have fat links! */
		for (i=0; i<pi->N_links; ++i)
		{
			linkage->sublinkage->link[i] = excopy_link(sublinkage->link[i]);
		}

		free_sublinkage(sublinkage);
		free_digraph(pi, word_links);
		free_DIS_tree(d_root);
		return;
	}

	/* first get number of sublinkages and allocate space */
	num_sublinkages = 0;
	for (;;)
	{
		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	linkage->num_sublinkages = num_sublinkages;
	linkage->sublinkage =
		(Sublinkage *) exalloc(sizeof(Sublinkage)*num_sublinkages);
	for (i=0; i<num_sublinkages; ++i)
	{
		linkage->sublinkage[i].link = NULL;
		linkage->sublinkage[i].pp_info = NULL;
		linkage->sublinkage[i].violation = NULL;
	}

	/* now fill out the sublinkage arrays */
	compute_link_names(sent);

	/* From here on num_sublinkages is reused as the index of the
	 * sublinkage being filled in. */
	num_sublinkages = 0;
	for (;;)
	{
		/* Start each pass from the unmodified parse links. */
		for (i=0; i<pi->N_links; i++)
		{
			patch_array[i].used = patch_array[i].changed = FALSE;
			patch_array[i].newl = pi->link_array[i].l;
			patch_array[i].newr = pi->link_array[i].r;
			copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
		}
		fill_patch_array_DIS(d_root, NULL, word_links);

		for (i=0; i<pi->N_links; i++)
		{
			if (patch_array[i].changed || patch_array[i].used)
			{
				sublinkage->link[i]->l = patch_array[i].newl;
				sublinkage->link[i]->r = patch_array[i].newr;
			}
			else if ((dfs_root_word[pi->link_array[i].l] != -1) &&
			         (dfs_root_word[pi->link_array[i].r] != -1))
			{
				/* l == -1 marks the link as one to be left out below. */
				sublinkage->link[i]->l = -1;
			}
		}

		compute_pp_link_array_connectors(sent, sublinkage);
		compute_pp_link_names(sent, sublinkage);

		/* Don't copy the fat links into the linkage */
		N_thin_links = 0;
		for (i= 0; i<pi->N_links; ++i)
		{
			if (sublinkage->link[i]->l == -1) continue;
			N_thin_links++;
		}

		linkage->sublinkage[num_sublinkages].num_links = N_thin_links;
		linkage->sublinkage[num_sublinkages].link =
			(Link *) exalloc(sizeof(Link)*N_thin_links);
		linkage->sublinkage[num_sublinkages].pp_info = NULL;
		linkage->sublinkage[num_sublinkages].violation = NULL;

		/* j indexes the compacted destination array. */
		for (i=0, j=0; i<pi->N_links; ++i)
		{
			if (sublinkage->link[i]->l == -1) continue;
			linkage->sublinkage[num_sublinkages].link[j++] =
				excopy_link(sublinkage->link[i]);
		}

		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	free_sublinkage(sublinkage);
	free_digraph(pi, word_links);
	free_DIS_tree(d_root);
}
/**
 * Compute the display words of a linkage and store them in linkage->word.
 *
 * For every position of the linkage a display string is chosen:
 *  - a word whose chosen disjunct is NULL (a "null word") is shown in
 *    NULLWORD_START / NULLWORD_END brackets (walls excepted); runs of
 *    adjacent null subwords may be combined into one displayed word;
 *  - a linked word uses the word string of its chosen disjunct, with the
 *    idiom subscript removed, with the morphemes of one alternative
 *    optionally joined into a single word (under HIDE_MORPHO), with the
 *    SUBSCRIPT_MARK replaced by SUBSCRIPT_DOT, and optionally with a
 *    guess mark in square brackets.
 * Afterwards the words that are to be dropped are removed, the remaining
 * ones are copied into a newly exalloc'ed linkage->word,
 * linkage->num_words is updated, remap_linkages() is called with the
 * old-index to new-index map, and the matching word-graph path is stored
 * in linkage->wg_path_display.
 *
 * The temporary arrays (chosen_words, remap, show_word) and the temporary
 * strings live on the stack (alloca / strdupa); strings that outlive the
 * function are stored through string_set_add(..., sent->string_set).
 *
 * NOTE(review): display_morphology is not referenced by name below;
 * presumably the HIDE_MORPHO macro expands to a test of it -- confirm.
 */
void compute_chosen_words(Sentence sent, Linkage linkage, Parse_Options opts)
{
	WordIdx i; /* index of chosen_words */
	WordIdx j;
	Disjunct **cdjp = linkage->chosen_disjuncts;
	const char **chosen_words = alloca(linkage->num_words * sizeof(*chosen_words));
	int *remap = alloca(linkage->num_words * sizeof(*remap));
	bool *show_word = alloca(linkage->num_words * sizeof(*show_word));
	bool display_morphology = opts->display_morphology;

	Gword **lwg_path = linkage->wg_path;
	Gword **n_lwg_path = NULL; /* new Wordgraph path, to match chosen_words */

	Gword **nullblock_start = NULL; /* start of a null block, to be put in [] */
	size_t nbsize = 0; /* number of word in a null block */
	Gword *sentence_word;

	memset(show_word, 0, linkage->num_words * sizeof(*show_word));

	if (verbosity_level(D_CCW))
		print_lwg_path(lwg_path, "Linkage");

	for (i = 0; i < linkage->num_words; i++)
	{
		Disjunct *cdj = cdjp[i];
		Gword *w; /* current word */
		const Gword *nw; /* next word (NULL if none) */
		Gword **wgp; /* wordgraph_path traversing pointer */

		const char *t = NULL; /* current word string */
		bool at_nullblock_end; /* current word is at end of a nullblock */
		bool join_alt = false; /* morpheme-join this alternative */
		char *s;
		size_t l;
		size_t m;

		lgdebug(D_CCW, "Loop start, word%zu: cdj %s, path %s\n",
		        i, cdj ? cdj->word_string : "NULL",
		        lwg_path[i] ? lwg_path[i]->subword : "NULL");

		w = lwg_path[i];
		/* Reads one element past position i; the NULL test on nw below
		 * relies on the path having a NULL entry after its last word. */
		nw = lwg_path[i+1];
		wgp = &lwg_path[i];
		sentence_word = wg_get_sentence_word(sent, w);

		/* FIXME If the original word was capitalized in a capitalizable
		 * position, the displayed null word may be its downcase version.
		 */

		if (NULL == cdj) /* a null word (the chosen disjunct was NULL) */
		{
			chosen_words[i] = NULL;
			nbsize++;
			if (NULL == nullblock_start) /* it starts a new null block */
				nullblock_start = wgp;

			/* The block ends when there is no next word, or the next word
			 * belongs to a different sentence word. */
			at_nullblock_end = (NULL == nw) ||
				(wg_get_sentence_word(sent, nw->unsplit_word) != sentence_word);

			/* Accumulate null words in this alternative */
			if (!at_nullblock_end && (NULL == cdjp[i+1]) &&
			    ((w->morpheme_type == MT_PUNC) == (nw->morpheme_type == MT_PUNC)))
			{
				lgdebug(D_CCW, "Skipping word%zu cdjp=NULL#%zu, path %s\n",
				        i, nbsize, w->subword);
				chosen_words[i] = NULL;
				continue;
			}

			if (NULL != nullblock_start)
			{
				/* If we are here, this null word is an end of a null block */
				lgdebug(+D_CCW, "Handling %zu null words at %zu: ", nbsize, i);

				if (1 == nbsize)
				{
					/* Case 1: A single null subword. */
					lgdebug(D_CCW, "A single null subword.\n");
					t = join_null_word(sent, wgp, nbsize);

					gwordlist_append(&n_lwg_path, w);
				}
				else
				{
					lgdebug(D_CCW, "Combining null subwords");
					/* Use alternative_id to check for start of alternative. */
					if (((*nullblock_start)->alternative_id == *nullblock_start)
					    && at_nullblock_end)
					{
						/* Case 2: A null unsplit_word (all-nulls alternative).*/
						lgdebug(D_CCW, " (null alternative)\n");
						t = sentence_word->subword;

						gwordlist_append(&n_lwg_path, sentence_word);
					}
					else
					{
						/* Case 3: Join together >=2 null morphemes. */
						Gword *wgnull;

						lgdebug(D_CCW, " (null partial word)\n");
						wgnull = wordgraph_null_join(sent, wgp-nbsize+1, wgp);
						gwordlist_append(&n_lwg_path, wgnull);
						t = wgnull->subword;
					}
				}

				nullblock_start = NULL;
				nbsize = 0;
				show_word[i] = true;

				if (MT_WALL != w->morpheme_type)
				{
					/* Put brackets around the null word. */
					l = strlen(t) + 2;
					s = (char *) alloca(l+1);
					s[0] = NULLWORD_START;
					strcpy(&s[1], t);
					s[l-1] = NULLWORD_END;
					s[l] = '\0';
					t = string_set_add(s, sent->string_set);
					lgdebug(D_CCW, " %s\n", t);
					/* Null words have no links, so take care not to drop them. */
				}
			}
		}
		else
		{
			/* This word has a linkage. */

			/* TODO: Suppress "virtual-morphemes", currently the dictcap ones. */
			char *sm;

			t = cdj->word_string;
			/* Print the subscript, as in "dog.n" as opposed to "dog". */

			if (0)
			{
				/* TODO */
			}
			else
			{
				/* Get rid of those ugly ".Ixx" */
				if (is_idiom_word(t))
				{
					s = strdupa(t);
					sm = strrchr(s, SUBSCRIPT_MARK); /* Possible double subscript. */
					UNREACHABLE(NULL == sm); /* We know it has a subscript. */
					*sm = '\0';
					t = string_set_add(s, sent->string_set);
				}
				else if (HIDE_MORPHO)
				{
					/* Concatenate the word morphemes together into one word.
					 * Concatenate their subscripts into one subscript.
					 * Use subscript separator SUBSCRIPT_SEP.
					 * XXX Check whether we can encounter an idiom word here.
					 * FIXME Combining contracted words is not handled yet, because
					 * combining morphemes which have non-LL links to other words is
					 * not yet implemented.
					 * FIXME Move to a separate function. */
					Gword **wgaltp;
					size_t join_len = 0;
					size_t mcnt = 0;

					/* If the alternative contains morpheme subwords, mark it
					 * for joining... */

					const Gword *unsplit_word = w->unsplit_word;
					for (wgaltp = wgp, j = i; NULL != *wgaltp; wgaltp++, j++)
					{
						if ((*wgaltp)->unsplit_word != unsplit_word) break;
						if (MT_INFRASTRUCTURE ==
						    (*wgaltp)->unsplit_word->morpheme_type) break;

						mcnt++;

						if (NULL == cdjp[j])
						{
							/* ... but not if it contains a null word */
							join_alt = false;
							break;
						}
						join_len += strlen(cdjp[j]->word_string) + 1;
						if ((*wgaltp)->morpheme_type & IS_REG_MORPHEME)
							join_alt = true;
					}

					if (join_alt)
					{
						/* Join it in two steps: 1. Base words. 2. Subscripts.
						 * FIXME? Can be done in one step (more efficient but maybe
						 * less clear).
						 * Put SUBSCRIPT_SEP between the subscripts.
						 * XXX No 1-1 correspondence between the hidden base words
						 * and the subscripts after the join, in case there are base
						 * words with and without subscripts. */

						const char subscript_sep_str[] = { SUBSCRIPT_SEP, '\0'};
						/* NOTE(review): the calloc() result is used without a
						 * NULL check. */
						char *join = calloc(join_len + 1, 1); /* zeroed out */

						join[0] = '\0';

						/* 1. Join base words. (Could just use the unsplit_word.) */
						for (wgaltp = wgp, m = 0; m < mcnt; wgaltp++, m++)
						{
							add_morpheme_unmarked(sent, join, cdjp[i+m]->word_string,
							                      (*wgaltp)->morpheme_type);
						}

						strcat(join, subscript_mark_str()); /* tentative */

						/* 2. Join subscripts. */
						for (wgaltp = wgp, m = 0; m < mcnt; wgaltp++, m++)
						{
							/* Cannot NULLify the word - we may have links to it. */
							if (m != mcnt-1) chosen_words[i+m] = "";

							sm = strchr(cdjp[i+m]->word_string, SUBSCRIPT_MARK);

							if (NULL != sm)
							{
								/* Supposing stem subscript is .=x (x optional) */
								if (MT_STEM == (*wgaltp)->morpheme_type)
								{
									sm += 1 + STEM_MARK_L; /* sm+strlen(".=") */
									if ('\0' == *sm) sm = NULL;
#if 0
									if ((cnt-1) == m)
									{
										/* Support a prefix-stem combination. In that case
										 * we have just nullified the combined word, so we
										 * need to move it to the position of the prefix.
										 * FIXME: May still not be good enough. */
										move_combined_word = i+m-1;

										/* And the later chosen_word assignment should be:
										 * chosen_words[-1 != move_combined_word ?
										 *    move_combined_word : i] = t;
										 */
									}
									else
									{
										move_combined_word = -1;
									}
#endif
								}
							}
							if (NULL != sm)
							{
								strcat(join, sm+1);
								strcat(join, subscript_sep_str);
							}
						}

						/* Remove an extra mark, if any */
						join_len = strlen(join);
						if ((SUBSCRIPT_SEP == join[join_len-1]) ||
						    (SUBSCRIPT_MARK == join[join_len-1]))
							join[join_len-1] = '\0';

						gwordlist_append(&n_lwg_path, sentence_word);
						t = string_set_add(join, sent->string_set);
						free(join);

						/* Skip the morphemes that were just joined; the
						 * combined word is stored at the last of them. */
						i += mcnt-1;
					}
				}
			}

			if (!join_alt) gwordlist_append(&n_lwg_path, *wgp);

			/*
			 * Add guess marks in [] square brackets, if needed, at the
			 * end of the base word. Convert the badly-printing
			 * SUBSCRIPT_MARK (hex 03 or ^C) into a period.
			 */
			if (t)
			{
				s = strdupa(t);
				sm = strrchr(s, SUBSCRIPT_MARK);
				if (sm) *sm = SUBSCRIPT_DOT;

				if ((!(w->status & WS_GUESS) && (w->status & WS_INDICT))
				    || !DISPLAY_GUESS_MARKS)
				{
					t = string_set_add(s, sent->string_set);
				}
				else
				{
					const char *regex_name = w->regex_name;
					/* 4 = 1(null) + 1(guess_mark) + 2 (sizeof "[]") */
					int baselen = NULL == sm ? strlen(t) : (size_t)(sm-s);
					char guess_mark = 0;

					switch (w->status & WS_GUESS)
					{
						case WS_SPELL:
							guess_mark = GM_SPELL;
							break;
						case WS_RUNON:
							guess_mark = GM_RUNON;
							break;
						case WS_REGEX:
							guess_mark = GM_REGEX;
							break;
						case 0:
							guess_mark = GM_UNKNOWN;
							break;
						default:
							assert(0, "Missing 'case: %2x'", w->status & WS_GUESS);
					}

					/* In the case of display_morphology==0, the guess indication of
					 * the last subword is used as the guess indication of the whole
					 * word.
					 * FIXME? The guess indications of other subwords are ignored in
					 * this mode. This implies that if a first or middle subword has
					 * a guess indication but the last subword doesn't have, no guess
					 * indication would be shown at all. */

					if ((NULL == regex_name) || HIDE_MORPHO) regex_name = "";
					s = alloca(strlen(t) + strlen(regex_name) + 4);
					/* strncpy() copies exactly baselen bytes and adds no NUL;
					 * the following stores and strcpy() terminate the string. */
					strncpy(s, t, baselen);
					s[baselen] = '[';
					s[baselen + 1] = guess_mark;
					strcpy(s + baselen + 2, regex_name);
					strcat(s, "]");
					/* sm still points into the strdupa'ed copy made above, in
					 * which the mark was already replaced by SUBSCRIPT_DOT. */
					if (NULL != sm) strcat(s, sm);
					t = string_set_add(s, sent->string_set);
				}
			}
		}

		assert(t != NULL, "Word %zu: NULL", i);
		chosen_words[i] = t;
	}

	/* Conditional test removal of quotation marks and the "capdict" tokens,
	 * to facilitate using diff on sentence batch runs. */
	if (test_enabled("removeZZZ"))
	{
		/* j is initialized here but not used inside this loop. */
		for (i=0, j=0; i<linkage->num_links; i++)
		{
			Link *lnk = &(linkage->link_array[i]);

			if (0 == strcmp("ZZZ", lnk->link_name))
				chosen_words[lnk->rw] = NULL;
		}
	}

	/* If morphology printing is being suppressed, then all links
	 * connecting morphemes will be discarded. */
	if (HIDE_MORPHO)
	{
		/* Discard morphology links. */
		for (i=0; i<linkage->num_links; i++)
		{
			Link * lnk = &linkage->link_array[i];

			if (is_morphology_link(lnk->link_name))
			{
				/* Mark link for discarding. */
				lnk->link_name = NULL;
			}
			else
			{
				/* Mark word for not discarding. */
				show_word[lnk->rw] = true;
				show_word[lnk->lw] = true;
			}
		}
	}

	/* We alloc a little more than needed, but so what... */
	linkage->word = (const char **) exalloc(linkage->num_words*sizeof(char *));

	/* Copy over the chosen words, dropping the discarded words.
	 * However, don't discard existing words (chosen_words[i][0]).
	 * Note that if a word only has morphology links and is not combined with
	 * another word, then it will get displayed with no links at all (e.g.
	 * when explicitly specifying root and suffix for debug: root.= =suf */
	for (i=0, j=0; i<linkage->num_words; ++i)
	{
		if (chosen_words[i] &&
		    (chosen_words[i][0] || (!HIDE_MORPHO || show_word[i])))
		{
			/* NOTE(review): linkage->word was allocated just above and
			 * word[j] has not been written yet at this point, so cwtmp
			 * holds an indeterminate value. It is only stored back into
			 * chosen_words[i], which is not read again in this function. */
			const char *cwtmp = linkage->word[j];
			linkage->word[j] = chosen_words[i];
			chosen_words[i] = cwtmp;
			remap[i] = j;
			j++;
		}
		else
		{
			/* -1 marks a word that is not displayed. */
			remap[i] = -1;
		}
	}
	linkage->num_words = j;

	remap_linkages(linkage, remap); /* Update linkage->link_array / num_links. */

	linkage->wg_path_display = n_lwg_path;

	if (verbosity_level(D_CCW))
		print_lwg_path(n_lwg_path, "Display");
}