/**
 * Batch-mode handling of one parsed sentence.
 *
 * When there_was_an_error() reports a problem for the labelled sentence:
 *  - if the sentence still has at least one valid linkage, the first
 *    post-processed linkage that has zero violations is created and handed
 *    to process_linkage() (a NULL linkage is passed if none qualifies);
 *  - the current value of batch_errors is then printed on stdout.
 * Otherwise, the parse statistics are printed, but only if the
 * "batch-print-parse-statistics" test is enabled.
 */
static void batch_process_some_linkages(Label label,
                                        Sentence sent,
                                        Command_Options* copts)
{
	Parse_Options opts = copts->popts;

	if (!there_was_an_error(label, sent, opts))
	{
		/* No error for this sentence - optionally dump the statistics. */
		if (test_enabled(test, "batch-print-parse-statistics"))
			print_parse_statistics(sent, opts, copts);
		return;
	}

	/* If we found at least one good linkage, print it. */
	if (sentence_num_valid_linkages(sent) > 0)
	{
		Linkage good = NULL;

		for (int idx = 0; idx < sentence_num_linkages_post_processed(sent); idx++)
		{
			if (0 != sentence_num_violations(sent, idx)) continue;

			/* Only the first violation-free linkage is considered. */
			good = linkage_create(idx, sent, opts);
			break;
		}

		process_linkage(good, copts);
		linkage_delete(good);
	}

	fprintf(stdout, "+++++ error %d\n", batch_errors);
}
/**
 * Remove the fixed-name "dot" file TMPDIR/DOT_FILENAME.
 * Nothing is done when the "gvfile" test is enabled, so the file is kept.
 * A failure to unlink is reported as a warning only.
 */
static void wordgraph_unlink_xtmpfile(void)
{
	char *dotfile_path;

	if (test_enabled("gvfile")) return; /* the file was asked for - keep it */

	concatfn(dotfile_path, TMPDIR, DOT_FILENAME);
	if (-1 != unlink(dotfile_path)) return;

	prt_error("Warning: Cannot unlink %s: %s\n", dotfile_path, strerror(errno));
}
/**
 * Check for the auto-next-linkage test request (for LG code development).
 * It is given using the special command: test=auto-next-linkage[:display_max]
 * where :display_max is an optional indication of the maximum number of
 * linkages to auto-display (the default is DISPLAY_MAX).
 * For example, to issue up to 20000 linkages for each batch sentence,
 * the following can be used:
 * link-parser -limit=30000 -test=auto-next-linkage:20000 < file.batch
 *
 * test_opt: the test option string to look in.
 * Return 0 if the test is not requested. Else return the number given
 * after the ':', or DISPLAY_MAX if there is no such number or atoi()
 * yields 0 for it.
 */
static int auto_next_linkage_test(const char *test_opt)
{
	const char *suffix = test_enabled(test_opt, "auto-next-linkage");

	if (NULL == suffix) return 0;

	if (':' == suffix[0])
	{
		int requested = atoi(suffix + 1);
		if (0 != requested) return requested;
	}

	return DISPLAY_MAX;
}
/**
 * Exercise glPushAttrib()/glPopAttrib() with the given attribute bits
 * around a change of the GL_FRAMEBUFFER_SRGB enable, in both directions.
 *
 * bits: attribute mask handed to glPushAttrib().
 * test: name printed as the heading of this sub-test.
 *
 * The enable is expected to be saved/restored only when bits includes
 * GL_ENABLE_BIT or GL_COLOR_BUFFER_BIT. The actual check is delegated to
 * test_enabled() (defined elsewhere - presumably it compares the current
 * enable state with the expected one).
 * Return true only if both directions pass; the second direction is not
 * run when the first one fails.
 */
static bool pushpop(GLuint bits, const char *test)
{
	const bool state_is_saved =
		0 != (bits & (GL_ENABLE_BIT | GL_COLOR_BUFFER_BIT));
	bool pass;

	printf("%s test:\n", test);

	/* Enabled before the push, disabled inside it. */
	glEnable(GL_FRAMEBUFFER_SRGB);
	glPushAttrib(bits);
	glDisable(GL_FRAMEBUFFER_SRGB);
	glPopAttrib();
	pass = test_enabled(state_is_saved);

	if (pass)
	{
		/* Now, test the bits the other direction. */
		glDisable(GL_FRAMEBUFFER_SRGB);
		glPushAttrib(bits);
		glEnable(GL_FRAMEBUFFER_SRGB);
		glPopAttrib();
		pass = test_enabled(!state_is_saved);
	}

	return pass;
}
/**
 * Display the word-graph in the indicated mode.
 * This is for debug. It is not reentrant due to the static pid and the
 * possibly created fixed filenames.
 * When Using X11, a "dot -Txlib" program is launched on the graph
 * description file. The xlib driver refreshes the graph when the file is
 * changed, displaying additional sentences in the same window. The viewer
 * program exits on program end (see the comments in the code). When
 * compiled with MSVC or MINGW, the system PhotoViewer is used by default,
 * unless !wg=x is used (for using X11 when available).
 *
 * The "dot" and the "PhotoViewer" programs must be in the PATH.
 *
 * FIXME? "dot" may get a SEGV due to memory corruptions in it (a known
 * problem - exists even in 2.38). This can be worked-around by trying it
 * again until it succeeds (but the window size, if changed by the user,
 * will not be preserved).
 *
 * sent: the sentence whose word-graph is to be displayed.
 * modestr: a graph display mode as defined in wordgraph.h (default "ldu").
 *          Only the characters up to the first ',' (or the end) are used.
 *
 * The graph description is written to TMPDIR/DOT_FILENAME when the viewer
 * reads it from a file (HAVE_FORK without POPEN_DOT) or when the "gvfile"
 * test is enabled. If the viewer needs that file and it cannot be
 * opened, written or closed, an error is reported and nothing is launched.
 */
void wordgraph_show(Sentence sent, const char *modestr)
{
	String *wgd;
	char *gvf_name;
	bool generate_gvfile = test_enabled("gvfile"); /* keep it for debug */
	char *wgds;
	bool gvfile = false;
	unsigned int mode = 0;
	const char *mp;

	/* No check is done for correct flags - at most "mode" will be nonsense.
	 * NOTE(review): for a character outside 'a'..'a'+30 the shift count
	 * below is out of range, which is undefined behavior in C. The value
	 * quoted in the next comment for ":" is what a shift count taken
	 * modulo 32 yields. Left as is, since the test=wg handling relies on it. */
	for (mp = modestr; '\0' != *mp && ',' != *mp; mp++)
		mode |= 1<<(*mp-'a');

	/* test=wg: sets the mode to ":" (0x2000000) and thus no flags are set. */
	if ((0 == mode) || (WGR_X11 == mode))
		mode |= WGR_LEGEND|WGR_DBGLABEL|WGR_UNSPLIT;

	wgd = wordgraph2dot(sent, mode, modestr);
	wgds = string_copy(wgd);
	string_delete(wgd);

#if defined(HAVE_FORK) && !defined(POPEN_DOT)
	gvfile = true;
#endif

	if (gvfile || generate_gvfile)
	{
		FILE *gvf;
		bool gvf_error = false;
		static bool wordgraph_unlink_xtmpfile_needed = true;

		concatfn(gvf_name, TMPDIR, DOT_FILENAME);
		gvf = fopen(gvf_name, "w");
		if (NULL == gvf)
		{
			prt_error("Error: wordgraph_show: open %s failed: %s",
			          gvf_name, strerror(errno));
			/* An open failure leaves no usable file, exactly like a
			 * write or close failure - it must be flagged too, or else
			 * the viewer is launched on a missing or stale file. */
			gvf_error = true;
		}
		else
		{
			if (fprintf(gvf, "%s", wgds) == -1)
			{
				gvf_error = true;
				prt_error("Error: wordgraph_show: print to %s failed: %s",
				          gvf_name, strerror(errno));
			}
			if (fclose(gvf) == EOF)
			{
				gvf_error = true;
				prt_error("Error: wordgraph_show: close %s failed: %s",
				          gvf_name, strerror(errno));
			}
		}
		if (gvf_error && gvfile) /* we need it - cannot continue */
		{
			free(wgds);
			return;
		}

		if (wordgraph_unlink_xtmpfile_needed)
		{
			/* The filename is fixed - removal needed only once. */
			wordgraph_unlink_xtmpfile_needed = false;
			atexit(wordgraph_unlink_xtmpfile);
		}
	}

#if !defined HAVE_FORK || defined POPEN_DOT
	/* The graph description is handed to the command itself. */
	x_popen((mode & WGR_X11)? POPEN_DOT_CMD : POPEN_DOT_CMD_WINDOWS, wgds);
#else
	{
		/* The viewer gets the name of the file written above. */
		const char *const args[] = { DOT_COMMAND, DOT_DRIVER, gvf_name, NULL };
		x_forkexec(args, &pid);
	}
#endif
	free(wgds);
}
int main(int argc, char * argv[]) { FILE *input_fh = stdin; Dictionary dict; const char *language = NULL; int num_linkages; Label label = NO_LABEL; Command_Options *copts; Parse_Options opts; bool batch_in_progress = false; isatty_stdin = isatty(fileno(stdin)); isatty_stdout = isatty(fileno(stdout)); #ifdef _WIN32 /* If compiled with MSVC/MinGW, we still support running under Cygwin. * This is done by checking running_under_cygwin to resolve * incompatibilities. */ const char *ostype = getenv("OSTYPE"); if ((NULL != ostype) && (0 == strcmp(ostype, "cygwin"))) running_under_cygwin = true; /* argv encoding is in the current locale. */ argv = argv2utf8(argc); if (NULL == argv) { prt_error("Fatal error: Unable to parse command line\n"); exit(-1); } #ifdef _MSC_VER _set_printf_count_output(1); /* enable %n support for display_1line_help()*/ #endif /* _MSC_VER */ win32_set_utf8_output(); #endif /* _WIN32 */ #if LATER /* Try to catch the SIGWINCH ... except this is not working. */ struct sigaction winch_act; winch_act.sa_handler = winch_handler; winch_act.sa_sigaction = NULL; sigemptyset (&winch_act.sa_mask); winch_act.sa_flags = 0; sigaction (SIGWINCH, &winch_act, NULL); #endif copts = command_options_create(); if (copts == NULL || copts->panic_opts == NULL) { prt_error("Fatal error: unable to create parse options\n"); exit(-1); } opts = copts->popts; setup_panic_parse_options(copts->panic_opts); copts->panic_mode = true; parse_options_set_max_parse_time(opts, 30); parse_options_set_linkage_limit(opts, 1000); parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, 0); parse_options_set_short_length(opts, 16); parse_options_set_islands_ok(opts, false); parse_options_set_display_morphology(opts, false); save_default_opts(copts); /* Options so far are the defaults */ if ((argc > 1) && (argv[1][0] != '-')) { /* The dictionary is the first argument if it doesn't begin with "-" */ language = argv[1]; } for (int i = 1; i < argc; i++) { if 
(strcmp("--help", argv[i]) == 0) { print_usage(stdout, argv[0], copts, 0); } if (strcmp("--version", argv[i]) == 0) { printf("Version: %s\n", linkgrammar_get_version()); printf("%s\n", linkgrammar_get_configuration()); exit(0); } } /* Process command line variable-setting commands (only). */ for (int i = 1; i < argc; i++) { if (argv[i][0] == '-') { const char *var = argv[i] + ((argv[i][1] != '-') ? 1 : 2); if ((var[0] != '!') && (0 > issue_special_command(var, copts, NULL))) print_usage(stderr, argv[0], copts, -1); } else if (i != 1) { prt_error("Fatal error: Unknown argument '%s'.\n", argv[i]); print_usage(stderr, argv[0], copts, -1); } } if (language && *language) { dict = dictionary_create_lang(language); if (dict == NULL) { prt_error("Fatal error: Unable to open dictionary.\n"); exit(-1); } } else { dict = dictionary_create_default_lang(); if (dict == NULL) { prt_error("Fatal error: Unable to open default dictionary.\n"); exit(-1); } } /* Process the command line '!' commands */ for (int i = 1; i < argc; i++) { if ((argv[i][0] == '-') && (argv[i][1] == '!')) { if (0 > issue_special_command(argv[i]+1, copts, dict)) print_usage(stderr, argv[0], copts, -1); } } check_winsize(copts); prt_error("Info: Dictionary version %s, locale %s\n", linkgrammar_get_dict_version(dict), linkgrammar_get_dict_locale(dict)); prt_error("Info: Library version %s. Enter \"!help\" for help.\n", linkgrammar_get_version()); /* Main input loop */ while (true) { char *input_string; Sentence sent = NULL; /* Make sure stderr is shown even when MSVC binary runs under * Cygwin/MSYS pty (in that case it is fully buffered(!)). 
*/ fflush(stderr); verbosity = parse_options_get_verbosity(opts); debug = parse_options_get_debug(opts); test = parse_options_get_test(opts); input_string = fget_input_string(input_fh, stdout, /*check_return*/false); check_winsize(copts); if (NULL == input_string) { if (ferror(input_fh)) prt_error("Error: Read: %s\n", strerror(errno)); if (input_fh == stdin) break; fclose (input_fh); input_fh = stdin; continue; } /* Discard whitespace characters from end of string. */ for (char *p = &input_string[strlen(input_string)-1]; (p > input_string) && strchr(WHITESPACE, *p) ; p--) { *p = '\0'; } /* If the input string is just whitespace, then ignore it. */ if (strspn(input_string, WHITESPACE) == strlen(input_string)) continue; char command = special_command(input_string, copts, dict); if ('e' == command) break; /* It was an exit command */ if ('c' == command) continue; /* It was another command */ if (-1 == command) continue; /* It was a bad command */ /* We have to handle the !file command inline; it's too hairy * otherwise ... */ if ('f' == command) { char * filename = &input_string[strcspn(input_string, WHITESPACE)] + 1; int fnlen = strlen(filename); if (0 == fnlen) { prt_error("Error: Missing file name argument\n"); continue; } if ('\n' == filename[fnlen-1]) filename[fnlen-1] = '\0'; struct stat statbuf; if ((0 == stat(filename, &statbuf)) && statbuf.st_mode & S_IFDIR) { prt_error("Error: Cannot open %s: %s\n", filename, strerror(EISDIR)); continue; } input_fh = fopen(filename, "r"); if (NULL == input_fh) { prt_error("Error: Cannot open %s: %s\n", filename, strerror(errno)); input_fh = stdin; continue; } continue; } if (!copts->batch_mode) batch_in_progress = false; if ('\0' != test[0]) { /* In batch mode warn only once. * In auto-next-linkage mode don't warn at all. */ if (!batch_in_progress && !auto_next_linkage_test(test)) { fflush(stdout); /* Remind the developer this is a test mode. 
*/ prt_error("Warning: Tests enabled: %s\n", test); if (copts->batch_mode) batch_in_progress = true; } } if (copts->echo_on) { printf("%s\n", input_string); } if (copts->batch_mode || auto_next_linkage_test(test)) { label = strip_off_label(input_string); } // Post-processing-based pruning will clip away connectors // that we might otherwise want to examine. So disable PP // pruning in this situation. if (copts->display_bad) parse_options_set_perform_pp_prune(opts, false); else parse_options_set_perform_pp_prune(opts, true); sent = sentence_create(input_string, dict); if (sentence_split(sent, opts) < 0) { sentence_delete(sent); sent = NULL; continue; } if (0 != copts->display_wordgraph) { const char *wg_display_flags = ""; /* default flags */ switch (copts->display_wordgraph) { case 1: /* default flags */ break; case 2: /* subgraphs with a legend */ wg_display_flags = "sl"; break; case 3: { /* Use esoteric flags from the test user variable. */ const char *s = test_enabled(test, "wg"); if ((NULL != s) && (':' == s[0])) wg_display_flags = s; } break; default: prt_error("Warning: wordgraph=%d: Unknown value, using 1\n", copts->display_wordgraph); copts->display_wordgraph = 1; } sentence_display_wordgraph(sent, wg_display_flags); } /* First parse with the default disjunct_cost as set by the library * (currently 2.7). Usually parse here with no null links. * However, if "-test=one-step-parse" is used and we are said to * parse with null links, allow parsing here with null links too. */ bool one_step_parse = !copts->batch_mode && copts->allow_null && test_enabled(test, "one-step-parse"); int max_null_count = one_step_parse ? sentence_length(sent) : 0; parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, max_null_count); parse_options_reset_resources(opts); num_linkages = sentence_parse(sent, opts); /* num_linkages is negative only on a hard-error; * typically, due to a zero-length sentence. 
*/ if (num_linkages < 0) { sentence_delete(sent); sent = NULL; continue; } #if 0 /* Try again, this time omitting the requirement for * definite articles, etc. This should allow for the parsing * of newspaper headlines and other clipped speech. * * XXX Unfortunately, this also allows for the parsing of * all sorts of ungrammatical sentences which should not * parse, and leads to bad parses of many other unparsable * but otherwise grammatical sentences. Thus, this trick * pretty much fails; we leave it here to document the * experiment. */ if (num_linkages == 0) { parse_options_set_disjunct_cost(opts, 4.5); num_linkages = sentence_parse(sent, opts); if (num_linkages < 0) continue; } #endif /* 0 */ /* Try using a larger list of disjuncts */ /* XXX FIXME: the lg_expand_disjunct_list() routine is not * currently a part of the public API; it should be made so, * or this expansion idea should be abandoned... not sure which. */ if ((num_linkages == 0) && parse_options_get_use_cluster_disjuncts(opts)) { int expanded; if (verbosity > 0) fprintf(stdout, "No standard linkages, expanding disjunct set.\n"); parse_options_set_disjunct_cost(opts, 3.9); expanded = lg_expand_disjunct_list(sent); if (expanded) { num_linkages = sentence_parse(sent, opts); } if (0 < num_linkages) printf("Got One !!!!!!!!!!!!!!!!!\n"); } /* If asked to show bad linkages, then show them. 
*/ if ((num_linkages == 0) && (!copts->batch_mode)) { if (copts->display_bad) { num_linkages = sentence_num_linkages_found(sent); } } /* Now parse with null links */ if (!one_step_parse && num_linkages == 0 && !copts->batch_mode) { if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n"); if (copts->allow_null) { /* XXX should use expanded disjunct list here too */ parse_options_set_min_null_count(opts, 1); parse_options_set_max_null_count(opts, sentence_length(sent)); num_linkages = sentence_parse(sent, opts); } } if (verbosity > 0) { if (parse_options_timer_expired(opts)) fprintf(stdout, "Timer is expired!\n"); if (parse_options_memory_exhausted(opts)) fprintf(stdout, "Memory is exhausted!\n"); } if ((num_linkages == 0) && copts->panic_mode && parse_options_resources_exhausted(opts)) { /* print_total_time(opts); */ batch_errors++; if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n"); /* If the parser used was the SAT solver, set the panic parser to * it too. * FIXME? Currently, the SAT solver code is not too useful in * panic mode since it doesn't handle parsing with null words, so * using the regular parser in that case could be beneficial. * However, this currently causes a crash due to a memory * management mess. 
*/ parse_options_set_use_sat_parser(copts->panic_opts, parse_options_get_use_sat_parser(opts)); parse_options_reset_resources(copts->panic_opts); parse_options_set_verbosity(copts->panic_opts, verbosity); (void)sentence_parse(sent, copts->panic_opts); if (verbosity > 0) { if (parse_options_timer_expired(copts->panic_opts)) fprintf(stdout, "Panic timer is expired!\n"); } } if (verbosity > 1) parse_options_print_total_time(opts); const char *rc = ""; if (copts->batch_mode) { batch_process_some_linkages(label, sent, copts); } else { rc = process_some_linkages(input_fh, sent, copts); } fflush(stdout); sentence_delete(sent); sent = NULL; if ((NULL == rc) && (input_fh == stdin)) break; } if (copts->batch_mode) { /* print_time(opts, "Total"); */ fprintf(stderr, "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s"); } /* Free stuff, so that mem-leak detectors don't complain. */ command_options_delete(copts); dictionary_delete(dict); printf ("Bye.\n"); return 0; }
/**
 * Compute the word strings to be displayed for a linkage.
 *
 * sent: the sentence; its string_set gets the generated word strings.
 * linkage: the linkage to work on. Read: chosen_disjuncts, wg_path,
 *          num_words, link_array, num_links. Written: word (newly
 *          allocated by exalloc()), num_words (set to the number of kept
 *          words), wg_path_display, the link_name of discarded links, and
 *          whatever remap_linkages() updates.
 * opts: only display_morphology is read here.
 *
 * Steps:
 * 1. For each word of the linkage choose a display string:
 *    - Null words (no chosen disjunct) are collected into null blocks,
 *      and each block is shown as one string, in brackets unless it is a
 *      wall.
 *    - Idiom words lose their last subscript.
 *    - If HIDE_MORPHO, the morphemes of an alternative are joined into one
 *      word followed by their joined subscripts.
 *    - The SUBSCRIPT_MARK is converted to SUBSCRIPT_DOT, and a guess mark
 *      is added when applicable.
 * 2. If the "removeZZZ" test is enabled, drop the right word of each link
 *    named "ZZZ".
 * 3. If HIDE_MORPHO, mark the morphology links for discarding.
 * 4. Copy the kept words to linkage->word, and remap the links.
 *
 * The work arrays here are allocated by alloca(), some of them inside the
 * word loop, so the stack use grows with the number of words.
 *
 * NOTE(review): HIDE_MORPHO is defined elsewhere; presumably it tests the
 * local display_morphology, which has no other use here - confirm.
 */
void compute_chosen_words(Sentence sent, Linkage linkage, Parse_Options opts)
{
	WordIdx i;   /* index of chosen_words */
	WordIdx j;
	Disjunct **cdjp = linkage->chosen_disjuncts;
	const char **chosen_words = alloca(linkage->num_words * sizeof(*chosen_words));
	int *remap = alloca(linkage->num_words * sizeof(*remap));
	bool *show_word = alloca(linkage->num_words * sizeof(*show_word));
	bool display_morphology = opts->display_morphology;

	Gword **lwg_path = linkage->wg_path;
	Gword **n_lwg_path = NULL; /* new Wordgraph path, to match chosen_words */

	Gword **nullblock_start = NULL; /* start of a null block, to be put in [] */
	size_t nbsize = 0;              /* number of words in a null block */
	Gword *sentence_word;

	memset(show_word, 0, linkage->num_words * sizeof(*show_word));

	if (verbosity_level(D_CCW))
		print_lwg_path(lwg_path, "Linkage");

	for (i = 0; i < linkage->num_words; i++)
	{
		Disjunct *cdj = cdjp[i];
		Gword *w;              /* current word */
		const Gword *nw;       /* next word (NULL if none) */
		Gword **wgp;           /* wordgraph_path traversing pointer */

		const char *t = NULL;  /* current word string */
		bool at_nullblock_end; /* current word is at end of a nullblock */
		bool join_alt = false; /* morpheme-join this alternative */
		char *s;
		size_t l;
		size_t m;

		lgdebug(D_CCW, "Loop start, word%zu: cdj %s, path %s\n",
		        i, cdj ? cdj->word_string : "NULL",
		        lwg_path[i] ? lwg_path[i]->subword : "NULL");

		w = lwg_path[i];
		/* NOTE(review): reads one element past the current word - this
		 * assumes that the path has a NULL element after its last word. */
		nw = lwg_path[i+1];
		wgp = &lwg_path[i];
		sentence_word = wg_get_sentence_word(sent, w);

		/* FIXME If the original word was capitalized in a capitalizable
		 * position, the displayed null word may be its downcase version. */

		if (NULL == cdj) /* a null word (the chosen disjunct was NULL) */
		{
			chosen_words[i] = NULL;
			nbsize++;
			if (NULL == nullblock_start) /* it starts a new null block */
				nullblock_start = wgp;

			/* The block ends at the end of the path, or when the next
			 * word belongs to another sentence word. */
			at_nullblock_end = (NULL == nw) ||
				(wg_get_sentence_word(sent, nw->unsplit_word) != sentence_word);

			/* Accumulate null words in this alternative */
			if (!at_nullblock_end && (NULL == cdjp[i+1]) &&
			    ((w->morpheme_type == MT_PUNC) == (nw->morpheme_type == MT_PUNC)))
			{
				lgdebug(D_CCW, "Skipping word%zu cdjp=NULL#%zu, path %s\n",
				        i, nbsize, w->subword);
				chosen_words[i] = NULL;
				continue;
			}

			/* nullblock_start is always set here (see above). */
			if (NULL != nullblock_start)
			{
				/* If we are here, this null word is an end of a null block */
				lgdebug(+D_CCW, "Handling %zu null words at %zu: ", nbsize, i);

				if (1 == nbsize)
				{
					/* Case 1: A single null subword. */
					lgdebug(D_CCW, "A single null subword.\n");
					t = join_null_word(sent, wgp, nbsize);

					gwordlist_append(&n_lwg_path, w);
				}
				else
				{
					lgdebug(D_CCW, "Combining null subwords");
					/* Use alternative_id to check for start of alternative. */
					if (((*nullblock_start)->alternative_id == *nullblock_start)
					    && at_nullblock_end)
					{
						/* Case 2: A null unsplit_word (all-nulls alternative).*/
						lgdebug(D_CCW, " (null alternative)\n");
						t = sentence_word->subword;

						gwordlist_append(&n_lwg_path, sentence_word);
					}
					else
					{
						/* Case 3: Join together >=2 null morphemes. */
						Gword *wgnull;

						lgdebug(D_CCW, " (null partial word)\n");
						wgnull = wordgraph_null_join(sent, wgp-nbsize+1, wgp);
						gwordlist_append(&n_lwg_path, wgnull);
						t = wgnull->subword;
					}
				}

				/* The block is done - get ready for a next one. */
				nullblock_start = NULL;
				nbsize = 0;
				show_word[i] = true;

				if (MT_WALL != w->morpheme_type)
				{
					/* Put brackets around the null word. */
					l = strlen(t) + 2;
					s = (char *) alloca(l+1);
					s[0] = NULLWORD_START;
					strcpy(&s[1], t);
					s[l-1] = NULLWORD_END;
					s[l] = '\0';
					t = string_set_add(s, sent->string_set);
					lgdebug(D_CCW, " %s\n", t);
					/* Null words have no links, so take care not to drop them. */
				}
			}
		}
		else
		{
			/* This word has a linkage. */

			/* TODO: Suppress "virtual-morphemes", currently the dictcap ones. */
			char *sm;

			t = cdj->word_string;
			/* Print the subscript, as in "dog.n" as opposed to "dog". */

			if (0)
			{
				/* TODO */
			}
			else
			{
				/* Get rid of those ugly ".Ixx" */
				if (is_idiom_word(t))
				{
					s = strdupa(t);
					sm = strrchr(s, SUBSCRIPT_MARK); /* Possible double subscript. */
					UNREACHABLE(NULL == sm); /* We know it has a subscript. */
					*sm = '\0';
					t = string_set_add(s, sent->string_set);
				}
				else if (HIDE_MORPHO)
				{
					/* Concatenate the word morphemes together into one word.
					 * Concatenate their subscripts into one subscript.
					 * Use subscript separator SUBSCRIPT_SEP.
					 * XXX Check whether we can encounter an idiom word here.
					 * FIXME Combining contracted words is not handled yet, because
					 * combining morphemes which have non-LL links to other words is
					 * not yet implemented.
					 * FIXME Move to a separate function. */
					Gword **wgaltp;
					size_t join_len = 0;
					size_t mcnt = 0;

					/* If the alternative contains morpheme subwords, mark it
					 * for joining... */

					const Gword *unsplit_word = w->unsplit_word;
					for (wgaltp = wgp, j = i; NULL != *wgaltp; wgaltp++, j++)
					{
						if ((*wgaltp)->unsplit_word != unsplit_word) break;
						if (MT_INFRASTRUCTURE ==
						    (*wgaltp)->unsplit_word->morpheme_type) break;

						mcnt++;

						if (NULL == cdjp[j])
						{
							/* ... but not if it contains a null word */
							join_alt = false;
							break;
						}
						/* Room for the word string and one more character. */
						join_len += strlen(cdjp[j]->word_string) + 1;
						if ((*wgaltp)->morpheme_type & IS_REG_MORPHEME)
							join_alt = true;
					}

					if (join_alt)
					{
						/* Join it in two steps: 1. Base words. 2. Subscripts.
						 * FIXME? Can be done in one step (more efficient but maybe
						 * less clear).
						 * Put SUBSCRIPT_SEP between the subscripts.
						 * XXX No 1-1 correspondence between the hidden base words
						 * and the subscripts after the join, in case there are base
						 * words with and without subscripts. */

						const char subscript_sep_str[] = { SUBSCRIPT_SEP, '\0'};
						/* NOTE(review): the calloc() result is used with no
						 * NULL check. */
						char *join = calloc(join_len + 1, 1); /* zeroed out */

						join[0] = '\0';

						/* 1. Join base words. (Could just use the unsplit_word.) */
						for (wgaltp = wgp, m = 0; m < mcnt; wgaltp++, m++)
						{
							add_morpheme_unmarked(sent, join, cdjp[i+m]->word_string,
							                      (*wgaltp)->morpheme_type);
						}

						strcat(join, subscript_mark_str()); /* tentative */

						/* 2. Join subscripts. */
						for (wgaltp = wgp, m = 0; m < mcnt; wgaltp++, m++)
						{
							/* Cannot NULLify the word - we may have links to it. */
							if (m != mcnt-1) chosen_words[i+m] = "";

							sm = strchr(cdjp[i+m]->word_string, SUBSCRIPT_MARK);

							if (NULL != sm)
							{
								/* Supposing stem subscript is .=x (x optional) */
								if (MT_STEM == (*wgaltp)->morpheme_type)
								{
									sm += 1 + STEM_MARK_L; /* sm+strlen(".=") */
									if ('\0' == *sm) sm = NULL;
#if 0
									if ((cnt-1) == m)
									{
										/* Support a prefix-stem combination. In that case
										 * we have just nullified the combined word, so we
										 * need to move it to the position of the prefix.
										 * FIXME: May still not be good enough. */
										move_combined_word = i+m-1;

										/* And the later chosen_word assignment should be:
										 * chosen_words[-1 != move_combined_word ?
										 *    move_combined_word : i] = t;
										 */
									}
									else
									{
										move_combined_word = -1;
									}
#endif
								}
							}
							if (NULL != sm)
							{
								strcat(join, sm+1);
								strcat(join, subscript_sep_str);
							}
						}

						/* Remove an extra mark, if any */
						join_len = strlen(join);
						if ((SUBSCRIPT_SEP == join[join_len-1]) ||
						    (SUBSCRIPT_MARK == join[join_len-1]))
							join[join_len-1] = '\0';

						gwordlist_append(&n_lwg_path, sentence_word);
						t = string_set_add(join, sent->string_set);
						free(join);

						/* Skip the joined morphemes. The combined string is
						 * assigned at the position of the last of them. */
						i += mcnt-1;
					}
				}
			}

			if (!join_alt) gwordlist_append(&n_lwg_path, *wgp);

			/*
			 * Add guess marks in [] square brackets, if needed, at the
			 * end of the base word. Convert the badly-printing
			 * SUBSCRIPT_MARK (hex 03 or ^C) into a period.
			 */
			if (t)
			{
				s = strdupa(t);
				sm = strrchr(s, SUBSCRIPT_MARK);
				if (sm) *sm = SUBSCRIPT_DOT;

				if ((!(w->status & WS_GUESS) && (w->status & WS_INDICT))
				    || !DISPLAY_GUESS_MARKS)
				{
					t = string_set_add(s, sent->string_set);
				}
				else
				{
					const char *regex_name = w->regex_name;
					/* 4 = 1(null) + 1(guess_mark) + 2 (sizeof "[]") */
					int baselen = NULL == sm ? strlen(t) : (size_t)(sm-s);
					char guess_mark = 0;

					switch (w->status & WS_GUESS)
					{
						case WS_SPELL:
							guess_mark = GM_SPELL;
							break;
						case WS_RUNON:
							guess_mark = GM_RUNON;
							break;
						case WS_REGEX:
							guess_mark = GM_REGEX;
							break;
						case 0:
							guess_mark = GM_UNKNOWN;
							break;
						default:
							assert(0, "Missing 'case: %2x'", w->status & WS_GUESS);
					}

					/* In the case of display_morphology==0, the guess indication of
					 * the last subword is used as the guess indication of the whole
					 * word.
					 * FIXME? The guess indications of other subwords are ignored in
					 * this mode. This implies that if a first or middle subword has
					 * a guess indication but the last subword doesn't have, no guess
					 * indication would be shown at all. */

					if ((NULL == regex_name) || HIDE_MORPHO) regex_name = "";
					/* "sm" still points into the strdupa() copy made above,
					 * in which the mark is already converted to a dot. */
					s = alloca(strlen(t) + strlen(regex_name) + 4);
					/* Only the base word is copied here, and strncpy() adds
					 * no null terminator for it. The assignments and the
					 * strcpy() below add the rest. */
					strncpy(s, t, baselen);
					s[baselen] = '[';
					s[baselen + 1] = guess_mark;
					strcpy(s + baselen + 2, regex_name);
					strcat(s, "]");
					if (NULL != sm) strcat(s, sm);
					t = string_set_add(s, sent->string_set);
				}
			}
		}

		assert(t != NULL, "Word %zu: NULL", i);
		chosen_words[i] = t;
	}

	/* Conditional test removal of quotation marks and the "capdict" tokens,
	 * to facilitate using diff on sentence batch runs. */
	if (test_enabled("removeZZZ"))
	{
		for (i=0, j=0; i<linkage->num_links; i++)
		{
			Link *lnk = &(linkage->link_array[i]);

			if (0 == strcmp("ZZZ", lnk->link_name))
				chosen_words[lnk->rw] = NULL;
		}
	}

	/* If morphology printing is being suppressed, then all links
	 * connecting morphemes will be discarded. */
	if (HIDE_MORPHO)
	{
		/* Discard morphology links. */
		for (i=0; i<linkage->num_links; i++)
		{
			Link * lnk = &linkage->link_array[i];

			if (is_morphology_link(lnk->link_name))
			{
				/* Mark link for discarding. */
				lnk->link_name = NULL;
			}
			else
			{
				/* Mark word for not discarding. */
				show_word[lnk->rw] = true;
				show_word[lnk->lw] = true;
			}
		}
	}

	/* We alloc a little more than needed, but so what...
	 * NOTE(review): a previous linkage->word value, if any, is overwritten
	 * here without being freed - confirm that nothing is lost. */
	linkage->word = (const char **) exalloc(linkage->num_words*sizeof(char *));

	/* Copy over the chosen words, dropping the discarded words.
	 * However, don't discard existing words (chosen_words[i][0]).
	 * Note that if a word only has morphology links and is not combined with
	 * another word, then it will get displayed with no links at all (e.g.
	 * when explicitly specifying root and suffix for debug: root.= =suf */
	for (i=0, j=0; i<linkage->num_words; ++i)
	{
		if (chosen_words[i] &&
		    (chosen_words[i][0] || (!HIDE_MORPHO || show_word[i])))
		{
			/* NOTE(review): this swap reads the just-allocated
			 * linkage->word[j]; the value that is saved in chosen_words[i]
			 * is not used again in this function. */
			const char *cwtmp = linkage->word[j];
			linkage->word[j] = chosen_words[i];
			chosen_words[i] = cwtmp;
			remap[i] = j;
			j++;
		}
		else
		{
			/* A dropped word. */
			remap[i] = -1;
		}
	}
	linkage->num_words = j;

	remap_linkages(linkage, remap); /* Update linkage->link_array / num_links. */

	linkage->wg_path_display = n_lwg_path;

	if (verbosity_level(D_CCW))
		print_lwg_path(n_lwg_path, "Display");
}