/**
 * sentence_parse() -- parse a sentence and report the number of valid
 * linkages.
 *
 * Return value:
 *   -1  the sentence had not been split yet and sentence_split() failed;
 *   -2  the sentence length is MAX_SENTENCE or more;
 *   otherwise sent->num_valid_linkages, which is zeroed on entry and is
 *   expected to be filled in by the parser that gets invoked.
 */
int sentence_parse(Sentence sent, Parse_Options opts)
{
    sent->num_valid_linkages = 0;

    if (0 != sent->length)
    {
        /* A panic parse re-enters this function with disjuncts left
         * over from the previous attempt. The panicking caller has no
         * API to release them, so they are released here on its behalf.
         * XXX FIXME someday: move this free to the caller. */
        free_sentence_disjuncts(sent);
    }
    else
    {
        /* Backwards compatibility: programs that never call the
         * splitter explicitly get the sentence split here. */
        int split_status = sentence_split(sent, opts);
        if (split_status)
        {
            return -1;
        }
    }

    /* Refuse sentences that are too long. */
    if (sent->length >= MAX_SENTENCE)
    {
        prt_error("Error: sentence too long, contains more than %d words\n",
                  MAX_SENTENCE);
        return -2;
    }

    resources_reset(opts->resources);

    /* The expressions already exist (they were built while tokenizing);
     * prune them before handing the sentence to a parser. */
    expression_prune(sent, opts);
    print_time(opts, "Finished expression pruning");

    /* Pick the parser requested by the options. */
    if (!opts->use_sat_solver)
    {
        classic_parse(sent, opts);
    }
    else
    {
        sat_parse(sent, opts);
    }
    print_time(opts, "Finished parse");

    /* Only when verbose: warn if the linkage count overflowed. */
    if (verbosity > 0 && sent->num_linkages_found > PARSE_NUM_OVERFLOW)
    {
        prt_error("Warning: Combinatorial explosion! nulls=%zu cnt=%d\n"
                  "Consider retrying the parse with the max allowed disjunct cost set lower.\n"
                  "At the command line, use !cost-max\n",
                  sent->null_count, sent->num_linkages_found);
    }

    return sent->num_valid_linkages;
}
/**
 * sentence_parse() -- parse a sentence and report the number of valid
 * linkages.
 *
 * On entry the verbosity, debug and test variables (declared outside
 * this function) are loaded from the parse options.
 *
 * Return value:
 *   -1  the sentence had not been split yet and sentence_split() failed;
 *   -2  the sentence length is MAX_SENTENCE or more;
 *    0  resources are already exhausted right after the space reset;
 *   otherwise sent->num_valid_linkages, which is zeroed on entry and is
 *   expected to be filled in by the parser that gets invoked.
 */
int sentence_parse(Sentence sent, Parse_Options opts)
{
    verbosity = opts->verbosity;
    debug = opts->debug;
    test = opts->test;

    sent->num_valid_linkages = 0;

    /* Backwards compatibility: programs that never call the splitter
     * explicitly get the sentence split here. */
    if (0 == sent->length)
    {
        int split_status = sentence_split(sent, opts);
        if (split_status)
        {
            return -1;
        }
    }

    /* Refuse sentences that are too long. */
    if (sent->length >= MAX_SENTENCE)
    {
        prt_error("Error: sentence too long, contains more than %d words\n",
                  MAX_SENTENCE);
        return -2;
    }

    /* Drop whatever disjuncts a previous parse may have left behind.
     * (Whether this is really needed is an open question.) */
    free_sentence_disjuncts(sent);

    resources_reset_space(opts->resources);
    if (resources_exhausted(opts->resources))
    {
        return 0;
    }

    /* The expressions already exist; they were built while tokenizing. */
    expression_prune(sent);
    print_time(opts, "Finished expression pruning");

    /* Pick the parser requested by the options. */
    if (!opts->use_sat_solver)
    {
        chart_parse(sent, opts);
    }
    else
    {
        sat_parse(sent, opts);
    }
    print_time(opts, "Finished parse");

    /* Only when verbose: warn if the linkage count overflowed. */
    if (verbosity > 0 && sent->num_linkages_found > PARSE_NUM_OVERFLOW)
    {
        prt_error("WARNING: Combinatorial explosion! nulls=%zu cnt=%d\n"
                  "Consider retrying the parse with the max allowed disjunct cost set lower.\n"
                  "At the command line, use !cost-max\n",
                  sent->null_count, sent->num_linkages_found);
    }

    return sent->num_valid_linkages;
}
/**
 * free_sentence_words() -- release the word array of a sentence.
 *
 * For every word, the X-node list and the alternatives array are
 * released first; then the sentence disjuncts are released, and finally
 * the word array itself. sent->word is left as NULL afterwards.
 */
static void free_sentence_words(Sentence sent)
{
    WordIdx w = 0;

    /* Per-word storage goes first, while sent->word is still valid. */
    while (w < sent->length)
    {
        free_X_nodes(sent->word[w].x);
        free(sent->word[w].alternatives);
        w++;
    }

    /* The disjuncts are released before the array that holds them. */
    free_sentence_disjuncts(sent);

    free((void *) sent->word);
    sent->word = NULL;
}
/**
 * sentence_delete() -- release a sentence and the data attached to it.
 *
 * Passing a NULL sentence is allowed and does nothing, mirroring the
 * behaviour of free(NULL); this lets callers delete unconditionally,
 * including after a failed sentence creation.
 *
 * The sentence must not be used after this call: the Sentence_s
 * structure itself is released last.
 */
void sentence_delete(Sentence sent)
{
    /* Every statement below dereferences sent, so bail out early. */
    if (!sent)
    {
        return;
    }

    /* Per-sentence parsing data. */
    free_sentence_disjuncts(sent);
    free_sentence_expressions(sent);
    string_set_delete(sent->string_set);
    free_parse_set(sent);

    /* Post-processing data, then the dictionary's post-processor is
     * told that this sentence is finished. */
    free_post_processing(sent);
    post_process_close_sentence(sent->dict->postprocessor);

    free_lookup_list();
    free_deletable(sent);
    free_effective_dist(sent);

    /* xfree() is given the size of the region being released:
     * one char per word for is_conjunction. */
    xfree(sent->is_conjunction, sizeof(char)*sent->length);
    xfree((char *) sent, sizeof(struct Sentence_s));
}
/**
 * sentence_parse() -- parse a sentence and report the number of valid
 * linkages.
 *
 * The parse is attempted with an increasing number of null links, from
 * opts->min_null_count up to and including opts->max_null_count. The
 * loop stops as soon as a pass leaves sent->num_valid_linkages above
 * zero, or when the resources are exhausted.
 *
 * Returns sent->num_valid_linkages; this is 0 when the resources were
 * exhausted before any parsing pass could run.
 */
int sentence_parse(Sentence sent, Parse_Options opts)
{
    int nl;

    verbosity = opts->verbosity;

    /* Start from a clean count. Without this, a stale value from an
     * earlier parse of the same sentence would be returned whenever the
     * loop below runs zero times (resources exhausted during the
     * preparation steps, or min_null_count > max_null_count). */
    sent->num_valid_linkages = 0;

    free_sentence_disjuncts(sent);
    resources_reset_space(opts->resources);

    if (resources_exhausted(opts->resources))
    {
        return 0;
    }

    expression_prune(sent);
    print_time(opts, "Finished expression pruning");
    prepare_to_parse(sent, opts);

    init_fast_matcher(sent);
    init_table(sent);

    /* A parse set may already have been built for this sentence if it
     * was parsed before. If so, free it before building another. */
    free_parse_set(sent);
    init_x_table(sent);

    for (nl = opts->min_null_count;
         (nl <= opts->max_null_count) && (!resources_exhausted(opts->resources));
         ++nl)
    {
        sent->null_count = nl;
        sent->num_linkages_found = parse(sent, sent->null_count, opts);
        print_time(opts, "Counted parses");

        /* NOTE(review): post_process_linkages() is presumably what
         * updates sent->num_valid_linkages; it is not visible here. */
        post_process_linkages(sent, opts);
        if (sent->num_valid_linkages > 0)
        {
            break;
        }
    }

    free_table(sent);
    free_fast_matcher(sent);
    print_time(opts, "Finished parse");

    return sent->num_valid_linkages;
}
/**
 * classic_parse() -- parse the given sentence with the original
 * link-grammar algorithm, as described in the original link-grammar
 * papers.
 *
 * The sentence is parsed with the smallest possible number of null
 * links inside the range [opts->min_null_count, opts->max_null_count]:
 * do_parse() is called with null_count growing from the lower bound up
 * to (and including) the upper bound, stopping at the first count that
 * yields a valid linkage.
 *
 * Why the disjuncts are saved and restored:
 * pp_and_power_prune() runs before do_parse() to discard connectors
 * that cannot possibly connect, which speeds up parsing. When
 * null_count==0 it prunes more aggressively, and that extra pruning is
 * not valid for null_count>0. If the aggressive pruning was applied and
 * no complete (null_count==0) parse was found, the pruned disjuncts
 * cannot be reused for the following attempts. The original disjuncts
 * are therefore put back and pp_and_power_prune() is run a second time.
 */
void classic_parse(Sentence sent, Parse_Options opts)
{
    fast_matcher_t *matcher = NULL;
    count_context_t *count_ctx = NULL;
    bool pruned_for_nulls = false;
    Disjunct **saved_disjuncts = NULL;
    bool starts_without_nulls = (0 == opts->min_null_count);
    int null_limit = MIN((int)sent->length, opts->max_null_count);

    /* Build the per-word disjunct lists. */
    prepare_to_parse(sent, opts);
    if (resources_exhausted(opts->resources))
    {
        return;
    }

    if (starts_without_nulls && (null_limit > 0))
    {
        /* Keep a copy of every word's disjuncts: they are needed again
         * if parsing has to continue with null_count>0. The pointer
         * array lives on this function's stack frame (alloca). */
        saved_disjuncts = alloca(sent->length * sizeof(Disjunct *));
        for (size_t w = 0; w < sent->length; w++)
        {
            saved_disjuncts[w] = disjuncts_dup(sent->word[w].d);
        }
    }

    for (int nl = opts->min_null_count; nl <= null_limit; ++nl)
    {
        Count_bin counts;
        s64 num_parses;

        if (!pruned_for_nulls)
        {
            if (nl != 0)
            {
                /* From here on the pruning is valid for every
                 * remaining null count, so it is done only once. */
                pruned_for_nulls = true;

                /* Temporarily hide the zero lower bound so that the
                 * null_count==0 optimization is not applied. */
                if (starts_without_nulls)
                {
                    opts->min_null_count = 1;
                }

                /* A null_count==0 pass came before this one: put the
                 * saved disjuncts back. Ownership moves to the
                 * sentence, hence the pointer is cleared. */
                if (saved_disjuncts != NULL)
                {
                    free_sentence_disjuncts(sent);
                    for (size_t w = 0; w < sent->length; w++)
                    {
                        sent->word[w].d = saved_disjuncts[w];
                    }
                    saved_disjuncts = NULL;
                }
            }

            pp_and_power_prune(sent, opts);

            /* Undo the temporary change of the lower bound. */
            if (starts_without_nulls)
            {
                opts->min_null_count = 0;
            }
            if (resources_exhausted(opts->resources))
            {
                break;
            }

            /* The disjuncts changed: rebuild the counting context and
             * the fast matcher from scratch. */
            free_count_context(count_ctx, sent);
            free_fast_matcher(sent, matcher);
            pack_sentence(sent);
            count_ctx = alloc_count_context(sent);
            matcher = alloc_fast_matcher(sent);
            print_time(opts, "Initialized fast matcher");
        }

        if (resources_exhausted(opts->resources))
        {
            break;
        }
        free_linkages(sent);

        sent->null_count = nl;
        counts = do_parse(sent, matcher, count_ctx, sent->null_count, opts);
        num_parses = hist_total(&counts);

        lgdebug(D_PARSE, "Info: Total count with %zu null links: %lld\n",
                sent->null_count, num_parses);

        /* The count is 64-bit while num_linkages_found is 32-bit:
         * clamp too-large and negative values to INT_MAX. */
        if ((num_parses > INT_MAX) || (num_parses < 0))
        {
            num_parses = INT_MAX;
        }

        sent->num_linkages_found = (int) num_parses;
        print_time(opts, "Counted parses");

        extractor_t *extractor = extractor_new(sent->length, sent->rand_state);
        bool overflowed = setup_linkages(sent, extractor, matcher, count_ctx, opts);
        process_linkages(sent, extractor, overflowed, opts);
        free_extractor(extractor);

        post_process_lkgs(sent, opts);

        if (0 < sent->num_valid_linkages)
        {
            break;
        }

        if ((nl == 0) && (null_limit > 0) && verbosity > 0)
        {
            prt_error("No complete linkages found.\n");
        }

        /* No valid linkage was found at this null count. If the number
         * of parses overflowed, give up instead of trying more nulls. */
        if (num_parses > PARSE_NUM_OVERFLOW)
        {
            break;
        }
    }

    sort_linkages(sent, opts);

    /* Still set only if the copies were never handed back to the
     * sentence; in that case they have to be released here. */
    if (saved_disjuncts != NULL)
    {
        for (size_t w = 0; w < sent->length; w++)
        {
            free_disjuncts(saved_disjuncts[w]);
        }
    }
    free_count_context(count_ctx, sent);
    free_fast_matcher(sent, matcher);
}