/**
 * Tokenize a sentence and flatten the resulting word graph.
 *
 * @param sent  Sentence to split; its dictionary is taken from sent->dict.
 * @param opts  Parse options, forwarded to the tokenizer and the flattener.
 * @return  0 on success;
 *         -1 if separate_sentence() fails;
 *         -2 if unknown-word handling is not enabled in the dictionary and
 *            sentence_in_dictionary() fails;
 *         -3 if flatten_wordgraph() failed.
 */
int sentence_split(Sentence sent, Parse_Options opts)
{
	Dictionary lexicon = sent->dict;
	bool flatten_failed;

	/* Step 1: tokenize. */
	if (!separate_sentence(sent, opts))
		return -1;

	/* Step 2: turn the word graph built by separate_sentence() into the
	 * 2D word array that the current parsers consume. This can fail when
	 * the EMPTY_WORD_DOT or UNKNOWN_WORD words are needed but missing
	 * from the dictionary, or on an internal error. The outcome is only
	 * acted upon after the dictionary check below. */
	flatten_failed = !flatten_wordgraph(sent, opts);

	/* Step 3: unless the dictionary both defines and uses the unknown
	 * word, no special processing is done (e.g. for capitalized words),
	 * so the sentence is checked against the dictionary. */
	if ((!lexicon->unknown_word_defined || !lexicon->use_unknown_word) &&
	    !sentence_in_dictionary(sent))
	{
		return -2;
	}

	return flatten_failed ? -3 : 0;
}
/**
 * Allocate a Sentence for input_string and prepare it for parsing.
 *
 * The lookup list is released first, then a fresh Sentence_s is allocated,
 * its bookkeeping fields are reset, and the input is tokenized. After that
 * the conjunction tables are set up and the word expressions are built.
 *
 * @param input_string  Text handed to separate_sentence().
 * @param dict          Dictionary stored in the new sentence.
 * @return  The new sentence, or NULL if tokenizing fails, if the sentence
 *          fails the dictionary check (performed only when unknown-word
 *          handling is not enabled), or if build_sentence_expressions()
 *          fails.
 */
Sentence sentence_create(char *input_string, Dictionary dict)
{
	Sentence s;
	int w;

	free_lookup_list();

	s = (Sentence) xalloc(sizeof(struct Sentence_s));
	s->dict = dict;

	/* Counters start from zero... */
	s->length = 0;
	s->null_count = 0;
	s->num_linkages_found = 0;
	s->num_linkages_alloced = 0;
	s->num_linkages_post_processed = 0;
	s->num_valid_linkages = 0;

	/* ...and nothing has been allocated for these yet. */
	s->link_info = NULL;
	s->deletable = NULL;
	s->effective_dist = NULL;
	s->parse_info = NULL;

	s->string_set = string_set_create();

	if (!separate_sentence(input_string, s))
	{
		/* Undo only what was set up so far: the string set and the
		 * sentence structure itself. */
		string_set_delete(s->string_set);
		xfree(s, sizeof(struct Sentence_s));
		return NULL;
	}

	s->q_pruned_rules = FALSE; /* used by post processing */

	s->is_conjunction = (char *) xalloc(sizeof(char) * s->length);
	set_is_conjunction(s);
	initialize_conjunction_tables(s);

	/* Clear the per-word x and d pointers, in case they get freed
	 * before anything else is stored in them. */
	w = 0;
	while (w < s->length)
	{
		s->word[w].x = NULL;
		s->word[w].d = NULL;
		w++;
	}

	/* Without an enabled unknown word, the sentence has to pass the
	 * dictionary check. */
	if ((!dict->unknown_word_defined || !dict->use_unknown_word) &&
	    !sentence_in_dictionary(s))
	{
		goto fail;
	}

	if (!build_sentence_expressions(s))
		goto fail;

	return s;

fail:
	sentence_delete(s);
	return NULL;
}
/**
 * Tokenize a sentence and flatten the resulting word graph.
 *
 * @param sent  Sentence to split; its dictionary is taken from sent->dict.
 * @param opts  Parse options; opts->repeatable_rand is consulted here, and
 *              opts is forwarded to the tokenizer and the flattener.
 * @return  0 on success;
 *         -1 if separate_sentence() fails;
 *         -2 if unknown-word handling is not enabled in the dictionary and
 *            sentence_in_dictionary() fails;
 *         -3 if flatten_wordgraph() failed (an error is printed first).
 */
int sentence_split(Sentence sent, Parse_Options opts)
{
	Dictionary lexicon = sent->dict;
	bool flatten_failed;

	/* A zero global_rand_state denotes "repeatable rand". When repeatable
	 * randomness is off and this sentence has no random state yet, give
	 * it a non-zero one here so that anysplit can use it; a zero global
	 * state is first replaced by 42. */
	if (!opts->repeatable_rand && sent->rand_state == 0)
	{
		if (global_rand_state == 0)
			global_rand_state = 42;
		sent->rand_state = global_rand_state;
	}

	/* Tokenize. */
	if (!separate_sentence(sent, opts))
		return -1;

	/* Turn the word graph built by separate_sentence() into the 2D word
	 * array that the current parsers consume. This can fail when
	 * UNKNOWN_WORD is needed but missing from the dictionary, or on an
	 * internal error. The result is acted upon only after the dictionary
	 * check below. */
	flatten_failed = !flatten_wordgraph(sent, opts);

	/* Unless the dictionary both defines and uses the unknown word, no
	 * special processing is done (e.g. for capitalized words), so the
	 * sentence is checked against the dictionary. */
	if ((!lexicon->unknown_word_defined || !lexicon->use_unknown_word) &&
	    !sentence_in_dictionary(sent))
	{
		return -2;
	}

	if (!flatten_failed)
		return 0;

	/* Always report the failure, even though the message may duplicate
	 * one that was printed earlier. */
	prt_error("Error: sentence_split(): Internal error detected\n");
	return -3;
}
/**
 * Tokenize a sentence and flatten the resulting word graph.
 *
 * @param sent  Sentence to split; its dictionary is taken from sent->dict.
 * @param opts  Parse options, forwarded to the tokenizer and the flattener.
 * @return  0 on success;
 *         -1 if separate_sentence() fails;
 *         -2 if unknown-word handling is not enabled in the dictionary and
 *            sentence_in_dictionary() fails;
 *         -3 if flatten_wordgraph() failed (an error is printed first).
 */
int sentence_split(Sentence sent, Parse_Options opts)
{
	Dictionary lexicon = sent->dict;
	bool wordgraph_failed;

	/* Tokenize. */
	if (!separate_sentence(sent, opts))
		return -1;

	/* Convert the word graph produced by separate_sentence() into the
	 * 2D word array expected by the current parsers. This can fail when
	 * the EMPTY_WORD_DOT or UNKNOWN_WORD words are needed but missing
	 * from the dictionary, or on an internal error. The verdict is kept
	 * until after the dictionary check below. */
	wordgraph_failed = !flatten_wordgraph(sent, opts);

	/* Unless the dictionary both defines and uses the unknown word, no
	 * special processing is done (e.g. for capitalized words), so the
	 * sentence is checked against the dictionary. */
	if (!lexicon->unknown_word_defined || !lexicon->use_unknown_word)
	{
		if (!sentence_in_dictionary(sent))
			return -2;
	}

	if (!wordgraph_failed)
		return 0;

	/* Always report the failure, even though the message may duplicate
	 * one that was printed earlier. */
	prt_error("Error: sentence_split(): Internal error detected");
	return -3;
}