Пример #1
0
int sentence_split(Sentence sent, Parse_Options opts)
{
	Dictionary dict = sent->dict;
	bool fw_failed = false;

	/* Cleanup stuff previously allocated. This is because some free
	 * routines depend on sent-length, which might change in different
	 * parse-opts settings.
	 */
	/* Tokenize */
	if (!separate_sentence(sent, opts))
	{
		return -1;
	}

	/* Flatten the word graph created by separate_sentence() to a 2D-word-array
	 * which is compatible to the current parsers.
	 * This may fail if the EMPTY_WORD_DOT or UNKNOWN_WORD words are needed but
	 * are not defined in the dictionary, or an internal error happens. */
	fw_failed = !flatten_wordgraph(sent, opts);

	/* If unknown_word is not defined, then no special processing
	 * will be done for e.g. capitalized words. */
	if (!(dict->unknown_word_defined && dict->use_unknown_word))
	{
		if (!sentence_in_dictionary(sent)) {
			return -2;
		}
	}

	if (fw_failed) return -3;

	return 0;
}
Пример #2
0
Sentence sentence_create(char *input_string, Dictionary dict) {
    Sentence sent;
    int i;

    free_lookup_list();

    sent = (Sentence) xalloc(sizeof(struct Sentence_s));
    sent->dict = dict;
    sent->length = 0;
    sent->num_linkages_found = 0;
    sent->num_linkages_alloced = 0;
    sent->num_linkages_post_processed = 0;
    sent->num_valid_linkages = 0;
    sent->link_info = NULL;
    sent->deletable = NULL;
    sent->effective_dist = NULL;
    sent->num_valid_linkages = 0;
    sent->null_count = 0;
    sent->parse_info = NULL;
    sent->string_set = string_set_create();

    if (!separate_sentence(input_string, sent)) {
	string_set_delete(sent->string_set);
	xfree(sent, sizeof(struct Sentence_s));
	return NULL;
    }
   
    sent->q_pruned_rules = FALSE; /* for post processing */
    sent->is_conjunction = (char *) xalloc(sizeof(char)*sent->length);
    set_is_conjunction(sent);
    initialize_conjunction_tables(sent);

    for (i=0; i<sent->length; i++) {
	/* in case we free these before they set to anything else */
	sent->word[i].x = NULL;
	sent->word[i].d = NULL;
    }
    
    if (!(dict->unknown_word_defined && dict->use_unknown_word)) {
	if (!sentence_in_dictionary(sent)) {
	    sentence_delete(sent);
	    return NULL;
	}
    }
    
    if (!build_sentence_expressions(sent)) {
	sentence_delete(sent);
	return NULL;
    }

    return sent;
}
Пример #3
0
int sentence_split(Sentence sent, Parse_Options opts)
{
	Dictionary dict = sent->dict;
	bool fw_failed = false;

	/* 0 == global_rand_state denotes "repeatable rand".
	 * If non-zero, set it here so that anysplit can use it.
	 */
	if (false == opts->repeatable_rand && 0 == sent->rand_state)
	{
		if (0 == global_rand_state) global_rand_state = 42;
		sent->rand_state = global_rand_state;
	}

	/* Tokenize */
	if (!separate_sentence(sent, opts))
	{
		return -1;
	}

	/* Flatten the word graph created by separate_sentence() to a 2D-word-array
	 * which is compatible to the current parsers.
	 * This may fail if UNKNOWN_WORD is needed but
	 * is not defined in the dictionary, or an internal error happens. */
	fw_failed = !flatten_wordgraph(sent, opts);

	/* If unknown_word is not defined, then no special processing
	 * will be done for e.g. capitalized words. */
	if (!(dict->unknown_word_defined && dict->use_unknown_word))
	{
		if (!sentence_in_dictionary(sent)) {
			return -2;
		}
	}

	if (fw_failed)
	{
		/* Make sure an error message is always printed.
		 * So it may be redundant. */
		prt_error("Error: sentence_split(): Internal error detected\n");
		return -3;
	}

	return 0;
}
Пример #4
0
int sentence_split(Sentence sent, Parse_Options opts)
{
	Dictionary dict = sent->dict;
	bool fw_failed = false;

	/* Tokenize */
	if (!separate_sentence(sent, opts))
	{
		return -1;
	}

	/* Flatten the word graph created by separate_sentence() to a 2D-word-array
	 * which is compatible to the current parsers.
	 * This may fail if the EMPTY_WORD_DOT or UNKNOWN_WORD words are needed but
	 * are not defined in the dictionary, or an internal error happens. */
	fw_failed = !flatten_wordgraph(sent, opts);

	/* If unknown_word is not defined, then no special processing
	 * will be done for e.g. capitalized words. */
	if (!(dict->unknown_word_defined && dict->use_unknown_word))
	{
		if (!sentence_in_dictionary(sent)) {
			return -2;
		}
	}

	if (fw_failed)
	{
		/* Make sure an error message is always printed.
		 * So it may be redundant. */
		prt_error("Error: sentence_split(): Internal error detected");
		return -3;
	}

	return 0;
}