Ejemplo n.º 1
0
/**
 * Release the per-word storage of a sentence, then the word array itself.
 *
 * For each of the sent->length words, the X_node list (word[].x) is handed
 * to free_X_nodes() and the alternatives pointer is free()d. After that,
 * free_sentence_disjuncts() is invoked on the sentence, the word array is
 * freed, and sent->word is reset to NULL. sent->length is not modified.
 */
static void free_sentence_words(Sentence sent)
{
	WordIdx w = 0;

	while (w < sent->length)
	{
		free_X_nodes(sent->word[w].x);
		free(sent->word[w].alternatives);
		w++;
	}

	/* Called before the word array is released.
	 * NOTE(review): presumably it still reads sent->word -- confirm. */
	free_sentence_disjuncts(sent);

	free((void *) sent->word);
	sent->word = NULL;
}
Ejemplo n.º 2
0
/**
 * Free the expression list (X_node chain) attached to every word of the
 * sentence.
 *
 * Each sent->word[i].x is passed to free_X_nodes() and then reset to NULL,
 * so the word array no longer holds pointers to released memory. Without
 * the reset, any later free_X_nodes() on the same field (a repeated call
 * to this function, or a teardown of the words) would free the list twice.
 *
 * NOTE(review): a repeated call now passes NULL to free_X_nodes();
 * confirm that it accepts a NULL list.
 */
void free_sentence_expressions(Sentence sent) {
  int i;
  for (i=0; i<sent->length; i++) {
    free_X_nodes(sent->word[i].x);
    sent->word[i].x = NULL;  /* do not leave a dangling pointer behind */
  }
}
Ejemplo n.º 3
0
/*
 * Attach an expression list to word number w of the sentence.
 *
 * The rules are tried in the order below and the first one that applies
 * wins, so the order of the tests matters:
 *   1. the word is in the dictionary: use its expressions;
 *   2. capitalized s-word, PL_PROPER_WORD defined: use PL_PROPER_WORD;
 *   3. capitalized word, PROPER_WORD defined: use PROPER_WORD;
 *   4. number, NUMBER_WORD defined: use NUMBER_WORD;
 *   5. hyphenated word, HYPHENATED_WORD defined: use HYPHENATED_WORD;
 *   6. morphology guesses (-ing, -s, -ed, -ly), each only when the
 *      matching dictionary flag is set;
 *   7. the unknown-word entry, when defined and enabled.
 * If nothing applies, an assertion is raised.
 *
 * Returns FALSE when special_string() or guessed_string() reports
 * failure, TRUE otherwise.
 */
static int build_expressions_for_word(Sentence sent, int w)
{
	Dictionary dict = sent->dict;
	char *str = sent->word[w].string;

	if (boolean_dictionary_lookup(dict, str))
	{
		sent->word[w].x = build_word_expressions(sent, str);
		return TRUE;
	}

	if (is_utf8_upper(str) && is_s_word(str) && dict->pl_capitalized_word_defined)
	{
		return special_string(sent, w, PL_PROPER_WORD) ? TRUE : FALSE;
	}

	if (is_utf8_upper(str) && dict->capitalized_word_defined)
	{
		return special_string(sent, w, PROPER_WORD) ? TRUE : FALSE;
	}

	if (is_number(str) && dict->number_word_defined)
	{
		/* Reaching this point with the string "1" means that "1" itself
		 * is not a dictionary entry; otherwise it is a plural number. */
		return special_string(sent, w, NUMBER_WORD) ? TRUE : FALSE;
	}

	if (ishyphenated(str) && dict->hyphenated_word_defined)
	{
		/* Singular hyphenated word. */
		return special_string(sent, w, HYPHENATED_WORD) ? TRUE : FALSE;
	}

	/* XXX
	 * What follows is morphology guessing for words that are missing
	 * from the dictionary. It should be replaced by a generic
	 * morphology guesser for languages other than English.
	 * XXX
	 */
	if (is_ing_word(str) && dict->ing_word_defined)
	{
		return guessed_string(sent, w, str, ING_WORD) ? TRUE : FALSE;
	}

	if (is_s_word(str) && dict->s_word_defined)
	{
		return guessed_string(sent, w, str, S_WORD) ? TRUE : FALSE;
	}

	if (is_ed_word(str) && dict->ed_word_defined)
	{
		return guessed_string(sent, w, str, ED_WORD) ? TRUE : FALSE;
	}

	if (is_ly_word(str) && dict->ly_word_defined)
	{
		return guessed_string(sent, w, str, LY_WORD) ? TRUE : FALSE;
	}

	if (dict->unknown_word_defined && dict->use_unknown_word)
	{
		handle_unknown_word(sent, w, str);
		return TRUE;
	}

	/* Every word is expected to have been matched by one of the rules
	 * above before control gets here, hence the assertion. */
	assert(FALSE, "I should have found that word.");
	return TRUE;
}

/*
 * For a capitalized word number w, also take its lower-case form into
 * account.
 *
 * Nothing happens unless the word is capitalized and its lower-case form
 * is in the dictionary. If the capitalized form is in the dictionary as
 * well, the lower-case expressions are appended to the ones already
 * attached to the word. Otherwise the word string is overwritten with the
 * lower-case form and its expressions replace the existing ones.
 */
static void add_lowercase_expressions(Sentence sent, int w)
{
	char lowered[MAX_WORD+1];
	char *str = sent->word[w].string;
	char *lc;
	X_node *lc_exp;

	if (!is_utf8_upper(str)) return;

	downcase_utf8_str(lowered, str, MAX_WORD);
	lc = string_set_add(lowered, sent->string_set);

	if (!boolean_dictionary_lookup(sent->dict, lc)) return;

	if (boolean_dictionary_lookup(sent->dict, str))
	{
		/* Both forms are known; the capitalized expressions are
		 * already attached, so append the lower-case ones. */
		lc_exp = build_word_expressions(sent, lc);
		sent->word[w].x = catenate_X_nodes(sent->word[w].x, lc_exp);
	}
	else
	{
		/* Only the lower-case form is known: rewrite the word in
		 * place and swap in the lower-case expressions. */
		safe_strcpy(str, lc, MAX_WORD);
		lc_exp = build_word_expressions(sent, str);
		free_X_nodes(sent->word[w].x);
		sent->word[w].x = lc_exp;
	}
}

/**
 * Builds the expression lists for all words of the sentence, then
 * corrects the case handling of words in sentence-initial positions.
 *
 * Pass 1 treats every word alike and applies the first matching rule:
 *   - the word is in the dictionary: use it;
 *   - it is a capitalized s-word: use PL_PROPER_WORD;
 *   - it is capitalized: use PROPER_WORD;
 *   - it is a number: use NUMBER_WORD;
 *   - it is hyphenated: use HYPHENATED_WORD;
 *   - it looks like an -ing, -s, -ed or -ly word: use the matching guess;
 *   - otherwise use the unknown-word entry.
 * Each special rule applies only if the dictionary defines the
 * corresponding entry.
 *
 * Pass 2 revisits the first word after the wall, any word that follows
 * a colon, and any word flagged in post_quote[]. If such a word w is
 * capitalized, let w' be its lower-case version:
 *   - if both w and w' are in the dictionary, concatenate their
 *     expressions;
 *   - else if w' is in the dictionary, use the expressions of w';
 *   - else leave the expressions alone.
 *
 * Returns TRUE on success, FALSE if a special or guessed entry could
 * not be set up for some word.
 */
int build_sentence_expressions(Sentence sent)
{
	int w;
	int first_word;  /* index of the first word after the wall */
	int capitalizable;

	first_word = sent->dict->left_wall_defined ? 1 : 0;

	/* Pass 1: every word gets the same treatment; the first word is
	 * not special here. */
	for (w = 0; w < sent->length; w++)
	{
		if (!build_expressions_for_word(sent, w)) return FALSE;
	}

	/* Pass 2: in certain positions -- the first word of the sentence,
	 * or right after a colon or a quotation mark -- a capitalized word
	 * must be looked up in lower case too (as well as capitalized).
	 */
	for (w = 0; w < sent->length; w++)
	{
		capitalizable = (w == first_word)
			|| (w > 0 && strcmp(":", sent->word[w-1].string) == 0)
			|| (post_quote[w] == 1);

		if (capitalizable) add_lowercase_expressions(sent, w);
	}

	return TRUE;
}