Пример #1
0
/**
 * connector() -- make a node for a connector or dictionary word.
 *
 * Assumes the current token (dict->token) is a connector or a
 * dictionary word:
 *  - A token ending in '+' or '-' is a connector.  The trailing
 *    direction character is stripped from dict->token in place, a
 *    leading '@' marks the connector as multi, and a new
 *    CONNECTOR_type node with zero cost is created.
 *  - Any other token is looked up as a word; an entry whose string is
 *    exactly the token must exist, and its expression is wrapped with
 *    make_unary_node().
 *
 * An empty token is never a connector; it is handled as a word so
 * that the byte before the start of the token buffer is never read.
 *
 * Returns the new node after advancing past the token, or NULL if the
 * word is unknown, check_connector() rejects the connector, or
 * link_advance() fails.
 */
static Exp * connector(Dictionary dict)
{
	Exp * n;
	Dict_node *dn, *dn_head;
	size_t len, i;

	len = strlen(dict->token);
	i = (len > 0) ? len - 1 : 0;  /* index of the last character, if any */

	/* The last character must be + or - if this is a connector */
	if ((len == 0) ||
	    ((dict->token[i] != '+') && (dict->token[i] != '-')))
	{
		/* If we are here, token is a word */
		dn_head = abridged_lookup_list(dict, dict->token);
		dn = dn_head;
		/* The lookup may return several entries; we need the exact one. */
		while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
		{
			dn = dn->right;
		}
		if (dn == NULL)
		{
			free_lookup_list(dn_head);
			dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
			                 "Or perhaps you forgot the suffix on a word.\n"
			                 "Or perhaps a word is used before it is defined.\n");
			return NULL;
		}
		n = make_unary_node(dict, dn->exp);
		free_lookup_list(dn_head);
	}
	else
	{
		/* If we are here, token is a connector */
		if (!check_connector(dict, dict->token))
		{
			return NULL;
		}
		n = Exp_create(dict);
		n->dir = dict->token[i];
		dict->token[i] = '\0';				   /* get rid of the + or - */
		if (dict->token[0] == '@')
		{
			/* Skip the '@' marker when storing the connector name. */
			n->u.string = string_set_add(dict->token+1, dict->string_set);
			n->multi = TRUE;
		}
		else
		{
			n->u.string = string_set_add(dict->token, dict->string_set);
			n->multi = FALSE;
		}
		n->type = CONNECTOR_type;
		n->cost = 0.0f;
	}

	if (!link_advance(dict))
	{
		exp_free(n);
		return NULL;
	}
	return n;
}
Пример #2
0
/**
 * linkgrammar_get_dict_version() -- return the dictionary version string.
 *
 * Newer dictionaries should contain a macro of the form:
 *    <dictionary-version-number>: V4v6v6+;
 * which indicates dictionary version 4.6.6.  The leading character of
 * the stored connector name is dropped and every 'v' is turned into a
 * '.'.  Older dictionaries contain no version info.
 *
 * Returns "[unknown]" if the dictionary has no such entry or if the
 * version string cannot be allocated.
 *
 * The result is cached in a function-local static after the first
 * successful lookup, so later calls return that same string no matter
 * which dictionary is passed.  The cached string is never freed here.
 */
const char * linkgrammar_get_dict_version(Dictionary dict)
{
	static char * ver = NULL;
	char * p;
	Dict_node *dn;

	if (ver) return ver;

	dn = dictionary_lookup_list(dict, "<dictionary-version-number>");
	if (NULL == dn) return "[unknown]";

	/* Copy before releasing the lookup list; skip the leading letter. */
	ver = strdup(&dn->exp->u.string[1]);
	free_lookup_list(dn);

	/* Out of memory: report unknown and leave the cache empty so a
	 * later call can try again. */
	if (NULL == ver) return "[unknown]";

	for (p = strchr(ver, 'v'); p != NULL; p = strchr(p+1, 'v'))
	{
		*p = '.';
	}

	return ver;
}
Пример #3
0
/**
 * boolean_dictionary_lookup() -- test whether a lookup of s finds anything.
 *
 * Returns 1 if dictionary_lookup_list() yields a non-NULL list for s,
 * and 0 otherwise.  The list is released before returning.
 */
int boolean_dictionary_lookup(Dictionary dict, const char *s)
{
	Dict_node *matches = dictionary_lookup_list(dict, s);
	int found = (NULL != matches) ? 1 : 0;

	free_lookup_list(matches);
	return found;
}
Пример #4
0
/**
 * dict_display_word_info() -- print a summary for each dictionary entry
 * returned by a lookup of s.
 *
 * For every entry, prints its string, the number of disjuncts built
 * for it, and (when the entry has one) the word file it came from.
 * If the lookup returns nothing, a single "matches nothing" line is
 * printed instead.
 */
void dict_display_word_info(Dictionary dict, const char * s)
{
	Dict_node *entry;
	Dict_node *matches = dictionary_lookup_list(dict, s);

	if (NULL == matches)
	{
		printf("	\"%s\" matches nothing in the dictionary.\n", s);
		return;
	}

	printf("Matches:\n");
	entry = matches;
	while (entry != NULL)
	{
		Disjunct *disjuncts = build_disjuncts_for_dict_node(entry);
		Disjunct *d = disjuncts;
		int count = 0;

		/* The disjuncts are only built to be counted. */
		while (d != NULL)
		{
			count++;
			d = d->next;
		}
		free_disjuncts(disjuncts);

		printf("    ");
		left_print_string(stdout, entry->string,
			"                         ");
		printf(" %5d  disjuncts ", count);
		if (NULL != entry->file)
		{
			printf("<%s>", entry->file->file);
		}
		printf("\n");

		entry = entry->right;
	}
	free_lookup_list(matches);
}
Пример #5
0
/**
 * dictionary_create_from_db() -- create a dictionary backed by a database.
 *
 * lang names the language directory; the database file name is built
 * with join_path(lang, "dict.db").  The language name stored in the
 * dictionary is the part of lang after the last '/', or all of lang
 * if it contains no '/'.  It is always copied into the dictionary's
 * own string set, so the dictionary never keeps a pointer into the
 * caller's buffer.
 *
 * Returns the newly allocated dictionary.
 */
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	dict->version = NULL;
	dict->num_entries = 0;
	dict->affix_table = NULL;
	dict->regex_root = NULL;

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	/* Keep an interned copy of the file name; the joined path is ours
	 * to free. */
	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	/* The lookup callbacks must be in place before any lookup below. */
	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL) {
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	} else {
		dict->unlimited_connector_set = NULL;
	}
	free_lookup_list(dict, dict_node);

	return dict;
}
Пример #6
0
/**
 * insert_list() -- insert a chain of Dict_nodes into the dictionary.
 *
 * p is the head of a list of dict_nodes linked through their left
 * pointers, and l is the number of nodes to take from it (the left
 * pointer of the last one need not be NULL).  The middle node is
 * handled first, then the first half, then the second half.
 *
 * Each node goes one of four ways:
 *  - a string containing an underbar is passed to insert_idiom();
 *  - a string that looks like an idiom word is dropped with a warning;
 *  - a string that already matches dictionary entries is dropped with
 *    a warning listing those entries;
 *  - anything else is inserted into the tree and counted.
 *
 * Note from the original author: the middle/left/right order probably
 * began as a workaround for a badly unbalanced binary tree.  The tree
 * is now rebalanced with the DSW algorithm, which would seem to make
 * the bisected insertion obsolete -- yet it reportedly makes the DSW
 * pass run faster than simple insertion does, perhaps because the
 * dictionaries are in alphabetical order and the subdivision shuffles
 * them a little.
 */
static void insert_list(Dictionary dict, Dict_node * p, int l)
{
	Dict_node *mid, *rest, *clash;
	int half;   /* length of first half */
	int step;

	if (0 == l) return;

	half = (l-1)/2;
	mid = p;
	step = 0;
	while (step < half)
	{
		mid = mid->left;
		step++;
	}

	/* mid is the middle element; detach it from the chain */
	rest = mid->left;
	mid->left = NULL;
	mid->right = NULL;

	if (contains_underbar(mid->string))
	{
		insert_idiom(dict, mid);
	}
	else if (is_idiom_word(mid->string))
	{
		err_ctxt ec;
		ec.sent = NULL;
		err_msg(&ec, Warn, "Warning: Word \"%s\" found near line %d.\n"
		        "\tWords ending \".Ix\" (x a number) are reserved for idioms.\n"
		        "\tThis word will be ignored.\n",
		        mid->string, dict->line_number);
		free_dict_node(mid);
	}
	else
	{
		/* Only ordinary words are checked against existing entries. */
		clash = abridged_lookup_list(dict, mid->string);
		if (NULL == clash)
		{
			dict->root = insert_dict(dict, dict->root, mid);
			dict->num_entries++;
		}
		else
		{
			Dict_node *hit;
			err_ctxt ec;
			ec.sent = NULL;
			err_msg(&ec, Warn, "Warning: The word \"%s\" "
			        "found near line %d of %s matches the following words:\n",
			        mid->string, dict->line_number, dict->name);
			hit = clash;
			while (hit != NULL)
			{
				fprintf(stderr, "\t%s", hit->string);
				hit = hit->right;
			}
			fprintf(stderr, "\n\tThis word will be ignored.\n");
			free_lookup_list(clash);
			free_dict_node(mid);
		}
	}

	/* The nodes before the middle, then the nodes after it. */
	insert_list(dict, p, half);
	insert_list(dict, rest, l-half-1);
}
Пример #7
0
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
	if (NULL == dict->spell_checker)
		prt_error("Info: Spell checker disabled.");
#endif
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);

	free_lookup_list(dict, dict_node);

	return dict;
}
Пример #8
0
/**
 * sentence_create() -- build a Sentence from an input string.
 *
 * Allocates the sentence, splits input_string into words with
 * separate_sentence(), sets up the conjunction data, and builds the
 * word expressions.
 *
 * Returns the new sentence, or NULL if the string cannot be separated,
 * if unknown words are not allowed and sentence_in_dictionary() fails,
 * or if the expressions cannot be built.
 */
Sentence sentence_create(char *input_string, Dictionary dict) {
    Sentence sent;
    int w;

    free_lookup_list();

    sent = (Sentence) xalloc(sizeof(struct Sentence_s));
    sent->dict = dict;
    sent->string_set = string_set_create();

    /* Counters start at zero ... */
    sent->length = 0;
    sent->null_count = 0;
    sent->num_linkages_found = 0;
    sent->num_linkages_alloced = 0;
    sent->num_linkages_post_processed = 0;
    sent->num_valid_linkages = 0;

    /* ... and the lazily built tables start out empty. */
    sent->link_info = NULL;
    sent->deletable = NULL;
    sent->effective_dist = NULL;
    sent->parse_info = NULL;

    if (!separate_sentence(input_string, sent)) {
        /* Nothing else has been set up yet, so undo by hand. */
        string_set_delete(sent->string_set);
        xfree(sent, sizeof(struct Sentence_s));
        return NULL;
    }

    sent->q_pruned_rules = FALSE; /* for post processing */
    sent->is_conjunction = (char *) xalloc(sizeof(char)*sent->length);
    set_is_conjunction(sent);
    initialize_conjunction_tables(sent);

    /* in case we free these before they are set to anything else */
    w = 0;
    while (w < sent->length) {
        sent->word[w].x = NULL;
        sent->word[w].d = NULL;
        w++;
    }

    /* Every word must be in the dictionary unless unknown words are
     * both defined and enabled. */
    if ((!dict->unknown_word_defined || !dict->use_unknown_word)
        && !sentence_in_dictionary(sent)) {
        sentence_delete(sent);
        return NULL;
    }

    if (!build_sentence_expressions(sent)) {
        sentence_delete(sent);
        return NULL;
    }

    return sent;
}
Пример #9
0
/**
 * sentence_delete() -- release a sentence and everything it owns.
 *
 * Frees the disjuncts, expressions, string set, parse set,
 * post-processing data, deletable and effective-distance tables and
 * the conjunction array, then the sentence itself.  It also closes
 * the sentence on the dictionary's postprocessor and calls
 * free_lookup_list().
 *
 * Passing NULL is allowed and does nothing, so the result of a failed
 * sentence_create() can be handed straight to this function.
 */
void sentence_delete(Sentence sent) {

    if (sent == NULL) return;

  /*free_andlists(sent); */
    free_sentence_disjuncts(sent);
    free_sentence_expressions(sent);
    string_set_delete(sent->string_set);
    free_parse_set(sent);
    free_post_processing(sent);
    post_process_close_sentence(sent->dict->postprocessor);
    free_lookup_list();
    free_deletable(sent);
    free_effective_dist(sent);
    /* sent->length is needed for the size, so this precedes freeing sent. */
    xfree(sent->is_conjunction, sizeof(char)*sent->length);
    xfree((char *) sent, sizeof(struct Sentence_s));
}
Пример #10
0
/**
 * build_idiom_word_name() -- return idiomized name of given string.
 *
 * The idiomized name is s up to (but not including) its first '.',
 * followed by a postfix of ".Ix", where x is one more than the value
 * max_postfix_found() reports for the entries matching s.
 *
 * The name is assembled in a fixed local buffer.  An over-long s is
 * truncated so that the postfix always fits; the buffer is never
 * overrun.
 *
 * Returns the name as stored in the dictionary's string set.
 */
static const char * build_idiom_word_name(Dictionary dict, const char * s)
{
	char buff[2*MAX_WORD];
	/* Room kept free for ".I", the digits (and sign) of an int, and
	 * the terminating NUL. */
	const size_t postfix_room = 16;
	size_t len = 0;
	int count;

	Dict_node *dn = dictionary_lookup_list(dict, s);
	count = max_postfix_found(dn)+1;
	free_lookup_list(dn);

	/* Copy the base word, stopping at any existing '.' suffix. */
	while ((s[len] != '\0') && (s[len] != '.') &&
	       (len + postfix_room < sizeof(buff)))
	{
		buff[len] = s[len];
		len++;
	}
	snprintf(buff + len, sizeof(buff) - len, ".I%d", count);

	return string_set_add(buff, dict->string_set);
}
Пример #11
0
/**
 * build_word_expressions() -- build list of expressions for a word
 *
 * Looks up the word s in the sentence's dictionary and makes one
 * X_node per entry found, each holding a copy of the entry's
 * expression and a pointer to the entry's string.  Nodes are pushed
 * on the front, so the result is in the reverse of lookup order.
 *
 * Returns the head of the list, or NULL if the lookup finds nothing.
 */
X_node * build_word_expressions(Sentence sent, const char * s)
{
	Dict_node *matches = dictionary_lookup_list(sent->dict, s);
	Dict_node *entry;
	X_node *head = NULL;

	for (entry = matches; entry != NULL; entry = entry->right)
	{
		X_node *node = (X_node *) xalloc(sizeof(X_node));

		node->next = head;
		node->exp = copy_Exp(entry->exp);
		node->string = entry->string;
		head = node;
	}

	free_lookup_list (matches);
	return head;
}
Пример #12
0
/**
 * dict_display_word_expr() -- print the expression of each dictionary
 * entry returned by a lookup of s.
 *
 * Each entry is shown as its string followed by its expression.  If
 * the lookup returns nothing, a single "matches nothing" line is
 * printed instead.
 */
void dict_display_word_expr(Dictionary dict, const char * s)
{
	Dict_node *entry;
	Dict_node *matches = dictionary_lookup_list(dict, s);

	if (NULL == matches)
	{
		printf("	\"%s\" matches nothing in the dictionary.\n", s);
		return;
	}

	printf("\nExpressions:\n");
	entry = matches;
	while (entry != NULL)
	{
		printf("    ");
		left_print_string(stdout, entry->string,
			"                         ");
		print_expression(entry->exp);
		printf("\n\n");
		entry = entry->right;
	}
	free_lookup_list(matches);
}
Пример #13
0
/* internal_dictionary_create() -- dictionary_create with an extra
   parameter called "path".
   If path is non-null, it is what gets passed to open_dictionary() to
   find the file.  Otherwise dict_name is used for that.  This is only
   needed because an affix file is opened by a recursive call to this
   function.

   pp_name and cons_name are handed to post_process_open() for the
   postprocessor and the constituent postprocessor.  affix_name, when
   non-null, names the affix dictionary to load.

   Returns the new dictionary, or NULL if the dictionary cannot be
   opened or read.  If the affix dictionary cannot be created, the
   error message is printed and the process exits.
 */
static Dictionary internal_dictionary_create(char * dict_name, char * pp_name, char * cons_name, char * affix_name, char * path) {
    Dictionary dict;
    static int rand_table_inited=FALSE;
    Dict_node *dict_node;
    char * dictionary_path_name;

    dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));

    /* The random table is initialized once, on the first call. */
    if (!rand_table_inited) {
        init_randtable();
	rand_table_inited=TRUE;
    }

    dict->string_set = string_set_create();
    dict->name = string_set_add(dict_name, dict->string_set);
    dict->num_entries = 0;
    dict->is_special = FALSE;
    dict->already_got_it = '\0';
    dict->line_number = 1;
    dict->root = NULL;
    dict->word_file_header = NULL;
    dict->exp_list = NULL;
    dict->affix_table = NULL;

    /*  *DS*  remove this
    if (pp_name != NULL) {
	dict->post_process_filename = string_set_add(pp_name, dict->string_set);
    }
    else {
	dict->post_process_filename = NULL;
    }
    */
    
    /* An explicit path overrides dict_name for locating the file. */
    if (path != NULL) dictionary_path_name = path; else dictionary_path_name = dict_name;

    if (!open_dictionary(dictionary_path_name, dict)) {
	lperror(NODICT, dict_name);
	/* Only the string set and the struct exist so far. */
	string_set_delete(dict->string_set);
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
    }

    if (!read_dictionary(dict)) {
	string_set_delete(dict->string_set);
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
    }

    dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
    dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);
    dict->postprocessor      = post_process_open(dict->name, pp_name);
    dict->constituent_pp     = post_process_open(dict->name, cons_name);
    
    /* affix_table was already set to NULL above; this repeats it. */
    dict->affix_table = NULL;
    if (affix_name != NULL) {
	/* Recursive call: the affix dictionary has no postprocessors and no
	   affix table of its own; dict_name is passed as its path. */
	dict->affix_table = internal_dictionary_create(affix_name, NULL, NULL, NULL, dict_name);
	if (dict->affix_table == NULL) {
	    fprintf(stderr, "%s\n", lperrmsg);
	    exit(-1);
	}
    }
    
    /* Record which of the special words this dictionary defines. */
    dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
    dict->use_unknown_word = TRUE;
    dict->capitalized_word_defined = boolean_dictionary_lookup(dict, PROPER_WORD);
    dict->pl_capitalized_word_defined = boolean_dictionary_lookup(dict, PL_PROPER_WORD);
    dict->hyphenated_word_defined = boolean_dictionary_lookup(dict, HYPHENATED_WORD);
    dict->number_word_defined = boolean_dictionary_lookup(dict, NUMBER_WORD);
    dict->ing_word_defined = boolean_dictionary_lookup(dict, ING_WORD);
    dict->s_word_defined = boolean_dictionary_lookup(dict, S_WORD);
    dict->ed_word_defined = boolean_dictionary_lookup(dict, ED_WORD);
    dict->ly_word_defined = boolean_dictionary_lookup(dict, LY_WORD);
    dict->max_cost = 1000;

    /* Each connector set is built from the expression of its special
       word, or left NULL when the word is not in the dictionary. */
    if ((dict_node = dictionary_lookup(dict, ANDABLE_CONNECTORS_WORD)) != NULL) {
	dict->andable_connector_set = connector_set_create(dict_node->exp);
    } else {
	dict->andable_connector_set = NULL;
    }

    if ((dict_node = dictionary_lookup(dict, UNLIMITED_CONNECTORS_WORD)) != NULL) {
	dict->unlimited_connector_set = connector_set_create(dict_node->exp);
    } else {
	dict->unlimited_connector_set = NULL;
    }

    /* NOTE(review): presumably releases the results of the
       dictionary_lookup() calls above -- confirm against its definition. */
    free_lookup_list();
    return dict;
}
Пример #14
0
/**
 * read_entry() -- read one dictionary entry
 * Starting with the current token, parse one dictionary entry.
 * A single dictionary entry must have one and only one colon in it,
 * and is terminated by a semi-colon.
 * Add these words to the dictionary.
 *
 * Everything before the colon is either a word or, when it starts
 * with '/' (but not "/."), the name of a word file to read.  The
 * words are collected in a list chained through the nodes' left
 * pointers.  After the colon comes one expression, which every
 * collected word then shares.
 *
 * Returns 1 on success and 0 on any error.  On a syntax error the
 * words collected so far are freed one by one along the left chain.
 */
static int read_entry(Dictionary dict)
{
	Exp *n;
	int i;

	Dict_node *dn_new, *dnx, *dn = NULL;

	/* Reset multi-byte shift state every line. */
	memset(&dict->mbss, 0, sizeof(dict->mbss));

	while (!is_equal(dict, ':'))
	{
		if (dict->is_special)
		{
			dict_error(dict, "I expected a word but didn\'t get it.");
			/* Words may already have been collected; release them. */
			goto syntax_error;
		}

		/* If it's a word-file name */
		/* However, be careful to reject "/.v" which is the division symbol
		 * used in equations (.v means verb-like) */
		if ((dict->token[0] == '/') && (dict->token[1] != '.'))
		{
			dn = read_word_file(dict, dn, dict->token);
			if (dn == NULL)
			{
				err_ctxt ec;
				ec.sent = NULL;
				err_msg(&ec, Error, "Error opening word file %s\n", dict->token);
				return 0;
			}
		}
		else
		{
			/* Push a new node for this word on the front of the list. */
			dn_new = dict_node_new();
			dn_new->left = dn;
			dn = dn_new;
			dn->file = NULL;
			dn->string = string_set_add(dict->token, dict->string_set);
		}

		/* Advance to next entry, unless error */
		if (0 == link_advance(dict)) goto syntax_error;
	}

	/* pass the : */
	if (!link_advance(dict))
	{
		goto syntax_error;
	}

	n = expression(dict);
	if (n == NULL)
	{
		goto syntax_error;
	}

	if (!is_equal(dict, ';'))
	{
		dict_error(dict, "Expecting \";\" at the end of an entry.");
		goto syntax_error;
	}

	/* pass the ; */
	if (!link_advance(dict))
	{
		goto syntax_error;
	}

	/* At this point, dn points to a list of Dict_nodes connected by
	 * their left pointers. These are to be inserted into the dictionary */
	i = 0;
	for (dnx = dn; dnx != NULL; dnx = dnx->left)
	{
		dnx->exp = n;
		i++;
	}
	insert_list(dict, dn, i);
	return 1;

syntax_error:
	/* The pending words are chained through ->left, and ->right has
	 * not been set on nodes made here, so walk the left chain and
	 * free each node individually. */
	while (dn != NULL)
	{
		dnx = dn->left;
		free_dict_node(dn);
		dn = dnx;
	}
	return 0;
}