Example #1
0
int boolean_dictionary_lookup(Dictionary dict, const char *s)
{
	Dict_node *llist = dictionary_lookup_list(dict, s);
	int boool = (llist != NULL);
	free_lookup_list(llist);
	return boool;
}
Example #2
0
const char * linkgrammar_get_dict_version(Dictionary dict)
{
	static char * ver = NULL;
	char * p;
	Dict_node *dn;
	Exp *e;

	if (ver) return ver;

	/* The newer dictionaries should contain a macro of the form:
	 * <dictionary-version-number>: V4v6v6+;
	 * which would indicate dictionary verison 4.6.6
	 * Older dictionaries contain no version info.
	 */
	dn = dictionary_lookup_list(dict, "<dictionary-version-number>");
	if (NULL == dn) return "[unknown]";

	e = dn->exp;
	ver = strdup(&e->u.string[1]);
	p = strchr(ver, 'v');
	while (p)
	{
		*p = '.';
		p = strchr(p+1, 'v');
	}

	free_lookup_list(dn);
	return ver;
}
Example #3
0
/**
 *  dict_display_word_info() - display the information about the given word.
 */
void dict_display_word_info(Dictionary dict, const char * s)
{
	Dict_node *dn, *dn_head;
	Disjunct * d1, * d2;
	int len;
	dn_head = dictionary_lookup_list(dict, s);
	if (dn_head == NULL)
	{
		printf("	\"%s\" matches nothing in the dictionary.\n", s);
		return;
	}
	printf("Matches:\n");
	for (dn = dn_head; dn != NULL; dn = dn->right)
	{
		len = 0;
		d1 = build_disjuncts_for_dict_node(dn);
		for(d2 = d1 ; d2 != NULL; d2 = d2->next)
		{
			len++;
		}
		free_disjuncts(d1);
		printf("    ");
		left_print_string(stdout, dn->string,
			"                         ");
		printf(" %5d  disjuncts ", len);
		if (dn->file != NULL)
		{
			printf("<%s>", dn->file->file);
		}
		printf("\n");
	}
	free_lookup_list(dn_head);
	return;
}
Example #4
0
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	dict->version = NULL;
	dict->num_entries = 0;
	dict->affix_table = NULL;
	dict->regex_root = NULL;

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	dict->lang = lang;
	t = strrchr (lang, '/');
	if (t) dict->lang = string_set_add(t+1, dict->string_set);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL) {
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	} else {
		dict->unlimited_connector_set = NULL;
	}
	free_lookup_list(dict, dict_node);

	return dict;
}
Example #5
0
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
	if (NULL == dict->spell_checker)
		prt_error("Info: Spell checker disabled.");
#endif
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);

	free_lookup_list(dict, dict_node);

	return dict;
}
Example #6
0
/**
 * build_idiom_word_name() -- return idiomized name of given string.
 *
 * Allocates string space and returns a pointer to it.
 * In this string is placed the idiomized name of the given string s.
 * This is the same as s, but with a postfix of ".Ix", where x is an
 * appropriate number.  x is the minimum number that distinguishes
 * this word from others in the dictionary.
 */
static const char * build_idiom_word_name(Dictionary dict, const char * s)
{
	char buff[2*MAX_WORD];
	char *x;
	int count;

	Dict_node *dn = dictionary_lookup_list(dict, s);
	count = max_postfix_found(dn)+1;
	free_lookup_list(dn);

	x = buff;
	while((*s != '\0') && (*s != '.'))
	{
		*x = *s;
		x++;
		s++;
	}
	sprintf(x, ".I%d",count);

	return string_set_add(buff, dict->string_set);
}
Example #7
0
/**
 * build_word_expressions() -- build list of expressions for a word
 *
 * Looks up the word s in the dictionary.  Returns NULL if it's not there.
 * If there, it builds the list of expressions for the word, and returns
 * a pointer to it.
 */
X_node * build_word_expressions(Sentence sent, const char * s)
{
	Dict_node * dn, *dn_head;
	X_node * x, * y;

	dn_head = dictionary_lookup_list(sent->dict, s);
	dn = dn_head;

	x = NULL;
	while (dn != NULL)
	{
		y = (X_node *) xalloc(sizeof(X_node));
		y->next = x;
		x = y;
		x->exp = copy_Exp(dn->exp);
		x->string = dn->string;
		dn = dn->right;
	}
	free_lookup_list (dn_head);
	return x;
}
Example #8
0
/**
 *  dict_display_word_expr() - display the connector info for a given word.
 */
void dict_display_word_expr(Dictionary dict, const char * s)
{
	Dict_node *dn, *dn_head;

	dn_head = dictionary_lookup_list(dict, s);
	if (dn_head == NULL)
	{
		printf("	\"%s\" matches nothing in the dictionary.\n", s);
		return;
	}
	printf("\nExpressions:\n");
	for (dn = dn_head; dn != NULL; dn = dn->right)
	{
		printf("    ");
		left_print_string(stdout, dn->string,
			"                         ");
		print_expression(dn->exp);
		printf("\n\n");
	}
	free_lookup_list(dn_head);
	return;
}
Example #9
0
static Dictionary
dictionary_six_str(const char * lang,
                   const char * input,
                   const char * dict_name,
                   const char * pp_name, const char * cons_name,
                   const char * affix_name, const char * regex_name)
{
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);
	dict->name = string_set_add(dict_name, dict->string_set);

	/*
	 * A special setup per dictionary type. The check here assumes the affix
	 * dictionary name contains "affix". FIXME: For not using this
	 * assumption, the dictionary creating stuff needs a rearrangement.
	 */
	if (0 == strstr(dict->name, "affix"))
	{
		/* To disable spell-checking, just set the checker to NULL */
		dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
		/* TODO:
		 * 1. Set the spell option to 0, to signify no spell checking is done.
		 * 2. On verbosity >= 1, add a detailed message on the reason. */
		if (NULL == dict->spell_checker)
			prt_error("Info: Spell checker disabled.");
#endif
		dict->insert_entry = insert_list;

		dict->lookup_list = lookup_list;
		dict->free_lookup = free_llist;
		dict->lookup = boolean_lookup;
	}
	else
	{
		/*
		 * Affix dictionary.
		 */
		size_t i;

		dict->insert_entry = load_affix;
		dict->lookup = return_true;

		/* initialize the class table */
		dict->afdict_class =
		   malloc(sizeof(*dict->afdict_class) * ARRAY_SIZE(afdict_classname));
		for (i = 0; i < ARRAY_SIZE(afdict_classname); i++)
		{
			dict->afdict_class[i].mem_elems = 0;
			dict->afdict_class[i].length = 0;
			dict->afdict_class[i].string = NULL;
		}
	}
	dict->affix_table = NULL;

	/* Read dictionary from the input string. */
	dict->input = input;
	dict->pin = dict->input;
	if (!read_dictionary(dict))
	{
		dict->pin = NULL;
		dict->input = NULL;
		goto failure;
	}
	dict->pin = NULL;
	dict->input = NULL;

	if (NULL == affix_name)
	{
		/*
		 * The affix table is handled alone in this invocation.
		 * Skip the rest of processing!
		 * FIXME: The dictionary creating stuff needs a rearrangement.
		 */
		return dict;
	}

	/* If we don't have a locale per dictionary, the following
	 * will also set the program's locale. */
	dict->locale = linkgrammar_get_dict_locale(dict);
	set_utf8_program_locale();

#ifdef HAVE_LOCALE_T
	/* We have a locale per dictionary. */
	if (NULL != dict->locale)
		dict->locale_t = newlocale_LC_CTYPE(dict->locale);

	/* If we didn't succeed to set the dictionary locale, the program will
	 * SEGFAULT when it tries to use it with the isw*() functions.
	 * So set it to the current program's locale as a last resort. */
	if (NULL == dict->locale)
	{
		dict->locale = setlocale(LC_CTYPE, NULL);
		dict->locale_t = newlocale_LC_CTYPE(setlocale(LC_CTYPE, NULL));
		prt_error("Warning: Couldn't set dictionary locale! "
		          "Using current program locale %s", dict->locale);
	}
	/* If dict->locale is still not set, there is a bug. */
	assert((locale_t)0 != dict->locale_t, "Dictionary locale is not set.");
#else
	/* We don't have a locale per dictionary - but anyway make sure
	 * dict->locale is consistent with the current program's locale,
	 * and especially that it is not NULL. It still indicates the intended
	 * locale of this dictionary and the locale of the compiled regexs. */
	dict->locale = setlocale(LC_CTYPE, NULL);
#endif /* HAVE_LOCALE_T */

	dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL);
	if (dict->affix_table == NULL)
	{
		prt_error("Error: Could not open affix file %s", affix_name);
		goto failure;
	}
	if (! afdict_init(dict))
		goto failure;

	/*
	 * Process the regex file.
	 * We have to compile regexs using the dictionary locale,
	 * so make a temporary locale swap.
	 */
	if (read_regex_file(dict, regex_name)) goto failure;

	const char *locale = setlocale(LC_CTYPE, NULL);
	locale = strdupa(locale); /* setlocale() uses static memory. */
	setlocale(LC_CTYPE, dict->locale);
	lgdebug(+D_DICT, "Regexs locale %s\n", setlocale(LC_CTYPE, NULL));

	if (compile_regexs(dict->regex_root, dict))
	{
		locale = setlocale(LC_CTYPE, locale);
		goto failure;
	}
	locale = setlocale(LC_CTYPE, locale);
	assert(NULL != locale, "Cannot restore program locale\n");

#ifdef USE_CORPUS
	dict->corpus = lg_corpus_new();
#endif

	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->base_knowledge  = pp_knowledge_open(pp_name);
	dict->hpsg_knowledge  = pp_knowledge_open(cons_name);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);

	free_lookup(dict_node);

	return dict;

failure:
	string_set_delete(dict->string_set);
	if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s));
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
}
Example #10
0
/**
 * Read dictionary entries from a wide-character string "input".
 * All other parts are read from files.
 */
static Dictionary
dictionary_six_str(const char * lang,
                   const char * input,
                   const char * dict_name,
                   const char * pp_name, const char * cons_name,
                   const char * affix_name, const char * regex_name)
{
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	dict->num_entries = 0;
	dict->is_special = false;
	dict->already_got_it = '\0';
	dict->line_number = 0;
	dict->root = NULL;
	dict->regex_root = NULL;
	dict->word_file_header = NULL;
	dict->exp_list = NULL;
	dict->affix_table = NULL;
	dict->recursive_error = false;
	dict->version = NULL;
#ifdef HAVE_SQLITE
	dict->db_handle = NULL;
#endif
#ifdef USE_ANYSPLIT
	dict->anysplit = NULL;
#endif

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	dict->lang = lang;
	t = strrchr (lang, '/');
	if (t) dict->lang = string_set_add(t+1, dict->string_set);
	dict->name = string_set_add(dict_name, dict->string_set);

	/*
	 * A special setup per dictionary type. The check here assumes the affix
	 * dictionary name contains "affix". FIXME: For not using this
	 * assumption, the dictionary creating stuff needs a rearrangement.
	 */
	if (0 == strstr(dict->name, "affix"))
	{
		/* To disable spell-checking, just set the checker to NULL */
		dict->spell_checker = spellcheck_create(dict->lang);
		dict->insert_entry = insert_list;

		dict->lookup_list = lookup_list;
		dict->free_lookup = free_llist;
		dict->lookup = boolean_lookup;
	}
	else
	{
		/*
		 * Affix dictionary.
		 */
		size_t i;

		dict->insert_entry = load_affix;
		dict->lookup = return_true;

		/* initialize the class table */
		dict->afdict_class =
		   malloc(sizeof(*dict->afdict_class) * NUMELEMS(afdict_classname));
		for (i = 0; i < NUMELEMS(afdict_classname); i++)
		{
			dict->afdict_class[i].mem_elems = 0;
			dict->afdict_class[i].length = 0;
			dict->afdict_class[i].string = NULL;
		}
	}
	dict->affix_table = NULL;

	/* Read dictionary from the input string. */
	dict->input = input;
	dict->pin = dict->input;
	if (!read_dictionary(dict))
	{
		dict->pin = NULL;
		dict->input = NULL;
		goto failure;
	}
	dict->pin = NULL;
	dict->input = NULL;

	if (NULL == affix_name)
	{
		/*
		 * The affix table is handled alone in this invocation.
		 * Skip the rest of processing!
		 * FIXME: The dictionary creating stuff needs a rearrangement.
		 */
		return dict;
	}

	dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL);
	if (dict->affix_table == NULL)
	{
		prt_error("Error: Could not open affix file %s", affix_name);
		goto failure;
	}
	if (! afdict_init(dict))
		goto failure;

	if (read_regex_file(dict, regex_name)) goto failure;
	if (compile_regexs(dict->regex_root, dict)) goto failure;

#ifdef USE_CORPUS
	dict->corpus = lg_corpus_new();
#endif

	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->base_knowledge  = pp_knowledge_open(pp_name);
	dict->hpsg_knowledge  = pp_knowledge_open(cons_name);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL) {
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	} else {
		dict->unlimited_connector_set = NULL;
	}
	free_lookup(dict_node);

	return dict;

failure:
	string_set_delete(dict->string_set);
	if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s));
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
}