void indri::collection::CompressedCollection::close() {
  _lookup.close();
  if( _output ) {
    _output->flush();
    delete _output;
    _output = 0;
  }

  _storage.close();

  indri::utility::HashTable<const char*, lemur::file::Keyfile*>::iterator iter;

  for( iter = _forwardLookups.begin(); iter != _forwardLookups.end(); iter++ ) {
    (*iter->second)->close();
    delete (*iter->second);
  }

  for( iter = _reverseLookups.begin(); iter != _reverseLookups.end(); iter++ ) {
    (*iter->second)->close();
    delete (*iter->second);
  }

  _forwardLookups.clear();
  _reverseLookups.clear();

  string_set_delete( _strings );
  _strings = string_set_create();
}
Esempio n. 2
0
Postprocessor * post_process_open(char *dictname, char *path)
{
  /* Allocate a Postprocessor and fill it in from the knowledge that
     pp_knowledge_open(dictname, path) returns.  A pointer to the new
     structure is returned; when path is NULL nothing is allocated and
     NULL is returned.
     Per the original note, dictname is only needed to locate the file
     when no DICTPATH is set up (that logic is not in this function).
  */
  Postprocessor *pp;

  if (NULL == path) {
    return NULL;
  }

  pp = (Postprocessor *) xalloc(sizeof(Postprocessor));

  /* plain scalar / pointer state */
  pp->pp_node = NULL;
  pp->pp_data.links_to_ignore = NULL;
  pp->n_local_rules_firing = 0;
  pp->n_global_rules_firing = 0;

  /* sub-objects */
  pp->knowledge = pp_knowledge_open(dictname, path);
  pp->sentence_link_name_set = string_set_create();
  pp->set_of_links_of_sentence = pp_linkset_open(1024);
  pp->set_of_links_in_an_active_rule = pp_linkset_open(1024);

  /* one slot per rule plus one extra; slot 0 starts out as -1 */
  pp->relevant_contains_one_rules =
      (int *) xalloc((pp->knowledge->n_contains_one_rules + 1)
                     * sizeof(pp->relevant_contains_one_rules[0]));
  pp->relevant_contains_none_rules =
      (int *) xalloc((pp->knowledge->n_contains_none_rules + 1)
                     * sizeof(pp->relevant_contains_none_rules[0]));
  pp->relevant_contains_one_rules[0] = -1;
  pp->relevant_contains_none_rules[0] = -1;

  return pp;
}
Esempio n. 3
0
/**
 * Create a Dictionary whose entries come from the "dict.db" database
 * located under `lang`.
 *
 * @param lang  Language directory.  Only the component after the last '/'
 *              (or the whole string when there is no '/') is recorded as
 *              dict->lang.
 * @return      The newly allocated dictionary.
 */
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	dict->version = NULL;
	dict->num_entries = 0;
	dict->affix_table = NULL;
	dict->regex_root = NULL;

	/* Language and file-name stuff.
	 * Always store a copy owned by the dictionary's string set: keeping
	 * the caller's pointer would leave dict->lang dangling as soon as the
	 * caller releases or reuses its buffer. */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	/* join_path() hands back a buffer that is released with free() here;
	 * the copy kept in the string set is what dict->name points at. */
	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	/* The connector set is built only if the special entry exists. */
	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL) {
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	} else {
		dict->unlimited_connector_set = NULL;
	}
	free_lookup_list(dict, dict_node);

	return dict;
}
Esempio n. 4
0
/**
 * Create a Dictionary whose entries come from the "dict.db" database
 * located under `lang`.  The component of `lang` after the last '/'
 * (or all of it, when there is no '/') is copied into the dictionary's
 * string set and recorded as dict->lang.
 */
Dictionary dictionary_create_from_db(const char *lang)
{
	Dictionary dict;
	Dict_node *special;
	const char *slash;
	char *db_path;

	/* Every field starts out zeroed. */
	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	slash = strrchr(lang, '/');
	if (NULL == slash)
		dict->lang = string_set_add(lang, dict->string_set);
	else
		dict->lang = string_set_add(slash + 1, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);

	/* A NULL checker means spell-checking is off. */
	dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
	if (NULL == dict->spell_checker)
		prt_error("Info: Spell checker disabled.");
#endif
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	/* Keep a string-set copy of the database path, free the temporary. */
	db_path = join_path(lang, "dict.db");
	dict->name = string_set_add(db_path, dict->string_set);
	free(db_path);

	/* Open the database and plug in the database-backed callbacks. */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	dict->lookup = db_lookup;
	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->close = db_close;

	/* Record which of the special words this dictionary defines. */
	dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);
	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);
	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	/* unlimited_connector_set stays zeroed when the entry is absent. */
	special = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (NULL != special)
	{
		dict->unlimited_connector_set = connector_set_create(special->exp);
	}
	free_lookup_list(dict, special);

	return dict;
}
//
// Sets up the zlib deflate stream used by the collection, creates an
// empty string set, and starts with no output object.
//
indri::collection::CompressedCollection::CompressedCollection() {
  _stream = new z_stream_s;

  // zlib requires zalloc, zfree and opaque to be set by the caller before
  // deflateInit; opaque is handed to the allocator callbacks, so it must
  // not be left indeterminate.
  _stream->zalloc = zlib_alloc;
  _stream->zfree = zlib_free;
  _stream->opaque = 0;
  _stream->next_out = 0;
  _stream->avail_out = 0;

  deflateInit( _stream, Z_BEST_SPEED );

  _strings = string_set_create();
  _output = 0;
}
Esempio n. 6
0
File: api.c Progetto: mclumd/Alfred
/*
 * Build a Sentence from input_string using dictionary dict.
 * Returns NULL when separate_sentence() fails, when unknown words are not
 * permitted and sentence_in_dictionary() fails, or when
 * build_sentence_expressions() fails.
 */
Sentence sentence_create(char *input_string, Dictionary dict) {
    Sentence sent;
    int w;

    free_lookup_list();

    sent = (Sentence) xalloc(sizeof(struct Sentence_s));
    sent->dict = dict;
    sent->length = 0;
    sent->null_count = 0;

    /* linkage counters all start at zero */
    sent->num_linkages_found = 0;
    sent->num_linkages_alloced = 0;
    sent->num_linkages_post_processed = 0;
    sent->num_valid_linkages = 0;

    /* nothing computed yet */
    sent->link_info = NULL;
    sent->deletable = NULL;
    sent->effective_dist = NULL;
    sent->parse_info = NULL;

    sent->string_set = string_set_create();

    if (!separate_sentence(input_string, sent)) {
        /* release the string set and the struct directly on this path */
        string_set_delete(sent->string_set);
        xfree(sent, sizeof(struct Sentence_s));
        return NULL;
    }

    sent->q_pruned_rules = FALSE; /* for post processing */
    sent->is_conjunction = (char *) xalloc(sizeof(char) * sent->length);
    set_is_conjunction(sent);
    initialize_conjunction_tables(sent);

    /* in case these get freed before they are set to anything else */
    for (w = 0; w < sent->length; w++) {
        sent->word[w].x = NULL;
        sent->word[w].d = NULL;
    }

    /* the dictionary check is made only when unknown words are not in use */
    if ((!dict->unknown_word_defined || !dict->use_unknown_word)
        && !sentence_in_dictionary(sent)) {
        sentence_delete(sent);
        return NULL;
    }

    if (!build_sentence_expressions(sent)) {
        sentence_delete(sent);
        return NULL;
    }

    return sent;
}
Esempio n. 7
0
void post_process_close_sentence(Postprocessor *pp)
{
  /* Put the postprocessor's per-sentence fields back to their initial
     values.  Does nothing when pp is NULL. */
  if (NULL == pp) {
    return;
  }

  /* counters and rule lists */
  pp->n_local_rules_firing = 0;
  pp->n_global_rules_firing = 0;
  pp->relevant_contains_one_rules[0] = -1;
  pp->relevant_contains_none_rules[0] = -1;

  /* link sets are emptied in place */
  pp_linkset_clear(pp->set_of_links_of_sentence);
  pp_linkset_clear(pp->set_of_links_in_an_active_rule);

  /* the link-name string set is thrown away and recreated */
  string_set_delete(pp->sentence_link_name_set);
  pp->sentence_link_name_set = string_set_create();

  free_pp_node(pp);
}
//
// Builds the transformation.  When acronymList is non-null and has a
// "word" entry, every word under it is added to a newly created string
// set stored in _acronyms; otherwise _acronyms stays null.
//
indri::parse::NormalizationTransformation::NormalizationTransformation( indri::api::Parameters* acronymList )
  :
  _handler(0),
  _acronyms(0)
{
  // nothing to load
  if( !acronymList || !acronymList->exists("word") )
    return;

  indri::api::Parameters words = (*acronymList)["word"];
  _acronyms = string_set_create();

  for( size_t index = 0; index < words.size(); index++ ) {
    // convert the parameter to a string, then add it to the set
    std::string acronym = (std::string) words[index];
    string_set_add( acronym.c_str(), _acronyms );
  }
}
Esempio n. 9
0
/**
 * Allocate a zeroed Sentence bound to `dict`.  The sentence gets its own
 * string set, which holds a copy of `input_string`, and a postprocessor
 * created from the dictionary's base knowledge.
 */
Sentence sentence_create(const char *input_string, Dictionary dict)
{
	Sentence sent = (Sentence) xalloc(sizeof(struct Sentence_s));

	memset(sent, 0, sizeof(struct Sentence_s));

	sent->dict = dict;
	sent->rand_state = global_rand_state;
	sent->string_set = string_set_create();
	sent->postprocessor = post_process_new(dict->base_knowledge);

	/* orig_sentence points at the string set's copy of the input */
	sent->orig_sentence = string_set_add(input_string, sent->string_set);

	return sent;
}
Esempio n. 10
0
pp_knowledge *pp_knowledge_open(const char *path)
{
  /* Load the post-process knowledge file at `path` into a newly
     allocated pp_knowledge.  If the file cannot be opened, an error is
     printed and the program exits with status 1. */
  FILE *fp = dictopen(path, "r");
  pp_knowledge *kno = (pp_knowledge *) xalloc(sizeof(pp_knowledge));

  if (NULL == fp)
  {
    prt_error("Fatal Error: Couldn't find post-process knowledge file %s", path);
    exit(1);
  }

  /* the file is closed as soon as the lexer has been opened on it */
  kno->lt = pp_lexer_open(fp);
  fclose(fp);

  kno->string_set = string_set_create();
  kno->path = string_set_add(path, kno->string_set);

  /* fill in the tables */
  read_starting_link_table(kno);
  read_link_sets(kno);
  read_rules(kno);
  initialize_set_of_links_starting_bounded_domain(kno);

  return kno;
}
Esempio n. 11
0
/**
 * Allocate a Postprocessor that uses the already-loaded knowledge `kno`,
 * and initialize its per-sentence working storage.
 *
 * @param kno  Post-process knowledge.  The pointer is stored, not copied.
 * @return     The new postprocessor, or NULL when `kno` is NULL (there are
 *             no rules to size the working arrays from).
 */
Postprocessor * post_process_new(pp_knowledge * kno)
{
	Postprocessor *pp;

	/* Without knowledge the rule counts below cannot be read. */
	if (NULL == kno) return NULL;

	pp = (Postprocessor *) malloc (sizeof(Postprocessor));
	pp->knowledge = kno;
	pp->string_set = string_set_create();
	pp->set_of_links_of_sentence = pp_linkset_open(1024);
	pp->set_of_links_in_an_active_rule = pp_linkset_open(1024);

	/* One slot per rule plus one extra; slot 0 starts out as -1. */
	pp->relevant_contains_one_rules =
	      (int *) malloc ((pp->knowledge->n_contains_one_rules + 1)
	                      *(sizeof pp->relevant_contains_one_rules[0]));
	pp->relevant_contains_none_rules =
	      (int *) malloc ((pp->knowledge->n_contains_none_rules + 1)
	                      *(sizeof pp->relevant_contains_none_rules[0]));
	pp->relevant_contains_one_rules[0]	= -1;
	pp->relevant_contains_none_rules[0] = -1;
	pp->pp_node = NULL;
	pp->n_local_rules_firing	= 0;
	pp->n_global_rules_firing = 0;

	pp->q_pruned_rules = false;

	/* 60 is just starting size, these are expanded if needed */
	pp->vlength = 60;
	pp->visited = (bool*) malloc(pp->vlength * sizeof(bool));
	memset(pp->visited, 0, pp->vlength * sizeof(bool));

	pp->pp_data.links_to_ignore = NULL;
	pp->pp_data.domlen = 60;
	pp->pp_data.domain_array = (Domain*) malloc(pp->pp_data.domlen * sizeof(Domain));
	memset(pp->pp_data.domain_array, 0, pp->pp_data.domlen * sizeof(Domain));

	pp->pp_data.wowlen = 60;
	pp->pp_data.word_links = (List_o_links **) malloc(pp->pp_data.wowlen * sizeof(List_o_links*));
	memset(pp->pp_data.word_links, 0, pp->pp_data.wowlen * sizeof(List_o_links *));

	return pp;
}
Esempio n. 12
0
/**
 * Allocate a Postprocessor that uses the already-loaded knowledge `kno`,
 * and initialize its per-sentence working storage.
 *
 * @param kno  Post-process knowledge.  The pointer is stored, not copied.
 * @return     The new postprocessor, or NULL when `kno` is NULL (there are
 *             no rules to size the working arrays from).
 */
Postprocessor * post_process_new(pp_knowledge * kno)
{
	Postprocessor *pp;
	PP_data *pp_data;

	/* Without knowledge the rule counts below cannot be read. */
	if (NULL == kno) return NULL;

	pp = (Postprocessor *) malloc (sizeof(Postprocessor));
	pp->knowledge = kno;
	pp->string_set = string_set_create();
	pp->set_of_links_of_sentence = pp_linkset_open(1024);
	pp->set_of_links_in_an_active_rule = pp_linkset_open(1024);

	/* One slot per rule plus one extra; slot 0 starts out as -1. */
	pp->relevant_contains_one_rules =
	      (int *) malloc ((pp->knowledge->n_contains_one_rules + 1)
	                      *(sizeof pp->relevant_contains_one_rules[0]));
	pp->relevant_contains_none_rules =
	      (int *) malloc ((pp->knowledge->n_contains_none_rules + 1)
	                      *(sizeof pp->relevant_contains_none_rules[0]));
	pp->relevant_contains_one_rules[0] = -1;
	pp->relevant_contains_none_rules[0] = -1;
	pp->pp_node = NULL;
	pp->n_local_rules_firing = 0;
	pp->n_global_rules_firing = 0;

	pp->q_pruned_rules = false;

	/* Working arrays start at PP_INITLEN entries, all zeroed. */
	pp_data = &pp->pp_data;
	pp_data->vlength = PP_INITLEN;
	pp_data->visited = (bool*) malloc(pp_data->vlength * sizeof(bool));
	memset(pp_data->visited, 0, pp_data->vlength * sizeof(bool));

	pp_data->links_to_ignore = NULL;
	pp_new_domain_array(pp_data);

	pp_data->wowlen = PP_INITLEN;
	pp_data->word_links = (List_o_links **) malloc(pp_data->wowlen * sizeof(List_o_links*));
	memset(pp_data->word_links, 0, pp_data->wowlen * sizeof(List_o_links *));

	return pp;
}
Esempio n. 13
0
/**
 * Post-process `linkage` with the sentence's constituent postprocessor
 * (created on first use), derive the constituents from the resulting
 * domains, and return the string produced by
 * exprint_constituent_structure().  ctxt->phrase_ss exists only for the
 * duration of this call.
 */
static char * do_print_flat_constituents(con_context_t *ctxt, Linkage linkage)
{
	Sentence sent = linkage->sent;
	int ncon;
	char *result;

	assert(NULL != sent->lnkages, "No linkages"); /* Sentence already free()'d */
	ctxt->phrase_ss = string_set_create();
	generate_misc_word_info(ctxt, linkage);

	/* The postprocessor is created the first time this sentence needs it. */
	if (NULL == sent->constituent_pp)
	{
		sent->constituent_pp = post_process_new(sent->dict->hpsg_knowledge);
	}
	do_post_process(sent->constituent_pp, linkage, linkage->is_sent_long);

	/** No-op. If we wanted to debug domain names, we could do this...
	 * linkage_free_pp_info(linkage);
	 * linkage_set_domain_names(sent->constituent_pp, linkage);
	 */

	/* Copy the post-process data into the linkage, then call
	 * pp_new_domain_array() on the linkage's copy. */
	linkage->hpsg_pp_data = sent->constituent_pp->pp_data;
	pp_new_domain_array(&linkage->hpsg_pp_data);

	/* Each stage returns the updated constituent count. */
	ncon = read_constituents_from_domains(ctxt, linkage, 0);
	assert (ncon < ctxt->conlen, "Too many constituents (c)");
	ncon = merge_constituents(ctxt, linkage, ncon);
	assert (ncon < ctxt->conlen, "Too many constituents (d)");
	ncon = new_style_conjunctions(ctxt, linkage, ncon);
	assert (ncon < ctxt->conlen, "Too many constituents (e)");
	ncon = last_minute_fixes(ctxt, linkage, ncon);
	assert (ncon < ctxt->conlen, "Too many constituents (f)");

	result = exprint_constituent_structure(ctxt, linkage, ncon);

	string_set_delete(ctxt->phrase_ss);
	ctxt->phrase_ss = NULL;
	return result;
}
Esempio n. 14
0
/**
 * Post-process up to MAXSUBL sublinkages of `linkage` with the
 * dictionary's constituent postprocessor, collect the constituents from
 * all of them, and return the string produced by
 * exprint_constituent_structure().  ctxt->phrase_ss exists only for the
 * duration of this call.
 */
static char * print_flat_constituents(con_context_t *ctxt, Linkage linkage)
{
	Sentence sent;
	Postprocessor *pp;
	int num_words;
	int sub, num_subl;
	int numcon_total = 0;
	char *result;

	sent = linkage_get_sentence(linkage);    /* value is not used below */
	ctxt->phrase_ss = string_set_create();
	pp = linkage->sent->dict->constituent_pp;

	count_words_used(ctxt, linkage);

	/* Cap the number of sublinkages that are looked at. */
	num_subl = linkage->num_sublinkages;
	if (num_subl > MAXSUBL)
	{
		num_subl = MAXSUBL;
		if (verbosity >= 2)
		{
			printf("Number of sublinkages exceeds maximum: only considering first %d sublinkages\n", MAXSUBL);
		}
	}

	/* For a unionized linkage with several sublinkages, skip the last one. */
	if ((1 == linkage->unionized) && (num_subl > 1))
	{
		num_subl--;
	}

	for (sub = 0; sub < num_subl; sub++)
	{
		linkage_set_current_sublinkage(linkage, sub);
		linkage_post_process(linkage, pp);
		num_words = linkage_get_num_words(linkage);  /* value is not used below */
		generate_misc_word_info(ctxt, linkage);
		numcon_total += read_constituents_from_domains(ctxt, linkage, numcon_total, sub);
	}

	numcon_total = merge_constituents(ctxt, linkage, numcon_total);
	numcon_total = last_minute_fixes(ctxt, linkage, numcon_total);
	result = exprint_constituent_structure(ctxt, linkage, numcon_total);

	string_set_delete(ctxt->phrase_ss);
	ctxt->phrase_ss = NULL;
	return result;
}
Esempio n. 15
0
pp_knowledge *pp_knowledge_open(const char *path)
{
  /* Load the post-process knowledge file at `path` into a newly
     allocated pp_knowledge.  Returns NULL when the file cannot be
     opened, when any read step fails, or when the file yields no
     starting links. */
  pp_knowledge *kno;
  FILE *fp = dictopen(path, "r");

  if (NULL == fp)
  {
    prt_error("Error: Couldn't find post-process knowledge file %s\n", path);
    return NULL;
  }

  /* start from an all-zero struct before pp_knowledge_close() can see it */
  kno = (pp_knowledge *) malloc (sizeof(pp_knowledge));
  *kno = (pp_knowledge){0};

  /* the file is closed as soon as the lexer has been opened on it */
  kno->lt = pp_lexer_open(fp);
  fclose(fp);
  if (NULL == kno->lt) goto failure;

  kno->string_set = string_set_create();
  kno->path = string_set_add(path, kno->string_set);

  /* the read steps run in order; the first one to fail aborts the load */
  if (!read_starting_link_table(kno)
      || !read_link_sets(kno)
      || !read_rules(kno))
  {
    goto failure;
  }
  initialize_set_of_links_starting_bounded_domain(kno);

  if (0 != kno->nStartingLinks)
  {
    return kno;
  }

  /* If the knowledge file was empty, do nothing at all. */
  pp_knowledge_close(kno);
  return NULL;

failure:
  prt_error("Error: Unable to open knowledge file %s.\n", path);
  pp_knowledge_close(kno);
  return NULL;
}
Esempio n. 16
0
File: api.c Progetto: mclumd/Alfred
/* The following function is dictionary_create with an extra paramater called "path".
   If this is non-null, then the path used to find the file is taken from that path.
   Otherwise the path is taken from the dict_name.  This is only needed because
   an affix_file is opened by a recursive call to this function.
 */
static Dictionary internal_dictionary_create(char * dict_name, char * pp_name, char * cons_name, char * affix_name, char * path) {
    Dictionary dict;
    static int rand_table_inited=FALSE;
    Dict_node *dict_node;
    char * dictionary_path_name;

    dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));

    if (!rand_table_inited) {
        init_randtable();
	rand_table_inited=TRUE;
    }

    dict->string_set = string_set_create();
    dict->name = string_set_add(dict_name, dict->string_set);
    dict->num_entries = 0;
    dict->is_special = FALSE;
    dict->already_got_it = '\0';
    dict->line_number = 1;
    dict->root = NULL;
    dict->word_file_header = NULL;
    dict->exp_list = NULL;
    dict->affix_table = NULL;

    /*  *DS*  remove this
    if (pp_name != NULL) {
	dict->post_process_filename = string_set_add(pp_name, dict->string_set);
    }
    else {
	dict->post_process_filename = NULL;
    }
    */
    
    if (path != NULL) dictionary_path_name = path; else dictionary_path_name = dict_name;

    if (!open_dictionary(dictionary_path_name, dict)) {
	lperror(NODICT, dict_name);
	string_set_delete(dict->string_set);
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
    }

    if (!read_dictionary(dict)) {
	string_set_delete(dict->string_set);
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
    }

    dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
    dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);
    dict->postprocessor      = post_process_open(dict->name, pp_name);
    dict->constituent_pp     = post_process_open(dict->name, cons_name);
    
    dict->affix_table = NULL;
    if (affix_name != NULL) {
	dict->affix_table = internal_dictionary_create(affix_name, NULL, NULL, NULL, dict_name);
	if (dict->affix_table == NULL) {
	    fprintf(stderr, "%s\n", lperrmsg);
	    exit(-1);
	}
    }
    
    dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
    dict->use_unknown_word = TRUE;
    dict->capitalized_word_defined = boolean_dictionary_lookup(dict, PROPER_WORD);
    dict->pl_capitalized_word_defined = boolean_dictionary_lookup(dict, PL_PROPER_WORD);
    dict->hyphenated_word_defined = boolean_dictionary_lookup(dict, HYPHENATED_WORD);
    dict->number_word_defined = boolean_dictionary_lookup(dict, NUMBER_WORD);
    dict->ing_word_defined = boolean_dictionary_lookup(dict, ING_WORD);
    dict->s_word_defined = boolean_dictionary_lookup(dict, S_WORD);
    dict->ed_word_defined = boolean_dictionary_lookup(dict, ED_WORD);
    dict->ly_word_defined = boolean_dictionary_lookup(dict, LY_WORD);
    dict->max_cost = 1000;

    if ((dict_node = dictionary_lookup(dict, ANDABLE_CONNECTORS_WORD)) != NULL) {
	dict->andable_connector_set = connector_set_create(dict_node->exp);
    } else {
	dict->andable_connector_set = NULL;
    }

    if ((dict_node = dictionary_lookup(dict, UNLIMITED_CONNECTORS_WORD)) != NULL) {
	dict->unlimited_connector_set = connector_set_create(dict_node->exp);
    } else {
	dict->unlimited_connector_set = NULL;
    }

    free_lookup_list();
    return dict;
}
Esempio n. 17
0
/**
 * Build a Dictionary whose entries are read from the in-memory text
 * `input`; the affix and regex data are obtained through `affix_name`
 * and `regex_name`, and the post-process knowledge through `pp_name`
 * and `cons_name`.
 *
 * When `affix_name` is NULL the function returns right after the entries
 * have been read (this is how the affix dictionary itself is created).
 *
 * @param lang        Language path; the part after the last '/' becomes
 *                    dict->lang.
 * @param input       Dictionary text to parse.
 * @param dict_name   Name recorded as dict->name; a name containing
 *                    "affix" selects the affix-dictionary setup.
 * @param pp_name     Passed to pp_knowledge_open() for base_knowledge.
 * @param cons_name   Passed to pp_knowledge_open() for hpsg_knowledge.
 * @param affix_name  Affix dictionary to load, or NULL.
 * @param regex_name  Passed to read_regex_file().
 * @return The new dictionary, or NULL on failure.
 */
static Dictionary
dictionary_six_str(const char * lang,
                   const char * input,
                   const char * dict_name,
                   const char * pp_name, const char * cons_name,
                   const char * affix_name, const char * regex_name)
{
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	/* All fields start out zeroed. */
	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);
	dict->name = string_set_add(dict_name, dict->string_set);

	/*
	 * A special setup per dictionary type. The check here assumes the affix
	 * dictionary name contains "affix". FIXME: For not using this
	 * assumption, the dictionary creating stuff needs a rearrangement.
	 */
	if (0 == strstr(dict->name, "affix"))
	{
		/* To disable spell-checking, just set the checker to NULL */
		dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
		/* TODO:
		 * 1. Set the spell option to 0, to signify no spell checking is done.
		 * 2. On verbosity >= 1, add a detailed message on the reason. */
		if (NULL == dict->spell_checker)
			prt_error("Info: Spell checker disabled.");
#endif
		dict->insert_entry = insert_list;

		dict->lookup_list = lookup_list;
		dict->free_lookup = free_llist;
		dict->lookup = boolean_lookup;
	}
	else
	{
		/*
		 * Affix dictionary.
		 */
		size_t i;

		dict->insert_entry = load_affix;
		dict->lookup = return_true;

		/* initialize the class table */
		dict->afdict_class =
		   malloc(sizeof(*dict->afdict_class) * ARRAY_SIZE(afdict_classname));
		for (i = 0; i < ARRAY_SIZE(afdict_classname); i++)
		{
			dict->afdict_class[i].mem_elems = 0;
			dict->afdict_class[i].length = 0;
			dict->afdict_class[i].string = NULL;
		}
	}
	dict->affix_table = NULL;

	/* Read dictionary from the input string. */
	/* input/pin are set only for the duration of read_dictionary() and
	 * are reset to NULL on both the success and the failure path. */
	dict->input = input;
	dict->pin = dict->input;
	if (!read_dictionary(dict))
	{
		dict->pin = NULL;
		dict->input = NULL;
		goto failure;
	}
	dict->pin = NULL;
	dict->input = NULL;

	if (NULL == affix_name)
	{
		/*
		 * The affix table is handled alone in this invocation.
		 * Skip the rest of processing!
		 * FIXME: The dictionary creating stuff needs a rearrangement.
		 */
		return dict;
	}

	/* If we don't have a locale per dictionary, the following
	 * will also set the program's locale. */
	dict->locale = linkgrammar_get_dict_locale(dict);
	set_utf8_program_locale();

#ifdef HAVE_LOCALE_T
	/* We have a locale per dictionary. */
	if (NULL != dict->locale)
		dict->locale_t = newlocale_LC_CTYPE(dict->locale);

	/* If we didn't succeed to set the dictionary locale, the program will
	 * SEGFAULT when it tries to use it with the isw*() functions.
	 * So set it to the current program's locale as a last resort. */
	if (NULL == dict->locale)
	{
		dict->locale = setlocale(LC_CTYPE, NULL);
		dict->locale_t = newlocale_LC_CTYPE(setlocale(LC_CTYPE, NULL));
		prt_error("Warning: Couldn't set dictionary locale! "
		          "Using current program locale %s", dict->locale);
	}
	/* If dict->locale is still not set, there is a bug. */
	assert((locale_t)0 != dict->locale_t, "Dictionary locale is not set.");
#else
	/* We don't have a locale per dictionary - but anyway make sure
	 * dict->locale is consistent with the current program's locale,
	 * and especially that it is not NULL. It still indicates the intended
	 * locale of this dictionary and the locale of the compiled regexs. */
	dict->locale = setlocale(LC_CTYPE, NULL);
#endif /* HAVE_LOCALE_T */

	/* The affix dictionary is loaded with a NULL affix_name of its own. */
	dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL);
	if (dict->affix_table == NULL)
	{
		prt_error("Error: Could not open affix file %s", affix_name);
		goto failure;
	}
	if (! afdict_init(dict))
		goto failure;

	/*
	 * Process the regex file.
	 * We have to compile regexs using the dictionary locale,
	 * so make a temporary locale swap.
	 */
	/* A non-zero return from read_regex_file() is treated as failure. */
	if (read_regex_file(dict, regex_name)) goto failure;

	const char *locale = setlocale(LC_CTYPE, NULL);
	locale = strdupa(locale); /* setlocale() uses static memory. */
	setlocale(LC_CTYPE, dict->locale);
	lgdebug(+D_DICT, "Regexs locale %s\n", setlocale(LC_CTYPE, NULL));

	/* The saved locale is restored on both the failure and success path. */
	if (compile_regexs(dict->regex_root, dict))
	{
		locale = setlocale(LC_CTYPE, locale);
		goto failure;
	}
	locale = setlocale(LC_CTYPE, locale);
	assert(NULL != locale, "Cannot restore program locale\n");

#ifdef USE_CORPUS
	dict->corpus = lg_corpus_new();
#endif

	/* Record which of the special words this dictionary defines. */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->base_knowledge  = pp_knowledge_open(pp_name);
	dict->hpsg_knowledge  = pp_knowledge_open(cons_name);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	/* unlimited_connector_set stays zeroed (from the memset above) when
	 * the special entry is absent. */
	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);

	free_lookup(dict_node);

	return dict;

failure:
	/* NOTE(review): only the string set and the Dictionary_s structs are
	 * released here; whether other allocations made above (e.g.
	 * afdict_class) are released elsewhere is not visible — confirm. */
	string_set_delete(dict->string_set);
	if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s));
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
}
Esempio n. 18
0
/**
 * Read dictionary entries from the string "input".
 * All other parts are read from files.
 *
 * When `affix_name` is NULL the function returns right after the entries
 * have been read (this is how the affix dictionary itself is created).
 *
 * @param lang        Language path; the part after the last '/' (or the
 *                    whole string when there is no '/') becomes dict->lang.
 * @param input       Dictionary text to parse.
 * @param dict_name   Name recorded as dict->name; a name containing
 *                    "affix" selects the affix-dictionary setup.
 * @param pp_name     Passed to pp_knowledge_open() for base_knowledge.
 * @param cons_name   Passed to pp_knowledge_open() for hpsg_knowledge.
 * @param affix_name  Affix dictionary to load, or NULL.
 * @param regex_name  Passed to read_regex_file().
 * @return The new dictionary, or NULL on failure.
 */
static Dictionary
dictionary_six_str(const char * lang,
                   const char * input,
                   const char * dict_name,
                   const char * pp_name, const char * cons_name,
                   const char * affix_name, const char * regex_name)
{
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	dict->num_entries = 0;
	dict->is_special = false;
	dict->already_got_it = '\0';
	dict->line_number = 0;
	dict->root = NULL;
	dict->regex_root = NULL;
	dict->word_file_header = NULL;
	dict->exp_list = NULL;
	dict->affix_table = NULL;
	dict->recursive_error = false;
	dict->version = NULL;
#ifdef HAVE_SQLITE
	dict->db_handle = NULL;
#endif
#ifdef USE_ANYSPLIT
	dict->anysplit = NULL;
#endif

	/* Language and file-name stuff.
	 * Always store a copy owned by the dictionary's string set: keeping
	 * the caller's pointer would leave dict->lang dangling as soon as the
	 * caller releases or reuses its buffer. */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	dict->name = string_set_add(dict_name, dict->string_set);

	/*
	 * A special setup per dictionary type. The check here assumes the affix
	 * dictionary name contains "affix". FIXME: For not using this
	 * assumption, the dictionary creating stuff needs a rearrangement.
	 */
	if (0 == strstr(dict->name, "affix"))
	{
		/* To disable spell-checking, just set the checker to NULL */
		dict->spell_checker = spellcheck_create(dict->lang);
		dict->insert_entry = insert_list;

		dict->lookup_list = lookup_list;
		dict->free_lookup = free_llist;
		dict->lookup = boolean_lookup;
	}
	else
	{
		/*
		 * Affix dictionary.
		 */
		size_t i;

		dict->insert_entry = load_affix;
		dict->lookup = return_true;

		/* initialize the class table */
		dict->afdict_class =
		   malloc(sizeof(*dict->afdict_class) * NUMELEMS(afdict_classname));
		for (i = 0; i < NUMELEMS(afdict_classname); i++)
		{
			dict->afdict_class[i].mem_elems = 0;
			dict->afdict_class[i].length = 0;
			dict->afdict_class[i].string = NULL;
		}
	}
	dict->affix_table = NULL;

	/* Read dictionary from the input string.
	 * input/pin are set only for the duration of read_dictionary() and
	 * are reset to NULL on both the success and the failure path. */
	dict->input = input;
	dict->pin = dict->input;
	if (!read_dictionary(dict))
	{
		dict->pin = NULL;
		dict->input = NULL;
		goto failure;
	}
	dict->pin = NULL;
	dict->input = NULL;

	if (NULL == affix_name)
	{
		/*
		 * The affix table is handled alone in this invocation.
		 * Skip the rest of processing!
		 * FIXME: The dictionary creating stuff needs a rearrangement.
		 */
		return dict;
	}

	/* The affix dictionary is loaded with a NULL affix_name of its own. */
	dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL);
	if (dict->affix_table == NULL)
	{
		prt_error("Error: Could not open affix file %s", affix_name);
		goto failure;
	}
	if (! afdict_init(dict))
		goto failure;

	/* A non-zero return from either regex step is treated as failure. */
	if (read_regex_file(dict, regex_name)) goto failure;
	if (compile_regexs(dict->regex_root, dict)) goto failure;

#ifdef USE_CORPUS
	dict->corpus = lg_corpus_new();
#endif

	/* Record which of the special words this dictionary defines. */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->base_knowledge  = pp_knowledge_open(pp_name);
	dict->hpsg_knowledge  = pp_knowledge_open(cons_name);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	/* The connector set is built only if the special entry exists. */
	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL) {
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	} else {
		dict->unlimited_connector_set = NULL;
	}
	free_lookup(dict_node);

	return dict;

failure:
	/* NOTE(review): only the string set and the Dictionary_s structs are
	 * released here; whether other allocations made above (e.g.
	 * afdict_class) are released elsewhere is not visible — confirm. */
	string_set_delete(dict->string_set);
	if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s));
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
}