Example no. 1
0
/**
 * connector() -- make a node for a connector or dictionary word.
 *
 * Assumes the current token is a connector or dictionary word.
 */
static Exp * connector(Dictionary dict)
{
	Exp * n;
	Dict_node *dn, *dn_head;
	int i;

	i = strlen(dict->token) - 1;  /* this must be + or - if a connector */
	if ((dict->token[i] != '+') && (dict->token[i] != '-'))
	{
		/* If we are here, token is a word */
		dn_head = abridged_lookup_list(dict, dict->token);
		dn = dn_head;
		while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
		{
			dn = dn->right;
		}
		if (dn == NULL)
		{
			free_lookup_list(dn_head);
			dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
			                 "Or perhaps you forgot the suffix on a word.\n"
			                 "Or perhaps a word is used before it is defined.\n");
			return NULL;
		}
		n = make_unary_node(dict, dn->exp);
		free_lookup_list(dn_head);
	} 
	else
	{
		/* If we are here, token is a connector */
		if (!check_connector(dict, dict->token))
		{
			return NULL;
		}
		n = Exp_create(dict);
		n->dir = dict->token[i];
		dict->token[i] = '\0';				   /* get rid of the + or - */
		if (dict->token[0] == '@')
		{
			n->u.string = string_set_add(dict->token+1, dict->string_set);
			n->multi = TRUE;
		}
		else
		{
			n->u.string = string_set_add(dict->token, dict->string_set);
			n->multi = FALSE;
		}
		n->type = CONNECTOR_type;
		n->cost = 0.0f;
	}

	if (!link_advance(dict))
	{
		exp_free(n);
		return NULL;
	}
	return n;
}
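
For reference, the connector branch of connector() above maps tokens to nodes as follows (a worked illustration derived directly from the code, not part of the original source):

/* Worked examples for the else-branch of connector():
 *   dict->token "Xca+"   ->  n->u.string "Xca",  n->dir '+',  n->multi FALSE
 *   dict->token "@Xca-"  ->  n->u.string "Xca",  n->dir '-',  n->multi TRUE
 * The trailing + or - is saved in n->dir and overwritten with '\0', and a
 * leading '@' marks a multi-connector; the remaining name is then interned
 * with string_set_add() so that it lives in dict->string_set. */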
Example no. 2
0
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	dict->version = NULL;
	dict->num_entries = 0;
	dict->affix_table = NULL;
	dict->regex_root = NULL;

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	dict->lang = lang;
	t = strrchr (lang, '/');
	if (t) dict->lang = string_set_add(t+1, dict->string_set);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL) {
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	} else {
		dict->unlimited_connector_set = NULL;
	}
	free_lookup_list(dict, dict_node);

	return dict;
}
Example no. 3
0
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
	if (NULL == dict->spell_checker)
		prt_error("Info: Spell checker disabled.");
#endif
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);

	free_lookup_list(dict, dict_node);

	return dict;
}
Example no. 4
0
static void read_contains_rules(pp_knowledge *k, const char *label,
				pp_rule **rules, int *nRules)
{
  /* Reads the 'contains_one_rules' and the 'contains_none_rules'
     into their respective arrays. */
  int n_commas, n_tokens, i, r;
  const char *p;
  const char **tokens;
  if (!pp_lexer_set_label(k->lt, label)) {
      *nRules = 0;
      if (verbosity>0) printf("PP warning: Not using any %s rules\n", label);
  }
  else {
    n_commas = pp_lexer_count_commas_of_label(k->lt);
    *nRules = (n_commas + 1)/3;
  }
  *rules = (pp_rule*) xalloc ((1+*nRules)*sizeof(pp_rule));
  for (r=0; r<*nRules; r++)
    {
      /* first read link */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens>1)
      {
        prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
        exit(1);
      }
      (*rules)[r].selector = string_set_add(tokens[0], k->string_set);

      /* read link set */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      (*rules)[r].link_set = pp_linkset_open(n_tokens);
      (*rules)[r].link_set_size = n_tokens;
      (*rules)[r].link_array = (const char **) xalloc((1+n_tokens)*sizeof(const char*));
      for (i=0; i<n_tokens; i++)
      {
        p = string_set_add(tokens[i], k->string_set);
        pp_linkset_add((*rules)[r].link_set, p);
        (*rules)[r].link_array[i] = p;
      }
      (*rules)[r].link_array[i]=0; /* NULL-terminator */

      /* read error message */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens>1)
      {
        prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
        exit(1);
      }
      (*rules)[r].msg = string_set_add(tokens[0], k->string_set);
    }

  /* sentinel entry */
  (*rules)[*nRules].msg = 0;
}
Example no. 5
0
static int guessed_string(Sentence sent, int i, const char * s, const char * type) {
	X_node * e;
	char *t, *u;
	char str[MAX_WORD+1];

	if (boolean_dictionary_lookup(sent->dict, type)) {
		sent->word[i].x = build_word_expressions(sent, type);
		e = sent->word[i].x;
		if (is_s_word(s)) {
			for (; e != NULL; e = e->next) {
				t = strchr(e->string, '.');
				if (t != NULL) {
					sprintf(str, "%.50s[!].%.5s", s, t+1);
				} else {
					sprintf(str, "%.50s[!]", s);
				}
				t = (char *) xalloc(strlen(str)+1);
				strcpy(t, str);
				u = string_set_add(t, sent->string_set);
				xfree(t, strlen(str)+1);
				e->string = u;
			}
		} else {
			if (is_ed_word(s)) {
				sprintf(str, "%.50s[!].v", s);
			} else if (is_ing_word(s)) {
				sprintf(str, "%.50s[!].g", s);
			} else if (is_ly_word(s)) {
				sprintf(str, "%.50s[!].e", s);
			} else {
				sprintf(str, "%.50s[!]", s);
			}
			t = (char *) xalloc(strlen(str)+1);
			strcpy(t, str);
			u = string_set_add(t, sent->string_set);
			xfree(t, strlen(str)+1);
			e->string = u;
		}
		return TRUE;
	} else {
		lperror(BUILDEXPR, ".\n To process this sentence your dictionary "
				"needs the word \"%s\".\n", type);
		return FALSE;
	}
}
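
The xalloc/strcpy/string_set_add/xfree sequence above (the same pattern appears again in Example no. 16) works because the string set stores its own copy of the string: the pointer returned by string_set_add stays valid after the temporary buffer is freed. Below is a minimal sketch of that interning pattern, not taken from the sources above; string_set_create() and string_set_add() are used exactly as in the examples, while the "string-set.h" header name and the string_set_delete() destructor are assumptions about the internal API.

#include <stdio.h>
#include "string-set.h"   /* assumed header: String_set, string_set_*() */

/* Sketch only: shows that the set owns its copy of the added string. */
static void intern_demo(void)
{
	String_set *ss = string_set_create();
	char buf[64];
	const char *interned;

	snprintf(buf, sizeof(buf), "%.50s[!].v", "gorped");
	interned = string_set_add(buf, ss);   /* the set stores its own copy */
	buf[0] = '\0';                        /* the local buffer can be reused */

	printf("%s\n", interned);             /* still prints "gorped[!].v" */
	string_set_delete(ss);                /* assumed destructor; frees the copy */
}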
Example no. 6
0
static bool read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
  size_t n_commas, n_tokens;
  size_t r, i;
  pp_linkset *lsHandle;
  const char **tokens;
  if (!pp_lexer_set_label(k->lt, label)) {
      k->n_form_a_cycle_rules = 0;
      if (verbosity_level(+D_PPK))
          prt_error("Warning: File %s: Not using any 'form a cycle' rules\n",
                    k->path);
  }
  else {
    n_commas = pp_lexer_count_commas_of_label(k->lt);
    k->n_form_a_cycle_rules = (n_commas + 1)/2;
  }
  k->form_a_cycle_rules=
    (pp_rule*) malloc ((1+k->n_form_a_cycle_rules)*sizeof(pp_rule));
  for (r=0; r<k->n_form_a_cycle_rules; r++)
    {
      /* read link set */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens <= 0)
      {
        prt_error("Error: File %s: Syntax error\n", k->path);
        return false;
      }
      lsHandle = pp_linkset_open(n_tokens);
      for (i=0; i<n_tokens; i++)
          pp_linkset_add(lsHandle,string_set_add(tokens[i], k->string_set));
      k->form_a_cycle_rules[r].link_set = lsHandle;

      /* read error message */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens > 1)
      {
         prt_error("Error: File %s: Invalid syntax (rule %zu of %s)\n",
                   k->path, r+1,label);
         return false;
      }
      k->form_a_cycle_rules[r].msg = string_set_add(tokens[0], k->string_set);
      k->form_a_cycle_rules[r].use_count = 0;
    }

  /* sentinel entry */
  k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
  k->form_a_cycle_rules[k->n_form_a_cycle_rules].use_count = 0;

  return true;
}
Example no. 7
0
void indri::collection::CompressedCollection::open( const std::string& fileName ) {
  std::string lookupName = indri::file::Path::combine( fileName, "lookup" );
  std::string storageName = indri::file::Path::combine( fileName, "storage" );
  std::string manifestName = indri::file::Path::combine( fileName, "manifest" );

  indri::api::Parameters manifest;
  manifest.loadFile( manifestName );

  _storage.open( storageName );
  _lookup.open( lookupName );
  _output = new indri::file::SequentialWriteBuffer( _storage, 1024*1024 );

  if( manifest.exists("forward.field") ) {
    indri::api::Parameters forward = manifest["forward.field"];

    for( size_t i=0; i<forward.size(); i++ ) {
      std::stringstream metalookupName;
      metalookupName << "forwardLookup" << i;

      std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() );
      lemur::file::Keyfile* metalookup = new lemur::file::Keyfile;
      metalookup->open( metalookupPath );

      std::string fieldName = forward[i];
      const char* key = string_set_add( fieldName.c_str(), _strings );
      _forwardLookups.insert( key, metalookup );
    }
  }

  indri::api::Parameters reverse = manifest["reverse"];

  if( manifest.exists("reverse.field") ) {
    indri::api::Parameters reverse = manifest["reverse.field"];

    for( size_t i=0; i<reverse.size(); i++ ) {
      std::stringstream metalookupName;
      metalookupName << "reverseLookup" << i;

      std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() );
      lemur::file::Keyfile* metalookup = new lemur::file::Keyfile;
      metalookup->open( metalookupPath );

      std::string fieldName = reverse[i];
      const char* key = string_set_add( fieldName.c_str(), _strings );
      _reverseLookups.insert( key, metalookup );
    }
  }

}
Example no. 8
0
/**
 * Reads in one word from the file, allocates space for it,
 * and returns it.
 *
 * On error, returns an empty string (which cannot be a valid word);
 * returns NULL at end of file.
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	const char * s;
	int c, j;

	do {
		c = fgetc(fp);
	} while ((c != EOF) && lg_isspace(c));
	if (c == EOF) return NULL;

	for (j=0; (j <= MAX_WORD-1) && (!lg_isspace(c)) && (c != EOF); j++)
	{
		word[j] = c;
		c = fgetc(fp);
	}

	if (j >= MAX_WORD) {
		word[MAX_WORD] = '\0';
		prt_error("The dictionary contains a word that is too long: %s\n", word);
		return ""; /* error indication */
	}
	word[j] = '\0';
	patch_subscript(word);
	s = string_set_add(word, dict->string_set);
	return s;
}
Example no. 9
0
void morpheme_list_add(Morpho_structures ms, Morpheme **morpheme_list, char * new_word, Feature *new_f){
	Morpheme *node;
	Feature_list *fln;
	Feature *temp_f;
	Morpheme *temp_morpheme;
	for (node=*morpheme_list; node!=NULL; node=node->next){
		if (strcmp(node->word, new_word)==0){
			break;
		}
	}
	if (node!=NULL){
		for (fln=node->f_list; fln!=NULL; fln=fln->next)
		{
			if (feature_is_equal(fln->f,new_f))
				break;
		}
		if (fln!=NULL){
			return;
		}
		else{
			temp_f=feature_copy_driver(USE_SOURCE_STRING_SET, new_f);
			feature_list_add(&(node->f_list), temp_f);
		}
	}
	else{
		temp_morpheme = (Morpheme *)xalloc (sizeof (Morpheme));
		temp_morpheme->word = string_set_add(new_word, ms->dict->string_set);
		temp_morpheme->f_list=NULL;
		temp_f=feature_copy_driver(USE_SOURCE_STRING_SET, new_f);
		feature_list_add(&(temp_morpheme->f_list), temp_f);
		temp_morpheme->next = *morpheme_list;
		*morpheme_list = temp_morpheme;
	}

}
Example no. 10
0
/**
 * Create a short form of flags summary for displaying in a word node.
 */
const char *gword_status(Sentence sent, const Gword *w)
{
	dyn_str *s = dyn_str_new();
	const char *r;
	size_t len;

	if (w->status & WS_UNKNOWN)
		dyn_strcat(s, "UNK|");
	if (w->status & WS_INDICT)
		dyn_strcat(s, "IN|");
	if (w->status & WS_REGEX)
		dyn_strcat(s, "RE|");
	if (w->status & WS_SPELL)
		dyn_strcat(s, "SP|");
	if (w->status & WS_RUNON)
		dyn_strcat(s, "RU|");
	if (w->status & WS_HASALT)
		dyn_strcat(s, "HA|");
	if (w->status & WS_UNSPLIT)
		dyn_strcat(s, "UNS|");
	if (w->status & WS_PL)
		dyn_strcat(s, "PL|");

	len = strlen(s->str);
	if (len > 0) s->str[len-1] = '\0';
	r = string_set_add(s->str, sent->string_set);
	dyn_str_delete(s);
	return r;
}
Example no. 11
0
static pp_linkset *read_link_set(pp_knowledge *k,
                                 const char *label, String_set *ss)
{
  /* read link set, marked by label in knowledge file, into a set of links
     whose handle is returned. Return NULL if link set not defined in file,
     in which case the set is taken to be empty. */
  int n_strings,i;
  pp_linkset *ls;
  if (!pp_lexer_set_label(k->lt, label))
  {
    if (verbosity_level(+D_PPK))
      prt_error("Warning: File %s: Link set %s not defined: assuming empty\n",
             k->path, label);
    n_strings = 0;
  }
  else
  {
    n_strings = pp_lexer_count_tokens_of_label(k->lt);
    if (-1 == n_strings) return &LINK_SET_ERROR;
  }
  ls = pp_linkset_open(n_strings);
  for (i=0; i<n_strings; i++)
    pp_linkset_add(ls,
                   string_set_add(pp_lexer_get_next_token_of_label(k->lt),ss));
  return ls;
}
Example no. 12
0
void altappend(Sentence sent, const char ***altp, const char *w)
{
	size_t n = altlen(*altp);

	*altp = resize_alts(*altp, n);
	(*altp)[n] = string_set_add(w, sent->string_set);
}
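
altappend stores only interned pointers in the alternatives array, so the array itself can be grown and released without worrying about string ownership. The helpers altlen() and resize_alts() are not shown in these examples; the sketch below uses hypothetical stand-ins (alt_len, alt_append) and assumes a NULL-terminated array purely for illustration.

#include <stdlib.h>
#include "string-set.h"   /* assumed header: String_set, string_set_add() */

/* Hypothetical stand-in for altlen(): length of a NULL-terminated array. */
static size_t alt_len(const char **alt)
{
	size_t n = 0;
	if (alt != NULL)
		while (alt[n] != NULL) n++;
	return n;
}

/* Hypothetical stand-in for altappend(): grow by one slot and intern w.
 * Error handling for realloc() is omitted in this sketch. */
static void alt_append(const char ***altp, const char *w, String_set *ss)
{
	size_t n = alt_len(*altp);

	*altp = realloc(*altp, (n + 2) * sizeof(const char *)); /* +1 new, +1 NULL */
	(*altp)[n] = string_set_add(w, ss);   /* the string set owns the string */
	(*altp)[n + 1] = NULL;
}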
Example no. 13
0
wchar_t * build_idiom_word_name(Dictionary dict, wchar_t * s) {
    /* Allocates string space and returns a pointer to it.
       In this string is placed the idiomized name of the given string s.
       This is the same as s, but with a postfix of ".Ix", where x is an
       appropriate number.  x is the minimum number that distinguishes
       this word from others in the dictionary.
       */
    wchar_t * new_s, * x, *id;
    int count, sz;

    count = max_postfix_found(dictionary_lookup(dict, s))+1;

    sz = wcslen(s)+10;
    new_s = x = (wchar_t *) xalloc(sizeof(wchar_t)*sz); /* fails if > 10**10 idioms */
    while((*s != L'\0') && (*s != L'.')) {
	*x = *s;
	x++;
	s++;
    }
    swprintf_s(x, sz - (x - new_s), L".I%d", count);

    id = string_set_add(new_s, dict->string_set);
    xfree(new_s, sizeof(wchar_t)*sz);
    return id;
}
Example no. 14
0
/**
 *
 * (1) opens the word file and adds it to the word file list
 * (2) reads in the words
 * (3) puts each word in a Dict_node
 * (4) links these together by their left pointers at the
 *     front of the list pointed to by dn
 * (5) returns a pointer to the first of this list
 */
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
{
	Word_file * wf;
	FILE * fp;
	const char * s;

	filename += 1; /* get rid of leading '/' */

	if ((fp = dictopen(filename, "r")) == NULL) {
		return NULL;
	}

	wf = malloc(sizeof (Word_file));
	wf->file = string_set_add(filename, dict->string_set);
	wf->changed = false;
	wf->next = dict->word_file_header;
	dict->word_file_header = wf;

	while ((s = get_a_word(dict, fp)) != NULL) {
		if ('\0' == s[0]) /* returned error indication */
		{
			fclose(fp);
			free_insert_list(dn);
			return NULL;
		}
		Dict_node * dn_new = malloc(sizeof(Dict_node));
		dn_new->left = dn;
		dn = dn_new;
		dn->string = s;
		dn->file = wf;
	}
	fclose(fp);
	return dn;
}
Example no. 15
0
GNUC_UNUSED const char *gword_morpheme(Sentence sent, const Gword *w)
{
	const char *mt;
	char buff[64];

	switch (w->morpheme_type)
	{
		case MT_INVALID:
			mt = "MT_INVALID";
			break;
		case MT_WORD:
			mt = "MT_WORD";
			break;
		case MT_FEATURE:
			mt = "MT_FEATURE";
			break;
		case MT_INFRASTRUCTURE:
			mt = "MT_I-S";
			break;
		case MT_WALL:
			mt = "MT_WALL";
			break;
		case MT_EMPTY:
			mt = "MT_EMPTY";
			break;
		case MT_UNKNOWN:
			mt = "MT_UNKNOWN";
			break;
		case MT_TEMPLATE:
			mt = "MT_TEMPLATE";
			break;
		case MT_ROOT:
			mt = "MT_ROOT";
			break;
		case MT_CONTR:
			mt = "MT_CONTR";
			break;
		case MT_PUNC:
			mt = "MT_PUNC";
			break;
		case MT_STEM:
			mt = "MT_STEM";
			break;
		case MT_PREFIX:
			mt = "MT_PREFIX";
			break;
		case MT_MIDDLE:
			mt = "MT_MIDDLE";
			break;
		case MT_SUFFIX:
			mt = "MT_SUFFIX";
			break;
		default:
			/* No truncation is expected. */
			snprintf(buff, sizeof(buff), "MT_%d", w->morpheme_type);
			mt = string_set_add(buff, sent->string_set);
	}

	return mt;
}
Example no. 16
0
static void handle_unknown_word(Sentence sent, int i, char * s) {
	/* Puts into word[i].x the expression for the unknown word.
	 * The parameter s is the word that was not in the dictionary.
	 * The names are massaged to carry the subscripts of the unknown-word
	 * entries, so "grok" becomes "grok[?].v". */
	char *t,*u;
	X_node *d;
	char str[MAX_WORD+1];

	sent->word[i].x = build_word_expressions(sent, UNKNOWN_WORD);
	if (sent->word[i].x == NULL)
		assert(FALSE, "UNKNOWN_WORD should have been there");

	for (d = sent->word[i].x; d != NULL; d = d->next) {
		t = strchr(d->string, '.');
		if (t != NULL) {
			sprintf(str, "%.50s[?].%.5s", s, t+1);
		} else {
			sprintf(str, "%.50s[?]", s);
		}
		t = (char *) xalloc(strlen(str)+1);
		strcpy(t,str);
		u = string_set_add(t, sent->string_set);
		xfree(t, strlen(str)+1);
		d->string = u;
	}
}
Example no. 17
0
static void
set_connector_length_limits(Sentence sent, Parse_Options opts)
{
	size_t i;
	unsigned int len = opts->short_length;
	bool all_short = opts->all_short;
	Connector_set * ucs = sent->dict->unlimited_connector_set;
	const char * ZZZ = string_set_add("ZZZ", sent->dict->string_set);

	if (0)
	{
		/* Not setting the length_limit saves observable time. However, if we
		 * would like to set the ZZZ connector length_limit to 1 for all
		 * sentences, we cannot do the following.
		 * FIXME(?): Use a flag that the sentence contains an empty word. */
		if (len >= sent->length) return; /* No point to enforce short_length. */
	}

	if (len > UNLIMITED_LEN) len = UNLIMITED_LEN;

	for (i=0; i<sent->length; i++)
	{
		Disjunct *d;
		for (d = sent->word[i].d; d != NULL; d = d->next)
		{
			set_connector_list_length_limit(d->left, ucs, len, all_short, ZZZ);
			set_connector_list_length_limit(d->right, ucs, len, all_short, ZZZ);
		}
	}
}
Example no. 18
0
/**
 * Read table of [link, domain type].
 * This tells us what domain type each link belongs to.
 * This lookup table *must* be defined in the knowledge file.
 */
static void read_starting_link_table(pp_knowledge *k)
{
  const char *p;
  const char label[] = "STARTING_LINK_TYPE_TABLE";
  int i, n_tokens;
  if (!pp_lexer_set_label(k->lt, label))
  {
    prt_error("Fatal error: post_process: Couldn't find starting link table %s",label);
    exit(1);
  }
  n_tokens = pp_lexer_count_tokens_of_label(k->lt);
  if (n_tokens %2)
  {
    prt_error("Fatal error: post_process: Link table must have format [<link> <domain name>]+");
    exit(1);
  }
  k->nStartingLinks = n_tokens/2;
  k->starting_link_lookup_table = (StartingLinkAndDomain*)
    xalloc((1+k->nStartingLinks)*sizeof(StartingLinkAndDomain));
  for (i=0; i<k->nStartingLinks; i++)
    {
      /* read the starting link itself */
      k->starting_link_lookup_table[i].starting_link =
	string_set_add(pp_lexer_get_next_token_of_label(k->lt),k->string_set);

      /* read the domain type of the link */
      p = pp_lexer_get_next_token_of_label(k->lt);
      check_domain_is_legal(p);
      k->starting_link_lookup_table[i].domain = (int) p[0];
    }

  /* end sentinel */
  k->starting_link_lookup_table[k->nStartingLinks].domain = -1;
}
Example no. 19
0
void
register_css (const char *file)
{
    if (!downloaded_css_set)
        downloaded_css_set = make_string_hash_table (0);
    string_set_add (downloaded_css_set, file);
}
Example no. 20
0
/**
 * Tear the idiom string apart.
 * Put the parts into a list of Dict_nodes (connected by their right pointers)
 * Sets the string fields of these Dict_nodes pointing to the
 * fragments of the string s.  Later these will be replaced by
 * correct names (with .Ix suffixes).
 * The list is reversed from the way they occur in the string.
 * A pointer to this list is returned.
 */
static Dict_node * make_idiom_Dict_nodes(Dictionary dict, const char * string)
{
	Dict_node * dn, * dn_new;
	char * t, *s, *p;
	int more, sz;
	dn = NULL;

	sz = strlen(string)+1;
	p = s = (char *) xalloc(sz);
	strcpy(s, string);

	while (*s != '\0') {
		t = s;
		while((*s != '\0') && (*s != '_')) s++;
		if (*s == '_') {
			more = TRUE;
			*s = '\0';
		} else {
			more = FALSE;
		}
		dn_new = (Dict_node *) xalloc(sizeof (Dict_node));
		dn_new->right = dn;
		dn = dn_new;
		dn->string = string_set_add(t, dict->string_set);
		dn->file = NULL;
		if (more) s++;
	}

	xfree(p, sz);
	return dn;
}
Example no. 21
0
/**
 * Reads in one word from the file, allocates space for it,
 * and returns it.
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	const char * s;
	wint_t c;
	mbstate_t mbss;
	int j;

	do {
		c = fgetwc(fp);
	} while ((c != WEOF) && iswspace(c));
	if (c == WEOF) return NULL;

	memset(&mbss, 0, sizeof(mbss));
	for (j=0; (j <= MAX_WORD-1) && (!iswspace(c)) && (c != WEOF);)
	{
		j += wctomb_check(&word[j], c, &mbss);
		c = fgetwc(fp);
	}

	if (j >= MAX_WORD) {
		word[MAX_WORD] = 0x0;
		prt_error("Fatal Error: The dictionary contains a word that "
		          "is too long. The word was: %s", word);
		exit(1);
	}
	word[j] = '\0';
	s = string_set_add(word, dict->string_set);
	return s;
}
Example no. 22
0
void
register_html (const char *url, const char *file)
{
  if (!downloaded_html_set)
    downloaded_html_set = make_string_hash_table (0);
  string_set_add (downloaded_html_set, file);
}
Example no. 23
0
static void affix_list_add(Dictionary afdict, Afdict_class * ac,
		const char * affix)
{
	if (NULL == ac)  return; /* ignore unknown class name */
	if (ac->length == ac->mem_elems)
		affix_list_resize(ac);
	ac->string[ac->length] = string_set_add(affix, afdict->string_set);
	ac->length++;
}
Example no. 24
0
Exp * connector(Dictionary dict) {
    /* The current token is a connector (or a dictionary word);
       make a node for it. */

    Exp * n;
    Dict_node * dn;
    int i;

    i = wcslen(dict->token)-1;  /* this must be + or - if a connector */
    if ((dict->token[i] != L'+') && (dict->token[i] != L'-')) {
	dn = abridged_lookup(dict, dict->token);
	while((dn != NULL) && (wcscmp(dn->string, dict->token) != 0)) {
	    dn = dn->right;
	}
	if (dn == NULL) {
	    
	    dict_error(dict, L"\nPerhaps missing + or - in a connector.\n"
		             L"Or perhaps you forgot the suffix on a word.\n"
		             L"Or perhaps a word is used before it is defined.\n");
	    return NULL;
	}
	n = make_unary_node(dict, dn->exp);
    } else {
	if (!check_connector(dict, dict->token)) {
	    return NULL;
	}
	n = Exp_create(dict);
	n->dir = dict->token[i];
	dict->token[i] = L'\0';                   /* get rid of the + or - */
	if (dict->token[0] == L'@') {
	    n->u.string = string_set_add(dict->token+1, dict->string_set);
	    n->multi = TRUE;
	} else {
	    n->u.string = string_set_add(dict->token, dict->string_set);
	    n->multi = FALSE;
	}
	n->type = CONNECTOR_type;		
	n->cost = 0;
    }
    if (!advance(dict)) {
	return NULL;
    }
    return n;
}
Example no. 25
0
/* Remembers broken links.  */
void
nonexisting_url (const char *url)
{
  /* Ignore robots.txt URLs */
  if (is_robots_txt_url (url))
    return;
  if (!nonexisting_urls_set)
    nonexisting_urls_set = make_string_hash_table (0);
  string_set_add (nonexisting_urls_set, url);
}
Example no. 26
0
static void read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
  int n_commas, n_tokens, r, i;
  pp_linkset *lsHandle;
  const char **tokens;
  if (!pp_lexer_set_label(k->lt, label)) {
      k->n_form_a_cycle_rules = 0;
      if (verbosity>0)
	printf("PP warning: Not using any 'form a cycle' rules\n");
  }
  else {
    n_commas = pp_lexer_count_commas_of_label(k->lt);
    k->n_form_a_cycle_rules = (n_commas + 1)/2;
  }
  k->form_a_cycle_rules=
    (pp_rule*) xalloc ((1+k->n_form_a_cycle_rules)*sizeof(pp_rule));
  for (r=0; r<k->n_form_a_cycle_rules; r++)
    {
      /* read link set */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens <= 0)
      {
        prt_error("Fatal Error: syntax error in knowledge file");
        exit(1);
      }
      lsHandle = pp_linkset_open(n_tokens);
      for (i=0; i<n_tokens; i++)
          pp_linkset_add(lsHandle,string_set_add(tokens[i], k->string_set));
      k->form_a_cycle_rules[r].link_set=lsHandle;

      /* read error message */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens > 1)
      {
         prt_error("Fatal Error: post_process: Invalid syntax (rule %i of %s)",r+1,label);
         exit(1);
      }
      k->form_a_cycle_rules[r].msg=string_set_add(tokens[0],k->string_set);
    }

  /* sentinel entry */
  k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
}
Example no. 27
0
void indri::collection::CompressedCollection::create( const std::string& fileName, const std::vector<std::string>& forwardIndexedFields, const std::vector<std::string>& reverseIndexedFields ) {
  std::string manifestName = indri::file::Path::combine( fileName, "manifest" );
  std::string lookupName = indri::file::Path::combine( fileName, "lookup" );
  std::string storageName = indri::file::Path::combine( fileName, "storage" );

  _storage.create( storageName );
  _lookup.create( lookupName );
  _output = new indri::file::SequentialWriteBuffer( _storage, 1024*1024 );

  indri::api::Parameters manifest;
  indri::api::Parameters forwardParameters = manifest.append( "forward" );

  for( size_t i=0; i<forwardIndexedFields.size(); i++ ) {
    std::stringstream metalookupName;
    metalookupName << "forwardLookup" << i;

    std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() );
    lemur::file::Keyfile* metalookup = new lemur::file::Keyfile;
    metalookup->create( metalookupPath );

    const char* key = string_set_add( forwardIndexedFields[i].c_str(), _strings );
    _forwardLookups.insert( key, metalookup );
    forwardParameters.append("field").set(forwardIndexedFields[i]);
  }

  indri::api::Parameters reverseParameters = manifest.append( "reverse" );

  for( size_t i=0; i<reverseIndexedFields.size(); i++ ) {
    std::stringstream metalookupName;
    metalookupName << "reverseLookup" << i;

    std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() );
    lemur::file::Keyfile* metalookup = new lemur::file::Keyfile;
    metalookup->create( metalookupPath );

    const char* key = string_set_add( reverseIndexedFields[i].c_str(), _strings );
    _reverseLookups.insert( key, metalookup );
    reverseParameters.append("field").set(reverseIndexedFields[i]);
  }

  manifest.writeFile( manifestName );
}
Example no. 28
0
void
register_html (const char *url, const char *file)
{
  if (!downloaded_html_set)
    downloaded_html_set = make_string_hash_table (0);
  else if (hash_table_contains (downloaded_html_set, file))
    return;

  /* The set and the list should use the same copy of FILE, but the
     slist interface insists on strduping the string it gets.  Oh
     well. */
  string_set_add (downloaded_html_set, file);
  downloaded_html_list = slist_prepend (downloaded_html_list, file);
}
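
Examples no. 19, 22, 25 and 28 come from wget, where string_set_add is a different, two-argument helper layered over the generic hash table, and the sets are used purely for membership tracking. The sketch below is built only from the calls visible above (make_string_hash_table, string_set_add, hash_table_contains); the seen_urls set and seen_before() are made-up names for illustration.

/* Sketch only: remember-and-test membership, wget style. */
static struct hash_table *seen_urls;

static int
seen_before (const char *url)
{
  if (!seen_urls)
    seen_urls = make_string_hash_table (0);
  if (hash_table_contains (seen_urls, url))
    return 1;
  string_set_add (seen_urls, url);
  return 0;
}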
Example no. 29
0
static void affix_list_add(Dictionary afdict, Afdict_class * ac,
		const char * affix)
{
	if (NULL == ac)  return; /* ignore unknown class name */
	if (ac->mem_elems <= ac->length)
	{
		size_t new_sz;
		ac->mem_elems += AFFIX_COUNT_MEM_INCREMENT;
		new_sz = ac->mem_elems * sizeof(const char *);
		ac->string = (char const **) realloc((void *)ac->string, new_sz);
	}
	ac->string[ac->length] = string_set_add(affix, afdict->string_set);
	ac->length++;
}
Example no. 30
0
Gword *gword_new(Sentence sent, const char *s)
{
	Gword * const gword = malloc(sizeof(*gword));

	memset(gword, 0, sizeof(*gword));
	assert(NULL != s, "Null-string subword");
	gword->subword = string_set_add(s, sent->string_set);

	if (NULL != sent->last_word) sent->last_word->chain_next = gword;
	sent->last_word = gword;
	gword->node_num = sent->gword_node_num++;

	return gword;
}