/**
 * connector() -- make a node for a connector or dictionary word.
 *
 * Assumes the current token is a connector or dictionary word.
 * Returns NULL (after reporting a dict_error where appropriate) on
 * a malformed token or when the token stream cannot be advanced.
 */
static Exp * connector(Dictionary dict)
{
	Exp * n;
	Dict_node *dn, *dn_head;
	int i;

	i = strlen(dict->token) - 1; /* this must be + or - if a connector */
	if ((dict->token[i] != '+') && (dict->token[i] != '-'))
	{
		/* If we are here, token is a word */
		dn_head = abridged_lookup_list(dict, dict->token);

		/* Find the exact-match entry in the lookup list, if any. */
		dn = dn_head;
		while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
		{
			dn = dn->right;
		}
		if (dn == NULL)
		{
			free_lookup_list(dn_head);
			dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
			         "Or perhaps you forgot the suffix on a word.\n"
			         "Or perhaps a word is used before it is defined.\n");
			return NULL;
		}
		/* Wrap the found word's expression; the lookup list itself
		 * is no longer needed after this. */
		n = make_unary_node(dict, dn->exp);
		free_lookup_list(dn_head);
	}
	else
	{
		/* If we are here, token is a connector */
		if (!check_connector(dict, dict->token))
		{
			return NULL;
		}
		n = Exp_create(dict);
		n->dir = dict->token[i];
		dict->token[i] = '\0'; /* get rid of the + or - */
		if (dict->token[0] == '@')
		{
			/* A leading '@' marks a multi-connector. */
			n->u.string = string_set_add(dict->token+1, dict->string_set);
			n->multi = TRUE;
		}
		else
		{
			n->u.string = string_set_add(dict->token, dict->string_set);
			n->multi = FALSE;
		}
		n->type = CONNECTOR_type;
		n->cost = 0.0f;
	}
	if (!link_advance(dict))
	{
		/* Failed to read the next token; discard the node we built. */
		exp_free(n);
		return NULL;
	}
	return n;
}
/**
 * Create a dictionary whose word lookups are served by the "dict.db"
 * database file under the given language directory.
 *
 * @param lang  Language name, or a path whose basename is the language.
 * @return A freshly allocated, fully initialized Dictionary.
 */
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* These fields are already zeroed by the memset above; the explicit
	 * assignments are kept for documentation value. */
	dict->version = NULL;
	dict->num_entries = 0;
	dict->affix_table = NULL;
	dict->regex_root = NULL;

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	dict->lang = lang;
	/* If lang looks like a path, use (and intern) only its basename;
	 * otherwise the caller's pointer is kept as-is. */
	t = strrchr (lang, '/');
	if (t) dict->lang = string_set_add(t+1, dict->string_set);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	/* The interned copy outlives the scratch buffer. */
	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	/* Route all lookups through the database backend. */
	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);
	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);
	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	/* Cache the set of connectors exempt from length limits, if the
	 * special word defining them exists in this dictionary. */
	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
	{
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	}
	else
	{
		dict->unlimited_connector_set = NULL;
	}
	free_lookup_list(dict, dict_node);

	return dict;
}
/**
 * Create a dictionary whose word lookups are served by the "dict.db"
 * database file under the given language directory.
 * (Newer variant: relies on memset() for zero-initialization, logs the
 * detected language, and reports when a compiled-in spell checker
 * failed to start.)
 *
 * @param lang  Language name, or a path whose basename is the language.
 * @return A freshly allocated, fully initialized Dictionary.
 */
Dictionary dictionary_create_from_db(const char *lang)
{
	char *dbname;
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();

	/* Use (and intern) only the basename of a path-like argument. */
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);

	/* To disable spell-checking, just set the checker to NULL */
	dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
	/* Only worth mentioning when a spell checker was compiled in. */
	if (NULL == dict->spell_checker)
		prt_error("Info: Spell checker disabled.");
#endif
	dict->base_knowledge = NULL;
	dict->hpsg_knowledge = NULL;

	/* The interned copy outlives the scratch buffer. */
	dbname = join_path (lang, "dict.db");
	dict->name = string_set_add(dbname, dict->string_set);
	free(dbname);

	/* Set up the database */
	dict->db_handle = object_open(dict->name, db_open, NULL);

	/* Route all lookups through the database backend. */
	dict->lookup_list = db_lookup_list;
	dict->free_lookup = db_free_llist;
	dict->lookup = db_lookup;
	dict->close = db_close;

	/* Misc remaining common (generic) dict setup work */
	dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);
	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);
	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	/* Cache the set of connectors exempt from length limits.
	 * (unlimited_connector_set stays NULL when the special word is
	 * absent -- already zeroed by the memset above.) */
	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);

	free_lookup_list(dict, dict_node);

	return dict;
}
/**
 * Read the 'contains_one_rules' or the 'contains_none_rules' (selected
 * by LABEL) into a freshly xalloc'ed array stored at *rules, with a
 * sentinel entry (msg == 0) at the end; *nRules receives the count.
 * Each rule in the knowledge file has three comma-separated fields:
 * "selector , link-set , message".  Syntax errors are fatal.
 */
static void read_contains_rules(pp_knowledge *k, const char *label,
                                pp_rule **rules, int *nRules)
{
	int n_commas, n_tokens, i, r;
	const char *p;
	const char **tokens;
	if (!pp_lexer_set_label(k->lt, label))
	{
		*nRules = 0;
		if (verbosity>0) printf("PP warning: Not using any %s rules\n", label);
	}
	else
	{
		n_commas = pp_lexer_count_commas_of_label(k->lt);
		*nRules = (n_commas + 1)/3; /* three comma-separated fields per rule */
	}
	*rules = (pp_rule*) xalloc ((1+*nRules)*sizeof(pp_rule));
	for (r=0; r<*nRules; r++)
	{
		/* first read link */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens>1)
		{
			prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
			exit(1);
		}
		/* NOTE(review): n_tokens==0 is not rejected here, so tokens[0]
		 * relies on the lexer always returning at least one token --
		 * confirm that guarantee. */
		(*rules)[r].selector = string_set_add(tokens[0], k->string_set);

		/* read link set */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		(*rules)[r].link_set = pp_linkset_open(n_tokens);
		(*rules)[r].link_set_size = n_tokens;
		(*rules)[r].link_array =
			(const char **) xalloc((1+n_tokens)*sizeof(const char*));
		for (i=0; i<n_tokens; i++)
		{
			/* Intern once; the same pointer goes into both the set
			 * and the parallel array. */
			p = string_set_add(tokens[i], k->string_set);
			pp_linkset_add((*rules)[r].link_set, p);
			(*rules)[r].link_array[i] = p;
		}
		(*rules)[r].link_array[i]=0; /* NULL-terminator */

		/* read error message */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens>1)
		{
			prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
			exit(1);
		}
		(*rules)[r].msg = string_set_add(tokens[0], k->string_set);
	}
	/* sentinel entry */
	(*rules)[*nRules].msg = 0;
}
/**
 * Give the out-of-dictionary word S (at sentence position I) the
 * expressions of the generic dictionary word TYPE, then rewrite the
 * expression names to "s[!]" (keeping any subscript) so the guess is
 * visible in the output.
 * Returns TRUE on success; FALSE (with an lperror) if TYPE itself is
 * not in the dictionary.
 */
static int guessed_string(Sentence sent, int i, const char * s, const char * type)
{
	X_node * e;
	char *t, *u;
	char str[MAX_WORD+1];
	if (boolean_dictionary_lookup(sent->dict, type))
	{
		sent->word[i].x = build_word_expressions(sent, type);
		e = sent->word[i].x;
		if(is_s_word(s))
		{
			/* Plural-looking word: rewrite EVERY alternative. */
			for (; e != NULL; e = e->next)
			{
				t = strchr(e->string, '.'); /* existing subscript? */
				if (t != NULL)
				{
					/* keep the subscript: "word[!].xx" */
					sprintf(str, "%.50s[!].%.5s", s, t+1);
				}
				else
				{
					sprintf(str, "%.50s[!]", s);
				}
				/* The xalloc/xfree round trip just builds a temporary
				 * NUL-terminated copy for interning. */
				t = (char *) xalloc(strlen(str)+1);
				strcpy(t,str);
				u = string_set_add(t, sent->string_set);
				xfree(t, strlen(str)+1);
				e->string = u;
			}
		}
		else
		{
			/* Guess a part-of-speech subscript from the word ending:
			 * -ed => verb, -ing => gerund, -ly => adverb. */
			if(is_ed_word(s))
			{
				sprintf(str, "%.50s[!].v", s);
			}
			else if(is_ing_word(s))
			{
				sprintf(str, "%.50s[!].g", s);
			}
			else if(is_ly_word(s))
			{
				sprintf(str, "%.50s[!].e", s);
			}
			else sprintf(str, "%.50s[!]", s);
			/* NOTE(review): unlike the branch above, only the FIRST
			 * X_node is renamed here (e is not advanced), and e would be
			 * NULL if build_word_expressions() returned no expressions --
			 * confirm both behaviors are intended. */
			t = (char *) xalloc(strlen(str)+1);
			strcpy(t,str);
			u = string_set_add(t, sent->string_set);
			xfree(t, strlen(str)+1);
			e->string = u;
		}
		return TRUE;
	}
	else
	{
		lperror(BUILDEXPR, ".\n To process this sentence your dictionary "
		        "needs the word \"%s\".\n", type);
		return FALSE;
	}
}
/**
 * Read the 'form a cycle' rules from the knowledge file into
 * k->form_a_cycle_rules (terminated by a sentinel entry with
 * msg == 0).  Each rule has two comma-separated fields:
 * "link-set , message".
 * Returns false on a syntax error in the file, true otherwise.
 */
static bool read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
	size_t n_commas, n_tokens;
	size_t r, i;
	pp_linkset *lsHandle;
	const char **tokens;
	if (!pp_lexer_set_label(k->lt, label))
	{
		k->n_form_a_cycle_rules = 0;
		if (verbosity_level(+D_PPK))
			prt_error("Warning: File %s: Not using any 'form a cycle' rules\n",
			          k->path);
	}
	else
	{
		n_commas = pp_lexer_count_commas_of_label(k->lt);
		/* commas+1 fields, two fields per rule */
		k->n_form_a_cycle_rules = (n_commas + 1)/2;
	}
	/* +1 for the sentinel entry */
	k->form_a_cycle_rules=
		(pp_rule*) malloc ((1+k->n_form_a_cycle_rules)*sizeof(pp_rule));
	for (r=0; r<k->n_form_a_cycle_rules; r++)
	{
		/* read link set */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens <= 0) /* n_tokens is unsigned, so this tests for 0 */
		{
			prt_error("Error: File %s: Syntax error\n", k->path);
			return false;
		}
		lsHandle = pp_linkset_open(n_tokens);
		for (i=0; i<n_tokens; i++)
			pp_linkset_add(lsHandle,string_set_add(tokens[i], k->string_set));
		k->form_a_cycle_rules[r].link_set = lsHandle;

		/* read error message */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens > 1)
		{
			prt_error("Error: File %s: Invalid syntax (rule %zu of %s)\n",
			          k->path, r+1,label);
			return false;
		}
		k->form_a_cycle_rules[r].msg = string_set_add(tokens[0], k->string_set);
		k->form_a_cycle_rules[r].use_count = 0;
	}
	/* sentinel entry */
	k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
	k->form_a_cycle_rules[k->n_form_a_cycle_rules].use_count = 0;
	return true;
}
void indri::collection::CompressedCollection::open( const std::string& fileName ) { std::string lookupName = indri::file::Path::combine( fileName, "lookup" ); std::string storageName = indri::file::Path::combine( fileName, "storage" ); std::string manifestName = indri::file::Path::combine( fileName, "manifest" ); indri::api::Parameters manifest; manifest.loadFile( manifestName ); _storage.open( storageName ); _lookup.open( lookupName ); _output = new indri::file::SequentialWriteBuffer( _storage, 1024*1024 ); if( manifest.exists("forward.field") ) { indri::api::Parameters forward = manifest["forward.field"]; for( size_t i=0; i<forward.size(); i++ ) { std::stringstream metalookupName; metalookupName << "forwardLookup" << i; std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() ); lemur::file::Keyfile* metalookup = new lemur::file::Keyfile; metalookup->open( metalookupPath ); std::string fieldName = forward[i]; const char* key = string_set_add( fieldName.c_str(), _strings ); _forwardLookups.insert( key, metalookup ); } } indri::api::Parameters reverse = manifest["reverse"]; if( manifest.exists("reverse.field") ) { indri::api::Parameters reverse = manifest["reverse.field"]; for( size_t i=0; i<reverse.size(); i++ ) { std::stringstream metalookupName; metalookupName << "reverseLookup" << i; std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() ); lemur::file::Keyfile* metalookup = new lemur::file::Keyfile; metalookup->open( metalookupPath ); std::string fieldName = reverse[i]; const char* key = string_set_add( fieldName.c_str(), _strings ); _reverseLookups.insert( key, metalookup ); } } }
/**
 * Reads in one word from the file, allocates space for it,
 * and returns it.
 *
 * In case of an error, return a null string (cannot be a valid word).
 * Returns NULL at end of file.
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	const char * s;
	int c, j;

	/* Skip leading whitespace. */
	do { c = fgetc(fp); } while ((c != EOF) && lg_isspace(c));
	if (c == EOF) return NULL;

	/* Copy characters until whitespace/EOF or the buffer limit. */
	for (j=0; (j <= MAX_WORD-1) && (!lg_isspace(c)) && (c != EOF); j++)
	{
		word[j] = c;
		c = fgetc(fp);
	}

	if (j >= MAX_WORD)
	{
		/* The word filled the whole buffer, so it is treated as too
		 * long (a word of exactly MAX_WORD chars is also rejected,
		 * since the loop stops before seeing its end). */
		word[MAX_WORD] = '\0';
		prt_error("The dictionary contains a word that is too long: %s\n", word);
		return ""; /* error indication */
	}
	word[j] = '\0';
	patch_subscript(word);
	s = string_set_add(word, dict->string_set);
	return s;
}
/**
 * Record that NEW_WORD was seen with feature NEW_F.
 * If the word is not yet on *morpheme_list, a new Morpheme node is
 * prepended for it; if the word exists but lacks this feature, a copy
 * of the feature is added to its feature list; otherwise nothing
 * changes.
 */
void morpheme_list_add(Morpho_structures ms, Morpheme **morpheme_list, char * new_word, Feature *new_f)
{
	Morpheme *m;
	Feature_list *fl;

	/* Look for an existing entry for this word. */
	for (m = *morpheme_list; m != NULL; m = m->next)
		if (strcmp(m->word, new_word) == 0) break;

	if (m == NULL)
	{
		/* First sighting of this word: prepend a fresh node. */
		Morpheme *fresh = (Morpheme *)xalloc (sizeof (Morpheme));
		fresh->word = string_set_add(new_word, ms->dict->string_set);
		fresh->f_list = NULL;
		feature_list_add(&(fresh->f_list),
		                 feature_copy_driver(USE_SOURCE_STRING_SET, new_f));
		fresh->next = *morpheme_list;
		*morpheme_list = fresh;
		return;
	}

	/* Word already known: nothing to do if the feature is recorded. */
	for (fl = m->f_list; fl != NULL; fl = fl->next)
		if (feature_is_equal(fl->f, new_f)) return;

	feature_list_add(&(m->f_list),
	                 feature_copy_driver(USE_SOURCE_STRING_SET, new_f));
}
/**
 * Create a short form of flags summary for displaying in a word node.
 * The set status bits are rendered as '|'-separated tags (e.g.
 * "IN|HA"), and the result is interned in the sentence string set.
 */
const char *gword_status(Sentence sent, const Gword *w)
{
	/* Table of status bits and their display tags, in output order. */
	static const struct
	{
		unsigned int flag;
		const char *tag;
	} status_tag[] = {
		{ WS_UNKNOWN, "UNK" },
		{ WS_INDICT,  "IN"  },
		{ WS_REGEX,   "RE"  },
		{ WS_SPELL,   "SP"  },
		{ WS_RUNON,   "RU"  },
		{ WS_HASALT,  "HA"  },
		{ WS_UNSPLIT, "UNS" },
		{ WS_PL,      "PL"  },
	};
	dyn_str *tmp = dyn_str_new();
	const char *result;
	size_t k, n;

	for (k = 0; k < sizeof(status_tag)/sizeof(status_tag[0]); k++)
	{
		if (w->status & status_tag[k].flag)
		{
			dyn_strcat(tmp, status_tag[k].tag);
			dyn_strcat(tmp, "|");
		}
	}

	/* Drop the trailing '|' separator, if anything was emitted. */
	n = strlen(tmp->str);
	if (n > 0) tmp->str[n-1] = '\0';

	result = string_set_add(tmp->str, sent->string_set);
	dyn_str_delete(tmp);
	return result;
}
/**
 * Read the link set marked by LABEL in the knowledge file into a set
 * of links whose handle is returned.  If the label is not defined in
 * the file, an EMPTY set is returned (with a warning at the right
 * verbosity level).  On a lexer error, the special handle
 * &LINK_SET_ERROR is returned.
 */
static pp_linkset *read_link_set(pp_knowledge *k,
                                 const char *label, String_set *ss)
{
	int n_strings,i;
	pp_linkset *ls;
	if (!pp_lexer_set_label(k->lt, label))
	{
		if (verbosity_level(+D_PPK))
			prt_error("Warning: File %s: Link set %s not defined: assuming empty\n",
			          k->path, label);
		n_strings = 0;
	}
	else
	{
		n_strings = pp_lexer_count_tokens_of_label(k->lt);
		/* -1 indicates a lexer failure; propagate a distinguishable
		 * error handle instead of an empty set. */
		if (-1 == n_strings) return &LINK_SET_ERROR;
	}
	ls = pp_linkset_open(n_strings);
	for (i=0; i<n_strings; i++)
		pp_linkset_add(ls,
			string_set_add(pp_lexer_get_next_token_of_label(k->lt),ss));
	return ls;
}
/**
 * Append the word W to the NULL-terminated alternatives array *altp,
 * growing the array as needed.  W is interned in the sentence's
 * string set before being stored.
 */
void altappend(Sentence sent, const char ***altp, const char *w)
{
	size_t end = altlen(*altp); /* index of the current terminator */

	*altp = resize_alts(*altp, end);
	(*altp)[end] = string_set_add(w, sent->string_set);
}
/**
 * Allocates string space and returns a pointer to it.
 * In this string is placed the idiomized name of the given string s.
 * This is the same as s, but with a postfix of ".Ix", where x is an
 * appropriate number.  x is the minimum number that distinguishes
 * this word from others in the dictionary.
 * (Wide-character version: the result is interned in the dictionary's
 * string set; the scratch buffer is freed before returning.)
 */
wchar_t * build_idiom_word_name(Dictionary dict, wchar_t * s)
{
	wchar_t * new_s, * x, *id;
	int count, sz;

	/* Next free ".Ix" index for this word. */
	count = max_postfix_found(dictionary_lookup(dict, s))+1;

	sz = wcslen(s)+10;
	new_s = x = (wchar_t *) xalloc(sizeof(wchar_t)*sz); /* fails if > 10**10 idioms */

	/* Copy s up to (but not including) any existing '.' subscript. */
	while((*s != L'\0') && (*s != L'.'))
	{
		*x = *s;
		x++;
		s++;
	}
	/* Append the ".Ix" postfix in the remaining space. */
	swprintf_s(x, sz - (x - new_s), L".I%d", count);

	id = string_set_add(new_s, dict->string_set);
	xfree(new_s, sizeof(wchar_t)*sz);
	return id;
}
/**
 * (1) opens the word file and adds it to the word file list
 * (2) reads in the words
 * (3) puts each word in a Dict_node
 * (4) links these together by their left pointers at the
 *     front of the list pointed to by dn
 * (5) returns a pointer to the first of this list
 *
 * Returns NULL if the file cannot be opened, or if it contains a
 * too-long word (in which case the partially built insert list is
 * freed).
 */
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
{
	Word_file * wf;
	FILE * fp;
	const char * s;

	filename += 1; /* get rid of leading '/' */

	if ((fp = dictopen(filename, "r")) == NULL)
	{
		return NULL;
	}

	/* Register the file on the dictionary's word-file list. */
	wf = malloc(sizeof (Word_file));
	wf->file = string_set_add(filename, dict->string_set);
	wf->changed = false;
	wf->next = dict->word_file_header;
	dict->word_file_header = wf;

	while ((s = get_a_word(dict, fp)) != NULL)
	{
		if ('\0' == s[0]) /* returned error indication */
		{
			fclose(fp);
			free_insert_list(dn);
			return NULL;
		}
		/* Prepend a node holding this word, linked via ->left. */
		Dict_node * dn_new = malloc(sizeof(Dict_node));
		dn_new->left = dn;
		dn = dn_new;
		dn->string = s;
		dn->file = wf;
	}
	fclose(fp);
	return dn;
}
/**
 * Return a printable name for the morpheme type of W, for debug
 * display.  Unexpected enum values are formatted as "MT_<number>"
 * and interned in the sentence string set (so the returned pointer
 * stays valid after this function returns).
 */
GNUC_UNUSED const char *gword_morpheme(Sentence sent, const Gword *w)
{
	const char *mt;
	char buff[64];

	switch (w->morpheme_type)
	{
		case MT_INVALID: mt = "MT_INVALID"; break;
		case MT_WORD: mt = "MT_WORD"; break;
		case MT_FEATURE: mt = "MT_FEATURE"; break;
		case MT_INFRASTRUCTURE: mt = "MT_I-S"; break;
		case MT_WALL: mt = "MT_WALL"; break;
		case MT_EMPTY: mt = "MT_EMPTY"; break;
		case MT_UNKNOWN: mt = "MT_UNKNOWN"; break;
		case MT_TEMPLATE: mt = "MT_TEMPLATE"; break;
		case MT_ROOT: mt = "MT_ROOT"; break;
		case MT_CONTR: mt = "MT_CONTR"; break;
		case MT_PUNC: mt = "MT_PUNC"; break;
		case MT_STEM: mt = "MT_STEM"; break;
		case MT_PREFIX: mt = "MT_PREFIX"; break;
		case MT_MIDDLE: mt = "MT_MIDDLE"; break;
		case MT_SUFFIX: mt = "MT_SUFFIX"; break;
		default:
			/* No truncation is expected. */
			snprintf(buff, sizeof(buff), "MT_%d", w->morpheme_type);
			mt = string_set_add(buff, sent->string_set);
	}
	return mt;
}
/**
 * Puts into word[i].x the expression for the unknown word.
 * The parameter s is the word that was not in the dictionary.
 * It massages the names to have the corresponding subscripts
 * to those of the unknown words, so "grok" becomes "grok[?].v".
 */
static void handle_unknown_word(Sentence sent, int i, char * s)
{
	char *t,*u;
	X_node *d;
	char str[MAX_WORD+1];

	sent->word[i].x = build_word_expressions(sent, UNKNOWN_WORD);
	if (sent->word[i].x == NULL)
		assert(FALSE, "UNKNOWN_WORD should have been there");

	/* Rewrite the name on every alternative expression. */
	for (d = sent->word[i].x; d != NULL; d = d->next)
	{
		t = strchr(d->string, '.'); /* subscript of the generic entry */
		if (t != NULL)
		{
			/* carry the subscript over: "word[?].xx" */
			sprintf(str, "%.50s[?].%.5s", s, t+1);
		}
		else
		{
			sprintf(str, "%.50s[?]", s);
		}
		/* The xalloc/xfree pair just builds a temporary NUL-terminated
		 * copy of str for interning. */
		t = (char *) xalloc(strlen(str)+1);
		strcpy(t,str);
		u = string_set_add(t, sent->string_set);
		xfree(t, strlen(str)+1);
		d->string = u;
	}
}
/**
 * Apply the short-connector length limit to every connector of every
 * disjunct in the sentence.  Connectors in the dictionary's
 * "unlimited" set and the internal ZZZ connector are handled
 * specially by set_connector_list_length_limit().
 */
static void set_connector_length_limits(Sentence sent, Parse_Options opts)
{
	size_t i;
	unsigned int len = opts->short_length;
	bool all_short = opts->all_short;
	Connector_set * ucs = sent->dict->unlimited_connector_set;
	const char * ZZZ = string_set_add("ZZZ", sent->dict->string_set);

	if (0) /* deliberately disabled -- see the comment inside */
	{
		/* Not setting the length_limit saves observable time. However, if we
		 * would like to set the ZZZ connector length_limit to 1 for all
		 * sentences, we cannot do the following.
		 * FIXME(?): Use a flag that the sentence contains an empty word. */
		if (len >= sent->length) return; /* No point to enforce short_length. */
	}

	/* Clamp to the "unlimited" value. */
	if (len > UNLIMITED_LEN) len = UNLIMITED_LEN;

	for (i=0; i<sent->length; i++)
	{
		Disjunct *d;
		for (d = sent->word[i].d; d != NULL; d = d->next)
		{
			set_connector_list_length_limit(d->left, ucs, len, all_short, ZZZ);
			set_connector_list_length_limit(d->right, ucs, len, all_short, ZZZ);
		}
	}
}
/**
 * Read table of [link, domain type].
 * This tells us what domain type each link belongs to.
 * This lookup table *must* be defined in the knowledge file;
 * a missing or malformed table is fatal.
 */
static void read_starting_link_table(pp_knowledge *k)
{
	const char *p;
	const char label[] = "STARTING_LINK_TYPE_TABLE";
	int i, n_tokens;

	if (!pp_lexer_set_label(k->lt, label))
	{
		prt_error("Fatal error: post_process: Couldn't find starting link table %s",label);
		exit(1);
	}
	n_tokens = pp_lexer_count_tokens_of_label(k->lt);
	if (n_tokens %2)
	{
		/* Tokens come in <link> <domain> pairs, so the count must be even. */
		prt_error("Fatal error: post_process: Link table must have format [<link> <domain name>]+");
		exit(1);
	}
	k->nStartingLinks = n_tokens/2;
	/* +1 for the end sentinel */
	k->starting_link_lookup_table = (StartingLinkAndDomain*)
		xalloc((1+k->nStartingLinks)*sizeof(StartingLinkAndDomain));

	for (i=0; i<k->nStartingLinks; i++)
	{
		/* read the starting link itself */
		k->starting_link_lookup_table[i].starting_link =
			string_set_add(pp_lexer_get_next_token_of_label(k->lt),k->string_set);

		/* read the domain type of the link */
		p = pp_lexer_get_next_token_of_label(k->lt);
		check_domain_is_legal(p);
		/* the domain is encoded as the first character of its token */
		k->starting_link_lookup_table[i].domain = (int) p[0];
	}
	/* end sentinel */
	k->starting_link_lookup_table[k->nStartingLinks].domain = -1;
}
void register_css (const char *file) { if (!downloaded_css_set) downloaded_css_set = make_string_hash_table (0); string_set_add (downloaded_css_set, file); }
/**
 * Tear the idiom string apart.
 * Put the parts into a list of Dict_nodes (connected by their right pointers)
 * Sets the string fields of these Dict_nodes pointing to the
 * fragments of the string s. Later these will be replaced by
 * correct names (with .Ix suffixes).
 * The list is reversed from the way they occur in the string.
 * A pointer to this list is returned.
 */
static Dict_node * make_idiom_Dict_nodes(Dictionary dict, const char * string)
{
	Dict_node * dn, * dn_new;
	char * t, *s, *p;
	int more, sz;
	dn = NULL;

	/* Work on a scratch copy, since the '_' separators get overwritten. */
	sz = strlen(string)+1;
	p = s = (char *) xalloc(sz);
	strcpy(s, string);

	while (*s != '\0')
	{
		t = s; /* start of the current fragment */
		while((*s != '\0') && (*s != '_')) s++;
		if (*s == '_')
		{
			more = TRUE;
			*s = '\0'; /* terminate the fragment in place */
		}
		else
		{
			more = FALSE;
		}
		/* Prepend a node for this fragment (hence the reversed order);
		 * the fragment text is interned in the dict's string set. */
		dn_new = (Dict_node *) xalloc(sizeof (Dict_node));
		dn_new->right = dn;
		dn = dn_new;
		dn->string = string_set_add(t, dict->string_set);
		dn->file = NULL;
		if (more) s++; /* step past the '_' we just consumed */
	}
	xfree(p, sz);
	return dn;
}
/**
 * Reads in one word from the file, allocates space for it,
 * and returns it.
 * (Wide-character input variant: each wide char is converted back to
 * multibyte before interning.  Returns NULL at end of file; a
 * too-long word is fatal here.)
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	const char * s;
	wint_t c;
	mbstate_t mbss;
	int j;

	/* Skip leading whitespace. */
	do { c = fgetwc(fp); } while ((c != WEOF) && iswspace(c));
	if (c == WEOF) return NULL;

	/* Convert wide chars to multibyte until whitespace/EOF or the
	 * buffer limit; j advances by the multibyte length of each char. */
	memset(&mbss, 0, sizeof(mbss));
	for (j=0; (j <= MAX_WORD-1) && (!iswspace(c)) && (c != WEOF);)
	{
		j += wctomb_check(&word[j], c, &mbss);
		c = fgetwc(fp);
	}
	if (j >= MAX_WORD)
	{
		word[MAX_WORD] = 0x0;
		prt_error("Fatal Error: The dictionary contains a word that "
		          "is too long. The word was: %s", word);
		exit(1);
	}
	word[j] = '\0';
	s = string_set_add(word, dict->string_set);
	return s;
}
void register_html (const char *url, const char *file) { if (!downloaded_html_set) downloaded_html_set = make_string_hash_table (0); string_set_add (downloaded_html_set, file); }
/**
 * Append AFFIX (interned in the affix dictionary's string set) to the
 * given affix class, growing the class's array when it is full.
 * A NULL class (unknown class name) is silently ignored.
 */
static void affix_list_add(Dictionary afdict, Afdict_class * ac,
                           const char * affix)
{
	if (NULL == ac) return; /* ignore unknown class name */

	if (ac->length == ac->mem_elems)
		affix_list_resize(ac);

	ac->string[ac->length++] = string_set_add(affix, afdict->string_set);
}
/**
 * The current token is a connector (or a dictionary word);
 * make an Exp node for it.  (Wide-character variant.)
 * Returns NULL (after a dict_error where appropriate) on a malformed
 * token or when the token stream cannot be advanced.
 */
Exp * connector(Dictionary dict)
{
	Exp * n;
	Dict_node * dn;
	int i;

	i = wcslen(dict->token)-1; /* this must be + or - if a connector */
	if ((dict->token[i] != L'+') && (dict->token[i] != L'-'))
	{
		/* Token is a word: find its exact entry in the lookup list. */
		dn = abridged_lookup(dict, dict->token);
		while((dn != NULL) && (wcscmp(dn->string, dict->token) != 0))
		{
			dn = dn->right;
		}
		if (dn == NULL)
		{
			dict_error(dict, L"\nPerhaps missing + or - in a connector.\n"
			          L"Or perhaps you forgot the suffix on a word.\n"
			          L"Or perhaps a word is used before it is defined.\n");
			return NULL;
		}
		n = make_unary_node(dict, dn->exp);
	}
	else
	{
		/* Token is a connector. */
		if (!check_connector(dict, dict->token))
		{
			return NULL;
		}
		n = Exp_create(dict);
		n->dir = dict->token[i];
		dict->token[i] = L'\0'; /* get rid of the + or - */
		if (dict->token[0] == L'@')
		{
			/* A leading '@' marks a multi-connector. */
			n->u.string = string_set_add(dict->token+1, dict->string_set);
			n->multi = TRUE;
		}
		else
		{
			n->u.string = string_set_add(dict->token, dict->string_set);
			n->multi = FALSE;
		}
		n->type = CONNECTOR_type;
		n->cost = 0;
	}
	if (!advance(dict))
	{
		/* NOTE(review): n is not freed on this path (compare the newer
		 * byte-string variant, which calls exp_free here) -- confirm
		 * whether Exp nodes are pooled or this leaks. */
		return NULL;
	}
	return n;
}
/* Remembers broken links. */ void nonexisting_url (const char *url) { /* Ignore robots.txt URLs */ if (is_robots_txt_url (url)) return; if (!nonexisting_urls_set) nonexisting_urls_set = make_string_hash_table (0); string_set_add (nonexisting_urls_set, url); }
/**
 * Read the 'form a cycle' rules into k->form_a_cycle_rules
 * (terminated by a sentinel entry with msg == 0).  Each rule has two
 * comma-separated fields: "link-set , message".  Syntax errors are
 * fatal in this variant.
 */
static void read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
	int n_commas, n_tokens, r, i;
	pp_linkset *lsHandle;
	const char **tokens;
	if (!pp_lexer_set_label(k->lt, label))
	{
		k->n_form_a_cycle_rules = 0;
		if (verbosity>0) printf("PP warning: Not using any 'form a cycle' rules\n");
	}
	else
	{
		n_commas = pp_lexer_count_commas_of_label(k->lt);
		/* commas+1 fields, two fields per rule */
		k->n_form_a_cycle_rules = (n_commas + 1)/2;
	}
	/* +1 for the sentinel entry */
	k->form_a_cycle_rules=
		(pp_rule*) xalloc ((1+k->n_form_a_cycle_rules)*sizeof(pp_rule));
	for (r=0; r<k->n_form_a_cycle_rules; r++)
	{
		/* read link set */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens <= 0)
		{
			prt_error("Fatal Error: syntax error in knowledge file");
			exit(1);
		}
		lsHandle = pp_linkset_open(n_tokens);
		for (i=0; i<n_tokens; i++)
			pp_linkset_add(lsHandle,string_set_add(tokens[i], k->string_set));
		k->form_a_cycle_rules[r].link_set=lsHandle;

		/* read error message */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens > 1)
		{
			prt_error("Fatal Error: post_process: Invalid syntax (rule %i of %s)",r+1,label);
			exit(1);
		}
		k->form_a_cycle_rules[r].msg=string_set_add(tokens[0],k->string_set);
	}
	/* sentinel entry */
	k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
}
/**
 * Create a new compressed collection on disk under fileName.
 * Builds the "storage" and "lookup" keyfiles, one forward and one
 * reverse metadata lookup file per indexed field, and records the
 * indexed field names in the "manifest" file so open() can rebuild
 * the lookup maps later.
 */
void indri::collection::CompressedCollection::create( const std::string& fileName,
                                                      const std::vector<std::string>& forwardIndexedFields,
                                                      const std::vector<std::string>& reverseIndexedFields )
{
  std::string manifestName = indri::file::Path::combine( fileName, "manifest" );
  std::string lookupName = indri::file::Path::combine( fileName, "lookup" );
  std::string storageName = indri::file::Path::combine( fileName, "storage" );

  _storage.create( storageName );
  _lookup.create( lookupName );
  _output = new indri::file::SequentialWriteBuffer( _storage, 1024*1024 );

  indri::api::Parameters manifest;
  indri::api::Parameters forwardParameters = manifest.append( "forward" );

  for( size_t i=0; i<forwardIndexedFields.size(); i++ ) {
    // One keyfile per forward-indexed field: "forwardLookup<i>".
    std::stringstream metalookupName;
    metalookupName << "forwardLookup" << i;

    std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() );
    lemur::file::Keyfile* metalookup = new lemur::file::Keyfile;
    metalookup->create( metalookupPath );

    // Key the lookup map by an interned copy of the field name,
    // and record the field in the manifest.
    const char* key = string_set_add( forwardIndexedFields[i].c_str(), _strings );
    _forwardLookups.insert( key, metalookup );
    forwardParameters.append("field").set(forwardIndexedFields[i]);
  }

  indri::api::Parameters reverseParameters = manifest.append( "reverse" );

  for( size_t i=0; i<reverseIndexedFields.size(); i++ ) {
    // One keyfile per reverse-indexed field: "reverseLookup<i>".
    std::stringstream metalookupName;
    metalookupName << "reverseLookup" << i;

    std::string metalookupPath = indri::file::Path::combine( fileName, metalookupName.str() );
    lemur::file::Keyfile* metalookup = new lemur::file::Keyfile;
    metalookup->create( metalookupPath );

    const char* key = string_set_add( reverseIndexedFields[i].c_str(), _strings );
    _reverseLookups.insert( key, metalookup );
    reverseParameters.append("field").set(reverseIndexedFields[i]);
  }

  manifest.writeFile( manifestName );
}
void register_html (const char *url, const char *file) { if (!downloaded_html_set) downloaded_html_set = make_string_hash_table (0); else if (hash_table_contains (downloaded_html_set, file)) return; /* The set and the list should use the same copy of FILE, but the slist interface insists on strduping the string it gets. Oh well. */ string_set_add (downloaded_html_set, file); downloaded_html_list = slist_prepend (downloaded_html_list, file); }
/**
 * Append AFFIX (interned in the affix dictionary's string set) to the
 * given affix class, growing the class's string array in chunks of
 * AFFIX_COUNT_MEM_INCREMENT as needed.
 * A NULL class (unknown class name) is silently ignored.
 */
static void affix_list_add(Dictionary afdict, Afdict_class * ac,
                           const char * affix)
{
	if (NULL == ac) return; /* ignore unknown class name */
	if (ac->mem_elems <= ac->length)
	{
		size_t new_sz;
		ac->mem_elems += AFFIX_COUNT_MEM_INCREMENT;
		new_sz = ac->mem_elems * sizeof(const char *);
		/* NOTE(review): the realloc() result is not checked for NULL;
		 * a failed grow would crash below. */
		ac->string = (char const **) realloc((void *)ac->string, new_sz);
	}
	ac->string[ac->length] = string_set_add(affix, afdict->string_set);
	ac->length++;
}
/**
 * Allocate and zero-initialize a new Gword holding subword S, and
 * append it to the sentence-wide chain of all created words.
 *
 * @param sent  Sentence the word belongs to (owns the string set,
 *              the word chain, and the node-number counter).
 * @param s     Subword text; must not be NULL.  It is interned in the
 *              sentence string set, so the caller keeps ownership of
 *              the passed buffer.
 * @return The new word node.
 */
Gword *gword_new(Sentence sent, const char *s)
{
	Gword * const gword = malloc(sizeof(*gword));

	memset(gword, 0, sizeof(*gword));
	/* BUG FIX: assert on the subword argument, as the message says.
	 * The old code asserted on the malloc() result instead -- which the
	 * memset() above would already have crashed on -- leaving a NULL s
	 * to slip through to string_set_add(). */
	assert(NULL != s, "Null-string subword");
	gword->subword = string_set_add(s, sent->string_set);

	/* Link into the chain of all words created for this sentence. */
	if (NULL != sent->last_word) sent->last_word->chain_next = gword;
	sent->last_word = gword;
	gword->node_num = sent->gword_node_num++;
	return gword;
}