int spellcheck_suggest(void * chk, char ***sug, const char * word) { struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk; if (!sug) { prt_error("Error: Aspell. Corrupt pointer.\n"); return 0; } if (aspell && aspell->speller) { const AspellWordList *list = NULL; AspellStringEnumeration *elem = NULL; const char *aword = NULL; unsigned int size, i; char **array = NULL; list = aspell_speller_suggest(aspell->speller, word, -1); elem = aspell_word_list_elements(list); size = aspell_word_list_size(list); /* allocate an array of char* for returning back to link-parser */ array = (char **)malloc(sizeof(char *) * size); if (!array) { prt_error("Error: Aspell. Out of memory.\n"); delete_aspell_string_enumeration(elem); return 0; } i = 0; while ((aword = aspell_string_enumeration_next(elem)) != NULL) { array[i++] = strdup(aword); } delete_aspell_string_enumeration(elem); *sug = array; return size; } return 0; }
/**
 * Copy `from` into `to`, upper-casing the first character on the way.
 *
 * XXX FIXME This works 'most of the time', but is not technically correct:
 * towupper() is locale dependent, and the byte length of the upper-case
 * form may differ from that of the original (e.g. German ß and SS).
 * The correct long-term fix is to use ICU or glib g_utf8_strup(), etc.
 *
 * @param to     Destination buffer; may be the same pointer as `from`.
 * @param from   Multi-byte source string.
 * @param usize  Size passed on (less the converted bytes) to safe_strcpy()
 *               when the remainder of the string is copied.
 */
void upcase_utf8_str(char *to, const char * from, size_t usize)
{
	char upper[MB_LEN_MAX];
	mbstate_t state;
	wchar_t first;
	int src_len, dst_len, k;

	/* Decode the leading character, starting from a clean shift state. */
	memset(&state, 0, sizeof(state));
	src_len = mbrtowc (&first, from, MB_CUR_MAX, &state);
	if (src_len < 0)
	{
		prt_error("Error: Invalid multi-byte string!");
		return;
	}

	/* Re-encode its upper-case form. */
	first = towupper(first);
	dst_len = wctomb_check(upper, first);

	/* An in-place conversion cannot grow the leading character. */
	if (to == from)
	{
		if (src_len < dst_len)
		{
			prt_error("Error: can't upcase multi-byte string!");
			return;
		}
	}

	/* Store the converted character. */
	k = 0;
	while (k < dst_len)
	{
		to[k] = upper[k];
		k++;
	}

	/* In place and same length: the tail is already where it belongs. */
	if ((to == from) && (src_len == dst_len)) return;

	safe_strcpy(to + dst_len, from + src_len, usize - dst_len);
}
static char * win32_getlocale (void) { char lbuf[10]; char locale[32]; LCID lcid = GetThreadLocale(); if (0 >= GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, lbuf, sizeof(lbuf))) { prt_error("Error: GetLocaleInfoA LOCALE_SENGLISHLANGUAGENAME LCID=%d: " "Error %d\n", (int)lcid, (int)GetLastError()); return NULL; } strcpy(locale, lbuf); strcat(locale, "-"); if (0 >= GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, lbuf, sizeof(lbuf))) { prt_error("Error: GetLocaleInfoA LOCALE_SISO3166CTRYNAME LCID=%d: " "Error %d\n", (int)lcid, (int)GetLastError()); return NULL; } strcat(locale, lbuf); return strdup(locale); }
/**
 * Make sure the program's LC_CTYPE locale uses a UTF-8 codeset.
 *
 * Nothing is done on Windows. Elsewhere, if the current codeset name
 * contains neither "UTF" nor "utf", en_US.UTF-8 is tried first and
 * C.UTF-8 second; a warning is printed for each step that fails.
 */
void set_utf8_program_locale(void)
{
#ifndef _WIN32
	/* The LG library doesn't use mbrtowc_l(), since it doesn't exist in
	 * the dynamic glibc (2.22). mbsrtowcs_l() could also be used, but for
	 * some reason it exists only in the static glibc.
	 * A UTF-8 codeset is ensured here so that mbrtowc() works for any
	 * UTF-8 character. */
	const char *codeset = nl_langinfo(CODESET);
	const char *locale;

	/* Already UTF-8: nothing to do. */
	if (strstr(codeset, "UTF") || strstr(codeset, "utf")) return;

	/* Stay quiet for the initial "C"/"POSIX" locale, to avoid a spurious
	 * message at startup. */
	locale = setlocale(LC_CTYPE, NULL);
	if (!((0 == strcmp(locale, "C")) || (0 == strcmp(locale, "POSIX"))))
	{
		prt_error("Warning: Program locale \"%s\" (codeset %s) was not UTF-8; "
		          "force-setting to en_US.UTF-8\n", locale, codeset);
	}

	/* First choice. */
	if (NULL != setlocale(LC_CTYPE, "en_US.UTF-8")) return;

	prt_error("Warning: Program locale en_US.UTF-8 could not be set; "
	          "force-setting to C.UTF-8\n");

	/* Fallback. */
	if (NULL != setlocale(LC_CTYPE, "C.UTF-8")) return;

	prt_error("Warning: Could not set a UTF-8 program locale; "
	          "program may malfunction\n");
#endif /* !_WIN32 */
}
/**
 * Build the disjuncts for every word, drop duplicate disjuncts and
 * set up the connectors.
 *
 * Assumes that the sentence expression lists have been generated.
 * Returns early, without setting up connectors, as soon as the
 * resources are reported exhausted.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	size_t w;

	build_sentence_disjuncts(sent, opts->disjunct_cost, opts);
	if (verbosity_level(D_PREP))
	{
		prt_error("Debug: After expanding expressions into disjuncts:\n");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	w = 0;
	while (w < sent->length)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);

		/* Some long Russian sentences can really blow up, here. */
		if (resources_exhausted(opts->resources))
			return;

		w++;
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity_level(D_PREP))
	{
		prt_error("Debug: After expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	setup_connectors(sent);
}
/**
 * Load the STARTING_LINK_TYPE_TABLE section of the knowledge file.
 *
 * The section is a sequence of [<link> <domain name>] pairs giving the
 * domain type of each starting link. The table is mandatory: the
 * program exits if the label is missing or the token count is odd.
 * The resulting array is terminated by an entry whose domain is -1.
 */
static void read_starting_link_table(pp_knowledge *k)
{
	const char label[] = "STARTING_LINK_TYPE_TABLE";
	StartingLinkAndDomain *table;
	int n_tokens, idx;

	if (!pp_lexer_set_label(k->lt, label))
	{
		prt_error("Fatal error: post_process: Couldn't find starting link table %s",label);
		exit(1);
	}

	/* Tokens come in pairs, so their number must be even. */
	n_tokens = pp_lexer_count_tokens_of_label(k->lt);
	if (n_tokens % 2)
	{
		prt_error("Fatal error: post_process: Link table must have format [<link> <domain name>]+");
		exit(1);
	}

	/* One extra slot is reserved for the end sentinel. */
	k->nStartingLinks = n_tokens/2;
	table = (StartingLinkAndDomain*)
		xalloc((1+k->nStartingLinks)*sizeof(StartingLinkAndDomain));
	k->starting_link_lookup_table = table;

	for (idx = 0; idx < k->nStartingLinks; idx++)
	{
		const char *link_tok;
		const char *domain_tok;

		/* First token of the pair: the starting link itself. */
		link_tok = pp_lexer_get_next_token_of_label(k->lt);
		table[idx].starting_link = string_set_add(link_tok, k->string_set);

		/* Second token: the domain type of the link. */
		domain_tok = pp_lexer_get_next_token_of_label(k->lt);
		check_domain_is_legal(domain_tok);
		table[idx].domain = (int) domain_tok[0];
	}

	/* End sentinel. */
	table[k->nStartingLinks].domain = -1;
}
/**
 * Parse a sentence.
 *
 * @return The number of valid linkages found, -1 if the sentence could
 *         not be split, or -2 if the sentence is too long.
 */
int sentence_parse(Sentence sent, Parse_Options opts)
{
	sent->num_valid_linkages = 0;

	if (0 != sent->length)
	{
		/* During a panic parse, we enter here a second time, with leftover
		 * garbage. Free it. We really should make the code that is panicking
		 * do this free, but right now, they have no API for it, so we do it
		 * as a favor. XXX FIXME someday. */
		free_sentence_disjuncts(sent);
	}
	else if (sentence_split(sent, opts))
	{
		/* The sentence had not been split yet, and splitting it now
		 * failed. (Splitting here is kept for backwards compatibility
		 * with programs that do not explicitly call the splitter.) */
		return -1;
	}

	/* Reject over-long sentences. */
	if (sent->length >= MAX_SENTENCE)
	{
		prt_error("Error: sentence too long, contains more than %d words\n",
		          MAX_SENTENCE);
		return -2;
	}

	resources_reset(opts->resources);

	/* Expressions were set up during the tokenize stage.
	 * Prune them, and then parse. */
	expression_prune(sent, opts);
	print_time(opts, "Finished expression pruning");

	if (!opts->use_sat_solver)
	{
		classic_parse(sent, opts);
	}
	else
	{
		sat_parse(sent, opts);
	}
	print_time(opts, "Finished parse");

	if ((verbosity > 0) &&
	    (sent->num_linkages_found > PARSE_NUM_OVERFLOW))
	{
		prt_error("Warning: Combinatorial explosion! nulls=%zu cnt=%d\n"
		          "Consider retrying the parse with the max allowed disjunct cost set lower.\n"
		          "At the command line, use !cost-max\n",
		          sent->null_count, sent->num_linkages_found);
	}

	return sent->num_valid_linkages;
}
/**
 * Parse a sentence.
 *
 * The global verbosity, debug and test settings are refreshed from
 * the parse options before anything else happens.
 *
 * @return The number of valid linkages found, -1 if the sentence could
 *         not be split, -2 if the sentence is too long, or 0 if the
 *         resources are already exhausted.
 */
int sentence_parse(Sentence sent, Parse_Options opts)
{
	verbosity = opts->verbosity;
	debug = opts->debug;
	test = opts->test;

	sent->num_valid_linkages = 0;

	/* If the sentence has not yet been split, do so now.
	 * This is for backwards compatibility, for existing programs
	 * that do not explicitly call the splitter. */
	if ((0 == sent->length) && sentence_split(sent, opts))
	{
		return -1;
	}

	/* Reject over-long sentences. */
	if (sent->length >= MAX_SENTENCE)
	{
		prt_error("Error: sentence too long, contains more than %d words\n",
		          MAX_SENTENCE);
		return -2;
	}

	/* Initialize/free any leftover garbage */
	free_sentence_disjuncts(sent);  /* Is this really needed ??? */
	resources_reset_space(opts->resources);

	if (resources_exhausted(opts->resources))
	{
		return 0;
	}

	/* Expressions were previously set up during the tokenize stage. */
	expression_prune(sent);
	print_time(opts, "Finished expression pruning");

	if (!opts->use_sat_solver)
	{
		chart_parse(sent, opts);
	}
	else
	{
		sat_parse(sent, opts);
	}
	print_time(opts, "Finished parse");

	if ((verbosity > 0) &&
	    (sent->num_linkages_found > PARSE_NUM_OVERFLOW))
	{
		prt_error("WARNING: Combinatorial explosion! nulls=%zu cnt=%d\n"
		          "Consider retrying the parse with the max allowed disjunct cost set lower.\n"
		          "At the command line, use !cost-max\n",
		          sent->null_count, sent->num_linkages_found);
	}

	return sent->num_valid_linkages;
}
/**
 * Read in the whole stinkin file.
 *
 * @param dict_name  Name handed to dictopen() to locate and open the file.
 * @return A NUL-terminated, malloc'ed buffer holding the file contents;
 *         it should be freed as soon as possible. NULL is returned if
 *         the file cannot be opened, sized, allocated for or read, or if
 *         it holds more data than fstat() reported (plus a little slack).
 */
char *get_file_contents(const char * dict_name)
{
	struct stat buf;
	size_t tot_size;
	size_t buf_size;
	size_t tot_read = 0;
	char * contents;

	/* On Windows, 'b' (binary mode) is mandatory, otherwise fstat file length
	 * is confused by crlf counted as one byte. POSIX systems just ignore it. */
	FILE *fp = dictopen(dict_name, "rb");
	if (fp == NULL)
		return NULL;

	/* Get the file size, in bytes. */
	if (0 != fstat(fileno(fp), &buf))
	{
		char errbuf[64];
		strerror_r(errno, errbuf, sizeof(errbuf));
		fclose(fp);
		prt_error("Error: %s: Cannot get file size (%s)\n", dict_name, errbuf);
		return NULL;
	}
	tot_size = buf.st_size;

	/* A few bytes of slack beyond the reported size: they leave room for
	 * the terminating NUL and let an oversized file be detected below. */
	buf_size = tot_size + 7;

	contents = (char *) malloc(sizeof(char) * buf_size);
	if (NULL == contents)
	{
		fclose(fp);
		prt_error("Error: %s: Out of memory\n", dict_name);
		return NULL;
	}

	/* Now, read the whole file.
	 * Normally, a single fread() call reads the whole file. Each call
	 * appends after the data already read, so a short read never
	 * overwrites earlier data. */
	while (tot_read < buf_size)
	{
		size_t read_size =
			fread(contents + tot_read, 1, buf_size - tot_read, fp);
		if (0 == read_size)
			break;
		tot_read += read_size;
	}

	if (0 != ferror(fp))
	{
		char errbuf[64];
		strerror_r(errno, errbuf, sizeof(errbuf));
		fclose(fp);
		prt_error("Error: %s: Read error (%s)\n", dict_name, errbuf);
		free(contents);
		return NULL;
	}
	fclose(fp);

	/* The buffer filled up completely: the file is larger than fstat()
	 * claimed, and there is no room left for the terminator. */
	if (tot_read > tot_size+6)
	{
		prt_error("Error: %s: File size is insane (%zu)!\n", dict_name, tot_size);
		free(contents);
		return NULL;
	}

	contents[tot_read] = '\0';
	return contents;
}
void LGCheckGrammar::P_UNDEFINED () { int t, n = n_errors; if (n_terms > max_char_set+1) { strcpy (gft, ".lex"); strcpy (grmfid, gdn); strcat (grmfid, gfn); strcat (grmfid, gft); char* Input_start = input_start; char* Input_end = input_end; char** Line_ptr = line_ptr; input_start = lex_input_start; input_end = lex_input_end; line_ptr = lex_line_ptr; for (t = max_char_set+1; t < n_terms; t++) // Skip <eof> { if (term_type[t] & LEXFILE) prt_error ("'%s' is listed in the .lex file, but not defined in the .lgr file", term_name[t], 0, term_line[t]); } strcpy (gft, ".lgr"); strcpy (grmfid, gdn); strcat (grmfid, gfn); strcat (grmfid, gft); input_start = Input_start; input_end = Input_end; line_ptr = Line_ptr; for (t = max_char_set+1; t < n_terms; t++) // Skip <eof> { if (!(term_type[t] & LEXFILE)) { char code = charcode[*term_name[t]]; if ( code == DIGIT || code == QUOTE) { prt_error ("'%s' is not a predefined symbol", term_name[t], 0, term_line[t]); prt_log ("Predefined symbols are:\n"); prt_log (" 0..31\n"); for (int i = 32; i < 127; i++) prt_log (" %s\n", term_name[i]); prt_log (" 127..255\n"); LG::Terminate(95); } else { prt_error ("'%s' is not defined in the lexical grammar (.lgr file)", term_name[t], 0, term_line[t]); } } } } FREE (term_type, n_terms); }
/**
 * Look up the word senses recorded for an (inflected word, disjunct)
 * pair in the corpus database.
 *
 * @param corp            Corpus whose prepared sense_query is used.
 * @param inflected_word  Word bound as query parameter 1.
 * @param disjunct        Disjunct bound as query parameter 2.
 * @param wrd             Value stored in the `word` field of every
 *                        returned Sense.
 * @return A linked list of malloc'ed Sense nodes, one per result row
 *         with a non-NULL sense text, most recently read row first;
 *         NULL if nothing was found or a bind failed. The nodes keep
 *         the inflected_word and disjunct pointers as given (they are
 *         not copied); the sense text is a strdup'ed copy.
 */
static Sense * lg_corpus_senses(Corpus *corp,
                                const char * inflected_word,
                                const char * disjunct,
                                int wrd)
{
	Sense *head = NULL;
	int rc;

	/* Look up the disjunct in the database */
	rc = sqlite3_bind_text(corp->sense_query, 1,
	                       inflected_word, -1, SQLITE_STATIC);
	if (rc != SQLITE_OK)
	{
		prt_error("Error: SQLite can't bind word in sense query: rc=%d \n", rc);
		return NULL;
	}

	rc = sqlite3_bind_text(corp->sense_query, 2,
	                       disjunct, -1, SQLITE_STATIC);
	if (rc != SQLITE_OK)
	{
		prt_error("Error: SQLite can't bind disjunct in sense query: rc=%d \n", rc);
		return NULL;
	}

	for (rc = sqlite3_step(corp->sense_query);
	     SQLITE_ROW == rc;
	     rc = sqlite3_step(corp->sense_query))
	{
		const unsigned char *sense = sqlite3_column_text(corp->sense_query, 0);
		double log_prob = sqlite3_column_double(corp->sense_query, 1);
		Sense *sns;
		char *sense_copy;

		/* sqlite3_column_text() may hand back NULL; such a row carries
		 * no usable sense, so skip it instead of passing NULL to strdup(). */
		if (NULL == sense) continue;

		sns = (Sense *) malloc(sizeof(Sense));
		sense_copy = strdup((const char *) sense);
		if ((NULL == sns) || (NULL == sense_copy))
		{
			prt_error("Error: Out of memory while reading word senses\n");
			free(sense_copy);
			free(sns);
			break;
		}

		sns->inflected_word = inflected_word;
		sns->disjunct = disjunct;
		sns->sense = sense_copy;
		sns->score = log_prob;
		sns->word = wrd;

		/* Push onto the front of the list. */
		sns->next = head;
		head = sns;
	}

	/* Failure to do both a reset *and* a clear will cause subsequent
	 * binds to fail. */
	sqlite3_reset(corp->sense_query);
	sqlite3_clear_bindings(corp->sense_query);

	return head;
}
/**
 * get_disjunct_score -- get log probability of observing disjunct.
 *
 * Given an "inflected" word and a disjunct, this routine returns the
 * -log_2 conditional probability prob(d|w) of seeing the disjunct 'd'
 * given that the word 'w' was observed. Here, "inflected word" means
 * the link-grammar dictionary entry, complete with its trailing period
 * and tag -- e.g. run.v or running.g -- everything after the dot is the
 * "inflection".
 *
 * LOW_SCORE is returned when a bind fails or when the query yields no
 * row; a value found in the database is capped at LOW_SCORE.
 */
static double get_disjunct_score(Corpus *corp,
                                 const char * inflected_word,
                                 const char * disjunct)
{
	double score;
	int status;

	/* Bind the word and the disjunct to the prepared rank query. */
	status = sqlite3_bind_text(corp->rank_query, 1,
	                           inflected_word, -1, SQLITE_STATIC);
	if (SQLITE_OK != status)
	{
		const char *errmsg = sqlite3_errmsg(corp->dbconn);
		prt_error("Error: SQLite can't bind word: rc=%d %s\n", status, errmsg);
		return LOW_SCORE;
	}

	status = sqlite3_bind_text(corp->rank_query, 2,
	                           disjunct, -1, SQLITE_STATIC);
	if (SQLITE_OK != status)
	{
		const char *errmsg = sqlite3_errmsg(corp->dbconn);
		prt_error("Error: SQLite can't bind disjunct: rc=%d %s\n", status, errmsg);
		return LOW_SCORE;
	}

	status = sqlite3_step(corp->rank_query);
	if (SQLITE_ROW == status)
	{
		score = sqlite3_column_double(corp->rank_query, 0);
		if (score > LOW_SCORE) score = LOW_SCORE;
#ifdef DEBUG
		printf ("Word=%s dj=%s score=%f\n",
			inflected_word, disjunct, score);
#endif
	}
	else
	{
		score = LOW_SCORE;
#ifdef DEBUG
		printf ("Word=%s dj=%s not found in dict, assume score=%f\n",
			inflected_word, disjunct, score);
#endif
		/* Result codes below SQLITE_ROW are reported as errors. */
		if (status < SQLITE_ROW)
		{
			const char *errmsg = sqlite3_errmsg(corp->dbconn);
			prt_error("Error: SQLite can't ifind word: rc=%d %s\n", status, errmsg);
		}
	}

	/* Failure to do both a reset *and* a clear will cause subsequent
	 * binds to fail. */
	sqlite3_reset(corp->rank_query);
	sqlite3_clear_bindings(corp->rank_query);

	return score;
}
/**
 * Read the 'contains_one_rules' or the 'contains_none_rules' section
 * of the knowledge file into an array of rules.
 *
 * Each rule consists of three comma-separated token groups: a single
 * selector link, a set of links, and a single error message.
 *
 * @param k       Knowledge being loaded (supplies lexer and string set).
 * @param label   Label of the section to read.
 * @param rules   Out: xalloc'ed array of *nRules rules, followed by a
 *                sentinel entry whose msg is 0.
 * @param nRules  Out: number of rules read; 0 if the label is absent.
 *
 * The program exits if the selector or the message of a rule is not
 * made of exactly one token.
 */
static void read_contains_rules(pp_knowledge *k, const char *label,
                                pp_rule **rules, int *nRules)
{
	int n_commas, n_tokens, i, r;
	const char *p;
	const char **tokens;

	if (!pp_lexer_set_label(k->lt, label))
	{
		*nRules = 0;
		if (verbosity>0) printf("PP warning: Not using any %s rules\n", label);
	}
	else
	{
		n_commas = pp_lexer_count_commas_of_label(k->lt);
		*nRules = (n_commas + 1)/3;
	}

	/* One extra slot for the sentinel entry. */
	*rules = (pp_rule*) xalloc ((1+*nRules)*sizeof(pp_rule));

	for (r=0; r<*nRules; r++)
	{
		pp_rule *rule = &(*rules)[r];

		/* First read the selector link. Exactly one token is required:
		 * an empty group must be rejected too, since tokens[0] is
		 * read just below. */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens != 1)
		{
			prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
			exit(1);
		}
		rule->selector = string_set_add(tokens[0], k->string_set);

		/* Read the link set. */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		rule->link_set = pp_linkset_open(n_tokens);
		rule->link_set_size = n_tokens;
		rule->link_array = (const char **) xalloc((1+n_tokens)*sizeof(const char*));
		for (i=0; i<n_tokens; i++)
		{
			p = string_set_add(tokens[i], k->string_set);
			pp_linkset_add(rule->link_set, p);
			rule->link_array[i] = p;
		}
		rule->link_array[i]=0; /* NULL-terminator */

		/* Read the error message; again exactly one token. */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens != 1)
		{
			prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
			exit(1);
		}
		rule->msg = string_set_add(tokens[0], k->string_set);
	}

	/* sentinel entry */
	(*rules)[*nRules].msg = 0;
}
/**
 * Read the 'form a cycle' rules section of the knowledge file.
 *
 * Each rule consists of two comma-separated token groups: a non-empty
 * set of links, and a single error message.
 *
 * @param k      Knowledge being loaded. On return form_a_cycle_rules
 *               points to a malloc'ed array of n_form_a_cycle_rules
 *               rules followed by a sentinel entry whose msg is 0.
 * @param label  Label of the section to read; if it is absent, no rules
 *               are used.
 * @return true on success, false on a syntax error (an error message
 *         is printed; the sentinel entry is not written in that case).
 */
static bool read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
	size_t n_commas, n_tokens;
	size_t r, i;
	pp_linkset *lsHandle;
	const char **tokens;

	if (!pp_lexer_set_label(k->lt, label))
	{
		k->n_form_a_cycle_rules = 0;
		if (verbosity_level(+D_PPK))
			prt_error("Warning: File %s: Not using any 'form a cycle' rules\n",
			          k->path);
	}
	else
	{
		n_commas = pp_lexer_count_commas_of_label(k->lt);
		k->n_form_a_cycle_rules = (n_commas + 1)/2;
	}

	/* One extra slot for the sentinel entry. */
	k->form_a_cycle_rules =
		(pp_rule*) malloc ((1+k->n_form_a_cycle_rules)*sizeof(pp_rule));

	for (r=0; r<k->n_form_a_cycle_rules; r++)
	{
		/* Read the link set; it must not be empty. */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (0 == n_tokens)
		{
			prt_error("Error: File %s: Syntax error\n", k->path);
			return false;
		}
		lsHandle = pp_linkset_open(n_tokens);
		for (i=0; i<n_tokens; i++)
			pp_linkset_add(lsHandle,string_set_add(tokens[i], k->string_set));
		k->form_a_cycle_rules[r].link_set = lsHandle;

		/* Read the error message. Exactly one token is required: an
		 * empty group must be rejected too, since tokens[0] is read
		 * just below. */
		tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
		if (n_tokens != 1)
		{
			prt_error("Error: File %s: Invalid syntax (rule %zu of %s)\n",
			          k->path, r+1,label);
			return false;
		}
		k->form_a_cycle_rules[r].msg = string_set_add(tokens[0], k->string_set);
		k->form_a_cycle_rules[r].use_count = 0;
	}

	/* sentinel entry */
	k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
	k->form_a_cycle_rules[k->n_form_a_cycle_rules].use_count = 0;

	return true;
}
static void prt_regerror(const char *msg, const Regex_node *re, int rc, int erroffset) { #if HAVE_PCRE2_H PCRE2_UCHAR errbuf[ERRBUFFLEN]; pcre2_get_error_message(rc, errbuf, ERRBUFFLEN); #else char errbuf[ERRBUFFLEN]; regerror(rc, re->re, errbuf, ERRBUFFLEN); #endif /* HAVE_PCRE2_H */ prt_error("Error: %s: \"%s\" (%s", msg, re->pattern, re->name); if (-1 != erroffset) prt_error(" at %d", erroffset); prt_error("): %s (%d)\n", errbuf, rc); }
/**
 * Create and set up the per-thread data used by the JNI bindings:
 * the locale, the normal and panic parse options, and the English
 * dictionary.
 *
 * @param env  JNI environment, used to raise Java exceptions.
 * @param cls  Not used here.
 * @return The new per-thread data. If the dictionary cannot be opened,
 *         an exception is raised through throwException() and the data
 *         is returned with a NULL dict. NULL is returned (again with an
 *         exception raised) if the structure itself cannot be allocated.
 */
static per_thread_data * init(JNIEnv *env, jclass cls)
{
	const char *codeset, *dict_version;
	per_thread_data *ptd;

	/* Get the locale from the environment...
	 * perhaps we should someday get it from the dictionary ?? */
	setlocale(LC_ALL, "");

	/* Everything breaks if the locale is not UTF-8; check for this,
	 * and force the issue ! */
	codeset = nl_langinfo(CODESET);
	if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
	{
		prt_error("Warning: JNI: locale %s was not UTF-8; force-setting to en_US.UTF-8\n",
		          codeset);
		setlocale(LC_CTYPE, "en_US.UTF-8");
	}

	ptd = (per_thread_data *) malloc(sizeof(per_thread_data));
	if (NULL == ptd)
	{
		throwException(env, "Error: out of memory");
		return NULL;
	}
	memset(ptd, 0, sizeof(per_thread_data));

	ptd->panic_parse_opts = parse_options_create();
	setup_panic_parse_options(ptd->panic_parse_opts);

	ptd->opts = parse_options_create();
	parse_options_set_disjunct_costf(ptd->opts, 3.0f);
	parse_options_set_max_sentence_length(ptd->opts, 170);
	parse_options_set_max_parse_time(ptd->opts, 30);
	parse_options_set_linkage_limit(ptd->opts, 1000);
	parse_options_set_short_length(ptd->opts, 10);
	parse_options_set_verbosity(ptd->opts,0);
	parse_options_set_spell_guess(ptd->opts, FALSE);

	/* Default to the english language; will need to fix
	 * this if/when more languages are supported. */
	ptd->dict = dictionary_create_lang("en");
	if (!ptd->dict)
	{
		/* Stop here: the dictionary version cannot be queried without
		 * a dictionary. */
		throwException(env, "Error: unable to open dictionary");
		return ptd;
	}
	test();

	dict_version = linkgrammar_get_dict_version(ptd->dict);
	prt_error("Info: JNI: dictionary version %s\n", dict_version);

	return ptd;
}
/**
 * Return the directory in which to look for the data files.
 *
 * If a custom data directory has been set, a safe_strdup'ed copy of it
 * is returned. Otherwise, on Windows only, the directory of the running
 * executable is returned (also safe_strdup'ed).
 *
 * @return The directory, or NULL if none could be determined.
 */
char * dictionary_get_data_dir(void)
{
	char * data_dir = NULL;

	if (custom_data_dir != NULL)
	{
		data_dir = safe_strdup(custom_data_dir);
		return data_dir;
	}

#ifdef _WIN32
	/* Dynamically locate invocation directory of our program.
	 * Non-ASCII characters are not supported (files will not be found). */
	char prog_path[MAX_PATH_NAME];

	if (!GetModuleFileNameA(NULL, prog_path, sizeof(prog_path)))
	{
		prt_error("Warning: GetModuleFileName error %d\n", (int)GetLastError());
	}
	else if (!PathRemoveFileSpecA(prog_path))
	{
		prt_error("Warning: Cannot get directory from program path '%s'!\n",
		          prog_path);
	}
	else
	{
		/* Unconvertible characters are marked as '?' */
		const char *unsupported = (NULL != strchr(prog_path, '?')) ?
			" (containing unsupported character)" : "";

		/* The path comes first; the optional remark follows it. */
		lgdebug(D_USER_FILES, "Debug: Directory of executable: %s%s\n",
		        prog_path, unsupported);
		data_dir = safe_strdup(prog_path);
	}
#endif /* _WIN32 */

	return data_dir;
}
/**
 * Read the link set marked by `label` in the knowledge file into a
 * new set of links.
 *
 * @param k      Knowledge being loaded (supplies the lexer and path).
 * @param label  Label of the link set in the knowledge file.
 * @param ss     String set to which every link name is added.
 * @return The new link set. It is empty if the label is not defined in
 *         the file. The address of LINK_SET_ERROR is returned if the
 *         token count is reported as -1.
 */
static pp_linkset *read_link_set(pp_knowledge *k,
                                 const char *label, String_set *ss)
{
	int count = 0;
	int idx;
	pp_linkset *set;

	if (pp_lexer_set_label(k->lt, label))
	{
		count = pp_lexer_count_tokens_of_label(k->lt);
		if (-1 == count) return &LINK_SET_ERROR;
	}
	else if (verbosity_level(+D_PPK))
	{
		prt_error("Warning: File %s: Link set %s not defined: assuming empty\n",
		          k->path, label);
	}

	set = pp_linkset_open(count);
	for (idx = 0; idx < count; idx++)
	{
		const char *tok = pp_lexer_get_next_token_of_label(k->lt);
		pp_linkset_add(set, string_set_add(tok, ss));
	}

	return set;
}
/**
 * Release a dictionary together with everything hanging off it.
 *
 * A NULL dictionary is ignored. The affix table, which is itself a
 * dictionary, is deleted recursively.
 */
void dictionary_delete(Dictionary dict)
{
	if (NULL == dict) return;

	if (verbosity > 0)
	{
		prt_error("Info: Freeing dictionary %s", dict->name);
	}

#ifdef USE_CORPUS
	lg_corpus_delete(dict->corpus);
#endif

	if (NULL != dict->affix_table)
	{
		affix_list_delete(dict->affix_table);
		dictionary_delete(dict->affix_table);
	}
	spellcheck_destroy(dict->spell_checker);

	connector_set_delete(dict->unlimited_connector_set);

	/* Optional close hook. */
	if (dict->close)
	{
		dict->close(dict);
	}

	pp_knowledge_close(dict->base_knowledge);
	pp_knowledge_close(dict->hpsg_knowledge);
	string_set_delete(dict->string_set);
	free_regexs(dict->regex_root);
#ifdef USE_ANYSPLIT
	free_anysplit(dict);
#endif
	free_dictionary(dict);
	xfree(dict, sizeof(struct Dictionary_s));

	/* Free the directory path cache */
	object_open(NULL, NULL, NULL);
}
/**
 * Compile all the regexs of the given list that are not compiled yet.
 *
 * @param re    Head of the list of regex nodes. A node whose `re` field
 *              is non-NULL is assumed to be compiled already.
 * @param dict  If not NULL, each newly compiled regex name is looked up
 *              in this dictionary and an error is printed when it is
 *              missing (this does not make the function fail).
 * @return 0 on success, else the error code of the first failure:
 *         the regcomp() code, or REG_ESPACE if memory ran out.
 *         Compilation stops at the first failure, and the failing
 *         node is left with a NULL `re` field.
 */
int compile_regexs(Regex_node *re, Dictionary dict)
{
	regex_t *preg;
	int rc;

	for (; re != NULL; re = re->next)
	{
		/* If re->re non-null, assume compiled already. */
		if (re->re != NULL) continue;

		preg = (regex_t *) malloc (sizeof(regex_t));
		if (NULL == preg)
		{
			prt_error("Error: Out of memory while compiling regex %s\n",
			          re->name);
			return REG_ESPACE;
		}
		re->re = preg;

		rc = regcomp(preg, re->pattern, REG_EXTENDED);
		if (rc)
		{
			prt_regerror("Failed to compile regex", re, rc);

			/* The contents of preg are undefined after a failed
			 * regcomp(): do not leave it behind looking like a
			 * compiled regex. */
			free(preg);
			re->re = NULL;
			return rc;
		}

		/* Check that the regex name is defined in the dictionary. */
		if ((NULL != dict) && !boolean_dictionary_lookup(dict, re->name))
		{
			/* TODO: better error handing. Maybe remove the regex? */
			prt_error("Error: Regex name %s not found in dictionary!\n",
			          re->name);
		}
	}
	return 0;
}
/**
 * Read a word file and prepend its words to a list of Dict_nodes.
 *
 * (1) opens the word file and records it at the head of the
 *     dictionary's word file list,
 * (2) reads in the words, one at a time,
 * (3) puts each word in a new Dict_node,
 * (4) chains each node, through its left pointer, in front of the
 *     list pointed to by dn,
 * (5) returns a pointer to the first node of the resulting list.
 *
 * The first character of `filename` (a leading '/') is skipped.
 * NULL is returned if the file cannot be opened.
 */
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
{
	char path[MAX_PATH_NAME+1];
	Word_file *entry;
	const char *word;
	FILE *in;

	/* Get rid of the leading '/'. */
	safe_strcpy(path, filename+1, sizeof(path));

	in = dictopen(path, "r");
	if (NULL == in)
	{
		prt_error("Error opening word file %s\n", path);
		return NULL;
	}

	/* Record the file at the head of the word file list. */
	entry = (Word_file *) xalloc(sizeof (Word_file));
	safe_strcpy(entry->file, path, sizeof(entry->file));
	entry->changed = FALSE;
	entry->next = dict->word_file_header;
	dict->word_file_header = entry;

	/* One node per word, each pushed in front of the previous one. */
	for (word = get_a_word(dict, in);
	     NULL != word;
	     word = get_a_word(dict, in))
	{
		Dict_node *node = (Dict_node *) xalloc(sizeof(Dict_node));
		node->left = dn;
		node->string = word;
		node->file = entry;
		dn = node;
	}

	fclose(in);
	return dn;
}
/**
 * Read the next whitespace-delimited word from the file.
 *
 * The word is read as wide characters, converted back to multi-byte
 * form and added to the dictionary's string set.
 *
 * @return The word as returned by string_set_add(), or NULL at end of
 *         file. The program exits if the word is too long.
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	mbstate_t state;
	wint_t wc;
	int len;

	/* Skip leading whitespace. */
	wc = fgetwc(fp);
	while ((wc != WEOF) && iswspace(wc))
	{
		wc = fgetwc(fp);
	}
	if (wc == WEOF) return NULL;

	/* Collect characters up to the next whitespace or end of file. */
	memset(&state, 0, sizeof(state));
	len = 0;
	while ((len < MAX_WORD) && (!iswspace(wc)) && (wc != WEOF))
	{
		len += wctomb_check(&word[len], wc, &state);
		wc = fgetwc(fp);
	}

	if (len >= MAX_WORD)
	{
		word[MAX_WORD] = 0x0;
		prt_error("Fatal Error: The dictionary contains a word that "
		          "is too long. The word was: %s", word);
		exit(1);
	}

	word[len] = '\0';
	return string_set_add(word, dict->string_set);
}
/**
 * Read the next whitespace-delimited word from the file.
 *
 * The word is passed through patch_subscript() and then added to the
 * dictionary's string set.
 *
 * @return The word as returned by string_set_add(); NULL at end of
 *         file; or, if the word is too long, an empty string (which
 *         cannot be a valid word) as an error indication.
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	int ch;
	int len = 0;

	/* Skip leading whitespace. */
	ch = fgetc(fp);
	while ((ch != EOF) && lg_isspace(ch))
	{
		ch = fgetc(fp);
	}
	if (ch == EOF) return NULL;

	/* Collect bytes up to the next whitespace or end of file. */
	while ((len < MAX_WORD) && (!lg_isspace(ch)) && (ch != EOF))
	{
		word[len] = ch;
		len++;
		ch = fgetc(fp);
	}

	if (len >= MAX_WORD)
	{
		word[MAX_WORD] = '\0';
		prt_error("The dictionary contains a word that is too long: %s\n", word);
		return ""; /* error indication */
	}

	word[len] = '\0';
	patch_subscript(word);
	return string_set_add(word, dict->string_set);
}
/**
 * Print a linkage to stdout, in each of the forms that are switched on
 * in the command options.
 *
 * A NULL linkage is ignored. If the constituent tree cannot be
 * generated, constituent display is switched off in `copts` and an
 * error is printed.
 */
static void process_linkage(Linkage linkage, Command_Options* copts)
{
	ConstituentDisplayStyle style;
	char * text;

	if (!linkage) return;  /* Can happen in timeout mode */

	/* Post-processing messages. */
	if (copts->display_bad)
	{
		text = linkage_print_pp_msgs(linkage);
		fprintf(stdout, "%s\n", text);
		linkage_free_pp_msgs(text);
	}

	/* Diagram. */
	if (copts->display_on)
	{
		text = linkage_print_diagram(linkage, copts->display_walls,
		                             copts->screen_width);
		fprintf(stdout, "%s", text);
		linkage_free_diagram(text);
	}

	/* Constituent tree. */
	style = copts->display_constituents;
	if (style)
	{
		text = linkage_print_constituent_tree(linkage, style);
		if (text == NULL)
		{
			copts->display_constituents = 0;
			prt_error("Error: Can't generate constituents.\n"
			          "Constituent processing has been turned off.\n");
		}
		else
		{
			fprintf(stdout, "%s\n", text);
			linkage_free_constituent_tree_str(text);
		}
	}

	/* Links and domains. */
	if (copts->display_links)
	{
		text = linkage_print_links_and_domains(linkage);
		fprintf(stdout, "%s", text);
		linkage_free_links_and_domains(text);
	}

	/* Senses. */
	if (copts->display_senses)
	{
		text = linkage_print_senses(linkage);
		fprintf(stdout, "%s", text);
		linkage_free_senses(text);
	}

	/* Disjuncts. */
	if (copts->display_disjuncts)
	{
		text = linkage_print_disjuncts(linkage);
		fprintf(stdout, "%s\n", text);
		linkage_free_disjuncts(text);
	}

	/* Postscript. */
	if (copts->display_postscript)
	{
		text = linkage_print_postscript(linkage, copts->display_walls,
		                                copts->display_ps_header);
		fprintf(stdout, "%s\n", text);
		linkage_free_postscript(text);
	}
}
static void* db_open(const char * fullname, void * user_data) { int fd; struct stat buf; sqlite3 *db; /* Is there a file here that can be read? */ FILE * fh = fopen(fullname, "r"); if (NULL == fh) return NULL; /* Get the file size, in bytes. */ /* SQLite has a habit of leaving zero-length DB's lying around */ fd = fileno(fh); fstat(fd, &buf); if (0 == buf.st_size) { fclose(fh); return NULL; } /* Found a file, of non-zero length. See if that works. */ if (sqlite3_open(fullname, &db)) { prt_error("Error: Can't open database %s: %s\n", fullname, sqlite3_errmsg(db)); sqlite3_close(db); return NULL; } return (void *) db; }
/**
 * Print out the constituent tree.
 * mode 1: treebank-style constituent tree
 * mode 2: flat, bracketed tree [A like [B this B] A]
 * mode 3: flat, treebank-style tree (A like (B this))
 *
 * MULTILINE and SINGLE_LINE are rendered with print_tree(), and
 * BRACKET_TREE with print_flat_constituents().
 *
 * @return A newly built string, or NULL if there is no linkage, if the
 *         mode is NO_DISPLAY, or if the mode is not a known one (a
 *         warning is printed in that last case).
 */
char * linkage_print_constituent_tree(Linkage linkage,
                                      ConstituentDisplayStyle mode)
{
	String * out;
	CNode * tree;
	char * result;

	if (!linkage) return NULL;
	if (mode == NO_DISPLAY) return NULL;

	if ((mode == MULTILINE) || (mode == SINGLE_LINE))
	{
		out = string_new();
		tree = linkage_constituent_tree(linkage);
		print_tree(out, (mode==1), tree, 0, 0);
		linkage_free_constituent_tree(tree);
		append_string(out, "\n");

		result = string_copy(out);
		string_delete(out);
		return result;
	}

	if (mode == BRACKET_TREE)
	{
		return print_flat_constituents(linkage);
	}

	prt_error("Warning: Illegal mode %d for printing constituents\n"
	          "Allowed values: %d to %d\n", mode, NO_DISPLAY, MAX_STYLES);
	return NULL;
}
/**
 * popen a command with the given input.
 * If the system doesn't have fork(), popen() is used to launch "dot".
 * This is an inferior implementation than the one that uses fork(),
 * in which the window remains open and is updated automatically
 * when new sentences are entered. With popen(), the program blocks at
 * pclose() and the user needs to close the window after each sentence.
 *
 * @param cmd   Command line to run; it is opened for writing.
 * @param wgds  Text written to the command's standard input.
 *
 * Failures of popen(), of the write and of pclose() are each reported
 * through prt_error().
 */
static void x_popen(const char *cmd, const char *wgds)
{
	FILE *const cmdf = popen(cmd, "w");

	if (NULL == cmdf)
	{
		prt_error("Error: popen of '%s' failed: %s", cmd, strerror(errno));
		return;
	}

	/* fprintf() reports failure with a negative value, which is not
	 * necessarily -1. */
	if (fprintf(cmdf, "%s", wgds) < 0)
		prt_error("Error: print to display command: %s", strerror(errno));

	if (pclose(cmdf) == -1)
		prt_error("Error: pclose of display command: %s", strerror(errno));
}
/**
 * Select whether the Boolean SAT parser is to be used.
 *
 * When the library is built with USE_SAT_SOLVER, the flag is stored in
 * the parse options. Otherwise the options are left untouched and an
 * error is printed.
 */
void parse_options_set_use_sat_parser(Parse_Options opts, bool dummy)
{
#ifndef USE_SAT_SOLVER
	prt_error("Error: cannot enable the Boolean SAT parser; "
	          "this  library was built without SAT solver support.\n");
#else
	opts->use_sat_solver = dummy;
#endif
}
/**
 * Check that a domain name is exactly one character long.
 * The program exits with an error message if it is not.
 *
 * @param p  NUL-terminated domain name.
 */
static void check_domain_is_legal(const char *p)
{
	/* Test p[0] first: for an empty string, p[1] lies beyond the
	 * terminator and must not be read. */
	if ((0x0 == p[0]) || (0x0 != p[1]))
	{
		prt_error("Fatal Error: post_process(): Domain (%s) must be a single character", p);
		exit(1);
	}
}
/**
 * Read the 'bounded' rules section of the knowledge file.
 *
 * Each rule consists of two comma-separated token groups of exactly
 * one token each: a domain (only its first character is kept) and an
 * error message.
 *
 * @param k      Knowledge being loaded. On return bounded_rules points
 *               to a malloc'ed array of n_bounded_rules rules followed
 *               by a sentinel entry whose msg is 0.
 * @param label  Label of the section to read; if it is absent, no rules
 *               are used.
 * @return true on success, false on a syntax error (an error message
 *         is printed; the sentinel entry is not written in that case).
 */
static bool read_bounded_rules(pp_knowledge *k, const char *label)
{
	const char **group;
	size_t n_commas, group_len;
	size_t idx;

	if (pp_lexer_set_label(k->lt, label))
	{
		n_commas = pp_lexer_count_commas_of_label(k->lt);
		k->n_bounded_rules = (n_commas + 1)/2;
	}
	else
	{
		k->n_bounded_rules = 0;
		if (verbosity_level(+D_PPK))
			prt_error("Warning: File %s: Not using any 'bounded' rules\n", k->path);
	}

	/* One extra slot for the sentinel entry. */
	k->bounded_rules = (pp_rule*) malloc ((1+k->n_bounded_rules)*sizeof(pp_rule));

	idx = 0;
	while (idx < k->n_bounded_rules)
	{
		/* Domain. */
		group = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &group_len);
		if (1 != group_len)
		{
			prt_error("Error: File %s: Invalid syntax: rule %zu of %s\n",
			          k->path, idx+1,label);
			return false;
		}
		k->bounded_rules[idx].domain = (int) group[0][0];

		/* Error message. */
		group = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &group_len);
		if (1 != group_len)
		{
			prt_error("Error: File %s: Invalid syntax: rule %zu of %s\n",
			          k->path, idx+1,label);
			return false;
		}
		k->bounded_rules[idx].msg = string_set_add(group[0], k->string_set);
		k->bounded_rules[idx].use_count = 0;

		idx++;
	}

	/* Sentinel entry. */
	k->bounded_rules[k->n_bounded_rules].msg = 0;
	k->bounded_rules[k->n_bounded_rules].use_count = 0;

	return true;
}