示例#1
0
int spellcheck_suggest(void * chk, char ***sug, const char * word)
{
    struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk;
    if (!sug) {
        prt_error("Error: Aspell. Corrupt pointer.\n");
        return 0;
    }
    if (aspell && aspell->speller) {
        const AspellWordList *list = NULL;
        AspellStringEnumeration *elem = NULL;
        const char *aword = NULL;
        unsigned int size, i;
        char **array = NULL;

        list = aspell_speller_suggest(aspell->speller, word, -1);
        elem = aspell_word_list_elements(list);
        size = aspell_word_list_size(list);
        /* allocate an array of char* for returning back to link-parser
         */
        array = (char **)malloc(sizeof(char *) * size);
        if (!array) {
            prt_error("Error: Aspell. Out of memory.\n");
            delete_aspell_string_enumeration(elem);
            return 0;
        }
        i = 0;
        while ((aword = aspell_string_enumeration_next(elem)) != NULL) {
            array[i++] = strdup(aword);
        }
        delete_aspell_string_enumeration(elem);
        *sug = array;
        return size;
    }
    return 0;
}
示例#2
0
/**
 * Upcase the first letter of the word.
 * XXX FIXME This works 'most of the time', but is not technically correct.
 * This is because towlower() and towupper() are locale dependent, and also
 * because the byte-counts might not match up, e.g. German ß and SS.
 * The correct long-term fix is to use ICU or glib g_utf8_strup(), etc.
 */
void upcase_utf8_str(char *to, const char * from, size_t usize)
{
	wchar_t c;
	int i, nbl, nbh;
	char low[MB_LEN_MAX];
	mbstate_t mbs;

	memset(&mbs, 0, sizeof(mbs));
	nbh = mbrtowc (&c, from, MB_CUR_MAX, &mbs);
	if (nbh < 0)
	{
		prt_error("Error: Invalid multi-byte string!");
		return;
	}
	c = towupper(c);
	nbl = wctomb_check(low, c);

	/* Check for error on an in-place copy */
	if ((nbh < nbl) && (to == from))
	{
		/* I'm to lazy to fix this */
		prt_error("Error: can't upcase multi-byte string!");
		return;
	}

	/* Upcase */
	for (i=0; i<nbl; i++) { to[i] = low[i]; }

	if ((nbh == nbl) && (to == from)) return;

	from += nbh;
	to += nbl;
	safe_strcpy(to, from, usize-nbl);
}
示例#3
0
static char *
win32_getlocale (void)
{
	char lbuf[10];
	char locale[32];

	LCID lcid = GetThreadLocale();

	if (0 >= GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, lbuf, sizeof(lbuf)))
	{
		prt_error("Error: GetLocaleInfoA LOCALE_SENGLISHLANGUAGENAME LCID=%d: "
		          "Error %d\n", (int)lcid, (int)GetLastError());
		return NULL;
	}
	strcpy(locale, lbuf);
	strcat(locale, "-");

	if (0 >= GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, lbuf, sizeof(lbuf)))
	{
		prt_error("Error: GetLocaleInfoA LOCALE_SISO3166CTRYNAME LCID=%d: "
		          "Error %d\n", (int)lcid, (int)GetLastError());
		return NULL;
	}
	strcat(locale, lbuf);

	return strdup(locale);
}
示例#4
0
/**
 * Ensure that the program's locale has a UTF-8 codeset.
 */
void set_utf8_program_locale(void)
{
#ifndef _WIN32
	/* The LG library doesn't use mbrtowc_l(), since it doesn't exist in
	 * the dynamic glibc (2.22). mbsrtowcs_l() could also be used, but for
	 * some reason it exists only in the static glibc.
	 * In order that mbrtowc() will work for any UTF-8 character, UTF-8
	 * codeset is ensured. */
	const char *codeset = nl_langinfo(CODESET);
	if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
	{
		const char *locale = setlocale(LC_CTYPE, NULL);
		/* Avoid an initial spurious message. */
		if ((0 != strcmp(locale, "C")) && (0 != strcmp(locale, "POSIX")))
		{
			prt_error("Warning: Program locale \"%s\" (codeset %s) was not UTF-8; "
						 "force-setting to en_US.UTF-8\n", locale, codeset);
		}
		locale = setlocale(LC_CTYPE, "en_US.UTF-8");
		if (NULL == locale)
		{
			prt_error("Warning: Program locale en_US.UTF-8 could not be set; "
			          "force-setting to C.UTF-8\n");
			locale = setlocale(LC_CTYPE, "C.UTF-8");
			if (NULL == locale)
			{
				prt_error("Warning: Could not set a UTF-8 program locale; "
				          "program may malfunction\n");
			}
		}
	}
#endif /* !_WIN32 */
}
示例#5
0
/**
 * Assumes that the sentence expression lists have been generated.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	size_t i;

	build_sentence_disjuncts(sent, opts->disjunct_cost, opts);
	if (verbosity_level(D_PREP))
	{
		prt_error("Debug: After expanding expressions into disjuncts:\n");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	for (i=0; i<sent->length; i++)
	{
		sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d);

		/* Some long Russian sentences can really blow up, here. */
		if (resources_exhausted(opts->resources))
			return;
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity_level(D_PREP))
	{
		prt_error("Debug: After expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	setup_connectors(sent);
}
示例#6
0
/**
 * Read table of [link, domain type].
 * This tells us what domain type each link belongs to.
 * This lookup table *must* be defined in the knowledge file.
 */
static void read_starting_link_table(pp_knowledge *k)
{
  const char *p;
  const char label[] = "STARTING_LINK_TYPE_TABLE";
  int i, n_tokens;
  if (!pp_lexer_set_label(k->lt, label))
  {
    prt_error("Fatal error: post_process: Couldn't find starting link table %s",label);
    exit(1);
  }
  n_tokens = pp_lexer_count_tokens_of_label(k->lt);
  if (n_tokens %2)
  {
    prt_error("Fatal error: post_process: Link table must have format [<link> <domain name>]+");
    exit(1);
  }
  k->nStartingLinks = n_tokens/2;
  k->starting_link_lookup_table = (StartingLinkAndDomain*)
    xalloc((1+k->nStartingLinks)*sizeof(StartingLinkAndDomain));
  for (i=0; i<k->nStartingLinks; i++)
    {
      /* read the starting link itself */
      k->starting_link_lookup_table[i].starting_link =
	string_set_add(pp_lexer_get_next_token_of_label(k->lt),k->string_set);

      /* read the domain type of the link */
      p = pp_lexer_get_next_token_of_label(k->lt);
      check_domain_is_legal(p);
      k->starting_link_lookup_table[i].domain = (int) p[0];
    }

  /* end sentinel */
  k->starting_link_lookup_table[k->nStartingLinks].domain = -1;
}
示例#7
0
int sentence_parse(Sentence sent, Parse_Options opts)
{
	int rc;

	sent->num_valid_linkages = 0;

	/* If the sentence has not yet been split, do so now.
	 * This is for backwards compatibility, for existing programs
	 * that do not explicitly call the splitter.
	 */
	if (0 == sent->length)
	{
		rc = sentence_split(sent, opts);
		if (rc) return -1;
	}
	else
	{
		/* During a panic parse, we enter here a second time, with leftover
		 * garbage. Free it. We really should make the code that is panicking
		 * do this free, but right now, they have no API for it, so we do it
		 * as a favor. XXX FIXME someday. */
		free_sentence_disjuncts(sent);
	}

	/* Check for bad sentence length */
	if (MAX_SENTENCE <= sent->length)
	{
		prt_error("Error: sentence too long, contains more than %d words\n",
			MAX_SENTENCE);
		return -2;
	}

	resources_reset(opts->resources);

	/* Expressions were set up during the tokenize stage.
	 * Prune them, and then parse.
	 */
	expression_prune(sent, opts);
	print_time(opts, "Finished expression pruning");
	if (opts->use_sat_solver)
	{
		sat_parse(sent, opts);
	}
	else
	{
		classic_parse(sent, opts);
	}
	print_time(opts, "Finished parse");

	if ((verbosity > 0) &&
	   (PARSE_NUM_OVERFLOW < sent->num_linkages_found))
	{
		prt_error("Warning: Combinatorial explosion! nulls=%zu cnt=%d\n"
			"Consider retrying the parse with the max allowed disjunct cost set lower.\n"
			"At the command line, use !cost-max\n",
			sent->null_count, sent->num_linkages_found);
	}
	return sent->num_valid_linkages;
}
示例#8
0
int sentence_parse(Sentence sent, Parse_Options opts)
{
	int rc;

	verbosity = opts->verbosity;
	debug = opts->debug;
	test = opts->test;

	sent->num_valid_linkages = 0;

	/* If the sentence has not yet been split, do so now.
	 * This is for backwards compatibility, for existing programs
	 * that do not explicitly call the splitter.
	 */
	if (0 == sent->length)
	{
		rc = sentence_split(sent, opts);
		if (rc) return -1;
	}

	/* Check for bad sentence length */
	if (MAX_SENTENCE <= sent->length)
	{
		prt_error("Error: sentence too long, contains more than %d words\n",
			MAX_SENTENCE);
		return -2;
	}

	/* Initialize/free any leftover garbage */
	free_sentence_disjuncts(sent);  /* Is this really needed ??? */
	resources_reset_space(opts->resources);

	if (resources_exhausted(opts->resources))
		return 0;

	/* Expressions were previously set up during the tokenize stage. */
	expression_prune(sent);
	print_time(opts, "Finished expression pruning");
	if (opts->use_sat_solver)
	{
		sat_parse(sent, opts);
	}
	else
	{
		chart_parse(sent, opts);
	}
	print_time(opts, "Finished parse");

	if ((verbosity > 0) &&
	   (PARSE_NUM_OVERFLOW < sent->num_linkages_found))
	{
		prt_error("WARNING: Combinatorial explosion! nulls=%zu cnt=%d\n"
			"Consider retrying the parse with the max allowed disjunct cost set lower.\n"
			"At the command line, use !cost-max\n",
			sent->null_count, sent->num_linkages_found);
	}
	return sent->num_valid_linkages;
}
示例#9
0
/**
 * Read in the whole stinkin file. This routine returns
 * malloced memory, which should be freed as soon as possible.
 */
char *get_file_contents(const char * dict_name)
{
	int fd;
	size_t tot_size;
	size_t tot_read = 0;
	struct stat buf;
	char * contents;

	/* On Windows, 'b' (binary mode) is mandatory, otherwise fstat file length
	 * is confused by crlf counted as one byte. POSIX systems just ignore it. */
	FILE *fp = dictopen(dict_name, "rb");

	if (fp == NULL)
		return NULL;

	/* Get the file size, in bytes. */
	fd = fileno(fp);
	fstat(fd, &buf);
	tot_size = buf.st_size;

	contents = (char *) malloc(sizeof(char) * (tot_size+7));

	/* Now, read the whole file.
	 * Normally, a single fread() call below reads the whole file. */
	while (1)
	{
		size_t read_size = fread(contents, 1, tot_size+7, fp);

		if (0 == read_size)
		{
			bool err = (0 != ferror(fp));

			if (err)
			{
				char errbuf[64];

				strerror_r(errno, errbuf, sizeof(errbuf));
				fclose(fp);
				prt_error("Error: %s: Read error (%s)\n", dict_name, errbuf);
				free(contents);
				return NULL;
			}
			fclose(fp);
			break;
		}
		tot_read += read_size;
	}

	if (tot_read > tot_size+6)
	{
		prt_error("Error: %s: File size is insane (%zu)!\n", dict_name, tot_size);
		free(contents);
		return NULL;
	}

	contents[tot_read] = '\0';
	return contents;
}
示例#10
0
void  LGCheckGrammar::P_UNDEFINED ()
{
      int t, n = n_errors;

		if (n_terms > max_char_set+1)
		{
			strcpy (gft, ".lex");
			strcpy (grmfid, gdn);
			strcat (grmfid, gfn);
			strcat (grmfid, gft);

			char*  Input_start = input_start;
			char*  Input_end   = input_end;
			char** Line_ptr    = line_ptr;

			input_start = lex_input_start;
			input_end   = lex_input_end;
			line_ptr    = lex_line_ptr;

			for (t = max_char_set+1; t < n_terms; t++) // Skip <eof>
			{
				if (term_type[t] & LEXFILE)
				prt_error ("'%s' is listed in the .lex file, but not defined in the .lgr file", term_name[t], 0, term_line[t]);
			}

			strcpy (gft, ".lgr");
			strcpy (grmfid, gdn);
			strcat (grmfid, gfn);
			strcat (grmfid, gft);

			input_start = Input_start;
			input_end   = Input_end;
			line_ptr    = Line_ptr;											

			for (t = max_char_set+1; t < n_terms; t++) // Skip <eof>
			{
				if (!(term_type[t] & LEXFILE))
				{
					char code = charcode[*term_name[t]];
					if ( code == DIGIT || code == QUOTE) 
					{
						prt_error ("'%s' is not a predefined symbol", term_name[t], 0, term_line[t]);
						prt_log ("Predefined symbols are:\n");
						prt_log ("   0..31\n");
						for (int i = 32; i < 127; i++) prt_log ("   %s\n", term_name[i]);
						prt_log ("   127..255\n");
						LG::Terminate(95);
					}
					else
					{
						prt_error ("'%s' is not defined in the lexical grammar (.lgr file)", term_name[t], 0, term_line[t]);
					}
				}
			}
		}

		FREE (term_type, n_terms);
}
示例#11
0
文件: corpus.c 项目: dyne/AutOrg
static Sense * lg_corpus_senses(Corpus *corp,
                                const char * inflected_word,
                                const char * disjunct,
                                int wrd)
{
	double log_prob;
	const unsigned char *sense;
	Sense *sns, *head = NULL;
	int rc;

	/* Look up the disjunct in the database */
	rc = sqlite3_bind_text(corp->sense_query, 1,
		inflected_word, -1, SQLITE_STATIC);
	if (rc != SQLITE_OK)
	{
		prt_error("Error: SQLite can't bind word in sense query: rc=%d \n", rc);
		return NULL;
	}

	rc = sqlite3_bind_text(corp->sense_query, 2,
		disjunct, -1, SQLITE_STATIC);
	if (rc != SQLITE_OK)
	{
		prt_error("Error: SQLite can't bind disjunct in sense query: rc=%d \n", rc);
		return NULL;
	}

	rc = sqlite3_step(corp->sense_query);
	while (SQLITE_ROW == rc)
	{
		sense = sqlite3_column_text(corp->sense_query, 0);
		log_prob = sqlite3_column_double(corp->sense_query, 1);
		// printf ("Word=%s dj=%s sense=%s score=%f\n", 
		// 	inflected_word, disjunct, sense, log_prob);

		sns = (Sense *) malloc(sizeof(Sense));
		sns->next = head;
		head = sns;

		sns->inflected_word = inflected_word;
		sns->disjunct = disjunct;
		sns->sense = strdup(sense);
		sns->score = log_prob;
		sns->word = wrd;

		/* Get the next row, if any */
		rc = sqlite3_step(corp->sense_query);
	}

	/* Failure to do both a reset *and* a clear will cause subsequent
	 * binds tp fail. */
	sqlite3_reset(corp->sense_query);
	sqlite3_clear_bindings(corp->sense_query);

	return head;
}
示例#12
0
文件: corpus.c 项目: dyne/AutOrg
/**
 * get_disjunct_score -- get log probability of observing disjunt.
 *
 * Given an "inflected" word and a disjunct, thris routine returns the
 * -log_2 conditional probability prob(d|w) of seeing the disjunct 'd'
 * given that the word 'w' was observed.  Here, "inflected word" means
 * the link-grammar dictionary entry, complete with its trailing period
 * and tag -- e.g. run.v or running.g -- everything after the dot is the
 * "inflection".
 */
static double get_disjunct_score(Corpus *corp,
                                 const char * inflected_word,
                                 const char * disjunct)
{
	double val;
	int rc;

	/* Look up the disjunct in the database */
	rc = sqlite3_bind_text(corp->rank_query, 1,
		inflected_word, -1, SQLITE_STATIC);
	if (rc != SQLITE_OK)
	{
		const char *errmsg = sqlite3_errmsg(corp->dbconn);
		prt_error("Error: SQLite can't bind word: rc=%d %s\n", rc, errmsg);
		return LOW_SCORE;
	}

	rc = sqlite3_bind_text(corp->rank_query, 2,
		disjunct, -1, SQLITE_STATIC);
	if (rc != SQLITE_OK)
	{
		const char *errmsg = sqlite3_errmsg(corp->dbconn);
		prt_error("Error: SQLite can't bind disjunct: rc=%d %s\n", rc, errmsg);
		return LOW_SCORE;
	}

	rc = sqlite3_step(corp->rank_query);
	if (rc != SQLITE_ROW)
	{
		val = LOW_SCORE;
#ifdef DEBUG
		printf ("Word=%s dj=%s not found in dict, assume score=%f\n",
			inflected_word, disjunct, val);
#endif
		if (rc < SQLITE_ROW)
		{
			const char *errmsg = sqlite3_errmsg(corp->dbconn);
			prt_error("Error: SQLite can't ifind word: rc=%d %s\n", rc, errmsg);
		}
	}
	else
	{
		val = sqlite3_column_double(corp->rank_query, 0);
		if (LOW_SCORE < val) val = LOW_SCORE;
#ifdef DEBUG
		printf ("Word=%s dj=%s score=%f\n", inflected_word, disjunct, val);
#endif
	}

	/* Failure to do both a reset *and* a clear will cause subsequent
	 * binds to fail. */
	sqlite3_reset(corp->rank_query);
	sqlite3_clear_bindings(corp->rank_query);
	return val;
}
示例#13
0
static void read_contains_rules(pp_knowledge *k, const char *label,
				pp_rule **rules, int *nRules)
{
  /* Reading the 'contains_one_rules' and reading the
     'contains_none_rules' into their respective arrays */
  int n_commas, n_tokens, i, r;
  const char *p;
  const char **tokens;
  if (!pp_lexer_set_label(k->lt, label)) {
      *nRules = 0;
      if (verbosity>0) printf("PP warning: Not using any %s rules\n", label);
  }
  else {
    n_commas = pp_lexer_count_commas_of_label(k->lt);
    *nRules = (n_commas + 1)/3;
  }
  *rules = (pp_rule*) xalloc ((1+*nRules)*sizeof(pp_rule));
  for (r=0; r<*nRules; r++)
    {
      /* first read link */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens>1)
      {
        prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
        exit(1);
      }
      (*rules)[r].selector = string_set_add(tokens[0], k->string_set);

      /* read link set */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      (*rules)[r].link_set = pp_linkset_open(n_tokens);
      (*rules)[r].link_set_size = n_tokens;
      (*rules)[r].link_array = (const char **) xalloc((1+n_tokens)*sizeof(const char*));
      for (i=0; i<n_tokens; i++)
      {
        p = string_set_add(tokens[i], k->string_set);
        pp_linkset_add((*rules)[r].link_set, p);
        (*rules)[r].link_array[i] = p;
      }
      (*rules)[r].link_array[i]=0; /* NULL-terminator */

      /* read error message */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens>1)
      {
        prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",label,r+1);
        exit(1);
      }
      (*rules)[r].msg = string_set_add(tokens[0], k->string_set);
    }

  /* sentinel entry */
  (*rules)[*nRules].msg = 0;
}
示例#14
0
static bool read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
  size_t n_commas, n_tokens;
  size_t r, i;
  pp_linkset *lsHandle;
  const char **tokens;
  if (!pp_lexer_set_label(k->lt, label)) {
      k->n_form_a_cycle_rules = 0;
      if (verbosity_level(+D_PPK))
          prt_error("Warning: File %s: Not using any 'form a cycle' rules\n",
                    k->path);
  }
  else {
    n_commas = pp_lexer_count_commas_of_label(k->lt);
    k->n_form_a_cycle_rules = (n_commas + 1)/2;
  }
  k->form_a_cycle_rules=
    (pp_rule*) malloc ((1+k->n_form_a_cycle_rules)*sizeof(pp_rule));
  for (r=0; r<k->n_form_a_cycle_rules; r++)
    {
      /* read link set */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens <= 0)
      {
        prt_error("Error: File %s: Syntax error\n", k->path);
        return false;
      }
      lsHandle = pp_linkset_open(n_tokens);
      for (i=0; i<n_tokens; i++)
          pp_linkset_add(lsHandle,string_set_add(tokens[i], k->string_set));
      k->form_a_cycle_rules[r].link_set = lsHandle;

      /* read error message */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens > 1)
      {
         prt_error("Error: File %s: Invalid syntax (rule %zu of %s)\n",
                   k->path, r+1,label);
         return false;
      }
      k->form_a_cycle_rules[r].msg = string_set_add(tokens[0], k->string_set);
      k->form_a_cycle_rules[r].use_count = 0;
    }

  /* sentinel entry */
  k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
  k->form_a_cycle_rules[k->n_form_a_cycle_rules].use_count = 0;

  return true;
}
示例#15
0
static void prt_regerror(const char *msg, const Regex_node *re, int rc,
                         int erroffset)
{
#if HAVE_PCRE2_H
	PCRE2_UCHAR errbuf[ERRBUFFLEN];
	pcre2_get_error_message(rc, errbuf, ERRBUFFLEN);
#else
	char errbuf[ERRBUFFLEN];
	regerror(rc, re->re, errbuf, ERRBUFFLEN);
#endif /* HAVE_PCRE2_H */

	prt_error("Error: %s: \"%s\" (%s", msg, re->pattern, re->name);
	if (-1 != erroffset) prt_error(" at %d", erroffset);
	prt_error("): %s (%d)\n", errbuf, rc);
}
示例#16
0
文件: jni-client.c 项目: dyne/AutOrg
static per_thread_data * init(JNIEnv *env, jclass cls)
{
	const char *codeset, *dict_version;
	per_thread_data *ptd;

	/* Get the locale from the environment...
	 * perhaps we should someday get it from the dictionary ??
	 */
	setlocale(LC_ALL, "");

	/* Everything breaks if the locale is not UTF-8; check for this,
	 * and force  the issue !
	 */
	codeset = nl_langinfo(CODESET);
	if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
	{
		prt_error("Warning: JNI: locale %s was not UTF-8; force-setting to en_US.UTF-8\n",
			codeset);
		setlocale(LC_CTYPE, "en_US.UTF-8");
	}

	ptd = (per_thread_data *) malloc(sizeof(per_thread_data));
	memset(ptd, 0, sizeof(per_thread_data));

	ptd->panic_parse_opts = parse_options_create();
	setup_panic_parse_options(ptd->panic_parse_opts);

	ptd->opts = parse_options_create();
	parse_options_set_disjunct_costf(ptd->opts, 3.0f);
	parse_options_set_max_sentence_length(ptd->opts, 170);
	parse_options_set_max_parse_time(ptd->opts, 30);
	parse_options_set_linkage_limit(ptd->opts, 1000);
	parse_options_set_short_length(ptd->opts, 10);
	parse_options_set_verbosity(ptd->opts,0);
	parse_options_set_spell_guess(ptd->opts, FALSE);

	/* Default to the english language; will need to fix
	 * this if/when more languages are supported.
	 */
	ptd->dict = dictionary_create_lang("en");
	if (!ptd->dict) throwException(env, "Error: unable to open dictionary");
	else test();

	dict_version = linkgrammar_get_dict_version(ptd->dict);
	prt_error("Info: JNI: dictionary version %s\n", dict_version);

	return ptd;
}
示例#17
0
char * dictionary_get_data_dir(void)
{
	char * data_dir = NULL;

	if (custom_data_dir != NULL) {
		data_dir = safe_strdup(custom_data_dir);
		return data_dir;
	}

#ifdef _WIN32
	/* Dynamically locate invocation directory of our program.
	 * Non-ASCII characters are not supported (files will not be found). */
	char prog_path[MAX_PATH_NAME];

	if (!GetModuleFileNameA(NULL, prog_path, sizeof(prog_path)))
	{
		prt_error("Warning: GetModuleFileName error %d\n", (int)GetLastError());
	}
	else
	{
		if (NULL == prog_path)
		{
			/* Can it happen? */
			prt_error("Warning: GetModuleFileName returned a NULL program path!\n");
		}
		else
		{
			if (!PathRemoveFileSpecA(prog_path))
			{
				prt_error("Warning: Cannot get directory from program path '%s'!\n",
				          prog_path);
			}
			else
			{
				/* Unconvertible characters are marked as '?' */
				const char *unsupported = (NULL != strchr(prog_path, '?')) ?
					" (containing unsupported character)" : "";

				lgdebug(D_USER_FILES, "Debug: Directory of executable: %s%s\n",
				        unsupported, prog_path);
				data_dir = safe_strdup(prog_path);
			}
		}
	}
#endif /* _WIN32 */

	return data_dir;
}
示例#18
0
static pp_linkset *read_link_set(pp_knowledge *k,
                                 const char *label, String_set *ss)
{
  /* read link set, marked by label in knowledge file, into a set of links
     whose handle is returned. Return NULL if link set not defined in file,
     in which case the set is taken to be empty. */
  int n_strings,i;
  pp_linkset *ls;
  if (!pp_lexer_set_label(k->lt, label))
  {
    if (verbosity_level(+D_PPK))
      prt_error("Warning: File %s: Link set %s not defined: assuming empty\n",
             k->path, label);
    n_strings = 0;
  }
  else
  {
    n_strings = pp_lexer_count_tokens_of_label(k->lt);
    if (-1 == n_strings) return &LINK_SET_ERROR;
  }
  ls = pp_linkset_open(n_strings);
  for (i=0; i<n_strings; i++)
    pp_linkset_add(ls,
                   string_set_add(pp_lexer_get_next_token_of_label(k->lt),ss));
  return ls;
}
示例#19
0
void dictionary_delete(Dictionary dict)
{
	if (!dict) return;

	if (verbosity > 0) {
		prt_error("Info: Freeing dictionary %s", dict->name);
	}

#ifdef USE_CORPUS
	lg_corpus_delete(dict->corpus);
#endif

	if (dict->affix_table != NULL) {
		affix_list_delete(dict->affix_table);
		dictionary_delete(dict->affix_table);
	}
	spellcheck_destroy(dict->spell_checker);

	connector_set_delete(dict->unlimited_connector_set);

	if (dict->close) dict->close(dict);

	pp_knowledge_close(dict->base_knowledge);
	pp_knowledge_close(dict->hpsg_knowledge);
	string_set_delete(dict->string_set);
	free_regexs(dict->regex_root);
#ifdef USE_ANYSPLIT
	free_anysplit(dict);
#endif
	free_dictionary(dict);
	xfree(dict, sizeof(struct Dictionary_s));
	object_open(NULL, NULL, NULL); /* Free the directory path cache */
}
示例#20
0
/**
 * Compiles all the given regexs. Returns 0 on success,
 * else an error code.
 */
int compile_regexs(Regex_node *re, Dictionary dict)
{
    regex_t *preg;
    int rc;

    while (re != NULL)
    {
        /* If re->re non-null, assume compiled already. */
        if(re->re == NULL)
        {
            /* Compile with default options (0) and default character
             * tables (NULL). */
            /* re->re = pcre_compile(re->pattern, 0, &error, &erroroffset, NULL); */
            preg = (regex_t *) malloc (sizeof(regex_t));
            re->re = preg;
            rc = regcomp(preg, re->pattern, REG_EXTENDED);
            if (rc)
            {
                prt_regerror("Failed to compile regex", re, rc);
                return rc;
            }

            /* Check that the regex name is defined in the dictionary. */
            if ((NULL != dict) && !boolean_dictionary_lookup(dict, re->name))
            {
                /* TODO: better error handing. Maybe remove the regex? */
                prt_error("Error: Regex name %s not found in dictionary!\n",
                          re->name);
            }
        }
        re = re->next;
    }
    return 0;
}
示例#21
0
/**
 *
 * (1) opens the word file and adds it to the word file list
 * (2) reads in the words
 * (3) puts each word in a Dict_node
 * (4) links these together by their left pointers at the
 *     front of the list pointed to by dn
 * (5) returns a pointer to the first of this list
 */
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
{
	Dict_node * dn_new;
	Word_file * wf;
	FILE * fp;
	const char * s;
	char file_name_copy[MAX_PATH_NAME+1];

	safe_strcpy(file_name_copy, filename+1, sizeof(file_name_copy)); /* get rid of leading '/' */

	if ((fp = dictopen(file_name_copy, "r")) == NULL) {
		prt_error("Error opening word file %s\n", file_name_copy);
		return NULL;
	}

	/*printf("   Reading \"%s\"\n", file_name_copy);*/
	/*printf("*"); fflush(stdout);*/

	wf = (Word_file *) xalloc(sizeof (Word_file));
	safe_strcpy(wf->file, file_name_copy, sizeof(wf->file));
	wf->changed = FALSE;
	wf->next = dict->word_file_header;
	dict->word_file_header = wf;

	while ((s = get_a_word(dict, fp)) != NULL) {
		dn_new = (Dict_node *) xalloc(sizeof(Dict_node));
		dn_new->left = dn;
		dn = dn_new;
		dn->string = s;
		dn->file = wf;
	}
	fclose(fp);
	return dn;
}
示例#22
0
/**
 * Reads in one word from the file, allocates space for it,
 * and returns it.
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	const char * s;
	wint_t c;
	mbstate_t mbss;
	int j;

	do {
		c = fgetwc(fp);
	} while ((c != WEOF) && iswspace(c));
	if (c == WEOF) return NULL;

	memset(&mbss, 0, sizeof(mbss));
	for (j=0; (j <= MAX_WORD-1) && (!iswspace(c)) && (c != WEOF);)
	{
		j += wctomb_check(&word[j], c, &mbss);
		c = fgetwc(fp);
	}

	if (j >= MAX_WORD) {
		word[MAX_WORD] = 0x0;
		prt_error("Fatal Error: The dictionary contains a word that "
		          "is too long. The word was: %s", word);
		exit(1);
	}
	word[j] = '\0';
	s = string_set_add(word, dict->string_set);
	return s;
}
示例#23
0
/**
 * Reads in one word from the file, allocates space for it,
 * and returns it.
 *
 * In case of an error, return a null string (cannot be a valid word).
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
	const char * s;
	int c, j;

	do {
		c = fgetc(fp);
	} while ((c != EOF) && lg_isspace(c));
	if (c == EOF) return NULL;

	for (j=0; (j <= MAX_WORD-1) && (!lg_isspace(c)) && (c != EOF); j++)
	{
		word[j] = c;
		c = fgetc(fp);
	}

	if (j >= MAX_WORD) {
		word[MAX_WORD] = '\0';
		prt_error("The dictionary contains a word that is too long: %s\n", word);
		return ""; /* error indication */
	}
	word[j] = '\0';
	patch_subscript(word);
	s = string_set_add(word, dict->string_set);
	return s;
}
示例#24
0
static void process_linkage(Linkage linkage, Command_Options* copts)
{
	char * string;
	ConstituentDisplayStyle mode;

	if (!linkage) return;  /* Can happen in timeout mode */

	if (copts->display_bad)
	{
		string = linkage_print_pp_msgs(linkage);
		fprintf(stdout, "%s\n", string);
		linkage_free_pp_msgs(string);
	}
	if (copts->display_on)
	{
		string = linkage_print_diagram(linkage, copts->display_walls, copts->screen_width);
		fprintf(stdout, "%s", string);
		linkage_free_diagram(string);
	}
	if ((mode = copts->display_constituents))
	{
		string = linkage_print_constituent_tree(linkage, mode);
		if (string != NULL)
		{
			fprintf(stdout, "%s\n", string);
			linkage_free_constituent_tree_str(string);
		}
		else
		{
			copts->display_constituents = 0;
			prt_error("Error: Can't generate constituents.\n"
			          "Constituent processing has been turned off.\n");
		}
	}
	if (copts->display_links)
	{
		string = linkage_print_links_and_domains(linkage);
		fprintf(stdout, "%s", string);
		linkage_free_links_and_domains(string);
	}
	if (copts->display_senses)
	{
		string = linkage_print_senses(linkage);
		fprintf(stdout, "%s", string);
		linkage_free_senses(string);
	}
	if (copts->display_disjuncts)
	{
		string = linkage_print_disjuncts(linkage);
		fprintf(stdout, "%s\n", string);
		linkage_free_disjuncts(string);
	}
	if (copts->display_postscript)
	{
		string = linkage_print_postscript(linkage,
		          copts->display_walls, copts->display_ps_header);
		fprintf(stdout, "%s\n", string);
		linkage_free_postscript(string);
	}
}
示例#25
0
static void* db_open(const char * fullname, void * user_data)
{
	int fd;
	struct stat buf;
	sqlite3 *db;

	/* Is there a file here that can be read? */
	FILE * fh =  fopen(fullname, "r");
	if (NULL == fh)
		return NULL;

	/* Get the file size, in bytes. */
	/* SQLite has a habit of leaving zero-length DB's lying around */
	fd = fileno(fh);
	fstat(fd, &buf);
	if (0 == buf.st_size)
	{
		fclose(fh);
		return NULL;
	}

	/* Found a file, of non-zero length. See if that works. */
	if (sqlite3_open(fullname, &db))
	{
		prt_error("Error: Can't open database %s: %s\n",
			fullname, sqlite3_errmsg(db));
		sqlite3_close(db);
		return NULL;
	}
	return (void *) db;
}
示例#26
0
/**
 * Print out the constituent tree.
 * mode 1: treebank-style constituent tree
 * mode 2: flat, bracketed tree [A like [B this B] A]
 * mode 3: flat, treebank-style tree (A like (B this))
 */
char * linkage_print_constituent_tree(Linkage linkage, ConstituentDisplayStyle mode)
{
	String * cs;
	CNode * root;
	char * p;

	if (!linkage) return NULL;
	if (mode == NO_DISPLAY)
	{
		return NULL;
	}
	else if (mode == MULTILINE || mode == SINGLE_LINE)
	{
		cs = string_new();
		root = linkage_constituent_tree(linkage);
		print_tree(cs, (mode==1), root, 0, 0);
		linkage_free_constituent_tree(root);
		append_string(cs, "\n");
		p = string_copy(cs);
		string_delete(cs);
		return p;
	}
	else if (mode == BRACKET_TREE)
	{
		return print_flat_constituents(linkage);
	}
	prt_error("Warning: Illegal mode %d for printing constituents\n"
	          "Allowed values: %d to %d\n", mode, NO_DISPLAY, MAX_STYLES);
	return NULL;
}
示例#27
0
/**
 * popen a command with the given input.
 * If the system doesn't have fork(), popen() is used to launch "dot".
 * This is an inferior implementation than the one below that uses
 * fork(), in which the window remains open and is updated automatically
 * when new sentences are entered. With popen(), the program blocks at
 * pclose() and the user needs to close the window after each sentence.
 */
static void x_popen(const char *cmd, const char *wgds)
{
	FILE *const cmdf = popen(cmd, "w");

	if (NULL == cmdf)
	{
		prt_error("Error: popen of '%s' failed: %s", cmd, strerror(errno));
	}
	else
	{
		if (fprintf(cmdf, "%s", wgds) == -1)
			prt_error("Error: print to display command: %s", strerror(errno));
		if (pclose(cmdf) == -1)
			prt_error("Error: pclose of display command: %s", strerror(errno));
	}
}
示例#28
0
void parse_options_set_use_sat_parser(Parse_Options opts, bool dummy) {
#ifdef USE_SAT_SOLVER
	opts->use_sat_solver = dummy;
#else
	prt_error("Error: cannot enable the Boolean SAT parser; this "
	          " library was built without SAT solver support.\n");
#endif
}
示例#29
0
static void check_domain_is_legal(const char *p)
{
  if (0x0 != p[1])
  {
    prt_error("Fatal Error: post_process(): Domain (%s) must be a single character", p);
    exit(1);
  }
}
示例#30
0
static bool read_bounded_rules(pp_knowledge *k, const char *label)
{
  const char **tokens;
  size_t n_commas, n_tokens;
  size_t r;
  if (!pp_lexer_set_label(k->lt, label)) {
      k->n_bounded_rules = 0;
      if (verbosity_level(+D_PPK))
        prt_error("Warning: File %s: Not using any 'bounded' rules\n", k->path);
  }
  else {
    n_commas = pp_lexer_count_commas_of_label(k->lt);
    k->n_bounded_rules = (n_commas + 1)/2;
  }
  k->bounded_rules = (pp_rule*) malloc ((1+k->n_bounded_rules)*sizeof(pp_rule));
  for (r=0; r<k->n_bounded_rules; r++)
    {
      /* read domain */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens!=1)
      {
        prt_error("Error: File %s: Invalid syntax: rule %zu of %s\n",
                  k->path, r+1,label);
        return false;
      }
      k->bounded_rules[r].domain = (int) tokens[0][0];

      /* read error message */
      tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
      if (n_tokens!=1)
      {
        prt_error("Error: File %s: Invalid syntax: rule %zu of %s\n",
                  k->path, r+1,label);
        return false;
      }
      k->bounded_rules[r].msg = string_set_add(tokens[0], k->string_set);
      k->bounded_rules[r].use_count = 0;
    }

  /* sentinel entry */
  k->bounded_rules[k->n_bounded_rules].msg = 0;
  k->bounded_rules[k->n_bounded_rules].use_count = 0;

  return true;
}