コード例 #1
0
ファイル: word-file.c プロジェクト: ampli/link-grammar
/**
 * Fetch the next whitespace-delimited token from fp and register it in
 * the dictionary's string set.
 *
 * Return value:
 *  - NULL when end-of-file is hit before any non-space character;
 *  - "" (an empty string, which cannot be a valid word) when the token
 *    fills MAX_WORD bytes or more; an error is reported in that case;
 *  - otherwise, whatever string_set_add() returns for the token, after
 *    patch_subscript() has been applied to it.
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char buf[MAX_WORD+4]; /* allow for 4-byte wide chars */
	int ch;
	int len = 0;

	/* Discard leading whitespace; give up if the file ends first. */
	ch = fgetc(fp);
	while ((ch != EOF) && lg_isspace(ch))
	{
		ch = fgetc(fp);
	}
	if (EOF == ch) return NULL;

	/* Accumulate bytes until whitespace, EOF, or the size limit. */
	while ((len < MAX_WORD) && (!lg_isspace(ch)) && (ch != EOF))
	{
		buf[len] = ch;
		len++;
		ch = fgetc(fp);
	}

	if (len >= MAX_WORD)
	{
		/* Terminate what was collected so it can be printed. */
		buf[MAX_WORD] = '\0';
		prt_error("The dictionary contains a word that is too long: %s\n", buf);
		return ""; /* error indication */
	}

	buf[len] = '\0';
	patch_subscript(buf);
	return string_set_add(buf, dict->string_set);
}
コード例 #2
0
ファイル: read-sql.c プロジェクト: agayardo/link-grammar
/**
 * Build an expression from a string of connectors.
 *
 * The string is scanned left to right. Whitespace and '&' characters
 * between connectors are skipped. Each connector consists of an optional
 * leading '@' (marking a multi-connector), a run of alphanumeric or '*'
 * characters, and a mandatory trailing direction character '+' or '-'.
 *
 * Returns NULL if no connector is left in the string. If exactly one
 * connector is left, a CONNECTOR_type expression is returned. Otherwise
 * an AND_type expression is returned whose two-element list holds the
 * first connector followed by the expression built (recursively) from
 * the remainder of the string.
 *
 * A connector without a direction character triggers the assertion.
 */
static Exp * make_expression(Dictionary dict, const char *exp_str)
{
	Exp* e;
	Exp* and_node;
	Exp* rest;
	E_list *ell, *elr;

	char *constr = NULL;
	const char * p = exp_str;
	const char * con_start = NULL;

	/* Search for the start of a connector. */
	while (*p && (lg_isspace(*p) || '&' == *p)) p++;
	con_start = p;

	if (0 == *p) return NULL;

	/* Step over the optional multi-connector marker. Without this the
	 * scan below would stop on the '@' itself and the direction check
	 * would always fail for multi-connectors. */
	if ('@' == *p) p++;

	/* Search for the end of the connector name. The cast keeps
	 * isalnum() well-defined for bytes with the high bit set. */
	while (*p && (isalnum((unsigned char)*p) || '*' == *p)) p++;

	/* Connectors always end with a + or - */
	assert (('+' == *p) || ('-' == *p),
			"Missing direction character in connector string: %s", con_start);

	/* Create an expression to hold the connector */
	e = (Exp *) xalloc(sizeof(Exp));
	e->dir = *p;
	e->type = CONNECTOR_type;
	e->cost = 0.0;
	if ('@' == *con_start)
	{
		/* Copy the name without the leading '@'. */
		constr = strndup(con_start+1, p-con_start-1);
		e->multi = true;
	}
	else
	{
		constr = strndup(con_start, p-con_start);
		e->multi = false;
	}

	/* We have to use the string set, mostly because copy_Exp
	 * in build_disjuncts fails to copy the string ...
	 */
	e->u.string = string_set_add(constr, dict->string_set);
	free(constr);

	/* Parse whatever follows the direction character. */
	rest = make_expression(dict, ++p);
	if (NULL == rest)
		return e;

	/* Join it all together with an AND node */
	and_node = (Exp *) xalloc(sizeof(Exp));
	and_node->type = AND_type;
	and_node->cost = 0.0;
	and_node->u.l = ell = (E_list *) xalloc(sizeof(E_list));
	ell->next = elr = (E_list *) xalloc(sizeof(E_list));
	elr->next = NULL;

	ell->e = e;
	elr->e = rest;

	return and_node;
}
コード例 #3
0
ファイル: read-regex.c プロジェクト: agayardo/link-grammar
/**
 * Read regex definitions from a file into the dictionary's regex list.
 *
 * Each entry in the file has the form
 *
 *     NAME : /pattern/
 *
 * Characters for which lg_isspace() is true are skipped between
 * entries and around the colon. A '%' where an entry would start
 * begins a comment that runs to the end of the line. Inside a pattern,
 * a '/' that immediately follows a backslash does not end the pattern.
 *
 * For every entry a Regex_node is allocated holding copies of the name
 * and the pattern text (its `re` field is set to NULL), and the nodes
 * are linked in file order starting at dict->regex_root. Nodes linked
 * before an error is detected are left in place.
 *
 * @param dict       Dictionary whose regex list is filled in.
 * @param file_name  Name of the regex file, opened via dictopen().
 * @return 0 on success; 1 if the file cannot be opened, an entry is
 *         malformed or too long, or memory cannot be allocated.
 */
int read_regex_file(Dictionary dict, const char *file_name)
{
	Regex_node **tail = &dict->regex_root; /* Where the next node is linked */
	Regex_node *new_re;
	char name[MAX_REGEX_NAME_LENGTH];
	char regex[MAX_REGEX_LENGTH];
	char *name_copy;
	char *pattern_copy;
	int c,prev,i,line=1;
	FILE *fp;

	fp = dictopen(file_name, "r");
	if (fp == NULL)
	{
		prt_error("Error: cannot open regex file %s\n", file_name);
		return 1;
	}

	/* read in regexs. loop broken on EOF. */
	while (1)
	{
		/* skip whitespace and comments. */
		do
		{
			do
			{
				c = fgetc(fp);
				if (c == '\n') { line++; }
			}
			while (lg_isspace(c));

			if (c == '%')
			{
				/* Comment: discard the rest of the line. */
				while ((c != EOF) && (c != '\n')) { c = fgetc(fp); }
				line++;
			}
		}
		while (lg_isspace(c));

		if (c == EOF) { break; } /* done. */

		/* read in the name of the regex. */
		i = 0;
		do
		{
			/* Keep one byte free for the terminating NUL written
			 * after the loop; otherwise a name of exactly
			 * MAX_REGEX_NAME_LENGTH characters overflows name[]. */
			if (i >= MAX_REGEX_NAME_LENGTH-1)
			{
				prt_error("Error: Regex name too long on line %d\n", line);
				goto failure;
			}
			name[i++] = c;
			c = fgetc(fp);
		}
		while ((!lg_isspace(c)) && (c != ':') && (c != EOF));
		name[i] = '\0';

		/* Skip possible whitespace after name, expect colon. */
		while (lg_isspace(c))
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		if (c != ':')
		{
			prt_error("Error: Regex missing colon on line %d\n", line);
			goto failure;
		}

		/* Skip whitespace after colon, expect slash. */
		do
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		while (lg_isspace(c));
		if (c != '/') {
			prt_error("Error: Regex missing leading slash on line %d\n", line);
			goto failure;
		}

		/* Read in the regex. The terminating character (the closing
		 * slash, or EOF) is stored too and then overwritten by the
		 * NUL below, so the buffer check needs no extra slack. */
		prev = 0;
		i = 0;
		do
		{
			if (i > MAX_REGEX_LENGTH-1)
			{
				prt_error("Error: Regex too long on line %d\n", line);
				goto failure;
			}
			prev = c;
			c = fgetc(fp);
			regex[i++] = c;
		}
		while ((c != '/' || prev == '\\') && (c != EOF));
		regex[i-1] = '\0';

		/* Expect termination by a slash. */
		if (c != '/')
		{
			prt_error("Error: Regex missing trailing slash on line %d\n", line);
			goto failure;
		}

		/* Create new Regex_node and add to dict list. Nothing is
		 * linked until every allocation has succeeded. */
		new_re = malloc(sizeof *new_re);
		name_copy = strdup(name);
		pattern_copy = strdup(regex);
		if ((new_re == NULL) || (name_copy == NULL) || (pattern_copy == NULL))
		{
			free(pattern_copy);
			free(name_copy);
			free(new_re);
			prt_error("Error: out of memory reading regex on line %d\n", line);
			goto failure;
		}
		new_re->name    = name_copy;
		new_re->pattern = pattern_copy;
		new_re->re      = NULL;
		new_re->next    = NULL;
		*tail = new_re;
		tail	= &new_re->next;
	}

	fclose(fp);
	return 0;
failure:
	fclose(fp);
	return 1;
}