/**
 * Fetch the next whitespace-delimited word from the dictionary file.
 *
 * Leading whitespace (as judged by lg_isspace) is skipped, then
 * characters are collected up to the next whitespace character or EOF.
 * The collected text is handed to patch_subscript() and then to
 * string_set_add(), whose result is returned.
 *
 * Return value:
 *  - NULL if end-of-file is reached before any word character is seen;
 *  - "" (the empty string, which cannot be a valid word) if the word
 *    reaches MAX_WORD characters; an error is reported via prt_error();
 *  - otherwise the pointer returned by string_set_add().
 */
static const char * get_a_word(Dictionary dict, FILE * fp)
{
	char buf[MAX_WORD+4]; /* allow for 4-byte wide chars */
	int ch;
	int len = 0;

	/* Consume leading whitespace; bail out if the file ends first. */
	for (;;)
	{
		ch = fgetc(fp);
		if (ch == EOF) return NULL;
		if (!lg_isspace(ch)) break;
	}

	/* Gather characters until whitespace, EOF, or the length cap. */
	while ((len < MAX_WORD) && (!lg_isspace(ch)) && (ch != EOF))
	{
		buf[len] = ch;
		len++;
		ch = fgetc(fp);
	}

	if (len >= MAX_WORD)
	{
		/* Terminate what was collected so it can be shown in the message. */
		buf[MAX_WORD] = '\0';
		prt_error("The dictionary contains a word that is too long: %s\n", buf);
		return ""; /* error indication */
	}

	buf[len] = '\0';
	patch_subscript(buf);
	return string_set_add(buf, dict->string_set);
}
/**
 * Build an expression from a string holding one or more connectors.
 *
 * Each connector has the form [@]NAME(+|-), where NAME is made of
 * alphanumeric characters and '*', the trailing '+' or '-' is the
 * direction, and an optional leading '@' marks a multi-connector.
 * Connectors may be preceded by whitespace and/or '&' separators.
 *
 * The first connector becomes a CONNECTOR_type Exp. If more connectors
 * follow, they are parsed recursively and the two are joined under an
 * AND_type node holding a two-element E_list (connector, rest).
 *
 * Returns NULL if exp_str contains no connector. Asserts if a connector
 * lacks its direction character.
 */
static Exp * make_expression(Dictionary dict, const char *exp_str)
{
	Exp *e;
	Exp *and_node;
	Exp *rest;
	E_list *ell, *elr;
	char *constr = NULL;
	const char *p = exp_str;
	const char *con_start = NULL;
	const char *name_start = NULL;

	/* Search for the start of a connector. */
	while (*p && (lg_isspace(*p) || '&' == *p)) p++;
	con_start = p;

	if (0 == *p) return NULL;

	/* The multi-connector marker is not part of the name and is not
	 * matched by the name scan below, so it must be consumed here. */
	if ('@' == *p) p++;
	name_start = p;

	/* Search for the end of the connector name. The cast keeps isalnum()
	 * well-defined for bytes with the high bit set. */
	while (*p && (isalnum((unsigned char) *p) || '*' == *p)) p++;

	/* Connectors always end with a + or - */
	assert (('+' == *p) || ('-' == *p),
	        "Missing direction character in connector string: %s", con_start);

	/* Create an expression to hold the connector */
	e = (Exp *) xalloc(sizeof(Exp));
	e->dir = *p;
	e->type = CONNECTOR_type;
	e->cost = 0.0;
	e->multi = ('@' == *con_start);

	constr = strndup(name_start, (size_t)(p - name_start));
	assert (NULL != constr,
	        "Out of memory while copying connector string: %s", con_start);

	/* We have to use the string set, mostly because copy_Exp
	 * in build_disjuncts fails to copy the string ...
	 */
	e->u.string = string_set_add(constr, dict->string_set);
	free(constr);

	/* Parse whatever follows the direction character. */
	rest = make_expression(dict, ++p);
	if (NULL == rest)
		return e;

	/* Join it all together with an AND node */
	and_node = (Exp *) xalloc(sizeof(Exp));
	and_node->type = AND_type;
	and_node->cost = 0.0;
	and_node->u.l = ell = (E_list *) xalloc(sizeof(E_list));
	ell->next = elr = (E_list *) xalloc(sizeof(E_list));
	elr->next = NULL;

	ell->e = e;
	elr->e = rest;

	return and_node;
}
/**
 * Read regex definitions from a file into the dictionary's regex list.
 *
 * The file is a sequence of entries of the form
 *
 *     NAME : /PATTERN/
 *
 * with optional whitespace around the colon. A '%' starts a comment that
 * runs to the end of the line. A '/' immediately preceded by a backslash
 * does not terminate the pattern.
 *
 * For every entry a Regex_node is allocated holding copies of the name
 * and pattern (its `re` field is set to NULL) and linked into the list
 * starting at dict->regex_root, in file order.
 *
 * Returns 0 on success, 1 on failure. Failures are reported through
 * prt_error(); nodes linked before the failure stay in the list.
 */
int read_regex_file(Dictionary dict, const char *file_name)
{
	Regex_node **tail = &dict->regex_root; /* Last Regex_node * in list */
	Regex_node *new_re;
	char name[MAX_REGEX_NAME_LENGTH];
	char regex[MAX_REGEX_LENGTH];
	char *name_copy;
	char *pattern_copy;
	int c, prev, i, line = 1;
	FILE *fp;

	fp = dictopen(file_name, "r");
	if (fp == NULL)
	{
		prt_error("Error: cannot open regex file %s\n", file_name);
		return 1;
	}

	/* read in regexs. loop broken on EOF. */
	while (1)
	{
		/* skip whitespace and comments. */
		do
		{
			do
			{
				c = fgetc(fp);
				if (c == '\n') { line++; }
			}
			while (lg_isspace(c));

			if (c == '%')
			{
				while ((c != EOF) && (c != '\n')) { c = fgetc(fp); }
				line++;
			}
		}
		while (lg_isspace(c));

		if (c == EOF) { break; } /* done. */

		/* read in the name of the regex. */
		i = 0;
		do
		{
			/* Keep the final slot free for the terminating NUL written
			 * after the loop. */
			if (i >= MAX_REGEX_NAME_LENGTH-1)
			{
				prt_error("Error: Regex name too long on line %d\n", line);
				goto failure;
			}
			name[i++] = c;
			c = fgetc(fp);
		}
		while ((!lg_isspace(c)) && (c != ':') && (c != EOF));
		name[i] = '\0';

		/* Skip possible whitespace after name, expect colon. */
		while (lg_isspace(c))
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		if (c != ':')
		{
			prt_error("Error: Regex missing colon on line %d\n", line);
			goto failure;
		}

		/* Skip whitespace after colon, expect slash. */
		do
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		while (lg_isspace(c));
		if (c != '/')
		{
			prt_error("Error: Regex missing leading slash on line %d\n", line);
			goto failure;
		}

		/* Read in the regex. The terminating character (slash or EOF)
		 * is stored too, then overwritten by the NUL below. */
		prev = 0;
		i = 0;
		do
		{
			if (i > MAX_REGEX_LENGTH-1)
			{
				prt_error("Error: Regex too long on line %d\n", line);
				goto failure;
			}
			prev = c;
			c = fgetc(fp);
			regex[i++] = c;
		}
		while ((c != '/' || prev == '\\') && (c != EOF));
		regex[i-1] = '\0';

		/* Expect termination by a slash. */
		if (c != '/')
		{
			prt_error("Error: Regex missing trailing slash on line %d\n", line);
			goto failure;
		}

		/* Create new Regex_node and add to dict list. Nothing is linked
		 * until every allocation has succeeded. */
		new_re = malloc(sizeof *new_re);
		name_copy = strdup(name);
		pattern_copy = strdup(regex);
		if ((new_re == NULL) || (name_copy == NULL) || (pattern_copy == NULL))
		{
			free(pattern_copy);
			free(name_copy);
			free(new_re);
			prt_error("Error: out of memory reading regex file %s\n", file_name);
			goto failure;
		}
		new_re->name = name_copy;
		new_re->pattern = pattern_copy;
		new_re->re = NULL;
		new_re->next = NULL;
		*tail = new_re;
		tail = &new_re->next;
	}

	fclose(fp);
	return 0;

failure:
	fclose(fp);
	return 1;
}