/**
 * Read in the whole stinkin file. This routine returns
 * malloced memory, which should be freed as soon as possible.
 *
 * The returned buffer is always NUL-terminated (an empty file yields
 * an empty string).
 *
 * @param dict_name  Name of the file, passed as-is to dictopen().
 * @return The file contents, or NULL if the file cannot be opened, its
 *         size cannot be obtained, or memory cannot be allocated.
 *         If the file turns out to be larger than the size reported by
 *         fstat(), an error is printed and the program exits.
 */
char *get_file_contents(const char * dict_name)
{
	int fd;
	size_t tot_size;
	int left;
	struct stat buf;
	char * contents, *p;

#if defined(_MSC_VER) || defined(__MINGW32__)
	/* binary, otherwise fstat file length is confused by crlf
	 * counted as one byte. */
	FILE *fp = dictopen(dict_name, "rb");
#else
	FILE *fp = dictopen(dict_name, "r");
#endif
	if (fp == NULL)
		return NULL;

	/* Get the file size, in bytes. */
	fd = fileno(fp);
	if (0 != fstat(fd, &buf))
	{
		/* Without a valid size, st_size would be read uninitialized. */
		fclose(fp);
		return NULL;
	}
	tot_size = buf.st_size;

	/* A few bytes of slack beyond the reported size. */
	contents = (char *) malloc(sizeof(char) * (tot_size+7));
	if (NULL == contents)
	{
		fclose(fp);
		return NULL;
	}

	/* If the very first fgets() returns NULL (empty file), nothing is
	 * stored in the buffer; make sure it is a valid string anyway. */
	contents[0] = '\0';

	/* Now, read the whole file. */
	p = contents;
	left = tot_size + 7;
	while (1)
	{
		char *rv = fgets(p, left, fp);
		if (NULL == rv || feof(fp))
			break;

		/* Advance to the terminating NUL written by fgets(). */
		while (*p != 0x0) { p++; left--; }

		/* fgets() stores at most left-1 characters, so 'left' never
		 * goes negative. Only the terminator slot remaining, with
		 * the file not at EOF, means the file is bigger than fstat()
		 * said; stop here instead of calling fgets() with no room. */
		if (left <= 1)
		{
			left = -1;
			break;
		}
	}

	fclose(fp);

	if (left < 0)
	{
		prt_error("Fatal Error: File size is insane!");
		exit(1);
	}

	return contents;
}
/**
 * Load the words of a word file and prepend them to a Dict_node list.
 *
 * (1) opens the word file and adds it to the word file list
 * (2) reads in the words
 * (3) puts each word in a Dict_node
 * (4) links these together by their left pointers at the
 *     front of the list pointed to by dn
 * (5) returns a pointer to the first of this list
 *
 * The first character of filename is skipped before opening.
 * Returns NULL (after printing an error) if the file cannot be opened.
 */
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
{
	char path[MAX_PATH_NAME+1];
	FILE * word_fp;
	Word_file * record;
	const char * word;

	/* get rid of leading '/' */
	safe_strcpy(path, filename+1, sizeof(path));

	word_fp = dictopen(path, "r");
	if (NULL == word_fp)
	{
		prt_error("Error opening word file %s\n", path);
		return NULL;
	}

	/* Register this file at the head of the dictionary's word-file list. */
	record = (Word_file *) xalloc(sizeof (Word_file));
	safe_strcpy(record->file, path, sizeof(record->file));
	record->changed = FALSE;
	record->next = dict->word_file_header;
	dict->word_file_header = record;

	/* Each word becomes a new node pushed on the front of the list. */
	for (word = get_a_word(dict, word_fp);
	     word != NULL;
	     word = get_a_word(dict, word_fp))
	{
		Dict_node * node = (Dict_node *) xalloc(sizeof(Dict_node));

		node->left = dn;
		node->string = word;
		node->file = record;
		dn = node;
	}

	fclose(word_fp);
	return dn;
}
/**
 * Load the words of a word file and prepend them to a Dict_node list.
 *
 * (1) opens the word file and adds it to the word file list
 * (2) reads in the words
 * (3) puts each word in a Dict_node
 * (4) links these together by their left pointers at the
 *     front of the list pointed to by dn
 * (5) returns a pointer to the first of this list
 *
 * The first character of filename is skipped before opening.
 *
 * @return The new head of the list, or NULL on failure. If the file
 *         cannot be opened, or the Word_file record cannot be allocated,
 *         the list passed in dn is left untouched. If get_a_word()
 *         reports an error (an empty string), or a node cannot be
 *         allocated, the whole list -- including the nodes passed in
 *         dn -- is released with free_insert_list().
 */
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
{
	Word_file * wf;
	FILE * fp;
	const char * s;

	filename += 1; /* get rid of leading '/' */

	if ((fp = dictopen(filename, "r")) == NULL)
	{
		return NULL;
	}

	wf = malloc(sizeof *wf);
	if (NULL == wf)
	{
		fclose(fp);
		return NULL;
	}
	wf->file = string_set_add(filename, dict->string_set);
	wf->changed = false;
	wf->next = dict->word_file_header;
	dict->word_file_header = wf;

	while ((s = get_a_word(dict, fp)) != NULL)
	{
		Dict_node * dn_new;

		if ('\0' == s[0]) /* returned error indication */
			goto failure;

		dn_new = malloc(sizeof *dn_new);
		if (NULL == dn_new)
			goto failure;

		/* Push the new node on the front of the list. */
		dn_new->left = dn;
		dn = dn_new;
		dn->string = s;
		dn->file = wf;
	}
	fclose(fp);
	return dn;

failure:
	fclose(fp);
	free_insert_list(dn);
	return NULL;
}
/**
 * Read in the whole stinkin file. This routine returns
 * malloced memory, which should be freed as soon as possible.
 *
 * The returned buffer is NUL-terminated.
 *
 * @param dict_name  Name of the file, passed as-is to dictopen().
 * @return The file contents, or NULL if the file cannot be opened,
 *         its size cannot be obtained, memory cannot be allocated,
 *         a read error occurs, or the file is larger than the size
 *         reported by fstat() (plus a few bytes of slack). Except for
 *         the open failure, an error message is printed.
 */
char *get_file_contents(const char * dict_name)
{
	int fd;
	size_t tot_size;
	size_t buf_size;
	size_t tot_read = 0;
	struct stat buf;
	char * contents;

	/* On Windows, 'b' (binary mode) is mandatory, otherwise fstat file length
	 * is confused by crlf counted as one byte. POSIX systems just ignore it. */
	FILE *fp = dictopen(dict_name, "rb");

	if (fp == NULL)
		return NULL;

	/* Get the file size, in bytes. */
	fd = fileno(fp);
	if (0 != fstat(fd, &buf))
	{
		/* Initialized in case strerror_r() does not fill the buffer. */
		char errbuf[64] = "";
		strerror_r(errno, errbuf, sizeof(errbuf));
		fclose(fp);
		prt_error("Error: %s: Cannot get file size (%s)\n", dict_name, errbuf);
		return NULL;
	}
	tot_size = buf.st_size;
	buf_size = tot_size + 7;

	contents = (char *) malloc(sizeof(char) * buf_size);
	if (NULL == contents)
	{
		fclose(fp);
		prt_error("Error: %s: Out of memory\n", dict_name);
		return NULL;
	}

	/* Now, read the whole file.
	 * Normally, a single fread() call below reads the whole file.
	 * If it returns a short count, continue after the bytes already
	 * read, asking only for the space that is still free. Once the buffer
	 * is full the request size is 0, fread() returns 0, and the size
	 * check after the loop reports the problem. */
	while (1)
	{
		size_t read_size =
			fread(contents + tot_read, 1, buf_size - tot_read, fp);

		if (0 == read_size)
		{
			if (0 != ferror(fp))
			{
				/* Initialized in case strerror_r() does not fill the buffer. */
				char errbuf[64] = "";
				strerror_r(errno, errbuf, sizeof(errbuf));
				fclose(fp);
				prt_error("Error: %s: Read error (%s)\n", dict_name, errbuf);
				free(contents);
				return NULL;
			}
			fclose(fp);
			break;
		}
		tot_read += read_size;
	}

	/* One byte must remain for the terminating NUL. */
	if (tot_read > tot_size+6)
	{
		prt_error("Error: %s: File size is insane (%zu)!\n", dict_name, tot_size);
		free(contents);
		return NULL;
	}

	contents[tot_read] = '\0';
	return contents;
}
/**
 * Check to see if a file exists.
 *
 * Note that a file only counts as existing if it can be opened with
 * dictopen() and its size is greater than zero; an empty file yields
 * false.
 *
 * @param dict_name  Name of the file, passed as-is to dictopen().
 * @return true if the file can be opened and is non-empty.
 */
bool file_exists(const char * dict_name)
{
	bool retval = false;
	int fd;
	struct stat buf;

#if defined(_MSC_VER) || defined(__MINGW32__)
	/* binary, otherwise fstat file length is confused by crlf
	 * counted as one byte. */
	FILE *fp = dictopen(dict_name, "rb");
#else
	FILE *fp = dictopen(dict_name, "r");
#endif
	if (fp == NULL)
		return false;

	/* Get the file size, in bytes. If fstat() fails, buf is not
	 * valid and must not be inspected. */
	fd = fileno(fp);
	if ((0 == fstat(fd, &buf)) && (0 < buf.st_size)) retval = true;

	fclose(fp);
	return retval;
}
pp_knowledge *pp_knowledge_open(const char *path) { /* read knowledge from disk into pp_knowledge */ FILE *f = dictopen(path, "r"); pp_knowledge *k = (pp_knowledge *) xalloc (sizeof(pp_knowledge)); if (!f) { prt_error("Fatal Error: Couldn't find post-process knowledge file %s", path); exit(1); } k->lt = pp_lexer_open(f); fclose(f); k->string_set = string_set_create(); k->path = string_set_add(path, k->string_set); read_starting_link_table(k); read_link_sets(k); read_rules(k); initialize_set_of_links_starting_bounded_domain(k); return k; }
/**
 * Check to see if a file exists.
 *
 * Note that a file only counts as existing if it can be opened with
 * dictopen() and its size is greater than zero; an empty file yields
 * false.
 *
 * @param dict_name  Name of the file, passed as-is to dictopen().
 * @return true if the file can be opened and is non-empty.
 */
bool file_exists(const char * dict_name)
{
	bool retval = false;
	int fd;
	struct stat buf;

	/* On Windows, 'b' (binary mode) is mandatory, otherwise fstat file length
	 * is confused by crlf counted as one byte. POSIX systems just ignore it. */
	FILE *fp = dictopen(dict_name, "rb");

	if (fp == NULL)
		return false;

	/* Get the file size, in bytes. If fstat() fails, buf is not
	 * valid and must not be inspected. */
	fd = fileno(fp);
	if ((0 == fstat(fd, &buf)) && (0 < buf.st_size)) retval = true;

	fclose(fp);
	return retval;
}
pp_knowledge *pp_knowledge_open(const char *path) { /* read knowledge from disk into pp_knowledge */ FILE *f = dictopen(path, "r"); if (NULL == f) { prt_error("Error: Couldn't find post-process knowledge file %s\n", path); return NULL; } pp_knowledge *k = (pp_knowledge *) malloc (sizeof(pp_knowledge)); *k = (pp_knowledge){0}; k->lt = pp_lexer_open(f); fclose(f); if (NULL == k->lt) goto failure; k->string_set = string_set_create(); k->path = string_set_add(path, k->string_set); if (!read_starting_link_table(k)) goto failure; if (!read_link_sets(k)) goto failure; if (!read_rules(k)) goto failure; initialize_set_of_links_starting_bounded_domain(k); /* If the knowledge file was empty, do nothing at all. */ if (0 == k->nStartingLinks) { pp_knowledge_close(k); return NULL; } return k; failure: prt_error("Error: Unable to open knowledge file %s.\n", path); pp_knowledge_close(k); return NULL; }
/**
 * Read a regex file and store its entries in the dictionary's regex list.
 *
 * Each entry has the form
 *     NAME : /PATTERN/
 * with optional whitespace around the colon. A '%' starts a comment that
 * extends to the end of the line. A '/' preceded by a backslash does not
 * terminate the pattern. The patterns are stored as text only; the 're'
 * field of each node is set to NULL.
 *
 * The list is built starting at dict->regex_root. Nodes linked before an
 * error is detected stay in the list.
 *
 * @param dict       Dictionary whose regex_root list receives the entries.
 * @param file_name  Name of the regex file, passed as-is to dictopen().
 * @return 0 on success, 1 on failure (an error message is printed).
 */
int read_regex_file(Dictionary dict, const char *file_name)
{
	Regex_node **tail = &dict->regex_root; /* Last Regex_node * in list */
	Regex_node *new_re;
	char name[MAX_REGEX_NAME_LENGTH];
	char regex[MAX_REGEX_LENGTH];
	int c,prev,i,line=1;
	FILE *fp;

	fp = dictopen(file_name, "r");
	if (fp == NULL)
	{
		prt_error("Error: cannot open regex file %s\n", file_name);
		return 1;
	}

	/* read in regexs. loop broken on EOF. */
	while (1)
	{
		/* skip whitespace and comments. */
		do
		{
			do
			{
				c = fgetc(fp);
				if (c == '\n') { line++; }
			}
			while (isspace(c));

			if (c == '%')
			{
				while ((c != EOF) && (c != '\n')) { c = fgetc(fp); }
				line++;
			}
		}
		while (isspace(c));

		if (c == EOF) { break; } /* done. */

		/* read in the name of the regex. */
		i = 0;
		do
		{
			/* The last array element is reserved for the terminating
			 * NUL that is stored after the loop. */
			if (i >= MAX_REGEX_NAME_LENGTH-1)
			{
				prt_error("Error: Regex name too long on line %d\n", line);
				goto failure;
			}
			name[i++] = c;
			c = fgetc(fp);
		}
		while ((!isspace(c)) && (c != ':') && (c != EOF));
		name[i] = '\0';

		/* Skip possible whitespace after name, expect colon. */
		while (isspace(c))
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		if (c != ':')
		{
			prt_error("Error: Regex missing colon on line %d\n", line);
			goto failure;
		}

		/* Skip whitespace after colon, expect slash. */
		do
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		while (isspace(c));
		if (c != '/')
		{
			prt_error("Error: Regex missing leading slash on line %d\n", line);
			goto failure;
		}

		/* Read in the regex. The terminating character (the closing
		 * slash, or EOF) is stored too, and is then overwritten by
		 * the NUL terminator. */
		prev = 0;
		i = 0;
		do
		{
			if (i > MAX_REGEX_LENGTH-1)
			{
				prt_error("Error: Regex too long on line %d\n", line);
				goto failure;
			}
			prev = c;
			c = fgetc(fp);
			regex[i++] = c;
		}
		while ((c != '/' || prev == '\\') && (c != EOF));
		regex[i-1] = '\0';

		/* Expect termination by a slash. */
		if (c != '/')
		{
			prt_error("Error: Regex missing trailing slash on line %d\n", line);
			goto failure;
		}

		/* Create new Regex_node and add to dict list. */
		new_re = (Regex_node *) malloc(sizeof(Regex_node));
		if (NULL == new_re)
		{
			prt_error("Error: Out of memory reading regex file %s\n", file_name);
			goto failure;
		}
		new_re->name = strdup(name);
		new_re->pattern = strdup(regex);
		if ((NULL == new_re->name) || (NULL == new_re->pattern))
		{
			prt_error("Error: Out of memory reading regex file %s\n", file_name);
			free(new_re->name);
			free(new_re->pattern);
			free(new_re);
			goto failure;
		}
		new_re->re = NULL;
		new_re->next = NULL;
		*tail = new_re;
		tail = &new_re->next;
	}

	fclose(fp);
	return 0;

failure:
	fclose(fp);
	return 1;
}