/**
 * enchant_pwl_add
 * @pwl: the personal word list to extend
 * @word: the word to add
 * @len: number of bytes of @word to use
 *
 * Refreshes the in-memory list from the backing file, inserts the word
 * into the trie and, when the list has a backing file, appends the word
 * to it. Failure to open the file is silently ignored and the results of
 * the writes are not checked, so persisting the word is best-effort.
 */
void enchant_pwl_add(EnchantPWL *pwl,
                     const char *const word, size_t len)
{
    FILE *out;
    struct stat st;

    enchant_pwl_refresh_from_file(pwl);
    enchant_pwl_add_to_trie(pwl, word, len);

    /* A list without a file name lives in memory only. */
    if (pwl->filename == NULL)
        return;

    out = enchant_fopen(pwl->filename, "a");
    if (out == NULL)
        return;

    enchant_lock_file (out);

    /* Record the file's modification time as seen at this point. */
    if (g_stat(pwl->filename, &st) == 0)
        pwl->file_changed = st.st_mtime;

    /* The separator goes first: we are appending, and nothing guarantees
       that the existing content ends with a newline. */
    fputc ('\n', out);
    fwrite (word, sizeof(char), len, out);

    enchant_unlock_file (out);
    fclose (out);
}
/** * enchant_pwl_init_with_file * * Returns: a new PWL object used to store/check/suggest words * or NULL if the file cannot be opened or created */ EnchantPWL* enchant_pwl_init_with_file(const char * file) { FILE* fd; EnchantPWL *pwl; g_return_val_if_fail (file != NULL, NULL); fd = enchant_fopen(file, "ab+"); if(fd == NULL) { return NULL; } fclose(fd); pwl = enchant_pwl_init(); pwl->filename = g_strdup(file); pwl->file_changed = 0; enchant_pwl_refresh_from_file(pwl); return pwl; }
/**
 * enchant_pwl_remove
 * @pwl: the personal word list to modify
 * @word: the word to remove
 * @len: number of bytes of @word to use
 *
 * Does nothing when enchant_pwl_check() returns 1 for the word
 * (presumably "not in the list" -- confirm against enchant_pwl_check).
 * Otherwise the list is refreshed from its file, the word is removed from
 * the trie and, if the list has a backing file, the file is rewritten
 * without any line that consists of exactly the word.
 *
 * The whole file is read into memory, reopened for writing and copied
 * back piece by piece. A leading UTF-8 byte order mark is preserved.
 */
void enchant_pwl_remove(EnchantPWL *pwl,
                        const char *const word, size_t len)
{
    const gunichar BOM = 0xfeff;
    char *contents;
    size_t length;
    char *target;
    char *body;
    char *cursor;
    FILE *out;
    struct stat st;

    if (enchant_pwl_check(pwl, word, len) == 1)
        return;

    enchant_pwl_refresh_from_file(pwl);
    enchant_pwl_remove_from_trie(pwl, word, len);

    if (!pwl->filename)
        return;

    if (!g_file_get_contents(pwl->filename, &contents, &length, NULL))
        return;

    /* binary because g_file_get_contents reads binary */
    out = enchant_fopen(pwl->filename, "wb");
    if (!out)
    {
        g_free(contents);
        return;
    }

    enchant_lock_file (out);
    target = g_strndup(word, len);

    /* Copy a leading BOM through untouched; matching starts after it. */
    body = contents;
    if (g_utf8_get_char(contents) == BOM)
    {
        body = g_utf8_next_char(contents);
        fwrite (contents, sizeof(char), body - contents, out);
    }

    cursor = body;
    for (;;)
    {
        char *hit = strstr(cursor, target);
        char *hit_end;

        if (hit == NULL)
        {
            /* No further candidates: flush whatever is left. */
            fwrite (cursor, sizeof(char), length - (cursor - contents), out);
            break;
        }

        hit_end = hit + len;

        /* Only a hit that spans a whole line counts: it must start at the
           beginning of the data or right after a line break, and end at
           the end of the data or right before a line break. */
        if ((hit == body || hit[-1] == '\n' || hit[-1] == '\r')
            && (hit_end == contents + length || *hit_end == '\n' || *hit_end == '\r'))
        {
            /* Keep what precedes the hit, drop the hit and the line
               break characters that follow it. */
            fwrite (cursor, sizeof(char), hit - cursor, out);
            cursor = hit_end;
            while (*cursor == '\n' || *cursor == '\r')
                ++cursor;
            continue;
        }

        /* Substring of a longer line: keep everything up to and including
           the first byte of the hit and resume searching after it. */
        fwrite (cursor, sizeof(char), hit - cursor + 1, out);
        cursor = hit + 1;
    }

    g_free(target);

    if (g_stat(pwl->filename, &st) == 0)
        pwl->file_changed = st.st_mtime;

    enchant_unlock_file (out);
    fclose (out);

    g_free(contents);
}
/**
 * enchant_pwl_refresh_from_file
 * @pwl: the personal word list to bring up to date
 *
 * Reloads the word list from its backing file when the file's
 * modification time differs from the one recorded in @pwl. Nothing
 * happens when the list has no file, when the file cannot be stat'ed, or
 * when the modification time is unchanged.
 *
 * On reload the trie and the word hash table are discarded and rebuilt.
 * The file is read line by line:
 *  - a UTF-8 byte order mark at the start of the first line is skipped;
 *  - lines that do not fit in the read buffer are skipped with a warning;
 *  - lines starting with '#' are ignored;
 *  - lines that are not valid UTF-8 are skipped with a warning.
 */
static void enchant_pwl_refresh_from_file(EnchantPWL* pwl)
{
    char buffer[BUFSIZ];
    char* line;
    size_t line_number = 1;
    FILE *f;
    struct stat stats;

    if(!pwl->filename)
        return;

    if(g_stat(pwl->filename, &stats)!=0)
        return;    /*presumably I won't be able to open the file either*/

    if(pwl->file_changed == stats.st_mtime)
        return;    /*nothing changed since last read*/

    enchant_trie_free(pwl->trie);
    pwl->trie = NULL;
    g_hash_table_destroy (pwl->words_in_trie);
    pwl->words_in_trie = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);

    f = enchant_fopen(pwl->filename, "r");
    if (!f)
        return;

    pwl->file_changed = stats.st_mtime;

    enchant_lock_file (f);

    for (;NULL != (fgets (buffer, sizeof (buffer), f));++line_number)
    {
        const gunichar BOM = 0xfeff;
        size_t l;

        line = buffer;
        if(line_number == 1 && BOM == g_utf8_get_char(line))
            line = g_utf8_next_char(line);

        /* The string may be empty here (BOM-only first line, or a line
           starting with a NUL byte), so check the length before looking
           at the last character instead of computing strlen()-1. */
        l = strlen(line);
        if (l > 0 && line[l-1]=='\n')
        {
            line[--l] = '\0';
        }
        else if(!feof(f)) /* ignore lines longer than BUFSIZ. */
        {
            g_warning ("Line too long (ignored) in %s at line:%lu\n",
                       pwl->filename, (unsigned long) line_number);

            /* Discard the remainder of the overlong line. Each chunk is
               read into buffer, so the newline test must look at buffer
               (line may point past a BOM inside it). */
            while (NULL != (fgets (buffer, sizeof (buffer), f)))
            {
                size_t chunk = strlen(buffer);

                if (chunk > 0 && buffer[chunk-1]=='\n')
                    break;
            }
            continue;
        }

        if( line[0] != '#')
        {
            if(g_utf8_validate(line, -1, NULL))
                enchant_pwl_add_to_trie(pwl, line, l);
            else
                g_warning ("Bad UTF-8 sequence in %s at line:%lu\n",
                           pwl->filename, (unsigned long) line_number);
        }
    }

    enchant_unlock_file (f);
    fclose (f);
}
/*! * \param hashname name of the hash file (dictionary) * * \return */ int ISpellChecker::linit (char *hashname) { FILE* fpHash; register int i; register struct dent * dp; struct flagent * entry; struct flagptr * ind; int nextchar, x; int viazero; register ichar_t * cp; if ((fpHash = enchant_fopen (hashname, "rb")) == NULL) { return (-1); } m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash); if (m_hashsize < static_cast<int>(sizeof(m_hashheader))) { if (m_hashsize < 0) fprintf (stderr, LOOKUP_C_CANT_READ, hashname); else if (m_hashsize == 0) fprintf (stderr, LOOKUP_C_NULL_HASH, hashname); else fprintf (stderr, LOOKUP_C_SHORT_HASH (m_hashname, m_hashsize, static_cast<int>(sizeof m_hashheader))); return (-1); } else if (m_hashheader.magic != MAGIC) { fprintf (stderr, LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC), static_cast<unsigned int>(m_hashheader.magic))); return (-1); } else if (m_hashheader.magic2 != MAGIC) { fprintf (stderr, LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC), static_cast<unsigned int>(m_hashheader.magic2))); return (-1); } /* else if (hashheader.compileoptions != COMPILEOPTIONS*/ else if ( 1 != 1 || m_hashheader.maxstringchars != MAXSTRINGCHARS || m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN) { fprintf (stderr, LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions), m_hashheader.maxstringchars, m_hashheader.maxstringcharlen, static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN)); return (-1); } { m_hashtbl = (struct dent *) calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent)); m_hashsize = m_hashheader.tblsize; m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize))); } m_numsflags = m_hashheader.stblsize; m_numpflags = m_hashheader.ptblsize; m_sflaglist = (struct flagent *) malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent)); if (m_hashtbl == NULL || 
m_hashstrings == NULL || m_sflaglist == NULL) { fprintf (stderr, LOOKUP_C_NO_HASH_SPACE); return (-1); } m_pflaglist = m_sflaglist + m_numsflags; { if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash) != static_cast<size_t>(m_hashheader.stringsize) ) { fprintf (stderr, LOOKUP_C_BAD_FORMAT); fprintf (stderr, "stringsize err\n" ); return (-1); } if ( m_hashheader.compileoptions & 0x04 ) { if( fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash) != (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent)))) { fprintf (stderr, LOOKUP_C_BAD_FORMAT); return (-1); } } else { for( x=0; x<m_hashheader.tblsize; x++ ) { if( fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash) != 1) { fprintf (stderr, LOOKUP_C_BAD_FORMAT); return (-1); } } /*for*/ } /*else*/ } if (fread (reinterpret_cast<char *>(m_sflaglist), 1, static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash) != (m_numsflags + m_numpflags) * sizeof (struct flagent)) { fprintf (stderr, LOOKUP_C_BAD_FORMAT); return (-1); } fclose (fpHash); { for (i = m_hashsize, dp = m_hashtbl; --i >= 0; dp++) { if (dp->word == (char *) -1) dp->word = NULL; else dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ]; if (dp->next == (struct dent *) -1) dp->next = NULL; else dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ]; } } for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++) { if (entry->stripl) entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]); else entry->strip = NULL; if (entry->affl) entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]); else entry->affix = NULL; } /* ** Warning - 'entry' and 'i' are reset in the body of the loop ** below. Don't try to optimize it by (e.g.) 
moving the decrement ** of i into the loop condition. */ for (i = m_numsflags, entry = m_sflaglist; i > 0; i--, entry++) { if (entry->affl == 0) { cp = NULL; ind = &m_sflagindex[0]; viazero = 1; } else { cp = entry->affix + entry->affl - 1; ind = &m_sflagindex[*cp]; viazero = 0; while (ind->numents == 0 && ind->pu.fp != NULL) { if (cp == entry->affix) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*--cp]; viazero = 0; } } } if (ind->numents == 0) ind->pu.ent = entry; ind->numents++; /* ** If this index entry has more than MAXSEARCH flags in ** it, we will split it into subentries to reduce the ** searching. However, the split doesn't make sense in ** two cases: (a) if we are already at the end of the ** current affix, or (b) if all the entries in the list ** have identical affixes. Since the list is sorted, (b) ** is true if the first and last affixes in the list ** are identical. */ if (!viazero && ind->numents >= MAXSEARCH && icharcmp (entry->affix, ind->pu.ent->affix) != 0) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = m_numsflags - (entry - m_sflaglist); ind->pu.fp = (struct flagptr *) calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars), sizeof (struct flagptr)); if (ind->pu.fp == NULL) { fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } ind->numents = 0; } } /* ** Warning - 'entry' and 'i' are reset in the body of the loop ** below. Don't try to optimize it by (e.g.) moving the decrement ** of i into the loop condition. 
*/ for (i = m_numpflags, entry = m_pflaglist; i > 0; i--, entry++) { if (entry->affl == 0) { cp = NULL; ind = &m_pflagindex[0]; viazero = 1; } else { cp = entry->affix; ind = &m_pflagindex[*cp++]; viazero = 0; while (ind->numents == 0 && ind->pu.fp != NULL) { if (*cp == 0) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*cp++]; viazero = 0; } } } if (ind->numents == 0) ind->pu.ent = entry; ind->numents++; /* ** If this index entry has more than MAXSEARCH flags in ** it, we will split it into subentries to reduce the ** searching. However, the split doesn't make sense in ** two cases: (a) if we are already at the end of the ** current affix, or (b) if all the entries in the list ** have identical affixes. Since the list is sorted, (b) ** is true if the first and last affixes in the list ** are identical. */ if (!viazero && ind->numents >= MAXSEARCH && icharcmp (entry->affix, ind->pu.ent->affix) != 0) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = m_numpflags - (entry - m_pflaglist); ind->pu.fp = static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars, sizeof (struct flagptr))); if (ind->pu.fp == NULL) { fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } ind->numents = 0; } } #ifdef INDEXDUMP fprintf (stderr, "Prefix index table:\n"); dumpindex (m_pflagindex, 0); fprintf (stderr, "Suffix index table:\n"); dumpindex (m_sflagindex, 0); #endif if (m_hashheader.nstrchartype == 0) m_chartypes = NULL; else { m_chartypes = (struct strchartype *) malloc (m_hashheader.nstrchartype * sizeof (struct strchartype)); if (m_chartypes == NULL) { fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } for (i = 0, nextchar = m_hashheader.strtypestart; i < m_hashheader.nstrchartype; i++) { m_chartypes[i].name = &m_hashstrings[nextchar]; nextchar += strlen (m_chartypes[i].name) + 1; m_chartypes[i].deformatter = &m_hashstrings[nextchar]; nextchar += strlen (m_chartypes[i].deformatter) + 1; 
m_chartypes[i].suffixes = &m_hashstrings[nextchar]; while (m_hashstrings[nextchar] != '\0') nextchar += strlen (&m_hashstrings[nextchar]) + 1; nextchar++; } } initckch(NULL); return (0); }
int main (int argc, char ** argv) { IspellMode_t mode = MODE_NONE; char * file = NULL; int i, rval = 0; FILE * fp = stdin; int countLines = 0; gchar *dictionary = 0; /* -d dictionary */ /* Initialize system locale */ setlocale(LC_ALL, ""); #ifdef WIN32 /* Workaround about glib's "locale" not being the set C locale */ if (GetFileType(GetStdHandle(STD_INPUT_HANDLE)) != FILE_TYPE_CHAR) { sprintf_s(charset,15,"CP%u",GetACP()); } else { sprintf_s(charset,15,"CP%u",GetConsoleCP()); } #endif for (i = 1; i < argc; i++) { char * arg = argv[i]; if (arg[0] == '-') { if (strlen (arg) == 2) { /* It seems that the first one of these that is specified gets precedence. */ if (arg[1] == 'a' && MODE_NONE == mode) mode = MODE_A; else if (arg[1] == 'l' && MODE_NONE == mode) mode = MODE_L; else if (arg[1] == 'v' && MODE_NONE == mode) mode = MODE_VERSION; else if (arg[1] == 'L' && MODE_NONE == mode) countLines = 1; else if (arg[1] == 'm') ; /* Ignore. Emacs calls ispell with '-m'. */ else if (arg[1] == 'd') { i++; dictionary = argv[i]; /* Emacs calls ispell with '-d dictionary'. */ } } else if ((strlen (arg) == 3) && (arg[1] == 'v') && (arg[2] == 'v')) { mode = MODE_VERSION; /* Emacs (or ispell.el) calls [ai]spell with '-vv'. */ } else if (arg[1] == 'd') { dictionary = arg + 2; /* Accept "-ddictionary", i.e. no space between -d and dictionary. */ } else if (strlen (arg) > 2) { fprintf (stderr, "-%c does not take any parameters.\n", arg[1]); exit(1); } else file = arg; } else file = arg; } if (mode == MODE_VERSION) { print_version (stdout); } else if (mode == MODE_NONE && !file) { print_help (stdout, argv[0]); } else { if (file) { fp = enchant_fopen (file, "rb"); if (!fp) { fprintf (stderr, "Error: Could not open the file \"%s\" for reading.\n", file); exit (1); } } rval = parse_file (fp, stdout, mode, countLines, dictionary); if (file) fclose (fp); } return rval; }