/*! * \param hashname name of the hash file (dictionary) * * \return */ int ISpellChecker::linit(char *hashname) { FILE *fpHash; register int i; register struct dent *dp; struct flagent *entry; struct flagptr *ind; int nextchar, x; int viazero; register ichar_t *cp; if((fpHash = fopen(hashname, "rb")) == NULL) { return (-1); } m_hashsize = fread(reinterpret_cast< char * >(&m_hashheader), 1, sizeof m_hashheader, fpHash); if(m_hashsize < static_cast< int >(sizeof(m_hashheader))) { if(m_hashsize < 0) fprintf(stderr, LOOKUP_C_CANT_READ, hashname); else if(m_hashsize == 0) fprintf(stderr, LOOKUP_C_NULL_HASH, hashname); else fprintf(stderr, LOOKUP_C_SHORT_HASH(m_hashname, m_hashsize, static_cast< int >(sizeof m_hashheader))); return (-1); } else if(m_hashheader.magic != MAGIC) { fprintf(stderr, LOOKUP_C_BAD_MAGIC(hashname, static_cast< unsigned int >(MAGIC), static_cast< unsigned int >(m_hashheader.magic))); return (-1); } else if(m_hashheader.magic2 != MAGIC) { fprintf(stderr, LOOKUP_C_BAD_MAGIC2(hashname, static_cast< unsigned int >(MAGIC), static_cast< unsigned int >(m_hashheader.magic2))); return (-1); } /* else if (hashheader.compileoptions != COMPILEOPTIONS*/ else if(1 != 1 || m_hashheader.maxstringchars != MAXSTRINGCHARS || m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN) { fprintf(stderr, LOOKUP_C_BAD_OPTIONS(static_cast< unsigned int >(m_hashheader.compileoptions), m_hashheader.maxstringchars, m_hashheader.maxstringcharlen, static_cast< unsigned int >(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN)); return (-1); } { m_hashtbl = (struct dent *)calloc(static_cast< unsigned >(m_hashheader.tblsize), sizeof(struct dent)); m_hashsize = m_hashheader.tblsize; m_hashstrings = static_cast< char * >(malloc(static_cast< unsigned >(m_hashheader.stringsize))); } m_numsflags = m_hashheader.stblsize; m_numpflags = m_hashheader.ptblsize; m_sflaglist = (struct flagent *)malloc((m_numsflags + m_numpflags) * sizeof(struct flagent)); if(m_hashtbl == NULL || m_hashstrings == NULL || m_sflaglist == NULL) { fprintf(stderr, LOOKUP_C_NO_HASH_SPACE); return (-1); } m_pflaglist = m_sflaglist + m_numsflags; { if(fread(m_hashstrings, 1, static_cast< unsigned >(m_hashheader.stringsize), fpHash) != static_cast< size_t >(m_hashheader.stringsize)) { fprintf(stderr, LOOKUP_C_BAD_FORMAT); fprintf(stderr, "stringsize err\n"); return (-1); } if(m_hashheader.compileoptions & 0x04) { if(fread(reinterpret_cast< char * >(m_hashtbl), 1, static_cast< unsigned >(m_hashheader.tblsize) * sizeof(struct dent), fpHash) != (static_cast< size_t >(m_hashheader.tblsize * sizeof(struct dent)))) { fprintf(stderr, LOOKUP_C_BAD_FORMAT); return (-1); } } else { for(x = 0; x < m_hashheader.tblsize; x++) { if(fread(reinterpret_cast< char * >(m_hashtbl + x), sizeof(struct dent) - sizeof(MASKTYPE), 1, fpHash) != 1) { fprintf(stderr, LOOKUP_C_BAD_FORMAT); return (-1); } } /*for*/ } /*else*/ } if(fread(reinterpret_cast< char * >(m_sflaglist), 1, static_cast< unsigned >(m_numsflags + m_numpflags) * sizeof(struct flagent), fpHash) != (m_numsflags + m_numpflags) * sizeof(struct flagent)) { fprintf(stderr, LOOKUP_C_BAD_FORMAT); return (-1); } fclose(fpHash); { for(i = m_hashsize, dp = m_hashtbl; --i >= 0; dp++) { if(dp->word == (char *)-1) dp->word = NULL; else dp->word = &m_hashstrings[reinterpret_cast< size_t >(dp->word)]; if(dp->next == (struct dent *)-1) dp->next = NULL; else dp->next = &m_hashtbl[reinterpret_cast< size_t >(dp->next)]; } } for(i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++) { if(entry->stripl) entry->strip = reinterpret_cast< ichar_t * >(&m_hashstrings[reinterpret_cast< size_t >(entry->strip)]); else entry->strip = NULL; if(entry->affl) entry->affix = reinterpret_cast< ichar_t * >(&m_hashstrings[reinterpret_cast< size_t >(entry->affix)]); else entry->affix = NULL; } /* ** Warning - 'entry' and 'i' are reset in the body of the loop ** below. Don't try to optimize it by (e.g.) moving the decrement ** of i into the loop condition. */ for(i = m_numsflags, entry = m_sflaglist; i > 0; i--, entry++) { if(entry->affl == 0) { cp = NULL; ind = &m_sflagindex[0]; viazero = 1; } else { cp = entry->affix + entry->affl - 1; ind = &m_sflagindex[*cp]; viazero = 0; while(ind->numents == 0 && ind->pu.fp != NULL) { if(cp == entry->affix) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*--cp]; viazero = 0; } } } if(ind->numents == 0) ind->pu.ent = entry; ind->numents++; /* ** If this index entry has more than MAXSEARCH flags in ** it, we will split it into subentries to reduce the ** searching. However, the split doesn't make sense in ** two cases: (a) if we are already at the end of the ** current affix, or (b) if all the entries in the list ** have identical affixes. Since the list is sorted, (b) ** is true if the first and last affixes in the list ** are identical. */ if(!viazero && ind->numents >= MAXSEARCH && icharcmp(entry->affix, ind->pu.ent->affix) != 0) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = m_numsflags - (entry - m_sflaglist); ind->pu.fp = (struct flagptr *)calloc(static_cast< unsigned >(SET_SIZE + m_hashheader.nstrchars), sizeof(struct flagptr)); if(ind->pu.fp == NULL) { fprintf(stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } ind->numents = 0; } } /* ** Warning - 'entry' and 'i' are reset in the body of the loop ** below. Don't try to optimize it by (e.g.) moving the decrement ** of i into the loop condition. */ for(i = m_numpflags, entry = m_pflaglist; i > 0; i--, entry++) { if(entry->affl == 0) { cp = NULL; ind = &m_pflagindex[0]; viazero = 1; } else { cp = entry->affix; ind = &m_pflagindex[*cp++]; viazero = 0; while(ind->numents == 0 && ind->pu.fp != NULL) { if(*cp == 0) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*cp++]; viazero = 0; } } } if(ind->numents == 0) ind->pu.ent = entry; ind->numents++; /* ** If this index entry has more than MAXSEARCH flags in ** it, we will split it into subentries to reduce the ** searching. However, the split doesn't make sense in ** two cases: (a) if we are already at the end of the ** current affix, or (b) if all the entries in the list ** have identical affixes. Since the list is sorted, (b) ** is true if the first and last affixes in the list ** are identical. */ if(!viazero && ind->numents >= MAXSEARCH && icharcmp(entry->affix, ind->pu.ent->affix) != 0) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = m_numpflags - (entry - m_pflaglist); ind->pu.fp = static_cast< struct flagptr * >(calloc(SET_SIZE + m_hashheader.nstrchars, sizeof(struct flagptr))); if(ind->pu.fp == NULL) { fprintf(stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } ind->numents = 0; } } #ifdef INDEXDUMP fprintf(stderr, "Prefix index table:\n"); dumpindex(m_pflagindex, 0); fprintf(stderr, "Suffix index table:\n"); dumpindex(m_sflagindex, 0); #endif if(m_hashheader.nstrchartype == 0) m_chartypes = NULL; else { m_chartypes = (struct strchartype *)malloc(m_hashheader.nstrchartype * sizeof(struct strchartype)); if(m_chartypes == NULL) { fprintf(stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } for(i = 0, nextchar = m_hashheader.strtypestart; i < m_hashheader.nstrchartype; i++) { m_chartypes[i].name = &m_hashstrings[nextchar]; nextchar += strlen(m_chartypes[i].name) + 1; m_chartypes[i].deformatter = &m_hashstrings[nextchar]; nextchar += strlen(m_chartypes[i].deformatter) + 1; m_chartypes[i].suffixes = &m_hashstrings[nextchar]; while(m_hashstrings[nextchar] != '\0') nextchar += strlen(&m_hashstrings[nextchar]) + 1; nextchar++; } } initckch(NULL); return (0); }
int linit ( ) { int hashfd; register int i; register struct dent *dp; struct flagent *entry; struct flagptr *ind; int nextchar; int viazero; register ichar_t *cp; if ( inited ) return 0; if ( ( hashfd = open ( hashname, 0 | MSDOS_BINARY_OPEN ) ) < 0 ) { ( void ) fprintf ( stderr, CANT_OPEN, hashname ); perror ( "Unable to open file" ); return ( -1 ); } hashsize = read ( hashfd, ( char * ) &hashheader, sizeof hashheader ); if ( hashsize < sizeof hashheader ) { if ( hashsize < 0 ) ( void ) fprintf ( stderr, LOOKUP_C_CANT_READ, hashname ); else if ( hashsize == 0 ) ( void ) fprintf ( stderr, LOOKUP_C_NULL_HASH, hashname ); else ( void ) fprintf ( stderr, LOOKUP_C_SHORT_HASH ( hashname, hashsize, ( int ) sizeof hashheader ) ); return ( -1 ); } else if ( hashheader.magic != MAGIC ) { ( void ) fprintf ( stderr, LOOKUP_C_BAD_MAGIC ( hashname, ( unsigned int ) MAGIC, ( unsigned int ) hashheader.magic ) ); return ( -1 ); } else if ( hashheader.magic2 != MAGIC ) { ( void ) fprintf ( stderr, LOOKUP_C_BAD_MAGIC2 ( hashname, ( unsigned int ) MAGIC, ( unsigned int ) hashheader.magic2 ) ); return ( -1 ); } else if ( hashheader.compileoptions != COMPILEOPTIONS || hashheader.maxstringchars != MAXSTRINGCHARS || hashheader.maxstringcharlen != MAXSTRINGCHARLEN ) { ( void ) fprintf ( stderr, LOOKUP_C_BAD_OPTIONS ( ( unsigned int ) hashheader.compileoptions, hashheader.maxstringchars, hashheader.maxstringcharlen, ( unsigned int ) COMPILEOPTIONS, MAXSTRINGCHARS, MAXSTRINGCHARLEN ) ); return ( -1 ); } if ( nodictflag ) { /* Dictionary is not needed - create an empty dummy table. We * actually have to have one entry since the hash algorithm involves * a divide by the table size (actually modulo, but zero is still * unacceptable). So we create an empty entry. */ hashsize = 1; /* This prevents divides by zero */ hashtbl = ( struct dent * ) calloc ( 1, sizeof ( struct dent ) ); if ( hashtbl == NULL ) { ( void ) fprintf ( stderr, LOOKUP_C_NO_HASH_SPACE ); return ( -1 ); } hashtbl[0].word = NULL; hashtbl[0].next = NULL; hashtbl[0].flagfield &= ~( USED | KEEP ); /* The flag bits don't matter, but calloc cleared them. */ hashstrings = ( char * ) malloc ( ( unsigned ) hashheader.lstringsize ); } else { hashtbl = ( struct dent * ) malloc ( ( unsigned ) hashheader.tblsize * sizeof ( struct dent ) ); hashsize = hashheader.tblsize; hashstrings = ( char * ) malloc ( ( unsigned ) hashheader.stringsize ); } numsflags = hashheader.stblsize; numpflags = hashheader.ptblsize; sflaglist = ( struct flagent * ) malloc ( ( numsflags + numpflags ) * sizeof ( struct flagent ) ); if ( hashtbl == NULL || hashstrings == NULL || sflaglist == NULL ) { ( void ) fprintf ( stderr, LOOKUP_C_NO_HASH_SPACE ); return ( -1 ); } pflaglist = sflaglist + numsflags; if ( nodictflag ) { /* Read just the strings for the language table, and skip over the * rest of the strings and all of the hash table. */ if ( read ( hashfd, hashstrings, ( unsigned ) hashheader.lstringsize ) != hashheader.lstringsize ) { ( void ) fprintf ( stderr, LOOKUP_C_BAD_FORMAT ); return ( -1 ); } ( void ) lseek ( hashfd, ( long ) hashheader.stringsize - ( long ) hashheader.lstringsize + ( long ) hashheader.tblsize * ( long ) sizeof ( struct dent ), 1 ); } else { if ( read ( hashfd, hashstrings, ( unsigned ) hashheader.stringsize ) != hashheader.stringsize || read ( hashfd, ( char * ) hashtbl, ( unsigned ) hashheader.tblsize * sizeof ( struct dent ) ) != hashheader.tblsize * sizeof ( struct dent ) ) { ( void ) fprintf ( stderr, LOOKUP_C_BAD_FORMAT ); return ( -1 ); } } if ( read ( hashfd, ( char * ) sflaglist, ( unsigned ) ( numsflags + numpflags ) * sizeof ( struct flagent ) ) != ( numsflags + numpflags ) * sizeof ( struct flagent ) ) { ( void ) fprintf ( stderr, LOOKUP_C_BAD_FORMAT ); return ( -1 ); } ( void ) close ( hashfd ); if ( !nodictflag ) { for ( i = hashsize, dp = hashtbl; --i >= 0; dp++ ) { if ( dp->word == ( char * ) -1 ) dp->word = NULL; else dp->word = &hashstrings[( int ) ( dp->word )]; if ( dp->next == ( struct dent * ) - 1 ) dp->next = NULL; else dp->next = &hashtbl[( int ) ( dp->next )]; } } for ( i = numsflags + numpflags, entry = sflaglist; --i >= 0; entry++ ) { if ( entry->stripl ) entry->strip = ( ichar_t * ) & hashstrings[( int ) entry->strip]; else entry->strip = NULL; if ( entry->affl ) entry->affix = ( ichar_t * ) & hashstrings[( int ) entry->affix]; else entry->affix = NULL; } /* * Warning - 'entry' and 'i' are reset in the body of the loop * * below. Don't try to optimize it by (e.g.) moving the decrement * of * i into the loop condition. */ for ( i = numsflags, entry = sflaglist; i > 0; i--, entry++ ) { if ( entry->affl == 0 ) { cp = NULL; ind = &sflagindex[0]; viazero = 1; } else { cp = entry->affix + entry->affl - 1; ind = &sflagindex[*cp]; viazero = 0; while ( ind->numents == 0 && ind->pu.fp != NULL ) { if ( cp == entry->affix ) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*--cp]; viazero = 0; } } } if ( ind->numents == 0 ) ind->pu.ent = entry; ind->numents++; /* * If this index entry has more than MAXSEARCH flags in * it, we * will split it into subentries to reduce the * searching. * However, the split doesn't make sense in * two cases: (a) if we * are already at the end of the * current affix, or (b) if all the * entries in the list * have identical affixes. Since the list is * sorted, (b) * is true if the first and last affixes in the list * * are identical. */ if ( !viazero && ind->numents >= MAXSEARCH && icharcmp ( entry->affix, ind->pu.ent->affix ) != 0 ) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = numsflags - ( entry - sflaglist ); ind->pu.fp = ( struct flagptr * ) calloc ( ( unsigned ) ( SET_SIZE + hashheader.nstrchars ), sizeof ( struct flagptr ) ); if ( ind->pu.fp == NULL ) { ( void ) fprintf ( stderr, LOOKUP_C_NO_LANG_SPACE ); return ( -1 ); } ind->numents = 0; } } /* * Warning - 'entry' and 'i' are reset in the body of the loop * * below. Don't try to optimize it by (e.g.) moving the decrement * of * i into the loop condition. */ for ( i = numpflags, entry = pflaglist; i > 0; i--, entry++ ) { if ( entry->affl == 0 ) { cp = NULL; ind = &pflagindex[0]; viazero = 1; } else { cp = entry->affix; ind = &pflagindex[*cp++]; viazero = 0; while ( ind->numents == 0 && ind->pu.fp != NULL ) { if ( *cp == 0 ) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*cp++]; viazero = 0; } } } if ( ind->numents == 0 ) ind->pu.ent = entry; ind->numents++; /* * If this index entry has more than MAXSEARCH flags in * it, we * will split it into subentries to reduce the * searching. * However, the split doesn't make sense in * two cases: (a) if we * are already at the end of the * current affix, or (b) if all the * entries in the list * have identical affixes. Since the list is * sorted, (b) * is true if the first and last affixes in the list * * are identical. */ if ( !viazero && ind->numents >= MAXSEARCH && icharcmp ( entry->affix, ind->pu.ent->affix ) != 0 ) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = numpflags - ( entry - pflaglist ); ind->pu.fp = ( struct flagptr * ) calloc ( SET_SIZE + hashheader.nstrchars, sizeof ( struct flagptr ) ); if ( ind->pu.fp == NULL ) { ( void ) fprintf ( stderr, LOOKUP_C_NO_LANG_SPACE ); return ( -1 ); } ind->numents = 0; } } #ifdef INDEXDUMP ( void ) fprintf ( stderr, "Prefix index table:\n" ); dumpindex ( pflagindex, 0 ); ( void ) fprintf ( stderr, "Suffix index table:\n" ); dumpindex ( sflagindex, 0 ); #endif if ( hashheader.nstrchartype == 0 ) chartypes = NULL; else { chartypes = ( struct strchartype * ) malloc ( hashheader.nstrchartype * sizeof ( struct strchartype ) ); if ( chartypes == NULL ) { ( void ) fprintf ( stderr, LOOKUP_C_NO_LANG_SPACE ); return ( -1 ); } for ( i = 0, nextchar = hashheader.strtypestart; i < hashheader.nstrchartype; i++ ) { chartypes[i].name = &hashstrings[nextchar]; nextchar += strlen ( chartypes[i].name ) + 1; chartypes[i].deformatter = &hashstrings[nextchar]; nextchar += strlen ( chartypes[i].deformatter ) + 1; chartypes[i].suffixes = &hashstrings[nextchar]; while ( hashstrings[nextchar] != '\0' ) nextchar += strlen ( &hashstrings[nextchar] ) + 1; nextchar++; } } inited = 1; return ( 0 ); }