Exemplo n.º 1
0
/*!
 * \param hashname name of the hash file (dictionary)
 *
 * \return
 */
int ISpellChecker::linit(char *hashname)
{
    FILE *fpHash;

    register int i;
    register struct dent *dp;
    struct flagent *entry;
    struct flagptr *ind;
    int nextchar, x;
    int viazero;
    register ichar_t *cp;

    if((fpHash = fopen(hashname, "rb")) == NULL)
    {
        return (-1);
    }

    m_hashsize = fread(reinterpret_cast< char * >(&m_hashheader), 1, sizeof m_hashheader, fpHash);
    if(m_hashsize < static_cast< int >(sizeof(m_hashheader)))
    {
        if(m_hashsize < 0)
            fprintf(stderr, LOOKUP_C_CANT_READ, hashname);
        else if(m_hashsize == 0)
            fprintf(stderr, LOOKUP_C_NULL_HASH, hashname);
        else
            fprintf(stderr, LOOKUP_C_SHORT_HASH(m_hashname, m_hashsize, static_cast< int >(sizeof m_hashheader)));
        return (-1);
    }
    else if(m_hashheader.magic != MAGIC)
    {
        fprintf(stderr, LOOKUP_C_BAD_MAGIC(hashname, static_cast< unsigned int >(MAGIC), static_cast< unsigned int >(m_hashheader.magic)));
        return (-1);
    }
    else if(m_hashheader.magic2 != MAGIC)
    {
        fprintf(stderr, LOOKUP_C_BAD_MAGIC2(hashname, static_cast< unsigned int >(MAGIC), static_cast< unsigned int >(m_hashheader.magic2)));
        return (-1);
    }
    /*  else if (hashheader.compileoptions != COMPILEOPTIONS*/
    else if(1 != 1 || m_hashheader.maxstringchars != MAXSTRINGCHARS || m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
    {
        fprintf(stderr,
                LOOKUP_C_BAD_OPTIONS(static_cast< unsigned int >(m_hashheader.compileoptions), m_hashheader.maxstringchars,
                                     m_hashheader.maxstringcharlen, static_cast< unsigned int >(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN));
        return (-1);
    }

    {
        m_hashtbl = (struct dent *)calloc(static_cast< unsigned >(m_hashheader.tblsize), sizeof(struct dent));
        m_hashsize = m_hashheader.tblsize;
        m_hashstrings = static_cast< char * >(malloc(static_cast< unsigned >(m_hashheader.stringsize)));
    }
    m_numsflags = m_hashheader.stblsize;
    m_numpflags = m_hashheader.ptblsize;
    m_sflaglist = (struct flagent *)malloc((m_numsflags + m_numpflags) * sizeof(struct flagent));
    if(m_hashtbl == NULL || m_hashstrings == NULL || m_sflaglist == NULL)
    {
        fprintf(stderr, LOOKUP_C_NO_HASH_SPACE);
        return (-1);
    }
    m_pflaglist = m_sflaglist + m_numsflags;

    {
        if(fread(m_hashstrings, 1, static_cast< unsigned >(m_hashheader.stringsize), fpHash) != static_cast< size_t >(m_hashheader.stringsize))
        {
            fprintf(stderr, LOOKUP_C_BAD_FORMAT);
            fprintf(stderr, "stringsize err\n");
            return (-1);
        }
        if(m_hashheader.compileoptions & 0x04)
        {
            if(fread(reinterpret_cast< char * >(m_hashtbl), 1, static_cast< unsigned >(m_hashheader.tblsize) * sizeof(struct dent), fpHash)
               != (static_cast< size_t >(m_hashheader.tblsize * sizeof(struct dent))))
            {
                fprintf(stderr, LOOKUP_C_BAD_FORMAT);
                return (-1);
            }
        }
        else
        {
            for(x = 0; x < m_hashheader.tblsize; x++)
            {
                if(fread(reinterpret_cast< char * >(m_hashtbl + x), sizeof(struct dent) - sizeof(MASKTYPE), 1, fpHash) != 1)
                {
                    fprintf(stderr, LOOKUP_C_BAD_FORMAT);
                    return (-1);
                }
            } /*for*/
        }     /*else*/
    }
    if(fread(reinterpret_cast< char * >(m_sflaglist), 1, static_cast< unsigned >(m_numsflags + m_numpflags) * sizeof(struct flagent), fpHash)
       != (m_numsflags + m_numpflags) * sizeof(struct flagent))
    {
        fprintf(stderr, LOOKUP_C_BAD_FORMAT);
        return (-1);
    }
    fclose(fpHash);

    {
        for(i = m_hashsize, dp = m_hashtbl; --i >= 0; dp++)
        {
            if(dp->word == (char *)-1)
                dp->word = NULL;
            else
                dp->word = &m_hashstrings[reinterpret_cast< size_t >(dp->word)];
            if(dp->next == (struct dent *)-1)
                dp->next = NULL;
            else
                dp->next = &m_hashtbl[reinterpret_cast< size_t >(dp->next)];
        }
    }

    for(i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++)
    {
        if(entry->stripl)
            entry->strip = reinterpret_cast< ichar_t * >(&m_hashstrings[reinterpret_cast< size_t >(entry->strip)]);
        else
            entry->strip = NULL;
        if(entry->affl)
            entry->affix = reinterpret_cast< ichar_t * >(&m_hashstrings[reinterpret_cast< size_t >(entry->affix)]);
        else
            entry->affix = NULL;
    }
    /*
    ** Warning - 'entry' and 'i' are reset in the body of the loop
    ** below.  Don't try to optimize it by (e.g.) moving the decrement
    ** of i into the loop condition.
    */
    for(i = m_numsflags, entry = m_sflaglist; i > 0; i--, entry++)
    {
        if(entry->affl == 0)
        {
            cp = NULL;
            ind = &m_sflagindex[0];
            viazero = 1;
        }
        else
        {
            cp = entry->affix + entry->affl - 1;
            ind = &m_sflagindex[*cp];
            viazero = 0;
            while(ind->numents == 0 && ind->pu.fp != NULL)
            {
                if(cp == entry->affix)
                {
                    ind = &ind->pu.fp[0];
                    viazero = 1;
                }
                else
                {
                    ind = &ind->pu.fp[*--cp];
                    viazero = 0;
                }
            }
        }
        if(ind->numents == 0)
            ind->pu.ent = entry;
        ind->numents++;
        /*
        ** If this index entry has more than MAXSEARCH flags in
        ** it, we will split it into subentries to reduce the
        ** searching.  However, the split doesn't make sense in
        ** two cases:  (a) if we are already at the end of the
        ** current affix, or (b) if all the entries in the list
        ** have identical affixes.  Since the list is sorted, (b)
        ** is true if the first and last affixes in the list
        ** are identical.
        */
        if(!viazero && ind->numents >= MAXSEARCH && icharcmp(entry->affix, ind->pu.ent->affix) != 0)
        {
            /* Sneaky trick:  back up and reprocess */
            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
            i = m_numsflags - (entry - m_sflaglist);
            ind->pu.fp = (struct flagptr *)calloc(static_cast< unsigned >(SET_SIZE + m_hashheader.nstrchars), sizeof(struct flagptr));
            if(ind->pu.fp == NULL)
            {
                fprintf(stderr, LOOKUP_C_NO_LANG_SPACE);
                return (-1);
            }
            ind->numents = 0;
        }
    }
    /*
    ** Warning - 'entry' and 'i' are reset in the body of the loop
    ** below.  Don't try to optimize it by (e.g.) moving the decrement
    ** of i into the loop condition.
    */
    for(i = m_numpflags, entry = m_pflaglist; i > 0; i--, entry++)
    {
        if(entry->affl == 0)
        {
            cp = NULL;
            ind = &m_pflagindex[0];
            viazero = 1;
        }
        else
        {
            cp = entry->affix;
            ind = &m_pflagindex[*cp++];
            viazero = 0;
            while(ind->numents == 0 && ind->pu.fp != NULL)
            {
                if(*cp == 0)
                {
                    ind = &ind->pu.fp[0];
                    viazero = 1;
                }
                else
                {
                    ind = &ind->pu.fp[*cp++];
                    viazero = 0;
                }
            }
        }
        if(ind->numents == 0)
            ind->pu.ent = entry;
        ind->numents++;
        /*
        ** If this index entry has more than MAXSEARCH flags in
        ** it, we will split it into subentries to reduce the
        ** searching.  However, the split doesn't make sense in
        ** two cases:  (a) if we are already at the end of the
        ** current affix, or (b) if all the entries in the list
        ** have identical affixes.  Since the list is sorted, (b)
        ** is true if the first and last affixes in the list
        ** are identical.
        */
        if(!viazero && ind->numents >= MAXSEARCH && icharcmp(entry->affix, ind->pu.ent->affix) != 0)
        {
            /* Sneaky trick:  back up and reprocess */
            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
            i = m_numpflags - (entry - m_pflaglist);
            ind->pu.fp = static_cast< struct flagptr * >(calloc(SET_SIZE + m_hashheader.nstrchars, sizeof(struct flagptr)));
            if(ind->pu.fp == NULL)
            {
                fprintf(stderr, LOOKUP_C_NO_LANG_SPACE);
                return (-1);
            }
            ind->numents = 0;
        }
    }
#ifdef INDEXDUMP
    fprintf(stderr, "Prefix index table:\n");
    dumpindex(m_pflagindex, 0);
    fprintf(stderr, "Suffix index table:\n");
    dumpindex(m_sflagindex, 0);
#endif
    if(m_hashheader.nstrchartype == 0)
        m_chartypes = NULL;
    else
    {
        m_chartypes = (struct strchartype *)malloc(m_hashheader.nstrchartype * sizeof(struct strchartype));
        if(m_chartypes == NULL)
        {
            fprintf(stderr, LOOKUP_C_NO_LANG_SPACE);
            return (-1);
        }
        for(i = 0, nextchar = m_hashheader.strtypestart; i < m_hashheader.nstrchartype; i++)
        {
            m_chartypes[i].name = &m_hashstrings[nextchar];
            nextchar += strlen(m_chartypes[i].name) + 1;
            m_chartypes[i].deformatter = &m_hashstrings[nextchar];
            nextchar += strlen(m_chartypes[i].deformatter) + 1;
            m_chartypes[i].suffixes = &m_hashstrings[nextchar];
            while(m_hashstrings[nextchar] != '\0')
                nextchar += strlen(&m_hashstrings[nextchar]) + 1;
            nextchar++;
        }
    }

    initckch(NULL);

    return (0);
}
Exemplo n.º 2
0
/*
 * Load the hash (dictionary) file named by the global 'hashname' into
 * the global lookup tables.  Does nothing if 'inited' is already set.
 *
 * Reads and validates the file header, allocates and fills the string
 * table, the hash table (or a one-entry dummy table when 'nodictflag'
 * is set) and the affix-flag list, converts the offsets stored in the
 * file into pointers, builds the suffix and prefix flag indexes, and
 * records the string character types.
 *
 * The file descriptor is closed on every exit path.  Buffers already
 * stored in the globals are not released here when a later step fails.
 *
 * Returns 0 on success (or if already initialised), -1 on any failure.
 */
int linit (  )
{
   int hashfd;
   register int i;
   register struct dent *dp;
   struct flagent *entry;
   struct flagptr *ind;
   int nextchar;
   int viazero;
   register ichar_t *cp;

   if ( inited )
      return 0;

   if ( ( hashfd = open ( hashname, 0 | MSDOS_BINARY_OPEN ) ) < 0 ) {
      ( void ) fprintf ( stderr, CANT_OPEN, hashname );
      perror ( "Unable to open file" );
      return ( -1 );
   }
   hashsize = read ( hashfd, ( char * ) &hashheader, sizeof hashheader );
   /* Compare as signed so that a read() failure (-1) is caught here
    * instead of being promoted to a huge unsigned value.
    * NOTE(review): assumes hashsize is a signed int, as the "< 0" test
    * below implies - confirm against its declaration. */
   if ( hashsize < ( int ) sizeof hashheader ) {
      if ( hashsize < 0 )
         ( void ) fprintf ( stderr, LOOKUP_C_CANT_READ, hashname );
      else if ( hashsize == 0 )
         ( void ) fprintf ( stderr, LOOKUP_C_NULL_HASH, hashname );
      else 
         ( void ) fprintf ( stderr,
            LOOKUP_C_SHORT_HASH ( hashname, hashsize,
               ( int ) sizeof hashheader ) );
      ( void ) close ( hashfd );
      return ( -1 );
   } else if ( hashheader.magic != MAGIC ) {
      ( void ) fprintf ( stderr,
         LOOKUP_C_BAD_MAGIC ( hashname, ( unsigned int ) MAGIC,
            ( unsigned int ) hashheader.magic ) );
      ( void ) close ( hashfd );
      return ( -1 );
   } else if ( hashheader.magic2 != MAGIC ) {
      ( void ) fprintf ( stderr,
         LOOKUP_C_BAD_MAGIC2 ( hashname, ( unsigned int ) MAGIC,
            ( unsigned int ) hashheader.magic2 ) );
      ( void ) close ( hashfd );
      return ( -1 );
   } else if ( hashheader.compileoptions != COMPILEOPTIONS
         || hashheader.maxstringchars != MAXSTRINGCHARS
      || hashheader.maxstringcharlen != MAXSTRINGCHARLEN ) {
      ( void ) fprintf ( stderr,
         LOOKUP_C_BAD_OPTIONS ( ( unsigned int ) hashheader.compileoptions,
            hashheader.maxstringchars, hashheader.maxstringcharlen,
            ( unsigned int ) COMPILEOPTIONS, MAXSTRINGCHARS, MAXSTRINGCHARLEN ) );
      ( void ) close ( hashfd );
      return ( -1 );
   }
   if ( nodictflag ) {
      /* Dictionary is not needed - create an empty dummy table.  We
       * actually have to have one entry since the hash algorithm involves
       * a divide by the table size (actually modulo, but zero is still
       * unacceptable). So we create an empty entry. */
      hashsize = 1;             /* This prevents divides by zero */
      hashtbl = ( struct dent * ) calloc ( 1, sizeof ( struct dent ) );
      if ( hashtbl == NULL ) {
         ( void ) fprintf ( stderr, LOOKUP_C_NO_HASH_SPACE );
         ( void ) close ( hashfd );
         return ( -1 );
      }
      hashtbl[0].word = NULL;
      hashtbl[0].next = NULL;
      hashtbl[0].flagfield &= ~( USED | KEEP );
      /* The flag bits don't matter, but calloc cleared them. */
      hashstrings = ( char * ) malloc ( ( unsigned ) hashheader.lstringsize );
   } else {
      hashtbl =
         ( struct dent * )
         malloc ( ( unsigned ) hashheader.tblsize * sizeof ( struct dent ) );
      hashsize = hashheader.tblsize;
      hashstrings = ( char * ) malloc ( ( unsigned ) hashheader.stringsize );
   }
   numsflags = hashheader.stblsize;
   numpflags = hashheader.ptblsize;
   sflaglist = ( struct flagent * )
      malloc ( ( numsflags + numpflags ) * sizeof ( struct flagent ) );
   if ( hashtbl == NULL || hashstrings == NULL || sflaglist == NULL ) {
      ( void ) fprintf ( stderr, LOOKUP_C_NO_HASH_SPACE );
      ( void ) close ( hashfd );
      return ( -1 );
   }
   /* Prefix entries are stored directly after the suffix entries. */
   pflaglist = sflaglist + numsflags;

   if ( nodictflag ) {
      /* Read just the strings for the language table, and skip over the
       * rest of the strings and all of the hash table. */
      if ( read ( hashfd, hashstrings, ( unsigned ) hashheader.lstringsize )
         != hashheader.lstringsize ) {
         ( void ) fprintf ( stderr, LOOKUP_C_BAD_FORMAT );
         ( void ) close ( hashfd );
         return ( -1 );
      }
      ( void ) lseek ( hashfd,
         ( long ) hashheader.stringsize - ( long ) hashheader.lstringsize
         + ( long ) hashheader.tblsize * ( long ) sizeof ( struct dent ),
         1 );
   } else {
      if ( read ( hashfd, hashstrings, ( unsigned ) hashheader.stringsize )
         != hashheader.stringsize
         || read ( hashfd, ( char * ) hashtbl,
            ( unsigned ) hashheader.tblsize * sizeof ( struct dent ) )
         != hashheader.tblsize * sizeof ( struct dent ) ) {
         ( void ) fprintf ( stderr, LOOKUP_C_BAD_FORMAT );
         ( void ) close ( hashfd );
         return ( -1 );
      }
   }
   if ( read ( hashfd, ( char * ) sflaglist,
         ( unsigned ) ( numsflags + numpflags ) * sizeof ( struct flagent ) )
      != ( numsflags + numpflags ) * sizeof ( struct flagent ) ) {
      ( void ) fprintf ( stderr, LOOKUP_C_BAD_FORMAT );
      ( void ) close ( hashfd );
      return ( -1 );
   }
   ( void ) close ( hashfd );

   /* The file stores offsets in the pointer fields (-1 meaning "none");
    * turn them into real pointers.  The offsets are recovered through
    * size_t rather than int so the pointer value is not truncated where
    * pointers are wider than int. */
   if ( !nodictflag ) {
      for ( i = hashsize, dp = hashtbl; --i >= 0; dp++ ) {
         if ( dp->word == ( char * ) -1 )
            dp->word = NULL;
         else
            dp->word = &hashstrings[( size_t ) ( dp->word )];
         if ( dp->next == ( struct dent * ) - 1 )
            dp->next = NULL;
         else
            dp->next = &hashtbl[( size_t ) ( dp->next )];
      }
   }
   for ( i = numsflags + numpflags, entry = sflaglist; --i >= 0; entry++ ) {
      if ( entry->stripl )
         entry->strip = ( ichar_t * ) & hashstrings[( size_t ) entry->strip];
      else
         entry->strip = NULL;
      if ( entry->affl )
         entry->affix = ( ichar_t * ) & hashstrings[( size_t ) entry->affix];
      else
         entry->affix = NULL;
   }
   /* Build the suffix index, walking each affix from its last character
    * backwards.
    *
    * Warning - 'entry' and 'i' are reset in the body of the loop
    * below.  Don't try to optimize it by (e.g.) moving the decrement of
    * i into the loop condition. */
   for ( i = numsflags, entry = sflaglist; i > 0; i--, entry++ ) {
      if ( entry->affl == 0 ) {
         cp = NULL;
         ind = &sflagindex[0];
         viazero = 1;
      } else {
         cp = entry->affix + entry->affl - 1;
         ind = &sflagindex[*cp];
         viazero = 0;
         /* Descend through nodes that were already split into subtables. */
         while ( ind->numents == 0 && ind->pu.fp != NULL ) {
            if ( cp == entry->affix ) {
               ind = &ind->pu.fp[0];
               viazero = 1;
            } else {
               ind = &ind->pu.fp[*--cp];
               viazero = 0;
            }
         }
      }
      if ( ind->numents == 0 )
         ind->pu.ent = entry;
      ind->numents++;
      /* If this index entry has more than MAXSEARCH flags in it, we
       * will split it into subentries to reduce the searching.
       * However, the split doesn't make sense in two cases:  (a) if we
       * are already at the end of the current affix, or (b) if all the
       * entries in the list have identical affixes.  Since the list is
       * sorted, (b) is true if the first and last affixes in the list
       * are identical. */
      if ( !viazero && ind->numents >= MAXSEARCH
         && icharcmp ( entry->affix, ind->pu.ent->affix ) != 0 ) {
         /* Sneaky trick:  back up and reprocess */
         entry = ind->pu.ent - 1;       /* -1 is for entry++ in loop */
         i = numsflags - ( entry - sflaglist );
         ind->pu.fp =
            ( struct flagptr * )
            calloc ( ( unsigned ) ( SET_SIZE + hashheader.nstrchars ),
            sizeof ( struct flagptr ) );
         if ( ind->pu.fp == NULL ) {
            ( void ) fprintf ( stderr, LOOKUP_C_NO_LANG_SPACE );
            return ( -1 );
         }
         ind->numents = 0;
      }
   }
   /* Build the prefix index, walking each affix from its first character
    * forwards.
    *
    * Warning - 'entry' and 'i' are reset in the body of the loop
    * below.  Don't try to optimize it by (e.g.) moving the decrement of
    * i into the loop condition. */
   for ( i = numpflags, entry = pflaglist; i > 0; i--, entry++ ) {
      if ( entry->affl == 0 ) {
         cp = NULL;
         ind = &pflagindex[0];
         viazero = 1;
      } else {
         cp = entry->affix;
         ind = &pflagindex[*cp++];
         viazero = 0;
         /* Descend through nodes that were already split into subtables. */
         while ( ind->numents == 0 && ind->pu.fp != NULL ) {
            if ( *cp == 0 ) {
               ind = &ind->pu.fp[0];
               viazero = 1;
            } else {
               ind = &ind->pu.fp[*cp++];
               viazero = 0;
            }
         }
      }
      if ( ind->numents == 0 )
         ind->pu.ent = entry;
      ind->numents++;
      /* If this index entry has more than MAXSEARCH flags in it, we
       * will split it into subentries to reduce the searching.
       * However, the split doesn't make sense in two cases:  (a) if we
       * are already at the end of the current affix, or (b) if all the
       * entries in the list have identical affixes.  Since the list is
       * sorted, (b) is true if the first and last affixes in the list
       * are identical. */
      if ( !viazero && ind->numents >= MAXSEARCH
         && icharcmp ( entry->affix, ind->pu.ent->affix ) != 0 ) {
         /* Sneaky trick:  back up and reprocess */
         entry = ind->pu.ent - 1;       /* -1 is for entry++ in loop */
         i = numpflags - ( entry - pflaglist );
         ind->pu.fp =
            ( struct flagptr * ) calloc ( SET_SIZE + hashheader.nstrchars,
            sizeof ( struct flagptr ) );
         if ( ind->pu.fp == NULL ) {
            ( void ) fprintf ( stderr, LOOKUP_C_NO_LANG_SPACE );
            return ( -1 );
         }
         ind->numents = 0;
      }
   }
#ifdef INDEXDUMP
   ( void ) fprintf ( stderr, "Prefix index table:\n" );
   dumpindex ( pflagindex, 0 );
   ( void ) fprintf ( stderr, "Suffix index table:\n" );
   dumpindex ( sflagindex, 0 );
#endif
   if ( hashheader.nstrchartype == 0 )
      chartypes = NULL;
   else {
      chartypes = ( struct strchartype * )
         malloc ( hashheader.nstrchartype * sizeof ( struct strchartype ) );
      if ( chartypes == NULL ) {
         ( void ) fprintf ( stderr, LOOKUP_C_NO_LANG_SPACE );
         return ( -1 );
      }
      /* Each type is laid out in the string table as: name, deformatter,
       * then a list of NUL-terminated suffixes closed by an empty string. */
      for ( i = 0, nextchar = hashheader.strtypestart;
         i < hashheader.nstrchartype;
         i++ ) {
         chartypes[i].name = &hashstrings[nextchar];
         nextchar += strlen ( chartypes[i].name ) + 1;
         chartypes[i].deformatter = &hashstrings[nextchar];
         nextchar += strlen ( chartypes[i].deformatter ) + 1;
         chartypes[i].suffixes = &hashstrings[nextchar];
         while ( hashstrings[nextchar] != '\0' )
            nextchar += strlen ( &hashstrings[nextchar] ) + 1;
         nextchar++;
      }
   }
   inited = 1;
   return ( 0 );
}