Example #1
0
/*
 * dict_data_zip - compress inFilename into outFilename chunk by chunk.
 *
 * The visible part of this function:
 *   - opens both files and derives the stored original file name (the part
 *     of inFilename after the last '/');
 *   - initialises a raw deflate stream (negative windowBits, no zlib header);
 *   - builds and writes a gzip header carrying FEXTRA and FNAME fields, where
 *     the extra field reserves a two-byte slot per chunk;
 *   - reads the input in chunkLength-sized pieces, deflates each piece with
 *     Z_FULL_FLUSH, writes the result and records its compressed length in
 *     the in-memory header.
 *
 * preFilter / postFilter are passed to dict_data_filter() for the raw and
 * the compressed data of every chunk respectively.
 *
 * NOTE(review): this excerpt stops inside the function; trailer writing,
 * cleanup and the return value are not visible here.
 */
int dict_data_zip( const char *inFilename, const char *outFilename,
		   const char *preFilter, const char *postFilter )
{
   char          inBuffer[IN_BUFFER_SIZE];
   char          outBuffer[OUT_BUFFER_SIZE];
   int           count;
   /* crc32() called with a Z_NULL buffer yields the initial CRC value. */
   unsigned long inputCRC = crc32( 0L, Z_NULL, 0 );
   z_stream      zStream;
   FILE          *outStr;
   FILE          *inStr;
   int           len;
   struct stat   st;
   char          *header;
   int           headerLength;
   int           dataLength;
   int           extraLength;
   int           chunkLength;
#if HEADER_CRC
   int           headerCRC;
#endif
   unsigned long chunks;
   unsigned long chunk = 0;
   unsigned long total = 0;
   int           i;
   char          tail[8];
   char          *pt, *origFilename;

   
   /* Open files */
   /* NOTE(review): both streams are opened in text mode ("r"/"w") although
      binary data is written; on platforms that distinguish the two modes
      "rb"/"wb" would be required -- confirm the intended targets. */
   if (!(inStr = fopen( inFilename, "r" )))
      err_fatal_errno( __func__,
		       "Cannot open \"%s\" for read\n", inFilename );
   if (!(outStr = fopen( outFilename, "w" )))
      err_fatal_errno( __func__,
		       "Cannot open \"%s\"for write\n", outFilename );

   /* Keep only the component after the last '/' as the stored file name. */
   origFilename = xmalloc( strlen( inFilename ) + 1 );
   if ((pt = strrchr( inFilename, '/' )))
      strcpy( origFilename, pt + 1 );
   else
      strcpy( origFilename, inFilename );

   /* Initialize compression engine */
   zStream.zalloc    = NULL;
   zStream.zfree     = NULL;
   zStream.opaque    = NULL;
   zStream.next_in   = NULL;
   zStream.avail_in  = 0;
   zStream.next_out  = NULL;
   zStream.avail_out = 0;
   /* The fifth deflateInit2() argument is memLevel; Z_BEST_COMPRESSION is
      reused there as a numeric value. */
   if (deflateInit2( &zStream,
		     Z_BEST_COMPRESSION,
		     Z_DEFLATED,
		     -15,	/* Suppress zlib header */
		     Z_BEST_COMPRESSION,
		     Z_DEFAULT_STRATEGY ) != Z_OK)
      err_internal( __func__,
		    "Cannot initialize deflation engine: %s\n", zStream.msg );

   /* Write initial header information */
   chunkLength = (preFilter ? PREFILTER_IN_BUFFER_SIZE : IN_BUFFER_SIZE );
   /* NOTE(review): the fstat() result is not checked; st is used below for
      both the file size and the modification time. */
   fstat( fileno( inStr ), &st );
   /* Number of chunks = file size divided by chunkLength, rounded up. */
   chunks = st.st_size / chunkLength;
   if (st.st_size % chunkLength) ++chunks;
   PRINTF(DBG_VERBOSE,("%lu chunks * %u per chunk = %lu (filesize = %lu)\n",
			chunks, chunkLength, chunks * chunkLength,
			(unsigned long) st.st_size ));
   /* Two bytes per chunk are reserved for the compressed-length table. */
   dataLength   = chunks * 2;
   /* 10 presumably covers SI1, SI2, SUBLEN, VERSION, CHUNKLEN and CHUNKCNT
      (1 + 1 + 2 + 2 + 2 + 2 bytes) that precede the table. */
   extraLength  = 10 + dataLength;
   headerLength = GZ_FEXTRA_START
		  + extraLength		/* FEXTRA */
		  + strlen( origFilename ) + 1	/* FNAME  */
		  + (HEADER_CRC ? 2 : 0);	/* FHCRC  */
   PRINTF(DBG_VERBOSE,("(data = %d, extra = %d, header = %d)\n",
		       dataLength, extraLength, headerLength ));
   header = xmalloc( headerLength );
   for (i = 0; i < headerLength; i++) header[i] = 0;
   header[GZ_ID1]        = GZ_MAGIC1;
   header[GZ_ID2]        = GZ_MAGIC2;
   header[GZ_CM]         = Z_DEFLATED;
   header[GZ_FLG]        = GZ_FEXTRA | GZ_FNAME;
#if HEADER_CRC
   header[GZ_FLG]        |= GZ_FHCRC;
#endif
   /* Multi-byte fields are stored least significant byte first. */
   header[GZ_MTIME+3]    = (st.st_mtime & 0xff000000) >> 24;
   header[GZ_MTIME+2]    = (st.st_mtime & 0x00ff0000) >> 16;
   header[GZ_MTIME+1]    = (st.st_mtime & 0x0000ff00) >>  8;
   header[GZ_MTIME+0]    = (st.st_mtime & 0x000000ff) >>  0;
   header[GZ_XFL]        = GZ_MAX;
   header[GZ_OS]         = GZ_OS_UNIX;
   /* NOTE(review): only the low 16 bits of extraLength, chunkLength and
      chunks are stored below; nothing visible here rejects larger values. */
   header[GZ_XLEN+1]     = (extraLength & 0xff00) >> 8;
   header[GZ_XLEN+0]     = (extraLength & 0x00ff) >> 0;
   header[GZ_SI1]        = GZ_RND_S1;
   header[GZ_SI2]        = GZ_RND_S2;
   /* The subfield length is stored as extraLength - 4, presumably excluding
      SI1, SI2 and the SUBLEN field itself. */
   header[GZ_SUBLEN+1]   = ((extraLength - 4) & 0xff00) >> 8;
   header[GZ_SUBLEN+0]   = ((extraLength - 4) & 0x00ff) >> 0;
   header[GZ_VERSION+1]  = 0;
   header[GZ_VERSION+0]  = 1;
   header[GZ_CHUNKLEN+1] = (chunkLength & 0xff00) >> 8;
   header[GZ_CHUNKLEN+0] = (chunkLength & 0x00ff) >> 0;
   header[GZ_CHUNKCNT+1] = (chunks & 0xff00) >> 8;
   header[GZ_CHUNKCNT+0] = (chunks & 0x00ff) >> 0;
   /* The original file name follows directly after the extra field. */
   strcpy( &header[GZ_FEXTRA_START + extraLength], origFilename );
   /* At this point the per-chunk length table is still all zeroes. */
   xfwrite( header, 1, headerLength, outStr );
    
   /* Read, compress, write */
   while (!feof( inStr )) {
      /* A zero-length read (end of input) skips the body entirely. */
      if ((count = fread( inBuffer, 1, chunkLength, inStr ))) {
	 dict_data_filter( inBuffer, &count, IN_BUFFER_SIZE, preFilter );

	 /* The running CRC covers the data after pre-filtering. */
	 inputCRC = crc32( inputCRC, (const Bytef *) inBuffer, count );
	 zStream.next_in   = (Bytef *) inBuffer;
	 zStream.avail_in  = count;
	 zStream.next_out  = (Bytef *) outBuffer;
	 zStream.avail_out = OUT_BUFFER_SIZE;
	 /* Per the zlib manual, Z_FULL_FLUSH emits all pending output and
	    resets the compression state, so decompression can restart at
	    this point. */
	 if (deflate( &zStream, Z_FULL_FLUSH ) != Z_OK)
	    err_fatal( __func__, "deflate: %s\n", zStream.msg );
	 /* The whole chunk must have been consumed in this single call. */
	 assert( zStream.avail_in == 0 );
	 len = OUT_BUFFER_SIZE - zStream.avail_out;
	 /* The compressed length has to fit the two-byte table slot. */
	 assert( len <= 0xffff );

	 dict_data_filter( outBuffer, &len, OUT_BUFFER_SIZE, postFilter );
	 
	 assert( len <= 0xffff );
	 /* Record the compressed length in the in-memory header only; the
	    header was already written above, so the filled-in table is
	    presumably written again later -- not visible in this excerpt. */
	 header[GZ_RNDDATA + chunk*2 + 1] = (len & 0xff00) >>  8;
	 header[GZ_RNDDATA + chunk*2 + 0] = (len & 0x00ff) >>  0;
	 xfwrite( outBuffer, 1, len, outStr );

	 ++chunk;
	 total += count;
	 if (dbg_test( DBG_VERBOSE )) {
	    printf( "chunk %5lu: %lu of %lu total\r",
		    chunk, total, (unsigned long) st.st_size );
	    xfflush( stdout );
	 }
      }
   }
Example #2
0
/*
 * dict_data_read_ - read `size` bytes starting at byte offset `start` of the
 * uncompressed data described by `h`.
 *
 * Parameters:
 *   h           open data file descriptor (must not be NULL)
 *   start       offset of the first wanted byte in the uncompressed data
 *   size        number of bytes wanted
 *   preFilter   passed to dict_data_filter() for each compressed chunk
 *   postFilter  passed to dict_data_filter() for each decompressed chunk
 *
 * Returns a buffer obtained from xmalloc() that holds the bytes followed by
 * a terminating '\0' and is handed to the caller.  Returns NULL, after
 * releasing the buffer, when a seek or read on h->fd fails.  All other
 * problems are reported through err_fatal() / err_internal().
 */
char *dict_data_read_ (
   dictData *h, unsigned long start, unsigned long size,
   const char *preFilter, const char *postFilter )
{
   (void) preFilter;
   (void) postFilter;
   char          *buffer, *pt;
   unsigned long end;
   int           count;
   char          *inBuffer;
   char          outBuffer[OUT_BUFFER_SIZE];
   int           firstChunk, lastChunk;
   int           firstOffset, lastOffset;
   int           i, j;
   int           found, target, lastStamp;
   static int    stamp = 0;

   end  = start + size;

   buffer = xmalloc( size + 1 );

   /* An empty request yields an empty string without touching the file. */
   if ( !size )
   {
      *buffer = 0;
      return buffer;
   }

   PRINTF(DBG_UNZIP,
          ("dict_data_read( %p, %lu, %lu, %s, %s )\n",
           h, start, size, preFilter, postFilter ));

   assert( h != NULL);
   switch (h->type) {
   case DICT_GZIP:
      err_fatal( __func__,
                 "Cannot seek on pure gzip format files.\n"
                 "Use plain text (for performance)"
                 " or dzip format (for space savings).\n" );
      break;
   case DICT_TEXT:
   {
      /* Plain text: the uncompressed offset is the file offset. */
      if ( fseek( h->fd, start, SEEK_SET ) != 0 ||
           fread( buffer, size, 1, h->fd ) != 1 )
      {
         xfree( buffer );
         return NULL;
      }

      buffer[size] = '\0';
   }
   break;
   case DICT_DZIP:
      /* The inflate stream is set up lazily on the first compressed read. */
      if (!h->initialized) {
         ++h->initialized;
         h->zStream.zalloc    = NULL;
         h->zStream.zfree     = NULL;
         h->zStream.opaque    = NULL;
         h->zStream.next_in   = 0;
         h->zStream.avail_in  = 0;
         h->zStream.next_out  = NULL;
         h->zStream.avail_out = 0;
         if (inflateInit2( &h->zStream, -15 ) != Z_OK)
            err_internal( __func__,
                          "Cannot initialize inflation engine: %s\n",
                          h->zStream.msg );
      }
      firstChunk  = start / h->chunkLength;
      firstOffset = start - firstChunk * h->chunkLength;
      lastChunk   = end / h->chunkLength;
      lastOffset  = end - lastChunk * h->chunkLength;
      PRINTF(DBG_UNZIP,
             ("   start = %lu, end = %lu\n"
              "firstChunk = %d, firstOffset = %d,"
              " lastChunk = %d, lastOffset = %d\n",
              start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
      for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {

         /* When the request ends exactly on a chunk boundary, lastChunk
            names the chunk after the data and contributes zero bytes.
            Loading it would be wasted work and, if the request ends at the
            end of the data, would index a chunk that presumably does not
            exist. */
         if (i != firstChunk && i == lastChunk && lastOffset == 0)
            break;

                                /* Access cache */
         found  = 0;
         target = 0;
         lastStamp = INT_MAX;
         for (j = 0; j < DICT_CACHE_SIZE; j++) {
#if USE_CACHE
            if (h->cache[j].chunk == i) {
               found  = 1;
               target = j;
               break;
            }
#endif
            /* Otherwise remember the slot with the smallest stamp as the
               one to replace. */
            if (h->cache[j].stamp < lastStamp) {
               lastStamp = h->cache[j].stamp;
               target = j;
            }
         }

         h->cache[target].stamp = ++stamp;
         if (found) {
            count = h->cache[target].count;
            inBuffer = h->cache[target].inBuffer;
         } else {
            if (!h->cache[target].inBuffer)
               h->cache[target].inBuffer = xmalloc( IN_BUFFER_SIZE );
            inBuffer = h->cache[target].inBuffer;

            if (h->chunks[i] >= OUT_BUFFER_SIZE ) {
               err_internal( __func__,
                             "h->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
                             i, h->chunks[i], OUT_BUFFER_SIZE );
            }

            /* The slot still carries its previous chunk id and data here,
               so bailing out leaves the cache consistent. */
            if ( fseek( h->fd, h->offsets[ i ], SEEK_SET ) != 0 ||
                 fread( outBuffer, h->chunks[ i ], 1, h->fd ) != 1 )
            {
               xfree( buffer );
               return NULL;
            }

            /* The filter receives the number of valid bytes via count, so
               it must hold the compressed length before the call. */
            count = h->chunks[i];
            dict_data_filter( outBuffer, &count, OUT_BUFFER_SIZE, preFilter );

            h->zStream.next_in   = (Bytef *) outBuffer;
            h->zStream.avail_in  = h->chunks[i];
            h->zStream.next_out  = (Bytef *) inBuffer;
            h->zStream.avail_out = IN_BUFFER_SIZE;
            if (inflate( &h->zStream,  Z_PARTIAL_FLUSH ) != Z_OK)
               err_fatal( __func__, "inflate: %s\n", h->zStream.msg );
            if (h->zStream.avail_in)
               err_internal( __func__,
                             "inflate did not flush (%d pending, %d avail)\n",
                             h->zStream.avail_in, h->zStream.avail_out );

            count = IN_BUFFER_SIZE - h->zStream.avail_out;
            dict_data_filter( inBuffer, &count, IN_BUFFER_SIZE, postFilter );

            /* Tag the slot only once it really holds chunk i. */
            h->cache[target].count = count;
            h->cache[target].chunk = i;
         }

         if (i == firstChunk) {
            if (i == lastChunk) {
               /* The whole request lies inside one chunk. */
               memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset);
               pt += lastOffset - firstOffset;
            } else {
               if (count != h->chunkLength )
                  err_internal( __func__,
                                "Length = %d instead of %d\n",
                                count, h->chunkLength );
               memcpy( pt, inBuffer + firstOffset,
                       h->chunkLength - firstOffset );
               pt += h->chunkLength - firstOffset;
            }
         } else if (i == lastChunk) {
            memcpy( pt, inBuffer, lastOffset );
            pt += lastOffset;
         } else {
            assert( count == h->chunkLength );
            memcpy( pt, inBuffer, h->chunkLength );
            pt += h->chunkLength;
         }
      }
      *pt = '\0';
      break;
   case DICT_UNKNOWN:
      err_fatal( __func__, "Cannot read unknown file type\n" );
      break;
   }

   return buffer;
}
Example #3
0
/*
 * dict_data_read_ - read `size` bytes starting at byte offset `start` of the
 * uncompressed data described by `h`.
 *
 * Parameters:
 *   h           open data file descriptor (must not be NULL)
 *   start       offset of the first wanted byte in the uncompressed data
 *   size        number of bytes wanted
 *   preFilter   passed to dict_data_filter() for each compressed chunk
 *   postFilter  passed to dict_data_filter() for each decompressed chunk
 *
 * Returns a buffer obtained from xmalloc() that holds the bytes followed by
 * a terminating '\0' and is handed to the caller.  On any failure the buffer
 * is released, a description is stored in h->errorString and NULL is
 * returned.
 */
char *dict_data_read_ (
   dictData *h, unsigned long start, unsigned long size,
   const char *preFilter, const char *postFilter )
{
   (void) preFilter;
   (void) postFilter;
   char          *buffer, *pt;
   unsigned long end;
   int           count;
   char          *inBuffer;
   char          outBuffer[OUT_BUFFER_SIZE];
   int           firstChunk, lastChunk;
   int           firstOffset, lastOffset;
   int           i, j;
   int           found, target, lastStamp;

   /* h is dereferenced by the very first error path below. */
   assert( h != NULL );

   end  = start + size;

   buffer = xmalloc( size + 1 );
   if( !buffer )
   {
      strcpy( h->errorString, "Cannot allocate memory" );
      return NULL;
   }

   /* An empty request yields an empty string without touching the file. */
   if ( !size )
   {
      *buffer = 0;
      return buffer;
   }

   PRINTF(DBG_UNZIP,
          ("dict_data_read( %p, %lu, %lu, %s, %s )\n",
           h, start, size, preFilter, postFilter ));

   switch (h->type) {
   case DICT_GZIP:
      strcpy( h->errorString, "Cannot seek on pure gzip format files" );
      xfree( buffer );
      return NULL;
   case DICT_TEXT:
   {
#ifdef __WIN32
      /* INVALID_SET_FILE_POINTER only signals failure when GetLastError()
         also reports an error, so the read is attempted when either check
         indicates success. */
      DWORD pos = SetFilePointer( h->fd, start, 0, FILE_BEGIN );
      DWORD readed = 0;
      if( pos != INVALID_SET_FILE_POINTER || GetLastError() == NO_ERROR )
         ReadFile( h->fd, buffer, size, &readed, 0 );
      if( size != readed )
#else
      if ( fseek( h->fd, start, SEEK_SET ) != 0 ||
           fread( buffer, size, 1, h->fd ) != 1 )
#endif
      {
         strcpy( h->errorString, "Cannot read file" );
         xfree( buffer );
         return NULL;
      }

      buffer[size] = '\0';
   }
   break;
   case DICT_DZIP:
      /* The inflate stream is set up lazily on the first compressed read. */
      if (!h->initialized) {
         h->zStream.zalloc    = NULL;
         h->zStream.zfree     = NULL;
         h->zStream.opaque    = NULL;
         h->zStream.next_in   = 0;
         h->zStream.avail_in  = 0;
         h->zStream.next_out  = NULL;
         h->zStream.avail_out = 0;
         if (inflateInit2( &h->zStream, -15 ) != Z_OK)
         {
            /* zlib leaves msg NULL when it has no message to report. */
            sprintf( h->errorString, "Cannot initialize inflation engine: %s",
                     h->zStream.msg ? h->zStream.msg : "unknown error" );
            xfree( buffer );
            return NULL;
         }
         ++h->initialized;
      }
      firstChunk  = start / h->chunkLength;
      firstOffset = start - firstChunk * h->chunkLength;
      lastChunk   = end / h->chunkLength;
      lastOffset  = end - lastChunk * h->chunkLength;
      PRINTF(DBG_UNZIP,
             ("   start = %lu, end = %lu\n"
              "firstChunk = %d, firstOffset = %d,"
              " lastChunk = %d, lastOffset = %d\n",
              start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
      for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {

         /* When the request ends exactly on a chunk boundary, lastChunk
            names the chunk after the data and contributes zero bytes.
            Loading it would be wasted work and, if the request ends at the
            end of the data, would index a chunk that presumably does not
            exist. */
         if (i != firstChunk && i == lastChunk && lastOffset == 0)
            break;

                                /* Access cache */
         found  = 0;
         target = 0;
         lastStamp = INT_MAX;
         for (j = 0; j < DICT_CACHE_SIZE; j++) {
#if USE_CACHE
            if (h->cache[j].chunk == i) {
               found  = 1;
               target = j;
               break;
            }
#endif
            /* Otherwise remember the slot with the smallest stamp as the
               one to replace. */
            if (h->cache[j].stamp < lastStamp) {
               lastStamp = h->cache[j].stamp;
               target = j;
            }
         }

         /* Advance the stamp counter.  The wrap is detected before the
            increment because overflowing a signed int is undefined; when
            the incremented value would be negative the counter restarts
            and every slot's stamp is reset. */
         if (h->stamp == INT_MAX || h->stamp < -1)
         {
            h->stamp = 0;
            for (j = 0; j < DICT_CACHE_SIZE; j++)
               h->cache[j].stamp = -1;
         }
         else
         {
            h->cache[target].stamp = ++h->stamp;
         }

         if (found) {
            count = h->cache[target].count;
            inBuffer = h->cache[target].inBuffer;
         } else {
            /* Mark the slot as holding no chunk until the load succeeds. */
            h->cache[target].chunk = -1;
            if (!h->cache[target].inBuffer)
               h->cache[target].inBuffer = xmalloc( h->chunkLength );
            inBuffer = h->cache[target].inBuffer;

            if (h->chunks[i] >= OUT_BUFFER_SIZE ) {
               sprintf( h->errorString, "h->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
                        i, h->chunks[i], OUT_BUFFER_SIZE );
               xfree( buffer );
               return NULL;
            }

#ifdef __WIN32
            /* Same success test as in the plain-text branch. */
            DWORD pos = SetFilePointer( h->fd, h->offsets[ i ], 0, FILE_BEGIN );
            DWORD readed = 0;
            if( pos != INVALID_SET_FILE_POINTER || GetLastError() == NO_ERROR )
               ReadFile( h->fd, outBuffer, h->chunks[ i ], &readed, 0 );
            if( h->chunks[ i ] != readed )
#else
            if ( fseek( h->fd, h->offsets[ i ], SEEK_SET ) != 0 ||
                 fread( outBuffer, h->chunks[ i ], 1, h->fd ) != 1 )
#endif
            {
               /* Report the failure the same way the plain-text branch does. */
               strcpy( h->errorString, "Cannot read file" );
               xfree( buffer );
               return NULL;
            }

            dict_data_filter( outBuffer, &count, OUT_BUFFER_SIZE, preFilter );

            h->zStream.next_in   = (Bytef *)outBuffer;
            h->zStream.avail_in  = h->chunks[i];
            h->zStream.next_out  = (Bytef *)inBuffer;
            h->zStream.avail_out = h->chunkLength;
            if (inflate( &h->zStream,  Z_PARTIAL_FLUSH ) != Z_OK)
            {
               sprintf( h->errorString, "inflate: %s\n",
                        h->zStream.msg ? h->zStream.msg : "unknown error" );
               xfree( buffer );
               return NULL;
            }
            if (h->zStream.avail_in)
            {
               sprintf( h->errorString, "inflate did not flush (%d pending, %d avail)\n",
                        h->zStream.avail_in, h->zStream.avail_out );
               xfree( buffer );
               return NULL;
            }

            count = h->chunkLength - h->zStream.avail_out;
            dict_data_filter( inBuffer, &count, h->chunkLength, postFilter );

            /* Tag the slot only once it really holds chunk i. */
            h->cache[target].count = count;
            h->cache[target].chunk = i;
         }

         if (i == firstChunk) {
            if (i == lastChunk) {
               /* The whole request lies inside one chunk. */
               memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset);
               pt += lastOffset - firstOffset;
            } else {
               if (count != h->chunkLength )
               {
                  sprintf( h->errorString, "Length = %d instead of %d\n",
                           count, h->chunkLength );
                  xfree( buffer );
                  return NULL;
               }
               memcpy( pt, inBuffer + firstOffset,
                       h->chunkLength - firstOffset );
               pt += h->chunkLength - firstOffset;
            }
         } else if (i == lastChunk) {
            memcpy( pt, inBuffer, lastOffset );
            pt += lastOffset;
         } else {
            /* A short middle chunk is reported like a short first chunk
               rather than through an assert that release builds drop. */
            if (count != h->chunkLength )
            {
               sprintf( h->errorString, "Length = %d instead of %d\n",
                        count, h->chunkLength );
               xfree( buffer );
               return NULL;
            }
            memcpy( pt, inBuffer, h->chunkLength );
            pt += h->chunkLength;
         }
      }
      *pt = '\0';
      break;
   case DICT_UNKNOWN:
      strcpy( h->errorString, "Cannot read unknown file type" );
      xfree( buffer );
      return NULL;
   }

   return buffer;
}