int dict_data_zip( const char *inFilename, const char *outFilename, const char *preFilter, const char *postFilter ) { char inBuffer[IN_BUFFER_SIZE]; char outBuffer[OUT_BUFFER_SIZE]; int count; unsigned long inputCRC = crc32( 0L, Z_NULL, 0 ); z_stream zStream; FILE *outStr; FILE *inStr; int len; struct stat st; char *header; int headerLength; int dataLength; int extraLength; int chunkLength; #if HEADER_CRC int headerCRC; #endif unsigned long chunks; unsigned long chunk = 0; unsigned long total = 0; int i; char tail[8]; char *pt, *origFilename; /* Open files */ if (!(inStr = fopen( inFilename, "r" ))) err_fatal_errno( __func__, "Cannot open \"%s\" for read\n", inFilename ); if (!(outStr = fopen( outFilename, "w" ))) err_fatal_errno( __func__, "Cannot open \"%s\"for write\n", outFilename ); origFilename = xmalloc( strlen( inFilename ) + 1 ); if ((pt = strrchr( inFilename, '/' ))) strcpy( origFilename, pt + 1 ); else strcpy( origFilename, inFilename ); /* Initialize compression engine */ zStream.zalloc = NULL; zStream.zfree = NULL; zStream.opaque = NULL; zStream.next_in = NULL; zStream.avail_in = 0; zStream.next_out = NULL; zStream.avail_out = 0; if (deflateInit2( &zStream, Z_BEST_COMPRESSION, Z_DEFLATED, -15, /* Suppress zlib header */ Z_BEST_COMPRESSION, Z_DEFAULT_STRATEGY ) != Z_OK) err_internal( __func__, "Cannot initialize deflation engine: %s\n", zStream.msg ); /* Write initial header information */ chunkLength = (preFilter ? PREFILTER_IN_BUFFER_SIZE : IN_BUFFER_SIZE ); fstat( fileno( inStr ), &st ); chunks = st.st_size / chunkLength; if (st.st_size % chunkLength) ++chunks; PRINTF(DBG_VERBOSE,("%lu chunks * %u per chunk = %lu (filesize = %lu)\n", chunks, chunkLength, chunks * chunkLength, (unsigned long) st.st_size )); dataLength = chunks * 2; extraLength = 10 + dataLength; headerLength = GZ_FEXTRA_START + extraLength /* FEXTRA */ + strlen( origFilename ) + 1 /* FNAME */ + (HEADER_CRC ? 
2 : 0); /* FHCRC */ PRINTF(DBG_VERBOSE,("(data = %d, extra = %d, header = %d)\n", dataLength, extraLength, headerLength )); header = xmalloc( headerLength ); for (i = 0; i < headerLength; i++) header[i] = 0; header[GZ_ID1] = GZ_MAGIC1; header[GZ_ID2] = GZ_MAGIC2; header[GZ_CM] = Z_DEFLATED; header[GZ_FLG] = GZ_FEXTRA | GZ_FNAME; #if HEADER_CRC header[GZ_FLG] |= GZ_FHCRC; #endif header[GZ_MTIME+3] = (st.st_mtime & 0xff000000) >> 24; header[GZ_MTIME+2] = (st.st_mtime & 0x00ff0000) >> 16; header[GZ_MTIME+1] = (st.st_mtime & 0x0000ff00) >> 8; header[GZ_MTIME+0] = (st.st_mtime & 0x000000ff) >> 0; header[GZ_XFL] = GZ_MAX; header[GZ_OS] = GZ_OS_UNIX; header[GZ_XLEN+1] = (extraLength & 0xff00) >> 8; header[GZ_XLEN+0] = (extraLength & 0x00ff) >> 0; header[GZ_SI1] = GZ_RND_S1; header[GZ_SI2] = GZ_RND_S2; header[GZ_SUBLEN+1] = ((extraLength - 4) & 0xff00) >> 8; header[GZ_SUBLEN+0] = ((extraLength - 4) & 0x00ff) >> 0; header[GZ_VERSION+1] = 0; header[GZ_VERSION+0] = 1; header[GZ_CHUNKLEN+1] = (chunkLength & 0xff00) >> 8; header[GZ_CHUNKLEN+0] = (chunkLength & 0x00ff) >> 0; header[GZ_CHUNKCNT+1] = (chunks & 0xff00) >> 8; header[GZ_CHUNKCNT+0] = (chunks & 0x00ff) >> 0; strcpy( &header[GZ_FEXTRA_START + extraLength], origFilename ); xfwrite( header, 1, headerLength, outStr ); /* Read, compress, write */ while (!feof( inStr )) { if ((count = fread( inBuffer, 1, chunkLength, inStr ))) { dict_data_filter( inBuffer, &count, IN_BUFFER_SIZE, preFilter ); inputCRC = crc32( inputCRC, (const Bytef *) inBuffer, count ); zStream.next_in = (Bytef *) inBuffer; zStream.avail_in = count; zStream.next_out = (Bytef *) outBuffer; zStream.avail_out = OUT_BUFFER_SIZE; if (deflate( &zStream, Z_FULL_FLUSH ) != Z_OK) err_fatal( __func__, "deflate: %s\n", zStream.msg ); assert( zStream.avail_in == 0 ); len = OUT_BUFFER_SIZE - zStream.avail_out; assert( len <= 0xffff ); dict_data_filter( outBuffer, &len, OUT_BUFFER_SIZE, postFilter ); assert( len <= 0xffff ); header[GZ_RNDDATA + chunk*2 + 1] = (len & 
0xff00) >> 8; header[GZ_RNDDATA + chunk*2 + 0] = (len & 0x00ff) >> 0; xfwrite( outBuffer, 1, len, outStr ); ++chunk; total += count; if (dbg_test( DBG_VERBOSE )) { printf( "chunk %5lu: %lu of %lu total\r", chunk, total, (unsigned long) st.st_size ); xfflush( stdout ); } } }
/*
 * Read `size' bytes of uncompressed data starting at offset `start' from the
 * dictionary data file described by `h'.
 *
 * The result is a buffer of size + 1 bytes obtained from xmalloc() and
 * terminated with a NUL byte; it is handed to the caller (who is presumably
 * expected to xfree() it).  0 is returned, after releasing the buffer, when
 * seeking or reading the underlying file fails.  All other problems (pure
 * gzip input, unknown file type, zlib failures, inconsistent chunk sizes)
 * are reported through err_fatal() / err_internal().
 *
 * For DICT_DZIP files the requested range is assembled chunk by chunk.  Each
 * chunk is taken from h->cache when present (only if USE_CACHE is enabled),
 * otherwise it is read from disk, passed through `preFilter', inflated into
 * the least recently stamped cache slot and passed through `postFilter'.
 */
char *dict_data_read_ (
   dictData *h, unsigned long start, unsigned long size,
   const char *preFilter, const char *postFilter )
{
   (void) preFilter;
   (void) postFilter;
   char          *buffer, *pt;
   unsigned long  end;
   int            count;
   char          *inBuffer;
   char           outBuffer[OUT_BUFFER_SIZE];
   int            firstChunk, lastChunk;
   int            firstOffset, lastOffset;
   int            from, to;
   int            i, j;
   int            found, target, lastStamp;
   static int     stamp = 0;

   end = start + size;

   buffer = xmalloc( size + 1 );

   /* An empty request yields an empty string without touching the file */
   if ( !size ) {
      *buffer = 0;
      return buffer;
   }

   PRINTF(DBG_UNZIP,
	  ("dict_data_read( %p, %lu, %lu, %s, %s )\n",
	   h, start, size, preFilter, postFilter ));

   assert( h != NULL);
   switch (h->type) {
   case DICT_GZIP:
      err_fatal(
	 __func__,
	 "Cannot seek on pure gzip format files.\n"
	 "Use plain text (for performance)"
	 " or dzip format (for space savings).\n" );
      break;
   case DICT_TEXT:
   {
      if ( fseek( h->fd, start, SEEK_SET ) != 0 ||
	   fread( buffer, size, 1, h->fd ) != 1 )
      {
	 xfree( buffer );
	 return 0;
      }

      buffer[size] = '\0';
   }
   break;
   case DICT_DZIP:
      /* The inflation engine is set up lazily, on the first dzip read */
      if (!h->initialized) {
	 ++h->initialized;
	 h->zStream.zalloc    = NULL;
	 h->zStream.zfree     = NULL;
	 h->zStream.opaque    = NULL;
	 h->zStream.next_in   = 0;
	 h->zStream.avail_in  = 0;
	 h->zStream.next_out  = NULL;
	 h->zStream.avail_out = 0;
	 if (inflateInit2( &h->zStream, -15 ) != Z_OK)
	    err_internal( __func__,
			  "Cannot initialize inflation engine: %s\n",
			  h->zStream.msg );
      }

      /*
       * The range is [start, end).  The last chunk is located from the
       * last byte actually wanted (end - 1; size is non-zero here), so a
       * range that stops exactly on a chunk boundary does not pull in the
       * following chunk -- which need not exist when the range ends at the
       * end of the file.  lastOffset is therefore in 1..chunkLength.
       */
      firstChunk  = start / h->chunkLength;
      firstOffset = start - firstChunk * h->chunkLength;
      lastChunk   = (end - 1) / h->chunkLength;
      lastOffset  = end - lastChunk * h->chunkLength;
      PRINTF(DBG_UNZIP,
	     (" start = %lu, end = %lu\n"
	      "firstChunk = %d, firstOffset = %d,"
	      " lastChunk = %d, lastOffset = %d\n",
	      start, end, firstChunk, firstOffset, lastChunk, lastOffset ));

      for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {

	 /* Access cache: look for chunk i, else pick the oldest slot */
	 found     = 0;
	 target    = 0;
	 lastStamp = INT_MAX;
	 for (j = 0; j < DICT_CACHE_SIZE; j++) {
#if USE_CACHE
	    if (h->cache[j].chunk == i) {
	       found  = 1;
	       target = j;
	       break;
	    }
#endif
	    if (h->cache[j].stamp < lastStamp) {
	       lastStamp = h->cache[j].stamp;
	       target    = j;
	    }
	 }

	 h->cache[target].stamp = ++stamp;
	 if (found) {
	    count    = h->cache[target].count;
	    inBuffer = h->cache[target].inBuffer;
	 } else {
	    /*
	     * The slot is about to be overwritten.  Mark it as holding no
	     * chunk until the new contents are complete, so that a failed
	     * read cannot leave a slot that later matches chunk i while
	     * containing stale data.
	     */
	    h->cache[target].chunk = -1;
	    if (!h->cache[target].inBuffer)
	       h->cache[target].inBuffer = xmalloc( IN_BUFFER_SIZE );
	    inBuffer = h->cache[target].inBuffer;

	    if (h->chunks[i] >= OUT_BUFFER_SIZE ) {
	       err_internal( __func__,
			     "h->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
			     i, h->chunks[i], OUT_BUFFER_SIZE );
	    }

	    if ( fseek( h->fd, h->offsets[ i ], SEEK_SET ) != 0 ||
		 fread( outBuffer, h->chunks[ i ], 1, h->fd ) != 1 )
	    {
	       xfree( buffer );
	       return 0;
	    }

	    /*
	     * The filter takes the current data length in `count' and
	     * updates it, so it must start out as the compressed chunk
	     * size, and the (possibly changed) length is what is inflated.
	     */
	    count = h->chunks[i];
	    dict_data_filter( outBuffer, &count, OUT_BUFFER_SIZE, preFilter );

	    h->zStream.next_in   = (Bytef *) outBuffer;
	    h->zStream.avail_in  = count;
	    h->zStream.next_out  = (Bytef *) inBuffer;
	    h->zStream.avail_out = IN_BUFFER_SIZE;
	    if (inflate( &h->zStream,  Z_PARTIAL_FLUSH ) != Z_OK)
	       err_fatal( __func__, "inflate: %s\n", h->zStream.msg );
	    if (h->zStream.avail_in)
	       err_internal( __func__,
			     "inflate did not flush (%d pending, %d avail)\n",
			     h->zStream.avail_in, h->zStream.avail_out );

	    count = IN_BUFFER_SIZE - h->zStream.avail_out;
	    dict_data_filter( inBuffer, &count, IN_BUFFER_SIZE, postFilter );

	    h->cache[target].count = count;
	    h->cache[target].chunk = i;
	 }

	 /* Portion [from, to) of this chunk belongs to the request */
	 from = (i == firstChunk) ? firstOffset : 0;
	 to   = (i == lastChunk)  ? lastOffset  : h->chunkLength;

	 /* A chunk consumed through its end must have its full length */
	 if (to == h->chunkLength && count != h->chunkLength)
	    err_internal( __func__,
			  "Length = %d instead of %d\n",
			  count, h->chunkLength );

	 memcpy( pt, inBuffer + from, to - from );
	 pt += to - from;
      }
      *pt = '\0';
      break;
   case DICT_UNKNOWN:
      err_fatal( __func__, "Cannot read unknown file type\n" );
      break;
   }

   return buffer;
}
/*
 * Read `size' bytes of uncompressed data starting at offset `start' from the
 * dictionary data file described by `h'.
 *
 * On success the result is a buffer of size + 1 bytes obtained from
 * xmalloc() and terminated with a NUL byte; it is handed to the caller (who
 * is presumably expected to xfree() it).  On any failure the buffer is
 * released, a description is written to h->errorString and 0 is returned.
 *
 * For DICT_DZIP files the requested range is assembled chunk by chunk.  Each
 * chunk is taken from h->cache when present (only if USE_CACHE is enabled),
 * otherwise it is read from disk, passed through `preFilter', inflated into
 * the least recently stamped cache slot and passed through `postFilter'.
 */
char *dict_data_read_ (
  dictData *h, unsigned long start, unsigned long size,
  const char *preFilter, const char *postFilter )
{
  (void) preFilter;
  (void) postFilter;
  char          *buffer, *pt;
  unsigned long  end;
  int            count;
  char          *inBuffer;
  char           outBuffer[OUT_BUFFER_SIZE];
  int            firstChunk, lastChunk;
  int            firstOffset, lastOffset;
  int            from, to;
  int            i, j;
  int            found, target, lastStamp;

  /* h is written to on every error path below, so check it first */
  assert( h != NULL);

  end = start + size;

  buffer = xmalloc( size + 1 );
  if( !buffer )
  {
    strcpy( h->errorString, "Cannot allocate memory" );
    return 0;
  }

  /* An empty request yields an empty string without touching the file */
  if ( !size )
  {
    *buffer = 0;
    return buffer;
  }

  PRINTF(DBG_UNZIP,
	 ("dict_data_read( %p, %lu, %lu, %s, %s )\n",
	  h, start, size, preFilter, postFilter ));

  switch (h->type) {
  case DICT_GZIP:
    strcpy( h->errorString, "Cannot seek on pure gzip format files" );
    xfree( buffer );
    return 0;
  case DICT_TEXT:
  {
#ifdef __WIN32
    DWORD pos = SetFilePointer( h->fd, start, 0, FILE_BEGIN );
    DWORD readed = 0;
    /*
     * The seek failed only if the sentinel is returned AND an error is
     * recorded; read in every other case.
     */
    if( pos != INVALID_SET_FILE_POINTER || GetLastError() == NO_ERROR )
      ReadFile( h->fd, buffer, size, &readed, 0 );
    if( size != readed )
#else
    if ( fseek( h->fd, start, SEEK_SET ) != 0 ||
	 fread( buffer, size, 1, h->fd ) != 1 )
#endif
    {
      strcpy( h->errorString, "Cannot read file" );
      xfree( buffer );
      return 0;
    }

    buffer[size] = '\0';
  }
  break;
  case DICT_DZIP:
    /* The inflation engine is set up lazily, on the first dzip read */
    if (!h->initialized) {
      h->zStream.zalloc    = NULL;
      h->zStream.zfree     = NULL;
      h->zStream.opaque    = NULL;
      h->zStream.next_in   = 0;
      h->zStream.avail_in  = 0;
      h->zStream.next_out  = NULL;
      h->zStream.avail_out = 0;
      if (inflateInit2( &h->zStream, -15 ) != Z_OK)
      {
	sprintf( h->errorString, "Cannot initialize inflation engine: %s", h->zStream.msg );
	xfree( buffer );
	return 0;
      }
      ++h->initialized;
    }

    /*
     * The range is [start, end).  The last chunk is located from the last
     * byte actually wanted (end - 1; size is non-zero here), so a range
     * that stops exactly on a chunk boundary does not pull in the
     * following chunk -- which need not exist when the range ends at the
     * end of the file.  lastOffset is therefore in 1..chunkLength.
     */
    firstChunk  = start / h->chunkLength;
    firstOffset = start - firstChunk * h->chunkLength;
    lastChunk   = (end - 1) / h->chunkLength;
    lastOffset  = end - lastChunk * h->chunkLength;
    PRINTF(DBG_UNZIP,
	   (" start = %lu, end = %lu\n"
	    "firstChunk = %d, firstOffset = %d,"
	    " lastChunk = %d, lastOffset = %d\n",
	    start, end, firstChunk, firstOffset, lastChunk, lastOffset ));

    for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {

      /* Access cache: look for chunk i, else pick the oldest slot */
      found     = 0;
      target    = 0;
      lastStamp = INT_MAX;
      for (j = 0; j < DICT_CACHE_SIZE; j++) {
#if USE_CACHE
	if (h->cache[j].chunk == i) {
	  found  = 1;
	  target = j;
	  break;
	}
#endif
	if (h->cache[j].stamp < lastStamp) {
	  lastStamp = h->cache[j].stamp;
	  target    = j;
	}
      }

      /*
       * Stamp the chosen slot.  When the counter is exhausted it is
       * restarted and every slot is aged to -1; this is tested before
       * incrementing so that the counter never overflows.
       */
      if( h->stamp == INT_MAX )
      {
	h->stamp = 0;
	for (j = 0; j < DICT_CACHE_SIZE; j++)
	  h->cache[j].stamp = -1;
      }
      else
	h->cache[target].stamp = ++h->stamp;

      if (found) {
	count    = h->cache[target].count;
	inBuffer = h->cache[target].inBuffer;
      } else {
	/* The slot holds no valid chunk until it is completely refilled */
	h->cache[target].chunk = -1;
	if (!h->cache[target].inBuffer)
	{
	  h->cache[target].inBuffer = xmalloc( h->chunkLength );
	  if (!h->cache[target].inBuffer)
	  {
	    strcpy( h->errorString, "Cannot allocate memory" );
	    xfree( buffer );
	    return 0;
	  }
	}
	inBuffer = h->cache[target].inBuffer;

	if (h->chunks[i] >= OUT_BUFFER_SIZE ) {
	  sprintf( h->errorString,
		   "h->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
		   i, h->chunks[i], OUT_BUFFER_SIZE );
	  xfree( buffer );
	  return 0;
	}

#ifdef __WIN32
	DWORD pos = SetFilePointer( h->fd, h->offsets[ i ], 0, FILE_BEGIN );
	DWORD readed = 0;
	/* Same rule as for plain text: skip the read only on a real failure */
	if( pos != INVALID_SET_FILE_POINTER || GetLastError() == NO_ERROR )
	  ReadFile( h->fd, outBuffer, h->chunks[ i ], &readed, 0 );
	if( h->chunks[ i ] != readed )
#else
	if ( fseek( h->fd, h->offsets[ i ], SEEK_SET ) != 0 ||
	     fread( outBuffer, h->chunks[ i ], 1, h->fd ) != 1 )
#endif
	{
	  strcpy( h->errorString, "Cannot read file" );
	  xfree( buffer );
	  return 0;
	}

	/*
	 * The filter takes the current data length in `count' and updates
	 * it, so it must start out as the compressed chunk size, and the
	 * (possibly changed) length is what is inflated.
	 */
	count = h->chunks[i];
	dict_data_filter( outBuffer, &count, OUT_BUFFER_SIZE, preFilter );

	h->zStream.next_in   = (Bytef *)outBuffer;
	h->zStream.avail_in  = count;
	h->zStream.next_out  = (Bytef *)inBuffer;
	h->zStream.avail_out = h->chunkLength;

	if (inflate( &h->zStream,  Z_PARTIAL_FLUSH ) != Z_OK)
	{
	  sprintf( h->errorString, "inflate: %s\n", h->zStream.msg );
	  xfree( buffer );
	  return 0;
	}

	if (h->zStream.avail_in)
	{
	  sprintf( h->errorString,
		   "inflate did not flush (%d pending, %d avail)\n",
		   h->zStream.avail_in, h->zStream.avail_out );
	  xfree( buffer );
	  return 0;
	}

	count = h->chunkLength - h->zStream.avail_out;
	dict_data_filter( inBuffer, &count, h->chunkLength, postFilter );

	h->cache[target].count = count;
	h->cache[target].chunk = i;
      }

      /* Portion [from, to) of this chunk belongs to the request */
      from = (i == firstChunk) ? firstOffset : 0;
      to   = (i == lastChunk)  ? lastOffset  : h->chunkLength;

      /* A chunk consumed through its end must have its full length */
      if (to == h->chunkLength && count != h->chunkLength)
      {
	sprintf( h->errorString, "Length = %d instead of %d\n",
		 count, h->chunkLength );
	xfree( buffer );
	return 0;
      }

      memcpy( pt, inBuffer + from, to - from );
      pt += to - from;
    }
    *pt = '\0';
    break;
  case DICT_UNKNOWN:
    strcpy( h->errorString, "Cannot read unknown file type" );
    xfree( buffer );
    return 0;
  }

  return buffer;
}