Example #1
inline LZHASH LZHLCompressor::_updateTable( LZHASH hash, const uint8_t* src, LZPOS pos, ptrdiff_t len )
{
  if ( len <= 0 )
    return 0;

  if ( len > LZSKIPHASH ) {
    ++src;
    hash = 0;
    const uint8_t* pEnd = src + len + LZMATCH;

    for ( const uint8_t* p=src+len; p < pEnd ; ) {
      UPDATE_HASH( hash, *p++ );
    }

    return hash;
  }

  UPDATE_HASH_EX( hash, src );
  ++src;

  for ( int i=0; i < len ; ++i ) {
    table[ HASH_POS( hash ) ] = (LZTableItem)_wrap( pos + i );
    UPDATE_HASH_EX( hash, src + i );
  }

  return hash;
}
Example #2
void Deflator::init_hash()
{
   register unsigned j;

   for (ins_h=0, j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, window[j]);
   /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
      not important since only literal bytes will be emitted. */
}
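The gzip/zlib-derived snippets on this page (examples #2, #4, #6, #12) rely on an UPDATE_HASH macro defined elsewhere in deflate.c. It is typically a shift-and-XOR rolling hash; the following is only a rough sketch with placeholder constants (the real shift and mask are derived from HASH_BITS and MIN_MATCH and vary per project):

/* Hypothetical sketch of a gzip-style rolling hash update.
 * H_SHIFT and HASH_MASK stand in for the project's real constants. */
#define H_SHIFT   5                    /* bits contributed per input byte  */
#define HASH_MASK ((1u << 15) - 1)     /* keep the hash inside the table   */

/* Fold the next input byte c into hash h. After MIN_MATCH consecutive
 * updates, h depends only on the last MIN_MATCH bytes. */
#define UPDATE_HASH(h, c) ((h) = (((h) << H_SHIFT) ^ (c)) & HASH_MASK)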
Example #3
/*  If the file is very small, copies it.
    Copies the first two pixels of the first segment, and sends the segments
    one by one to compress_seg.
    The number of bytes compressed is stored inside the encoder. */
static void FNAME(compress)(Encoder *encoder)
{
    uint32_t seg_id = encoder->cur_image.first_win_seg;
    PIXEL    *ip;
    SharedDictionary *dict = encoder->dict;
    int hval;

    // fetch the first image segment that is not too small
    while ((seg_id != NULL_IMAGE_SEG_ID) &&
           (dict->window.segs[seg_id].image->id == encoder->cur_image.id) &&
           ((((PIXEL *)dict->window.segs[seg_id].lines_end) -
             ((PIXEL *)dict->window.segs[seg_id].lines)) < 4)) {
        // copying the segment
        if (dict->window.segs[seg_id].lines != dict->window.segs[seg_id].lines_end) {
            ip = (PIXEL *)dict->window.segs[seg_id].lines;
            // Note: we assume MAX_COPY > 3
            encode_copy_count(encoder, (uint8_t)(
                                  (((PIXEL *)dict->window.segs[seg_id].lines_end) -
                                   ((PIXEL *)dict->window.segs[seg_id].lines)) - 1));
            while (ip < (PIXEL *)dict->window.segs[seg_id].lines_end) {
                ENCODE_PIXEL(encoder, *ip);
                ip++;
            }
        }
        seg_id = dict->window.segs[seg_id].next;
    }

    if ((seg_id == NULL_IMAGE_SEG_ID) ||
        (dict->window.segs[seg_id].image->id != encoder->cur_image.id)) {
        return;
    }

    ip = (PIXEL *)dict->window.segs[seg_id].lines;


    encode_copy_count(encoder, MAX_COPY - 1);

    HASH_FUNC(hval, ip);
    UPDATE_HASH(encoder->dict, hval, seg_id, 0);

    ENCODE_PIXEL(encoder, *ip);
    ip++;
    ENCODE_PIXEL(encoder, *ip);
    ip++;
#ifdef DEBUG_ENCODE
    printf("copy, copy");
#endif
    // compressing the first segment
    FNAME(compress_seg)(encoder, seg_id, ip, 2);

    // compressing the next segments
    for (seg_id = dict->window.segs[seg_id].next;
        seg_id != NULL_IMAGE_SEG_ID && (
        dict->window.segs[seg_id].image->id == encoder->cur_image.id);
        seg_id = dict->window.segs[seg_id].next) {
        FNAME(compress_seg)(encoder, seg_id, (PIXEL *)dict->window.segs[seg_id].lines, 0);
    }
}
Example #4
void CsObjectInt::HashInit (void)
{
  int j;

  /* If lookahead < MIN_MATCH, ins_h is garbage, but this is */
  /* not important since only literal bytes will be emitted. */

  csh.ins_h = 0;
  for (j = 0; j < MIN_MATCH-1; j++) UPDATE_HASH (csh.ins_h, csh.window[j]);
}
Example #5
inline LZHASH _calcHash( const uint8_t* src )
{
  LZHASH hash = 0;
  const uint8_t* pEnd = src + LZMATCH;
  for( const uint8_t* p = src; p < pEnd ; )
  {
    UPDATE_HASH( hash, *p++ );
  }
  return hash;
}
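Examples #1, #5, and #13 come from an LZHL-style compressor in which _calcHash builds the hash of an LZMATCH-byte window from scratch, while UPDATE_HASH_EX slides that window forward by one byte. The real macros are not shown on this page; the following is a purely hypothetical sketch of how such a shift-XOR pair could be written (names and constants are placeholders, and HASH_POS() is assumed to mask the result down to a table index):

// Hypothetical shift-XOR rolling hash, for illustration only; the real
// LZHL macros and constants may differ.
#define LZHASHSHIFT 5
#define UPDATE_HASH(hash, c) ((hash) = ((hash) << LZHASHSHIFT) ^ (LZHASH)(c))

// Slide an LZMATCH-byte window one byte forward: cancel the contribution
// of the byte leaving the window (p[0]), then fold in the byte entering
// it (p[LZMATCH]).
#define UPDATE_HASH_EX(hash, p)                                       \
    do {                                                              \
        (hash) ^= (LZHASH)(p)[0] << (LZHASHSHIFT * (LZMATCH - 1));    \
        UPDATE_HASH((hash), (p)[LZMATCH]);                            \
    } while (0)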
Example #6
File: deflate.c  Project: g2p/pristine-tar
/* ===========================================================================
 * Initialize the "longest match" routines for a new file
 */
void lm_init (int pack_level, /* 1: best speed, 9: best compression */
              ush *flags)     /* general purpose bit flag */
{
    register unsigned j;

    if (pack_level < 1 || pack_level > 9) gzip_error ("bad pack level");

    /* Initialize the hash table. */
    memzero((char*)head, HASH_SIZE*sizeof(*head));
    /* prev will be initialized on the fly */

    /* rsync params */
    rsync_chunk_end = 0xFFFFFFFFUL;
    rsync_sum = 0;

    /* Set the default configuration parameters:
     */
    max_lazy_match   = configuration_table[pack_level].max_lazy;
    good_match       = configuration_table[pack_level].good_length;
#ifndef FULL_SEARCH
    nice_match       = configuration_table[pack_level].nice_length;
#endif
    max_chain_length = configuration_table[pack_level].max_chain;
    if (pack_level == 1) {
       *flags |= FAST;
    } else if (pack_level == 9) {
       *flags |= SLOW;
    }
    /* ??? reduce max_chain_length for binary files */

    strstart = 0;
    block_start = 0L;

    lookahead = read_buf((char*)window,
			 sizeof(int) <= 2 ? (unsigned)WSIZE : 2*WSIZE);

    if (lookahead == 0 || lookahead == (unsigned)EOF) {
       eofile = 1, lookahead = 0;
       return;
    }
    eofile = 0;
    /* Make sure that we always have enough lookahead. This is important
     * if input comes from a device such as a tty.
     */
    while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();

    ins_h = 0;
    for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, window[j]);
    /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
     * not important since only literal bytes will be emitted.
     */
}
Example #7
static void insert_match(deflate_state *s, struct match match)
{
    if (zunlikely(s->lookahead <= match.match_length + MIN_MATCH))
        return;

    /* matches that are not long enough need to be emitted as literals */
    if (match.match_length < MIN_MATCH) {
        while (match.match_length) {
            match.strstart++;
            match.match_length--;

            if (match.match_length) {
                if (match.strstart >= match.orgstart) {
                    insert_string(s, match.strstart);
                }
            }
        }
        return;
    }

    /* Insert new strings in the hash table only if the match length
     * is not too large. This saves time but degrades compression.
     */
    if (match.match_length <= 16 * s->max_insert_length &&
        s->lookahead >= MIN_MATCH) {
        match.match_length--; /* string at strstart already in table */
        do {
            match.strstart++;
            if (zlikely(match.strstart >= match.orgstart)) {
                insert_string(s, match.strstart);
            }
            /* strstart never exceeds WSIZE-MAX_MATCH, so there are
             * always MIN_MATCH bytes ahead.
             */
        } while (--match.match_length != 0);
        match.strstart++;
    } else {
        match.strstart += match.match_length;
        match.match_length = 0;
        s->ins_h = s->window[match.strstart];
        if (match.strstart >= 1)
            UPDATE_HASH(s, s->ins_h, match.strstart+2-MIN_MATCH);
#if MIN_MATCH != 3
#warning Call UPDATE_HASH() MIN_MATCH-3 more times
#endif
        /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
         * matter since it will be recomputed at next deflate call.
         */
    }
}
Example #8
/* ========================================================================= */
EXPORT_C int ZEXPORT deflateSetDictionary (
    z_streamp strm,
    const Bytef *dictionary,
    uInt  dictLength)
{
    // Line to stop compiler warning about unused mandatory global variable
    char __z=deflate_copyright[0];
    __z=__z;

    deflate_state *s;
    uInt length = dictLength;
    uInt n;
    IPos hash_head = 0;

    if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL ||
            strm->state->status != INIT_STATE) return Z_STREAM_ERROR;

    s = strm->state;
    strm->adler = adler32(strm->adler, dictionary, dictLength);

    if (length < MIN_MATCH) return Z_OK;
    if (length > MAX_DIST(s)) {
        length = MAX_DIST(s);
#ifndef USE_DICT_HEAD
        dictionary += dictLength - length; /* use the tail of the dictionary */
#endif
    }
    zmemcpy(s->window, dictionary, length);
    s->strstart = length;
    s->block_start = (long)length;

    /* Insert all strings in the hash table (except for the last two bytes).
     * s->lookahead stays null, so s->ins_h will be recomputed at the next
     * call of fill_window.
     */
    s->ins_h = s->window[0];
    UPDATE_HASH(s, s->ins_h, s->window[1]);
    for (n = 0; n <= length - MIN_MATCH; n++) {
        INSERT_STRING(s, n, hash_head);
    }
    if (hash_head) hash_head = 0;  /* to make compiler happy */
    return Z_OK;
}
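Both deflateSetDictionary variants (examples #8 and #9) seed ins_h from the first two dictionary bytes and then call INSERT_STRING for every position up to length - MIN_MATCH. In stock zlib, INSERT_STRING is roughly the following macro (a sketch of the non-FASTEST build): it advances the rolling hash by the last byte of the MIN_MATCH-byte string at str and links str into the head/prev hash chains.

/* Sketch of the zlib-style INSERT_STRING used above: update the rolling
 * hash with the last byte of the MIN_MATCH-byte string at 'str', remember
 * the previous head of that hash chain in match_head and prev[], and make
 * 'str' the new chain head. */
#define INSERT_STRING(s, str, match_head) \
    (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
     match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
     s->head[s->ins_h] = (Pos)(str))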
Example #9
int zlib_deflateSetDictionary(
	z_streamp strm,
	const Byte *dictionary,
	uInt  dictLength
)
{
    deflate_state *s;
    uInt length = dictLength;
    uInt n;
    IPos hash_head = 0;

    if (strm == NULL || strm->state == NULL || dictionary == NULL)
	return Z_STREAM_ERROR;

    s = (deflate_state *) strm->state;
    if (s->status != INIT_STATE) return Z_STREAM_ERROR;

    strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);

    if (length < MIN_MATCH) return Z_OK;
    if (length > MAX_DIST(s)) {
	length = MAX_DIST(s);
#ifndef USE_DICT_HEAD
	dictionary += dictLength - length; /* use the tail of the dictionary */
#endif
    }
    memcpy((char *)s->window, dictionary, length);
    s->strstart = length;
    s->block_start = (long)length;

    /* Insert all strings in the hash table (except for the last two bytes).
     * s->lookahead stays null, so s->ins_h will be recomputed at the next
     * call of fill_window.
     */
    s->ins_h = s->window[0];
    UPDATE_HASH(s, s->ins_h, s->window[1]);
    for (n = 0; n <= length - MIN_MATCH; n++) {
	INSERT_STRING(s, n, hash_head);
    }
    if (hash_head) hash_head = 0;  /* to make compiler happy */
    return Z_OK;
}
Example #10
/* Compresses one segment starting from 'from'.
   In order to encode a match, pixel resolution is used when encoding an RGB
   image, and byte count when encoding PLT.
*/
static void FNAME(compress_seg)(Encoder *encoder, uint32_t seg_idx, PIXEL *from, int copied)
{
    WindowImageSegment *seg = &encoder->dict->window.segs[seg_idx];
    const PIXEL *ip = from;
    const PIXEL *ip_bound = (PIXEL *)(seg->lines_end) - BOUND_OFFSET;
    const PIXEL *ip_limit = (PIXEL *)(seg->lines_end) - LIMIT_OFFSET;
    int hval;
    int copy = copied;
#ifdef  LZ_PLT
    int pix_per_byte = PLT_PIXELS_PER_BYTE[encoder->cur_image.type];
#endif

#ifdef DEBUG_ENCODE
    int n_encoded = 0;
#endif

    if (copy == 0) {
        encode_copy_count(encoder, MAX_COPY - 1);
    }


    while (LZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
        const PIXEL            *ref;
        const PIXEL            *ref_limit;
        WindowImageSegment     *ref_seg;
        uint32_t ref_seg_idx;
        size_t pix_dist;
        size_t image_dist;
        /* minimum match length */
        size_t len = 0;

        /* comparison starting-point */
        const PIXEL            *anchor = ip;
#ifdef CHAINED_HASH
        int hash_id = 0;
        size_t best_len = 0;
        size_t best_pix_dist = 0;
        size_t best_image_dist = 0;
#endif

        /* check for a run */

        if (LZ_EXPECT_CONDITIONAL(ip > (PIXEL *)(seg->lines))) {
            if (SAME_PIXEL(ip[-1], ip[0]) && SAME_PIXEL(ip[0], ip[1]) && SAME_PIXEL(ip[1], ip[2])) {
                PIXEL x;
                pix_dist = 1;
                image_dist = 0;

                ip += 3;
                ref = anchor + 2;
                ref_limit = (PIXEL *)(seg->lines_end);
                len = 3;

                x = *ref;

                while (ip < ip_bound) { // TODO: maybe separate a run from the same seg or from
                                       // different ones in order to spare ref < ref_limit
                    if (!SAME_PIXEL(*ip, x)) {
                        ip++;
                        break;
                    } else {
                        ip++;
                        len++;
                    }
                }

                goto match;
            } // END RLE MATCH
        }

        /* find potential match */
        HASH_FUNC(hval, ip);

#ifdef CHAINED_HASH
        for (hash_id = 0; hash_id < HASH_CHAIN_SIZE; hash_id++) {
            ref_seg_idx = encoder->dict->htab[hval][hash_id].image_seg_idx;
#else
        ref_seg_idx = encoder->dict->htab[hval].image_seg_idx;
#endif
            ref_seg = encoder->dict->window.segs + ref_seg_idx;
            if (REF_SEG_IS_VALID(encoder->dict, encoder->id,
                                 ref_seg, seg)) {
#ifdef CHAINED_HASH
                ref = ((PIXEL *)ref_seg->lines) + encoder->dict->htab[hval][hash_id].ref_pix_idx;
#else
                ref = ((PIXEL *)ref_seg->lines) + encoder->dict->htab[hval].ref_pix_idx;
#endif
                ref_limit = (PIXEL *)ref_seg->lines_end;

                len = FNAME(do_match)(encoder->dict, ref_seg, ref, ref_limit, seg, ip, ip_bound,
#ifdef  LZ_PLT
                                      pix_per_byte,
#endif
                                      &image_dist, &pix_dist);

#ifdef CHAINED_HASH
                // TODO. not compare len but rather len - encode_size
                if (len > best_len) {
                    best_len = len;
                    best_pix_dist = pix_dist;
                    best_image_dist = image_dist;
                }
#endif
            }

#ifdef CHAINED_HASH
        } // end chain loop
        len = best_len;
        pix_dist = best_pix_dist;
        image_dist = best_image_dist;
#endif

        /* update hash table */
        UPDATE_HASH(encoder->dict, hval, seg_idx, anchor - ((PIXEL *)seg->lines));

        if (!len) {
            goto literal;
        }

match:        // RLE or dictionary (both are encoded by distance from ref (-1) and length)
#ifdef DEBUG_ENCODE
        printf(", match(%d, %d, %d)", image_dist, pix_dist, len);
        n_encoded += len;
#endif

        /* distance is biased */
        if (!image_dist) {
            pix_dist--;
        }

        /* if we have copied something, adjust the copy count */
        if (copy) {
            /* copy is biased, '0' means 1 byte copy */
            update_copy_count(encoder, copy - 1);
        } else {
            /* back, to overwrite the copy count */
            compress_output_prev(encoder);
        }

        /* reset literal counter */
        copy = 0;

        /* length is biased: '1' means a match of 3 pixels for PLT and alpha, */
        /* 2 pixels for RGB16, and 1 pixel for RGB24/32 */
        ip = anchor + len - 2;

#if defined(LZ_RGB16)
        len--;
#elif defined(LZ_PLT) || defined(LZ_RGB_ALPHA)
        len -= 2;
#endif
        GLZ_ASSERT(encoder->usr, len > 0);
        encode_match(encoder, image_dist, pix_dist, len);

        /* update the hash at match boundary */
#if defined(LZ_RGB16) || defined(LZ_RGB24) || defined(LZ_RGB32)
        if (ip > anchor) {
#endif
            HASH_FUNC(hval, ip);
            UPDATE_HASH(encoder->dict, hval, seg_idx, ip - ((PIXEL *)seg->lines));
            ip++;
#if defined(LZ_RGB16) || defined(LZ_RGB24) || defined(LZ_RGB32)
        } else {
            ip++;
        }
#endif
#if defined(LZ_RGB24) || defined(LZ_RGB32)
        if (ip > anchor) {
#endif
            HASH_FUNC(hval, ip);
            UPDATE_HASH(encoder->dict, hval, seg_idx, ip - ((PIXEL *)seg->lines));
            ip++;
#if defined(LZ_RGB24) || defined(LZ_RGB32)
        } else {
            ip++;
        }
#endif
        /* assuming literal copy */
        encode_copy_count(encoder, MAX_COPY - 1);
        continue;

literal:
#ifdef DEBUG_ENCODE
        printf(", copy");
        n_encoded++;
#endif
        ENCODE_PIXEL(encoder, *anchor);
        anchor++;
        ip = anchor;
        copy++;

        if (LZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {
            copy = 0;
            encode_copy_count(encoder, MAX_COPY - 1);
        }
    } // END LOOP (ip < ip_limit)


    /* left-over as literal copy */
    ip_bound++;
    while (ip <= ip_bound) {
#ifdef DEBUG_ENCODE
        printf(", copy");
        n_encoded++;
#endif
        ENCODE_PIXEL(encoder, *ip);
        ip++;
        copy++;
        if (copy == MAX_COPY) {
            copy = 0;
            encode_copy_count(encoder, MAX_COPY - 1);
        }
    }

    /* if we have copied something, adjust the copy length */
    if (copy) {
        update_copy_count(encoder, copy - 1);
    } else {
        compress_output_prev(encoder);
    }
#ifdef DEBUG_ENCODE
    printf("\ntotal encoded=%d\n", n_encoded);
#endif
}
Example #11
void fill_window_sse(deflate_state *s)
{
    z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);

    register unsigned n;
    register Posf *p;
    unsigned more;    /* Amount of free space at the end of the window. */
    uInt wsize = s->w_size;

    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");

    do {
        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);

        /* Deal with !@#$% 64K limit: */
        if (sizeof(int) <= 2) {
            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
                more = wsize;

            } else if (more == (unsigned)(-1)) {
                /* Very unlikely, but possible on 16 bit machine if
                 * strstart == 0 && lookahead == 1 (input done a byte at time)
                 */
                more--;
            }
        }

        /* If the window is almost full and there is insufficient lookahead,
         * move the upper half to the lower one to make room in the upper half.
         */
        if (s->strstart >= wsize+MAX_DIST(s)) {

            zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
            s->match_start -= wsize;
            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
            s->block_start -= (long) wsize;

            /* Slide the hash table (could be avoided with 32 bit values
               at the expense of memory usage). We slide even when level == 0
               to keep the hash table consistent if we switch back to level > 0
               later. (Using level 0 permanently is not an optimal usage of
               zlib, so we don't care about this pathological case.)
             */
            n = s->hash_size;
            p = &s->head[n];
            p -= 8;
            do {
                __m128i value, result;

                value = _mm_loadu_si128((__m128i *)p);
                result = _mm_subs_epu16(value, xmm_wsize);
                _mm_storeu_si128((__m128i *)p, result);

                p -= 8;
                n -= 8;
            } while (n > 0);

            n = wsize;
#ifndef FASTEST
            p = &s->prev[n];
            p -= 8;
            do {
                __m128i value, result;

                value = _mm_loadu_si128((__m128i *)p);
                result = _mm_subs_epu16(value, xmm_wsize);
                _mm_storeu_si128((__m128i *)p, result);
                
                p -= 8;
                n -= 8;
            } while (n > 0);
#endif
            more += wsize;
        }
        if (s->strm->avail_in == 0) break;

        /* If there was no sliding:
         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
         *    more == window_size - lookahead - strstart
         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
         * => more >= window_size - 2*WSIZE + 2
         * In the BIG_MEM or MMAP case (not yet supported),
         *   window_size == input_size + MIN_LOOKAHEAD  &&
         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
         * Otherwise, window_size == 2*WSIZE so more >= 2.
         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
         */
        Assert(more >= 2, "more < 2");

        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
        s->lookahead += n;

        /* Initialize the hash value now that we have some input: */
        if (s->lookahead + s->insert >= MIN_MATCH) {
            uInt str = s->strstart - s->insert;
            s->ins_h = s->window[str];
            if (str >= 1)
                UPDATE_HASH(s, s->ins_h, str + 1 - (MIN_MATCH-1));
#if MIN_MATCH != 3
            Call UPDATE_HASH() MIN_MATCH-3 more times
#endif
            while (s->insert) {
                UPDATE_HASH(s, s->ins_h, str);
#ifndef FASTEST
                s->prev[str & s->w_mask] = s->head[s->ins_h];
#endif
                s->head[s->ins_h] = (Pos)str;
                str++;
                s->insert--;
                if (s->lookahead + s->insert < MIN_MATCH)
                    break;
            }
        }
        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
         * but this is not important since only literal bytes will be emitted.
         */

    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);

    /* If the WIN_INIT bytes after the end of the current data have never been
     * written, then zero those bytes in order to avoid memory check reports of
     * the use of uninitialized (or uninitialised as Julian writes) bytes by
     * the longest match routines.  Update the high water mark for the next
     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
     */
    if (s->high_water < s->window_size) {
        ulg curr = s->strstart + (ulg)(s->lookahead);
        ulg init;

        if (s->high_water < curr) {
            /* Previous high water mark below current data -- zero WIN_INIT
             * bytes or up to end of window, whichever is less.
             */
            init = s->window_size - curr;
            if (init > WIN_INIT)
                init = WIN_INIT;
            zmemzero(s->window + curr, (unsigned)init);
            s->high_water = curr + init;
        }
        else if (s->high_water < (ulg)curr + WIN_INIT) {
            /* High water mark at or above current data, but below current data
             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
             * to end of window, whichever is less.
             */
            init = (ulg)curr + WIN_INIT - s->high_water;
            if (init > s->window_size - s->high_water)
                init = s->window_size - s->high_water;
            zmemzero(s->window + s->high_water, (unsigned)init);
            s->high_water += init;
        }
    }

    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
           "not enough room for search");
}
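The two _mm_subs_epu16 loops in fill_window_sse vectorize the hash-table slide: subtracting w_size with unsigned saturation moves every stored position back by one window, and positions that would go negative clamp to 0 (NIL, i.e. "no entry"). A scalar sketch of the same slide, matching what stock zlib does one entry at a time:

/* Scalar equivalent of the SSE slide loop above (sketch). Every stored
 * position is moved back by wsize; anything that predates the discarded
 * half of the window collapses to NIL (0), which is exactly what the
 * unsigned saturating subtraction achieves eight entries at a time. */
unsigned n = s->hash_size;
Posf *p = &s->head[n];
do {
    unsigned m = *--p;
    *p = (Pos)(m >= wsize ? m - wsize : NIL);
} while (--n);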
Example #12
File: deflate.cpp  Project: sasq64/fastzip
/* ===========================================================================
 * Initialize the "longest match" routines for a new file
 *
 * IN assertion: window_size is > 0 if the input file is already read or
 *    mmap'ed in the window[] array, 0 otherwise. In the first case,
 *    window_size is sufficient to contain the whole input file plus
 *    MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end
 *    of window[] when looking for matches towards the end).
 */
void IZDeflate::lm_init (int pack_level, ush *flags)
    //int pack_level; /* 0: store, 1: best speed, 9: best compression */
    //ush *flags;     /* general purpose bit flag */
{
    unsigned j;

    if (pack_level < 1 || pack_level > 9) error("bad pack level");

    /* Do not slide the window if the whole input is already in memory
     * (window_size > 0)
     */
    sliding = 0;
    if (window_size == 0L) {
        sliding = 1;
        window_size = (ulg)2L*WSIZE;
    }

    /* Use dynamic allocation if compiler does not like big static arrays: */
#ifdef DYN_ALLOC
    if (window == NULL) {
        window = (uch *) zcalloc(WSIZE,   2*sizeof(uch));
        if (window == NULL) ziperr(ZE_MEM, "window allocation");
    }
    if (prev == NULL) {
        prev   = (Pos *) zcalloc(WSIZE,     sizeof(Pos));
        head   = (Pos *) zcalloc(HASH_SIZE, sizeof(Pos));
        if (prev == NULL || head == NULL) {
            ziperr(ZE_MEM, "hash table allocation");
        }
    }
#endif /* DYN_ALLOC */

    /* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
     * prev[] will be initialized on the fly.
     */
    head[HASH_SIZE-1] = NIL;
    memset((char*)head, NIL, (unsigned)(HASH_SIZE-1)*sizeof(*head));

    /* Set the default configuration parameters:
     */
    max_lazy_match   = configuration_table[pack_level].max_lazy;
    good_match       = configuration_table[pack_level].good_length;
#ifndef FULL_SEARCH
    nice_match       = configuration_table[pack_level].nice_length;
#endif
    max_chain_length = configuration_table[pack_level].max_chain;
    if (pack_level <= 2) {
       *flags |= FAST;
    } else if (pack_level >= 8) {
       *flags |= SLOW;
    }
    /* ??? reduce max_chain_length for binary files */

    strstart = 0;
    block_start = 0L;
#if defined(ASMV) && !defined(RISCOS)
    match_init(); /* initialize the asm code */
#endif

    j = WSIZE;
#ifndef MAXSEG_64K
    if (sizeof(int) > 2) j <<= 1; /* Can read 64K in one step */
#endif
    lookahead = (*read_buf)(read_handle, (char*)window, j);

    if (lookahead == 0 || lookahead == (unsigned)EOF) {
       eofile = 1, lookahead = 0;
       return;
    }
    eofile = 0;
    /* Make sure that we always have enough lookahead. This is important
     * if input comes from a device such as a tty.
     */
    if (lookahead < MIN_LOOKAHEAD) fill_window();

    ins_h = 0;
    for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, window[j]);
    /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
     * not important since only literal bytes will be emitted.
     */
}
Example #13
size_t LZHLCompressor::compress( uint8_t* dst, const uint8_t* src, size_t sz ) {
  LZHLEncoder coder( &stat, dst );
  // (unused) const uint8_t* srcBegin = src;
  const uint8_t* srcEnd = src + sz;

  LZHASH hash = 0;

  if ( sz >= LZMATCH ) {
    const uint8_t* pEnd = src + LZMATCH;

    for ( const uint8_t* p=src; p < pEnd ; ) {
      UPDATE_HASH( hash, *p++ );
    }
  }

  for (;;) {
    ptrdiff_t srcLeft = srcEnd - src;
    if ( srcLeft < LZMATCH ) {
      if ( srcLeft ) {
        _toBuf( src, srcLeft );
        coder.putRaw( src, srcLeft );
      }

      break;  //forever
    }

    ptrdiff_t nRaw = 0;
    ptrdiff_t maxRaw = std::min( srcLeft - LZMATCH, (ptrdiff_t)LZHLEncoder::maxRaw );

#ifdef LZLAZYMATCH
    int    lazyMatchLen = 0;
    int    lazyMatchHashPos = 0;
    LZPOS  lazyMatchBufPos = 0;
    ptrdiff_t    lazyMatchNRaw = 0;
    LZHASH lazyMatchHash = 0;
    bool   lazyForceMatch = false;
#endif
    for (;;) {
      LZHASH hash2 = HASH_POS( hash );

      LZPOS hashPos = table[ hash2 ];
      LZPOS wrapBufPos = _wrap( bufPos );
      table[ hash2 ] = (LZTableItem)wrapBufPos;

      int matchLen = 0;
      if ( hashPos != (LZTABLEINT)(-1) && hashPos != wrapBufPos )
      {
        int matchLimit = std::min( std::min( _distance( wrapBufPos - hashPos ), (int)(srcLeft - nRaw) ), LZMIN + LZHLEncoder::maxMatchOver );
        matchLen = _nMatch( hashPos, src + nRaw, matchLimit );

#ifdef LZOVERLAP
        if ( _wrap( hashPos + matchLen ) == wrapBufPos )
        {
          assert( matchLen != 0 );
          ptrdiff_t xtraMatchLimit = std::min( LZMIN + (ptrdiff_t)LZHLEncoder::maxMatchOver - matchLen, srcLeft - nRaw - matchLen );
          int xtraMatch;
          for ( xtraMatch = 0; xtraMatch < xtraMatchLimit ; ++xtraMatch )
          {
            if ( src[ nRaw + xtraMatch ] != src[ nRaw + xtraMatch + matchLen ] )
              break;//for ( xtraMatch )
          }

          matchLen += xtraMatch;
        }
#endif

#ifdef LZBACKWARDMATCH
        if ( matchLen >= LZMIN - 1 )//to ensure that buf will be overwritten
        {
          int xtraMatchLimit = (int)std::min( LZMIN + LZHLEncoder::maxMatchOver - (ptrdiff_t)matchLen, nRaw );
          int d = (int)_distance( bufPos - hashPos );
          xtraMatchLimit = std::min( std::min( xtraMatchLimit, d - matchLen ), LZBUFSIZE - d );
          int xtraMatch;
          for ( xtraMatch = 0; xtraMatch < xtraMatchLimit ; ++xtraMatch )
          {
            if ( buf[ _wrap( hashPos - xtraMatch - 1 ) ] != src[ nRaw - xtraMatch - 1 ] )
              break;//for ( xtraMatch )
          }

          if ( xtraMatch > 0 ) {
            assert( matchLen + xtraMatch >= LZMIN );
            assert( matchLen + xtraMatch <= _distance( bufPos - hashPos ) );

            nRaw -= xtraMatch;
            bufPos -= xtraMatch;
            hashPos -= xtraMatch;
            matchLen += xtraMatch;
            wrapBufPos = _wrap( bufPos );
            hash = _calcHash( src + nRaw );

#ifdef LZLAZYMATCH
            lazyForceMatch = true;
#endif
          }
        }
#endif
      }

#ifdef LZLAZYMATCH
      if ( lazyMatchLen >= LZMIN ) {
        if ( matchLen > lazyMatchLen ) {
          coder.putMatch( src, nRaw, matchLen - LZMIN, _distance( wrapBufPos - hashPos ) );
          hash = _updateTable( hash, src + nRaw, bufPos + 1, std::min( (ptrdiff_t)matchLen - 1, srcEnd - (src + nRaw + 1) - LZMATCH ) );
          _toBuf( src + nRaw, matchLen );
          src += nRaw + matchLen;
          break;//for ( nRaw )

        } else {
          nRaw = lazyMatchNRaw;
          bufPos = lazyMatchBufPos;

          hash = lazyMatchHash;
          UPDATE_HASH_EX( hash, src + nRaw );
          coder.putMatch( src, nRaw, lazyMatchLen - LZMIN, _distance( bufPos - lazyMatchHashPos ) );
          hash = _updateTable( hash, src + nRaw + 1, bufPos + 2, std::min( (ptrdiff_t)lazyMatchLen - 2, srcEnd - (src + nRaw + 2) - LZMATCH ) );
          _toBuf( src + nRaw, lazyMatchLen );
          src += nRaw + lazyMatchLen;

          break;//for ( nRaw )
        }
      }
#endif

      if ( matchLen >= LZMIN ) {

#ifdef LZLAZYMATCH
        if ( !lazyForceMatch ) {
          lazyMatchLen = matchLen;
          lazyMatchHashPos = hashPos;
          lazyMatchNRaw = nRaw;
          lazyMatchBufPos = bufPos;
          lazyMatchHash = hash;
        } else
#endif
        {
          coder.putMatch( src, nRaw, matchLen - LZMIN, _distance( wrapBufPos - hashPos ) );
          hash = _updateTable( hash, src + nRaw, bufPos + 1, std::min( (ptrdiff_t)matchLen - 1, srcEnd - (src + nRaw + 1) - LZMATCH ) );
          _toBuf( src + nRaw, matchLen );
          src += nRaw + matchLen;

          break;//for ( nRaw )
        }
      }

#ifdef LZLAZYMATCH
      assert( !lazyForceMatch );
#endif

      if ( nRaw + 1 > maxRaw )
      {
#ifdef LZLAZYMATCH
        if ( lazyMatchLen >= LZMIN )
        {
          coder.putMatch( src, nRaw, lazyMatchLen - LZMIN, _distance( bufPos - lazyMatchHashPos ) );
          hash = _updateTable( hash, src + nRaw, bufPos + 1, std::min( (ptrdiff_t)lazyMatchLen - 1, srcEnd - (src + nRaw + 1) - LZMATCH ) );
          _toBuf( src + nRaw, lazyMatchLen );
          src += nRaw + lazyMatchLen;
          break;//for ( nRaw )
        }
#endif

        if ( nRaw + LZMATCH >= srcLeft && srcLeft <= LZHLEncoder::maxRaw )
        {
          _toBuf( src + nRaw, srcLeft - nRaw );
          nRaw = srcLeft;
        }

        coder.putRaw( src, nRaw );
        src += nRaw;
        break;//for ( nRaw )
      }

      UPDATE_HASH_EX( hash, src + nRaw );
      _toBuf( src[ nRaw++ ] );
    }//for ( nRaw )
  }//forever

  return coder.flush();
}