/* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_generic ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 maxNbAttempts, const U32 mls, const U32 extDict) { U32* const chainTable = zc->chainTable; const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog); const U32 chainMask = chainSize-1; const BYTE* const base = zc->base; const BYTE* const dictBase = zc->dictBase; const U32 dictLimit = zc->dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const U32 lowLimit = zc->lowLimit; const U32 current = (U32)(ip-base); const U32 minChain = current > chainSize ? current - chainSize : 0; int nbAttempts=maxNbAttempts; size_t ml=4-1; /* HC4 match finder */ U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { const BYTE* match; size_t currentMl=0; if ((!extDict) || matchIndex >= dictLimit) { match = base + matchIndex; if (match[ml] == ip[ml]) /* potentially better */ currentMl = ZSTD_count(ip, match, iLimit); } else { match = dictBase + matchIndex; if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; } /* save best solution */ if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } if (matchIndex <= minChain) break; matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } return ml; }
static size_t ZSTD_ldm_generateSequences_internal( ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, ldmParams_t const* params, void const* src, size_t srcSize) { /* LDM parameters */ int const extDict = ZSTD_window_hasExtDict(ldmState->window); U32 const minMatchLength = params->minMatchLength; U64 const hashPower = ldmState->hashPower; U32 const hBits = params->hashLog - params->bucketSizeLog; U32 const ldmBucketSize = 1U << params->bucketSizeLog; U32 const hashRateLog = params->hashRateLog; U32 const ldmTagMask = (1U << params->hashRateLog) - 1; /* Prefix and extDict parameters */ U32 const dictLimit = ldmState->window.dictLimit; U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; BYTE const* const base = ldmState->window.base; BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; BYTE const* const lowPrefixPtr = base + dictLimit; /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); /* Input positions */ BYTE const* anchor = istart; BYTE const* ip = istart; /* Rolling hash */ BYTE const* lastHashed = NULL; U64 rollingHash = 0; while (ip <= ilimit) { size_t mLength; U32 const current = (U32)(ip - base); size_t forwardMatchLength = 0, backwardMatchLength = 0; ldmEntry_t* bestEntry = NULL; if (ip != istart) { rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], lastHashed[minMatchLength], hashPower); } else { rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); } lastHashed = ip; /* Do not insert and do not look for a match */ if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { ip++; continue; } /* Get the best entry and compute the match lengths */ { ldmEntry_t* const bucket = ZSTD_ldm_getBucket(ldmState, ZSTD_ldm_getSmallHash(rollingHash, hBits), *params); ldmEntry_t* cur; size_t bestMatchLength = 0; U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { size_t curForwardMatchLength, curBackwardMatchLength, curTotalMatchLength; if (cur->checksum != checksum || cur->offset <= lowestIndex) { continue; } if (extDict) { BYTE const* const curMatchBase = cur->offset < dictLimit ? dictBase : base; BYTE const* const pMatch = curMatchBase + cur->offset; BYTE const* const matchEnd = cur->offset < dictLimit ? dictEnd : iend; BYTE const* const lowMatchPtr = cur->offset < dictLimit ? dictStart : lowPrefixPtr; curForwardMatchLength = ZSTD_count_2segments( ip, pMatch, iend, matchEnd, lowPrefixPtr); if (curForwardMatchLength < minMatchLength) { continue; } curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, lowMatchPtr); curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; } else { /* !extDict */ BYTE const* const pMatch = base + cur->offset; curForwardMatchLength = ZSTD_count(ip, pMatch, iend); if (curForwardMatchLength < minMatchLength) { continue; } curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, lowPrefixPtr); curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; } if (curTotalMatchLength > bestMatchLength) { bestMatchLength = curTotalMatchLength; forwardMatchLength = curForwardMatchLength; backwardMatchLength = curBackwardMatchLength; bestEntry = cur; } } } /* No match found -- continue searching */ if (bestEntry == NULL) { ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, current, *params); ip++; continue; } /* Match found */ mLength = forwardMatchLength + backwardMatchLength; ip -= backwardMatchLength; { /* Store the sequence: * ip = current - backwardMatchLength * The match is at (bestEntry->offset - backwardMatchLength) */ U32 const matchIndex = bestEntry->offset; U32 const offset = current - matchIndex; rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; /* Out of sequence storage */ if (rawSeqStore->size == rawSeqStore->capacity) return ERROR(dstSize_tooSmall); seq->litLength = (U32)(ip - anchor); seq->matchLength = (U32)mLength; seq->offset = offset; rawSeqStore->size++; } /* Insert the current entry into the hash table */ ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, (U32)(lastHashed - base), *params); assert(ip + backwardMatchLength == lastHashed); /* Fill the hash table from lastHashed+1 to ip+mLength*/ /* Heuristic: don't need to fill the entire table at end of block */ if (ip + mLength <= ilimit) { rollingHash = ZSTD_ldm_fillLdmHashTable( ldmState, rollingHash, lastHashed, ip + mLength, base, hBits, *params); lastHashed = ip + mLength - 1; } ip += mLength; anchor = ip; } return iend - anchor; }
/** ZSTD_insertDUBT1() : * sort one already inserted but unsorted position * assumption : current >= btlow == (current - btmask) * doesn't fail */ static void ZSTD_insertDUBT1(ZSTD_matchState_t* ms, U32 current, const BYTE* inputEnd, U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const bt = ms->chainTable; U32 const btLog = cParams->chainLog - 1; U32 const btMask = (1 << btLog) - 1; size_t commonLengthSmaller=0, commonLengthLarger=0; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const U32 dictLimit = ms->window.dictLimit; const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* match; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = smallerPtr + 1; U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ U32 dummy32; /* to be nullified at the end */ U32 const windowLow = ms->window.lowLimit; DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", current, dictLimit, windowLow); assert(current >= btLow); assert(ip < iend); /* condition for ZSTD_count */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ assert(matchIndex < current); /* note : all candidates are now supposed sorted, * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ if ( (dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ || (current < dictLimit) /* both in extDict */) { const BYTE* const mBase = ( (dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) ? base : dictBase; assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ || (current < dictLimit) ); match = mBase + matchIndex; matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); } else { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) match = base + matchIndex; /* preparation for next read of match[matchLength] */ } DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", current, matchIndex, (U32)matchLength); if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ } if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", matchIndex, btLow, nextPtr[1]); smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ } else { /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", matchIndex, btLow, nextPtr[0]); largerPtr = nextPtr; matchIndex = nextPtr[0]; } } *smallerPtr = *largerPtr = 0; }
/* ******************************* * Common parser - lazy strategy *********************************/ FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, const U32 searchMethod, const U32 depth, ZSTD_dictMode_e const dictMode) { const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; const BYTE* const base = ms->window.base; const U32 prefixLowestIndex = ms->window.dictLimit; const BYTE* const prefixLowest = base + prefixLowestIndex; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? dms->window.dictLimit : 0; const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? dictBase + dictLowestIndex : NULL; const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); /* init */ ip += (dictAndPrefixLength == 0); ms->nextToUpdate3 = ms->nextToUpdate; if (dictMode == ZSTD_noDict) { U32 const maxRep = (U32)(ip - prefixLowest); if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; } if (dictMode == ZSTD_dictMatchState) { /* dictMatchState repCode checks don't currently handle repCode == 0 * disabling. */ assert(offset_1 <= dictAndPrefixLength); assert(offset_2 <= dictAndPrefixLength); } /* Match Loop */ while (ip < ilimit) { size_t matchLength=0; size_t offset=0; const BYTE* start=ip+1; /* check repCode */ if (dictMode == ZSTD_dictMatchState) { const U32 repIndex = (U32)(ip - base) + 1 - offset_1; const BYTE* repMatch = (dictMode == ZSTD_dictMatchState && repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; if (depth==0) goto _storeSequence; } } if ( dictMode == ZSTD_noDict && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; if (depth==0) goto _storeSequence; } /* first search (depth 0) */ { size_t offsetFound = 999999999; size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); if (ml2 > matchLength) matchLength = ml2, start = ip, offset=offsetFound; } if (matchLength < 4) { ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ continue; } /* let's try to find a better solution */ if (depth>=1) while (ip<ilimit) { ip ++; if ( (dictMode == ZSTD_noDict) && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(mlRep * 3); int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } if (dictMode == ZSTD_dictMatchState) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; int const gain2 = (int)(mlRep * 3); int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } } { size_t offset2=999999999; size_t const ml2 = searchMax(ms, ip, iend, &offset2); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; /* search a better one */ } } /* let's find an even better one */ if ((depth==2) && (ip<ilimit)) { ip ++; if ( (dictMode == ZSTD_noDict) && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(mlRep * 4); int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } if (dictMode == ZSTD_dictMatchState) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; int const gain2 = (int)(mlRep * 4); int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } } { size_t offset2=999999999; size_t const ml2 = searchMax(ms, ip, iend, &offset2); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* NOTE: * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which * overflows the pointer, which is undefined behavior. */ /* catch up */ if (offset) { if (dictMode == ZSTD_noDict) { while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } } if (dictMode == ZSTD_dictMatchState) { U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ } offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); } /* store sequence */ _storeSequence: { size_t const litLength = start - anchor; ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } /* check immediate repcode */ if (dictMode == ZSTD_dictMatchState) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); U32 const repIndex = current2 - offset_2; const BYTE* repMatch = dictMode == ZSTD_dictMatchState && repIndex < prefixLowestIndex ? dictBase - dictIndexDelta + repIndex : base + repIndex; if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; } break; } } if (dictMode == ZSTD_noDict) { while ( ((ip <= ilimit) & (offset_2>0)) && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { /* store sequence */ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) */ } } }
/* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_generic ( ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 mls, const ZSTD_dictMode_e dictMode) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const chainTable = ms->chainTable; const U32 chainSize = (1 << cParams->chainLog); const U32 chainMask = chainSize-1; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const U32 lowLimit = ms->window.lowLimit; const U32 current = (U32)(ip-base); const U32 minChain = current > chainSize ? current - chainSize : 0; U32 nbAttempts = 1U << cParams->searchLog; size_t ml=4-1; /* HC4 match finder */ U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { const BYTE* const match = base + matchIndex; assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ if (match[ml] == ip[ml]) /* potentially better */ currentMl = ZSTD_count(ip, match, iLimit); } else { const BYTE* const match = dictBase + matchIndex; assert(match+4 <= dictEnd); if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; } /* save best solution */ if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } if (matchIndex <= minChain) break; matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } if (dictMode == ZSTD_dictMatchState) { const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); const U32 dmsChainMask = dmsChainSize - 1; const U32 dmsLowestIndex = dms->window.dictLimit; const BYTE* const dmsBase = dms->window.base; const BYTE* const dmsEnd = dms->window.nextSrc; const U32 dmsSize = (U32)(dmsEnd - dmsBase); const U32 dmsIndexDelta = dictLimit - dmsSize; const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; const BYTE* const match = dmsBase + matchIndex; assert(match+4 <= dmsEnd); if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; /* save best solution */ if (currentMl > ml) { ml = currentMl; *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } if (matchIndex <= dmsMinChain) break; matchIndex = dmsChainTable[matchIndex & dmsChainMask]; } } return ml; }
static size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iend, size_t* offsetPtr, U32 const mls, const ZSTD_dictMode_e dictMode) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hashLog = cParams->hashLog; size_t const h = ZSTD_hashPtr(ip, hashLog, mls); U32 matchIndex = hashTable[h]; const BYTE* const base = ms->window.base; U32 const current = (U32)(ip-base); U32 const windowLow = ms->window.lowLimit; U32* const bt = ms->chainTable; U32 const btLog = cParams->chainLog - 1; U32 const btMask = (1 << btLog) - 1; U32 const btLow = (btMask >= current) ? 0 : current - btMask; U32 const unsortLimit = MAX(btLow, windowLow); U32* nextCandidate = bt + 2*(matchIndex&btMask); U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; U32 nbCompares = 1U << cParams->searchLog; U32 nbCandidates = nbCompares; U32 previousCandidate = 0; DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); assert(ip <= iend-8); /* required for h calculation */ /* reach end of unsorted candidates list */ while ( (matchIndex > unsortLimit) && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) && (nbCandidates > 1) ) { DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", matchIndex); *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ previousCandidate = matchIndex; matchIndex = *nextCandidate; nextCandidate = bt + 2*(matchIndex&btMask); unsortedMark = bt + 2*(matchIndex&btMask) + 1; nbCandidates --; } /* nullify last candidate if it's still unsorted * simplification, detrimental to compression ratio, beneficial for speed */ if ( (matchIndex > unsortLimit) && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", matchIndex); *nextCandidate = *unsortedMark = 0; } /* batch sort stacked candidates */ matchIndex = previousCandidate; while (matchIndex) { /* will end on matchIndex == 0 */ U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; U32 const nextCandidateIdx = *nextCandidateIdxPtr; ZSTD_insertDUBT1(ms, matchIndex, iend, nbCandidates, unsortLimit, dictMode); matchIndex = nextCandidateIdx; nbCandidates++; } /* find longest match */ { size_t commonLengthSmaller = 0, commonLengthLarger = 0; const BYTE* const dictBase = ms->window.dictBase; const U32 dictLimit = ms->window.dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; U32 matchEndIdx = current + 8 + 1; U32 dummy32; /* to be nullified at the end */ size_t bestLength = 0; matchIndex = hashTable[h]; hashTable[h] = current; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match; if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); } else { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } if (matchLength > bestLength) { if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ if (dictMode == ZSTD_dictMatchState) { nbCompares = 0; /* in addition to avoiding checking any * further in this loop, make sure we * skip checking in the dictionary. */ } break; /* drop, to guarantee consistency (miss a little bit of compression) */ } } if (match[matchLength] < ip[matchLength]) { /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ } else { /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ largerPtr = nextPtr; matchIndex = nextPtr[0]; } } *smallerPtr = *largerPtr = 0; if (dictMode == ZSTD_dictMatchState && nbCompares) { bestLength = ZSTD_DUBT_findBetterDictMatch( ms, ip, iend, offsetPtr, bestLength, nbCompares, mls, dictMode); } assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ if (bestLength >= MINMATCH) { U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", current, (U32)bestLength, (U32)*offsetPtr, mIndex); } return bestLength; } }
/* ******************************* * Common parser - lazy strategy *********************************/ FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 searchMethod, const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base + ctx->dictLimit; U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog; U32 const mls = ctx->appliedParams.cParams.searchLength; typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0; /* init */ ip += (ip==base); ctx->nextToUpdate3 = ctx->nextToUpdate; { U32 const maxRep = (U32)(ip-base); if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; } /* Match Loop */ while (ip < ilimit) { size_t matchLength=0; size_t offset=0; const BYTE* start=ip+1; /* check repCode */ if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { /* repcode : we take it */ matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; if (depth==0) goto _storeSequence; } /* first search (depth 0) */ { size_t offsetFound = 99999999; size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); if (ml2 > matchLength) matchLength = ml2, start = ip, offset=offsetFound; } if (matchLength < 4) { ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ continue; } /* let's try to find a better solution */ if (depth>=1) while (ip<ilimit) { ip ++; if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(mlRep * 3); int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } { size_t offset2=99999999; size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; /* search a better one */ } } /* let's find an even better one */ if ((depth==2) && (ip<ilimit)) { ip ++; if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(ml2 * 4); int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); if ((ml2 >= 4) && (gain2 > gain1)) matchLength = ml2, offset = 0, start = ip; } { size_t offset2=99999999; size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* NOTE: * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which * overflows the pointer, which is undefined behavior. */ /* catch up */ if (offset) { while ( (start > anchor) && (start > base+offset-ZSTD_REP_MOVE) && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); } /* store sequence */ _storeSequence: { size_t const litLength = start - anchor; ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } /* check immediate repcode */ while ( (ip <= ilimit) && ((offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) */ } }
/** ZSTD_insertBt1() : add one or multiple positions to tree. * ip : assumed <= iend-8 . * @return : nb of positions added */ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, U32 extDict) { U32* const hashTable = zc->hashTable; U32 const hashLog = zc->appliedParams.cParams.hashLog; size_t const h = ZSTD_hashPtr(ip, hashLog, mls); U32* const bt = zc->chainTable; U32 const btLog = zc->appliedParams.cParams.chainLog - 1; U32 const btMask = (1 << btLog) - 1; U32 matchIndex = hashTable[h]; size_t commonLengthSmaller=0, commonLengthLarger=0; const BYTE* const base = zc->base; const BYTE* const dictBase = zc->dictBase; const U32 dictLimit = zc->dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* match; const U32 current = (U32)(ip-base); const U32 btLow = btMask >= current ? 0 : current - btMask; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ U32 const windowLow = zc->lowLimit; U32 matchEndIdx = current+8; size_t bestLength = 8; #ifdef ZSTD_C_PREDICT U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); predictedSmall += (predictedSmall>0); predictedLarge += (predictedLarge>0); #endif /* ZSTD_C_PREDICT */ assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = current; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ if (matchIndex == predictedSmall) { /* no need to check length, result known */ *smallerPtr = matchIndex; if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ predictedSmall = predictPtr[1] + (predictPtr[1]>0); continue; } if (matchIndex == predictedLarge) { *largerPtr = matchIndex; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ largerPtr = nextPtr; matchIndex = nextPtr[0]; predictedLarge = predictPtr[0] + (predictPtr[0]>0); continue; } #endif if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; if (match[matchLength] == ip[matchLength]) matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; } else { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } if (matchLength > bestLength) { bestLength = matchLength; if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; } if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ /* match+1 is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ } else { /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ largerPtr = nextPtr; matchIndex = nextPtr[0]; } } *smallerPtr = *largerPtr = 0; if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ if (matchEndIdx > current + 8) return matchEndIdx - (current + 8); return 1; }
static size_t ZSTD_insertBtAndFindBestMatch ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, size_t* offsetPtr, U32 nbCompares, const U32 mls, U32 extDict) { U32* const hashTable = zc->hashTable; U32 const hashLog = zc->appliedParams.cParams.hashLog; size_t const h = ZSTD_hashPtr(ip, hashLog, mls); U32* const bt = zc->chainTable; U32 const btLog = zc->appliedParams.cParams.chainLog - 1; U32 const btMask = (1 << btLog) - 1; U32 matchIndex = hashTable[h]; size_t commonLengthSmaller=0, commonLengthLarger=0; const BYTE* const base = zc->base; const BYTE* const dictBase = zc->dictBase; const U32 dictLimit = zc->dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; const U32 current = (U32)(ip-base); const U32 btLow = btMask >= current ? 0 : current - btMask; const U32 windowLow = zc->lowLimit; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; U32 matchEndIdx = current+8; U32 dummy32; /* to be nullified at the end */ size_t bestLength = 0; assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = current; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match; if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; if (match[matchLength] == ip[matchLength]) matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; } else { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } if (matchLength > bestLength) { if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ break; /* drop, to guarantee consistency (miss a little bit of compression) */ } if (match[matchLength] < ip[matchLength]) { /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ } else { /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ largerPtr = nextPtr; matchIndex = nextPtr[0]; } } *smallerPtr = *largerPtr = 0; zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; return bestLength; }
FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const U32 mls) { U32* const hashLong = cctx->hashTable; const U32 hBitsL = cctx->appliedParams.cParams.hashLog; U32* const hashSmall = cctx->chainTable; const U32 hBitsS = cctx->appliedParams.cParams.chainLog; seqStore_t* seqStorePtr = &(cctx->seqStore); const BYTE* const base = cctx->base; const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; const U32 lowestIndex = cctx->dictLimit; const BYTE* const lowest = base + lowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; U32 offsetSaved = 0; /* init */ ip += (ip==lowest); { U32 const maxRep = (U32)(ip-lowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ size_t mLength; size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); U32 const current = (U32)(ip-base); U32 const matchIndexL = hashLong[h2]; U32 const matchIndexS = hashSmall[h]; const BYTE* matchLong = base + matchIndexL; const BYTE* match = base + matchIndexS; hashLong[h2] = hashSmall[h] = current; /* update hash tables */ assert(offset_1 <= current); /* supposed guaranteed by construction */ if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* favor repcode */ mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); } else { U32 offset; if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; offset = (U32)(ip-matchLong); while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); U32 const matchIndexL3 = hashLong[hl3]; const BYTE* matchL3 = base + matchIndexL3; hashLong[hl3] = current + 1; if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; ip++; offset = (U32)(ip-matchL3); while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ } else { mLength = ZSTD_count(ip+4, match+4, iend) + 4; offset = (U32)(ip-match); while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ } } else { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } offset_2 = offset_1; offset_1 = offset; ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } /* match found */ ip += mLength; anchor = ip; if (ip <= ilimit) { /* Fill Table */ hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) && ( (offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ } } } /* save reps for next block */ seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; /* Return the last literals size */ return iend - anchor; }
FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const hlog, U32 const stepSize, U32 const mls) { U32* const hashTable = ms->hashTable; const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; const U32 lowestIndex = ms->window.dictLimit; const BYTE* const lowest = base + lowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; /* init */ ip += (ip==lowest); { U32 const maxRep = (U32)(ip-lowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ size_t mLength; size_t const h = ZSTD_hashPtr(ip, hlog, mls); U32 const current = (U32)(ip-base); U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; hashTable[h] = current; /* update hash table */ if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else { if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { assert(stepSize >= 1); ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; } mLength = ZSTD_count(ip+4, match+4, iend) + 4; { U32 const offset = (U32)(ip-match); while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } /* match found */ ip += mLength; anchor = ip; if (ip <= ilimit) { /* Fill Table */ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) && ( (offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ } } }