// -------------------------------------------------------------------------- // // Function // Name: static bool SecondStageMatch(xxx) // Purpose: When a match in the hash table is found, scan for second stage match using strong checksum. // Created: 14/1/04 // // -------------------------------------------------------------------------- static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings, int Offset, int32_t BlockSize, int64_t FileBlockNumber, BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks) { // Check parameters ASSERT(pBeginnings != 0); ASSERT(pEndings != 0); ASSERT(Offset >= 0); ASSERT(BlockSize > 0); ASSERT(pFirstInHashList != 0); ASSERT(pIndex != 0); #ifndef BOX_RELEASE_BUILD uint16_t DEBUG_Hash = fastSum.GetComponentForHashing(); #endif uint32_t Checksum = fastSum.GetChecksum(); // Before we go to the expense of the MD5, make sure it's a darn good match on the checksum we already know. BlocksAvailableEntry *scan = pFirstInHashList; bool found=false; while(scan != 0) { if(scan->mWeakChecksum == Checksum) { found = true; break; } scan = scan->mpNextInHashList; } if(!found) { return false; } // Calculate the strong MD5 digest for this block MD5Digest strong; // Add the data from the beginnings strong.Add(pBeginnings + Offset, BlockSize - Offset); // Add any data from the endings if(Offset > 0) { strong.Add(pEndings, Offset); } strong.Finish(); // Then go through the entries in the hash list, comparing with the strong digest calculated scan = pFirstInHashList; //BOX_TRACE("second stage match"); while(scan != 0) { //BOX_TRACE("scan size " << scan->mSize << // ", block size " << BlockSize << // ", hash " << Hash); ASSERT(scan->mSize == BlockSize); ASSERT(RollingChecksum::ExtractHashingComponent(scan->mWeakChecksum) == DEBUG_Hash); // Compare? if(strong.DigestMatches(scan->mStrongChecksum)) { //BOX_TRACE("Match!\n"); // Found! Add to list of found blocks... int64_t fileOffset = (FileBlockNumber * BlockSize) + Offset; int64_t blockIndex = (scan - pIndex); // pointer arthmitic is frowned upon. But most efficient way of doing it here -- alternative is to use more memory // We do NOT search for smallest blocks first, as this code originally assumed. // To prevent this from potentially overwriting a better match, the caller must determine // the relative "goodness" of any existing match and this one, and avoid the call if it // could be detrimental. rFoundBlocks[fileOffset] = blockIndex; // No point in searching further, report success return true; } // Next scan = scan->mpNextInHashList; } // Not matched return false; }