/*
 * Obtain hash information for the image called `name`.
 *
 * Unless `name` is "-", a signature is tried first via readhashinfo();
 * if that returns 0 we are done.  Otherwise a notice is printed on
 * stderr and the information is derived from the image file itself
 * via hashimage().  For "-" the signature step is skipped entirely.
 *
 * Returns 0 when the signature was read successfully, else whatever
 * hashimage() returns.
 */
static int
gethashinfo(char *name, struct hashinfo **hinfop)
{
	/* "-" never has a signature to consult; hash the image directly. */
	if (strcmp(name, "-") == 0)
		return hashimage(name, hinfop);

	if (readhashinfo(name, hinfop) == 0)
		return 0;

	fprintf(stderr,
		"%s: no valid signature, using image file instead...\n",
		name);
	return hashimage(name, hinfop);
}
/* * Intersect the current allocated disk ranges (curranges) with the * hashinfo ranges read from the signature file (hfile). * Return the resulting range list. */ struct range * hashmap_compute_delta(struct range *curranges, char *hfile, int infd, uint32_t ssect) { uint32_t gapstart, gapsize, lastdrangeend = 0; unsigned int hashlen; unsigned char *(*hashfunc)(const unsigned char *, size_t, unsigned char *); struct range dummy_head, *range_tail; struct hashregion *hreg, *ereg; char *hashstr; struct hashinfo *hinfo; struct range *drange; int retval, changed, gapcount; /* * No allocated ranges, that was easy! */ if (curranges == NULL) return NULL; /* * First we read the hashfile to get hash ranges and values */ retval = readhashinfo(hfile, &hinfo, ssect); if (retval < 0) { fprintf(stderr, "readhashinfo: failed !\n" " * * * Aborting * * *\n"); exit(1); } /* * Deterimine the hash function */ switch (hinfo->hashtype) { case HASH_TYPE_MD5: default: hashlen = 16; hashfunc = MD5; hashstr = "MD5 digest"; break; case HASH_TYPE_SHA1: hashlen = 20; hashfunc = SHA1; hashstr = "SHA1 digest"; break; } /* * The new range list. Use a dummy element as the head and * keep track of the tail for easy appending. The dummy element * is initialized such that add_to_range() will not coalesce * anything with it and it will remain distinct. */ dummy_head.start = ~0; dummy_head.size = 0; dummy_head.next = 0; range_tail = &dummy_head; /* * Loop through all hash regions, comparing with the currently * allocated disk regions. 
*/ drange = curranges; ereg = hinfo->regions + hinfo->nregions; for (hreg = hinfo->regions; hreg < ereg; hreg++) { assert(drange && drange->size > 0); #ifdef FOLLOW fprintf(stderr, "H: [%u-%u] start\n", hreg->region.start, hreg->region.start + hreg->region.size - 1); fprintf(stderr, " D: [%u-%u] start\n", drange->start, drange->start + drange->size - 1); #endif /* * Any allocated ranges on disk that are before the * hash range are newly allocated, and must be put in the image. */ while (drange && (drange->start + drange->size) <= hreg->region.start) { #ifdef FOLLOW fprintf(stderr, " D: [%u-%u] pre-hreg skip\n", drange->start, drange->start + drange->size - 1); #endif #ifdef HASHSTATS hashstats.cur_allocated += drange->size; hashstats.cur_only += drange->size; #endif if (add_to_range(&range_tail, drange->start, drange->size) < 0) goto error; lastdrangeend = drange->start + drange->size; drange = drange->next; assert(drange == NULL || drange->size > 0); } if (drange == NULL) break; assert(hreg->region.start < (drange->start + drange->size)); #ifdef FOLLOW fprintf(stderr, " D: [%u-%u] after pre-hreg skip\n", drange->start, drange->start + drange->size - 1); #endif /* * Any allocated range in the original image that is below our * first allocated range on the current disk can be ignored. * (The blocks must have been deallocated.) */ if (hreg->region.start + hreg->region.size <= drange->start) { #ifdef HASHSTATS hashstats.orig_only += hreg->region.size; #endif continue; } /* * Otherwise there is some overlap between the current drange * and hreg. To simplfy things, we split drange so that we can * treat the portion of drange before the overlap seperately. * thus aligning with hash boundaries */ assert(hreg->region.start + hreg->region.size > drange->start); assert(hreg->region.start < drange->start + drange->size); /* * Any part of the drange that falls before the hreg is * new data and needs to be in the image. 
*/ if (drange->start < hreg->region.start) { uint32_t before = hreg->region.start - drange->start; #ifdef HASHSTATS hashstats.cur_allocated += before; hashstats.cur_only += before; #endif if (add_to_range(&range_tail, drange->start, before) < 0) goto error; #ifdef FOLLOW fprintf(stderr, " D: [%u-%u]/[%u-%u] drange head split\n", drange->start, drange->start + before - 1, drange->start + before, drange->start + drange->size); #endif /* * Update drange with new start and size to account * for the stuff we've taken off. We continue * processing with this new range. */ drange->start += before; drange->size -= before; } /* * We have now isolated one or more dranges that are "covered" * by the current hreg. Here we might use the hash value * associated with the hreg to determine whether the * corresponding disk contents have changed. If there is a * single drange that exactly matches the hreg, then we * obviously do this. But what if there are gaps in the * coverage, i.e., multiple non-adjacent dranges covered by * the hreg? This implies that not all blocks described by * the original hash are still important in the current image. * In fact there could be as little as a single disk block * still valid for a very large hrange. * * In this case we can either blindly include the dranges * in the merged list (hash_free==0), or we can go ahead and * do the hash over the entire range (hash_free==1) on the * chance that the blocks that are no longer allocated (the * "gaps" between dranges) have not changed content and the * hash will still match and thus we can avoid including the * dranges in the merged list. The latter is valid, but is * it likely to pay off? We will have to see. */ if (hash_free || (drange->start == hreg->region.start && drange->size >= hreg->region.size)) { /* * XXX if there is a fixup, all bets are off * (e.g., they might compare equal now, but not * after the fixup). Just force inclusion of all * data. 
* * XXX we could do this on a drange by drange basis * below, but I deem it not worth the trouble since * all this code will be changing anyway. */ if (hasfixup(hreg->region.start, hreg->region.size)) { changed = 3; #ifdef FOLLOW fprintf(stderr, " H: [%u-%u] fixup overlap\n", hreg->region.start, hreg->region.start + hreg->region.size-1); #endif } else { TIMEOP( changed = hash_and_cmp(infd, hashfunc, hashlen, hreg, ereg - hreg), time_hash_and_cmp); if (changed < 0) goto error; #ifdef FOLLOW fprintf(stderr, " H: [%u-%u] hash %s\n", hreg->region.start, hreg->region.start + hreg->region.size-1, changed ? "differs" : "matches"); #endif } } else { /* * There is a gap in the dranges covered by the hreg. * Just save all dranges covered by this hreg. */ changed = 2; #ifdef FOLLOW fprintf(stderr, " H: [%u-%u] no compare\n", hreg->region.start, hreg->region.start + hreg->region.size - 1); #endif } #ifdef HASHSTATS hashstats.shared += hreg->region.size; if (!changed) hashstats.unchanged += hreg->region.size; else if (changed > 1) { hashstats.nocompare += hreg->region.size; if (changed == 3) hashstats.fixup += hreg->region.size; } gapstart = hreg->region.start; gapsize = gapcount = 0; #endif /* * Loop through all dranges completely covered by the hreg * and add them or skip them depending on changed. */ assert(drange && drange->start < hreg->region.start + hreg->region.size); while (drange && drange->start < hreg->region.start + hreg->region.size) { uint32_t curstart = drange->start; uint32_t curend = curstart + drange->size; uint32_t hregstart = hreg->region.start; uint32_t hregend = hregstart + hreg->region.size; /* * There may be a final drange which crosses over the * hreg end, in which case we split it, treating the * initial part here, and leaving the rest for the next * iteration. 
*/ if (curend > hregend) { uint32_t after = curend - hregend; #ifdef FOLLOW fprintf(stderr, " D: [%u-%u]/[%u-%u] drange tail split\n", curstart, hregend - 1, hregend, curend - 1); #endif drange->start = hregend; drange->size = after; curend = hregend; } assert(curstart >= hregstart); assert(curend <= hregend); #ifdef FOLLOW fprintf(stderr, " D: [%u-%u] drange covered\n", curstart, curend - 1); #endif #ifdef HASHSTATS /* * Keep track of the gaps */ if (gapstart < curstart) { #ifdef FOLLOW fprintf(stderr, " G: [%u-%u]\n", gapstart, curstart - 1); #endif gapsize += curstart - gapstart; gapcount++; } gapstart = curend; hashstats.cur_allocated += curend - curstart; #endif if (changed) { /* * add the overlapping region. */ if (add_to_range(&range_tail, curstart, curend - curstart) < 0) goto error; } /* * Unless we split the current entry, bump * drange to the next entry. */ if (curstart == drange->start) { lastdrangeend = curend; drange = drange->next; assert(drange == NULL || drange->size > 0); } } #ifdef HASHSTATS /* * Check for an end gap */ if (gapstart < hreg->region.start + hreg->region.size) { uint32_t hregend = hreg->region.start + hreg->region.size; #ifdef FOLLOW fprintf(stderr, " G: [%u-%u]\n", gapstart, hregend - 1); #endif gapsize += hregend - gapstart; gapcount++; } /* * Properly account for gaps. * Earlier we counted the gap as part of the shared * space and as either unchanged or uncompared--adjust * those counts now. 
*/ if (gapcount) { hashstats.gaps++; /* note adjustment of counts set above */ hashstats.shared -= gapsize; hashstats.gapsects += gapsize; if (!changed) { hashstats.unchanged -= gapsize; hashstats.unchangedgaps++; hashstats.gapunchanged += gapsize; } else if (changed > 1) { hashstats.nocompare -= gapsize; if (changed == 3) hashstats.fixup -= gapsize; hashstats.gapnocompare += gapsize; } #ifdef FOLLOW fprintf(stderr, " H: [%u-%u] %d/%d free\n", hreg->region.start, hreg->region.start + hreg->region.size - 1, gapsize, hreg->region.size); #endif } #endif if (drange == NULL) break; assert(drange->start >= hreg->region.start + hreg->region.size); } assert(drange == NULL || hreg == ereg); assert(lastdrangeend > 0); /* * Remaining hash entries are ignored since they are deallocated * space. We do keep stats about them however. */ #ifdef HASHSTATS while (hreg < ereg) { uint32_t size; /* * If we ran out of dranges in the middle of an hreg, * the rest of the hreg is deallocated. */ if (lastdrangeend > 0 && lastdrangeend <= hreg->region.start + hreg->region.size) { size = hreg->region.start + hreg->region.size - lastdrangeend; #ifdef FOLLOW fprintf(stderr, "H: [%u-%u]/[", hreg->region.start, lastdrangeend - 1); if (size) fprintf(stderr, "%u-%u", lastdrangeend, hreg->region.start + hreg->region.size - 1); fprintf(stderr, "] split, tail skipped\n"); #endif } else { size = hreg->region.size; #ifdef FOLLOW fprintf(stderr, "H: [%u-%u] skipped\n", hreg->region.start, hreg->region.start + hreg->region.size - 1); #endif } hashstats.orig_only += size; lastdrangeend = 0; hreg++; } #endif /* * Remaining dranges are added to the changed blocks list. 
*/ while (drange) { assert(hreg == ereg); #ifdef HASHSTATS hashstats.cur_allocated += drange->size; hashstats.cur_only += drange->size; #endif if (add_to_range(&range_tail, drange->start, drange->size) < 0) goto error; drange = drange->next; assert(drange == NULL || drange->size > 0); } return dummy_head.next; error: freeranges(dummy_head.next); return NULL; }