Пример #1
0
/*
 * Obtain the hash information for the image 'name' and hand it back
 * through 'hinfop'.
 *
 * For any name other than "-", readhashinfo() is tried first; a zero
 * return from it means the signature was usable and we are done.
 * If that attempt fails a warning is printed on stderr and we fall
 * back to hashimage() on the image itself.  For the name "-" the
 * signature lookup is skipped and hashimage() is called directly.
 *
 * Returns 0 when readhashinfo() succeeded, otherwise whatever
 * hashimage() returns.
 */
static int
gethashinfo(char *name, struct hashinfo **hinfop)
{
	/* "-" never has a signature to consult; go straight to the image */
	if (strcmp(name, "-") == 0)
		return hashimage(name, hinfop);

	/* Signature missing or unusable: say so, then hash the image */
	if (readhashinfo(name, hinfop) != 0) {
		fprintf(stderr,
			"%s: no valid signature, using image file instead...\n",
			name);
		return hashimage(name, hinfop);
	}

	return 0;
}
Пример #2
0
/*
 * Intersect the current allocated disk ranges (curranges) with the
 * hashinfo ranges read from the signature file (hfile) and build the
 * list of ranges that still have to be saved.
 *
 * Parameters:
 *   curranges  list of currently allocated ranges.  Entries are modified
 *              in place: a range that straddles a hash region boundary
 *              has its start/size adjusted as it is split.
 *   hfile      signature file name, handed to readhashinfo().
 *   infd       descriptor handed to hash_and_cmp() for hashing.
 *   ssect      passed through to readhashinfo().
 *
 * Returns the head of the newly built range list.  NULL is returned when
 * curranges is empty, when nothing was added to the list, and also on
 * error (after the partial list has been freed), so a caller cannot tell
 * these cases apart from the return value alone.  If readhashinfo()
 * fails the process exits.
 *
 * NOTE(review): the hashinfo obtained from readhashinfo() is not released
 * anywhere in this function -- confirm who owns it.
 */
struct range *
hashmap_compute_delta(struct range *curranges, char *hfile, int infd,
		      uint32_t ssect)
{
	uint32_t		gapstart, gapsize, lastdrangeend = 0;
	unsigned int		hashlen;
	unsigned char		*(*hashfunc)(const unsigned char *,
					     size_t, unsigned char *);
	struct range		dummy_head, *range_tail;
	struct hashregion	*hreg, *ereg;
	char			*hashstr;
	struct hashinfo		*hinfo;
	struct range		*drange;
	int			retval, changed, gapcount;
	
	/*
	 * No allocated ranges, that was easy!
	 */
	if (curranges == NULL)
		return NULL;

	/*
	 * First we read the hashfile to get hash ranges and values
	 */
	retval = readhashinfo(hfile, &hinfo, ssect);
	if (retval < 0) {
		fprintf(stderr, "readhashinfo: failed !\n"
			" * * * Aborting * * *\n");
		exit(1);
	}

	/*
	 * Determine the hash function.  Unknown hash types are treated
	 * as MD5.
	 */
	switch (hinfo->hashtype) {
	case HASH_TYPE_MD5:
	default:
		hashlen = 16;
		hashfunc = MD5;
		hashstr = "MD5 digest";
		break;
	case HASH_TYPE_SHA1:
		hashlen = 20;
		hashfunc = SHA1;
		hashstr = "SHA1 digest";
		break;
	}

	/*
	 * The new range list.  Use a dummy element as the head and
	 * keep track of the tail for easy appending.  The dummy element
	 * is initialized such that add_to_range() will not coalesce
	 * anything with it and it will remain distinct.
	 */
	dummy_head.start = ~0;
	dummy_head.size = 0;
	dummy_head.next = NULL;
	range_tail = &dummy_head;

	/*
	 * Loop through all hash regions, comparing with the currently
	 * allocated disk regions.  Both lists are walked in step; the
	 * loop ends when either one is exhausted.
	 */
	drange = curranges;
	ereg = hinfo->regions + hinfo->nregions;
	for (hreg = hinfo->regions; hreg < ereg; hreg++) {
		assert(drange && drange->size > 0);
#ifdef FOLLOW
		fprintf(stderr, "H: [%u-%u] start\n",
			hreg->region.start,
			hreg->region.start + hreg->region.size - 1);
		fprintf(stderr, "  D: [%u-%u] start\n",
			drange->start,
			drange->start + drange->size - 1);
#endif

		/*
		 * Any allocated ranges on disk that are before the
		 * hash range are newly allocated, and must be put in the image.
		 */
		while (drange &&
		       (drange->start + drange->size) <= hreg->region.start) {
#ifdef FOLLOW
			fprintf(stderr, "    D: [%u-%u] pre-hreg skip\n",
				drange->start,
				drange->start + drange->size - 1);
#endif
#ifdef HASHSTATS
			hashstats.cur_allocated += drange->size;
			hashstats.cur_only += drange->size;
#endif
			if (add_to_range(&range_tail,
					 drange->start, drange->size) < 0)
				goto error;

			lastdrangeend = drange->start + drange->size;
			drange = drange->next;
			assert(drange == NULL || drange->size > 0);
		}
		/*
		 * Out of dranges: leave with hreg still pointing at this
		 * (untouched) hash region.
		 */
		if (drange == NULL)
			break;
		assert(hreg->region.start < (drange->start + drange->size));

#ifdef FOLLOW
		fprintf(stderr, "  D: [%u-%u] after pre-hreg skip\n",
			drange->start,
			drange->start + drange->size - 1);
#endif

		/*
		 * Any allocated range in the original image that is below our
		 * first allocated range on the current disk can be ignored.
		 * (The blocks must have been deallocated.)
		 */

		if (hreg->region.start + hreg->region.size <= drange->start) {
#ifdef HASHSTATS
			hashstats.orig_only += hreg->region.size;
#endif
			continue;
		}

		/*
		 * Otherwise there is some overlap between the current drange
		 * and hreg.  To simplify things, we split drange so that we
		 * can treat the portion of drange before the overlap
		 * separately, thus aligning with hash boundaries.
		 */
		assert(hreg->region.start + hreg->region.size > drange->start);
		assert(hreg->region.start < drange->start + drange->size);

		/*
		 * Any part of the drange that falls before the hreg is
		 * new data and needs to be in the image.
		 */
		if (drange->start < hreg->region.start) {
			uint32_t before = hreg->region.start - drange->start;
#ifdef HASHSTATS
			hashstats.cur_allocated += before;
			hashstats.cur_only += before;
#endif
			if (add_to_range(&range_tail,
					 drange->start, before) < 0)
				goto error;
			
#ifdef FOLLOW
			fprintf(stderr, "  D: [%u-%u]/[%u-%u] drange head split\n",
				drange->start,
				drange->start + before - 1,
				drange->start + before,
				drange->start + drange->size - 1);
#endif
			/*
			 * Update drange with new start and size to account
			 * for the stuff we've taken off.  We continue
			 * processing with this new range.
			 */
			drange->start += before;
			drange->size -= before;
		}

		/*
		 * We have now isolated one or more dranges that are "covered"
		 * by the current hreg.  Here we might use the hash value
		 * associated with the hreg to determine whether the
		 * corresponding disk contents have changed.  If there is a
		 * single drange that exactly matches the hreg, then we
		 * obviously do this.  But what if there are gaps in the
		 * coverage, i.e., multiple non-adjacent dranges covered by
		 * the hreg?  This implies that not all blocks described by
		 * the original hash are still important in the current image.
		 * In fact there could be as little as a single disk block
		 * still valid for a very large hrange.
		 *
		 * In this case we can either blindly include the dranges
		 * in the merged list (hash_free==0), or we can go ahead and
		 * do the hash over the entire range (hash_free==1) on the
		 * chance that the blocks that are no longer allocated (the
		 * "gaps" between dranges) have not changed content and the
		 * hash will still match and thus we can avoid including the
		 * dranges in the merged list.  The latter is valid, but is
		 * it likely to pay off?  We will have to see.
		 *
		 * The outcome is recorded in 'changed':
		 *   0   hash compared equal, covered dranges are dropped
		 *   2   no comparison was made (gap in coverage)
		 *   3   no comparison was made (fixup overlaps the hreg)
		 * Any other non-zero value comes from hash_and_cmp() and
		 * means the hash differs (the stats code below assumes that
		 * value is 1 -- TODO confirm against hash_and_cmp()).
		 */
		if (hash_free ||
		    (drange->start == hreg->region.start &&
		     drange->size >= hreg->region.size)) {

			/*
			 * XXX if there is a fixup, all bets are off
			 * (e.g., they might compare equal now, but not
			 * after the fixup).  Just force inclusion of all
			 * data.
			 *
			 * XXX we could do this on a drange by drange basis
			 * below, but I deem it not worth the trouble since
			 * all this code will be changing anyway.
			 */
			if (hasfixup(hreg->region.start, hreg->region.size)) {
				changed = 3;
#ifdef FOLLOW
				fprintf(stderr, "  H: [%u-%u] fixup overlap\n",
					hreg->region.start,
					hreg->region.start + hreg->region.size-1);
#endif
			} else {
				
				TIMEOP(
				       changed = hash_and_cmp(infd, hashfunc,
							      hashlen, hreg,
							      ereg - hreg),
				       time_hash_and_cmp);
				if (changed < 0)
					goto error;

#ifdef FOLLOW
				fprintf(stderr, "  H: [%u-%u] hash %s\n",
					hreg->region.start,
					hreg->region.start + hreg->region.size-1,
					changed ? "differs" : "matches");
#endif
			}
		} else {
			/*
			 * There is a gap in the dranges covered by the hreg.
			 * Just save all dranges covered by this hreg.
			 */
			changed = 2;
#ifdef FOLLOW
			fprintf(stderr, "  H: [%u-%u] no compare\n",
				hreg->region.start,
				hreg->region.start + hreg->region.size - 1);
#endif
		}

#ifdef HASHSTATS
		/*
		 * Provisionally charge the whole hreg to the relevant
		 * counters; gaps are backed out again further down.
		 */
		hashstats.shared += hreg->region.size;
		if (!changed)
			hashstats.unchanged += hreg->region.size;
		else if (changed > 1) {
			hashstats.nocompare += hreg->region.size;
			if (changed == 3)
				hashstats.fixup += hreg->region.size;
		}
		gapstart = hreg->region.start;
		gapsize = gapcount = 0;
#endif
		/*
		 * Loop through all dranges completely covered by the hreg
		 * and add them or skip them depending on changed.
		 */
		assert(drange &&
		       drange->start < hreg->region.start + hreg->region.size);
		while (drange &&
		       drange->start < hreg->region.start + hreg->region.size) {
			uint32_t curstart = drange->start;
			uint32_t curend = curstart + drange->size;
			uint32_t hregstart = hreg->region.start;
			uint32_t hregend = hregstart + hreg->region.size;

			/*
			 * There may be a final drange which crosses over the
			 * hreg end, in which case we split it, treating the
			 * initial part here, and leaving the rest for the next
			 * iteration.
			 */
			if (curend > hregend) {
				uint32_t after = curend - hregend;
#ifdef FOLLOW
				fprintf(stderr, "    D: [%u-%u]/[%u-%u] drange tail split\n",
					curstart,
					hregend - 1,
					hregend,
					curend - 1);
#endif

				drange->start = hregend;
				drange->size = after;

				curend = hregend;
			}

			assert(curstart >= hregstart);
			assert(curend <= hregend);

#ifdef FOLLOW
			fprintf(stderr, "    D: [%u-%u] drange covered\n",
				curstart,
				curend - 1);
#endif

#ifdef HASHSTATS
			/*
			 * Keep track of the gaps
			 */
			if (gapstart < curstart) {
#ifdef FOLLOW
				fprintf(stderr,
					"    G: [%u-%u]\n",
					gapstart, curstart - 1);
#endif
				gapsize += curstart - gapstart;
				gapcount++;
			}
			gapstart = curend;
			hashstats.cur_allocated += curend - curstart;
#endif
			if (changed) {
				/*
				 * add the overlapping region.
				 */
				if (add_to_range(&range_tail, curstart,
						 curend - curstart) < 0)
					goto error;

			}

			/*
			 * Unless we split the current entry, bump
			 * drange to the next entry.  (A split moved
			 * drange->start up to hregend, so the start no
			 * longer equals curstart in that case.)
			 */
			if (curstart == drange->start) {
				lastdrangeend = curend;
				drange = drange->next;
				assert(drange == NULL || drange->size > 0);
			}
		}

#ifdef HASHSTATS
		/*
		 * Check for an end gap
		 */
		if (gapstart < hreg->region.start + hreg->region.size) {
			uint32_t hregend =
				hreg->region.start + hreg->region.size;
#ifdef FOLLOW
			fprintf(stderr, "    G: [%u-%u]\n",
				gapstart, hregend - 1);
#endif
			gapsize += hregend - gapstart;
			gapcount++;
		}

		/*
		 * Properly account for gaps.
		 * Earlier we counted the gap as part of the shared
		 * space and as either unchanged or uncompared--adjust
		 * those counts now.
		 */
		if (gapcount) {
			hashstats.gaps++;

			/* note adjustment of counts set above */
			hashstats.shared -= gapsize;
			hashstats.gapsects += gapsize;
			if (!changed) {
				hashstats.unchanged -= gapsize;
				hashstats.unchangedgaps++;
				hashstats.gapunchanged += gapsize;
			} else if (changed > 1) {
				hashstats.nocompare -= gapsize;
				if (changed == 3)
					hashstats.fixup -= gapsize;
				hashstats.gapnocompare += gapsize;
			}
#ifdef FOLLOW
			/* both values are uint32_t, hence %u */
			fprintf(stderr, "  H: [%u-%u] %u/%u free\n",
				hreg->region.start,
				hreg->region.start + hreg->region.size - 1,
				gapsize, hreg->region.size);
#endif
		}
#endif
		/*
		 * Out of dranges: leave with hreg still pointing at the
		 * region just processed so its tail can be accounted for.
		 */
		if (drange == NULL)
			break;
		assert(drange->start >= hreg->region.start + hreg->region.size);
	}
	assert(drange == NULL || hreg == ereg);
	/*
	 * lastdrangeend is only advanced when a drange is consumed.  If
	 * every hash region precedes the first drange (or there are no
	 * hash regions at all) nothing has been consumed yet and it is
	 * legitimately still zero; in that case dranges must remain.
	 */
	assert(drange != NULL || lastdrangeend > 0);

	/*
	 * Remaining hash entries are ignored since they are deallocated
	 * space.  We do keep stats about them however.
	 */
#ifdef HASHSTATS
	while (hreg < ereg) {
		uint32_t size;

		/*
		 * If we ran out of dranges in the middle of an hreg,
		 * the rest of the hreg is deallocated.  This is only the
		 * case when the last consumed drange ended strictly inside
		 * the hreg; if it ended at or before the hreg start the
		 * whole hreg is deallocated and must not be charged for
		 * more than its own size.
		 */
		if (lastdrangeend > hreg->region.start &&
		    lastdrangeend <= hreg->region.start + hreg->region.size) {
			size = hreg->region.start + hreg->region.size -
				lastdrangeend;
#ifdef FOLLOW
			fprintf(stderr, "H: [%u-%u]/[",
				hreg->region.start,
				lastdrangeend - 1);
			if (size)
				fprintf(stderr, "%u-%u",
					lastdrangeend,
					hreg->region.start +
					hreg->region.size - 1);
			fprintf(stderr, "] split, tail skipped\n");
#endif
		} else {
			size = hreg->region.size;
#ifdef FOLLOW
			fprintf(stderr, "H: [%u-%u] skipped\n",
				hreg->region.start,
				hreg->region.start + hreg->region.size - 1);
#endif
		}
		hashstats.orig_only += size;

		/* only the first leftover hreg can be a partial one */
		lastdrangeend = 0;
		hreg++;
	}
#endif

	/*
	 * Remaining dranges are added to the changed blocks list.
	 */
	while (drange) {
		assert(hreg == ereg);
#ifdef HASHSTATS
		hashstats.cur_allocated += drange->size;
		hashstats.cur_only += drange->size;
#endif
		if (add_to_range(&range_tail, drange->start, drange->size) < 0)
			goto error;

		drange = drange->next;
		assert(drange == NULL || drange->size > 0);
	}

	return dummy_head.next;

error:
	/* drop whatever was built so far; the dummy head itself is on the stack */
	freeranges(dummy_head.next);
	return NULL;
}