예제 #1
0
/*
 * Print the MD5 digest of one chunk buffer.
 *
 * Hashes CHUNKSIZE bytes starting at buf and writes a single line of the
 * form "<hash> <name> <chunkno>" to stdout.  The checkindex argument is
 * accepted but not used by this function.
 */
static void
dumpchunkhash(char *name, char *buf, int chunkno, int checkindex)
{
    unsigned char digest[16];
    unsigned char *data = (unsigned char *)buf;

    MD5(data, CHUNKSIZE, digest);

    printf("%s %s %d\n", spewhash(digest, 16), name, chunkno);
}
예제 #2
0
/*
 * Dump every hash region of hinfo to stdout.
 *
 * Each region is printed as "[first-last]: chunk N, hash H", where the
 * range is inclusive.  A final "TOTAL = n" line gives the sum of all
 * region sizes.
 */
static void
dumphash(struct hashinfo *hinfo)
{
	struct hashregion *cur;
	uint32_t idx = 0;
	uint32_t sum = 0;

	while (idx < hinfo->nregions) {
		cur = &hinfo->regions[idx];

		printf("[%u-%u]: chunk %d, hash %s\n",
		       cur->region.start,
		       cur->region.start + cur->region.size - 1,
		       cur->chunkno,
		       spewhash(cur->hash));

		sum += cur->region.size;
		idx++;
	}

	printf("TOTAL = %u\n", sum);
}
예제 #3
0
/*
 * Dump every hash region of hinfo to stdout, but only when the `detail`
 * level (declared outside this function) is greater than 1.
 *
 * Each region is printed as "[first-last]: chunk N, hash H", where the
 * range is inclusive.  The name argument is not used by this function.
 */
static void
dumphash(char *name, struct hashinfo *hinfo)
{
	struct hashregion *cur;
	uint32_t idx;

	/* Nothing to show at low detail levels. */
	if (detail <= 1)
		return;

	idx = 0;
	while (idx < hinfo->nregions) {
		cur = &hinfo->regions[idx];
		printf("[%u-%u]: chunk %d, hash %s\n",
		       cur->region.start,
		       cur->region.start + cur->region.size - 1,
		       cur->chunkno,
		       spewhash(cur->hash));
		idx++;
	}
}
예제 #4
0
/*
 * Read the sectors described by hashreg->region from infd, hash them with
 * hashfunc and compare the first hashlen bytes of the result with the hash
 * stored in hashreg (the hash from the signature file).
 *
 * With READ_CACHE defined, reads are batched: on a cache miss the entries
 * following hashreg[0] (num_reg entries in all) are scanned, and every
 * region that starts exactly where the previous one ends is fetched in the
 * same read.  Later calls whose region lies entirely inside the cached
 * range are served from memory.  Without READ_CACHE the region is read
 * into the hashdata buffer (declared outside this function) on each call.
 *
 * Returns 0 if the hashes match, 1 if they differ, and -1 on a seek,
 * read, allocation or unexpected-EOF error.
 */
static int
hash_and_cmp(int infd,
	     unsigned char *(*hashfunc)(const unsigned char *, size_t,
					unsigned char *),
	     int hashlen, struct hashregion *hashreg, int num_reg)
{
	unsigned char		*bp;
	unsigned char		*buf;	/* start of this region's data */
	size_t			count, byte_size;
	ssize_t			cc;
	off_t			byte_start, retval;
	unsigned char 		hash[HASH_MAXSIZE];
	struct region		hreg = hashreg->region;
	int			iretval;

#ifdef READ_CACHE
	static struct range	cache = { 0, 0, NULL, NULL };
	/*
	 * We read the blocks here. Try to optimize by reading as many
	 * contiguous blocks as possible (by looking through the
	 * hashregions) and remember the cached data's range.
	 * All subsequent calls that can be served from this cache are
	 * served from it. When the first request outside this data comes,
	 * we purge the cache (requests are expected to arrive sequentially)
	 * and fetch the next bunch of consecutive blocks.
	 */
	if (cache.data != NULL && hreg.start >= cache.start &&
	    hreg.start + hreg.size <= cache.start + cache.size) {
		/*
		 * Serve the request from the cache. Both ends of the region
		 * are checked so we never index before the cached data.
		 */
		buf = (unsigned char *)cache.data +
			sectobytes((hreg.start - cache.start));
	} else {
		int i;

		/*
		 * Cache miss: find the run of contiguous blocks that starts
		 * with this region.
		 */
		cache.start = hreg.start;
		cache.size = hreg.size;
		for (i = 0; i < num_reg - 1; i++) {
			/*
			 * Since there are NO overlaps in hashed blocks,
			 * just check end points.
			 */
			if (hashreg[i].region.start + hashreg[i].region.size
						!= hashreg[i+1].region.start) {
				break;
			}

			/*
			 * Contiguous: extend the run.
			 */
			cache.size += hashreg[i+1].region.size;
		}

		byte_size = sectobytes(cache.size);
		byte_start = sectobytes(cache.start);

		/* free(NULL) is a no-op, so no guard is needed. */
		free(cache.data);
		cache.data = (unsigned char *) malloc(byte_size);
		if (!cache.data) {
			fprintf(stderr, "hash_and_cmp: unable to malloc !\n");
			goto error;
		}
		bzero(cache.data, byte_size);

		/*
		 * Go fetch the blocks.
		 */
		retval = lseek(infd, byte_start, SEEK_SET);
		if (retval < 0) {
			fprintf(stderr, "hash_and_cmp: lseek error !\n");
			goto free_error;
		}

		count = byte_size;
		bp = cache.data;
		while (count) {
			TIMEOP(cc = read(infd, bp, count), time_curr_read);
			if (cc < 0) {
				perror("hash_and_cmp: read error -- ");
				goto free_error;
			}
			if (cc == 0) {
				/*
				 * read() returning 0 means EOF; without this
				 * check the loop would never terminate.
				 */
				fprintf(stderr,
					"hash_and_cmp: unexpected EOF\n");
				goto free_error;
			}
			count -= cc;
			bp += cc;
		}
		buf = cache.data;
	}

	/*
	 * Hash only the requested region, not the whole cached run.
	 */
	byte_size = sectobytes(hreg.size);
#else
	/*
	 * Read from the disk !
	 */
	byte_size = sectobytes(hreg.size);
	byte_start = sectobytes(hreg.start);
	assert(hreg.size <= hashdatasize);

	retval = lseek(infd, byte_start, SEEK_SET);
	if (retval < 0) {
		perror("hash_and_cmp: lseek error");
		return -1;
	}

	count = byte_size;
	bp = hashdata;
	while (count > 0) {
		TIMEOP(cc = read(infd, bp, count), time_curr_read);
		if (cc < 0) {
			perror("hash_and_cmp: read error");
			return -1;
		}
		if (cc == 0) {
			fprintf(stderr, "hash_and_cmp: unexpected EOF\n");
			return -1;
		}
		count -= cc;
		bp += cc;
	}
	buf = hashdata;
#endif

	/*
	 * Now calculate the hash of the region's data and compare it.
	 */
	TIMEOP(
	    (void)(*hashfunc)(buf, byte_size, hash),
	time_hash);

#if 0
	fprintf(stderr, "disk: %s\n", spewhash(hash));
	fprintf(stderr, "sig:  %s\n", spewhash(hashreg->hash));
#endif

	/* 0 when the first hashlen bytes agree, 1 otherwise. */
	iretval = (memcmp(hashreg->hash, hash, hashlen) != 0);

#ifdef HASHSTATS
	hashstats.hash_compares++;
	hashstats.hash_scompares += hreg.size;
	if (!iretval) {
		hashstats.hash_identical++;
		hashstats.hash_sidentical += hreg.size;
	}
#endif

	return iretval;

#ifdef READ_CACHE
free_error:
	/* Drop the partially filled cache so it is never served from. */
	free(cache.data);
	cache.data = NULL;
error:
	cache.start = 0;
	cache.size = 0;
#endif
	return -1;
}
예제 #5
0
/*
 * Verify disk contents against the signature in hinfo.
 *
 * Starts the reader for `name`, then for every hash region fetches its
 * data block, hashes it with the digest selected by hinfo->hashtype
 * (SHA1, otherwise MD5) and compares the result with the region's stored
 * hash.  Runs of contiguous mismatching regions are merged and, when
 * `detail` is non-zero, reported on stderr as one "bad hash [first-last]"
 * line each.  Summary counts are printed to stdout, and several counters
 * declared outside this function (nchunks, ndatabytes, nhregions and the
 * TIMEIT cycle counters) are updated.
 *
 * Returns -1 if startreader() fails and 0 otherwise; hash mismatches are
 * reported but do not affect the return value.
 */
static int
checkhash(char *name, struct hashinfo *hinfo)
{
	uint32_t i, inbad, badstart, badsize, reportbad;
	uint32_t badhashes, badchunks, lastbadchunk;
	uint64_t badhashdata;
	struct hashregion *reg;
	int hashlen, chunkno, isbad;
	unsigned char hash[HASH_MAXSIZE];
	unsigned char *(*hashfunc)(const unsigned char *, unsigned long,
				   unsigned char *);
	char *hashstr;
	readbuf_t *rbuf;
	size_t size;
#ifdef TIMEIT
	u_int64_t sstamp, estamp;
#endif

	if (startreader(name, hinfo))
		return -1;

	/* -1 here means "no chunk seen yet" for both chunk trackers. */
	chunkno = lastbadchunk = -1;
	badhashes = badchunks = inbad = reportbad = 0;
	badhashdata = 0;
	badstart = badsize = ~0;
	switch (hinfo->hashtype) {
	case HASH_TYPE_MD5:
	default:
		hashlen = 16;
		hashfunc = MD5;
		hashstr = "MD5";
		break;
	case HASH_TYPE_SHA1:
		hashlen = 20;
		hashfunc = SHA1;
		hashstr = "SHA1";
		break;
	}
	fprintf(stderr, "Checking disk contents using %s digest\n", hashstr);

	for (i = 0, reg = hinfo->regions; i < hinfo->nregions; i++, reg++) {
		/* Count each distinct run of chunk numbers once. */
		if (chunkno != reg->chunkno) {
			nchunks++;
			chunkno = reg->chunkno;
		}
		size = sectobytes(reg->region.size);
		rbuf = getblock(reg);
#ifdef TIMEIT
		sstamp = rdtsc();
#endif
		(void)(*hashfunc)(rbuf->data, size, hash);
#ifdef TIMEIT
		estamp = rdtsc();
		hcycles += (estamp - sstamp);
#endif
		putblock(rbuf);
		ndatabytes += size;

		if (detail > 2) {
			printf("[%u-%u]:\n", reg->region.start,
			       reg->region.start + reg->region.size - 1);
			printf("  sig  %s\n", spewhash(reg->hash));
			printf("  disk %s\n", spewhash(hash));
		}

		isbad = (memcmp(reg->hash, hash, hashlen) != 0);
		if (!isbad) {
			/*
			 * Hash is good.
			 * If we were in a bad stretch, be sure to dump info
			 */
			if (inbad)
				reportbad = 1;
		} else {
			/*
			 * Hash is bad.
			 * If not already in a bad stretch, start one.
			 * If in a bad stretch, lengthen it if contig.
			 * Otherwise, dump the old stretch; a new one is
			 * started for this region after the report below.
			 */
			badhashes++;
			if (chunkno != lastbadchunk) {
				badchunks++;
				lastbadchunk = chunkno;
			}
			badhashdata += size;
			if (!inbad) {
				inbad = 1;
				badstart = reg->region.start;
				badsize = reg->region.size;
			} else {
				if (badstart + badsize == reg->region.start)
					badsize += reg->region.size;
				else
					reportbad = 1;
			}
		}
#ifdef TIMEIT
		sstamp = rdtsc();
		ccycles += (sstamp - estamp);
#endif
		/*
		 * Report on a bad stretch
		 */
		if (reportbad) {
			if (detail)
				fprintf(stderr, "%s: bad hash [%u-%u]\n",
					name, badstart, badstart + badsize - 1);
			reportbad = inbad = 0;
			/*
			 * The stretch ended because the current region is
			 * bad but not contiguous with it.  Begin a new
			 * stretch here so this region is reported too,
			 * instead of being silently dropped.
			 */
			if (isbad) {
				inbad = 1;
				badstart = reg->region.start;
				badsize = reg->region.size;
			}
		}
	}
	/*
	 * Finished on a sour note, report the final bad stretch.
	 */
	if (inbad && detail)
		fprintf(stderr, "%s: bad hash [%u-%u]\n",
			name, badstart, badstart + badsize - 1);

	stopreader();

	nhregions = hinfo->nregions;
	printf("%s: %lu chunks, %lu hashregions, %qu data bytes\n",
	       name, nchunks, nhregions, ndatabytes);
	/* badchunks is unsigned, so it is printed with %u. */
	if (badhashes)
		printf("%s: %u regions (%u chunks) had bad hashes, "
		       "%qu bytes affected\n",
		       name, badhashes, badchunks, badhashdata);
	dump_readbufs();
#ifdef TIMEIT
	printf("%qu bytes: read cycles: %qu, hash cycles: %qu, cmp cycles: %qu\n",
	       ndatabytes, rcycles, hcycles, ccycles);
#endif
	return 0;
}