/*
 * Print one line with the MD5 digest of the CHUNKSIZE bytes starting at
 * buf, followed by name and chunkno.
 *
 * The checkindex argument is accepted but not consulted here.
 */
static void
dumpchunkhash(char *name, char *buf, int chunkno, int checkindex)
{
	enum { DIGEST_LEN = 16 };	/* size of an MD5 digest in bytes */
	unsigned char digest[DIGEST_LEN];
	unsigned char *chunk = (unsigned char *)buf;

	MD5(chunk, CHUNKSIZE, digest);
	printf("%s %s %d\n", spewhash(digest, DIGEST_LEN), name, chunkno);
}
/*
 * Print every hash region recorded in hinfo, one per line, as the
 * inclusive range [start, start+size-1] it covers, its chunk number and
 * its hash.  A final line gives the sum of all region sizes.
 */
static void
dumphash(struct hashinfo *hinfo)
{
	struct hashregion *hr;
	uint32_t idx = 0;
	uint32_t sum = 0;

	while (idx < hinfo->nregions) {
		hr = &hinfo->regions[idx];
		idx++;

		printf("[%u-%u]: chunk %d, hash %s\n",
		       hr->region.start,
		       hr->region.start + hr->region.size - 1,
		       hr->chunkno, spewhash(hr->hash));
		sum += hr->region.size;
	}

	printf("TOTAL = %u\n", sum);
}
/*
 * Print every hash region recorded in hinfo, one per line, as the
 * inclusive range [start, start+size-1] it covers, its chunk number and
 * its hash.  Nothing is printed unless detail is greater than 1.
 *
 * The name argument is accepted but not consulted here.
 */
static void
dumphash(char *name, struct hashinfo *hinfo)
{
	struct hashregion *hr;
	uint32_t idx;

	/* Per-region output is reserved for the higher detail levels. */
	if (!(detail > 1))
		return;

	for (idx = 0; idx < hinfo->nregions; idx++) {
		hr = &hinfo->regions[idx];
		printf("[%u-%u]: chunk %d, hash %s\n",
		       hr->region.start,
		       hr->region.start + hr->region.size - 1,
		       hr->chunkno, spewhash(hr->hash));
	}
}
/* * Read from infd, hash the contents and compare with the hash from sig file. * Optionally (READ_CACHE), read-ahead and cache the blocks */ static int hash_and_cmp(int infd, unsigned char *(*hashfunc)(const unsigned char *, size_t, unsigned char *), int hashlen, struct hashregion *hashreg, int num_reg) { unsigned char *bp; size_t count, byte_size; ssize_t cc; off_t byte_start, retval; unsigned char hash[HASH_MAXSIZE]; struct region hreg = hashreg->region; int iretval; //printf("hash_and_cmp: in -- start = %u, size = %x, num_reg = %d.\n", // hreg.start, hreg.size, num_reg); #ifdef READ_CACHE static struct range cache = { 0, 0, NULL, NULL }; static char *odata = NULL; /* * We read the blocks here. try to optimize here by reading * as many contguous blocks as possible (by looking thru the * hashregions) and store the cached data's range. * all subsequent calls that can be served from this cache are served. * when the first request outside this data comes, we purge the cache * (since request comes sequentially), and fetch the next bunch of * consecutive blocks.... */ if (hreg.start + hreg.size <= cache.start + cache.size) { /* * serve the request from the cache */ buf = cache.data + sectobytes((hreg.start - cache.start)); //printf("hash_and_cmp: fetching from cache start = %d...\n", // sectobytes((hreg.start - cache.start))); } else { int i; /* * bad luck ! gotta hit the disk... */ //printf("hash_and_cmp: NOT in cache...\n"); /* * find the contiguous blocks */ cache.start = hreg.start; cache.size = hreg.size; for (i = 0; i < num_reg - 1; i++) { /* * since there are NO overlaps in hashed blocks * just check end points.. */ if (hashreg[i].region.start + hashreg[i].region.size != hashreg[i+1].region.start) { break; } /* * voila ! contiguous... 
*/ cache.size += hashreg[i+1].region.size; } byte_size = sectobytes(cache.size); byte_start = sectobytes(cache.start); if (cache.data) { free(cache.data); } cache.data = (unsigned char *) malloc(byte_size); if (!cache.data) { fprintf(stderr, "hash_and_cmp: unable to malloc !\n:"); goto error; } bzero(cache.data, byte_size); //printf("hash_and_cmp: gonna fetch start = %d, size = %d\n", // cache.start, cache.size); /* * go fetch the blocks. */ retval = lseek(infd, byte_start, SEEK_SET); // printf("BUG_DBG: hash_and_cmp(): retval = %ld," // " byte_start = %ld\n", retval, byte_start); if (retval < 0) { fprintf(stderr, "hash_and_cmp: lseek error !\n:"); goto free_error; } count = byte_size; bp = cache.data; while (count) { TIMEOP(cc = read(infd, bp, count), time_curr_read); if (cc < 0) { perror("hash_and_cmp: read error -- "); goto free_error; } count -= cc; //printf("looping...%d %d\n", cc, count); bp += cc; } buf = cache.data; } #else /* * Read from the disk ! */ byte_size = sectobytes(hreg.size); byte_start = sectobytes(hreg.start); assert(hreg.size <= hashdatasize); retval = lseek(infd, byte_start, SEEK_SET); if (retval < 0) { perror("hash_and_cmp: lseek error"); return -1; } count = byte_size; bp = hashdata; while (count > 0) { TIMEOP(cc = read(infd, bp, count), time_curr_read); if (cc < 0) { perror("hash_and_cmp: read error"); return -1; } if (cc == 0) { fprintf(stderr, "hash_and_cmp: unexpected EOF\n"); return -1; } count -= cc; bp += cc; } #endif /* * now caculate the hash and compare it. 
*/ TIMEOP( (void)(*hashfunc)(hashdata, byte_size, hash), time_hash); #if 0 fprintf(stderr, "disk: %s\n", spewhash(hash)); fprintf(stderr, "sig: %s\n", spewhash(hashreg->hash)); #endif iretval = (memcmp(hashreg->hash, hash, hashlen) != 0); #ifdef HASHSTATS hashstats.hash_compares++; hashstats.hash_scompares += hreg.size; if (!iretval) { hashstats.hash_identical++; hashstats.hash_sidentical += hreg.size; } #endif return iretval; #ifdef READ_CACHE free_error: free(cache.data); cache.data = NULL; error: cache.start = 0; cache.size = 0; #endif return -1; }
static int checkhash(char *name, struct hashinfo *hinfo) { uint32_t i, inbad, badstart, badsize, reportbad; uint32_t badhashes, badchunks, lastbadchunk; uint64_t badhashdata; struct hashregion *reg; int hashlen, chunkno; unsigned char hash[HASH_MAXSIZE]; unsigned char *(*hashfunc)(const unsigned char *, unsigned long, unsigned char *); char *hashstr; readbuf_t *rbuf; size_t size; #ifdef TIMEIT u_int64_t sstamp, estamp; #endif if (startreader(name, hinfo)) return -1; chunkno = lastbadchunk = -1; badhashes = badchunks = inbad = reportbad = 0; badhashdata = 0; badstart = badsize = ~0; switch (hinfo->hashtype) { case HASH_TYPE_MD5: default: hashlen = 16; hashfunc = MD5; hashstr = "MD5"; break; case HASH_TYPE_SHA1: hashlen = 20; hashfunc = SHA1; hashstr = "SHA1"; break; } fprintf(stderr, "Checking disk contents using %s digest\n", hashstr); for (i = 0, reg = hinfo->regions; i < hinfo->nregions; i++, reg++) { if (chunkno != reg->chunkno) { nchunks++; chunkno = reg->chunkno; } size = sectobytes(reg->region.size); rbuf = getblock(reg); #ifdef TIMEIT sstamp = rdtsc(); #endif (void)(*hashfunc)(rbuf->data, size, hash); #ifdef TIMEIT estamp = rdtsc(); hcycles += (estamp - sstamp); #endif putblock(rbuf); ndatabytes += size; if (detail > 2) { printf("[%u-%u]:\n", reg->region.start, reg->region.start + reg->region.size - 1); printf(" sig %s\n", spewhash(reg->hash)); printf(" disk %s\n", spewhash(hash)); } if (memcmp(reg->hash, hash, hashlen) == 0) { /* * Hash is good. * If we were in a bad stretch, be sure to dump info */ if (inbad) reportbad = 1; } else { /* * Hash is bad. * If not already in a bad stretch, start one. * If in a bad stretch, lengthen it if contig. * Otherwise, dump the info. 
*/ badhashes++; if (chunkno != lastbadchunk) { badchunks++; lastbadchunk = chunkno; } badhashdata += size; if (!inbad) { inbad = 1; badstart = reg->region.start; badsize = reg->region.size; } else { if (badstart + badsize == reg->region.start) badsize += reg->region.size; else reportbad = 1; } } #ifdef TIMEIT sstamp = rdtsc(); ccycles += (sstamp - estamp); #endif /* * Report on a bad stretch */ if (reportbad) { if (detail) fprintf(stderr, "%s: bad hash [%u-%u]\n", name, badstart, badstart + badsize - 1); reportbad = inbad = 0; } } /* * Finished on a sour note, report the final bad stretch. */ if (inbad && detail) fprintf(stderr, "%s: bad hash [%u-%u]\n", name, badstart, badstart + badsize - 1); stopreader(); nhregions = hinfo->nregions; printf("%s: %lu chunks, %lu hashregions, %qu data bytes\n", name, nchunks, nhregions, ndatabytes); if (badhashes) printf("%s: %u regions (%d chunks) had bad hashes, " "%qu bytes affected\n", name, badhashes, badchunks, badhashdata); dump_readbufs(); #ifdef TIMEIT printf("%qu bytes: read cycles: %qu, hash cycles: %qu, cmp cycles: %qu\n", ndatabytes, rcycles, hcycles, ccycles); #endif return 0; }