int deduplicate(struct asfd *asfd, struct conf *conf) { struct blk *blk; struct incoming *in=asfd->in; struct candidate *champ; struct candidate *champ_last=NULL; int count=0; int blk_count=0; if(!in) return 0; incoming_found_reset(in); count=0; while((champ=candidates_choose_champ(in, champ_last))) { // printf("Got champ: %s %d\n", champ->path, *(champ->score)); if(hash_load(champ->path, conf)) return -1; if(++count==CHAMPS_MAX) break; champ_last=champ; } blk_count=0; for(blk=asfd->blist->blk_to_dedup; blk; blk=blk->next) { //printf("try: %lu\n", blk->index); blk_count++; if(blk_is_zero_length(blk)) { //printf("got: %s %s\n", blk->weak, blk->strong); blk->got=BLK_GOT; in->got++; continue; } // If already got, this function will set blk->save_path // to be the location of the already got block. if(already_got_block(asfd, blk)) return -1; //printf("after agb: %lu %d\n", blk->index, blk->got); } logp("%s: %04d/%04d - %04d/%04d\n", asfd->desc, count, candidates_len, in->got, blk_count); //cntr_add_same_val(conf->cntr, CMD_DATA, in->got); // Start the incoming array again. in->size=0; // Destroy the deduplication hash table. hash_delete_all(); asfd->blist->blk_to_dedup=NULL; return 0; }
/*
 * Associate `value` with `key` in the hash table.
 *
 * The entry is obtained through hash_find() with the CREATE flag
 * (presumably creating it when absent -- confirm against hash_find), its
 * value is overwritten, and the table is resized when its load exceeds
 * MAX_LOAD.
 */
void hash_set(hashtable_type *void_table, void *key, void * value)
{
    hash_internal_type *table = (hash_internal_type *)void_table;
    key_value_type *entry = hash_find(table, key, CREATE);

    entry->value = value;

    /* Nothing more to do unless the table has become too full. */
    if (!(hash_load(table) > MAX_LOAD)) {
        return;
    }

    /* Pick a size that brings the load back down to about TARGET_LOAD. */
    size_t resized = floor(table->entries / TARGET_LOAD) + 1;
    hash_resize(table, resized);
}
/*
 * Print diagnostic statistics about a hash table to stdout.
 *
 * Output consists of:
 *   - a summary line with the entry count, bucket count, load and
 *     copy-on-write flag;
 *   - every key in the table, printed with "%s" (so keys must be
 *     NUL-terminated C strings for this function to be usable);
 *   - the longest chain length, the number of non-empty chains, and the
 *     average chain length (entries / non-empty chains).
 *
 * For a table with no non-empty chains the average is reported as 0
 * rather than dividing by zero.
 */
void hash_info(hashtable_type *void_table)
{
    hash_internal_type *table = (hash_internal_type *)void_table;
    uint64_t max = 0;
    uint64_t active_chains = 0;
    double average = 0.0;

    printf("Hash Info: Entries %lu Size %lu Load %f COW %i\n",
           table->entries, table->size, hash_load(table),
           table->copy_on_write);

    printf("\tHash Keys: ");
    /* size_t index: avoids a signed/unsigned comparison against the size. */
    for (size_t i = 0; i < table->size; i++) {
        uint64_t current = 0;

        /* Walk this bucket's chain, printing each key and counting links. */
        for (key_value_type *kv = table->table[i]; kv; kv = kv->next) {
            printf(" %s", (char *)kv->key);
            current++;
        }

        if (current > 0) {
            active_chains++;
        }
        if (current > max) {
            max = current;
        }
    }

    /* Guard the division: an empty table has no active chains. */
    if (active_chains > 0) {
        average = (table->entries * 1.0) / active_chains;
    }

    /* max and active_chains are uint64_t, so use the unsigned specifier. */
    printf("\n\tLongest Chain %" PRIu64 " Active Chains %" PRIu64
           " Average Chain %f\n",
           max, active_chains, average);
}
END_TEST START_TEST(test_hash_load_fail_to_open) { fail_unless(hash_load("champ", "dir")==HASH_RET_TEMP); }