/**
 * Create a new hashing task attached to `hasher`.
 *
 * The task is counted in hasher->active_tasks (updated under hasher->lock),
 * gets a digest (the caller's, or a new one of hasher->digest_type) and a
 * hashpipe. The hashpipe is obtained, in order of preference, by:
 *   1. popping a recycled one from hasher->hashpipe_pool without blocking,
 *   2. creating a new one if an unallocated slot can be claimed,
 *   3. blocking on hasher->hashpipe_pool until one is returned.
 *
 * @param hasher          hasher that the task belongs to.
 * @param digest          digest to use; if NULL a new digest is created.
 * @param task_user_data  opaque pointer stored in the task as-is.
 * @return the newly allocated task (allocated with g_slice_new0()).
 */
RmHasherTask *rm_hasher_task_new(RmHasher *hasher, RmDigest *digest,
                                 gpointer task_user_data) {
    g_mutex_lock(&hasher->lock);
    { hasher->active_tasks++; }
    g_mutex_unlock(&hasher->lock);

    RmHasherTask *self = g_slice_new0(RmHasherTask);
    self->hasher = hasher;
    if(digest) {
        self->digest = digest;
    } else {
        self->digest = rm_digest_new(hasher->digest_type, 0, 0, 0,
                                     hasher->digest_type == RM_DIGEST_PARANOID);
    }

    /* get a recycled hashpipe if available */
    self->hashpipe = g_async_queue_try_pop(hasher->hashpipe_pool);
    if(!self->hashpipe) {
        /* Claim one of the remaining unallocated hashpipe slots. The check
         * and the decrement must happen as one atomic step: with a separate
         * "get > 0" followed by a decrement, two threads could both observe
         * the last free slot and both create a hashpipe, driving the counter
         * negative and exceeding the thread limit. */
        gboolean slot_claimed = FALSE;
        gint unalloc;
        while((unalloc = g_atomic_int_get(&hasher->unalloc_hashpipes)) > 0) {
            if(g_atomic_int_compare_and_exchange(&hasher->unalloc_hashpipes, unalloc,
                                                 unalloc - 1)) {
                slot_claimed = TRUE;
                break;
            }
            /* another thread changed the counter in between; re-read and retry */
        }

        if(slot_claimed) {
            /* create a new hashpipe */
            self->hashpipe =
                rm_util_thread_pool_new((GFunc)rm_hasher_hashpipe_worker, hasher, 1);
        } else {
            /* already at thread limit - wait for a hashpipe to come available */
            self->hashpipe = g_async_queue_pop(hasher->hashpipe_pool);
        }
    }
    rm_assert_gentle(self->hashpipe);

    self->task_user_data = task_user_data;
    return self;
}
/**
 * Create an independent copy of `digest`.
 *
 * GLib-backed types (MD5, SHA1, SHA256, SHA512) get a fresh RmDigest whose
 * type, byte count and GChecksum state are copied from the source.
 *
 * All other known types are re-created through rm_digest_new() with the
 * source's initial seeds and byte count, after which the checksum buffer is
 * copied over when both sides have one. For RM_DIGEST_PARANOID the paranoid
 * offset is carried over and the shadow hash created by rm_digest_new() is
 * replaced by a copy of the source's shadow hash.
 *
 * @param digest  digest to copy; must not be NULL (asserted).
 * @return the new digest, or NULL when the type is unknown and the
 *         "not reached" assertion does not abort.
 */
RmDigest *rm_digest_copy(RmDigest *digest) {
    g_assert(digest);

    switch(digest->type) {
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1: {
        RmDigest *copy = g_slice_new0(RmDigest);
        copy->bytes = digest->bytes;
        copy->type = digest->type;
        copy->glib_checksum = g_checksum_copy(digest->glib_checksum);
        return copy;
    }

    case RM_DIGEST_PARANOID:
    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_SPOOKY32:
    case RM_DIGEST_SPOOKY64:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY512:
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_BASTARD:
    case RM_DIGEST_CUMULATIVE: {
        RmDigest *copy = rm_digest_new(digest->type, digest->initial_seed1,
                                       digest->initial_seed2, digest->bytes);

        if(copy->type == RM_DIGEST_PARANOID) {
            copy->paranoid_offset = digest->paranoid_offset;
            /* swap the freshly created shadow hash for a copy of the source's */
            rm_digest_free(copy->shadow_hash);
            copy->shadow_hash = rm_digest_copy(digest->shadow_hash);
        }

        if(copy->checksum && digest->checksum) {
            memcpy(copy->checksum, digest->checksum, copy->bytes);
        }
        return copy;
    }

    default:
        g_assert_not_reached();
    }

    return NULL;
}
/**
 * Duplicate `digest`.
 *
 * - MD5 / SHA1 / SHA256 / SHA512: a new RmDigest is allocated and the byte
 *   count, type and GChecksum state are copied from the source.
 * - Non-GLib checksum types: a new digest of the same type and size is built
 *   with rm_digest_new() using zero seeds; the source's checksum buffer is
 *   then copied over it when both buffers exist.
 * - RM_DIGEST_PARANOID and unknown types are not supported here; they hit
 *   rm_assert_gentle_not_reached().
 *
 * @param digest  digest to duplicate (checked with rm_assert_gentle()).
 * @return the duplicate, or NULL for unsupported types if the assertion
 *         macro returns.
 */
RmDigest *rm_digest_copy(RmDigest *digest) {
    rm_assert_gentle(digest);

    RmDigest *clone = NULL;

    switch(digest->type) {
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1:
        clone = g_slice_new0(RmDigest);
        clone->type = digest->type;
        clone->bytes = digest->bytes;
        clone->glib_checksum = g_checksum_copy(digest->glib_checksum);
        break;

    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_SPOOKY32:
    case RM_DIGEST_SPOOKY64:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY512:
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_XXHASH:
    case RM_DIGEST_FARMHASH:
    case RM_DIGEST_BASTARD:
    case RM_DIGEST_CUMULATIVE:
    case RM_DIGEST_EXT:
        clone = rm_digest_new(digest->type, 0, 0, digest->bytes, FALSE);
        if(clone->checksum == NULL || digest->checksum == NULL) {
            /* nothing to transfer */
            break;
        }
        memcpy(clone->checksum, digest->checksum, clone->bytes);
        break;

    case RM_DIGEST_PARANOID:
    default:
        rm_assert_gentle_not_reached();
    }

    return clone;
}
static RmDirectory *rm_directory_new(char *dirname) { RmDirectory *self = g_new0(RmDirectory, 1); self->file_count = 0; self->dupe_count = 0; self->prefd_files = 0; self->was_merged = false; self->was_inserted = false; self->mergeups = 0; self->dirname = dirname; self->finished = false; self->depth = 0; for(char *s = dirname; *s; s++) { self->depth += (*s == G_DIR_SEPARATOR); } RmStat dir_stat; if(rm_sys_stat(self->dirname, &dir_stat) == -1) { rm_log_perror("stat(2) failed during sort"); } else { self->metadata.dir_mtime = dir_stat.st_mtime; self->metadata.dir_inode = dir_stat.st_ino; self->metadata.dir_dev = dir_stat.st_dev; } /* Special cumulative hashsum, that is not dependent on the * order in which the file hashes were added. * It is not used as full hash, but as sorting speedup. */ self->digest = rm_digest_new(RM_DIGEST_CUMULATIVE, 0, 0, 0, false); g_queue_init(&self->known_files); g_queue_init(&self->children); self->hash_set = g_hash_table_new((GHashFunc)rm_digest_hash, (GEqualFunc)rm_digest_equal); return self; }
/**
 * Allocate a digest of the given type.
 *
 * GLib-backed types (MD5, SHA1, SHA256 and, when HAVE_SHA512 is set, SHA512)
 * create a GChecksum, apply ADD_SEED() with seed1 and return immediately.
 *
 * RM_DIGEST_PARANOID allocates its RmParanoid state (plus an XXHASH shadow
 * hash when `use_shadow_hash` is set) and has no checksum buffer.
 *
 * Every other type gets a zeroed checksum buffer of `bytes` length. Each full
 * 16-byte block of that buffer is initialised from a fixed seed table XORed
 * with seed1 (first half) and seed2 (second half); digests shorter than
 * 16 bytes therefore keep an all-zero buffer.
 *
 * @param type             digest type to create.
 * @param seed1            seed for the first half of each block / GChecksum seed.
 * @param seed2            seed for the second half of each block.
 * @param ext_size         byte length, used for RM_DIGEST_EXT only.
 * @param use_shadow_hash  RM_DIGEST_PARANOID only: also create a shadow hash.
 * @return the new digest (allocated with g_slice_new0()).
 */
RmDigest *rm_digest_new(RmDigestType type, RmOff seed1, RmOff seed2, RmOff ext_size,
                        bool use_shadow_hash) {
    /* starting values to let us generate up to 4 different hashes in parallel
     * with different starting seeds */
    static const RmOff seeds[4] = {0x0000000000000000, 0xf0f0f0f0f0f0f0f0,
                                   0x3333333333333333, 0xaaaaaaaaaaaaaaaa};

    RmDigest *digest = g_slice_new0(RmDigest);
    digest->type = type;
    digest->bytes = 0;
    digest->checksum = NULL;

    switch(type) {
    /* --- GLib checksums: set up and return right away --- */
    case RM_DIGEST_MD5:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_MD5);
        ADD_SEED(digest, seed1);
        digest->bytes = 128 / 8;
        return digest;
    case RM_DIGEST_SHA1:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA1);
        ADD_SEED(digest, seed1);
        digest->bytes = 160 / 8;
        return digest;
    case RM_DIGEST_SHA256:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA256);
        ADD_SEED(digest, seed1);
        digest->bytes = 256 / 8;
        return digest;
#if HAVE_SHA512
    case RM_DIGEST_SHA512:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA512);
        ADD_SEED(digest, seed1);
        digest->bytes = 512 / 8;
        return digest;
#endif

    /* --- 64 bit --- */
    case RM_DIGEST_SPOOKY32:
        /* SPOOKY32 cannot go lower than 64, since we read 8 byte in some
         * places; simulate by leaving the part at the end empty */
    case RM_DIGEST_SPOOKY64:
    case RM_DIGEST_XXHASH:
    case RM_DIGEST_FARMHASH:
        digest->bytes = 64 / 8;
        break;

    /* --- 128 bit --- */
    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CUMULATIVE:
        digest->bytes = 128 / 8;
        break;

    /* --- 256 bit --- */
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_BASTARD:
        digest->bytes = 256 / 8;
        break;

    /* --- 512 bit --- */
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_CITY512:
        digest->bytes = 512 / 8;
        break;

    /* --- caller-defined length --- */
    case RM_DIGEST_EXT:
        digest->bytes = ext_size;
        break;

    /* --- paranoid: no checksum buffer, separate state --- */
    case RM_DIGEST_PARANOID:
        digest->bytes = 0;
        digest->paranoid = g_slice_new0(RmParanoid);
        digest->paranoid->incoming_twin_candidates = g_async_queue_new();
        if(use_shadow_hash) {
            digest->paranoid->shadow_hash =
                rm_digest_new(RM_DIGEST_XXHASH, seed1, seed2, 0, false);
        }
        break;

    default:
        rm_assert_gentle_not_reached();
    }

    const bool needs_checksum_buffer = digest->bytes > 0 && type != RM_DIGEST_PARANOID;
    if(!needs_checksum_buffer) {
        return digest;
    }

    /* checksum type - allocate memory and initialise */
    const int n_seeds = sizeof(seeds) / sizeof(seeds[0]);
    digest->checksum = g_slice_alloc0(digest->bytes);

    for(gsize idx = 0; idx < (digest->bytes / 16); idx++) {
        const RmOff base = seeds[idx % n_seeds];
        digest->checksum[idx].first = base ^ seed1;
        digest->checksum[idx].second = base ^ seed2;
    }

    if(digest->type == RM_DIGEST_BASTARD) {
        /* bastard type *always* has *pure* murmur hash for first checksum
         * and seeded city for second checksum */
        digest->checksum[0].second = 0;
        digest->checksum[0].first = 0;
    }

    return digest;
}