RmNode *rm_trie_insert(RmTrie *self, const char *path, void *value) {
    rm_assert_gentle(self);
    rm_assert_gentle(path);

    RmPathIter iter;
    rm_path_iter_init(&iter, path);

    g_mutex_lock(&self->lock);

    char *path_elem = NULL;
    RmNode *curr_node = self->root;

    while((path_elem = rm_path_iter_next(&iter))) {
        curr_node = rm_node_insert(self, curr_node, path_elem);
    }

    if(curr_node != NULL) {
        curr_node->has_value = true;
        curr_node->data = value;
        self->size++;
    }

    g_mutex_unlock(&self->lock);

    return curr_node;
}
RmNode *rm_trie_search_node(RmTrie *self, const char *path) {
    rm_assert_gentle(self);
    rm_assert_gentle(path);

    RmPathIter iter;
    rm_path_iter_init(&iter, path);

    g_mutex_lock(&self->lock);

    char *path_elem = NULL;
    RmNode *curr_node = self->root;

    while(curr_node && (path_elem = rm_path_iter_next(&iter))) {
        if(curr_node->children == NULL) {
            /* Can't go any further */
            g_mutex_unlock(&self->lock);
            return NULL;
        }

        curr_node = g_hash_table_lookup(curr_node->children, path_elem);
    }

    g_mutex_unlock(&self->lock);
    return curr_node;
}
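/* A minimal usage sketch for the trie API above (illustrative only; the
 * example function, the local names and the literal path are assumptions,
 * not part of rmlint): */
static void rm_trie_usage_example(void) {
    static int dummy_value = 42;

    RmTrie trie;
    rm_trie_init(&trie);

    /* Store a value under a path; each path component becomes a node. */
    rm_trie_insert(&trie, "/home/user/file.txt", &dummy_value);

    /* Look the node up again; node->has_value is true and node->data is
     * the stored pointer. A miss returns NULL. */
    RmNode *node = rm_trie_search_node(&trie, "/home/user/file.txt");
    rm_assert_gentle(node && node->has_value && node->data == &dummy_value);
}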
static RmOff rm_pp_handler_other_lint(const RmSession *session) {
    RmOff num_handled = 0;
    RmFileTables *tables = session->tables;

    for(RmOff type = 0; type < RM_LINT_TYPE_DUPE_CANDIDATE; ++type) {
        if(type == RM_LINT_TYPE_EMPTY_DIR) {
            tables->other_lint[type] = g_list_sort(
                tables->other_lint[type],
                (GCompareFunc)rm_pp_cmp_reverse_alphabetical);
        }

        GList *list = tables->other_lint[type];
        for(GList *iter = list; iter; iter = iter->next) {
            RmFile *file = iter->data;

            rm_assert_gentle(file);
            rm_assert_gentle(type == file->lint_type);

            num_handled++;
            rm_fmt_write(file, session->formats, -1);
        }

        if(!session->cfg->cache_file_structs) {
            g_list_free_full(list, (GDestroyNotify)rm_file_destroy);
        } else {
            g_list_free(list);
        }
    }

    return num_handled;
}
dev_t rm_mounts_get_disk_id(RmMountTable *self, dev_t partition, const char *path) {
    if(self == NULL) {
        return 0;
    }

#if RM_MOUNTTABLE_IS_USABLE
    RmPartitionInfo *part =
        g_hash_table_lookup(self->part_table, GINT_TO_POINTER(partition));
    if(part) {
        return part->disk;
    } else {
        /* probably a btrfs subvolume which is not a mountpoint;
         * walk up the tree until we get to a recognisable partition */
        char *prev = g_strdup(path);
        while(TRUE) {
            char *temp = g_strdup(prev);
            char *parent_path = g_strdup(dirname(temp));
            g_free(temp);

            RmStat stat_buf;
            if(!rm_sys_stat(parent_path, &stat_buf)) {
                RmPartitionInfo *parent_part = g_hash_table_lookup(
                    self->part_table, GINT_TO_POINTER(stat_buf.st_dev));
                if(parent_part) {
                    /* create new partition table entry */
                    rm_log_debug_line("Adding partition info for " GREEN "%s" RESET
                                      " - looks like subvolume %s on disk " GREEN
                                      "%s" RESET,
                                      path, prev, parent_part->name);
                    part = rm_part_info_new(prev, parent_part->fsname,
                                            parent_part->disk);
                    g_hash_table_insert(self->part_table,
                                        GINT_TO_POINTER(partition), part);
                    if(g_hash_table_contains(self->reflinkfs_table,
                                             GUINT_TO_POINTER(stat_buf.st_dev))) {
                        g_hash_table_insert(self->reflinkfs_table,
                                            GUINT_TO_POINTER(partition),
                                            GUINT_TO_POINTER(1));
                    }
                    g_free(prev);
                    g_free(parent_path);
                    return parent_part->disk;
                }
            }
            g_free(prev);
            prev = parent_path;

            rm_assert_gentle(strcmp(prev, "/") != 0);
            rm_assert_gentle(strcmp(prev, ".") != 0);
        }
    }
#else
    (void)partition;
    (void)path;
    return 0;
#endif
}
RmHasherTask *rm_hasher_task_new(RmHasher *hasher, RmDigest *digest,
                                 gpointer task_user_data) {
    g_mutex_lock(&hasher->lock);
    { hasher->active_tasks++; }
    g_mutex_unlock(&hasher->lock);

    RmHasherTask *self = g_slice_new0(RmHasherTask);
    self->hasher = hasher;
    if(digest) {
        self->digest = digest;
    } else {
        self->digest = rm_digest_new(hasher->digest_type, 0, 0, 0,
                                     hasher->digest_type == RM_DIGEST_PARANOID);
    }

    /* get a recycled hashpipe if available */
    self->hashpipe = g_async_queue_try_pop(hasher->hashpipe_pool);
    if(!self->hashpipe) {
        if(g_atomic_int_get(&hasher->unalloc_hashpipes) > 0) {
            /* create a new hashpipe */
            g_atomic_int_dec_and_test(&hasher->unalloc_hashpipes);
            self->hashpipe =
                rm_util_thread_pool_new((GFunc)rm_hasher_hashpipe_worker, hasher, 1);
        } else {
            /* already at thread limit - wait for a hashpipe to become available */
            self->hashpipe = g_async_queue_pop(hasher->hashpipe_pool);
        }
    }
    rm_assert_gentle(self->hashpipe);

    self->task_user_data = task_user_data;
    return self;
}
RmBuffer *rm_buffer_pool_get(RmBufferPool *pool) {
    RmBuffer *buffer = NULL;
    g_mutex_lock(&pool->lock);
    {
        while(!buffer) {
            if(pool->stack) {
                buffer = pool->stack->data;
                pool->stack = g_slist_delete_link(pool->stack, pool->stack);
            } else if(pool->avail_buffers > 0) {
                buffer = rm_buffer_new(pool);
            } else {
                if(!pool->mem_warned) {
                    rm_log_warning_line(
                        "read buffer limit reached - waiting for "
                        "processing to catch up");
                    pool->mem_warned = true;
                }
                g_cond_wait(&pool->change, &pool->lock);
            }
        }
        pool->avail_buffers--;

        if(pool->avail_buffers < pool->min_kept_buffers) {
            pool->min_kept_buffers = pool->avail_buffers;
        }
    }
    g_mutex_unlock(&pool->lock);

    rm_assert_gentle(buffer);
    return buffer;
}
guint rm_digest_hash(RmDigest *digest) {
    guint8 *buf = NULL;
    gsize bytes = 0;
    guint hash = 0;

    if(digest->type == RM_DIGEST_PARANOID) {
        if(digest->paranoid->shadow_hash) {
            buf = rm_digest_steal(digest->paranoid->shadow_hash);
            bytes = digest->paranoid->shadow_hash->bytes;
        } else {
            /* steal the first few bytes of the first buffer */
            if(digest->paranoid->buffers) {
                RmBuffer *buffer = digest->paranoid->buffers->data;
                if(buffer->len >= sizeof(guint)) {
                    hash = *(guint *)buffer->data;
                    return hash;
                }
            }
        }
    } else {
        buf = rm_digest_steal(digest);
        bytes = digest->bytes;
    }

    if(buf != NULL) {
        rm_assert_gentle(bytes >= sizeof(guint));
        hash = *(guint *)buf;
        g_slice_free1(bytes, buf);
    }
    return hash;
}
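/* Sketch: rm_digest_hash() and rm_digest_equal() (defined later in this
 * section) have GHashFunc/GEqualFunc-compatible shapes, so an RmDigest can
 * key a GHashTable directly. The wrapper function is illustrative, not part
 * of rmlint: */
static GHashTable *digest_table_example(void) {
    return g_hash_table_new((GHashFunc)rm_digest_hash,
                            (GEqualFunc)rm_digest_equal);
}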
RmBuffer *rm_buffer_get(RmBufferPool *pool) {
    RmBuffer *buffer = NULL;
    g_mutex_lock(&pool->lock);
    {
        while(!buffer) {
            buffer = rm_util_slist_pop(&pool->stack, NULL);
            if(!buffer && pool->avail_buffers > 0) {
                buffer = rm_buffer_new(pool);
            }
            if(!buffer) {
                if(!pool->mem_warned) {
                    rm_log_warning_line(
                        "read buffer limit reached - waiting for "
                        "processing to catch up");
                    pool->mem_warned = true;
                }
                g_cond_wait(&pool->change, &pool->lock);
            }
        }
        pool->avail_buffers--;
    }
    g_mutex_unlock(&pool->lock);

    rm_assert_gentle(buffer);
    return buffer;
}
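/* Caller-side sketch (illustrative): rm_buffer_release() is referenced by
 * the hashpipe worker later in this section and is assumed to return the
 * buffer to its pool and wake one waiter on pool->change. */
static void buffer_roundtrip_example(RmBufferPool *pool) {
    RmBuffer *buf = rm_buffer_get(pool);
    /* ... fill buf->data and set buf->len ... */
    rm_buffer_release(buf);
}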
bool rm_userlist_contains(RmUserList *self, unsigned long uid, unsigned gid,
                          bool *valid_uid, bool *valid_gid) {
    rm_assert_gentle(self);

    bool gid_found = FALSE;
    bool uid_found = FALSE;

    g_mutex_lock(&self->lock);
    {
        gid_found = g_sequence_lookup(self->groups, GUINT_TO_POINTER(gid),
                                      rm_userlist_cmp_ids, NULL);
        uid_found = g_sequence_lookup(self->users, GUINT_TO_POINTER(uid),
                                      rm_userlist_cmp_ids, NULL);
    }
    g_mutex_unlock(&self->lock);

    if(valid_uid != NULL) {
        *valid_uid = uid_found;
    }
    if(valid_gid != NULL) {
        *valid_gid = gid_found;
    }
    return (gid_found && uid_found);
}
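/* Usage sketch (illustrative, not from rmlint): checking whether a file's
 * owner and group still exist, e.g. to flag bad-UID/GID style lint. The
 * stat_buf is assumed to come from a prior rm_sys_stat() call. */
static void userlist_check_example(RmUserList *userlist, RmStat *stat_buf) {
    bool valid_uid = false;
    bool valid_gid = false;
    rm_userlist_contains(userlist, stat_buf->st_uid, stat_buf->st_gid,
                         &valid_uid, &valid_gid);
    if(!valid_uid || !valid_gid) {
        /* the file's uid and/or gid no longer map to a known user/group */
    }
}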
static ino_t rm_path_parent_inode(RmFile *file) {
    char parent_path[PATH_MAX];
    rm_trie_build_path((RmTrie *)&file->session->cfg->file_trie,
                       file->folder->parent, parent_path, PATH_MAX);

    RmStat stat_buf;
    int retval = rm_sys_stat(parent_path, &stat_buf);
    rm_assert_gentle(retval != -1);
    return stat_buf.st_ino;
}
void rm_userlist_destroy(RmUserList *self) {
    rm_assert_gentle(self);

    g_sequence_free(self->users);
    g_sequence_free(self->groups);
    g_mutex_clear(&self->lock);
    g_free(self);
}
/* This does preprocessing, including handling of "other lint" (non-dupes).
 * After rm_preprocess(), all remaining duplicate candidates are in
 * a jagged GSList of GSLists as follows:
 *
 * session->tables->size_groups->group1->file1a
 *                                     ->file1b
 *                                     ->file1c
 *                             ->group2->file2a
 *                                     ->file2b
 *                                     etc
 */
void rm_preprocess(RmSession *session) {
    RmFileTables *tables = session->tables;
    GQueue *all_files = tables->all_files;
    session->total_filtered_files = session->total_files;

    /* initial sort by size */
    g_queue_sort(all_files, (GCompareDataFunc)rm_file_cmp_full, session);
    rm_log_debug_line("initial size sort finished at time %.3f; sorted %d files",
                      g_timer_elapsed(session->timer, NULL), session->total_files);

    /* split into file size groups; for each size, remove path doubles and
     * bundle hardlinks */
    rm_assert_gentle(all_files->head);
    RmFile *file = g_queue_pop_head(all_files);
    RmFile *current_size_file = file;
    guint removed = 0;
    GHashTable *node_table = tables->node_table;
    while(file && !rm_session_was_aborted()) {
        /* group files into inode clusters */
        GQueue *inode_cluster =
            rm_hash_table_setdefault(node_table, file, (RmNewFunc)g_queue_new);
        g_queue_push_tail(inode_cluster, file);

        /* get next file and check if it is part of the same group */
        file = g_queue_pop_head(all_files);
        if(!file || rm_file_cmp_split(file, current_size_file, session) != 0) {
            /* process completed group (all same size & other criteria) */
            /* remove path doubles and handle "other" lint */

            /* add an empty GSList to our list of lists */
            tables->size_groups = g_slist_prepend(tables->size_groups, NULL);
            removed += g_hash_table_foreach_remove(
                node_table, (GHRFunc)rm_pp_handle_inode_clusters, session);

            /* free up the node table for the next group */
            g_hash_table_steal_all(node_table);
            if(tables->size_groups->data == NULL) {
                /* zero-size group after handling other lint; remove it */
                tables->size_groups =
                    g_slist_delete_link(tables->size_groups, tables->size_groups);
            }
        }
        current_size_file = file;
    }

    session->other_lint_cnt += rm_pp_handler_other_lint(session);
    rm_log_debug_line(
        "path doubles removal/hardlink bundling/other lint finished at %.3f; "
        "removed %u of %d",
        g_timer_elapsed(session->timer, NULL), removed, session->total_files);

    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
}
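/* A sketch of walking the jagged structure documented above, once
 * rm_preprocess() has run. The function and variable names here are
 * illustrative, not part of rmlint: */
static void size_groups_walk_example(RmFileTables *tables) {
    for(GSList *group = tables->size_groups; group; group = group->next) {
        for(GSList *item = group->data; item; item = item->next) {
            RmFile *candidate = item->data;
            /* every file in this inner list has the same size */
            (void)candidate;
        }
    }
}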
int rm_json_cache_read(RmTrie *file_trie, const char *json_path) {
#if !HAVE_JSON_GLIB
    (void)file_trie;
    (void)json_path;

    rm_log_info_line(_("caching is not supported due to missing json-glib library."));
    return EXIT_FAILURE;
#else
    rm_assert_gentle(file_trie);
    rm_assert_gentle(json_path);

    int result = EXIT_FAILURE;
    GError *error = NULL;
    size_t keys_in_table = rm_trie_size(file_trie);
    JsonParser *parser = json_parser_new();

    rm_log_info_line(_("Loading json-cache `%s'"), json_path);

    if(!json_parser_load_from_file(parser, json_path, &error)) {
        rm_log_warning_line(_("FAILED: %s\n"), error->message);
        g_error_free(error);
        goto failure;
    }

    JsonNode *root = json_parser_get_root(parser);
    if(JSON_NODE_TYPE(root) != JSON_NODE_ARRAY) {
        rm_log_warning_line(_("No valid json cache (no array in /)"));
        goto failure;
    }

    /* Iterate over all objects in it */
    json_array_foreach_element(json_node_get_array(root),
                               (JsonArrayForeach)rm_json_cache_parse_entry,
                               file_trie);

    /* check if some entries were added */
    result = (keys_in_table >= rm_trie_size(file_trie));

failure:
    if(parser) {
        g_object_unref(parser);
    }
    return result;
#endif
}
bool rm_mounts_can_reflink(RmMountTable *self, dev_t source, dev_t dest) {
    rm_assert_gentle(self);
    if(g_hash_table_contains(self->reflinkfs_table, GUINT_TO_POINTER(source))) {
        if(source == dest) {
            return true;
        } else {
            RmPartitionInfo *source_part =
                g_hash_table_lookup(self->part_table, GINT_TO_POINTER(source));
            RmPartitionInfo *dest_part =
                g_hash_table_lookup(self->part_table, GINT_TO_POINTER(dest));
            rm_assert_gentle(source_part);
            rm_assert_gentle(dest_part);
            return (strcmp(source_part->fsname, dest_part->fsname) == 0);
        }
    } else {
        return false;
    }
}
static int rm_directory_add(RmDirectory *directory, RmFile *file) {
    /* Update the directory's hash with the file's hash.
       Since we cannot be sure in which order the files come in,
       we have to add the hashes cumulatively.
     */
    int new_dupes = 0;

    rm_assert_gentle(file);
    rm_assert_gentle(file->digest);
    rm_assert_gentle(directory);

    guint8 *file_digest = NULL;
    RmOff digest_bytes = 0;

    if(file->digest->type == RM_DIGEST_PARANOID) {
        file_digest = rm_digest_steal(file->digest->paranoid->shadow_hash);
        digest_bytes = file->digest->paranoid->shadow_hash->bytes;
    } else {
        file_digest = rm_digest_steal(file->digest);
        digest_bytes = file->digest->bytes;
    }

    /* + and not XOR, since ^ would always yield 0 for two equal hashes,
     * no matter which hashes. That would be confusing, for me and for
     * debuggers. */
    rm_digest_update(directory->digest, file_digest, digest_bytes);

    /* The file value is not really used, but we need some non-null value */
    g_hash_table_add(directory->hash_set, file->digest);

    g_slice_free1(digest_bytes, file_digest);

    if(file->hardlinks.is_head && file->hardlinks.files) {
        new_dupes = 1 + g_queue_get_length(file->hardlinks.files);
    } else {
        new_dupes = 1;
    }

    directory->dupe_count += new_dupes;
    directory->prefd_files += file->is_prefd;
    return new_dupes;
}
void rm_trie_init(RmTrie *self) {
    rm_assert_gentle(self);
    self->root = rm_node_new(self, NULL);

    /* Average path len is 93.633236.
     * I did ze science! :-) */
    self->chunks = g_string_chunk_new(100);

    g_mutex_init(&self->lock);
}
void rm_mds_configure(RmMDS *self, const RmMDSFunc func, const gpointer user_data,
                      const gint pass_quota, const gint threads_per_disk,
                      RmMDSSortFunc prioritiser) {
    rm_assert_gentle(self->running == FALSE);
    self->func = func;
    self->user_data = user_data;
    self->threads_per_disk = threads_per_disk;
    self->pass_quota = (pass_quota > 0) ? pass_quota : G_MAXINT;
    self->prioritiser = prioritiser;
}
/** @brief Push an RmMDSDevice to the threadpool
 **/
void rm_mds_device_start(RmMDSDevice *device, RmMDS *mds) {
    rm_assert_gentle(device->threads == 0);

    device->threads = mds->threads_per_disk;

    g_mutex_lock(&device->lock);
    {
        for(int i = 0; i < mds->threads_per_disk; ++i) {
            rm_log_debug_line("Starting disk %" LLU " (pointer %p) thread #%i",
                              (RmOff)device->disk, device, i + 1);
            rm_util_thread_pool_push(mds->pool, device);
        }
    }
    g_mutex_unlock(&device->lock);
}
guint8 *rm_digest_steal(RmDigest *digest) {
    guint8 *result = g_slice_alloc0(digest->bytes);
    gsize buflen = digest->bytes;

    if(rm_digest_needs_steal(digest->type)) {
        /* reading the digest is destructive, so we need to take a copy */
        RmDigest *copy = rm_digest_copy(digest);
        g_checksum_get_digest(copy->glib_checksum, result, &buflen);
        rm_assert_gentle(buflen == digest->bytes);
        rm_digest_free(copy);
    } else {
        memcpy(result, digest->checksum, digest->bytes);
    }
    return result;
}
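/* Caller-side sketch: the returned buffer is g_slice-allocated, so it must
 * be released with g_slice_free1() using the same size, just as
 * rm_digest_hash() above does. The wrapper is illustrative only: */
static void digest_steal_example(RmDigest *digest) {
    guint8 *sum = rm_digest_steal(digest);
    /* ... read or compare the raw checksum bytes ... */
    g_slice_free1(digest->bytes, sum);
}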
static void rm_mount_list_close(RmMountEntries *self) {
    rm_assert_gentle(self);

    for(GList *iter = self->entries; iter; iter = iter->next) {
        RmMountEntry *entry = iter->data;
        g_free(entry->fsname);
        g_free(entry->dir);
        g_free(entry->type);
        g_slice_free(RmMountEntry, entry);
    }

    g_list_free_full(self->mnt_entries, (GDestroyNotify)g_unix_mount_free);
    g_list_free(self->entries);
    g_slice_free(RmMountEntries, self);
}
static RmMountEntry *rm_mount_list_next(RmMountEntries *self) {
    rm_assert_gentle(self);

    if(self->current) {
        self->current = self->current->next;
    } else {
        self->current = self->entries;
    }

    if(self->current) {
        return self->current->data;
    } else {
        return NULL;
    }
}
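/* Iteration sketch (illustrative): the cursor starts before the first entry
 * and NULL marks the end; note that a further call after NULL would wrap
 * around to the head again. */
static void mount_list_walk_example(RmMountEntries *entries) {
    RmMountEntry *entry = NULL;
    while((entry = rm_mount_list_next(entries))) {
        /* inspect entry->fsname, entry->dir and entry->type here */
    }
}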
/* GThreadPool worker for hashing */
static void rm_hasher_hashpipe_worker(RmBuffer *buffer, RmHasher *hasher) {
    if(buffer->len > 0) {
        /* Update digest with buffer->data */
        rm_assert_gentle(buffer->user_data == NULL);
        rm_digest_buffered_update(buffer);
    } else if(buffer->user_data) {
        /* finalise via callback */
        RmHasherTask *task = buffer->user_data;
        rm_assert_gentle(task->digest == buffer->digest);

        hasher->callback(hasher, task->digest, hasher->session_user_data,
                         task->task_user_data);
        rm_hasher_task_free(task);
        rm_buffer_release(buffer);

        g_mutex_lock(&hasher->lock);
        {
            /* decrease active task count and signal same */
            hasher->active_tasks--;
            g_cond_signal(&hasher->cond);
        }
        g_mutex_unlock(&hasher->lock);
    }
}
/* Preprocess files, including embedded hardlinks. Any embedded hardlinks
 * that are "other lint" types are sent to rm_pp_handle_other_lint. If the
 * file itself is an "other lint" type it is likewise sent to
 * rm_pp_handle_other_lint. If there are no files left after this then
 * return TRUE so that the cluster can be deleted from the node_table hash
 * table.
 * NOTE: we rely on rm_file_list_insert to select an
 * RM_LINT_TYPE_DUPE_CANDIDATE as head file (unless ALL the files are
 * "other lint"). */
static gboolean rm_pp_handle_inode_clusters(_UNUSED gpointer key,
                                            GQueue *inode_cluster,
                                            RmSession *session) {
    RmCfg *cfg = session->cfg;

    if(inode_cluster->length > 1) {
        /* there is a cluster of inode matches */

        /* remove path doubles */
        session->total_filtered_files -= rm_util_queue_foreach_remove(
            inode_cluster, (RmRFunc)rm_pp_check_path_double,
            session->tables->unique_paths_table);
        /* clear the hashtable ready for the next cluster */
        g_hash_table_remove_all(session->tables->unique_paths_table);
    }

    /* process and remove other lint */
    session->total_filtered_files -= rm_util_queue_foreach_remove(
        inode_cluster, (RmRFunc)rm_pp_handle_other_lint, (RmSession *)session);

    if(inode_cluster->length > 1) {
        /* bundle or free the non-head files */
        RmFile *headfile = inode_cluster->head->data;
        if(cfg->find_hardlinked_dupes) {
            /* prepare to bundle files under the hardlink head */
            headfile->hardlinks.files = g_queue_new();
            headfile->hardlinks.is_head = TRUE;
        }
        /* hardlink clusters are counted as filtered files since they are
         * either ignored or treated as automatic duplicates depending on
         * settings (so no effort either way); rm_pp_handle_hardlink will
         * either free or bundle the hardlinks depending on the value of
         * headfile->hardlinks.is_head. */
        session->total_filtered_files -= rm_util_queue_foreach_remove(
            inode_cluster, (RmRFunc)rm_pp_handle_hardlink, headfile);
    }

    /* update counters */
    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);

    rm_assert_gentle(inode_cluster->length <= 1);
    if(inode_cluster->length == 1) {
        session->tables->size_groups->data = g_slist_prepend(
            session->tables->size_groups->data, inode_cluster->head->data);
    }

    return TRUE;
}
static gboolean rm_pp_check_path_double(RmFile *file,
                                        GHashTable *unique_paths_table) {
    RmPathDoubleKey *key = rm_path_double_new(file);

    /* Lookup if there is a file with the same path */
    RmPathDoubleKey *match_double_key = g_hash_table_lookup(unique_paths_table, key);

    if(match_double_key == NULL) {
        g_hash_table_add(unique_paths_table, key);
        return FALSE;
    }

    RmFile *match_double = match_double_key->file;
    rm_assert_gentle(match_double != file);

    rm_path_double_free(key);
    rm_file_destroy(file);
    return TRUE;
}
static RmMDSDevice *rm_mds_device_get_by_disk(RmMDS *mds, const dev_t disk) {
    RmMDSDevice *result = NULL;

    g_mutex_lock(&mds->lock);
    {
        rm_assert_gentle(mds->disks);
        result = g_hash_table_lookup(mds->disks, GINT_TO_POINTER(disk));
        if(!result) {
            result = rm_mds_device_new(mds, disk);
            g_hash_table_insert(mds->disks, GINT_TO_POINTER(disk), result);
            if(g_atomic_int_get(&mds->running) == TRUE) {
                rm_mds_device_start(result, mds);
            }
        }
    }
    g_mutex_unlock(&mds->lock);
    return result;
}
RmDigest *rm_digest_copy(RmDigest *digest) {
    rm_assert_gentle(digest);

    RmDigest *self = NULL;

    switch(digest->type) {
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1:
        self = g_slice_new0(RmDigest);
        self->bytes = digest->bytes;
        self->type = digest->type;
        self->glib_checksum = g_checksum_copy(digest->glib_checksum);
        break;
    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_SPOOKY32:
    case RM_DIGEST_SPOOKY64:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY512:
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_XXHASH:
    case RM_DIGEST_FARMHASH:
    case RM_DIGEST_BASTARD:
    case RM_DIGEST_CUMULATIVE:
    case RM_DIGEST_EXT:
        self = rm_digest_new(digest->type, 0, 0, digest->bytes, FALSE);

        if(self->checksum && digest->checksum) {
            memcpy((char *)self->checksum, (char *)digest->checksum, self->bytes);
        }
        break;
    case RM_DIGEST_PARANOID:
    default:
        rm_assert_gentle_not_reached();
    }

    return self;
}
RmHasher *rm_hasher_new(RmDigestType digest_type, guint num_threads,
                        gboolean use_buffered_read, gsize buf_size,
                        guint64 cache_quota_bytes, RmHasherCallback joiner,
                        gpointer session_user_data) {
    RmHasher *self = g_slice_new0(RmHasher);
    self->digest_type = digest_type;

    self->use_buffered_read = use_buffered_read;
    self->buf_size = buf_size;
    self->cache_quota_bytes = cache_quota_bytes;

    if(joiner) {
        self->callback = joiner;
    } else {
        self->callback = (RmHasherCallback)rm_hasher_joiner;
        self->return_queue = g_async_queue_new();
    }

    self->session_user_data = session_user_data;

    /* initialise mutex & cond */
    g_mutex_init(&self->lock);
    g_cond_init(&self->cond);

    /* Create buffer mem pool */
    self->mem_pool = rm_buffer_pool_init(buf_size, cache_quota_bytes);

    /* Create a pool of hashing thread "pools" - each "pool" can only have
     * one thread because hashing must be done in order */
    self->hashpipe_pool =
        g_async_queue_new_full((GDestroyNotify)rm_hasher_hashpipe_free);
    rm_assert_gentle(num_threads > 0);
    self->unalloc_hashpipes = num_threads;
    return self;
}
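/* Life-cycle sketch for the hasher, kept entirely in comment form because
 * rm_hasher_task_write() and rm_hasher_task_finish() are assumed companions
 * of rm_hasher_task_new() above and are not shown in this section:
 *
 *   RmHasher *hasher = rm_hasher_new(RM_DIGEST_SHA1, 4, FALSE, 4096,
 *                                    0, my_callback, my_data);
 *   RmHasherTask *task = rm_hasher_task_new(hasher, NULL, file);
 *   ... rm_hasher_task_write(task, data, len) once per read chunk ...
 *   rm_hasher_task_finish(task);  // queues the zero-length sentinel buffer
 *                                 // that rm_hasher_hashpipe_worker() below
 *                                 // uses to finalise via the callback
 */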
gboolean rm_digest_equal(RmDigest *a, RmDigest *b) {
    rm_assert_gentle(a && b);

    if(a->type != b->type) {
        return false;
    }

    if(a->bytes != b->bytes) {
        return false;
    }

    if(a->type == RM_DIGEST_PARANOID) {
        if(!a->paranoid->buffers) {
            /* buffers have been freed so we need to rely on the shadow hash */
            return rm_digest_equal(a->paranoid->shadow_hash,
                                   b->paranoid->shadow_hash);
        }

        /* check if pre-matched twins */
        if(a->paranoid->twin_candidate == b || b->paranoid->twin_candidate == a) {
            return true;
        }

        /* check if already rejected */
        if(g_slist_find(a->paranoid->rejects, b) ||
           g_slist_find(b->paranoid->rejects, a)) {
            return false;
        }

        /* all the "easy" ways failed... do a manual check of all buffers */
        GSList *a_iter = a->paranoid->buffers;
        GSList *b_iter = b->paranoid->buffers;
        guint bytes = 0;
        while(a_iter && b_iter) {
            if(!rm_buffer_equal(a_iter->data, b_iter->data)) {
                rm_log_error_line(
                    "Paranoid digest compare found mismatch - must be hash "
                    "collision in shadow hash");
                return false;
            }
            bytes += ((RmBuffer *)a_iter->data)->len;
            a_iter = a_iter->next;
            b_iter = b_iter->next;
        }

        return (!a_iter && !b_iter && bytes == a->bytes);
    } else if(rm_digest_needs_steal(a->type)) {
        guint8 *buf_a = rm_digest_steal(a);
        guint8 *buf_b = rm_digest_steal(b);
        gboolean result;

        if(a->bytes != b->bytes) {
            result = false;
        } else {
            result = !memcmp(buf_a, buf_b, MIN(a->bytes, b->bytes));
        }

        g_slice_free1(a->bytes, buf_a);
        g_slice_free1(b->bytes, buf_b);

        return result;
    } else {
        return !memcmp(a->checksum, b->checksum, MIN(a->bytes, b->bytes));
    }
}
void rm_digest_update(RmDigest *digest, const unsigned char *data, RmOff size) {
    switch(digest->type) {
    case RM_DIGEST_EXT:
/* Data is assumed to be a hex representation of a checksum.
 * Needs to be compressed in pure memory first.
 *
 * Checksum is not updated but rather overwritten.
 */
#define CHAR_TO_NUM(c) (unsigned char)(g_ascii_isdigit(c) ? c - '0' : (c - 'a') + 10)

        rm_assert_gentle(data);

        digest->bytes = size / 2;
        digest->checksum = g_slice_alloc0(digest->bytes);

        for(unsigned i = 0; i < digest->bytes; ++i) {
            ((guint8 *)digest->checksum)[i] =
                (CHAR_TO_NUM(data[2 * i]) << 4) + CHAR_TO_NUM(data[2 * i + 1]);
        }
        break;
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1:
        g_checksum_update(digest->glib_checksum, (const guchar *)data, size);
        break;
    case RM_DIGEST_SPOOKY32:
        digest->checksum[0].first =
            spooky_hash32(data, size, digest->checksum[0].first);
        break;
    case RM_DIGEST_SPOOKY64:
        digest->checksum[0].first =
            spooky_hash64(data, size, digest->checksum[0].first);
        break;
    case RM_DIGEST_SPOOKY:
        spooky_hash128(data, size, (uint64_t *)&digest->checksum[0].first,
                       (uint64_t *)&digest->checksum[0].second);
        break;
    case RM_DIGEST_XXHASH:
        digest->checksum[0].first = XXH64(data, size, digest->checksum[0].first);
        break;
    case RM_DIGEST_FARMHASH:
        digest->checksum[0].first = cfarmhash((const char *)data, size);
        break;
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_MURMUR:
        for(guint8 block = 0; block < (digest->bytes / 16); block++) {
#if RM_PLATFORM_32
            MurmurHash3_x86_128(data, size, (uint32_t)digest->checksum[block].first,
                                &digest->checksum[block]);
#elif RM_PLATFORM_64
            MurmurHash3_x64_128(data, size, (uint32_t)digest->checksum[block].first,
                                &digest->checksum[block]);
#else
#error "Probably not a good idea to compile rmlint on 16bit."
#endif
        }
        break;
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_CITY512:
        for(guint8 block = 0; block < (digest->bytes / 16); block++) {
            /* We opt out of the more optimized version: it needs the crc
             * command of sse4.2 (available on Intel Nehalem and up; my amd
             * box doesn't have this though). */
            uint128 old = {digest->checksum[block].first,
                           digest->checksum[block].second};
            old = CityHash128WithSeed((const char *)data, size, old);
            memcpy(&digest->checksum[block], &old, sizeof(uint128));
        }
        break;
    case RM_DIGEST_BASTARD:
        MurmurHash3_x86_128(data, size, (uint32_t)digest->checksum[0].first,
                            &digest->checksum[0]);

        uint128 old = {digest->checksum[1].first, digest->checksum[1].second};
        old = CityHash128WithSeed((const char *)data, size, old);
        memcpy(&digest->checksum[1], &old, sizeof(uint128));
        break;
    case RM_DIGEST_CUMULATIVE: {
        /* This is basically FNV1a; it is just important that the order of
         * adding data to the hash has no effect on the result, so it can
         * be used as a lookup key:
         *
         * http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
         */
        RmOff hash = 0xcbf29ce484222325;
        for(gsize i = 0; i < digest->bytes; ++i) {
            hash ^= ((guint8 *)data)[i % size];
            hash *= 0x100000001b3;
            ((guint8 *)digest->checksum)[i] += hash;
        }
    } break;
    case RM_DIGEST_PARANOID:
    default:
        rm_assert_gentle_not_reached();
    }
}
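/* Worked example for the RM_DIGEST_EXT branch above: the 8-character hex
 * string "deadbeef" sets digest->bytes to 4 and overwrites the checksum
 * with the bytes {0xde, 0xad, 0xbe, 0xef}. The wrapper function is
 * illustrative only: */
static void ext_digest_example(RmDigest *ext_digest) {
    rm_digest_update(ext_digest, (const unsigned char *)"deadbeef", 8);
}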
void rm_digest_paranoia_shrink(RmDigest *digest, gsize new_size) {
    rm_assert_gentle(digest->type == RM_DIGEST_PARANOID);
    digest->bytes = new_size;
}