Example #1
RmNode *rm_trie_insert(RmTrie *self, const char *path, void *value) {
    rm_assert_gentle(self);
    rm_assert_gentle(path);

    RmPathIter iter;
    rm_path_iter_init(&iter, path);

    g_mutex_lock(&self->lock);

    char *path_elem = NULL;
    RmNode *curr_node = self->root;

    while((path_elem = rm_path_iter_next(&iter))) {
        curr_node = rm_node_insert(self, curr_node, path_elem);
    }

    if(curr_node != NULL) {
        curr_node->has_value = true;
        curr_node->data = value;
        self->size++;
    }

    g_mutex_unlock(&self->lock);

    return curr_node;
}
Example #2
RmNode *rm_trie_search_node(RmTrie *self, const char *path) {
    rm_assert_gentle(self);
    rm_assert_gentle(path);

    RmPathIter iter;
    rm_path_iter_init(&iter, path);

    g_mutex_lock(&self->lock);

    char *path_elem = NULL;
    RmNode *curr_node = self->root;

    while(curr_node && (path_elem = rm_path_iter_next(&iter))) {
        if(curr_node->children == NULL) {
            /* Can't go any further */
            g_mutex_unlock(&self->lock);
            return NULL;
        }

        curr_node = g_hash_table_lookup(curr_node->children, path_elem);
    }

    g_mutex_unlock(&self->lock);
    return curr_node;
}
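
Note: the following is a minimal usage sketch (not part of the listing) showing how the RmTrie helpers from Examples #1 and #2 above and Example #16 below fit together; the header name is a guess and the path/value are made up for illustration.

#include "pathtricia.h" /* hypothetical header declaring RmTrie, RmNode and the rm_trie_* calls */

static void trie_demo(void) {
    RmTrie trie;
    rm_trie_init(&trie);

    /* store a value under a path, then look it up again */
    rm_trie_insert(&trie, "/usr/share/doc", (void *)"some value");

    RmNode *node = rm_trie_search_node(&trie, "/usr/share/doc");
    if(node && node->has_value) {
        /* node->data is the pointer that was passed to rm_trie_insert() */
    }
}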
Example #3
static RmOff rm_pp_handler_other_lint(const RmSession *session) {
    RmOff num_handled = 0;
    RmFileTables *tables = session->tables;

    for(RmOff type = 0; type < RM_LINT_TYPE_DUPE_CANDIDATE; ++type) {
        if(type == RM_LINT_TYPE_EMPTY_DIR) {
            tables->other_lint[type] = g_list_sort(
                tables->other_lint[type], (GCompareFunc)rm_pp_cmp_reverse_alphabetical);
        }

        GList *list = tables->other_lint[type];
        for(GList *iter = list; iter; iter = iter->next) {
            RmFile *file = iter->data;

            rm_assert_gentle(file);
            rm_assert_gentle(type == file->lint_type);

            num_handled++;

            rm_fmt_write(file, session->formats, -1);
        }

        if(!session->cfg->cache_file_structs) {
            g_list_free_full(list, (GDestroyNotify)rm_file_destroy);
        } else {
            g_list_free(list);
        }
    }

    return num_handled;
}
Example #4
dev_t rm_mounts_get_disk_id(RmMountTable *self, dev_t partition, const char *path) {
    if(self == NULL) {
        return 0;
    }

#if RM_MOUNTTABLE_IS_USABLE

    RmPartitionInfo *part =
        g_hash_table_lookup(self->part_table, GINT_TO_POINTER(partition));
    if(part) {
        return part->disk;
    } else {
        /* probably a btrfs subvolume which is not a mountpoint; walk up the
         * tree until we get to a recognisable partition */
        char *prev = g_strdup(path);
        while(TRUE) {
            char *temp = g_strdup(prev);
            char *parent_path = g_strdup(dirname(temp));
            g_free(temp);

            RmStat stat_buf;
            if(!rm_sys_stat(parent_path, &stat_buf)) {
                RmPartitionInfo *parent_part = g_hash_table_lookup(
                    self->part_table, GINT_TO_POINTER(stat_buf.st_dev));
                if(parent_part) {
                    /* create new partition table entry */
                    rm_log_debug_line("Adding partition info for " GREEN "%s" RESET
                                      " - looks like subvolume %s on disk " GREEN
                                      "%s" RESET,
                                      path, prev, parent_part->name);
                    part = rm_part_info_new(prev, parent_part->fsname, parent_part->disk);
                    g_hash_table_insert(self->part_table, GINT_TO_POINTER(partition),
                                        part);
                    if(g_hash_table_contains(self->reflinkfs_table,
                                             GUINT_TO_POINTER(stat_buf.st_dev))) {
                        g_hash_table_insert(self->reflinkfs_table,
                                            GUINT_TO_POINTER(partition),
                                            GUINT_TO_POINTER(1));
                    }
                    g_free(prev);
                    g_free(parent_path);
                    return parent_part->disk;
                }
            }
            g_free(prev);
            prev = parent_path;
            rm_assert_gentle(strcmp(prev, "/") != 0);
            rm_assert_gentle(strcmp(prev, ".") != 0);
        }
    }
#else
    (void) partition;
    (void) path;
    return 0;
#endif
}
Example #5
File: hasher.c Project: albertnet/rmlint
RmHasherTask *rm_hasher_task_new(RmHasher *hasher, RmDigest *digest,
                                 gpointer task_user_data) {
    g_mutex_lock(&hasher->lock);
    { hasher->active_tasks++; }
    g_mutex_unlock(&hasher->lock);

    RmHasherTask *self = g_slice_new0(RmHasherTask);
    self->hasher = hasher;
    if(digest) {
        self->digest = digest;
    } else {
        self->digest = rm_digest_new(hasher->digest_type, 0, 0, 0,
                                     hasher->digest_type == RM_DIGEST_PARANOID);
    }

    /* get a recycled hashpipe if available */
    self->hashpipe = g_async_queue_try_pop(hasher->hashpipe_pool);
    if(!self->hashpipe) {
        if(g_atomic_int_get(&hasher->unalloc_hashpipes) > 0) {
            /* create a new hashpipe */
            g_atomic_int_dec_and_test(&hasher->unalloc_hashpipes);
            self->hashpipe =
                rm_util_thread_pool_new((GFunc)rm_hasher_hashpipe_worker, hasher, 1);

        } else {
            /* already at thread limit - wait for a hashpipe to come available */
            self->hashpipe = g_async_queue_pop(hasher->hashpipe_pool);
        }
    }
    rm_assert_gentle(self->hashpipe);

    self->task_user_data = task_user_data;
    return self;
}
Example #6
RmBuffer *rm_buffer_pool_get(RmBufferPool *pool) {
    RmBuffer *buffer = NULL;
    g_mutex_lock(&pool->lock);
    {
        while(!buffer) {
            if(pool->stack) {
                buffer = pool->stack->data;
                pool->stack = g_slist_delete_link(pool->stack, pool->stack);
            } else if(pool->avail_buffers > 0) {
                buffer = rm_buffer_new(pool);
            } else {
                if(!pool->mem_warned) {
                    rm_log_warning_line(
                        "read buffer limit reached - waiting for "
                        "processing to catch up");
                    pool->mem_warned = true;
                }
                g_cond_wait(&pool->change, &pool->lock);
            }
        }
        pool->avail_buffers--;

        if(pool->avail_buffers < pool->min_kept_buffers) {
            pool->min_kept_buffers = pool->avail_buffers;
        }
    }
    g_mutex_unlock(&pool->lock);

    rm_assert_gentle(buffer);
    return buffer;
}
Example #7
File: checksum.c Project: FihlaTV/rmlint
guint rm_digest_hash(RmDigest *digest) {
    guint8 *buf = NULL;
    gsize bytes = 0;
    guint hash = 0;

    if(digest->type == RM_DIGEST_PARANOID) {
        if(digest->paranoid->shadow_hash) {
            buf = rm_digest_steal(digest->paranoid->shadow_hash);
            bytes = digest->paranoid->shadow_hash->bytes;
        } else {
            /* steal the first few bytes of the first buffer */
            if(digest->paranoid->buffers) {
                RmBuffer *buffer = digest->paranoid->buffers->data;
                if(buffer->len >= sizeof(guint)) {
                    hash = *(guint *)buffer->data;
                    return hash;
                }
            }
        }
    } else {
        buf = rm_digest_steal(digest);
        bytes = digest->bytes;
    }

    if(buf != NULL) {
        rm_assert_gentle(bytes >= sizeof(guint));
        hash = *(guint *)buf;
        g_slice_free1(bytes, buf);
    }
    return hash;
}
Example #8
File: checksum.c Project: FihlaTV/rmlint
RmBuffer *rm_buffer_get(RmBufferPool *pool) {
    RmBuffer *buffer = NULL;
    g_mutex_lock(&pool->lock);
    {
        while(!buffer) {
            buffer = rm_util_slist_pop(&pool->stack, NULL);
            if(!buffer && pool->avail_buffers > 0) {
                buffer = rm_buffer_new(pool);
            }
            if(!buffer) {
                if(!pool->mem_warned) {
                    rm_log_warning_line(
                        "read buffer limit reached - waiting for "
                        "processing to catch up");
                    pool->mem_warned = true;
                }
                g_cond_wait(&pool->change, &pool->lock);
            }
        }
        pool->avail_buffers--;
    }
    g_mutex_unlock(&pool->lock);

    rm_assert_gentle(buffer);
    return buffer;
}
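
Note: a minimal sketch (not from the listing) of the acquire/use/release pattern implied by Example #8 above and the rm_buffer_release() call in Example #22 below; the pool is assumed to have been created elsewhere and the header name is a guess.

#include "checksum.h" /* hypothetical header declaring RmBufferPool, RmBuffer and rm_buffer_* */

static void buffer_demo(RmBufferPool *pool) {
    /* blocks until a buffer is free (or a new one may still be allocated) */
    RmBuffer *buffer = rm_buffer_get(pool);

    /* ... fill buffer->data and set buffer->len here ... */

    /* hand the buffer back to its pool */
    rm_buffer_release(buffer);
}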
Example #9
bool rm_userlist_contains(RmUserList *self, unsigned long uid, unsigned gid,
                          bool *valid_uid, bool *valid_gid) {
    rm_assert_gentle(self);
    bool gid_found = FALSE;
    bool uid_found = FALSE;

    g_mutex_lock(&self->lock);
    {
        gid_found = g_sequence_lookup(self->groups, GUINT_TO_POINTER(gid),
                                      rm_userlist_cmp_ids, NULL);
        uid_found = g_sequence_lookup(self->users, GUINT_TO_POINTER(uid),
                                      rm_userlist_cmp_ids, NULL);
    }
    g_mutex_unlock(&self->lock);

    if(valid_uid != NULL) {
        *valid_uid = uid_found;
    }

    if(valid_gid != NULL) {
        *valid_gid = gid_found;
    }

    return (gid_found && uid_found);
}
Example #10
static ino_t rm_path_parent_inode(RmFile *file) {
    char parent_path[PATH_MAX];
    rm_trie_build_path((RmTrie *)&file->session->cfg->file_trie, file->folder->parent,
                       parent_path, PATH_MAX);
    RmStat stat_buf;
    int retval = rm_sys_stat(parent_path, &stat_buf);
    rm_assert_gentle(retval != -1);
    return stat_buf.st_ino;
}
Example #11
void rm_userlist_destroy(RmUserList *self) {
    rm_assert_gentle(self);

    g_sequence_free(self->users);
    g_sequence_free(self->groups);
    g_mutex_clear(&self->lock);
    g_free(self);
}
Example #12
/* This does preprocessing including handling of "other lint" (non-dupes)
 * After rm_preprocess(), all remaining duplicate candidates are in
 * a jagged GSList of GSLists as follows:
 * session->tables->size_groups->group1->file1a
 *                                     ->file1b
 *                                     ->file1c
 *                             ->group2->file2a
 *                                     ->file2b
 *                                       etc
 */
void rm_preprocess(RmSession *session) {
    RmFileTables *tables = session->tables;
    GQueue *all_files = tables->all_files;

    session->total_filtered_files = session->total_files;

    /* initial sort by size */
    g_queue_sort(all_files, (GCompareDataFunc)rm_file_cmp_full, session);
    rm_log_debug_line("initial size sort finished at time %.3f; sorted %d files",
                      g_timer_elapsed(session->timer, NULL),
                      session->total_files);

    /* split into file size groups; for each size, remove path doubles and bundle
     * hardlinks */
    rm_assert_gentle(all_files->head);
    RmFile *file = g_queue_pop_head(all_files);
    RmFile *current_size_file = file;
    guint removed = 0;
    GHashTable *node_table = tables->node_table;
    while(file && !rm_session_was_aborted()) {
        /* group files into inode clusters */
        GQueue *inode_cluster =
            rm_hash_table_setdefault(node_table, file, (RmNewFunc)g_queue_new);

        g_queue_push_tail(inode_cluster, file);

        /* get next file and check if it is part of the same group */
        file = g_queue_pop_head(all_files);
        if(!file || rm_file_cmp_split(file, current_size_file, session) != 0) {
            /* process completed group (all same size & other criteria)*/
            /* remove path doubles and handle "other" lint */

            /* add an empty GSlist to our list of lists */
            tables->size_groups = g_slist_prepend(tables->size_groups, NULL);

            removed += g_hash_table_foreach_remove(
                node_table, (GHRFunc)rm_pp_handle_inode_clusters, session);

            /* free up the node table for the next group */
            g_hash_table_steal_all(node_table);
            if(tables->size_groups->data == NULL) {
                /* zero size group after handling other lint; remove it */
                tables->size_groups =
                    g_slist_delete_link(tables->size_groups, tables->size_groups);
            }
        }

        current_size_file = file;
    }

    session->other_lint_cnt += rm_pp_handler_other_lint(session);

    rm_log_debug_line(
        "path doubles removal/hardlink bundling/other lint finished at %.3f; removed %u "
        "of %d",
        g_timer_elapsed(session->timer, NULL), removed, session->total_files);

    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
}
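
Note: a minimal sketch (not from the listing) of walking the jagged list described in the comment at the top of this example; it only uses plain GSList iteration over the structures shown above.

#include <glib.h>

static void walk_size_groups(RmSession *session) {
    for(GSList *group = session->tables->size_groups; group; group = group->next) {
        GSList *files = group->data; /* one group of same-sized duplicate candidates */
        for(GSList *iter = files; iter; iter = iter->next) {
            RmFile *file = iter->data;
            (void)file; /* hash or compare the candidate here */
        }
    }
}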
Example #13
int rm_json_cache_read(RmTrie *file_trie, const char *json_path) {
#if !HAVE_JSON_GLIB
    (void)file_trie;
    (void)json_path;

    rm_log_info_line(_("caching is not supported due to missing json-glib library."));
    return EXIT_FAILURE;
#else
    rm_assert_gentle(file_trie);
    rm_assert_gentle(json_path);

    int result = EXIT_FAILURE;
    GError *error = NULL;
    size_t keys_in_table = rm_trie_size(file_trie);
    JsonParser *parser = json_parser_new();

    rm_log_info_line(_("Loading json-cache `%s'"), json_path);

    if(!json_parser_load_from_file(parser, json_path, &error)) {
        rm_log_warning_line(_("FAILED: %s\n"), error->message);
        g_error_free(error);
        goto failure;
    }

    JsonNode *root = json_parser_get_root(parser);
    if(JSON_NODE_TYPE(root) != JSON_NODE_ARRAY) {
        rm_log_warning_line(_("No valid json cache (no array in /)"));
        goto failure;
    }

    /* Iterate over all objects in it */
    json_array_foreach_element(json_node_get_array(root),
                               (JsonArrayForeach)rm_json_cache_parse_entry,
                               file_trie);

    /* succeed (return 0) only if the cache actually added new entries */
    result = (keys_in_table >= rm_trie_size(file_trie));

failure:
    if(parser) {
        g_object_unref(parser);
    }
    return result;
#endif
}
Example #14
bool rm_mounts_can_reflink(RmMountTable *self, dev_t source, dev_t dest) {
    rm_assert_gentle(self);
    if(g_hash_table_contains(self->reflinkfs_table, GUINT_TO_POINTER(source))) {
        if(source == dest) {
            return true;
        } else {
            RmPartitionInfo *source_part =
                g_hash_table_lookup(self->part_table, GINT_TO_POINTER(source));
            RmPartitionInfo *dest_part =
                g_hash_table_lookup(self->part_table, GINT_TO_POINTER(dest));
            rm_assert_gentle(source_part);
            rm_assert_gentle(dest_part);
            return (strcmp(source_part->fsname, dest_part->fsname) == 0);
        }
    } else {
        return false;
    }
}
Example #15
static int rm_directory_add(RmDirectory *directory, RmFile *file) {
    /* Update the directory's hash with the file's hash.
       Since we cannot be sure in which order the files come in,
       we have to add the hashes cumulatively.
     */
    int new_dupes = 0;

    rm_assert_gentle(file);
    rm_assert_gentle(file->digest);
    rm_assert_gentle(directory);

    guint8 *file_digest = NULL;
    RmOff digest_bytes = 0;

    if(file->digest->type == RM_DIGEST_PARANOID) {
        file_digest = rm_digest_steal(file->digest->paranoid->shadow_hash);
        digest_bytes = file->digest->paranoid->shadow_hash->bytes;
    } else {
        file_digest = rm_digest_steal(file->digest);
        digest_bytes = file->digest->bytes;
    }

    /* + and not XOR, since ^ would yield 0 for same hashes always. No matter
     * which hashes. Also this would be confusing. For me and for debuggers.
     */
    rm_digest_update(directory->digest, file_digest, digest_bytes);

    /* The file value is not really used, but we need some non-null value */
    g_hash_table_add(directory->hash_set, file->digest);

    g_slice_free1(digest_bytes, file_digest);

    if(file->hardlinks.is_head && file->hardlinks.files) {
        new_dupes = 1 + g_queue_get_length(file->hardlinks.files);
    } else {
        new_dupes = 1;
    }

    directory->dupe_count += new_dupes;
    directory->prefd_files += file->is_prefd;

    return new_dupes;
}
Example #16
void rm_trie_init(RmTrie *self) {
    rm_assert_gentle(self);
    self->root = rm_node_new(self, NULL);

    /* Average path len is 93.633236.
     * I did ze science! :-)
     */
    self->chunks = g_string_chunk_new(100);

    g_mutex_init(&self->lock);
}
Example #17
void rm_mds_configure(RmMDS *self,
                      const RmMDSFunc func,
                      const gpointer user_data,
                      const gint pass_quota,
                      const gint threads_per_disk,
                      RmMDSSortFunc prioritiser) {
    rm_assert_gentle(self->running == FALSE);
    self->func = func;
    self->user_data = user_data;
    self->threads_per_disk = threads_per_disk;
    self->pass_quota = (pass_quota > 0) ? pass_quota : G_MAXINT;
    self->prioritiser = prioritiser;
}
Example #18
/** @brief Push an RmMDSDevice to the threadpool
 **/
void rm_mds_device_start(RmMDSDevice *device, RmMDS *mds) {
    rm_assert_gentle(device->threads == 0);

    device->threads = mds->threads_per_disk;
    g_mutex_lock(&device->lock);
    {
        for(int i = 0; i < mds->threads_per_disk; ++i) {
            rm_log_debug_line("Starting disk %" LLU " (pointer %p) thread #%i",
                              (RmOff)device->disk, device, i + 1);
            rm_util_thread_pool_push(mds->pool, device);
        }
    }
    g_mutex_unlock(&device->lock);
}
Example #19
File: checksum.c Project: FihlaTV/rmlint
guint8 *rm_digest_steal(RmDigest *digest) {
    guint8 *result = g_slice_alloc0(digest->bytes);
    gsize buflen = digest->bytes;

    if(rm_digest_needs_steal(digest->type)) {
        /* reading the digest is destructive, so we need to take a copy */
        RmDigest *copy = rm_digest_copy(digest);
        g_checksum_get_digest(copy->glib_checksum, result, &buflen);
        rm_assert_gentle(buflen == digest->bytes);
        rm_digest_free(copy);
    } else {
        memcpy(result, digest->checksum, digest->bytes);
    }
    return result;
}
Example #20
static void rm_mount_list_close(RmMountEntries *self) {
    rm_assert_gentle(self);

    for(GList *iter = self->entries; iter; iter = iter->next) {
        RmMountEntry *entry = iter->data;
        g_free(entry->fsname);
        g_free(entry->dir);
        g_free(entry->type);
        g_slice_free(RmMountEntry, entry);
    }

    g_list_free_full(self->mnt_entries, (GDestroyNotify)g_unix_mount_free);
    g_list_free(self->entries);
    g_slice_free(RmMountEntries, self);
}
Example #21
static RmMountEntry *rm_mount_list_next(RmMountEntries *self) {
    rm_assert_gentle(self);

    if(self->current) {
        self->current = self->current->next;
    } else {
        self->current = self->entries;
    }

    if(self->current) {
        return self->current->data;
    } else {
        return NULL;
    }
}
Example #22
File: hasher.c Project: albertnet/rmlint
/* GThreadPool Worker for hashing */
static void rm_hasher_hashpipe_worker(RmBuffer *buffer, RmHasher *hasher) {
    if(buffer->len > 0) {
        /* Update digest with buffer->data */
        rm_assert_gentle(buffer->user_data == NULL);
        rm_digest_buffered_update(buffer);
    } else if(buffer->user_data) {
        /* finalise via callback */
        RmHasherTask *task = buffer->user_data;
        rm_assert_gentle(task->digest == buffer->digest);

        hasher->callback(hasher, task->digest, hasher->session_user_data,
                         task->task_user_data);
        rm_hasher_task_free(task);
        rm_buffer_release(buffer);

        g_mutex_lock(&hasher->lock);
        {
            /* decrease active task count and signal same */
            hasher->active_tasks--;
            g_cond_signal(&hasher->cond);
        }
        g_mutex_unlock(&hasher->lock);
    }
}
Example #23
/* Preprocess files, including embedded hardlinks.  Any embedded hardlinks
 * that are "other lint" types are sent to rm_pp_handle_other_lint.  If the
 * file itself is an "other lint" type, it is likewise sent to rm_pp_handle_other_lint.
 * If there are no files left after this then return TRUE so that the
 * cluster can be deleted from the node_table hash table.
 * NOTE: we rely on rm_file_list_insert to select an RM_LINT_TYPE_DUPE_CANDIDATE as head
 * file (unless ALL the files are "other lint"). */
static gboolean rm_pp_handle_inode_clusters(_UNUSED gpointer key, GQueue *inode_cluster,
                                            RmSession *session) {
    RmCfg *cfg = session->cfg;

    if(inode_cluster->length > 1) {
        /* there is a cluster of inode matches */

        /* remove path doubles */
        session->total_filtered_files -=
            rm_util_queue_foreach_remove(inode_cluster, (RmRFunc)rm_pp_check_path_double,
                                         session->tables->unique_paths_table);
        /* clear the hashtable ready for the next cluster */
        g_hash_table_remove_all(session->tables->unique_paths_table);
    }

    /* process and remove other lint */
    session->total_filtered_files -= rm_util_queue_foreach_remove(
        inode_cluster, (RmRFunc)rm_pp_handle_other_lint, (RmSession *)session);

    if(inode_cluster->length > 1) {
        /* bundle or free the non-head files */
        RmFile *headfile = inode_cluster->head->data;
        if(cfg->find_hardlinked_dupes) {
            /* prepare to bundle files under the hardlink head */
            headfile->hardlinks.files = g_queue_new();
            headfile->hardlinks.is_head = TRUE;
        }

        /* hardlink clusters are counted as filtered files since they are either
         * ignored or treated as automatic duplicates depending on settings (so
         * no effort either way); rm_pp_handle_hardlink will either free or bundle
         * the hardlinks depending on the value of headfile->hardlinks.is_head.
         */
        session->total_filtered_files -= rm_util_queue_foreach_remove(
            inode_cluster, (RmRFunc)rm_pp_handle_hardlink, headfile);
    }

    /* update counters */
    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);

    rm_assert_gentle(inode_cluster->length <= 1);
    if(inode_cluster->length == 1) {
        session->tables->size_groups->data = g_slist_prepend(
            session->tables->size_groups->data, inode_cluster->head->data);
    }

    return TRUE;
}
Example #24
static gboolean rm_pp_check_path_double(RmFile *file, GHashTable *unique_paths_table) {
    RmPathDoubleKey *key = rm_path_double_new(file);

    /* Lookup if there is a file with the same path */
    RmPathDoubleKey *match_double_key = g_hash_table_lookup(unique_paths_table, key);

    if(match_double_key == NULL) {
        g_hash_table_add(unique_paths_table, key);
        return FALSE;
    }
    RmFile *match_double = match_double_key->file;
    rm_assert_gentle(match_double != file);

    rm_path_double_free(key);
    rm_file_destroy(file);
    return TRUE;
}
Example #25
static RmMDSDevice *rm_mds_device_get_by_disk(RmMDS *mds, const dev_t disk) {
    RmMDSDevice *result = NULL;
    g_mutex_lock(&mds->lock);
    {
        rm_assert_gentle(mds->disks);

        result = g_hash_table_lookup(mds->disks, GINT_TO_POINTER(disk));
        if(!result) {
            result = rm_mds_device_new(mds, disk);
            g_hash_table_insert(mds->disks, GINT_TO_POINTER(disk), result);
            if(g_atomic_int_get(&mds->running) == TRUE) {
                rm_mds_device_start(result, mds);
            }
        }
    }
    g_mutex_unlock(&mds->lock);
    return result;
}
Example #26
File: checksum.c Project: FihlaTV/rmlint
RmDigest *rm_digest_copy(RmDigest *digest) {
    rm_assert_gentle(digest);

    RmDigest *self = NULL;

    switch(digest->type) {
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1:
        self = g_slice_new0(RmDigest);
        self->bytes = digest->bytes;
        self->type = digest->type;
        self->glib_checksum = g_checksum_copy(digest->glib_checksum);
        break;
    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_SPOOKY32:
    case RM_DIGEST_SPOOKY64:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY512:
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_XXHASH:
    case RM_DIGEST_FARMHASH:
    case RM_DIGEST_BASTARD:
    case RM_DIGEST_CUMULATIVE:
    case RM_DIGEST_EXT:
        self = rm_digest_new(digest->type, 0, 0, digest->bytes, FALSE);

        if(self->checksum && digest->checksum) {
            memcpy((char *)self->checksum, (char *)digest->checksum, self->bytes);
        }

        break;
    case RM_DIGEST_PARANOID:
    default:
        rm_assert_gentle_not_reached();
    }

    return self;
}
Example #27
File: hasher.c Project: albertnet/rmlint
RmHasher *rm_hasher_new(RmDigestType digest_type,
                        guint num_threads,
                        gboolean use_buffered_read,
                        gsize buf_size,
                        guint64 cache_quota_bytes,
                        RmHasherCallback joiner,
                        gpointer session_user_data) {
    RmHasher *self = g_slice_new0(RmHasher);
    self->digest_type = digest_type;

    self->use_buffered_read = use_buffered_read;
    self->buf_size = buf_size;
    self->cache_quota_bytes = cache_quota_bytes;

    if(joiner) {
        self->callback = joiner;
    } else {
        self->callback = (RmHasherCallback)rm_hasher_joiner;
        self->return_queue = g_async_queue_new();
    }

    self->session_user_data = session_user_data;

    /* initialise mutex & cond */
    g_mutex_init(&self->lock);
    g_cond_init(&self->cond);

    /* Create buffer mem pool */
    self->mem_pool = rm_buffer_pool_init(buf_size, cache_quota_bytes);

    /* Create a pool of hashing thread "pools" - each "pool" can only have
     * one thread because hashing must be done in order */
    self->hashpipe_pool = g_async_queue_new_full((GDestroyNotify)rm_hasher_hashpipe_free);
    rm_assert_gentle(num_threads > 0);
    self->unalloc_hashpipes = num_threads;
    return self;
}
Example #28
File: checksum.c Project: FihlaTV/rmlint
gboolean rm_digest_equal(RmDigest *a, RmDigest *b) {
    rm_assert_gentle(a && b);

    if(a->type != b->type) {
        return false;
    }

    if(a->bytes != b->bytes) {
        return false;
    }

    if(a->type == RM_DIGEST_PARANOID) {
        if(!a->paranoid->buffers) {
            /* buffers have been freed so we need to rely on shadow hash */
            return rm_digest_equal(a->paranoid->shadow_hash, b->paranoid->shadow_hash);
        }
        /* check if pre-matched twins */
        if(a->paranoid->twin_candidate == b || b->paranoid->twin_candidate == a) {
            return true;
        }
        /* check if already rejected */
        if(g_slist_find(a->paranoid->rejects, b) ||
           g_slist_find(b->paranoid->rejects, a)) {
            return false;
        }
        /* all the "easy" ways failed... do manual check of all buffers */
        GSList *a_iter = a->paranoid->buffers;
        GSList *b_iter = b->paranoid->buffers;
        guint bytes = 0;
        while(a_iter && b_iter) {
            if(!rm_buffer_equal(a_iter->data, b_iter->data)) {
                rm_log_error_line(
                    "Paranoid digest compare found mismatch - must be hash collision in "
                    "shadow hash");
                return false;
            }
            bytes += ((RmBuffer *)a_iter->data)->len;
            a_iter = a_iter->next;
            b_iter = b_iter->next;
        }

        return (!a_iter && !b_iter && bytes == a->bytes);

    } else if(rm_digest_needs_steal(a->type)) {
        guint8 *buf_a = rm_digest_steal(a);
        guint8 *buf_b = rm_digest_steal(b);

        gboolean result;

        if(a->bytes != b->bytes) {
            result = false;
        } else {
            result = !memcmp(buf_a, buf_b, MIN(a->bytes, b->bytes));
        }

        g_slice_free1(a->bytes, buf_a);
        g_slice_free1(b->bytes, buf_b);

        return result;
    } else {
        return !memcmp(a->checksum, b->checksum, MIN(a->bytes, b->bytes));
    }
}
Example #29
File: checksum.c Project: FihlaTV/rmlint
void rm_digest_update(RmDigest *digest, const unsigned char *data, RmOff size) {
    switch(digest->type) {
    case RM_DIGEST_EXT:
/* Data is assumed to be a hex representation of a checksum.
 * Needs to be compressed in pure memory first.
 *
 * Checksum is not updated but rather overwritten.
 * */
#define CHAR_TO_NUM(c) (unsigned char)(g_ascii_isdigit(c) ? c - '0' : (c - 'a') + 10)

        rm_assert_gentle(data);

        digest->bytes = size / 2;
        digest->checksum = g_slice_alloc0(digest->bytes);

        for(unsigned i = 0; i < digest->bytes; ++i) {
            ((guint8 *)digest->checksum)[i] =
                (CHAR_TO_NUM(data[2 * i]) << 4) + CHAR_TO_NUM(data[2 * i + 1]);
        }

        break;
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1:
        g_checksum_update(digest->glib_checksum, (const guchar *)data, size);
        break;
    case RM_DIGEST_SPOOKY32:
        digest->checksum[0].first = spooky_hash32(data, size, digest->checksum[0].first);
        break;
    case RM_DIGEST_SPOOKY64:
        digest->checksum[0].first = spooky_hash64(data, size, digest->checksum[0].first);
        break;
    case RM_DIGEST_SPOOKY:
        spooky_hash128(data, size, (uint64_t *)&digest->checksum[0].first,
                       (uint64_t *)&digest->checksum[0].second);
        break;
    case RM_DIGEST_XXHASH:
        digest->checksum[0].first = XXH64(data, size, digest->checksum[0].first);
        break;
    case RM_DIGEST_FARMHASH:
        digest->checksum[0].first = cfarmhash((const char *)data, size);
        break;
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_MURMUR:
        for(guint8 block = 0; block < (digest->bytes / 16); block++) {
#if RM_PLATFORM_32
            MurmurHash3_x86_128(data, size, (uint32_t)digest->checksum[block].first,
                                &digest->checksum[block]);
#elif RM_PLATFORM_64
            MurmurHash3_x64_128(data, size, (uint32_t)digest->checksum[block].first,
                                &digest->checksum[block]);
#else
#error "Probably not a good idea to compile rmlint on 16bit."
#endif
        }
        break;
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_CITY512:
        for(guint8 block = 0; block < (digest->bytes / 16); block++) {
            /* Opt out for the more optimized version.
             * This needs the crc command of sse4.2
             * (available on Intel Nehalem and up; my amd box doesn't have this though)
             */
            uint128 old = {digest->checksum[block].first, digest->checksum[block].second};
            old = CityHash128WithSeed((const char *)data, size, old);
            memcpy(&digest->checksum[block], &old, sizeof(uint128));
        }
        break;
    case RM_DIGEST_BASTARD:
        MurmurHash3_x86_128(data, size, (uint32_t)digest->checksum[0].first,
                            &digest->checksum[0]);

        uint128 old = {digest->checksum[1].first, digest->checksum[1].second};
        old = CityHash128WithSeed((const char *)data, size, old);
        memcpy(&digest->checksum[1], &old, sizeof(uint128));
        break;
    case RM_DIGEST_CUMULATIVE: {
        /* This is basically FNV1a, it is just important that the order of
         * adding data to the hash has no effect on the result, so it can
         * be used as a lookup key:
         *
         * http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
         * */
        RmOff hash = 0xcbf29ce484222325;
        for(gsize i = 0; i < digest->bytes; ++i) {
            hash ^= ((guint8 *)data)[i % size];
            hash *= 0x100000001b3;
            ((guint8 *)digest->checksum)[i] += hash;
        }
    } break;
    case RM_DIGEST_PARANOID:
    default:
        rm_assert_gentle_not_reached();
    }
}
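
Note: a minimal sketch (not from the listing) combining the digest calls shown in Examples #19, #28 and #29: hash the same data into two digests and compare them. The extra arguments to rm_digest_new() are copied from the calls in Examples #5 and #26 and their exact meaning is assumed.

static gboolean digest_demo(const unsigned char *data, RmOff size) {
    RmDigest *a = rm_digest_new(RM_DIGEST_SHA1, 0, 0, 0, FALSE);
    RmDigest *b = rm_digest_new(RM_DIGEST_SHA1, 0, 0, 0, FALSE);

    rm_digest_update(a, data, size);
    rm_digest_update(b, data, size);

    /* same type, same input => equal checksums */
    gboolean equal = rm_digest_equal(a, b);

    rm_digest_free(a);
    rm_digest_free(b);
    return equal;
}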
Example #30
File: checksum.c Project: FihlaTV/rmlint
void rm_digest_paranoia_shrink(RmDigest *digest, gsize new_size) {
    rm_assert_gentle(digest->type == RM_DIGEST_PARANOID);
    digest->bytes = new_size;
}