Beispiel #1
0
RmHasherTask *rm_hasher_task_new(RmHasher *hasher, RmDigest *digest,
                                 gpointer task_user_data) {
    g_mutex_lock(&hasher->lock);
    { hasher->active_tasks++; }
    g_mutex_unlock(&hasher->lock);

    RmHasherTask *self = g_slice_new0(RmHasherTask);
    self->hasher = hasher;
    if(digest) {
        self->digest = digest;
    } else {
        self->digest = rm_digest_new(hasher->digest_type, 0, 0, 0,
                                     hasher->digest_type == RM_DIGEST_PARANOID);
    }

    /* get a recycled hashpipe if available */
    self->hashpipe = g_async_queue_try_pop(hasher->hashpipe_pool);
    if(!self->hashpipe) {
        if(g_atomic_int_get(&hasher->unalloc_hashpipes) > 0) {
            /* create a new hashpipe */
            g_atomic_int_dec_and_test(&hasher->unalloc_hashpipes);
            self->hashpipe =
                rm_util_thread_pool_new((GFunc)rm_hasher_hashpipe_worker, hasher, 1);

        } else {
            /* already at thread limit - wait for a hashpipe to come available */
            self->hashpipe = g_async_queue_pop(hasher->hashpipe_pool);
        }
    }
    rm_assert_gentle(self->hashpipe);

    self->task_user_data = task_user_data;
    return self;
}
Beispiel #2
0
RmDigest *rm_digest_copy(RmDigest *digest) {
    g_assert(digest);

    RmDigest *self = NULL;

    switch(digest->type) {
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1:
        self = g_slice_new0(RmDigest);
        self->bytes = digest->bytes;
        self->type = digest->type;
        self->glib_checksum = g_checksum_copy(digest->glib_checksum);
        break;
    case RM_DIGEST_PARANOID:
    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_SPOOKY32:
    case RM_DIGEST_SPOOKY64:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY512:
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_BASTARD:
    case RM_DIGEST_CUMULATIVE:
        self = rm_digest_new(
                   digest->type, digest->initial_seed1, digest->initial_seed2, digest->bytes
               );

        if(self->type == RM_DIGEST_PARANOID) {
            self->paranoid_offset = digest->paranoid_offset;
            rm_digest_free(self->shadow_hash);
            self->shadow_hash = rm_digest_copy(digest->shadow_hash);
        }

        if(self->checksum && digest->checksum) {
            memcpy((char *)self->checksum, (char *)digest->checksum, self->bytes);
        }

        break;
    default:
        g_assert_not_reached();
    }

    return self;
}
Beispiel #3
0
RmDigest *rm_digest_copy(RmDigest *digest) {
    rm_assert_gentle(digest);

    RmDigest *self = NULL;

    switch(digest->type) {
    case RM_DIGEST_MD5:
    case RM_DIGEST_SHA512:
    case RM_DIGEST_SHA256:
    case RM_DIGEST_SHA1:
        self = g_slice_new0(RmDigest);
        self->bytes = digest->bytes;
        self->type = digest->type;
        self->glib_checksum = g_checksum_copy(digest->glib_checksum);
        break;
    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_SPOOKY32:
    case RM_DIGEST_SPOOKY64:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY512:
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_XXHASH:
    case RM_DIGEST_FARMHASH:
    case RM_DIGEST_BASTARD:
    case RM_DIGEST_CUMULATIVE:
    case RM_DIGEST_EXT:
        self = rm_digest_new(digest->type, 0, 0, digest->bytes, FALSE);

        if(self->checksum && digest->checksum) {
            memcpy((char *)self->checksum, (char *)digest->checksum, self->bytes);
        }

        break;
    case RM_DIGEST_PARANOID:
    default:
        rm_assert_gentle_not_reached();
    }

    return self;
}
Beispiel #4
0
static RmDirectory *rm_directory_new(char *dirname) {
    RmDirectory *self = g_new0(RmDirectory, 1);

    self->file_count = 0;
    self->dupe_count = 0;
    self->prefd_files = 0;
    self->was_merged = false;
    self->was_inserted = false;
    self->mergeups = 0;

    self->dirname = dirname;
    self->finished = false;

    self->depth = 0;
    for(char *s = dirname; *s; s++) {
        self->depth += (*s == G_DIR_SEPARATOR);
    }

    RmStat dir_stat;
    if(rm_sys_stat(self->dirname, &dir_stat) == -1) {
        rm_log_perror("stat(2) failed during sort");
    } else {
        self->metadata.dir_mtime = dir_stat.st_mtime;
        self->metadata.dir_inode = dir_stat.st_ino;
        self->metadata.dir_dev = dir_stat.st_dev;
    }

    /* Special cumulative hashsum, that is not dependent on the
     * order in which the file hashes were added.
     * It is not used as full hash, but as sorting speedup.
     */
    self->digest = rm_digest_new(RM_DIGEST_CUMULATIVE, 0, 0, 0, false);

    g_queue_init(&self->known_files);
    g_queue_init(&self->children);

    self->hash_set =
        g_hash_table_new((GHashFunc)rm_digest_hash, (GEqualFunc)rm_digest_equal);

    return self;
}
Beispiel #5
0
RmDigest *rm_digest_new(RmDigestType type, RmOff seed1, RmOff seed2, RmOff ext_size,
                        bool use_shadow_hash) {
    RmDigest *digest = g_slice_new0(RmDigest);

    digest->checksum = NULL;
    digest->type = type;
    digest->bytes = 0;

    switch(type) {
    case RM_DIGEST_SPOOKY32:
        /* cannot go lower than 64, since we read 8 byte in some places.
         * simulate by leaving the part at the end empty
         */
        digest->bytes = 64 / 8;
        break;
    case RM_DIGEST_XXHASH:
    case RM_DIGEST_FARMHASH:
    case RM_DIGEST_SPOOKY64:
        digest->bytes = 64 / 8;
        break;
    case RM_DIGEST_MD5:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_MD5);
        ADD_SEED(digest, seed1);
        digest->bytes = 128 / 8;
        return digest;
#if HAVE_SHA512
    case RM_DIGEST_SHA512:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA512);
        ADD_SEED(digest, seed1);
        digest->bytes = 512 / 8;
        return digest;
#endif
    case RM_DIGEST_SHA256:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA256);
        ADD_SEED(digest, seed1);
        digest->bytes = 256 / 8;
        return digest;
    case RM_DIGEST_SHA1:
        digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA1);
        ADD_SEED(digest, seed1);
        digest->bytes = 160 / 8;
        return digest;
    case RM_DIGEST_MURMUR512:
    case RM_DIGEST_CITY512:
        digest->bytes = 512 / 8;
        break;
    case RM_DIGEST_EXT:
        /* gets allocated on rm_digest_update() */
        digest->bytes = ext_size;
        break;
    case RM_DIGEST_MURMUR256:
    case RM_DIGEST_CITY256:
    case RM_DIGEST_BASTARD:
        digest->bytes = 256 / 8;
        break;
    case RM_DIGEST_SPOOKY:
    case RM_DIGEST_MURMUR:
    case RM_DIGEST_CITY:
    case RM_DIGEST_CUMULATIVE:
        digest->bytes = 128 / 8;
        break;
    case RM_DIGEST_PARANOID:
        digest->bytes = 0;
        digest->paranoid = g_slice_new0(RmParanoid);
        digest->paranoid->incoming_twin_candidates = g_async_queue_new();
        if(use_shadow_hash) {
            digest->paranoid->shadow_hash =
                rm_digest_new(RM_DIGEST_XXHASH, seed1, seed2, 0, false);
        }
        break;
    default:
        rm_assert_gentle_not_reached();
    }

    /* starting values to let us generate up to 4 different hashes in parallel with
     * different starting seeds:
     * */
    static const RmOff seeds[4] = {0x0000000000000000, 0xf0f0f0f0f0f0f0f0,
                                   0x3333333333333333, 0xaaaaaaaaaaaaaaaa};

    if(digest->bytes > 0 && type != RM_DIGEST_PARANOID) {
        const int n_seeds = sizeof(seeds) / sizeof(seeds[0]);

        /* checksum type - allocate memory and initialise */
        digest->checksum = g_slice_alloc0(digest->bytes);
        for(gsize block = 0; block < (digest->bytes / 16); block++) {
            digest->checksum[block].first = seeds[block % n_seeds] ^ seed1;
            digest->checksum[block].second = seeds[block % n_seeds] ^ seed2;
        }
    }

    if(digest->type == RM_DIGEST_BASTARD) {
        /* bastard type *always* has *pure* murmur hash for first checksum
         * and seeded city for second checksum */
        digest->checksum[0].first = digest->checksum[0].second = 0;
    }
    return digest;
}