Exemple #1
0
/**
 * Tell whether the disk backing `device` is non-rotational.
 *
 * The partition is looked up in self->part_table and its disk in
 * self->disk_table. Whenever the answer cannot be determined (no mount
 * table, unknown partition, unknown disk) the function returns true.
 *
 * @param self    mount table to query; may be NULL.
 * @param device  device id of the partition.
 * @return the negation of the disk's is_rotational flag, or true if unknown.
 */
bool rm_mounts_is_nonrotational(RmMountTable *self, dev_t device) {
    if(!self) {
        return true;
    }

    RmPartitionInfo *partition =
        g_hash_table_lookup(self->part_table, GINT_TO_POINTER(device));
    if(partition == NULL) {
        rm_log_error_line("Partition not found in rm_mounts_is_nonrotational");
        return true;
    }

    RmDiskInfo *disk_info =
        g_hash_table_lookup(self->disk_table, GINT_TO_POINTER(partition->disk));
    if(disk_info == NULL) {
        rm_log_error_line("Disk not found in rm_mounts_is_nonrotational");
        return true;
    }

    return !disk_info->is_rotational;
}
Exemple #2
0
/**
 * Wrapper for g_thread_pool_new() with error reporting.
 *
 * The pool is created non-exclusive (the `exclusive` argument is FALSE).
 * If GLib reports an error, its message is logged and the GError is freed.
 *
 * @param func     function executed for each item pushed to the pool.
 * @param data     user data handed to `func`.
 * @param threads  maximum number of threads for the pool.
 * @return whatever g_thread_pool_new() returned, also when an error was set.
 */
GThreadPool *rm_util_thread_pool_new(GFunc func, gpointer data, int threads) {
    GError *error = NULL;
    GThreadPool *pool = g_thread_pool_new(func, data, threads, FALSE, &error);

    if(error != NULL) {
        /* include the GLib message so the failure can be diagnosed */
        rm_log_error_line("Unable to create thread pool: %s", error->message);
        g_error_free(error);
    }
    return pool;
}
Exemple #3
0
/**
 * Wrapper for g_thread_pool_push() with error reporting.
 *
 * @param pool  pool to push to.
 * @param data  item handed to the pool.
 * @return true if no GError was set by the push; otherwise the error is
 *         logged, freed, and false is returned.
 */
bool rm_util_thread_pool_push(GThreadPool *pool, gpointer data) {
    GError *push_error = NULL;

    g_thread_pool_push(pool, data, &push_error);
    if(push_error == NULL) {
        return true;
    }

    rm_log_error_line("Unable to push thread to pool %p: %s", pool, push_error->message);
    g_error_free(push_error);
    return false;
}
Exemple #4
0
/**
 * Process-wide signal handler.
 *
 * SIGINT requests a session abort via rm_session_abort().
 * SIGFPE, SIGABRT and SIGSEGV log a fatal-error notice and exit with
 * status 1. Every other signal is ignored.
 *
 * @param signum  number of the signal that was delivered.
 */
static void signal_handler(int signum) {
    if(signum == SIGINT) {
        rm_session_abort();
        return;
    }

    if(signum == SIGFPE || signum == SIGABRT || signum == SIGSEGV) {
        /* Logging from inside a signal handler may have unexpected effects,
         * but after a fatal signal that is the smallest of our problems.
         */
        rm_log_error_line(_("Aborting due to a fatal error. (signal received: %s)"),
                          g_strsignal(signum));
        rm_log_error_line(_("Please file a bug report (See rmlint -h)"));
        exit(1);
    }
}
Exemple #5
0
/**
 * Look up the inode number of the folder containing `file`.
 *
 * The parent path is rebuilt from the session's file trie via
 * rm_trie_build_path() and then passed to rm_sys_stat().
 *
 * @param file  file whose parent folder is queried.
 * @return st_ino of the parent path, or 0 (after logging) if stat failed.
 */
static ino_t rm_path_parent_inode(RmFile *file) {
    char parent_path[PATH_MAX];
    RmTrie *trie = (RmTrie *)&file->session->cfg->file_trie;
    rm_trie_build_path(trie, file->folder->parent, parent_path, PATH_MAX);

    RmStat parent_stat;
    if(rm_sys_stat(parent_path, &parent_stat) == -1) {
        rm_log_error_line("Failed to get parent path: stat failed: %s", g_strerror(errno));
        return 0;
    }

    return parent_stat.st_ino;
}
Exemple #6
0
/**
 * Compare two digests for equality.
 *
 * Digests of different type or different byte count are never equal.
 * Beyond that the comparison depends on the digest type:
 *  - RM_DIGEST_PARANOID: falls back to the shadow hashes if `a` has no
 *    buffers left; otherwise consults the twin candidates and the reject
 *    lists before comparing the buffer lists element by element.
 *  - types for which rm_digest_needs_steal() is true: compares the buffers
 *    returned by rm_digest_steal() and frees them afterwards.
 *  - everything else: compares the checksum fields directly.
 *
 * @param a  first digest.
 * @param b  second digest.
 * @return non-zero if the digests are considered equal, zero otherwise.
 */
gboolean rm_digest_equal(RmDigest *a, RmDigest *b) {
    rm_assert_gentle(a && b);

    if(a->type != b->type || a->bytes != b->bytes) {
        return false;
    }

    if(a->type == RM_DIGEST_PARANOID) {
        if(a->paranoid->buffers == NULL) {
            /* buffers are gone, so only the shadow hashes can be compared */
            return rm_digest_equal(a->paranoid->shadow_hash, b->paranoid->shadow_hash);
        }

        /* already matched up as twins? */
        if(a->paranoid->twin_candidate == b || b->paranoid->twin_candidate == a) {
            return true;
        }

        /* already known to differ? */
        if(g_slist_find(a->paranoid->rejects, b) ||
           g_slist_find(b->paranoid->rejects, a)) {
            return false;
        }

        /* no shortcut applied: walk both buffer lists in lockstep */
        guint compared = 0;
        GSList *lhs = a->paranoid->buffers;
        GSList *rhs = b->paranoid->buffers;
        for(; lhs && rhs; lhs = lhs->next, rhs = rhs->next) {
            if(!rm_buffer_equal(lhs->data, rhs->data)) {
                rm_log_error_line(
                    "Paranoid digest compare found mismatch - must be hash collision in "
                    "shadow hash");
                return false;
            }
            compared += ((RmBuffer *)lhs->data)->len;
        }

        /* equal only if both lists ended together and covered all bytes */
        return (!lhs && !rhs && compared == a->bytes);
    }

    if(rm_digest_needs_steal(a->type)) {
        guint8 *stolen_a = rm_digest_steal(a);
        guint8 *stolen_b = rm_digest_steal(b);

        gboolean same = false;
        if(a->bytes == b->bytes) {
            same = !memcmp(stolen_a, stolen_b, MIN(a->bytes, b->bytes));
        }

        g_slice_free1(a->bytes, stolen_a);
        g_slice_free1(b->bytes, stolen_b);

        return same;
    }

    return !memcmp(a->checksum, b->checksum, MIN(a->bytes, b->bytes));
}
Exemple #7
0
/**
 * Entry point of the stand-alone file hasher.
 *
 * Parses the command line, collects the paths to hash (from the remaining
 * arguments or, if none were given, one per line from stdin), pushes every
 * regular file to an RmHasher and waits for all hashing to finish.
 * Paths that cannot be stat'ed, directories and other non-regular files are
 * reported with a warning and passed to rm_hasher_callback() with a NULL
 * digest.
 *
 * @param argc  number of command line arguments.
 * @param argv  command line arguments; argv[0] names the option group.
 * @return EXIT_SUCCESS. The process exits with EXIT_FAILURE if option
 *         parsing fails or no valid path was given.
 */
int rm_hasher_main(int argc, const char **argv) {
    RmHasherSession tag;

    /* List of paths we got passed (or NULL)   */
    tag.paths = NULL;

    /* Print hashes in the same order as files in command line args */
    tag.print_in_order = TRUE;

    /* Print a hash with builtin identifier */
    tag.print_multihash = FALSE;

    /* Digest type (user option, default SHA1) */
    tag.digest_type = RM_DIGEST_SHA1;
    gint threads = 8;
    gint64 buffer_mbytes = 256;

    ////////////// Option Parsing ///////////////

    /* clang-format off */

    const GOptionEntry entries[] = {
        {"digest-type"    , 'd'  , 0                      , G_OPTION_ARG_CALLBACK        , (GOptionArgFunc)rm_hasher_parse_type  , _("Digest type [SHA1]")                                                            , "[TYPE]"}   ,
        {"num-threads"    , 't'  , 0                      , G_OPTION_ARG_INT             , &threads                              , _("Number of hashing threads [8]")                                                 , "N"}        ,
        {"multihash"      , 'm'  , 0                      , G_OPTION_ARG_NONE            , &tag.print_multihash                  , _("Print hash as self identifying multihash")                                      , NULL}       ,
        {"buffer-mbytes"  , 'b'  , 0                      , G_OPTION_ARG_INT64           , &buffer_mbytes                        , _("Megabytes read buffer [256 MB]")                                                , "MB"}       ,
        {"ignore-order"   , 'i'  , G_OPTION_FLAG_REVERSE  , G_OPTION_ARG_NONE            , &tag.print_in_order                   , _("Print hashes in order completed, not in order entered (reduces memory usage)")  , NULL}       ,
        {""               , 0    , 0                      , G_OPTION_ARG_FILENAME_ARRAY  , &tag.paths                            , _("Space-separated list of files")                                                 , "[FILE…]"}  ,
        {NULL             , 0    , 0                      , 0                            , NULL                                  , NULL                                                                               , NULL}};

    /* clang-format on */

    GError *error = NULL;
    GOptionContext *context = g_option_context_new(_("Hash a list of files"));
    GOptionGroup *main_group =
        g_option_group_new(argv[0], _("Hash a list of files"), "", &tag, NULL);

    char summary[4096];
    memset(summary, 0, sizeof(summary));

    g_snprintf(summary, sizeof(summary),
               _("Multi-threaded file digest (hash) calculator.\n"
                 "\n  Available digest types:"
                 "\n    %s\n"
                 "\n  Versions with different bit numbers:"
                 "\n    %s\n"
                 "\n  Supported, but not useful:"
                 "\n    %s\n"),
               "spooky, city, xxhash, sha{1,256,512}, md5, murmur",
               "spooky{32,64,128}, city{128,256,512}, murmur{512}",
               "farmhash, cumulative, paranoid, ext, bastard");

    g_option_group_add_entries(main_group, entries);
    g_option_context_set_main_group(context, main_group);
    g_option_context_set_summary(context, summary);

    if(!g_option_context_parse(context, &argc, (char ***)&argv, &error)) {
        /* print g_option error message */
        rm_log_error_line("%s", error->message);
        exit(EXIT_FAILURE);
    }

    if(tag.paths == NULL) {
        /* read paths from stdin, one per line */
        char path_buf[PATH_MAX];
        GPtrArray *paths = g_ptr_array_new();

        while(fgets(path_buf, PATH_MAX, stdin)) {
            char *tokbuf = NULL;
            char *line = strtok_r(path_buf, "\n", &tokbuf);
            if(line == NULL) {
                /* empty line: nothing to resolve */
                continue;
            }

            char *abs_path = realpath(line, NULL);
            if(abs_path == NULL) {
                /* A NULL entry would terminate the path list early,
                 * so report the path and leave it out instead. */
                rm_log_warning_line(_("Can't open directory or file \"%s\": %s"), line,
                                    strerror(errno));
                continue;
            }

            /* realpath() allocates with malloc(); keep a GLib-owned copy so
             * that all entries of tag.paths can be released with g_free() */
            g_ptr_array_add(paths, g_strdup(abs_path));
            free(abs_path);
        }

        /* the code below treats tag.paths as a NULL-terminated vector */
        g_ptr_array_add(paths, NULL);
        tag.paths = (char **)g_ptr_array_free(paths, FALSE);
    }

    if(tag.paths == NULL || tag.paths[0] == NULL) {
        rm_log_error_line(_("No valid paths given."));
        exit(EXIT_FAILURE);
    }

    g_option_context_free(context);

    ////////// Implementation //////

    /* Count the paths once: entries of failed paths are set to NULL further
     * down, after which g_strv_length() would report a shorter vector. */
    const guint n_paths = g_strv_length(tag.paths);
    const gsize digests_buffer_size = ((gsize)n_paths + 1) * sizeof(RmDigest *);

    if(tag.print_in_order) {
        /* allocate buffer to collect results */
        tag.completed_digests_buffer = g_slice_alloc0(digests_buffer_size);
        tag.path_index = 0;
    }

    /* initialise structures */
    g_mutex_init(&tag.lock);
    RmHasher *hasher = rm_hasher_new(tag.digest_type,
                                     threads,
                                     FALSE,
                                     4096,
                                     1024 * 1024 * buffer_mbytes,
                                     0,
                                     (RmHasherCallback)rm_hasher_callback,
                                     &tag);

    /* Iterate over paths, pushing to hasher threads */
    for(guint i = 0; i < n_paths; ++i) {
        /* check it is a regular file */

        RmStat stat_buf;
        if(rm_sys_stat(tag.paths[i], &stat_buf) == -1) {
            rm_log_warning_line(_("Can't open directory or file \"%s\": %s"),
                                tag.paths[i], strerror(errno));
        } else if(S_ISDIR(stat_buf.st_mode)) {
            rm_log_warning_line(_("Directories are not supported: %s"), tag.paths[i]);
        } else if(S_ISREG(stat_buf.st_mode)) {
            RmHasherTask *task = rm_hasher_task_new(hasher, NULL, GINT_TO_POINTER(i));
            rm_hasher_task_hash(task, tag.paths[i], 0, 0, FALSE);
            rm_hasher_task_finish(task);
            continue;
        } else {
            rm_log_warning_line(_("%s: Unknown file type"), tag.paths[i]);
        }

        /* dummy callback for failed paths */
        g_free(tag.paths[i]);
        tag.paths[i] = NULL;
        rm_hasher_callback(hasher, NULL, &tag, GINT_TO_POINTER(i));
    }

    /* wait for all hasher threads to finish... */
    rm_hasher_free(hasher, TRUE);
    g_mutex_clear(&tag.lock);

    /* tidy up */
    if(tag.print_in_order) {
        /* must be freed with exactly the size it was allocated with */
        g_slice_free1(digests_buffer_size, tag.completed_digests_buffer);
    }

    /* g_strfreev() would stop at the first failed (NULL) entry and leak the
     * rest, so release every slot explicitly */
    for(guint i = 0; i < n_paths; ++i) {
        g_free(tag.paths[i]);
    }
    g_free(tag.paths);

    return EXIT_SUCCESS;
}
Exemple #8
0
/**
 * Read a byte range of a file with rm_sys_preadv() and feed it to a hashing
 * thread pool.
 *
 * The data is read into RmBuffers taken from hasher->mem_pool; each filled
 * buffer is tagged with `digest` and pushed to `hashpipe`. Buffers that were
 * fetched but never pushed are released again before returning.
 *
 * @param hasher         supplies buf_size and the buffer pool.
 * @param hashpipe       thread pool receiving the filled buffers.
 * @param digest         digest stored in every pushed buffer.
 * @param path           file to read.
 * @param start_offset   file offset to start reading at.
 * @param bytes_to_read  number of bytes to read; 0 means "from start_offset
 *                       to the end of the file" (size taken from stat).
 * @return total number of bytes read and pushed; 0 if the file could not
 *         be opened.
 */
static gint64 rm_hasher_unbuffered_read(RmHasher *hasher, GThreadPool *hashpipe,
                                        RmDigest *digest, char *path, gint64 start_offset,
                                        gint64 bytes_to_read) {
    gint32 bytes_read = 0;
    gint64 total_bytes_read = 0;
    guint64 file_offset = start_offset;

    if(bytes_to_read == 0) {
        RmStat stat_buf;
        if(rm_sys_stat(path, &stat_buf) != -1) {
            bytes_to_read = MAX(stat_buf.st_size - start_offset, 0);
        }
    }

    /* how many buffers to read? */
    const gint16 N_BUFFERS = MIN(4, DIVIDE_CEIL(bytes_to_read, hasher->buf_size));
    struct iovec readvec[N_BUFFERS + 1];

    int fd = rm_sys_open(path, O_RDONLY);
    if(fd == -1) {
        rm_log_info("open(2) failed for %s: %s\n", path, g_strerror(errno));
        goto finish;
    }

    /* preadv() is beneficial for large files since it can cut the
     * number of syscall heavily.  I suggest N_BUFFERS=4 as good
     * compromise between memory and cpu.
     *
     * With 16 buffers: 43% cpu 33,871 total
     * With  8 buffers: 43% cpu 32,098 total
     * With  4 buffers: 42% cpu 32,091 total
     * With  2 buffers: 44% cpu 32,245 total
     * With  1 buffers: 45% cpu 34,491 total
     */

    /* Give the kernel scheduler some hints */
    rm_hasher_request_readahead(fd, start_offset, bytes_to_read);

    /* Initialize the buffers to begin with.
     * After a buffer is full, a new one is retrieved.
     */
    RmBuffer **buffers;
    buffers = g_slice_alloc(sizeof(*buffers) * N_BUFFERS);

    memset(readvec, 0, sizeof(readvec));
    for(int i = 0; i < N_BUFFERS; ++i) {
        /* buffer is one contiguous memory block */
        buffers[i] = rm_buffer_get(hasher->mem_pool);
        readvec[i].iov_base = buffers[i]->data;
        readvec[i].iov_len = hasher->buf_size;
    }

    while((bytes_to_read == 0 || total_bytes_read < bytes_to_read) &&
          (bytes_read = rm_sys_preadv(fd, readvec, N_BUFFERS, file_offset)) > 0) {
        bytes_read =
            MIN(bytes_read, bytes_to_read - total_bytes_read); /* ignore over-reads */

        /* number of buffers that received data in this call */
        int blocks = DIVIDE_CEIL(bytes_read, hasher->buf_size);
        rm_assert_gentle(blocks <= N_BUFFERS);

        total_bytes_read += bytes_read;
        file_offset += bytes_read;

        for(int i = 0; i < blocks; ++i) {
            /* Get the RmBuffer from the datapointer */
            RmBuffer *buffer = buffers[i];
            /* the last block of a read may be only partially filled */
            buffer->len = MIN(hasher->buf_size, bytes_read - i * hasher->buf_size);
            buffer->digest = digest;
            buffer->user_data = NULL;

            /* Send it to the hasher */
            rm_util_thread_pool_push(hashpipe, buffer);

            /* Allocate a new buffer - hasher will release the old buffer */
            buffers[i] = rm_buffer_get(hasher->mem_pool);
            readvec[i].iov_base = buffers[i]->data;
            readvec[i].iov_len = hasher->buf_size;
        }
    }

    if(bytes_read == -1) {
        rm_log_perror("preadv failed");
    } else if(total_bytes_read != bytes_to_read) {
        rm_log_error_line(_("Something went wrong reading %s; expected %li bytes, "
                            "got %li; ignoring"),
                          path, (long int)bytes_to_read, (long int)total_bytes_read);
    }

    /* Release the rest of the buffers */
    for(int i = 0; i < N_BUFFERS; ++i) {
        rm_buffer_release(buffers[i]);
    }
    g_slice_free1(sizeof(*buffers) * N_BUFFERS, buffers);

finish:
    /* 0 is a valid descriptor too; only a failed open (-1) must be skipped */
    if(fd >= 0) {
        rm_sys_close(fd);
    }

    return total_bytes_read;
}
Exemple #9
0
/**
 * Test whether a file is a non-stripped binary. Uses libelf.
 *
 * A file counts as non-stripped if one of its ELF sections is a symbol
 * table (SHT_SYMTAB). When built without HAVE_LIBELF the function always
 * returns false.
 *
 * @param path   file to inspect; must not be NULL.
 * @param statp  optional stat result for `path`; if given and none of the
 *               execute permission bits is set, the file is not opened and
 *               false is returned.
 * @return true if a symbol table section was found, false otherwise
 *         (including when the file cannot be opened or read as ELF).
 */
bool rm_util_is_nonstripped(_U const char *path, _U RmStat *statp) {
    bool is_ns = false;

#if HAVE_LIBELF
    g_return_val_if_fail(path, false);

    if(statp && (statp->st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
        return false;
    }

    /* inspired by "jschmier"'s answer at http://stackoverflow.com/a/5159890 */
    int fd;

    /* ELF handle */
    Elf *elf;

    /* section descriptor pointer */
    Elf_Scn *scn;

    /* section header */
    GElf_Shdr shdr;

    /* Open ELF file to obtain file descriptor */
    if((fd = rm_sys_open(path, O_RDONLY)) == -1) {
        rm_log_warning_line(_("cannot open file '%s' for nonstripped test: "), path);
        rm_log_perror("");
        return false;
    }

    /* Protect program from using an older library */
    if(elf_version(EV_CURRENT) == EV_NONE) {
        rm_log_error_line(_("ELF Library is out of date!"));
        /* do not leak the descriptor opened above */
        rm_sys_close(fd);
        return false;
    }

    /* Initialize elf pointer for examining contents of file */
    elf = elf_begin(fd, ELF_C_READ, NULL);
    if(elf == NULL) {
        /* libelf could not create a handle for this file */
        rm_sys_close(fd);
        return false;
    }

    /* Initialize section descriptor pointer so that elf_nextscn()
     * returns a pointer to the section descriptor at index 1.
     * */
    scn = NULL;

    /* Iterate through ELF sections */
    while((scn = elf_nextscn(elf, scn)) != NULL) {
        /* Retrieve section header; skip the section if that fails, since
         * shdr would be left unset */
        if(gelf_getshdr(scn, &shdr) == NULL) {
            continue;
        }

        /* If a section header holding a symbol table (.symtab)
         * is found, this ELF file has not been stripped. */
        if(shdr.sh_type == SHT_SYMTAB) {
            is_ns = true;
            break;
        }
    }
    elf_end(elf);
    rm_sys_close(fd);
#endif

    return is_ns;
}