Esempio n. 1
0
gint rm_file_cmp_full(const RmFile *file_a, const RmFile *file_b,
                      const RmSession *session) {
    gint result = rm_file_cmp(file_a, file_b);
    if(result != 0) {
        return result;
    }
    return rm_pp_cmp_orig_criteria(file_a, file_b, session);
}
Esempio n. 2
0
/* Preprocess the session's file list.
 *
 * Steps, in order:
 *  1. sort tables->all_files with rm_file_cmp_full();
 *  2. pop files off the sorted queue one by one, collecting them into
 *     per-key queues in tables->node_table ("inode clusters");
 *  3. whenever the next file no longer compares equal (rm_file_cmp() != 0)
 *     to the first file of the current group, or the queue is exhausted,
 *     pass every cluster to rm_pp_handle_inode_clusters() and start a new
 *     group;
 *  4. run rm_pp_handler_other_lint() and add its result to
 *     session->other_lint_cnt.
 *
 * This includes handling of "other lint" (non-dupes).
 * After rm_preprocess(), all remaining duplicate candidates are in
 * a jagged GSList of GSLists as follows:
 * session->tables->size_groups->group1->file1a
 *                                     ->file1b
 *                                     ->file1c
 *                             ->group2->file2a
 *                                     ->file2b
 *                                       etc
 */
void rm_preprocess(RmSession *session) {
    RmFileTables *tables = session->tables;
    GQueue *all_files = tables->all_files;

    /* the filtered count starts out equal to the total count */
    session->total_filtered_files = session->total_files;

    /* initial sort by size (the session is passed to the comparator as user data) */
    g_queue_sort(all_files, (GCompareDataFunc)rm_file_cmp_full, session);
    rm_log_debug_line("initial size sort finished at time %.3f; sorted %d files",
                      g_timer_elapsed(session->timer, NULL),
                      session->total_files);

    /* split into file size groups; for each size, remove path doubles and bundle
     * hardlinks */
    rm_assert_gentle(all_files->head);
    /* g_queue_pop_head() yields NULL on an empty queue, in which case the
     * loop below is skipped entirely */
    RmFile *file = g_queue_pop_head(all_files);
    /* first file of the group being collected; reference for rm_file_cmp() */
    RmFile *current_size_file = file;
    /* running total of entries removed by g_hash_table_foreach_remove() */
    guint removed = 0;
    GHashTable *node_table = tables->node_table;
    /* stops when the queue is drained or the session reports an abort */
    while(file && !rm_session_was_aborted(session)) {
        /* group files into inode clusters */
        /* NOTE(review): presumably returns the queue already stored for this
         * file's key, creating one via g_queue_new if absent - confirm
         * against rm_hash_table_setdefault() */
        GQueue *inode_cluster =
            rm_hash_table_setdefault(node_table, file, (RmNewFunc)g_queue_new);

        g_queue_push_tail(inode_cluster, file);

        /* get next file and check if it is part of the same group */
        file = g_queue_pop_head(all_files);
        if(!file || rm_file_cmp(file, current_size_file) != 0) {
            /* process completed group (all same size & other criteria)*/
            /* remove path doubles and handle "other" lint */

            /* add an empty GSlist to our list of lists */
            /* NOTE(review): presumably rm_pp_handle_inode_clusters() fills
             * this new head element via the session - verify in the callback */
            tables->size_groups = g_slist_prepend(tables->size_groups, NULL);

            removed += g_hash_table_foreach_remove(
                node_table, (GHRFunc)rm_pp_handle_inode_clusters, session);

            /* free up the node table for the next group */
            /* (steal_all drops the remaining entries without invoking the
             * table's key/value destroy functions) */
            g_hash_table_steal_all(node_table);
            if(tables->size_groups->data == NULL) {
                /* zero size group after handling other lint; remove it */
                tables->size_groups = g_slist_delete_link(tables->size_groups, tables->size_groups);
            }
            /* the file just popped (possibly NULL) becomes the reference
             * for the next group */
            current_size_file = file;
        }
    }

    /* accumulate the count returned by the "other lint" handler */
    session->other_lint_cnt += rm_pp_handler_other_lint(session);

    rm_log_debug_line(
        "path doubles removal/hardlink bundling/other lint finished at %.3f; removed %u "
        "of %d",
        g_timer_elapsed(session->timer, NULL), removed, session->total_files);

    /* report the preprocess state to the session's formatters */
    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
}
Esempio n. 3
0
/* Sort comparator with three tiers:
 *  1. rm_file_cmp() decides if it reports a difference;
 *  2. otherwise, when cfg->mtime_window >= 0, files are ordered by mtime
 *     (a difference that FLOAT_IS_ZERO() accepts counts as equal);
 *  3. otherwise rm_pp_cmp_orig_criteria() breaks the tie.
 * Returns a negative, zero or positive value in the usual comparator sense. */
static gint rm_file_cmp_full(const RmFile *file_a, const RmFile *file_b,
                      const RmSession *session) {
    gint order = rm_file_cmp(file_a, file_b);

    if(order == 0) {
        if(session->cfg->mtime_window >= 0) {
            /* mtime window active: order purely by modification time */
            gdouble mtime_delta = file_a->mtime - file_b->mtime;
            if(!FLOAT_IS_ZERO(mtime_delta)) {
                order = (mtime_delta < 0) ? -1 : +1;
            }
        } else {
            /* no mtime window: defer to the original-criteria comparison */
            order = rm_pp_cmp_orig_criteria(file_a, file_b, session);
        }
    }

    return order;
}
Esempio n. 4
0
/* Decide whether two files belong to the same group when splitting the
 * sorted file list.
 *
 * Returns the result of rm_file_cmp() if it is non-zero. Otherwise, when
 * cfg->mtime_window >= 0, returns 0 if the absolute mtime difference is
 * within the window (boundary included, up to FLOAT_IS_ZERO tolerance) and
 * -1 / +1 according to the sign of (file_a->mtime - file_b->mtime) if not.
 * With no mtime window configured, files tied under rm_file_cmp() always
 * compare equal (0).
 */
static gint rm_file_cmp_split(const RmFile *file_a, const RmFile *file_b,
                       const RmSession *session) {
    gint result = rm_file_cmp(file_a, file_b);
    if(result != 0) {
        return result;
    }

    /* If --mtime-window is specified, we need to check if the mtime is inside
     * the window. The file list was sorted by rm_file_cmp_full by taking the
     * diff of mtimes, therefore we have to define the split criteria
     * differently.
     */
    if(session->cfg->mtime_window >= 0) {
        gdouble diff = file_a->mtime - file_b->mtime;
        gdouble distance = fabs(diff);

        /* Test the boundary on the absolute difference so the result does
         * not depend on argument order: previously diff == +window was
         * "inside" while diff == -window was "outside". */
        if(FLOAT_IS_ZERO(distance - session->cfg->mtime_window) ||
           distance < session->cfg->mtime_window) {
            return 0;
        }

        /* Split the group. */
        return (diff < 0) ? -1 : +1;
    }

    return 0;
}