/**
 * Full ordering of two files: rm_file_cmp() decides first; only when it
 * reports a tie is rm_pp_cmp_orig_criteria() consulted to break it.
 *
 * @param file_a  left-hand file
 * @param file_b  right-hand file
 * @param session session forwarded to rm_pp_cmp_orig_criteria()
 * @return the non-zero result of rm_file_cmp(), otherwise whatever
 *         rm_pp_cmp_orig_criteria() returns
 */
gint rm_file_cmp_full(const RmFile *file_a, const RmFile *file_b,
                      const RmSession *session) {
    gint primary = rm_file_cmp(file_a, file_b);

    /* the tie-breaker is evaluated only if the primary comparison is a draw */
    return (primary != 0) ? primary
                          : rm_pp_cmp_orig_criteria(file_a, file_b, session);
}
/* Preprocessing step, including the handling of "other lint" (non-dupes).
 *
 * Once rm_preprocess() has run, the remaining duplicate candidates are held
 * in a jagged GSList of GSLists laid out like this:
 *
 *    session->tables->size_groups->group1->file1a
 *                                        ->file1b
 *                                        ->file1c
 *                                ->group2->file2a
 *                                        ->file2b
 *                                  etc
 */
void rm_preprocess(RmSession *session) {
    RmFileTables *tables = session->tables;
    GQueue *all_files = tables->all_files;
    guint n_removed = 0;

    session->total_filtered_files = session->total_files;

    /* Step 1: order the complete file list with the full comparator. */
    g_queue_sort(all_files, (GCompareDataFunc)rm_file_cmp_full, session);
    rm_log_debug_line("initial size sort finished at time %.3f; sorted %d files",
                      g_timer_elapsed(session->timer, NULL),
                      session->total_files);

    /* Step 2: walk the sorted list and cut it into groups; every finished
     * group has its path doubles removed and its hardlinks bundled. */
    rm_assert_gentle(all_files->head);

    RmFile *candidate = g_queue_pop_head(all_files);
    RmFile *group_head = candidate;
    GHashTable *node_table = tables->node_table;

    while(candidate && !rm_session_was_aborted(session)) {
        /* file the candidate under its inode cluster (created on demand) */
        GQueue *cluster =
            rm_hash_table_setdefault(node_table, candidate, (RmNewFunc)g_queue_new);
        g_queue_push_tail(cluster, candidate);

        /* Fetch the successor; while it still compares equal to the head of
         * the running group there is nothing to finalise yet.
         * NOTE(review): the boundary is detected with rm_file_cmp() only --
         * confirm whether rm_file_cmp_split() is meant to be used here when
         * an mtime window is configured. */
        candidate = g_queue_pop_head(all_files);
        if(candidate && rm_file_cmp(candidate, group_head) == 0) {
            continue;
        }

        /* The group is complete (same size & other criteria): open a fresh,
         * empty slot in the list of lists, then let the cluster handler deal
         * with path doubles and "other" lint. */
        tables->size_groups = g_slist_prepend(tables->size_groups, NULL);
        n_removed += g_hash_table_foreach_remove(
            node_table, (GHRFunc)rm_pp_handle_inode_clusters, session);

        /* empty the node table so the next group starts from scratch */
        g_hash_table_steal_all(node_table);

        GSList *newest = tables->size_groups;
        if(newest->data == NULL) {
            /* nothing survived in this group; drop the empty slot again */
            tables->size_groups = g_slist_delete_link(newest, newest);
        }

        group_head = candidate;
    }

    session->other_lint_cnt += rm_pp_handler_other_lint(session);

    rm_log_debug_line(
        "path doubles removal/hardlink bundling/other lint finished at %.3f; removed %u "
        "of %d",
        g_timer_elapsed(session->timer, NULL), n_removed, session->total_files);

    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
}
/**
 * Full ordering of two files.
 *
 * rm_file_cmp() decides first. On a tie the result depends on the
 * configuration: with a non-negative cfg->mtime_window the files are ordered
 * by modification time alone (0 if FLOAT_IS_ZERO() accepts the difference);
 * otherwise rm_pp_cmp_orig_criteria() breaks the tie.
 *
 * @param file_a  left-hand file
 * @param file_b  right-hand file
 * @param session session supplying cfg->mtime_window; also forwarded to
 *                rm_pp_cmp_orig_criteria()
 * @return negative, zero or positive, in the usual comparator sense
 */
static gint rm_file_cmp_full(const RmFile *file_a, const RmFile *file_b,
                             const RmSession *session) {
    gint primary = rm_file_cmp(file_a, file_b);
    if(primary != 0) {
        return primary;
    }

    /* no mtime window configured: fall back to the original criteria */
    if(!(session->cfg->mtime_window >= 0)) {
        return rm_pp_cmp_orig_criteria(file_a, file_b, session);
    }

    /* mtime window active: order purely by modification time */
    gdouble mtime_delta = file_a->mtime - file_b->mtime;
    if(FLOAT_IS_ZERO(mtime_delta)) {
        return 0;
    }
    if(mtime_delta < 0) {
        return -1;
    }
    return +1;
}
static gint rm_file_cmp_split(const RmFile *file_a, const RmFile *file_b, const RmSession *session) { gint result = rm_file_cmp(file_a, file_b); if(result != 0) { return result; } /* If --mtime-window is specified, we need to check if the mtime is inside * the window. The file list was sorted by rm_file_cmp_full by taking the * diff of mtimes, therefore we have to define the split criteria * differently. */ if(session->cfg->mtime_window >= 0) { gdouble diff = file_a->mtime - file_b->mtime; if(FLOAT_IS_ZERO(diff - session->cfg->mtime_window) || fabs(diff) < session->cfg->mtime_window) { return 0; } /* Split the group. */ return (diff < 0) ? -1 : +1; } return 0; }