/* Preprocessing pass, including handling of "other lint" (non-dupes).
 *
 * Once rm_preprocess() has run, the remaining duplicate candidates are
 * stored as a jagged GSList of GSLists:
 *
 *   session->tables->size_groups->group1->file1a
 *                                       ->file1b
 *                                       ->file1c
 *                               ->group2->file2a
 *                                       ->file2b
 *                               etc
 *
 * Steps, in order:
 *   1. Sort tables->all_files with rm_file_cmp_full.
 *   2. Drain the queue; files that compare equal under rm_file_cmp_split
 *      form one group. Within a group, files are bucketed into inode
 *      clusters through tables->node_table.
 *   3. When a group is complete, hand every cluster to
 *      rm_pp_handle_inode_clusters and discard the group's list again if
 *      it is still empty afterwards.
 *   4. Add the result of rm_pp_handler_other_lint to
 *      session->other_lint_cnt and advance the progress state.
 *
 * The drain loop also stops as soon as rm_session_was_aborted() reports true.
 */
void rm_preprocess(RmSession *session) {
    RmFileTables *tables = session->tables;
    GQueue *all_files = tables->all_files;

    session->total_filtered_files = session->total_files;

    /* initial sort by size */
    g_queue_sort(all_files, (GCompareDataFunc)rm_file_cmp_full, session);
    rm_log_debug_line("initial size sort finished at time %.3f; sorted %d files",
                      g_timer_elapsed(session->timer, NULL), session->total_files);

    /* split into file size groups; for each size, remove path doubles and
     * bundle hardlinks */
    rm_assert_gentle(all_files->head);

    guint removed = 0;
    GHashTable *node_table = tables->node_table;
    RmFile *upcoming = g_queue_pop_head(all_files);

    while(upcoming != NULL && !rm_session_was_aborted()) {
        RmFile *member = upcoming;

        /* file the current member under its inode cluster */
        GQueue *cluster =
            rm_hash_table_setdefault(node_table, member, (RmNewFunc)g_queue_new);
        g_queue_push_tail(cluster, member);

        /* peek at the successor; if it still belongs to the same group
         * there is nothing to flush yet */
        upcoming = g_queue_pop_head(all_files);
        if(upcoming != NULL && rm_file_cmp_split(upcoming, member, session) == 0) {
            continue;
        }

        /* group complete (all same size & other criteria): open a fresh,
         * empty GSList slot for it at the head of the list of lists */
        tables->size_groups = g_slist_prepend(tables->size_groups, NULL);

        /* remove path doubles and handle "other" lint */
        removed += g_hash_table_foreach_remove(
            node_table, (GHRFunc)rm_pp_handle_inode_clusters, session);

        /* empty the node table so the next group starts from scratch */
        g_hash_table_steal_all(node_table);

        /* nothing survived the handling of other lint: drop the empty slot */
        if(tables->size_groups->data == NULL) {
            tables->size_groups =
                g_slist_delete_link(tables->size_groups, tables->size_groups);
        }
    }

    session->other_lint_cnt += rm_pp_handler_other_lint(session);

    rm_log_debug_line(
        "path doubles removal/hardlink bundling/other lint finished at %.3f; removed %u "
        "of %d",
        g_timer_elapsed(session->timer, NULL), removed, session->total_files);

    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
}
/* Record `directory` in self->result_table, at most once per directory.
 *
 * The directory is pushed to the head of the queue that
 * rm_hash_table_setdefault() returns for it (created with g_queue_new when
 * missing) and is then flagged via `was_inserted`, which turns any later
 * call for the same directory into a no-op.
 */
static void rm_tm_insert_dir(RmTreeMerger *self, RmDirectory *directory) {
    if(!directory->was_inserted) {
        GQueue *bucket = rm_hash_table_setdefault(self->result_table, directory,
                                                  (RmNewFunc)g_queue_new);
        g_queue_push_head(bucket, directory);
        directory->was_inserted = true;
    }
}
static void rm_tm_forward_unresolved(RmTreeMerger *self, RmDirectory *directory) { if(directory->finished == true) { return; } else { directory->finished = true; } for(GList *iter = directory->known_files.head; iter; iter = iter->next) { RmFile *file = iter->data; GQueue *file_list = rm_hash_table_setdefault(self->file_groups, file->digest, (RmNewFunc)g_queue_new); g_queue_push_head(file_list, file); } /* Recursively propagate to children */ for(GList *iter = directory->children.head; iter; iter = iter->next) { rm_tm_forward_unresolved(self, (RmDirectory *)iter->data); } }