Esempio n. 1
0
/* Preprocessing pass over every traversed file, including handling of
 * "other lint" (non-dupes).
 *
 * The queue session->tables->all_files is sorted and then consumed from the
 * head.  Consecutive files for which rm_file_cmp_split() reports no
 * difference form one group.  Inside a group the files are gathered into
 * inode clusters (queues stored in tables->node_table) and every cluster is
 * passed to rm_pp_handle_inode_clusters().
 *
 * After rm_preprocess(), all remaining duplicate candidates are in
 * a jagged GSList of GSLists as follows:
 * session->tables->size_groups->group1->file1a
 *                                     ->file1b
 *                                     ->file1c
 *                             ->group2->file2a
 *                                     ->file2b
 *                                       etc
 */
void rm_preprocess(RmSession *session) {
    RmFileTables *tables = session->tables;
    GQueue *pending = tables->all_files;
    GHashTable *clusters = tables->node_table;
    guint removed_total = 0;

    session->total_filtered_files = session->total_files;

    /* sort first, so that the members of one group sit next to each other */
    g_queue_sort(pending, (GCompareDataFunc)rm_file_cmp_full, session);
    rm_log_debug_line("initial size sort finished at time %.3f; sorted %d files",
                      g_timer_elapsed(session->timer, NULL),
                      session->total_files);

    /* walk the sorted queue; whenever a group is complete, remove its path
     * doubles and bundle its hardlinks */
    rm_assert_gentle(pending->head);

    RmFile *file = g_queue_pop_head(pending);
    while(file != NULL && !rm_session_was_aborted()) {
        /* the file handled in this iteration; the next one is compared to it */
        RmFile *previous = file;

        /* file the current entry under its inode cluster */
        GQueue *cluster =
            rm_hash_table_setdefault(clusters, previous, (RmNewFunc)g_queue_new);
        g_queue_push_tail(cluster, previous);

        /* fetch the successor; if it still belongs to this group, keep going */
        file = g_queue_pop_head(pending);
        if(file != NULL && rm_file_cmp_split(file, previous, session) == 0) {
            continue;
        }

        /* The group (same size & other criteria) is complete. Open a new,
         * still empty, inner list at the front of the list of lists; the
         * cluster handler fills it. */
        tables->size_groups = g_slist_prepend(tables->size_groups, NULL);

        /* remove path doubles and handle "other" lint, cluster by cluster */
        removed_total += g_hash_table_foreach_remove(
            clusters, (GHRFunc)rm_pp_handle_inode_clusters, session);

        /* make sure the cluster table is empty before the next group */
        g_hash_table_steal_all(clusters);

        if(tables->size_groups->data == NULL) {
            /* nothing survived in this group; drop the empty inner list */
            tables->size_groups =
                g_slist_delete_link(tables->size_groups, tables->size_groups);
        }
    }

    session->other_lint_cnt += rm_pp_handler_other_lint(session);

    rm_log_debug_line(
        "path doubles removal/hardlink bundling/other lint finished at %.3f; removed %u "
        "of %d",
        g_timer_elapsed(session->timer, NULL), removed_total, session->total_files);

    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
}
Esempio n. 2
0
/* Preprocess one inode cluster (a queue of files), including embedded
 * hardlinks.
 *
 * Steps, in order:
 *   1. if the cluster holds more than one file, remove path doubles;
 *   2. pass every file through rm_pp_handle_other_lint and drop the ones it
 *      claims;
 *   3. if more than one file is still left, pass the remainder through
 *      rm_pp_handle_hardlink with the queue head as the hardlink head;
 *   4. if one file is left, prepend it to the newest size group.
 *
 * Every file removed in steps 1-3 is subtracted from
 * session->total_filtered_files.
 *
 * Returns TRUE unconditionally; when used as a GHRFunc with
 * g_hash_table_foreach_remove() this asks for the cluster's entry to be
 * removed from the hash table.
 *
 * NOTE: we rely on rm_file_list_insert to select an RM_LINT_TYPE_DUPE_CANDIDATE as head
 * file (unless ALL the files are "other lint"). */
static gboolean rm_pp_handle_inode_clusters(_UNUSED gpointer key, GQueue *inode_cluster,
                                            RmSession *session) {
    RmFileTables *tables = session->tables;

    /* step 1: path doubles can only exist if there are several inode matches */
    if(inode_cluster->length > 1) {
        GHashTable *seen_paths = tables->unique_paths_table;

        session->total_filtered_files -= rm_util_queue_foreach_remove(
            inode_cluster, (RmRFunc)rm_pp_check_path_double, seen_paths);

        /* empty the table again so the next cluster starts from scratch */
        g_hash_table_remove_all(seen_paths);
    }

    /* step 2: process and remove other lint */
    session->total_filtered_files -= rm_util_queue_foreach_remove(
        inode_cluster, (RmRFunc)rm_pp_handle_other_lint, session);

    /* step 3: bundle or free the non-head files */
    if(inode_cluster->length > 1) {
        RmFile *head = g_queue_peek_head(inode_cluster);

        if(session->cfg->find_hardlinked_dupes) {
            /* set the head up so hardlinks can be bundled underneath it */
            head->hardlinks.is_head = TRUE;
            head->hardlinks.files = g_queue_new();
        }

        /* Hardlink clusters are counted as filtered files since they are
         * either ignored or treated as automatic duplicates depending on
         * settings (so no effort either way); rm_pp_handle_hardlink will
         * either free or bundle the hardlinks depending on the value of
         * head->hardlinks.is_head.
         */
        session->total_filtered_files -= rm_util_queue_foreach_remove(
            inode_cluster, (RmRFunc)rm_pp_handle_hardlink, head);
    }

    /* update counters */
    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);

    /* step 4: at most one file may be left over at this point */
    rm_assert_gentle(inode_cluster->length <= 1);
    if(inode_cluster->length == 1) {
        GSList *newest_group = tables->size_groups;
        newest_group->data =
            g_slist_prepend(newest_group->data, g_queue_peek_head(inode_cluster));
    }

    return TRUE;
}
Esempio n. 3
0
/* Classify a single traversed file and, if it is of interest, register it
 * with the session.
 *
 * Files that are rmlint outputs are skipped.  When file_type is
 * RM_LINT_TYPE_UNKNOWN the lint type is autodetected from statp, path and
 * the session configuration; files the configuration is not interested in
 * are skipped silently, while files with bad permissions or living on an
 * "evil" filesystem are skipped and counted in session->ignored_files.
 *
 * For everything that survives, an RmFile is created, the remaining flags
 * are stored on it, it is inserted into the session's file list and
 * session->total_files is incremented atomically.
 */
static void rm_traverse_file(RmTravSession *trav_session, RmStat *statp,
                             char *path,
                             bool is_prefd, unsigned long path_index,
                             RmLintType file_type, bool is_symlink, bool is_hidden,
                             bool is_on_subvol_fs, short depth) {
    RmSession *session = trav_session->session;
    RmCfg *cfg = session->cfg;

    /* never report our own output files */
    if(rm_fmt_is_a_output(session->formats, path)) {
        return;
    }

    /* Caller did not tell us the lint type: try to work it out */
    if(file_type == RM_LINT_TYPE_UNKNOWN) {
        RmLintType id_lint;

        if(statp->st_size == 0) {
            /* empty file: only relevant if we were asked to look for those */
            if(!cfg->find_emptyfiles) {
                return;
            }
            file_type = RM_LINT_TYPE_EMPTY_FILE;
        } else if(cfg->permissions && access(path, cfg->permissions) == -1) {
            /* bad permissions; ignore file */
            session->ignored_files++;
            return;
        } else if(cfg->find_badids &&
                  (id_lint = rm_util_uid_gid_check(statp, trav_session->userlist)) != 0) {
            /* the uid/gid check reported a lint type; use it */
            file_type = id_lint;
        } else if(cfg->find_nonstripped && rm_util_is_nonstripped(path, statp)) {
            file_type = RM_LINT_TYPE_NONSTRIPPED;
        } else {
            /* a limit of (RmOff)-1 means "no limit on that side" */
            RmOff size = statp->st_size;
            bool too_small = cfg->minsize != (RmOff)-1 && size < cfg->minsize;
            bool too_large = cfg->maxsize != (RmOff)-1 && cfg->maxsize < size;

            if(cfg->limits_specified && (too_small || too_large)) {
                /* outside the requested size window */
                return;
            }

            if(rm_mounts_is_evil(session->mounts, statp->st_dev)) {
                /* A file in an evil fs. Ignore. */
                session->ignored_files++;
                return;
            }

            file_type = RM_LINT_TYPE_DUPE_CANDIDATE;
        }
    }

    RmFile *file = rm_file_new(session, path, statp, file_type, is_prefd,
                               path_index, depth);
    if(file == NULL) {
        /* no file object was created; nothing to register */
        return;
    }

    file->is_symlink = is_symlink;
    file->is_hidden = is_hidden;
    file->is_on_subvol_fs = is_on_subvol_fs;
    file->link_count = statp->st_nlink;

    rm_file_list_insert_file(file, session);

    g_atomic_int_add(&session->total_files, 1);
    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_TRAVERSE);

    /* clear the xattr hash of duplicate candidates when so configured */
    if(cfg->clear_xattr_fields && file->lint_type == RM_LINT_TYPE_DUPE_CANDIDATE) {
        rm_xattr_clear_hash(session, file);
    }
}