Пример #1
0
/* Progress-bar formatter callback, invoked with the current progress state.
 *
 * session: running session; its formats, replay_files are read here.
 * parent:  handler instance; cast to RmFmtHandlerProgress for per-bar state.
 * out:     stream the escape sequences are written to.
 * state:   progress state this call is reporting.
 *
 * NOTE(review): this excerpt is truncated - the drawing part of the function
 * and its closing brace are not visible here.
 */
static void rm_fmt_prog(RmSession *session,
                        RmFmtHandler *parent,
                        FILE *out,
                        RmFmtProgressState state) {
    RmFmtHandlerProgress *self = (RmFmtHandlerProgress *)parent;

    /* Set on the very first call (when the timer gets created); it is not
     * read within the visible part of the function. */
    bool force_draw = false;

    /* Lazily create the timer on first use. */
    if(self->timer == NULL) {
        self->timer = g_timer_new();
        force_draw = true;
    }

    /* Nothing to do for the summary state. */
    if(state == RM_PROGRESS_STATE_SUMMARY) {
        return;
    }

    if(session->replay_files.length > 0) {
        /* A progress bar makes little sense together with --replay. */
        return;
    }

    if(state == RM_PROGRESS_STATE_INIT) {
        /* One-time initialization: read the "progressbar" config values. */
        const char *update_interval_str =
            rm_fmt_get_config_value(session->formats, "progressbar", "update_interval");

        /* Plain output unless a "fancy" config value is present. */
        self->plain = true;
        if(rm_fmt_get_config_value(session->formats, "progressbar", "fancy") != NULL) {
            self->plain = false;
        }

        /* Unicode glyphs unless an "ascii" config value is present. */
        self->use_unicode_glyphs = true;
        if(rm_fmt_get_config_value(session->formats, "progressbar", "ascii") != NULL) {
            self->use_unicode_glyphs = false;
        }

        /* The interval is parsed as a base-10 unsigned number. */
        if(update_interval_str) {
            self->update_interval = g_ascii_strtoull(update_interval_str, NULL, 10);
        }

        /* Fall back to the default when the interval is (still) zero. */
        if(self->update_interval == 0) {
            self->update_interval = 50; /* milliseconds */
        }

        fprintf(out, "\e[?25l"); /* Hide the cursor */
        fflush(out);
        return;
    }

    /* Make the cursor visible again before shutdown or after an abort. */
    if(state == RM_PROGRESS_STATE_PRE_SHUTDOWN || rm_session_was_aborted()) {
        fprintf(out, "\e[?25h"); /* show the cursor */
        fflush(out);

        /* After an abort nothing further is done. */
        if(rm_session_was_aborted()) {
            return;
        }
    }
Пример #2
0
/* Progress-bar formatter callback, invoked with the current progress state.
 *
 * session: running session; its formats table is queried for config values.
 * parent:  handler instance; cast to RmFmtHandlerProgress for per-bar state.
 * out:     stream the escape sequences are written to.
 * state:   progress state this call is reporting.
 *
 * NOTE(review): this excerpt is truncated - the drawing part of the function
 * and its closing brace are not visible here.
 */
static void rm_fmt_prog(RmSession *session,
                        RmFmtHandler *parent,
                        FILE *out,
                        RmFmtProgressState state) {
    RmFmtHandlerProgress *self = (RmFmtHandlerProgress *)parent;
    /* Nothing to do for the summary state. */
    if(state == RM_PROGRESS_STATE_SUMMARY) {
        return;
    }

    if(state == RM_PROGRESS_STATE_INIT) {
        /* One-time initialization: read the "progressbar" config values. */
        const char *update_interval_str =
            rm_fmt_get_config_value(session->formats, "progressbar", "update_interval");

        /* Plain output unless a "fancy" config value is present. */
        self->plain = true;
        if(rm_fmt_get_config_value(session->formats, "progressbar", "fancy") != NULL) {
            self->plain = false;
        }

        /* Unicode glyphs unless an "ascii" config value is present. */
        self->use_unicode_glyphs = true;
        if(rm_fmt_get_config_value(session->formats, "progressbar", "ascii") != NULL) {
            self->use_unicode_glyphs = false;
        }

        /* The interval is parsed as a base-10 unsigned number. */
        if(update_interval_str) {
            self->update_interval = g_ascii_strtoull(update_interval_str, NULL, 10);
        }

        /* Fall back to the default when the interval is (still) zero.
         * NOTE(review): unit is presumably milliseconds - confirm. */
        if(self->update_interval == 0) {
            self->update_interval = 50;
        }

        /* Reset the remaining per-run state. */
        self->last_unknown_pos = 0;
        /* NOTE(review): starts at 1 rather than 0, presumably to avoid a
         * division by zero later on - confirm against the drawing code. */
        self->total_lint_bytes = 1;

        fprintf(out, "\e[?25l"); /* Hide the cursor */
        fflush(out);
        return;
    }

    /* Make the cursor visible again before shutdown or after an abort. */
    if(state == RM_PROGRESS_STATE_PRE_SHUTDOWN || rm_session_was_aborted(session)) {
        fprintf(out, "\e[?25h"); /* show the cursor */
        fflush(out);

        /* After an abort nothing further is done. */
        if(rm_session_was_aborted(session)) {
            return;
        }
    }
Пример #3
0
/* Progress-bar formatter callback, invoked with the current progress state.
 *
 * session: running session; its formats table is queried for config values.
 * parent:  handler instance; cast to RmFmtHandlerProgress for per-bar state.
 * out:     stream the escape sequences are written to.
 * state:   progress state this call is reporting.
 *
 * NOTE(review): this excerpt is truncated - the rest of the function and its
 * closing brace are not visible here.
 */
static void rm_fmt_prog(
    RmSession *session,
    RmFmtHandler *parent,
    FILE *out,
    RmFmtProgressState state
) {
    RmFmtHandlerProgress *self = (RmFmtHandlerProgress *) parent;

    if(state == RM_PROGRESS_STATE_INIT) {
        /* One-time initialization: read the "progressbar" config values. */
        const char *update_interval_str = rm_fmt_get_config_value(
                                              session->formats, "progressbar", "update_interval"
                                          );

        /* The interval is parsed as a base-10 unsigned number. */
        if(update_interval_str) {
            self->update_interval = g_ascii_strtoull(update_interval_str, NULL, 10);
        }

        /* Fall back to the default when the interval is (still) zero.
         * NOTE(review): unit is presumably milliseconds - confirm. */
        if(self->update_interval == 0) {
            self->update_interval = 50;
        }

        self->last_unknown_pos = 0;

        fprintf(out, "\e[?25l"); /* Hide the cursor */
        fflush(out);
        return;
    }

    /* Make the cursor visible again on summary or after an abort; unlike the
     * init branch this one does not return, so execution continues below. */
    if(state == RM_PROGRESS_STATE_SUMMARY || rm_session_was_aborted(session)) {
        fprintf(out, "\e[?25h"); /* show the cursor */
        fflush(out);
    }
Пример #4
0
/* Footer callback of the JSON formatter.
 *
 * Emits the last element of the JSON array - either an empty object when the
 * "no_footer" config value is set for "json", or an object holding the
 * session statistics - then closes the array and drops the reference to the
 * handler's id set.
 *
 * session: session whose counters are written out.
 * parent:  handler instance; cast to RmFmtHandlerJSON.
 * out:     stream the JSON text is written to.
 */
static void rm_fmt_foot(_UNUSED RmSession *session, RmFmtHandler *parent, FILE *out) {
    RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent;
    const char *no_footer = rm_fmt_get_config_value(session->formats, "json", "no_footer");

    if(no_footer == NULL) {
        /* Full footer: one key per statistic, separated by the formatter's
         * separator. */
        rm_fmt_json_open(self, out);

        rm_fmt_json_key_bool(out, "aborted", rm_session_was_aborted());
        rm_fmt_json_sep(self, out);

        /* The footer is always written last, hence 100 percent. */
        rm_fmt_json_key_int(out, "progress", 100);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "total_files", session->total_files);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "ignored_files", session->ignored_files);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "ignored_folders", session->ignored_folders);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "duplicates", session->dup_counter);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "duplicate_sets", session->dup_group_counter);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "total_lint_size", session->total_lint_size);

        /* Pretty mode puts the closing brace on its own line. */
        fputs(self->pretty ? "\n}" : "}\n", out);
    } else {
        /* Footer suppressed: still emit an (empty) object. */
        fputs("{}", out);
    }

    /* Close the surrounding array and release the id set. */
    fputs("]\n", out);
    g_hash_table_unref(self->id_set);
}
Пример #5
0
/* Preprocessing stage, including the handling of "other lint" (non-dupes).
 *
 * All files are sorted, then consumed one by one from the front of the
 * queue. Files are clustered in the node table until the next file no longer
 * belongs to the same group; the finished group is then handed to
 * rm_pp_handle_inode_clusters().
 *
 * After rm_preprocess(), all remaining duplicate candidates are in
 * a jagged GSList of GSLists as follows:
 * session->tables->size_groups->group1->file1a
 *                                     ->file1b
 *                                     ->file1c
 *                             ->group2->file2a
 *                                     ->file2b
 *                                       etc
 */
void rm_preprocess(RmSession *session) {
    RmFileTables *tables = session->tables;
    GQueue *all_files = tables->all_files;

    session->total_filtered_files = session->total_files;

    /* Sort first, so that members of one group are adjacent in the queue. */
    g_queue_sort(all_files, (GCompareDataFunc)rm_file_cmp_full, session);
    rm_log_debug_line("initial size sort finished at time %.3f; sorted %d files",
                      g_timer_elapsed(session->timer, NULL),
                      session->total_files);

    rm_assert_gentle(all_files->head);

    RmFile *cursor = g_queue_pop_head(all_files);
    RmFile *group_first = cursor;
    guint removed_count = 0;
    GHashTable *inode_map = tables->node_table;

    while(cursor != NULL && !rm_session_was_aborted()) {
        /* Put the file into its inode cluster (created on demand). */
        GQueue *cluster =
            rm_hash_table_setdefault(inode_map, cursor, (RmNewFunc)g_queue_new);
        g_queue_push_tail(cluster, cursor);

        /* Fetch the successor; the group ends when the queue is drained or
         * the successor compares different from the group's first file. */
        cursor = g_queue_pop_head(all_files);
        gboolean group_complete =
            (cursor == NULL) || (rm_file_cmp_split(cursor, group_first, session) != 0);
        group_first = cursor;

        if(!group_complete) {
            continue;
        }

        /* Group complete: open a fresh (empty) list in the list of lists. */
        tables->size_groups = g_slist_prepend(tables->size_groups, NULL);

        /* Remove path doubles and handle "other" lint for every cluster. */
        removed_count += g_hash_table_foreach_remove(
            inode_map, (GHRFunc)rm_pp_handle_inode_clusters, session);

        /* Empty the node table so the next group starts clean. */
        g_hash_table_steal_all(inode_map);

        /* Nothing survived for this group; drop the empty list again. */
        if(tables->size_groups->data == NULL) {
            tables->size_groups =
                g_slist_delete_link(tables->size_groups, tables->size_groups);
        }
    }

    session->other_lint_cnt += rm_pp_handler_other_lint(session);

    rm_log_debug_line(
        "path doubles removal/hardlink bundling/other lint finished at %.3f; removed %u "
        "of %d",
        g_timer_elapsed(session->timer, NULL), removed_count, session->total_files);

    rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
}
Пример #6
0
/* Final step of the tree merger.
 *
 * Sorts the valid directories with rm_tm_sort_paths_reverse, runs
 * rm_tm_cluster_up() on each of them and, unless the session was aborted,
 * continues with rm_tm_extract().
 */
void rm_tm_finish(RmTreeMerger *self) {
    g_queue_sort(&self->valid_dirs, (GCompareDataFunc)rm_tm_sort_paths_reverse, self);

    /* Try to level every valid directory up through its parent layers. */
    GList *node = self->valid_dirs.head;
    while(node != NULL) {
        rm_tm_cluster_up(self, (RmDirectory *)node->data);
#ifdef _RM_TREEMERGE_DEBUG
        g_printerr("###\n");
#endif
        node = node->next;
    }

    /* An aborted session skips the extraction phase entirely. */
    if(rm_session_was_aborted(self->session)) {
        return;
    }

    rm_tm_extract(self);
}
Пример #7
0
/* Extraction phase of the tree merger.
 *
 * Works in three steps:
 *  1. Walk the groups stored in self->result_table, pick the not yet
 *     finished directories of every group with at least two members, decide
 *     which one is the original and forward the group to the output.
 *  2. Iterate self->dir_tree with rm_tm_iter_unfinished_files.
 *  3. Walk self->file_groups and either forward the remaining file groups to
 *     the output or, when separate duplicates are not requested, subtract
 *     them from the session counters.
 */
static void rm_tm_extract(RmTreeMerger *self) {
    /* Iterate over all directories per hash (which are therefore the same),
     * in the order defined by rm_tm_cmp_directory_groups. */
    GList *result_table_values = g_hash_table_get_values(self->result_table);
    result_table_values =
        g_list_sort(result_table_values, (GCompareFunc)rm_tm_cmp_directory_groups);

    for(GList *iter = result_table_values; iter; iter = iter->next) {
        /* Each value is a queue of RmDirectory entries. */
        GQueue *dir_list = iter->data;

#ifdef _RM_TREEMERGE_DEBUG
        for(GList *i = dir_list->head; i; i = i->next) {
            RmDirectory *d = i->data;
            char buf[512];
            memset(buf, 0, sizeof(buf));
            rm_digest_hexstring(d->digest, buf);
            g_printerr("    mergeups=%" LLU ": %s - %s\n", d->mergeups, d->dirname, buf);
        }
        g_printerr("---\n");
#endif
        /* Needs at least two directories to be a duplicate group. */
        if(dir_list->length < 2) {
            continue;
        }

        /* Stop processing further groups once the session was aborted. */
        if(rm_session_was_aborted(self->session)) {
            break;
        }

        /* List of result directories */
        GQueue result_dirs = G_QUEUE_INIT;

        /* Sort the RmDirectory list by their path depth, lowest depth first */
        g_queue_sort(dir_list, (GCompareDataFunc)rm_tm_sort_paths, self);

        /* Output the directories and mark their children to prevent
         * duplicate directory reports in lower levels.
         * (This inner `iter` shadows the outer loop variable.)
         */
        for(GList *iter = dir_list->head; iter; iter = iter->next) {
            RmDirectory *directory = iter->data;
            if(directory->finished == false) {
                rm_tm_mark_finished(self, directory);
                g_queue_push_head(&result_dirs, directory);
            }
        }

        /* Make sure the original directory lands as first
         * in the result_dirs queue.
         */
        g_queue_sort(&result_dirs, (GCompareDataFunc)rm_tm_sort_orig_criteria, self);

        /* One RmFile "mask" per result directory, so the directories can be
         * forwarded to the output like regular files. */
        GQueue file_adaptor_group = G_QUEUE_INIT;

        for(GList *iter = result_dirs.head; iter; iter = iter->next) {
            RmDirectory *directory = iter->data;
            RmFile *mask = rm_directory_as_file(self, directory);
            g_queue_push_tail(&file_adaptor_group, mask);

            if(iter == result_dirs.head) {
                /* First one in the group -> It's the original */
                mask->is_original = true;
                rm_tm_mark_original_files(self, directory);
            } else {
                if(rm_tm_mark_duplicate_files(self, directory, 0) ==
                   directory->dupe_count) {
                    /* Mark the file as original when all files in it are preferred. */
                    mask->is_original = true;
                    rm_tm_mark_original_files(self, directory);
                }
            }

            if(self->session->cfg->write_unfinished) {
                rm_tm_write_unfinished_cksums(self, directory);
            }
        }

        /* Only groups with at least two unfinished directories are reported. */
        if(result_dirs.length >= 2) {
            rm_shred_forward_to_output(self->session, &file_adaptor_group);
        }

        /* The masks were created for this group only; free them with g_free
         * and reset both temporary queues. */
        g_queue_foreach(&file_adaptor_group, (GFunc)g_free, NULL);
        g_queue_clear(&file_adaptor_group);
        g_queue_clear(&result_dirs);
    }

    g_list_free(result_table_values);

    /* Iterate over all non-finished dirs in the tree,
     * and grab unfinished files that must be dupes elsewhere.
     */
    rm_trie_iter(&self->dir_tree, NULL, true, false, rm_tm_iter_unfinished_files, self);

    /* Now here's a problem. Consider an input like this:
     *  /root
     *  ├── a
     *  ├── sub1
     *  │   ├── a
     *  │   └── b
     *  └── sub2
     *      ├── a
     *      └── b
     *
     *  This yields two duplicate dirs (sub1, sub2)
     *  and one duplicate, unmatched file (a).
     *
     *  For outputting files we need groups, which consist of at least 2 files.
     *  So how to group that, so we don't end up deleting a file many times?
     *  We always choose which directories are originals first, so we flag all
     *  files in it as originals.
     */
    GHashTableIter iter;
    g_hash_table_iter_init(&iter, self->file_groups);

    /* Each value of file_groups is a queue of RmFile entries. */
    GQueue *file_list = NULL;
    while(g_hash_table_iter_next(&iter, NULL, (void **)&file_list)) {
        /* NOTE(review): has_one_dupe is accumulated below but never read in
         * this function. */
        bool has_one_dupe = false;
        /* Sum of the sizes of all non-head files not found in file_checks. */
        RmOff file_size_acc = 0;

        /* `next` is saved up front because the current link may be deleted
         * while iterating. */
        GList *next = NULL;
        for(GList *iter = file_list->head; iter; iter = next) {
            RmFile *file = iter->data;
            next = iter->next;

            bool is_duplicate = g_hash_table_contains(self->file_checks, file->digest);
            has_one_dupe |= is_duplicate;

            /* With --partial-hidden, hidden files are dropped from the group
             * and therefore not written to the output. */
            if(self->session->cfg->partial_hidden && file->is_hidden) {
                g_queue_delete_link(file_list, iter);
                continue;
            }

            if(iter != file_list->head && !is_duplicate) {
                file_size_acc += file->file_size;
            }
        }

        if(file_list->length >= 2) {
            /* If no separate duplicate files are requested, we can stop here
             * and take this group out of the session statistics again. */
            if(self->session->cfg->find_duplicates == false) {
                self->session->total_lint_size -= file_size_acc;
                self->session->dup_group_counter -= 1;
                self->session->dup_counter -= file_list->length - 1;
            } else {
                rm_shred_group_find_original(self->session, file_list);
                rm_shred_forward_to_output(self->session, file_list);
            }
        }
    }
}
Пример #8
0
/* Summary formatter callback: prints the closing statistics of a run.
 *
 * Only acts on RM_PROGRESS_STATE_SUMMARY; every other state returns
 * immediately. The output consists of:
 *  - a short notice when at most one file was investigated (and nothing else),
 *  - an "early shutdown" notice when the session was aborted,
 *  - a hint to use the saved script when both "pretty" and "sh" are active,
 *  - the total/duplicate/group counters and the removable size,
 *  - the count of other lint items, if any,
 *  - one "Wrote a ... file to: ..." line per readable output file.
 *
 * session: session whose counters and formatters are reported.
 * parent:  unused.
 * out:     stream the summary is written to.
 * state:   progress state this call is reporting.
 */
static void rm_fmt_prog(RmSession *session,
                        _UNUSED RmFmtHandler *parent,
                        _UNUSED FILE *out,
                        RmFmtProgressState state) {
    if(state != RM_PROGRESS_STATE_SUMMARY) {
        return;
    }

    if(session->total_files <= 1) {
        ARROW fprintf(out, "%s%d%s", MAYBE_RED(out, session), session->total_files,
                      MAYBE_RESET(out, session));
        fprintf(out, _(" file(s) after investigation, nothing to search through.\n"));
        return;
    }

    if(rm_session_was_aborted()) {
        /* Clear the whole terminal line.
         * Progressbar might leave some junk.
         *
         * The window size query fails when `out` is not a terminal (e.g. it
         * is redirected to a file or pipe). In that case the struct must not
         * be trusted, so no padding is written at all.
         */
        struct winsize terminal = {0};
        if(ioctl(fileno(out), TIOCGWINSZ, &terminal) != 0) {
            terminal.ws_col = 0;
        }

        for(int i = 0; i < terminal.ws_col; ++i) {
            fprintf(out, " ");
        }

        fprintf(out, "\n");
        ARROW fprintf(out, _("Early shutdown, probably not all lint was found.\n"));
    }

    if(rm_fmt_has_formatter(session->formats, "pretty") &&
       rm_fmt_has_formatter(session->formats, "sh")) {
        ARROW fprintf(out, _("Note: Please use the saved script below for removal, not "
                             "the above output."));
        fprintf(out, "\n");
    }

    /* Pre-render the (possibly colored) numbers, so the translated sentence
     * below only needs plain %s placeholders. */
    char numbers[3][512];
    snprintf(numbers[0], sizeof(numbers[0]), "%s%d%s", MAYBE_RED(out, session),
             session->total_files, MAYBE_RESET(out, session));
    snprintf(numbers[1], sizeof(numbers[1]), "%s%" LLU "%s", MAYBE_RED(out, session),
             session->dup_counter, MAYBE_RESET(out, session));
    snprintf(numbers[2], sizeof(numbers[2]), "%s%" LLU "%s", MAYBE_RED(out, session),
             session->dup_group_counter, MAYBE_RESET(out, session));

    ARROW fprintf(out, _("In total %s files, whereof %s are duplicates in %s groups.\n"),
                  numbers[0], numbers[1], numbers[2]);

    /* log10(2 ** 64) + 2 = 21; */
    char size_string_buf[22] = {0};
    rm_util_size_to_human_readable(session->total_lint_size, size_string_buf,
                                   sizeof(size_string_buf));

    ARROW fprintf(out, _("This equals %s%s%s of duplicates which could be removed.\n"),
                  MAYBE_RED(out, session), size_string_buf, MAYBE_RESET(out, session));

    if(session->other_lint_cnt > 0) {
        ARROW fprintf(out, "%s%" LLU "%s ", MAYBE_RED(out, session),
                      session->other_lint_cnt, MAYBE_RESET(out, session));

        fprintf(out, _("other suspicious item(s) found, which may vary in size.\n"));
    }

    /* List every output file that was written; a single blank line is
     * emitted before the first entry. */
    bool first_print_flag = true;
    GHashTableIter iter;
    char *path = NULL;
    RmFmtHandler *handler = NULL;
    rm_fmt_get_pair_iter(session->formats, &iter);

    while(g_hash_table_iter_next(&iter, (gpointer *)&path, (gpointer *)&handler)) {
        /* The standard streams are not files worth reporting. */
        static const char *forbidden[] = {"stdout", "stderr", "stdin"};
        gsize forbidden_len = sizeof(forbidden) / sizeof(forbidden[0]);
        bool forbidden_found = false;

        for(gsize i = 0; i < forbidden_len; i++) {
            if(g_strcmp0(forbidden[i], path) == 0) {
                forbidden_found = true;
                break;
            }
        }

        if(forbidden_found) {
            continue;
        }

        /* Check if the file really exists, so we can print it for sure */
        if(access(path, R_OK) == -1) {
            continue;
        }

        if(first_print_flag) {
            fprintf(out, "\n");
            first_print_flag = false;
        }

        fprintf(out, _("Wrote a %s%s%s file to: %s%s%s\n"), MAYBE_BLUE(out, session),
                handler->name, MAYBE_RESET(out, session), MAYBE_GREEN(out, session), path,
                MAYBE_RESET(out, session));
    }
}
Пример #9
0
/* Summary formatter callback: prints the closing statistics of a run.
 *
 * Does nothing unless `state` is RM_PROGRESS_STATE_SUMMARY. Otherwise it
 * writes the file/duplicate/group counters, the removable size, the count of
 * other lint items (if any) and one line per readable output file to `out`.
 *
 * session: session whose counters and formatters are reported.
 * parent:  unused.
 * out:     stream the summary is written to.
 * state:   progress state this call is reporting.
 */
static void rm_fmt_prog(
    RmSession *session,
    _U RmFmtHandler *parent,
    _U FILE *out,
    RmFmtProgressState state
) {
    if(state != RM_PROGRESS_STATE_SUMMARY) {
        return;
    }

    /* With at most one file there is nothing to compare; say so and stop. */
    if(session->total_files <= 1) {
        ARROW fprintf(out, "%s%"LLU"%s", MAYBE_RED(session), session->total_files, MAYBE_RESET(session));
        fprintf(out, _(" file(s) after investigation, nothing to search through.\n"));
        return;
    }

    if(rm_session_was_aborted(session)) {
        ARROW fprintf(out, _("Early shutdown, probably not all lint was found.\n"));
    }

    /* Render the (possibly colored) counters first, so the translated
     * sentence only needs plain %s placeholders. */
    char total_str[512];
    char dupes_str[512];
    char groups_str[512];
    snprintf(total_str, sizeof(total_str), "%s%"LLU"%s",
             MAYBE_RED(session), session->total_files, MAYBE_RESET(session));
    snprintf(dupes_str, sizeof(dupes_str), "%s%"LLU"%s",
             MAYBE_RED(session), session->dup_counter, MAYBE_RESET(session));
    snprintf(groups_str, sizeof(groups_str), "%s%"LLU"%s",
             MAYBE_RED(session), session->dup_group_counter, MAYBE_RESET(session));

    ARROW fprintf(out, _("In total %s files, whereof %s are duplicates in %s groups.\n"),
                  total_str, dupes_str, groups_str);

    /* log10(2 ** 64) + 2 = 21; */
    char lint_size_str[22] = {0};
    rm_util_size_to_human_readable(session->total_lint_size, lint_size_str, sizeof(lint_size_str));

    ARROW fprintf(out, _("This equals %s%s%s of duplicates which could be removed.\n"),
                  MAYBE_RED(session), lint_size_str, MAYBE_RESET(session));

    if(session->other_lint_cnt > 0) {
        ARROW fprintf(out, "%s%"LLU"%s ",
                      MAYBE_RED(session), session->other_lint_cnt, MAYBE_RESET(session));

        fprintf(out, _("other suspicious item(s) found, which may vary in size.\n"));
    }

    /* Report every output file that was written; a single blank line
     * separates this list from the statistics above. */
    bool printed_any = false;
    char *path = NULL;
    RmFmtHandler *handler = NULL;
    GHashTableIter iter;
    rm_fmt_get_pair_iter(session->formats, &iter);

    while(g_hash_table_iter_next(&iter, (gpointer *)&path, (gpointer *)&handler)) {
        static const char *forbidden[] = {"stdout", "stderr", "stdin"};
        gsize forbidden_len = sizeof(forbidden) / sizeof(forbidden[0]);

        /* Skip the standard streams, and anything that is not a readable
         * file (checked only when the name is not one of the streams). */
        if(lfind(path, forbidden, &forbidden_len, sizeof(const char *), rm_fmt_summary_cmp) != NULL
           || access(path, R_OK) == -1) {
            continue;
        }

        if(!printed_any) {
            fprintf(out, "\n");
            printed_any = true;
        }

        fprintf(out, _("Wrote a %s%s%s file to %s%s%s.\n"),
                MAYBE_BLUE(session), handler->name, MAYBE_RESET(session),
                MAYBE_GREEN(session), path, MAYBE_RESET(session));
    }
}