示例#1
0
static void rm_tm_cluster_up(RmTreeMerger *self, RmDirectory *directory) {
    char *parent_dir = g_path_get_dirname(directory->dirname);
    bool is_root = strcmp(parent_dir, "/") == 0;

    /* Lookup if we already found this parent before (if yes, merge with it) */
    RmDirectory *parent = rm_trie_search(&self->dir_tree, parent_dir);

    if(parent == NULL) {
        /* none yet, basically copy child */
        parent = rm_directory_new(parent_dir);
        rm_trie_insert(&self->dir_tree, parent_dir, parent);

        /* Get the actual file count */
        parent->file_count =
            GPOINTER_TO_UINT(rm_trie_search(&self->count_tree, parent_dir));

    } else {
        g_free(parent_dir);
    }

    rm_directory_add_subdir(parent, directory);

    if(parent->dupe_count == parent->file_count && parent->file_count > 0) {
        rm_tm_insert_dir(self, parent);
        if(!is_root) {
            rm_tm_cluster_up(self, parent);
        }
    }
}
示例#2
0
int main(void) {
    RmTrie trie;
    rm_trie_init(&trie);
    GTimer *timer = g_timer_new();

    g_timer_start(timer);
    char buf[1024];
    int i = 0;

    while(fgets(buf, sizeof(buf), stdin)) {
        buf[strlen(buf) - 1] = 0;
        rm_trie_insert(&trie, buf, GUINT_TO_POINTER(++i));
        memset(buf, 0, sizeof(buf));
    }

    g_printerr("Took %2.5f to insert %d items\n", g_timer_elapsed(timer, NULL), i);
    rm_trie_print(&trie);
    memset(buf, 0, sizeof(buf));
    rm_trie_build_path(&trie, rm_trie_search_node(&trie, "/usr/bin/rmlint"), buf,
                       sizeof(buf));
    g_printerr("=> %s\n", buf);

    g_timer_start(timer);
    const int N = 10000000;
    for(int x = 0; x < N; x++) {
        rm_trie_search(&trie, "/usr/bin/rmlint");
    }
    g_printerr("Took %2.5f to search\n", g_timer_elapsed(timer, NULL));

    g_printerr("%u\n", GPOINTER_TO_UINT(rm_trie_search(&trie, "/usr/bin/rmlint")));
    g_printerr("%u\n", GPOINTER_TO_UINT(rm_trie_search(&trie, "/a/b/c")));
    rm_trie_destroy(&trie);
    g_timer_destroy(timer);
    return 0;
}
示例#3
0
void rm_file_set_path(RmFile *file, char *path, size_t path_len) {
    if(file->session->cfg->use_meta_cache == false) {
        file->folder = rm_trie_insert(&file->session->cfg->file_trie, path, NULL);
    } else {
        file->path_id = rm_swap_table_insert(file->session->meta_cache,
                                             file->session->meta_cache_path_id,
                                             (char *)path, path_len + 1);
    }
}
示例#4
0
int rm_tm_count_art_callback(_UNUSED RmTrie *self, RmNode *node, _UNUSED int level,
                             void *user_data) {
    /* Note: this method has a time complexity of O(log(n) * m) which may
       result in a few seconds buildup time for large sets of directories.  Since this
       will only happen when rmlint ran for long anyways and since we can keep the
       code easy and memory efficient this way, Im against more clever but longer
       solutions. (Good way of saying "Im just too stupid", eh?)
    */

    RmTrie *count_tree = user_data;
    bool error_flag = GPOINTER_TO_INT(node->data);

    char path[PATH_MAX];
    memset(path, 0, sizeof(path));
    rm_trie_build_path_unlocked(node, path, sizeof(path));

    /* Ascend the path parts up, add one for each part we meet.
       If a part was never found before, add it.
       This is the 'm' above: The count of separators in the path.

       Hack: path[key_len] is nul, at key_len it must be either an
             extra slash (bad) or the beginning of a file name.
             Therefore start at -2.
     */
    for(int i = strlen(path) - 1; i >= 0; --i) {
        if(path[i] == G_DIR_SEPARATOR) {
            /* Do not use an empty path, use a slash for root */
            if(i == 0) {
                path[0] = G_DIR_SEPARATOR;
                path[1] = 0;
            } else {
                path[i] = 0;
            }

            /* Include the nulbyte */
            int new_count = -1;

            if(error_flag == false) {
                /* Lookup the count on this level */
                int old_count = GPOINTER_TO_INT(rm_trie_search(count_tree, path));

                /* Propagate old error up or just increment the count */
                new_count = (old_count == -1) ? -1 : old_count + 1;
            }

            /* Accumulate the count ('n' above is the height of the trie)  */
            rm_trie_insert(count_tree, path, GINT_TO_POINTER(new_count));
        }
    }

    return 0;
}
示例#5
0
void rm_tm_feed(RmTreeMerger *self, RmFile *file) {
    RM_DEFINE_PATH(file);
    char *dirname = g_path_get_dirname(file_path);

    /* See if we know that directory already */
    RmDirectory *directory = rm_trie_search(&self->dir_tree, dirname);

    if(directory == NULL) {
        /* Get the actual file count */
        int file_count = GPOINTER_TO_INT(rm_trie_search(&self->count_tree, dirname));
        if(file_count == 0) {
            rm_log_error(
                RED "Empty directory or weird RmFile encountered; rejecting.\n" RESET);
            file_count = -1;
        }

        directory = rm_directory_new(dirname);
        directory->file_count = file_count;

        /* Make the new directory known */
        rm_trie_insert(&self->dir_tree, dirname, directory);

        g_queue_push_head(&self->valid_dirs, directory);
    } else {
        g_free(dirname);
    }

    g_queue_push_tail(self->free_list, file);
    rm_directory_add(directory, file);

    /* Add the file to this directory */
    g_queue_push_head(&directory->known_files, file);

    /* Remember the digest (if only to free it later...) */
    g_hash_table_replace(self->known_hashs, file->digest, NULL);

    /* Check if the directory reached the number of actual files in it */
    if(directory->dupe_count == directory->file_count && directory->file_count > 0) {
        rm_tm_insert_dir(self, directory);
    }
}
示例#6
0
static bool rm_tm_count_files(RmTrie *count_tree, char **paths, RmSession *session) {
    if(*paths == NULL) {
        rm_log_error("No paths passed to rm_tm_count_files\n");
        return false;
    }

    int fts_flags = FTS_COMFOLLOW;
    if(session->cfg->follow_symlinks) {
        fts_flags |= FTS_LOGICAL;
    } else {
        fts_flags |= FTS_PHYSICAL;
    }

    /* This tree stores the full file paths.
       It is joined into a full directory tree later.
     */
    RmTrie file_tree;
    rm_trie_init(&file_tree);

    FTS *fts = fts_open(paths, fts_flags, NULL);
    if(fts == NULL) {
        rm_log_perror("fts_open failed");
        return false;
    }

    FTSENT *ent = NULL;
    while((ent = fts_read(fts))) {
        /* Handle large files (where fts fails with FTS_NS) */
        if(ent->fts_info == FTS_NS) {
            RmStat stat_buf;
            if(rm_sys_stat(ent->fts_path, &stat_buf) == -1) {
                rm_log_perror("stat(2) failed");
                continue;
            } else {
                /* Must be a large file (or followed link to it) */
                ent->fts_info = FTS_F;
            }
        }

        switch(ent->fts_info) {
        case FTS_ERR:
        case FTS_DC:
            /* Save this path as an error */
            rm_trie_insert(&file_tree, ent->fts_path, GINT_TO_POINTER(true));
            break;
        case FTS_F:
        case FTS_SL:
        case FTS_NS:
        case FTS_SLNONE:
        case FTS_DEFAULT:
            /* Save this path as countable file */
            if(ent->fts_statp->st_size > 0) {
                rm_trie_insert(&file_tree, ent->fts_path, GINT_TO_POINTER(false));
            }
        case FTS_D:
        case FTS_DNR:
        case FTS_DOT:
        case FTS_DP:
        case FTS_NSOK:
        default:
            /* other fts states, that do not count as errors or files */
            break;
        }
    }

    if(fts_close(fts) != 0) {
        rm_log_perror("fts_close failed");
        return false;
    }

    rm_trie_iter(&file_tree, NULL, true, false, rm_tm_count_art_callback, count_tree);

    /* Now flag everything as a no-go over the given paths,
     * otherwise we would continue merging till / with fatal consequences,
     * since / does not have more files as paths[0]
     */
    for(int i = 0; paths[i]; ++i) {
        /* Just call the callback directly */
        RmNode *node = rm_trie_search_node(&file_tree, paths[i]);
        if(node != NULL) {
            node->data = GINT_TO_POINTER(true);
            rm_tm_count_art_callback(&file_tree, node, 0, count_tree);
        }
    }

#ifdef _RM_TREEMERGE_DEBUG
    rm_trie_print(count_tree);
#endif

    rm_trie_destroy(&file_tree);
    return true;
}