/**
 * Allocate a tree merger for `session` and set up all of its containers.
 *
 * The merger keeps a borrowed pointer to `session`. Once every container is
 * ready, the configured search paths (session->cfg->paths) are handed to
 * rm_tm_chunk_paths().
 *
 * @param session  session the merger belongs to; also supplies the path list.
 * @return the freshly allocated merger (slice-allocated via g_slice_new).
 */
RmTreeMerger *rm_tm_new(RmSession *session) {
    /* Callback casts shared by several of the tables below. */
    GHashFunc digest_hash = (GHashFunc)rm_digest_hash;
    GEqualFunc digest_equal = (GEqualFunc)rm_digest_equal;
    GDestroyNotify free_queue = (GDestroyNotify)g_queue_free;

    RmTreeMerger *merger = g_slice_new(RmTreeMerger);
    merger->session = session;

    g_queue_init(&merger->valid_dirs);

    /* Keyed by directory; the stored queues are freed together with the table. */
    merger->result_table = g_hash_table_new_full(
        (GHashFunc)rm_directory_hash, (GEqualFunc)rm_directory_equal, NULL, free_queue);

    /* Keyed by digest; the stored queues are freed together with the table. */
    merger->file_groups = g_hash_table_new_full(digest_hash, digest_equal, NULL, free_queue);

    /* Keyed by digest; neither keys nor values get a destroy callback. */
    merger->file_checks = g_hash_table_new_full(digest_hash, digest_equal, NULL, NULL);

    /* NULL hash/equal callbacks, i.e. GLib's default key handling. */
    merger->known_hashs = g_hash_table_new_full(NULL, NULL, NULL, NULL);

    rm_trie_init(&merger->dir_tree);
    rm_trie_init(&merger->count_tree);

    /* Must come last: it receives the fully initialised merger. */
    rm_tm_chunk_paths(merger, session->cfg->paths);

    return merger;
}
/*
 * Small stand-alone driver for RmTrie.
 *
 * Reads one path per line from stdin and inserts each into a trie, using the
 * running line number as the value. It then prints the trie, rebuilds the path
 * of "/usr/bin/rmlint" from its node, times repeated lookups of that path and
 * finally prints the values stored for two sample paths. All diagnostics go
 * through g_printerr().
 */
int main(void) {
    RmTrie trie;
    rm_trie_init(&trie);

    GTimer *timer = g_timer_new();
    g_timer_start(timer);

    char buf[1024];
    int i = 0;

    while(fgets(buf, sizeof(buf), stdin)) {
        /* Cut the line at its newline, if there is one. Chopping the last
         * byte unconditionally would eat a real character on a final line
         * without a trailing newline or on an over-long line, and would
         * index buf[-1] if fgets() handed back an empty string. */
        buf[strcspn(buf, "\n")] = 0;

        rm_trie_insert(&trie, buf, GUINT_TO_POINTER(++i));
        memset(buf, 0, sizeof(buf));
    }

    g_printerr("Took %2.5f to insert %d items\n", g_timer_elapsed(timer, NULL), i);
    rm_trie_print(&trie);

    /* Rebuild the full path from the node that belongs to a known entry. */
    memset(buf, 0, sizeof(buf));
    rm_trie_build_path(&trie, rm_trie_search_node(&trie, "/usr/bin/rmlint"), buf,
                       sizeof(buf));
    g_printerr("=> %s\n", buf);

    /* Restart the timer and measure N lookups of the same key. */
    g_timer_start(timer);
    const int N = 10000000;
    for(int x = 0; x < N; x++) {
        rm_trie_search(&trie, "/usr/bin/rmlint");
    }
    g_printerr("Took %2.5f to search\n", g_timer_elapsed(timer, NULL));

    g_printerr("%u\n", GPOINTER_TO_UINT(rm_trie_search(&trie, "/usr/bin/rmlint")));
    g_printerr("%u\n", GPOINTER_TO_UINT(rm_trie_search(&trie, "/a/b/c")));

    rm_trie_destroy(&trie);
    g_timer_destroy(timer);
    return 0;
}
/**
 * Fill `cfg` with the built-in defaults.
 *
 * Options that are not specified on the command line keep the values set
 * here; this is usually called before rm_cmd_parse_args.
 *
 * The struct is zeroed first, so whatever `cfg` held before is overwritten
 * without being released. `sort_criteria` receives a fresh g_strdup() copy.
 *
 * @param cfg  configuration to reset; must point to writable storage.
 */
void rm_cfg_set_default(RmCfg *cfg) {
    /* Start from all-zero; everything below is an explicit default. */
    memset(cfg, 0, sizeof(*cfg));

    /* Traversal limits */
    cfg->depth = PATH_MAX / 2;
    cfg->minsize = 0;
    cfg->maxsize = G_MAXUINT64;
    cfg->follow_symlinks = false;
    cfg->crossdev = true;
    cfg->ignore_hidden = true;

    /* Which kinds of lint to look for */
    cfg->find_duplicates = true;
    cfg->find_hardlinked_dupes = true;
    cfg->find_emptydirs = true;
    cfg->find_emptyfiles = true;
    cfg->find_badids = true;
    cfg->find_badlinks = true;

    /* Disk layout helpers */
    cfg->build_fiemap = true;
    cfg->list_mounts = true;

    /* Output */
    cfg->with_color = true;
    cfg->with_stdout_color = true;
    cfg->with_stderr_color = true;
    cfg->verbosity = G_LOG_LEVEL_INFO;

    /* Hashing and ordering */
    cfg->checksum_type = RM_DEFAULT_DIGEST;
    cfg->sort_criteria = g_strdup("pOma");
    cfg->mtime_window = -1;

    /* Threading and memory budget */
    cfg->threads = 16;
    cfg->threads_per_disk = 2;
    cfg->total_mem = (RmOff) 1024 * 1024 * 1024;
    cfg->sweep_size = 1024 * 1024 * 1024;
    cfg->sweep_count = 1024 * 16;

    /* Portion of each file to consider: relative factors by default,
     * absolute offsets switched off. */
    cfg->skip_start_factor = 0.0;
    cfg->skip_end_factor = 1.0;
    cfg->use_absolute_start_offset = false;
    cfg->use_absolute_end_offset = false;
    cfg->skip_start_offset = 0;
    cfg->skip_end_offset = 0;

    rm_trie_init(&cfg->file_trie);
}
/**
 * Walk `paths` with fts(3) and feed the result into `count_tree`.
 *
 * Every non-empty file-like entry is stored in a temporary trie with the value
 * `false`; entries fts reports as errors or directory cycles are stored with
 * the value `true`. The temporary trie is then iterated with
 * rm_tm_count_art_callback(), which receives `count_tree` as user data.
 * Finally each of the given root paths is flagged with `true` and passed to
 * the callback directly, so that merging does not continue above them.
 *
 * @param count_tree  trie handed to rm_tm_count_art_callback() as user data.
 * @param paths       NULL-terminated array of root paths to traverse.
 * @param session     session; only cfg->follow_symlinks is read here.
 * @return true on success; false if no paths were given or if fts_open() or
 *         fts_close() failed.
 */
static bool rm_tm_count_files(RmTrie *count_tree, char **paths, RmSession *session) {
    if(paths == NULL || *paths == NULL) {
        rm_log_error("No paths passed to rm_tm_count_files\n");
        return false;
    }

    int fts_flags = FTS_COMFOLLOW;
    if(session->cfg->follow_symlinks) {
        fts_flags |= FTS_LOGICAL;
    } else {
        fts_flags |= FTS_PHYSICAL;
    }

    FTS *fts = fts_open(paths, fts_flags, NULL);
    if(fts == NULL) {
        rm_log_perror("fts_open failed");
        return false;
    }

    /* This tree stores the full file paths.
     * It is joined into a full directory tree later.
     * It is only initialised once fts_open() succeeded, so the early return
     * above has nothing to release. */
    RmTrie file_tree;
    rm_trie_init(&file_tree);

    FTSENT *ent = NULL;
    while((ent = fts_read(fts)) != NULL) {
        RmStat stat_buf;
        bool have_own_stat = false;

        /* Handle large files (where fts fails with FTS_NS) */
        if(ent->fts_info == FTS_NS) {
            if(rm_sys_stat(ent->fts_path, &stat_buf) == -1) {
                rm_log_perror("stat(2) failed");
                continue;
            }

            /* Must be a large file (or followed link to it).
             * fts leaves the contents of fts_statp undefined for FTS_NS, so
             * remember to use our own stat result for the size check. */
            ent->fts_info = FTS_F;
            have_own_stat = true;
        }

        switch(ent->fts_info) {
        case FTS_ERR:
        case FTS_DC:
            /* Save this path as an error */
            rm_trie_insert(&file_tree, ent->fts_path, GINT_TO_POINTER(true));
            break;
        case FTS_F:
        case FTS_SL:
        case FTS_NS:
        case FTS_SLNONE:
        case FTS_DEFAULT:
            /* Save this path as countable file (empty files do not count) */
            if((have_own_stat ? stat_buf.st_size : ent->fts_statp->st_size) > 0) {
                rm_trie_insert(&file_tree, ent->fts_path, GINT_TO_POINTER(false));
            }
            break;
        case FTS_D:
        case FTS_DNR:
        case FTS_DOT:
        case FTS_DP:
        case FTS_NSOK:
        default:
            /* other fts states, that do not count as errors or files */
            break;
        }
    }

    if(fts_close(fts) != 0) {
        /* Log first so errno is still the one set by fts_close(). */
        rm_log_perror("fts_close failed");
        rm_trie_destroy(&file_tree);
        return false;
    }

    rm_trie_iter(&file_tree, NULL, true, false, rm_tm_count_art_callback, count_tree);

    /* Now flag the given root paths themselves as a no-go. Otherwise merging
     * would continue upwards until /, with fatal consequences, because / does
     * not contain more files than paths[0]. */
    for(int i = 0; paths[i]; ++i) {
        /* Just call the callback directly */
        RmNode *node = rm_trie_search_node(&file_tree, paths[i]);
        if(node != NULL) {
            node->data = GINT_TO_POINTER(true);
            rm_tm_count_art_callback(&file_tree, node, 0, count_tree);
        }
    }

#ifdef _RM_TREEMERGE_DEBUG
    rm_trie_print(count_tree);
#endif

    rm_trie_destroy(&file_tree);
    return true;
}