/* Fill buf with the file's checksum rendered as a hex string, padded with
 * '0' characters.  Paranoid digests carry no hash of their own, so their
 * shadow hash is used instead. */
static int rm_xattr_build_cksum(RmFile *file, char *buf, size_t buf_size) {
    g_assert(file);
    g_assert(file->digest);

    memset(buf, '0', buf_size);
    buf[buf_size - 1] = 0;

    if(file->digest->type == RM_DIGEST_PARANOID) {
        g_assert(file->digest->shadow_hash);
        return rm_digest_hexstring(file->digest->shadow_hash, buf);
    } else {
        return rm_digest_hexstring(file->digest, buf);
    }
}
/* Print one "<checksum> <path>" line.  When print_multihash is set, the
 * checksum is preceded by a prefix encoding the digest type id and its
 * length in bytes, followed by '@'. */
static void rm_hasher_print(RmDigest *digest, char *path, bool print_multihash) {
    gsize size = rm_digest_get_bytes(digest) * 2 + 1;
    char checksum_str[size];
    memset(checksum_str, '0', size);
    checksum_str[size - 1] = 0;

    rm_digest_hexstring(digest, checksum_str);

    if(print_multihash) {
        g_print("%02x%02x@", rm_digest_type_to_multihash_id(digest->type),
                rm_digest_get_bytes(digest));
    }

    g_print("%s %s\n", checksum_str, path);
}
static void rm_fmt_elem(_UNUSED RmSession *session, _UNUSED RmFmtHandler *parent,
                        FILE *out, RmFile *file) {
    if(file->lint_type == RM_LINT_TYPE_UNIQUE_FILE &&
       (!file->digest || !session->cfg->write_unfinished)) {
        /* unique file with no partial checksum */
        return;
        /* TODO: add option to output all unique files */
    }

    char checksum_str[rm_digest_get_bytes(file->digest) * 2 + 1];
    memset(checksum_str, '0', sizeof(checksum_str));
    checksum_str[sizeof(checksum_str) - 1] = 0;

    if(file->digest) {
        rm_digest_hexstring(file->digest, checksum_str);
    }

    /* Escape quotes in the path (refer http://tools.ietf.org/html/rfc4180, item 6) */
    RM_DEFINE_PATH(file);
    char *clean_path = rm_util_strsub(file_path, CSV_QUOTE, CSV_QUOTE "" CSV_QUOTE);

    fprintf(out, CSV_FORMAT, rm_file_lint_type_to_string(file->lint_type), clean_path,
            file->file_size, checksum_str);
    g_free(clean_path);
}
static void rm_tm_extract(RmTreeMerger *self) {
    /* Iterate over all directories per hash (which are therefore the same) */
    GList *result_table_values = g_hash_table_get_values(self->result_table);
    result_table_values =
        g_list_sort(result_table_values, (GCompareFunc)rm_tm_cmp_directory_groups);

    for(GList *iter = result_table_values; iter; iter = iter->next) {
        /* Needs at least two directories to be a duplicate... */
        GQueue *dir_list = iter->data;

#ifdef _RM_TREEMERGE_DEBUG
        for(GList *i = dir_list->head; i; i = i->next) {
            RmDirectory *d = i->data;
            char buf[512];
            memset(buf, 0, sizeof(buf));
            rm_digest_hexstring(d->digest, buf);
            g_printerr(" mergeups=%" LLU ": %s - %s\n", d->mergeups, d->dirname, buf);
        }
        g_printerr("---\n");
#endif
        if(dir_list->length < 2) {
            continue;
        }

        if(rm_session_was_aborted(self->session)) {
            break;
        }

        /* List of result directories */
        GQueue result_dirs = G_QUEUE_INIT;

        /* Sort the RmDirectory list by their path depth, lowest depth first */
        g_queue_sort(dir_list, (GCompareDataFunc)rm_tm_sort_paths, self);

        /* Output the directories and mark their children to prevent
         * duplicate directory reports in lower levels.
         */
        for(GList *iter = dir_list->head; iter; iter = iter->next) {
            RmDirectory *directory = iter->data;
            if(directory->finished == false) {
                rm_tm_mark_finished(self, directory);
                g_queue_push_head(&result_dirs, directory);
            }
        }

        /* Make sure the original directory lands as first
         * in the result_dirs queue.
         */
        g_queue_sort(&result_dirs, (GCompareDataFunc)rm_tm_sort_orig_criteria, self);

        GQueue file_adaptor_group = G_QUEUE_INIT;

        for(GList *iter = result_dirs.head; iter; iter = iter->next) {
            RmDirectory *directory = iter->data;
            RmFile *mask = rm_directory_as_file(self, directory);
            g_queue_push_tail(&file_adaptor_group, mask);

            if(iter == result_dirs.head) {
                /* First one in the group -> it's the original */
                mask->is_original = true;
                rm_tm_mark_original_files(self, directory);
            } else {
                if(rm_tm_mark_duplicate_files(self, directory, 0) == directory->dupe_count) {
                    /* Mark the file as original when all files in it are preferred. */
                    mask->is_original = true;
                    rm_tm_mark_original_files(self, directory);
                }
            }

            if(self->session->cfg->write_unfinished) {
                rm_tm_write_unfinished_cksums(self, directory);
            }
        }

        if(result_dirs.length >= 2) {
            rm_shred_forward_to_output(self->session, &file_adaptor_group);
        }

        g_queue_foreach(&file_adaptor_group, (GFunc)g_free, NULL);
        g_queue_clear(&file_adaptor_group);
        g_queue_clear(&result_dirs);
    }

    g_list_free(result_table_values);

    /* Iterate over all non-finished dirs in the tree,
     * and grab unfinished files that must be dupes elsewhere.
     */
    rm_trie_iter(&self->dir_tree, NULL, true, false, rm_tm_iter_unfinished_files, self);

    /* Now here's a problem. Consider an input like this:
     * /root
     * ├── a
     * ├── sub1
     * │   ├── a
     * │   └── b
     * └── sub2
     *     ├── a
     *     └── b
     *
     * This yields two duplicate dirs (sub1, sub2)
     * and one duplicate, unmatched file (a).
     *
     * For outputting files we need groups, which consist of at least 2 files.
     * So how do we group that, so we don't end up deleting a file many times?
     * We always choose which directories are originals first, so we flag all
     * files in them as originals.
     */
    GHashTableIter iter;
    g_hash_table_iter_init(&iter, self->file_groups);

    GQueue *file_list = NULL;
    while(g_hash_table_iter_next(&iter, NULL, (void **)&file_list)) {
        bool has_one_dupe = false;
        RmOff file_size_acc = 0;

        GList *next = NULL;
        for(GList *iter = file_list->head; iter; iter = next) {
            RmFile *file = iter->data;
            next = iter->next;

            bool is_duplicate = g_hash_table_contains(self->file_checks, file->digest);
            has_one_dupe |= is_duplicate;

            /* with --partial-hidden we do not want to output hidden files */
            if(self->session->cfg->partial_hidden && file->is_hidden) {
                g_queue_delete_link(file_list, iter);
                continue;
            }

            if(iter != file_list->head && !is_duplicate) {
                file_size_acc += file->file_size;
            }
        }

        if(file_list->length >= 2) {
            /* If no separate duplicate files are requested, we can stop here */
            if(self->session->cfg->find_duplicates == false) {
                self->session->total_lint_size -= file_size_acc;
                self->session->dup_group_counter -= 1;
                self->session->dup_counter -= file_list->length - 1;
            } else {
                rm_shred_group_find_original(self->session, file_list);
                rm_shred_forward_to_output(self->session, file_list);
            }
        }
    }
}
/* Render the file's checksum as a '0'-padded, NUL-terminated hex string
 * into checksum_str. */
static void rm_fmt_json_cksum(RmFile *file, char *checksum_str, size_t size) {
    memset(checksum_str, '0', size);
    checksum_str[size - 1] = 0;
    rm_digest_hexstring(file->digest, checksum_str);
}