/**
 * Tell whether the disk behind `device` is flagged as non-rotational.
 *
 * The partition is looked up in self->part_table, then its disk in
 * self->disk_table. Returns true when `self` is NULL, and also (after
 * logging an error) when either lookup finds nothing.
 */
bool rm_mounts_is_nonrotational(RmMountTable *self, dev_t device) {
    if(self == NULL) {
        return true;
    }

    RmPartitionInfo *partition =
        g_hash_table_lookup(self->part_table, GINT_TO_POINTER(device));
    if(!partition) {
        rm_log_error_line("Partition not found in rm_mounts_is_nonrotational");
        return true;
    }

    RmDiskInfo *backing_disk =
        g_hash_table_lookup(self->disk_table, GINT_TO_POINTER(partition->disk));
    if(!backing_disk) {
        rm_log_error_line("Disk not found in rm_mounts_is_nonrotational");
        return true;
    }

    return !backing_disk->is_rotational;
}
/* wrapper for g_thread_pool_new with error reporting
 *
 * Creates a non-exclusive pool running `func` with `data` as user data and
 * at most `threads` threads. If GLib reports an error it is logged together
 * with its message (same style as rm_util_thread_pool_push) and freed.
 * Whatever g_thread_pool_new() returned is handed back unchanged.
 */
GThreadPool *rm_util_thread_pool_new(GFunc func, gpointer data, int threads) {
    GError *error = NULL;
    GThreadPool *pool = g_thread_pool_new(func, data, threads, FALSE, &error);

    if(error != NULL) {
        /* include the GError text so the cause is not lost */
        rm_log_error_line("Unable to create thread pool: %s", error->message);
        g_error_free(error);
    }

    return pool;
}
/* wrapper for g_thread_pool_push with error reporting
 *
 * Returns true when the push raised no GError; otherwise logs the pool
 * address plus the error message, frees the error and returns false.
 */
bool rm_util_thread_pool_push(GThreadPool *pool, gpointer data) {
    GError *push_error = NULL;

    g_thread_pool_push(pool, data, &push_error);
    if(push_error == NULL) {
        return true;
    }

    rm_log_error_line("Unable to push thread to pool %p: %s", pool, push_error->message);
    g_error_free(push_error);
    return false;
}
/**
 * Process-wide signal handler.
 *
 * SIGINT calls rm_session_abort(). SIGFPE, SIGABRT and SIGSEGV log a fatal
 * error message and terminate with exit status 1. Any other signal is
 * ignored.
 */
static void signal_handler(int signum) {
    if(signum == SIGINT) {
        rm_session_abort();
        return;
    }

    if(signum != SIGFPE && signum != SIGABRT && signum != SIGSEGV) {
        return;
    }

    /* logging messages might have unexpected effects in a signal handler,
     * but that's probably the least thing we have to worry about in case of
     * a segmentation fault.
     */
    rm_log_error_line(_("Aborting due to a fatal error. (signal received: %s)"),
                      g_strsignal(signum));
    rm_log_error_line(_("Please file a bug report (See rmlint -h)"));
    exit(1);
}
/**
 * Return the inode number of the directory that contains `file`.
 *
 * The directory path is rebuilt from the session's file trie using the
 * parent node of the file's folder, then stat'ed. On stat failure an error
 * is logged and 0 is returned.
 */
static ino_t rm_path_parent_inode(RmFile *file) {
    char dir_path[PATH_MAX];
    RmTrie *trie = (RmTrie *)&file->session->cfg->file_trie;

    rm_trie_build_path(trie, file->folder->parent, dir_path, PATH_MAX);

    RmStat dir_stat;
    if(rm_sys_stat(dir_path, &dir_stat) == -1) {
        rm_log_error_line("Failed to get parent path: stat failed: %s", g_strerror(errno));
        return 0;
    }

    return dir_stat.st_ino;
}
/**
 * Compare two digests for equality.
 *
 * Digests of different type or different byte count are never equal.
 * Beyond that the comparison depends on the digest type:
 *  - RM_DIGEST_PARANOID: uses the shadow hashes if a's buffers are gone,
 *    otherwise the twin-candidate / reject shortcuts, otherwise a
 *    buffer-by-buffer comparison.
 *  - types for which rm_digest_needs_steal() is true: compares the buffers
 *    obtained from rm_digest_steal() and frees them afterwards.
 *  - everything else: memcmp of the checksum fields.
 */
gboolean rm_digest_equal(RmDigest *a, RmDigest *b) {
    rm_assert_gentle(a && b);

    if(a->type != b->type || a->bytes != b->bytes) {
        return false;
    }

    if(a->type == RM_DIGEST_PARANOID) {
        if(!a->paranoid->buffers) {
            /* buffers have been freed so we need to rely on shadow hash */
            return rm_digest_equal(a->paranoid->shadow_hash, b->paranoid->shadow_hash);
        }

        /* check if pre-matched twins */
        if(a->paranoid->twin_candidate == b || b->paranoid->twin_candidate == a) {
            return true;
        }

        /* check if already rejected */
        if(g_slist_find(a->paranoid->rejects, b) || g_slist_find(b->paranoid->rejects, a)) {
            return false;
        }

        /* all the "easy" ways failed... walk both buffer lists in lockstep */
        guint compared = 0;
        GSList *lhs = a->paranoid->buffers;
        GSList *rhs = b->paranoid->buffers;

        for(; lhs && rhs; lhs = lhs->next, rhs = rhs->next) {
            if(!rm_buffer_equal(lhs->data, rhs->data)) {
                rm_log_error_line(
                    "Paranoid digest compare found mismatch - must be hash collision in "
                    "shadow hash");
                return false;
            }
            compared += ((RmBuffer *)lhs->data)->len;
        }

        /* equal only if both lists ended together and covered every byte */
        return (!lhs && !rhs && compared == a->bytes);
    }

    if(rm_digest_needs_steal(a->type)) {
        guint8 *stolen_a = rm_digest_steal(a);
        guint8 *stolen_b = rm_digest_steal(b);

        gboolean same = false;
        if(a->bytes == b->bytes) {
            same = !memcmp(stolen_a, stolen_b, MIN(a->bytes, b->bytes));
        }

        g_slice_free1(a->bytes, stolen_a);
        g_slice_free1(b->bytes, stolen_b);
        return same;
    }

    return !memcmp(a->checksum, b->checksum, MIN(a->bytes, b->bytes));
}
int rm_hasher_main(int argc, const char **argv) { RmHasherSession tag; /* List of paths we got passed (or NULL) */ tag.paths = NULL; /* Print hashes in the same order as files in command line args */ tag.print_in_order = TRUE; /* Print a hash with builtin identifier */ tag.print_multihash = FALSE; /* Digest type (user option, default SHA1) */ tag.digest_type = RM_DIGEST_SHA1; gint threads = 8; gint64 buffer_mbytes = 256; ////////////// Option Parsing /////////////// /* clang-format off */ const GOptionEntry entries[] = { {"digest-type" , 'd' , 0 , G_OPTION_ARG_CALLBACK , (GOptionArgFunc)rm_hasher_parse_type , _("Digest type [SHA1]") , "[TYPE]"} , {"num-threads" , 't' , 0 , G_OPTION_ARG_INT , &threads , _("Number of hashing threads [8]") , "N"} , {"multihash" , 'm' , 0 , G_OPTION_ARG_NONE , &tag.print_multihash , _("Print hash as self identifying multihash") , NULL} , {"buffer-mbytes" , 'b' , 0 , G_OPTION_ARG_INT64 , &buffer_mbytes , _("Megabytes read buffer [256 MB]") , "MB"} , {"ignore-order" , 'i' , G_OPTION_FLAG_REVERSE , G_OPTION_ARG_NONE , &tag.print_in_order , _("Print hashes in order completed, not in order entered (reduces memory usage)") , NULL} , {"" , 0 , 0 , G_OPTION_ARG_FILENAME_ARRAY , &tag.paths , _("Space-separated list of files") , "[FILE…]"} , {NULL , 0 , 0 , 0 , NULL , NULL , NULL}}; /* clang-format on */ GError *error = NULL; GOptionContext *context = g_option_context_new(_("Hash a list of files")); GOptionGroup *main_group = g_option_group_new(argv[0], _("Hash a list of files"), "", &tag, NULL); char summary[4096]; memset(summary, 0, sizeof(summary)); g_snprintf(summary, sizeof(summary), _("Multi-threaded file digest (hash) calculator.\n" "\n Available digest types:" "\n %s\n" "\n Versions with different bit numbers:" "\n %s\n" "\n Supported, but not useful:" "\n %s\n"), "spooky, city, xxhash, sha{1,256,512}, md5, murmur", "spooky{32,64,128}, city{128,256,512}, murmur{512}", "farmhash, cumulative, paranoid, ext, bastard"); 
g_option_group_add_entries(main_group, entries); g_option_context_set_main_group(context, main_group); g_option_context_set_summary(context, summary); if(!g_option_context_parse(context, &argc, (char ***)&argv, &error)) { /* print g_option error message */ rm_log_error_line("%s", error->message); exit(EXIT_FAILURE); } if(tag.paths == NULL) { /* read paths from stdin */ char path_buf[PATH_MAX]; char *tokbuf = NULL; GPtrArray *paths = g_ptr_array_new(); while(fgets(path_buf, PATH_MAX, stdin)) { char *abs_path = realpath(strtok_r(path_buf, "\n", &tokbuf), NULL); g_ptr_array_add(paths, abs_path); } tag.paths = (char **)g_ptr_array_free(paths, FALSE); } if(tag.paths == NULL || tag.paths[0] == NULL) { rm_log_error_line(_("No valid paths given.")); exit(EXIT_FAILURE); } g_option_context_free(context); ////////// Implementation ////// if(tag.print_in_order) { /* allocate buffer to collect results */ tag.completed_digests_buffer = g_slice_alloc0((g_strv_length(tag.paths) + 1) * sizeof(RmDigest *)); tag.path_index = 0; } /* initialise structures */ g_mutex_init(&tag.lock); RmHasher *hasher = rm_hasher_new(tag.digest_type, threads, FALSE, 4096, 1024 * 1024 * buffer_mbytes, 0, (RmHasherCallback)rm_hasher_callback, &tag); /* Iterate over paths, pushing to hasher threads */ for(int i = 0; tag.paths && tag.paths[i]; ++i) { /* check it is a regular file */ RmStat stat_buf; if(rm_sys_stat(tag.paths[i], &stat_buf) == -1) { rm_log_warning_line(_("Can't open directory or file \"%s\": %s"), tag.paths[i], strerror(errno)); } else if(S_ISDIR(stat_buf.st_mode)) { rm_log_warning_line(_("Directories are not supported: %s"), tag.paths[i]); } else if(S_ISREG(stat_buf.st_mode)) { RmHasherTask *task = rm_hasher_task_new(hasher, NULL, GINT_TO_POINTER(i)); rm_hasher_task_hash(task, tag.paths[i], 0, 0, FALSE); rm_hasher_task_finish(task); continue; } else { rm_log_warning_line(_("%s: Unknown file type"), tag.paths[i]); } /* dummy callback for failed paths */ g_free(tag.paths[i]); tag.paths[i] = 
NULL; rm_hasher_callback(hasher, NULL, &tag, GINT_TO_POINTER(i)); } /* wait for all hasher threads to finish... */ rm_hasher_free(hasher, TRUE); /* tidy up */ if(tag.print_in_order) { g_slice_free1((g_strv_length(tag.paths) + 1) * sizeof(RmDigest *), tag.completed_digests_buffer); } g_strfreev(tag.paths); return EXIT_SUCCESS; }
/**
 * Read a file range with rm_sys_preadv() and feed it to the hash pipe.
 *
 * @param hasher        supplies buf_size and the buffer pool (mem_pool)
 * @param hashpipe      thread pool that receives each filled RmBuffer
 * @param digest        digest stored in every buffer that is pushed
 * @param path          file to read
 * @param start_offset  offset of the first byte to read
 * @param bytes_to_read number of bytes to read; 0 means "from start_offset
 *                      to the end of the file" (size taken from stat)
 * @return number of bytes pushed to the hash pipe; 0 if the file could not
 *         be opened
 */
static gint64 rm_hasher_unbuffered_read(RmHasher *hasher, GThreadPool *hashpipe,
                                        RmDigest *digest, char *path,
                                        gint64 start_offset, gint64 bytes_to_read) {
    gint32 bytes_read = 0;
    gint64 total_bytes_read = 0;
    guint64 file_offset = start_offset;

    if(bytes_to_read == 0) {
        RmStat stat_buf;
        if(rm_sys_stat(path, &stat_buf) != -1) {
            bytes_to_read = MAX(stat_buf.st_size - start_offset, 0);
        }
    }

    /* how many buffers to read? */
    const gint16 N_BUFFERS = MIN(4, DIVIDE_CEIL(bytes_to_read, hasher->buf_size));
    struct iovec readvec[N_BUFFERS + 1];

    int fd = rm_sys_open(path, O_RDONLY);
    if(fd == -1) {
        rm_log_info("open(2) failed for %s: %s\n", path, g_strerror(errno));
        /* nothing has been acquired yet, so there is nothing to clean up */
        return total_bytes_read;
    }

    /* preadv() is beneficial for large files since it can cut the
     * number of syscall heavily.  I suggest N_BUFFERS=4 as good
     * compromise between memory and cpu.
     *
     * With 16 buffers: 43% cpu 33,871 total
     * With  8 buffers: 43% cpu 32,098 total
     * With  4 buffers: 42% cpu 32,091 total
     * With  2 buffers: 44% cpu 32,245 total
     * With  1 buffers: 45% cpu 34,491 total
     */

    /* Give the kernel scheduler some hints */
    rm_hasher_request_readahead(fd, start_offset, bytes_to_read);

    /* Initialize the buffers to begin with.
     * After a buffer is full, a new one is retrieved.
     */
    RmBuffer **buffers;
    buffers = g_slice_alloc(sizeof(*buffers) * N_BUFFERS);

    memset(readvec, 0, sizeof(readvec));
    for(int i = 0; i < N_BUFFERS; ++i) {
        /* buffer is one contiguous memory block */
        buffers[i] = rm_buffer_get(hasher->mem_pool);
        readvec[i].iov_base = buffers[i]->data;
        readvec[i].iov_len = hasher->buf_size;
    }

    while((bytes_to_read == 0 || total_bytes_read < bytes_to_read) &&
          (bytes_read = rm_sys_preadv(fd, readvec, N_BUFFERS, file_offset)) > 0) {
        /* ignore over-reads */
        bytes_read = MIN(bytes_read, bytes_to_read - total_bytes_read);

        int blocks = DIVIDE_CEIL(bytes_read, hasher->buf_size);
        rm_assert_gentle(blocks <= N_BUFFERS);

        total_bytes_read += bytes_read;
        file_offset += bytes_read;

        for(int i = 0; i < blocks; ++i) {
            /* Get the RmBuffer from the datapointer */
            RmBuffer *buffer = buffers[i];
            /* the last block of a read may be only partially filled */
            buffer->len = MIN(hasher->buf_size, bytes_read - i * hasher->buf_size);
            buffer->digest = digest;
            buffer->user_data = NULL;

            /* Send it to the hasher */
            rm_util_thread_pool_push(hashpipe, buffer);

            /* Allocate a new buffer - hasher will release the old buffer */
            buffers[i] = rm_buffer_get(hasher->mem_pool);
            readvec[i].iov_base = buffers[i]->data;
            readvec[i].iov_len = hasher->buf_size;
        }
    }

    if(bytes_read == -1) {
        rm_log_perror("preadv failed");
    } else if(total_bytes_read != bytes_to_read) {
        rm_log_error_line(_("Something went wrong reading %s; expected %li bytes, "
                            "got %li; ignoring"),
                          path, (long int)bytes_to_read, (long int)total_bytes_read);
    }

    /* Release the rest of the buffers */
    for(int i = 0; i < N_BUFFERS; ++i) {
        rm_buffer_release(buffers[i]);
    }
    g_slice_free1(sizeof(*buffers) * N_BUFFERS, buffers);

    /* fd is always a successfully opened descriptor here; 0 is a valid
     * descriptor too, so close it unconditionally */
    rm_sys_close(fd);

    return total_bytes_read;
}
/* Method to test if a file is non stripped binary. Uses libelf.
 *
 * Returns true if the file at `path` contains a section of type SHT_SYMTAB.
 * Returns false if no such section is found, if `statp` is given and shows
 * no execute bit, if the file cannot be opened or examined, or if the build
 * has no libelf support (HAVE_LIBELF unset).
 */
bool rm_util_is_nonstripped(_U const char *path, _U RmStat *statp) {
    bool is_ns = false;

#if HAVE_LIBELF
    g_return_val_if_fail(path, false);

    if(statp && (statp->st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
        return false;
    }

    /* inspired by "jschmier"'s answer at http://stackoverflow.com/a/5159890 */
    int fd;

    /* ELF handle */
    Elf *elf;

    /* section descriptor pointer */
    Elf_Scn *scn;

    /* section header */
    GElf_Shdr shdr;

    /* Open ELF file to obtain file descriptor */
    if((fd = rm_sys_open(path, O_RDONLY)) == -1) {
        rm_log_warning_line(_("cannot open file '%s' for nonstripped test: "), path);
        rm_log_perror("");
        return false;
    }

    /* Protect program from using an older library */
    if(elf_version(EV_CURRENT) == EV_NONE) {
        rm_log_error_line(_("ELF Library is out of date!"));
        /* do not leak the descriptor opened above */
        rm_sys_close(fd);
        return false;
    }

    /* Initialize elf pointer for examining contents of file */
    elf = elf_begin(fd, ELF_C_READ, NULL);
    if(elf == NULL) {
        /* no handle, nothing to iterate over */
        rm_sys_close(fd);
        return false;
    }

    /* Initialize section descriptor pointer so that elf_nextscn()
     * returns a pointer to the section descriptor at index 1.
     */
    scn = NULL;

    /* Iterate through ELF sections */
    while((scn = elf_nextscn(elf, scn)) != NULL) {
        /* Retrieve section header; skip the section if that fails, since
         * shdr would be left uninitialised */
        if(gelf_getshdr(scn, &shdr) == NULL) {
            continue;
        }

        /* If a section header holding a symbol table (.symtab)
         * is found, this ELF file has not been stripped. */
        if(shdr.sh_type == SHT_SYMTAB) {
            is_ns = true;
            break;
        }
    }

    elf_end(elf);
    rm_sys_close(fd);
#endif

    return is_ns;
}