static void rm_mounts_freebsd_list_disks(void) { char disks[1024]; size_t disks_len = sizeof(disks); memset(disks, 0, sizeof(disks)); DISK_TABLE = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); if(sysctlbyname("kern.disks", disks, &disks_len, NULL, 0) == 0) { char **disk_vec = g_strsplit(disks, " ", -1); for(int i = 0; disk_vec[i]; ++i) { char *disk = g_strdup_printf("/dev/%s", disk_vec[i]); RmStat dev_stat; if(rm_sys_stat(disk, &dev_stat) != -1) { g_hash_table_insert(DISK_TABLE, disk, GUINT_TO_POINTER(dev_stat.st_rdev)); } else { rm_log_perror("stat on /dev"); g_free(disk); } } g_strfreev(disk_vec); } else { rm_log_perror("sysctlbyname"); } }
/**
 * Read a range of a file through buffered stdio and push the data, one
 * RmBuffer at a time, onto the hash pipe.
 *
 * @param hasher         supplies the buffer pool and the buffer size.
 * @param hashpipe       thread pool every filled buffer is pushed to.
 * @param digest         digest attached to each pushed buffer.
 * @param path           file to read.
 * @param start_offset   byte offset to start reading at.
 * @param bytes_to_read  number of bytes to read; 0 means "as much as there is".
 *
 * @return number of bytes pushed to the pipe. 0 if the file could not be
 *         opened or positioned. If fread(3) reported an error and the count
 *         would nevertheless equal the requested amount, the count is bumped
 *         by one so the caller can notice the failure.
 */
static gint64 rm_hasher_buffered_read(RmHasher *hasher, GThreadPool *hashpipe,
                                      RmDigest *digest, char *path,
                                      gsize start_offset, gsize bytes_to_read) {
    FILE *fd = NULL;
    if(bytes_to_read == 0) {
        bytes_to_read = G_MAXSIZE;
    }

    /* bytes_to_read is counted down while reading, so keep the amount that
     * was actually asked for; the error signalling below needs it. */
    const gsize bytes_requested = bytes_to_read;
    gsize total_bytes_read = 0;

    if((fd = fopen(path, "rb")) == NULL) {
        rm_log_info("fopen(3) failed for %s: %s\n", path, g_strerror(errno));
        goto finish;
    }

    gint32 bytes_read = 0;

    rm_hasher_request_readahead(fileno(fd), start_offset, bytes_to_read);

    if(fseek(fd, start_offset, SEEK_SET) == -1) {
        rm_log_perror("fseek(3) failed");
        goto finish;
    }

    RmBuffer *buffer = rm_buffer_get(hasher->mem_pool);

    while((bytes_read = fread(buffer->data, 1, MIN(bytes_to_read, hasher->buf_size),
                              fd)) > 0) {
        bytes_to_read -= bytes_read;

        buffer->len = bytes_read;
        buffer->digest = digest;
        buffer->user_data = NULL;

        /* the pushed buffer is no longer ours; fetch a fresh one */
        rm_util_thread_pool_push(hashpipe, buffer);

        total_bytes_read += bytes_read;
        buffer = rm_buffer_get(hasher->mem_pool);
    }

    /* the last buffer fetched was never filled */
    rm_buffer_release(buffer);

    if(ferror(fd) != 0) {
        rm_log_perror("fread(3) failed");
        if(total_bytes_read == bytes_requested) {
            /* signal error to caller */
            total_bytes_read++;
        }
    }

finish:
    if(fd != NULL) {
        fclose(fd);
    }
    return total_bytes_read;
}
/**
 * "Read" a symbolic link for hashing purposes.
 *
 * Instead of file content, the string "<st_dev>:<st_ino>" obtained from
 * rm_sys_stat() on the path is written into a buffer and fed to the digest.
 *
 * @param hasher  supplies the buffer pool.
 * @param digest  digest to update.
 * @param path    path of the symbolic link.
 *
 * @return 0 on success, -1 if the path could not be stat'ed.
 */
static gint64 rm_hasher_symlink_read(RmHasher *hasher, RmDigest *digest, char *path) {
    /* Stat before taking a buffer from the pool: on failure there is then
     * nothing that would have to be handed back. */
    RmStat stat_buf;
    if(rm_sys_stat(path, &stat_buf) == -1) {
        /* Oops, that did not work out, report as an error */
        rm_log_perror("Cannot stat symbolic link");
        return -1;
    }

    /* Fake an IO operation on the symlink. */
    RmBuffer *buf = rm_buffer_get(hasher->mem_pool);
    buf->len = 256;
    memset(buf->data, 0, buf->len);

    gint data_size = snprintf((char *)buf->data, rm_buffer_size(hasher->mem_pool),
                              "%ld:%ld", (long)stat_buf.st_dev, (long)stat_buf.st_ino);
    buf->len = data_size;
    buf->digest = digest;

    rm_digest_buffered_update(buf);

    /* In case of paranoia: shrink the used data buffer, so comparison works
     * as expected. Otherwise a full buffer is used with possibly different
     * content */
    if(digest->type == RM_DIGEST_PARANOID) {
        rm_digest_paranoia_shrink(digest, data_size);
    }

    return 0;
}
/**
 * Convert an ISO 8601 timestamp string into seconds since the epoch.
 *
 * @param string  timestamp text handed to g_time_val_from_iso8601().
 * @return the parsed seconds value, or 0 (after logging) if parsing failed.
 */
time_t rm_iso8601_parse(const char *string) {
    GTimeVal parsed;

    if(g_time_val_from_iso8601(string, &parsed)) {
        return parsed.tv_sec;
    }

    rm_log_perror("Converting time failed");
    return 0;
}
/**
 * Find out whether a block device is a rotational disk.
 *
 * With HAVE_SYSBLOCK the first character of
 * /sys/block/<dev>/queue/rotational is read and converted from its ASCII
 * digit. With HAVE_SYSCTL the "kern.cam.<name>.<num>.delete_method" sysctl
 * is queried; a value starting with "NONE" is taken as rotational.
 *
 * @param dev  device name without a directory part.
 * @return 1 if rotational, 0 if not, -1 if it could not be determined.
 */
static gchar rm_mounts_is_rotational_blockdev(const char *dev) {
    gchar is_rotational = -1;

    /* dev is unused when neither detection method is compiled in */
    (void)dev;

#if HAVE_SYSBLOCK /* this works only on linux */
    char sys_path[PATH_MAX];

    snprintf(sys_path, PATH_MAX, "/sys/block/%s/queue/rotational", dev);

    FILE *sys_fdes = fopen(sys_path, "r");
    if(sys_fdes == NULL) {
        return -1;
    }

    /* the file holds an ASCII digit; turn it into its numeric value */
    if(fread(&is_rotational, 1, 1, sys_fdes) == 1) {
        is_rotational -= '0';
    }

    fclose(sys_fdes);
#elif HAVE_SYSCTL
    /* try with sysctl() */
    int device_num = 0;
    char cmd[32] = {0}, delete_method[32] = {0}, dev_copy[32] = {0};

    /* sysctlbyname() expects the capacity of the result buffer here,
     * not the size of the length variable itself */
    size_t delete_method_len = sizeof(delete_method);

    /* dev_copy is zero-filled; copying one byte less keeps it terminated */
    strncpy(dev_copy, dev, sizeof(dev_copy) - 1);

    /* split "<name><number>" at the first digit */
    for(int i = 0; dev_copy[i]; ++i) {
        if(isdigit((unsigned char)dev_copy[i])) {
            /* NOTE(review): this cuts the name one character *before* the
             * first digit (e.g. "ada0" becomes "ad") - confirm that this is
             * what the callers' device names require. */
            if(i > 0) {
                dev_copy[i - 1] = 0;
            }

            device_num = g_ascii_strtoll(&dev_copy[i], NULL, 10);
            break;
        }
    }

    int cmd_len = snprintf(cmd, sizeof(cmd), "kern.cam.%s.%d.delete_method", dev_copy,
                           device_num);
    if(cmd_len < 0 || (size_t)cmd_len >= sizeof(cmd)) {
        /* formatting failed or the sysctl name got truncated */
        return -1;
    }

    if(sysctlbyname(cmd, delete_method, &delete_method_len, NULL, 0) != 0) {
        rm_log_perror("sysctlbyname");
    } else {
        if(memcmp("NONE", delete_method, MIN(delete_method_len, 4)) == 0) {
            is_rotational = 1;
        } else {
            is_rotational = 0;
        }
    }
#endif

    return is_rotational;
}
/**
 * Convert an ISO 8601 timestamp ("%FT%T%z", e.g. 2014-09-08T00:12:32+0200)
 * into seconds since the epoch.
 *
 * The broken-down fields describe wall-clock time at the UTC offset given in
 * the string, so they are converted as if they were UTC and the parsed
 * offset is subtracted afterwards. This does not depend on the time zone of
 * the running process.
 *
 * @param string  timestamp text.
 * @return seconds since the epoch, or 0 (after logging) if parsing failed.
 */
time_t rm_iso8601_parse(const char *string) {
    struct tm time_key;
    memset(&time_key, 0, sizeof(struct tm));

    if(strptime(string, "%FT%T%z", &time_key) == NULL) {
        rm_log_perror("strptime(3) failed");
        return 0;
    }

    /* Fetch the parsed offset first: the conversion call is allowed to
     * normalise the struct and overwrite tm_gmtoff. */
    const long utc_offset = time_key.tm_gmtoff;

    return timegm(&time_key) - utc_offset;
}
/**
 * Decide whether the result of an xattr call is a failure worth reporting.
 *
 * @param name  label used as the prefix of the logged error message.
 * @param rc    return code of the preceding call; -1 means it failed.
 *
 * @return 0 if the call succeeded or failed with ENOTSUP / ENODATA;
 *         otherwise the errno value of the failed call (after logging it).
 */
static int rm_xattr_is_fail(const char *name, int rc) {
    if(rc != -1) {
        return 0;
    }

    /* Snapshot errno right away: the logging call below may run library
     * code that overwrites it before we get to return it. */
    const int saved_errno = errno;

    if(saved_errno == ENOTSUP || saved_errno == ENODATA) {
        return 0;
    }

    rm_log_perror(name);
    return saved_errno;
}
static RmDirectory *rm_directory_new(char *dirname) { RmDirectory *self = g_new0(RmDirectory, 1); self->file_count = 0; self->dupe_count = 0; self->prefd_files = 0; self->was_merged = false; self->was_inserted = false; self->mergeups = 0; self->dirname = dirname; self->finished = false; self->depth = 0; for(char *s = dirname; *s; s++) { self->depth += (*s == G_DIR_SEPARATOR); } RmStat dir_stat; if(rm_sys_stat(self->dirname, &dir_stat) == -1) { rm_log_perror("stat(2) failed during sort"); } else { self->metadata.dir_mtime = dir_stat.st_mtime; self->metadata.dir_inode = dir_stat.st_ino; self->metadata.dir_dev = dir_stat.st_dev; } /* Special cumulative hashsum, that is not dependent on the * order in which the file hashes were added. * It is not used as full hash, but as sorting speedup. */ self->digest = rm_digest_new(RM_DIGEST_CUMULATIVE, 0, 0, 0, false); g_queue_init(&self->known_files); g_queue_init(&self->children); self->hash_set = g_hash_table_new((GHashFunc)rm_digest_hash, (GEqualFunc)rm_digest_equal); return self; }
/**
 * Create a traversal buffer for one path and stat the path immediately.
 *
 * Depending on the session's follow_symlinks setting the path is examined
 * with rm_sys_stat() or rm_sys_lstat(); the result lands in self->stat_buf.
 * A failing stat is only logged, the buffer is returned regardless.
 *
 * @param session     session whose configuration is consulted.
 * @param path        path to store (the pointer is kept, not copied).
 * @param is_prefd    stored in the buffer unchanged.
 * @param path_index  stored in the buffer unchanged.
 * @return the new buffer.
 */
static RmTravBuffer *rm_trav_buffer_new(RmSession *session, char *path, bool is_prefd,
                                        unsigned long path_index) {
    RmTravBuffer *self = g_new0(RmTravBuffer, 1);
    self->path_index = path_index;
    self->is_prefd = is_prefd;
    self->path = path;

    /* presumably declares `self_path` for the calls below - defined elsewhere */
    RM_BUFFER_DEFINE_PATH(session, self);

    const int stat_rc = session->cfg->follow_symlinks
                            ? rm_sys_stat(self_path, &self->stat_buf)
                            : rm_sys_lstat(self_path, &self->stat_buf);

    if(stat_rc == -1) {
        rm_log_perror("Unable to stat file");
    }

    return self;
}
/**
 * Walk all given paths with fts(3) and record the results in count_tree.
 *
 * Every non-empty file (and every traversal error) is first collected in a
 * temporary trie of full paths, which is then folded into count_tree via
 * rm_tm_count_art_callback(). Finally each of the given paths itself is
 * flagged through the same callback.
 *
 * @param count_tree  trie handed to rm_tm_count_art_callback().
 * @param paths       NULL-terminated array of paths; must not be empty.
 * @param session     session whose follow_symlinks setting selects
 *                    FTS_LOGICAL or FTS_PHYSICAL.
 * @return true on success; false if no paths were given or fts_open() /
 *         fts_close() failed.
 */
static bool rm_tm_count_files(RmTrie *count_tree, char **paths, RmSession *session) {
    if(*paths == NULL) {
        rm_log_error("No paths passed to rm_tm_count_files\n");
        return false;
    }

    int fts_flags = FTS_COMFOLLOW;
    if(session->cfg->follow_symlinks) {
        fts_flags |= FTS_LOGICAL;
    } else {
        fts_flags |= FTS_PHYSICAL;
    }

    FTS *fts = fts_open(paths, fts_flags, NULL);
    if(fts == NULL) {
        rm_log_perror("fts_open failed");
        return false;
    }

    /* This tree stores the full file paths.
     * It is joined into a full directory tree later.
     * It is set up only after fts_open() succeeded, so that the failure
     * path above has nothing to tear down. */
    RmTrie file_tree;
    rm_trie_init(&file_tree);

    FTSENT *ent = NULL;
    while((ent = fts_read(fts))) {
        /* Set when fts had no stat info and we fetched it ourselves;
         * ent->fts_statp must not be trusted in that case. */
        bool used_own_stat = false;
        RmStat stat_buf;

        /* Handle large files (where fts fails with FTS_NS) */
        if(ent->fts_info == FTS_NS) {
            if(rm_sys_stat(ent->fts_path, &stat_buf) == -1) {
                rm_log_perror("stat(2) failed");
                continue;
            }

            /* Must be a large file (or followed link to it) */
            ent->fts_info = FTS_F;
            used_own_stat = true;
        }

        switch(ent->fts_info) {
        case FTS_ERR:
        case FTS_DC:
            /* Save this path as an error */
            rm_trie_insert(&file_tree, ent->fts_path, GINT_TO_POINTER(true));
            break;
        case FTS_F:
        case FTS_SL:
        case FTS_NS:
        case FTS_SLNONE:
        case FTS_DEFAULT: {
            /* Save this path as countable file, but only if it is not empty */
            const bool non_empty = used_own_stat ? (stat_buf.st_size > 0)
                                                 : (ent->fts_statp->st_size > 0);
            if(non_empty) {
                rm_trie_insert(&file_tree, ent->fts_path, GINT_TO_POINTER(false));
            }
            break;
        }
        case FTS_D:
        case FTS_DNR:
        case FTS_DOT:
        case FTS_DP:
        case FTS_NSOK:
        default:
            /* other fts states, that do not count as errors or files */
            break;
        }
    }

    if(fts_close(fts) != 0) {
        rm_log_perror("fts_close failed");
        rm_trie_destroy(&file_tree);
        return false;
    }

    rm_trie_iter(&file_tree, NULL, true, false, rm_tm_count_art_callback, count_tree);

    /* Now flag everything as a no-go over the given paths,
     * otherwise we would continue merging till / with fatal consequences,
     * since / does not have more files as paths[0]
     */
    for(int i = 0; paths[i]; ++i) {
        /* Just call the callback directly */
        RmNode *node = rm_trie_search_node(&file_tree, paths[i]);
        if(node != NULL) {
            node->data = GINT_TO_POINTER(true);
            rm_tm_count_art_callback(&file_tree, node, 0, count_tree);
        }
    }

#ifdef _RM_TREEMERGE_DEBUG
    rm_trie_print(count_tree);
#endif

    rm_trie_destroy(&file_tree);
    return true;
}
/**
 * Read a range of a file with preadv() into up to four pool buffers at a
 * time and push every filled buffer onto the hash pipe.
 *
 * @param hasher         supplies the buffer pool and the buffer size.
 * @param hashpipe       thread pool every filled buffer is pushed to.
 * @param digest         digest attached to each pushed buffer.
 * @param path           file to read.
 * @param start_offset   byte offset to start reading at.
 * @param bytes_to_read  number of bytes to read; 0 means "from start_offset
 *                       to the end of the file" (size taken from stat).
 *
 * @return number of bytes pushed to the pipe; 0 if the file could not be
 *         opened. A read error or a short read is logged but not otherwise
 *         signalled.
 */
static gint64 rm_hasher_unbuffered_read(RmHasher *hasher, GThreadPool *hashpipe,
                                        RmDigest *digest, char *path,
                                        gint64 start_offset, gint64 bytes_to_read) {
    gint32 bytes_read = 0;
    gint64 total_bytes_read = 0;
    guint64 file_offset = start_offset;

    if(bytes_to_read == 0) {
        RmStat stat_buf;
        if(rm_sys_stat(path, &stat_buf) != -1) {
            bytes_to_read = MAX(stat_buf.st_size - start_offset, 0);
        }
    }

    /* how many buffers to read? */
    const gint16 N_BUFFERS = MIN(4, DIVIDE_CEIL(bytes_to_read, hasher->buf_size));
    struct iovec readvec[N_BUFFERS + 1];

    int fd = rm_sys_open(path, O_RDONLY);
    if(fd == -1) {
        rm_log_info("open(2) failed for %s: %s\n", path, g_strerror(errno));
        return total_bytes_read;
    }

    /* preadv() is beneficial for large files since it can cut the
     * number of syscall heavily.  I suggest N_BUFFERS=4 as good
     * compromise between memory and cpu.
     *
     * With 16 buffers: 43% cpu 33,871 total
     * With  8 buffers: 43% cpu 32,098 total
     * With  4 buffers: 42% cpu 32,091 total
     * With  2 buffers: 44% cpu 32,245 total
     * With  1 buffers: 45% cpu 34,491 total
     */

    /* Give the kernel scheduler some hints */
    rm_hasher_request_readahead(fd, start_offset, bytes_to_read);

    /* Initialize the buffers to begin with.
     * After a buffer is full, a new one is retrieved.
     */
    RmBuffer **buffers;
    buffers = g_slice_alloc(sizeof(*buffers) * N_BUFFERS);

    memset(readvec, 0, sizeof(readvec));
    for(int i = 0; i < N_BUFFERS; ++i) {
        /* buffer is one contiguous memory block */
        buffers[i] = rm_buffer_get(hasher->mem_pool);
        readvec[i].iov_base = buffers[i]->data;
        readvec[i].iov_len = hasher->buf_size;
    }

    while((bytes_to_read == 0 || total_bytes_read < bytes_to_read) &&
          (bytes_read = rm_sys_preadv(fd, readvec, N_BUFFERS, file_offset)) > 0) {
        /* ignore over-reads */
        bytes_read = MIN(bytes_read, bytes_to_read - total_bytes_read);

        int blocks = DIVIDE_CEIL(bytes_read, hasher->buf_size);
        rm_assert_gentle(blocks <= N_BUFFERS);

        total_bytes_read += bytes_read;
        file_offset += bytes_read;

        for(int i = 0; i < blocks; ++i) {
            /* Get the RmBuffer from the datapointer */
            RmBuffer *buffer = buffers[i];
            /* every block is full except possibly the last one */
            buffer->len = MIN(hasher->buf_size, bytes_read - i * hasher->buf_size);
            buffer->digest = digest;
            buffer->user_data = NULL;

            /* Send it to the hasher */
            rm_util_thread_pool_push(hashpipe, buffer);

            /* Allocate a new buffer - hasher will release the old buffer */
            buffers[i] = rm_buffer_get(hasher->mem_pool);
            readvec[i].iov_base = buffers[i]->data;
            readvec[i].iov_len = hasher->buf_size;
        }
    }

    if(bytes_read == -1) {
        rm_log_perror("preadv failed");
    } else if(total_bytes_read != bytes_to_read) {
        rm_log_error_line(_("Something went wrong reading %s; expected %li bytes, "
                            "got %li; ignoring"),
                          path, (long int)bytes_to_read, (long int)total_bytes_read);
    }

    /* Release the rest of the buffers */
    for(int i = 0; i < N_BUFFERS; ++i) {
        rm_buffer_release(buffers[i]);
    }
    g_slice_free1(sizeof(*buffers) * N_BUFFERS, buffers);

    /* The open succeeded if we get here, so the descriptor is always valid
     * and is closed unconditionally - including descriptor 0. */
    rm_sys_close(fd);
    return total_bytes_read;
}
/**
 * Build the list of currently mounted filesystems.
 *
 * The entries come from getmntent() on /etc/mtab (HAVE_GETMNTENT) or from
 * getmntinfo() (HAVE_GETMNTINFO). Afterwards the list is walked once and the
 * device of every "bindfs" / "nullfs" mount is registered in
 * table->evilfs_table, accompanied by a warning.
 *
 * @param table  mount table whose evilfs_table receives the device ids.
 * @return the newly allocated entry list.
 */
static RmMountEntries *rm_mount_list_open(RmMountTable *table) {
    RmMountEntries *self = g_slice_new(RmMountEntries);
    self->entries = NULL;
    self->current = NULL;

#if HAVE_GETMNTENT
    struct mntent *entry = NULL;
    self->mnt_ent_file = setmntent("/etc/mtab", "r");

    if(self->mnt_ent_file != NULL) {
        while((entry = getmntent(self->mnt_ent_file))) {
            RmMountEntry *wrap_entry = g_slice_new(RmMountEntry);
            wrap_entry->fsname = g_strdup(entry->mnt_fsname);
            wrap_entry->dir = g_strdup(entry->mnt_dir);
            self->entries = g_list_prepend(self->entries, wrap_entry);
        }

        endmntent(self->mnt_ent_file);
    } else {
        rm_log_perror("getmntent");
    }
#elif HAVE_GETMNTINFO /* probably FreeBSD or other */
    int mnt_list_n = 0;
    struct statfs *mnt_list = NULL;

    if((mnt_list_n = getmntinfo(&mnt_list, MNT_NOWAIT)) != 0) {
        for(int i = 0; i < mnt_list_n; ++i) {
            RmMountEntry *wrap_entry = g_slice_new(RmMountEntry);
            struct statfs *entry = &mnt_list[i];

            wrap_entry->fsname = g_strdup(entry->f_mntfromname);
            wrap_entry->dir = g_strdup(entry->f_mntonname);
            self->entries = g_list_prepend(self->entries, wrap_entry);
        }
    } else {
        rm_log_perror("getmntinfo");
    }
#endif

    RmMountEntry *wrap_entry = NULL;
    while((wrap_entry = rm_mount_list_next(self))) {
        /* bindfs mounts mirror directory trees.
         * This cannot be detected properly by rmlint since
         * files in it have the same inode as their unmirrored file, but
         * a different dev_t.
         *
         * So better go and ignore it.
         */
        static const char *evilfs_types[] = {"bindfs", "nullfs", NULL};

        const char *evilfs_found = NULL;
        for(int i = 0; evilfs_types[i] && !evilfs_found; ++i) {
            if(strcmp(evilfs_types[i], wrap_entry->fsname) == 0) {
                evilfs_found = evilfs_types[i];
            }
        }

        if(evilfs_found != NULL) {
            RmStat dir_stat;

            /* Without a successful stat there is no valid device id to
             * register; skip the entry instead of using garbage. */
            if(rm_sys_stat(wrap_entry->dir, &dir_stat) == -1) {
                rm_log_perror("stat(2) failed");
                continue;
            }

            g_hash_table_insert(
                table->evilfs_table,
                GUINT_TO_POINTER(dir_stat.st_dev),
                GUINT_TO_POINTER(1)
            );

            rm_log_error(
                YELLOW"WARNING:"RESET" `%s` mount detected at %s (#%u); Ignoring all files in it.\n",
                evilfs_found,
                wrap_entry->dir,
                (unsigned)dir_stat.st_dev
            );
        }
    }

    return self;
}
/**
 * Test whether a file is a non-stripped binary, i.e. an ELF file that still
 * carries a symbol table section (SHT_SYMTAB). Uses libelf; without
 * HAVE_LIBELF the answer is always false.
 *
 * @param path   file to inspect.
 * @param statp  optional stat info; if given and no execute bit is set, the
 *               file is rejected without being opened.
 * @return true if a symbol table section was found, false otherwise
 *         (including every error case).
 */
bool rm_util_is_nonstripped(_U const char *path, _U RmStat *statp) {
    bool is_ns = false;

#if HAVE_LIBELF
    g_return_val_if_fail(path, false);

    if(statp && (statp->st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
        return false;
    }

    /* inspired by "jschmier"'s answer at http://stackoverflow.com/a/5159890 */
    int fd;

    /* ELF handle */
    Elf *elf;

    /* section descriptor pointer */
    Elf_Scn *scn;

    /* section header */
    GElf_Shdr shdr;

    /* Open ELF file to obtain file descriptor */
    if((fd = rm_sys_open(path, O_RDONLY)) == -1) {
        rm_log_warning_line(_("cannot open file '%s' for nonstripped test: "), path);
        rm_log_perror("");
        return 0;
    }

    /* Protect program from using an older library */
    if(elf_version(EV_CURRENT) == EV_NONE) {
        rm_log_error_line(_("ELF Library is out of date!"));
        /* the file is already open at this point - do not leak it */
        rm_sys_close(fd);
        return false;
    }

    /* Initialize elf pointer for examining contents of file */
    elf = elf_begin(fd, ELF_C_READ, NULL);

    /* Initialize section descriptor pointer so that elf_nextscn()
     * returns a pointer to the section descriptor at index 1.
     * */
    scn = NULL;

    /* Iterate through ELF sections */
    while((scn = elf_nextscn(elf, scn)) != NULL) {
        /* Retrieve section header; skip sections whose header cannot be
         * read, shdr would hold indeterminate data otherwise */
        if(gelf_getshdr(scn, &shdr) == NULL) {
            continue;
        }

        /* If a section header holding a symbol table (.symtab)
         * is found, this ELF file has not been stripped. */
        if(shdr.sh_type == SHT_SYMTAB) {
            is_ns = true;
            break;
        }
    }

    elf_end(elf);
    rm_sys_close(fd);
#endif

    return is_ns;
}