RmOffsetTable rm_offset_create_table(const char *path) { int fd = rm_sys_open(path, O_RDONLY); if(fd == -1) { rm_log_info("Error opening %s in setup_fiemap_extents\n", path); return NULL; } /* struct fiemap does not allocate any extents by default, * so we choose ourself how many of them we allocate. * */ const int n_extents = 256; struct fiemap *fiemap = g_malloc0(sizeof(struct fiemap) + n_extents * sizeof(struct fiemap_extent)); struct fiemap_extent *fm_ext = fiemap->fm_extents; /* data structure we save our offsets in */ GSequence *self = g_sequence_new((GFreeFunc)rm_offset_free_func); bool last = false; while(!last) { fiemap->fm_flags = 0; fiemap->fm_extent_count = n_extents; fiemap->fm_length = FIEMAP_MAX_OFFSET; if(ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap) == -1) { break; } /* This might happen on empty files - those have no * extents, but they have an offset on the disk. */ if(fiemap->fm_mapped_extents <= 0) { break; } /* used for detecting contiguous extents, which we ignore */ unsigned long expected = 0; /* Remember all non contiguous extents */ for(unsigned i = 0; i < fiemap->fm_mapped_extents && !last; i++) { if (i == 0 || fm_ext[i].fe_physical != expected) { RmOffsetEntry *offset_entry = g_slice_new(RmOffsetEntry); offset_entry->logical = fm_ext[i].fe_logical; offset_entry->physical = fm_ext[i].fe_physical; g_sequence_append(self, offset_entry); } expected = fm_ext[i].fe_physical + fm_ext[i].fe_length; fiemap->fm_start = fm_ext[i].fe_logical + fm_ext[i].fe_length; last = fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST; } } rm_sys_close(fd); g_free(fiemap); g_sequence_sort(self, (GCompareDataFunc)rm_offset_sort_logical, NULL); return self; }
static gint64 rm_hasher_buffered_read(RmHasher *hasher, GThreadPool *hashpipe, RmDigest *digest, char *path, gsize start_offset, gsize bytes_to_read) { FILE *fd = NULL; if(bytes_to_read == 0) { bytes_to_read = G_MAXSIZE; } gsize total_bytes_read = 0; if((fd = fopen(path, "rb")) == NULL) { rm_log_info("fopen(3) failed for %s: %s\n", path, g_strerror(errno)); goto finish; } gint32 bytes_read = 0; rm_hasher_request_readahead(fileno(fd), start_offset, bytes_to_read); if(fseek(fd, start_offset, SEEK_SET) == -1) { rm_log_perror("fseek(3) failed"); goto finish; } RmBuffer *buffer = rm_buffer_get(hasher->mem_pool); while((bytes_read = fread(buffer->data, 1, MIN(bytes_to_read, hasher->buf_size), fd)) > 0) { bytes_to_read -= bytes_read; buffer->len = bytes_read; buffer->digest = digest; buffer->user_data = NULL; rm_util_thread_pool_push(hashpipe, buffer); total_bytes_read += bytes_read; buffer = rm_buffer_get(hasher->mem_pool); } rm_buffer_release(buffer); if(ferror(fd) != 0) { rm_log_perror("fread(3) failed"); if(total_bytes_read == bytes_to_read) { /* signal error to caller */ total_bytes_read++; } } finish: if(fd != NULL) { fclose(fd); } return total_bytes_read; }
RmOff rm_offset_get_from_path(const char *path, RmOff file_offset, RmOff *file_offset_next) { int fd = rm_sys_open(path, O_RDONLY); if(fd == -1) { rm_log_info("Error opening %s in rm_offset_get_from_path\n", path); return 0; } RmOff result = rm_offset_get_from_fd(fd, file_offset, file_offset_next); rm_sys_close(fd); return result; }
static gint64 rm_hasher_unbuffered_read(RmHasher *hasher, GThreadPool *hashpipe, RmDigest *digest, char *path, gint64 start_offset, gint64 bytes_to_read) { gint32 bytes_read = 0; gint64 total_bytes_read = 0; guint64 file_offset = start_offset; if(bytes_to_read == 0) { RmStat stat_buf; if(rm_sys_stat(path, &stat_buf) != -1) { bytes_to_read = MAX(stat_buf.st_size - start_offset, 0); } } /* how many buffers to read? */ const gint16 N_BUFFERS = MIN(4, DIVIDE_CEIL(bytes_to_read, hasher->buf_size)); struct iovec readvec[N_BUFFERS + 1]; int fd = 0; fd = rm_sys_open(path, O_RDONLY); if(fd == -1) { rm_log_info("open(2) failed for %s: %s\n", path, g_strerror(errno)); goto finish; } /* preadv() is beneficial for large files since it can cut the * number of syscall heavily. I suggest N_BUFFERS=4 as good * compromise between memory and cpu. * * With 16 buffers: 43% cpu 33,871 total * With 8 buffers: 43% cpu 32,098 total * With 4 buffers: 42% cpu 32,091 total * With 2 buffers: 44% cpu 32,245 total * With 1 buffers: 45% cpu 34,491 total */ /* Give the kernel scheduler some hints */ rm_hasher_request_readahead(fd, start_offset, bytes_to_read); /* Initialize the buffers to begin with. * After a buffer is full, a new one is retrieved. */ RmBuffer **buffers; buffers = g_slice_alloc(sizeof(*buffers) * N_BUFFERS); memset(readvec, 0, sizeof(readvec)); for(int i = 0; i < N_BUFFERS; ++i) { /* buffer is one contignous memory block */ buffers[i] = rm_buffer_get(hasher->mem_pool); readvec[i].iov_base = buffers[i]->data; readvec[i].iov_len = hasher->buf_size; } while((bytes_to_read == 0 || total_bytes_read < bytes_to_read) && (bytes_read = rm_sys_preadv(fd, readvec, N_BUFFERS, file_offset)) > 0) { bytes_read = MIN(bytes_read, bytes_to_read - total_bytes_read); /* ignore over-reads */ int blocks = DIVIDE_CEIL(bytes_read, hasher->buf_size); rm_assert_gentle(blocks <= N_BUFFERS); total_bytes_read += bytes_read; file_offset += bytes_read; for(int i = 0; i < blocks; ++i) { /* Get the RmBuffer from the datapointer */ RmBuffer *buffer = buffers[i]; buffer->len = MIN(hasher->buf_size, bytes_read - i * hasher->buf_size); buffer->digest = digest; buffer->user_data = NULL; /* Send it to the hasher */ rm_util_thread_pool_push(hashpipe, buffer); /* Allocate a new buffer - hasher will release the old buffer */ buffers[i] = rm_buffer_get(hasher->mem_pool); readvec[i].iov_base = buffers[i]->data; readvec[i].iov_len = hasher->buf_size; } } if(bytes_read == -1) { rm_log_perror("preadv failed"); } else if(total_bytes_read != bytes_to_read) { rm_log_error_line(_("Something went wrong reading %s; expected %li bytes, " "got %li; ignoring"), path, (long int)bytes_to_read, (long int)total_bytes_read); } /* Release the rest of the buffers */ for(int i = 0; i < N_BUFFERS; ++i) { rm_buffer_release(buffers[i]); } g_slice_free1(sizeof(*buffers) * N_BUFFERS, buffers); finish: if(fd > 0) { rm_sys_close(fd); } return total_bytes_read; }
static bool rm_mounts_create_tables(RmMountTable *self) { /* partition dev_t to disk dev_t */ self->part_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)rm_part_info_free); /* disk dev_t to boolean indication if disk is rotational */ self->disk_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)rm_disk_info_free); self->nfs_table = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); /* Mapping dev_t => true (used as set) */ self->evilfs_table = g_hash_table_new(NULL, NULL); RmMountEntry *entry = NULL; RmMountEntries *mnt_entries = rm_mount_list_open(self); if(mnt_entries == NULL) { return false; } while((entry = rm_mount_list_next(mnt_entries))) { RmStat stat_buf_folder; if(rm_sys_stat(entry->dir, &stat_buf_folder) == -1) { continue; } dev_t whole_disk = 0; gchar is_rotational = true; char diskname[PATH_MAX]; memset(diskname, 0, sizeof(diskname)); RmStat stat_buf_dev; if(rm_sys_stat(entry->fsname, &stat_buf_dev) == -1) { char *nfs_marker = NULL; /* folder rm_sys_stat() is ok but devname rm_sys_stat() is not; this happens for example * with tmpfs and with nfs mounts. Try to handle a few such cases. * */ if(rm_mounts_is_ramdisk(entry->fsname)) { strncpy(diskname, entry->fsname, sizeof(diskname)); is_rotational = false; whole_disk = stat_buf_folder.st_dev; } else if((nfs_marker = strstr(entry->fsname, ":/")) != NULL) { size_t until_slash = MIN((int)sizeof(entry->fsname), nfs_marker - entry->fsname); strncpy(diskname, entry->fsname, until_slash); is_rotational = true; /* Assign different dev ids (with major id 0) to different nfs servers */ if(!g_hash_table_contains(self->nfs_table, diskname)) { g_hash_table_insert(self->nfs_table, g_strdup(diskname), NULL); } whole_disk = makedev(0, g_hash_table_size(self->nfs_table)); } else { strncpy(diskname, "unknown", sizeof(diskname)); is_rotational = true; whole_disk = 0; } } else { if(rm_mounts_devno_to_wholedisk( entry, stat_buf_dev.st_rdev, diskname, sizeof(diskname), &whole_disk ) == -1) { /* folder and devname rm_sys_stat() are ok but blkid failed; this happens when? * Treat as a non-rotational device using devname dev as whole_disk key * */ rm_log_debug(RED"devno_to_wholedisk failed for %s\n"RESET, entry->fsname); whole_disk = stat_buf_dev.st_dev; strncpy(diskname, entry->fsname, sizeof(diskname)); is_rotational = false; } else { is_rotational = rm_mounts_is_rotational_blockdev(diskname); } } g_hash_table_insert( self->part_table, GUINT_TO_POINTER(stat_buf_folder.st_dev), rm_part_info_new (entry->dir, whole_disk)); /* small hack, so also the full disk id can be given to the api below */ if (!g_hash_table_contains(self->part_table, GINT_TO_POINTER(whole_disk))) { g_hash_table_insert( self->part_table, GUINT_TO_POINTER(whole_disk), rm_part_info_new (entry->dir, whole_disk)); } if (!g_hash_table_contains(self->disk_table, GINT_TO_POINTER(whole_disk))) { g_hash_table_insert( self->disk_table, GINT_TO_POINTER(whole_disk), rm_disk_info_new(diskname, is_rotational)); } rm_log_info("%02u:%02u %50s -> %02u:%02u %-12s (underlying disk: %s; rotational: %3s\n)", major(stat_buf_folder.st_dev), minor(stat_buf_folder.st_dev), entry->dir, major(whole_disk), minor(whole_disk), entry->fsname, diskname, is_rotational ? "yes" : "no" ); } #if HAVE_SYSCTL if(DISK_TABLE) { g_hash_table_unref(DISK_TABLE); } #endif rm_mount_list_close(mnt_entries); return true; }