static int zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj) { struct dmu_tx *tx; int rc; tx = dmu_tx_create(os); dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); rc = dmu_tx_assign(tx, TXG_WAIT); if (rc) { zpios_print(run_args->file, "dmu_tx_assign() failed: %d\n", rc); dmu_tx_abort(tx); return (rc); } rc = dmu_object_free(os, obj, tx); if (rc) { zpios_print(run_args->file, "dmu_object_free() failed: %d\n", rc); dmu_tx_abort(tx); return (rc); } dmu_tx_commit(tx); return (0); }
static uint64_t zpios_dmu_object_create(run_args_t *run_args, objset_t *os) { struct dmu_tx *tx; uint64_t obj = 0ULL; int rc; tx = dmu_tx_create(os); dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE); rc = dmu_tx_assign(tx, TXG_WAIT); if (rc) { zpios_print(run_args->file, "dmu_tx_assign() failed: %d\n", rc); dmu_tx_abort(tx); return obj; } obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx); rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx); if (rc) { zpios_print(run_args->file, "dmu_object_set_blocksize() failed: %d\n", rc); dmu_tx_abort(tx); return obj; } dmu_tx_commit(tx); return obj; }
static int zpios_ioctl_cmd(struct file *file, unsigned long arg) { zpios_cmd_t *kcmd; void *data = NULL; int rc = -EINVAL; kcmd = kmem_alloc(sizeof (zpios_cmd_t), KM_SLEEP); rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof (zpios_cmd_t)); if (rc) { zpios_print(file, "Unable to copy command structure " "from user to kernel memory, %d\n", rc); goto out_cmd; } if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) { zpios_print(file, "Bad command magic 0x%x != 0x%x\n", kcmd->cmd_magic, ZPIOS_CFG_MAGIC); rc = (-EINVAL); goto out_cmd; } /* Allocate memory for any opaque data the caller needed to pass on */ if (kcmd->cmd_data_size > 0) { data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP); rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t, cmd_data_str)), kcmd->cmd_data_size); if (rc) { zpios_print(file, "Unable to copy data buffer " "from user to kernel memory, %d\n", rc); goto out_data; } } rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data); if (data != NULL) { /* If the test failed do not print out the stats */ if (rc) goto out_data; rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t, cmd_data_str)), data, kcmd->cmd_data_size); if (rc) { zpios_print(file, "Unable to copy data buffer " "from kernel to user memory, %d\n", rc); rc = -EFAULT; } out_data: vmem_free(data, kcmd->cmd_data_size); } out_cmd: kmem_free(kcmd, sizeof (zpios_cmd_t)); return (rc); }
static void zpios_remove_objset(run_args_t *run_args) { zpios_time_t *t = &(run_args->stats.rm_time); zpios_region_t *region; char name[32]; int rc = 0, i; (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0); t->start = zpios_timespec_now(); (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); if (run_args->flags & DMU_REMOVE) { if (run_args->flags & DMU_FPP) { for (i = 0; i < run_args->region_count; i++) { region = &run_args->regions[i]; rc = zpios_dmu_object_free(run_args, region->obj.os, region->obj.obj); if (rc) zpios_print(run_args->file, "Error " "removing object %d, %d\n", (int)region->obj.obj, rc); } } else { region = &run_args->regions[0]; rc = zpios_dmu_object_free(run_args, region->obj.os, region->obj.obj); if (rc) zpios_print(run_args->file, "Error " "removing object %d, %d\n", (int)region->obj.obj, rc); } } dmu_objset_disown(run_args->os, zpios_tag); if (run_args->flags & DMU_REMOVE) { rc = dsl_destroy_head(name); if (rc) zpios_print(run_args->file, "Error dsl_destroy_head" "(%s, ...) failed: %d\n", name, rc); } t->stop = zpios_timespec_now(); t->delta = zpios_timespec_sub(t->stop, t->start); (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc); }
static long zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { unsigned int minor = iminor(file->f_dentry->d_inode); int rc = 0; /* Ignore tty ioctls */ if ((cmd & 0xffffff00) == ((int)'T') << 8) return -ENOTTY; if (minor >= ZPIOS_MINORS) return -ENXIO; switch (cmd) { case ZPIOS_CFG: rc = zpios_ioctl_cfg(file, arg); break; case ZPIOS_CMD: rc = zpios_ioctl_cmd(file, arg); break; default: zpios_print(file, "Bad ioctl command %d\n", cmd); rc = -EINVAL; break; } return rc; }
static uint64_t zpios_dmu_object_create(run_args_t *run_args, objset_t *os) { struct dmu_tx *tx; uint64_t obj = 0ULL; uint64_t blksize = run_args->block_size; int rc; if (blksize < SPA_MINBLOCKSIZE || blksize > spa_maxblocksize(dmu_objset_spa(os)) || !ISP2(blksize)) { zpios_print(run_args->file, "invalid block size for pool: %d\n", (int)blksize); return (obj); } tx = dmu_tx_create(os); dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE); rc = dmu_tx_assign(tx, TXG_WAIT); if (rc) { zpios_print(run_args->file, "dmu_tx_assign() failed: %d\n", rc); dmu_tx_abort(tx); return (obj); } obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx); rc = dmu_object_set_blocksize(os, obj, blksize, 0, tx); if (rc) { zpios_print(run_args->file, "dmu_object_set_blocksize to %d failed: %d\n", (int)blksize, rc); dmu_tx_abort(tx); return (obj); } dmu_tx_commit(tx); return (obj); }
static int zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file) { run_args_t *ra; int rc, size; size = sizeof (*ra) + kcmd->cmd_region_count * sizeof (zpios_region_t); ra = vmem_zalloc(size, KM_SLEEP); if (ra == NULL) { zpios_print(file, "Unable to vmem_zalloc() %d bytes " "for regions\n", size); return (-ENOMEM); } *run_args = ra; strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1); strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1); strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1); strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1); ra->id = kcmd->cmd_id; ra->chunk_size = kcmd->cmd_chunk_size; ra->thread_count = kcmd->cmd_thread_count; ra->region_count = kcmd->cmd_region_count; ra->region_size = kcmd->cmd_region_size; ra->offset = kcmd->cmd_offset; ra->region_noise = kcmd->cmd_region_noise; ra->chunk_noise = kcmd->cmd_chunk_noise; ra->thread_delay = kcmd->cmd_thread_delay; ra->flags = kcmd->cmd_flags; ra->block_size = kcmd->cmd_block_size; ra->stats.wr_data = 0; ra->stats.wr_chunks = 0; ra->stats.rd_data = 0; ra->stats.rd_chunks = 0; ra->region_next = 0; ra->file = file; mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL); (void) zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0); rc = zpios_dmu_setup(ra); if (rc) { mutex_destroy(&ra->lock_ctl); mutex_destroy(&ra->lock_work); vmem_free(ra, size); *run_args = NULL; } return (rc); }
static int zpios_ioctl_cfg(struct file *file, unsigned long arg) { zpios_cfg_t kcfg; int rc = 0; if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof (kcfg))) return (-EFAULT); if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) { zpios_print(file, "Bad config magic 0x%x != 0x%x\n", kcfg.cfg_magic, ZPIOS_CFG_MAGIC); return (-EINVAL); } switch (kcfg.cfg_cmd) { case ZPIOS_CFG_BUFFER_CLEAR: /* * cfg_arg1 - Unused * cfg_rc1 - Unused */ rc = zpios_buffer_clear(file, &kcfg, arg); break; case ZPIOS_CFG_BUFFER_SIZE: /* * cfg_arg1 - 0 - query size; >0 resize * cfg_rc1 - Set to current buffer size */ rc = zpios_buffer_size(file, &kcfg, arg); break; default: zpios_print(file, "Bad config command %d\n", kcfg.cfg_cmd); rc = -EINVAL; break; } return (rc); }
static int zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf) { struct dmu_tx *tx; int rc, how = TXG_WAIT; // int flags = 0; if (run_args->flags & DMU_WRITE_NOWAIT) how = TXG_NOWAIT; while (1) { tx = dmu_tx_create(os); dmu_tx_hold_write(tx, object, offset, size); rc = dmu_tx_assign(tx, how); if (rc) { if (rc == ERESTART && how == TXG_NOWAIT) { dmu_tx_wait(tx); dmu_tx_abort(tx); continue; } zpios_print(run_args->file, "Error in dmu_tx_assign(), %d", rc); dmu_tx_abort(tx); return (rc); } break; } // if (run_args->flags & DMU_WRITE_ZC) // flags |= DMU_WRITE_ZEROCOPY; dmu_write(os, object, offset, size, buf, tx); dmu_tx_commit(tx); return (0); }
static long zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int rc = 0; /* Ignore tty ioctls */ if ((cmd & 0xffffff00) == ((int)'T') << 8) return (-ENOTTY); switch (cmd) { case ZPIOS_CFG: rc = zpios_ioctl_cfg(file, arg); break; case ZPIOS_CMD: rc = zpios_ioctl_cmd(file, arg); break; default: zpios_print(file, "Bad ioctl command %d\n", cmd); rc = -EINVAL; break; } return (rc); }
static int zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd, int data_size, void *data) { run_args_t *run_args = { 0 }; zpios_stats_t *stats = (zpios_stats_t *)data; int i, n, m, size, rc; if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) || (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) { zpios_print(file, "Invalid chunk_size, region_size, " "thread_count, or region_count, %d\n", -EINVAL); return (-EINVAL); } if (!(kcmd->cmd_flags & DMU_WRITE) || !(kcmd->cmd_flags & DMU_READ)) { zpios_print(file, "Invalid flags, minimally DMU_WRITE " "and DMU_READ must be set, %d\n", -EINVAL); return (-EINVAL); } if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) && (kcmd->cmd_flags & DMU_VERIFY)) { zpios_print(file, "Invalid flags, DMU_*_ZC incompatible " "with DMU_VERIFY, used for performance analysis " "only, %d\n", -EINVAL); return (-EINVAL); } /* * Opaque data on return contains structs of the following form: * * zpios_stat_t stats[]; * stats[0] = run_args->stats; * stats[1-N] = threads[N]->stats; * stats[N+1-M] = regions[M]->stats; * * Where N is the number of threads, and M is the number of regions. */ size = (sizeof (zpios_stats_t) + (kcmd->cmd_thread_count * sizeof (zpios_stats_t)) + (kcmd->cmd_region_count * sizeof (zpios_stats_t))); if (data_size < size) { zpios_print(file, "Invalid size, command data buffer " "size too small, (%d < %d)\n", data_size, size); return (-ENOSPC); } rc = zpios_setup_run(&run_args, kcmd, file); if (rc) return (rc); rc = zpios_threads_run(run_args); zpios_remove_objset(run_args); if (rc) goto cleanup; if (stats) { n = 1; m = 1 + kcmd->cmd_thread_count; stats[0] = run_args->stats; for (i = 0; i < kcmd->cmd_thread_count; i++) stats[n+i] = run_args->threads[i]->stats; for (i = 0; i < kcmd->cmd_region_count; i++) stats[m+i] = run_args->regions[i].stats; } cleanup: zpios_cleanup_run(run_args); (void) zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0); return (rc); }
static int zpios_thread_main(void *data) { thread_data_t *thr = (thread_data_t *)data; run_args_t *run_args = thr->run_args; zpios_time_t t; dmu_obj_t obj; __u64 offset; __u32 chunk_size; zpios_region_t *region; char *buf; unsigned int random_int; int chunk_noise = run_args->chunk_noise; int chunk_noise_tmp = 0; int thread_delay = run_args->thread_delay; int thread_delay_tmp = 0; int i, rc = 0; if (chunk_noise) { get_random_bytes(&random_int, sizeof (unsigned int)); chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise; } /* * It's OK to vmem_alloc() this memory because it will be copied * in to the slab and pointers to the slab copy will be setup in * the bio when the IO is submitted. This of course is not ideal * since we want a zero-copy IO path if possible. It would be nice * to have direct access to those slab entries. */ chunk_size = run_args->chunk_size + chunk_noise_tmp; buf = (char *)vmem_alloc(chunk_size, KM_SLEEP); ASSERT(buf); /* Trivial data verification pattern for now. */ if (run_args->flags & DMU_VERIFY) memset(buf, 'z', chunk_size); /* Write phase */ mutex_enter(&thr->lock); thr->stats.wr_time.start = zpios_timespec_now(); mutex_exit(&thr->lock); while (zpios_get_work_item(run_args, &obj, &offset, &chunk_size, ®ion, DMU_WRITE)) { if (thread_delay) { get_random_bytes(&random_int, sizeof (unsigned int)); thread_delay_tmp = random_int % thread_delay; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(thread_delay_tmp); /* In jiffies */ } t.start = zpios_timespec_now(); rc = zpios_dmu_write(run_args, obj.os, obj.obj, offset, chunk_size, buf); t.stop = zpios_timespec_now(); t.delta = zpios_timespec_sub(t.stop, t.start); if (rc) { zpios_print(run_args->file, "IO error while doing " "dmu_write(): %d\n", rc); break; } mutex_enter(&thr->lock); thr->stats.wr_data += chunk_size; thr->stats.wr_chunks++; thr->stats.wr_time.delta = zpios_timespec_add( thr->stats.wr_time.delta, t.delta); mutex_exit(&thr->lock); mutex_enter(®ion->lock); region->stats.wr_data += chunk_size; region->stats.wr_chunks++; region->stats.wr_time.delta = zpios_timespec_add( region->stats.wr_time.delta, t.delta); /* First time region was accessed */ if (region->init_offset == offset) region->stats.wr_time.start = t.start; mutex_exit(®ion->lock); } mutex_enter(&run_args->lock_ctl); run_args->threads_done++; mutex_exit(&run_args->lock_ctl); mutex_enter(&thr->lock); thr->rc = rc; thr->stats.wr_time.stop = zpios_timespec_now(); mutex_exit(&thr->lock); wake_up(&run_args->waitq); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); /* Check if we should exit */ mutex_enter(&thr->lock); rc = thr->rc; mutex_exit(&thr->lock); if (rc) goto out; /* Read phase */ mutex_enter(&thr->lock); thr->stats.rd_time.start = zpios_timespec_now(); mutex_exit(&thr->lock); while (zpios_get_work_item(run_args, &obj, &offset, &chunk_size, ®ion, DMU_READ)) { if (thread_delay) { get_random_bytes(&random_int, sizeof (unsigned int)); thread_delay_tmp = random_int % thread_delay; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(thread_delay_tmp); /* In jiffies */ } if (run_args->flags & DMU_VERIFY) memset(buf, 0, chunk_size); t.start = zpios_timespec_now(); rc = zpios_dmu_read(run_args, obj.os, obj.obj, offset, chunk_size, buf); t.stop = zpios_timespec_now(); t.delta = zpios_timespec_sub(t.stop, t.start); if (rc) { zpios_print(run_args->file, "IO error while doing " "dmu_read(): %d\n", rc); break; } /* Trivial data verification, expensive! */ if (run_args->flags & DMU_VERIFY) { for (i = 0; i < chunk_size; i++) { if (buf[i] != 'z') { zpios_print(run_args->file, "IO verify error: %d/%d/%d\n", (int)obj.obj, (int)offset, (int)chunk_size); break; } } } mutex_enter(&thr->lock); thr->stats.rd_data += chunk_size; thr->stats.rd_chunks++; thr->stats.rd_time.delta = zpios_timespec_add( thr->stats.rd_time.delta, t.delta); mutex_exit(&thr->lock); mutex_enter(®ion->lock); region->stats.rd_data += chunk_size; region->stats.rd_chunks++; region->stats.rd_time.delta = zpios_timespec_add( region->stats.rd_time.delta, t.delta); /* First time region was accessed */ if (region->init_offset == offset) region->stats.rd_time.start = t.start; mutex_exit(®ion->lock); } mutex_enter(&run_args->lock_ctl); run_args->threads_done++; mutex_exit(&run_args->lock_ctl); mutex_enter(&thr->lock); thr->rc = rc; thr->stats.rd_time.stop = zpios_timespec_now(); mutex_exit(&thr->lock); wake_up(&run_args->waitq); out: vmem_free(buf, chunk_size); do_exit(0); return (rc); /* Unreachable, due to do_exit() */ }
static int zpios_dmu_setup(run_args_t *run_args) { zpios_time_t *t = &(run_args->stats.cr_time); objset_t *os; char name[32]; uint64_t obj = 0ULL; int i, rc = 0, rc2; (void) zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0); t->start = zpios_timespec_now(); (void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL); if (rc) { zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) " "failed: %d\n", name, rc); goto out; } rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os); if (rc) { zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) " "failed: %d\n", name, rc); goto out_destroy; } if (!(run_args->flags & DMU_FPP)) { obj = zpios_dmu_object_create(run_args, os); if (obj == 0) { rc = -EBADF; zpios_print(run_args->file, "Error zpios_dmu_" "object_create() failed, %d\n", rc); goto out_destroy; } } for (i = 0; i < run_args->region_count; i++) { zpios_region_t *region; region = &run_args->regions[i]; mutex_init(®ion->lock, NULL, MUTEX_DEFAULT, NULL); if (run_args->flags & DMU_FPP) { /* File per process */ region->obj.os = os; region->obj.obj = zpios_dmu_object_create(run_args, os); ASSERT(region->obj.obj > 0); /* XXX - Handle this */ region->wr_offset = run_args->offset; region->rd_offset = run_args->offset; region->init_offset = run_args->offset; region->max_offset = run_args->offset + run_args->region_size; } else { /* Single shared file */ region->obj.os = os; region->obj.obj = obj; region->wr_offset = run_args->offset * i; region->rd_offset = run_args->offset * i; region->init_offset = run_args->offset * i; region->max_offset = run_args->offset * i + run_args->region_size; } } run_args->os = os; out_destroy: if (rc) { rc2 = dsl_destroy_head(name); if (rc2) zpios_print(run_args->file, "Error dsl_destroy_head" "(%s, ...) failed: %d\n", name, rc2); } out: t->stop = zpios_timespec_now(); t->delta = zpios_timespec_sub(t->stop, t->start); (void) zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc); return (rc); }