Example #1
static int fio_rdmaio_open_file(struct thread_data *td, struct fio_file *f)
{
	if (td_read(td))
		return fio_rdmaio_accept(td, f);
	else
		return fio_rdmaio_connect(td, f);
}
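
Every example on this page branches on td_read(), td_write(), td_rw() and related checks, which in fio are simple bitmask tests against the job's configured data direction. Below is a minimal, self-contained sketch of that idea: the TD_DDIR_* flag names and macro shapes mirror fio's io_ddir.h, but the structs and main() are simplified stand-ins, not the real thread_data.

/*
 * Sketch only: TD_DDIR_* values follow fio's io_ddir.h, while
 * sketch_td/sketch_options are cut-down stand-ins for thread_data.
 */
#include <stdio.h>

enum td_ddir {
	TD_DDIR_READ	= 1 << 0,
	TD_DDIR_WRITE	= 1 << 1,
	TD_DDIR_TRIM	= 1 << 2,
	TD_DDIR_RAND	= 1 << 3,
	TD_DDIR_RW	= TD_DDIR_READ | TD_DDIR_WRITE,
};

struct sketch_options { unsigned int td_ddir; };
struct sketch_td { struct sketch_options o; };

#define td_read(td)	((td)->o.td_ddir & TD_DDIR_READ)
#define td_write(td)	((td)->o.td_ddir & TD_DDIR_WRITE)
#define td_rw(td)	(((td)->o.td_ddir & TD_DDIR_RW) == TD_DDIR_RW)
#define td_random(td)	((td)->o.td_ddir & TD_DDIR_RAND)

int main(void)
{
	/* a random-read job: td_read() is true and td_rw() is false, so
	 * fio_rdmaio_open_file() above would take the accept (server) path */
	struct sketch_td td = { .o = { .td_ddir = TD_DDIR_READ | TD_DDIR_RAND } };

	printf("read=%d write=%d rw=%d random=%d\n",
	       !!td_read(&td), !!td_write(&td), !!td_rw(&td), !!td_random(&td));
	return 0;
}
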
Example #2
File: eta.c Project: martin21/fio
/*
 * Sets the status of the 'td' in the printed status map.
 */
static void check_str_update(struct thread_data *td)
{
	char c = run_str[td->thread_number - 1];

	switch (td->runstate) {
	case TD_REAPED:
		c = '_';
		break;
	case TD_EXITED:
		c = 'E';
		break;
	case TD_RAMP:
		c = '/';
		break;
	case TD_RUNNING:
		if (td_rw(td)) {
			if (td_random(td))
				c = 'm';
			else
				c = 'M';
		} else if (td_read(td)) {
			if (td_random(td))
				c = 'r';
			else
				c = 'R';
		} else {
			if (td_random(td))
				c = 'w';
			else
				c = 'W';
		}
		break;
	case TD_PRE_READING:
		c = 'p';
		break;
	case TD_VERIFYING:
		c = 'V';
		break;
	case TD_FSYNCING:
		c = 'F';
		break;
	case TD_CREATED:
		c = 'C';
		break;
	case TD_INITIALIZED:
		c = 'I';
		break;
	case TD_NOT_CREATED:
		c = 'P';
		break;
	default:
		log_err("state %d\n", td->runstate);
	}

	run_str[td->thread_number - 1] = c;
}
Example #3
File: io_u.c Project: vsharma13/fio
/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	enum fio_ddir ddir;

	/*
	 * see if it's time to fsync
	 */
	if (td->o.fsync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_SYNC;

	/*
	 * see if it's time to fdatasync
	 */
	if (td->o.fdatasync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_DATASYNC;

	/*
	 * see if it's time to sync_file_range
	 */
	if (td->sync_file_range_nr &&
	   !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_SYNC_FILE_RANGE;

	if (td_rw(td)) {
		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
			/*
			 * Put a top limit on how many bytes we do for
			 * one data direction, to avoid overflowing the
			 * ranges too much
			 */
			ddir = get_rand_ddir(td);

			if (ddir != td->rwmix_ddir)
				set_rwmix_bytes(td);

			td->rwmix_ddir = ddir;
		}
		ddir = td->rwmix_ddir;
	} else if (td_read(td))
		ddir = DDIR_READ;
	else if (td_write(td))
		ddir = DDIR_WRITE;
	else
		ddir = DDIR_TRIM;

	td->rwmix_ddir = rate_ddir(td, ddir);
	return td->rwmix_ddir;
}
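
The sync guards at the top of get_rw_ddir() inject a DDIR_SYNC (or DDIR_DATASYNC / DDIR_SYNC_FILE_RANGE) every N issued writes. A small standalone sketch of that cadence check, using fsync_blocks = 4 as an arbitrary made-up value:

#include <stdio.h>

int main(void)
{
	unsigned int fsync_blocks = 4;		/* made-up fsync= setting */
	unsigned long io_issues_write;

	for (io_issues_write = 1; io_issues_write <= 10; io_issues_write++) {
		/* mirrors: o.fsync_blocks && !(issues % fsync_blocks) && issues */
		int do_sync = fsync_blocks &&
			      !(io_issues_write % fsync_blocks) &&
			      io_issues_write;

		printf("write issue %2lu -> %s\n", io_issues_write,
		       do_sync ? "DDIR_SYNC" : "normal write");
	}
	return 0;
}
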
Example #4
static void update_accounting(struct submit_worker *sw)
{
	struct thread_data *src = &sw->td;
	struct thread_data *dst = sw->wq->td;

	if (td_read(src))
		sum_ddir(dst, src, DDIR_READ);
	if (td_write(src))
		sum_ddir(dst, src, DDIR_WRITE);
	if (td_trim(src))
		sum_ddir(dst, src, DDIR_TRIM);
}
Example #5
static void io_workqueue_update_acct_fn(struct submit_worker *sw)
{
    struct thread_data *src = sw->priv;
    struct thread_data *dst = sw->wq->td;

    if (td_read(src))
        sum_ddir(dst, src, DDIR_READ);
    if (td_write(src))
        sum_ddir(dst, src, DDIR_WRITE);
    if (td_trim(src))
        sum_ddir(dst, src, DDIR_TRIM);

}
Example #6
void rate_throttle(struct thread_data *td, unsigned long time_spent,
		   unsigned int bytes)
{
	unsigned long usec_cycle;
	unsigned int bs;

	if (!td->o.rate && !td->o.rate_iops)
		return;

	if (td_rw(td))
		bs = td->o.rw_min_bs;
	else if (td_read(td))
		bs = td->o.min_bs[DDIR_READ];
	else
		bs = td->o.min_bs[DDIR_WRITE];

	usec_cycle = td->rate_usec_cycle * (bytes / bs);

	if (time_spent < usec_cycle) {
		unsigned long s = usec_cycle - time_spent;

		td->rate_pending_usleep += s;

		if (td->rate_pending_usleep >= 100000) {
			struct timeval t;

			fio_gettime(&t, NULL);
			usec_sleep(td, td->rate_pending_usleep);
			td->rate_pending_usleep -= utime_since_now(&t);
		}
	} else {
		long overtime = time_spent - usec_cycle;

		td->rate_pending_usleep -= overtime;
	}
}
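
To make the throttle arithmetic concrete, here is a worked example with made-up numbers: at rate_usec_cycle = 250 usec per minimum-sized block, a 16 KiB transfer with bs = 4 KiB should take 1000 usec; if the IO actually completed in 600 usec, 400 usec of sleep debt is accumulated for a later usec_sleep().

#include <stdio.h>

int main(void)
{
	unsigned long rate_usec_cycle = 250;	/* target usec per block (made up) */
	unsigned int bs = 4096;			/* minimum block size */
	unsigned int bytes = 16384;		/* bytes moved this cycle */
	unsigned long time_spent = 600;		/* usec the IO actually took */
	long rate_pending_usleep = 0;
	unsigned long usec_cycle = rate_usec_cycle * (bytes / bs);

	if (time_spent < usec_cycle)
		rate_pending_usleep += usec_cycle - time_spent;	/* sleep debt */
	else
		rate_pending_usleep -= time_spent - usec_cycle;	/* running late */

	/* prints: usec_cycle=1000 pending=400 */
	printf("usec_cycle=%lu pending=%ld\n", usec_cycle, rate_pending_usleep);
	return 0;
}
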
Example #7
/*
 * Print status of the jobs we know about. This includes rate estimates,
 * ETA, thread state, etc.
 */
int calc_thread_status(struct jobs_eta *je, int force)
{
    struct thread_data *td;
    int i, unified_rw_rep;
    unsigned long rate_time, disp_time, bw_avg_time, *eta_secs;
    unsigned long long io_bytes[DDIR_RWDIR_CNT];
    unsigned long long io_iops[DDIR_RWDIR_CNT];
    struct timeval now;

    static unsigned long long rate_io_bytes[DDIR_RWDIR_CNT];
    static unsigned long long disp_io_bytes[DDIR_RWDIR_CNT];
    static unsigned long long disp_io_iops[DDIR_RWDIR_CNT];
    static struct timeval rate_prev_time, disp_prev_time;

    if (!force) {
        if (output_format != FIO_OUTPUT_NORMAL &&
                f_out == stdout)
            return 0;
        if (temp_stall_ts || eta_print == FIO_ETA_NEVER)
            return 0;

        if (!isatty(STDOUT_FILENO) && (eta_print != FIO_ETA_ALWAYS))
            return 0;
    }

    if (!ddir_rw_sum(rate_io_bytes))
        fill_start_time(&rate_prev_time);
    if (!ddir_rw_sum(disp_io_bytes))
        fill_start_time(&disp_prev_time);

    eta_secs = malloc(thread_number * sizeof(unsigned long));
    memset(eta_secs, 0, thread_number * sizeof(unsigned long));

    je->elapsed_sec = (mtime_since_genesis() + 999) / 1000;

    io_bytes[DDIR_READ] = io_bytes[DDIR_WRITE] = io_bytes[DDIR_TRIM] = 0;
    io_iops[DDIR_READ] = io_iops[DDIR_WRITE] = io_iops[DDIR_TRIM] = 0;
    bw_avg_time = ULONG_MAX;
    unified_rw_rep = 0;
    for_each_td(td, i) {
        unified_rw_rep += td->o.unified_rw_rep;
        if (is_power_of_2(td->o.kb_base))
            je->is_pow2 = 1;
        je->unit_base = td->o.unit_base;
        if (td->o.bw_avg_time < bw_avg_time)
            bw_avg_time = td->o.bw_avg_time;
        if (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING
                || td->runstate == TD_FSYNCING
                || td->runstate == TD_PRE_READING
                || td->runstate == TD_FINISHING) {
            je->nr_running++;
            if (td_read(td)) {
                je->t_rate[0] += td->o.rate[DDIR_READ];
                je->t_iops[0] += td->o.rate_iops[DDIR_READ];
                je->m_rate[0] += td->o.ratemin[DDIR_READ];
                je->m_iops[0] += td->o.rate_iops_min[DDIR_READ];
            }
            if (td_write(td)) {
                je->t_rate[1] += td->o.rate[DDIR_WRITE];
                je->t_iops[1] += td->o.rate_iops[DDIR_WRITE];
                je->m_rate[1] += td->o.ratemin[DDIR_WRITE];
                je->m_iops[1] += td->o.rate_iops_min[DDIR_WRITE];
            }
            if (td_trim(td)) {
                je->t_rate[2] += td->o.rate[DDIR_TRIM];
                je->t_iops[2] += td->o.rate_iops[DDIR_TRIM];
                je->m_rate[2] += td->o.ratemin[DDIR_TRIM];
                je->m_iops[2] += td->o.rate_iops_min[DDIR_TRIM];
            }

            je->files_open += td->nr_open_files;
        } else if (td->runstate == TD_RAMP) {
            je->nr_running++;
            je->nr_ramp++;
        } else if (td->runstate == TD_SETTING_UP) {
            je->nr_running++;
            je->nr_setting_up++;
        } else if (td->runstate < TD_RUNNING)
            je->nr_pending++;

        if (je->elapsed_sec >= 3)
            eta_secs[i] = thread_eta(td);
        else
            eta_secs[i] = INT_MAX;

        check_str_update(td);

        if (td->runstate > TD_SETTING_UP) {
            int ddir;

            for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
                if (unified_rw_rep) {
                    io_bytes[0] += td->io_bytes[ddir];
                    io_iops[0] += td->io_blocks[ddir];
                } else {
                    io_bytes[ddir] += td->io_bytes[ddir];
                    io_iops[ddir] += td->io_blocks[ddir];
                }
            }
        }
    }
Example #8
static int fio_rdmaio_init(struct thread_data *td)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct rdmaio_options *o = td->eo;
	unsigned int max_bs;
	int ret, i;

	if (td_rw(td)) {
		log_err("fio: rdma connections must be read OR write\n");
		return 1;
	}
	if (td_random(td)) {
		log_err("fio: RDMA network IO can't be random\n");
		return 1;
	}

	if (compat_options(td))
		return 1;

	if (!o->port) {
		log_err("fio: no port has been specified which is required "
			"for the rdma engine\n");
		return 1;
	}

	if (check_set_rlimits(td))
		return 1;

	rd->rdma_protocol = o->verb;
	rd->cq_event_num = 0;

	rd->cm_channel = rdma_create_event_channel();
	if (!rd->cm_channel) {
		log_err("fio: rdma_create_event_channel fail\n");
		return 1;
	}

	ret = rdma_create_id(rd->cm_channel, &rd->cm_id, rd, RDMA_PS_TCP);
	if (ret) {
		log_err("fio: rdma_create_id fail\n");
		return 1;
	}

	if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) ||
	    (rd->rdma_protocol == FIO_RDMA_MEM_READ)) {
		rd->rmt_us =
			malloc(FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u));
		memset(rd->rmt_us, 0,
			FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u));
		rd->rmt_nr = 0;
	}

	rd->io_us_queued = malloc(td->o.iodepth * sizeof(struct io_u *));
	memset(rd->io_us_queued, 0, td->o.iodepth * sizeof(struct io_u *));
	rd->io_u_queued_nr = 0;

	rd->io_us_flight = malloc(td->o.iodepth * sizeof(struct io_u *));
	memset(rd->io_us_flight, 0, td->o.iodepth * sizeof(struct io_u *));
	rd->io_u_flight_nr = 0;

	rd->io_us_completed = malloc(td->o.iodepth * sizeof(struct io_u *));
	memset(rd->io_us_completed, 0, td->o.iodepth * sizeof(struct io_u *));
	rd->io_u_completed_nr = 0;

	if (td_read(td)) {	/* READ as the server */
		rd->is_client = 0;
		td->flags |= TD_F_NO_PROGRESS;
		/* the server's rd->rdma_buf_len will be set up after the request arrives */
		ret = fio_rdmaio_setup_listen(td, o->port);
	} else {		/* WRITE as the client */
		rd->is_client = 1;
		ret = fio_rdmaio_setup_connect(td, td->o.filename, o->port);
	}

	max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
	rd->send_buf.max_bs = htonl(max_bs);

	/* register each io_u in the free list */
	for (i = 0; i < td->io_u_freelist.nr; i++) {
		struct io_u *io_u = td->io_u_freelist.io_us[i];

		io_u->engine_data = malloc(sizeof(struct rdma_io_u_data));
		memset(io_u->engine_data, 0, sizeof(struct rdma_io_u_data));
		((struct rdma_io_u_data *)io_u->engine_data)->wr_id = i;

		io_u->mr = ibv_reg_mr(rd->pd, io_u->buf, max_bs,
				      IBV_ACCESS_LOCAL_WRITE |
				      IBV_ACCESS_REMOTE_READ |
				      IBV_ACCESS_REMOTE_WRITE);
		if (io_u->mr == NULL) {
			log_err("fio: ibv_reg_mr io_u failed\n");
			return 1;
		}

		rd->send_buf.rmt_us[i].buf =
		    htonll((uint64_t) (unsigned long)io_u->buf);
		rd->send_buf.rmt_us[i].rkey = htonl(io_u->mr->rkey);
		rd->send_buf.rmt_us[i].size = htonl(max_bs);

#if 0
		log_info("fio: Send rkey %x addr %" PRIx64 " len %d to client\n", io_u->mr->rkey, io_u->buf, max_bs); */
#endif
	}

	rd->send_buf.nr = htonl(i);

	return ret;
}
Example #9
File: eta.c Project: mavissong/fio
/*
 * Print status of the jobs we know about. This includes rate estimates,
 * ETA, thread state, etc.
 */
int calc_thread_status(struct jobs_eta *je, int force)
{
	struct thread_data *td;
	int i;
	unsigned long rate_time, disp_time, bw_avg_time, *eta_secs;
	unsigned long long io_bytes[DDIR_RWDIR_CNT];
	unsigned long long io_iops[DDIR_RWDIR_CNT];
	struct timeval now;

	static unsigned long long rate_io_bytes[DDIR_RWDIR_CNT];
	static unsigned long long disp_io_bytes[DDIR_RWDIR_CNT];
	static unsigned long long disp_io_iops[DDIR_RWDIR_CNT];
	static struct timeval rate_prev_time, disp_prev_time;

	if (!force) {
		if (output_format != FIO_OUTPUT_NORMAL)
			return 0;
		if (temp_stall_ts || eta_print == FIO_ETA_NEVER)
			return 0;

		if (!isatty(STDOUT_FILENO) && (eta_print != FIO_ETA_ALWAYS))
			return 0;
	}

	if (!ddir_rw_sum(rate_io_bytes))
		fill_start_time(&rate_prev_time);
	if (!ddir_rw_sum(disp_io_bytes))
		fill_start_time(&disp_prev_time);

	eta_secs = malloc(thread_number * sizeof(unsigned long));
	memset(eta_secs, 0, thread_number * sizeof(unsigned long));

	je->elapsed_sec = (mtime_since_genesis() + 999) / 1000;

	io_bytes[DDIR_READ] = io_bytes[DDIR_WRITE] = io_bytes[DDIR_TRIM] = 0;
	io_iops[DDIR_READ] = io_iops[DDIR_WRITE] = io_iops[DDIR_TRIM] = 0;
	bw_avg_time = ULONG_MAX;
	for_each_td(td, i) {
		if (is_power_of_2(td->o.kb_base))
			je->is_pow2 = 1;
		if (td->o.bw_avg_time < bw_avg_time)
			bw_avg_time = td->o.bw_avg_time;
		if (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING
		    || td->runstate == TD_FSYNCING
		    || td->runstate == TD_PRE_READING) {
			je->nr_running++;
			if (td_read(td)) {
				je->t_rate += td->o.rate[DDIR_READ];
				je->t_iops += td->o.rate_iops[DDIR_READ];
				je->m_rate += td->o.ratemin[DDIR_READ];
				je->m_iops += td->o.rate_iops_min[DDIR_READ];
			}
			if (td_write(td)) {
				je->t_rate += td->o.rate[DDIR_WRITE];
				je->t_iops += td->o.rate_iops[DDIR_WRITE];
				je->m_rate += td->o.ratemin[DDIR_WRITE];
				je->m_iops += td->o.rate_iops_min[DDIR_WRITE];
			}
			if (td_trim(td)) {
				je->t_rate += td->o.rate[DDIR_TRIM];
				je->t_iops += td->o.rate_iops[DDIR_TRIM];
				je->m_rate += td->o.ratemin[DDIR_TRIM];
				je->m_iops += td->o.rate_iops_min[DDIR_TRIM];
			}

			je->files_open += td->nr_open_files;
		} else if (td->runstate == TD_RAMP) {
			je->nr_running++;
			je->nr_ramp++;
		} else if (td->runstate == TD_SETTING_UP)
			je->nr_running++;
		else if (td->runstate < TD_RUNNING)
			je->nr_pending++;

		if (je->elapsed_sec >= 3)
			eta_secs[i] = thread_eta(td);
		else
			eta_secs[i] = INT_MAX;

		check_str_update(td);

		if (td->runstate > TD_RAMP) {
			int ddir;
			for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
				io_bytes[ddir] += td->io_bytes[ddir];
				io_iops[ddir] += td->io_blocks[ddir];
			}
		}
	}

	if (exitall_on_terminate)
		je->eta_sec = INT_MAX;
	else
		je->eta_sec = 0;

	for_each_td(td, i) {
		if (exitall_on_terminate) {
			if (eta_secs[i] < je->eta_sec)
				je->eta_sec = eta_secs[i];
		} else {
			if (eta_secs[i] > je->eta_sec)
				je->eta_sec = eta_secs[i];
		}
	}

	free(eta_secs);

	fio_gettime(&now, NULL);
	rate_time = mtime_since(&rate_prev_time, &now);

	if (write_bw_log && rate_time > bw_avg_time && !in_ramp_time(td)) {
		calc_rate(rate_time, io_bytes, rate_io_bytes, je->rate);
		memcpy(&rate_prev_time, &now, sizeof(now));
		add_agg_sample(je->rate[DDIR_READ], DDIR_READ, 0);
		add_agg_sample(je->rate[DDIR_WRITE], DDIR_WRITE, 0);
		add_agg_sample(je->rate[DDIR_TRIM], DDIR_TRIM, 0);
	}

	disp_time = mtime_since(&disp_prev_time, &now);

	/*
	 * Allow a little slack, the target is to print it every 1000 msecs
	 */
	if (!force && disp_time < 900)
		return 0;

	calc_rate(disp_time, io_bytes, disp_io_bytes, je->rate);
	calc_iops(disp_time, io_iops, disp_io_iops, je->iops);

	memcpy(&disp_prev_time, &now, sizeof(now));

	if (!force && !je->nr_running && !je->nr_pending)
		return 0;

	je->nr_threads = thread_number;
	memcpy(je->run_str, run_str, thread_number * sizeof(char));

	return 1;
}
Example #10
int generic_open_file(struct thread_data *td, struct fio_file *f)
{
	int is_std = 0;
	int flags = 0;
	int from_hash = 0;

	dprint(FD_FILE, "fd open %s\n", f->file_name);

	if (td_trim(td) && f->filetype != FIO_TYPE_BD) {
		log_err("fio: trim only applies to block device\n");
		return 1;
	}

	if (!strcmp(f->file_name, "-")) {
		if (td_rw(td)) {
			log_err("fio: can't read/write to stdin/out\n");
			return 1;
		}
		is_std = 1;

		/*
		 * move output logging to stderr, if we are writing to stdout
		 */
		if (td_write(td))
			f_out = stderr;
	}

	if (td_trim(td))
		goto skip_flags;
	if (td->o.odirect)
		flags |= OS_O_DIRECT;
	if (td->o.sync_io)
		flags |= O_SYNC;
	if (td->o.create_on_open)
		flags |= O_CREAT;
skip_flags:
	if (f->filetype != FIO_TYPE_FILE)
		flags |= FIO_O_NOATIME;

open_again:
	if (td_write(td)) {
		if (!read_only)
			flags |= O_RDWR;

		if (f->filetype == FIO_TYPE_FILE)
			flags |= O_CREAT;

		if (is_std)
			f->fd = dup(STDOUT_FILENO);
		else
			from_hash = file_lookup_open(f, flags);
	} else if (td_read(td)) {
		if (f->filetype == FIO_TYPE_CHAR && !read_only)
			flags |= O_RDWR;
		else
			flags |= O_RDONLY;

		if (is_std)
			f->fd = dup(STDIN_FILENO);
		else
			from_hash = file_lookup_open(f, flags);
	} else { //td trim
		flags |= O_RDWR;
		from_hash = file_lookup_open(f, flags);
	}

	if (f->fd == -1) {
		char buf[FIO_VERROR_SIZE];
		int __e = errno;

		if (__e == EPERM && (flags & FIO_O_NOATIME)) {
			flags &= ~FIO_O_NOATIME;
			goto open_again;
		}
		if (__e == EMFILE && file_close_shadow_fds(td))
			goto open_again;

		snprintf(buf, sizeof(buf), "open(%s)", f->file_name);

		if (__e == EINVAL && (flags & OS_O_DIRECT)) {
			log_err("fio: looks like your file system does not " \
				"support direct=1/buffered=0\n");
		}

		td_verror(td, __e, buf);
	}

	if (!from_hash && f->fd != -1) {
		if (add_file_hash(f)) {
			int fio_unused ret;

			/*
			 * Stash away descriptor for later close. This is to
			 * work around a "feature" on Linux, where a close of
			 * an fd that has been opened for write will trigger
			 * udev to call blkid to check partitions, fs id, etc.
			 * That pollutes the device cache, which can slow down
			 * unbuffered accesses.
			 */
			if (f->shadow_fd == -1)
				f->shadow_fd = f->fd;
			else {
				/*
			 	 * OK to ignore, we haven't done anything
				 * with it
				 */
				ret = generic_close_file(td, f);
			}
			goto open_again;
		}
	}

	return 0;
}
Example #11
/*
 * Leaves f->fd open on success, caller must close
 */
static int extend_file(struct thread_data *td, struct fio_file *f)
{
	int r, new_layout = 0, unlink_file = 0, flags;
	unsigned long long left;
	unsigned int bs;
	char *b;

	if (read_only) {
		log_err("fio: refusing extend of file due to read-only\n");
		return 0;
	}

	/*
	 * check if we need to lay the file out complete again. fio
	 * does that for operations involving reads, or for writes
	 * where overwrite is set
	 */
	if (td_read(td) || (td_write(td) && td->o.overwrite) ||
	    (td_write(td) && td->io_ops->flags & FIO_NOEXTEND))
		new_layout = 1;
	if (td_write(td) && !td->o.overwrite)
		unlink_file = 1;

	if (unlink_file || new_layout) {
		dprint(FD_FILE, "layout unlink %s\n", f->file_name);
		if ((unlink(f->file_name) < 0) && (errno != ENOENT)) {
			td_verror(td, errno, "unlink");
			return 1;
		}
	}

	flags = O_WRONLY | O_CREAT;
	if (new_layout)
		flags |= O_TRUNC;

	dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
	f->fd = open(f->file_name, flags, 0644);
	if (f->fd < 0) {
		td_verror(td, errno, "open");
		return 1;
	}

#ifdef CONFIG_POSIX_FALLOCATE
	if (!td->o.fill_device) {
		switch (td->o.fallocate_mode) {
		case FIO_FALLOCATE_NONE:
			break;
		case FIO_FALLOCATE_POSIX:
			dprint(FD_FILE, "posix_fallocate file %s size %llu\n",
				 f->file_name,
				 (unsigned long long) f->real_file_size);

			r = posix_fallocate(f->fd, 0, f->real_file_size);
			if (r > 0) {
				log_err("fio: posix_fallocate fails: %s\n",
						strerror(r));
			}
			break;
#ifdef CONFIG_LINUX_FALLOCATE
		case FIO_FALLOCATE_KEEP_SIZE:
			dprint(FD_FILE,
				"fallocate(FALLOC_FL_KEEP_SIZE) "
				"file %s size %llu\n", f->file_name,
				(unsigned long long) f->real_file_size);

			r = fallocate(f->fd, FALLOC_FL_KEEP_SIZE, 0,
					f->real_file_size);
			if (r != 0)
				td_verror(td, errno, "fallocate");

			break;
#endif /* CONFIG_LINUX_FALLOCATE */
		default:
			log_err("fio: unknown fallocate mode: %d\n",
				td->o.fallocate_mode);
			assert(0);
		}
	}
#endif /* CONFIG_POSIX_FALLOCATE */

	if (!new_layout)
		goto done;

	/*
	 * The size will be -1ULL when fill_device is used, so don't truncate
	 * or fallocate this file, just write it
	 */
	if (!td->o.fill_device) {
		dprint(FD_FILE, "truncate file %s, size %llu\n", f->file_name,
					(unsigned long long) f->real_file_size);
		if (ftruncate(f->fd, f->real_file_size) == -1) {
			td_verror(td, errno, "ftruncate");
			goto err;
		}
	}

	b = malloc(td->o.max_bs[DDIR_WRITE]);

	left = f->real_file_size;
	while (left && !td->terminate) {
		bs = td->o.max_bs[DDIR_WRITE];
		if (bs > left)
			bs = left;

		fill_io_buffer(td, b, bs, bs);

		r = write(f->fd, b, bs);

		if (r > 0) {
			left -= r;
			continue;
		} else {
			if (r < 0) {
				int __e = errno;

				if (__e == ENOSPC) {
					if (td->o.fill_device)
						break;
					log_info("fio: ENOSPC on laying out "
						 "file, stopping\n");
					break;
				}
				td_verror(td, errno, "write");
			} else
				td_verror(td, EIO, "write");

			break;
		}
	}

	if (td->terminate) {
		dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
		unlink(f->file_name);
	} else if (td->o.create_fsync) {
		if (fsync(f->fd) < 0) {
			td_verror(td, errno, "fsync");
			goto err;
		}
	}
	if (td->o.fill_device && !td_write(td)) {
		fio_file_clear_size_known(f);
		if (td_io_get_file_size(td, f))
			goto err;
		if (f->io_size > f->real_file_size)
			f->io_size = f->real_file_size;
	}

	free(b);
done:
	return 0;
err:
	close(f->fd);
	f->fd = -1;
	return 1;
}
Example #12
File: splice.c Project: cvoltz/fio
/*
 * For splice writing, we can vmsplice our data buffer directly into a
 * pipe and then splice that to a file.
 */
static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
{
	struct spliceio_data *sd = td->io_ops->data;
	struct iovec iov = {
		.iov_base = io_u->xfer_buf,
		.iov_len = io_u->xfer_buflen,
	};
	struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
	struct fio_file *f = io_u->file;
	off_t off = io_u->offset;
	int ret, ret2;

	while (iov.iov_len) {
		if (poll(&pfd, 1, -1) < 0)
			return errno;

		ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK);
		if (ret < 0)
			return -errno;

		iov.iov_len -= ret;
		iov.iov_base += ret;

		while (ret) {
			ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0);
			if (ret2 < 0)
				return -errno;

			ret -= ret2;
		}
	}

	return io_u->xfer_buflen;
}

static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct spliceio_data *sd = td->io_ops->data;
	int uninitialized_var(ret);

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ) {
		if (sd->vmsplice_to_user) {
			ret = fio_splice_read(td, io_u);
			/*
			 * This kernel doesn't support vmsplice to user
			 * space. Reset the vmsplice_to_user flag, so that
			 * we retry below and don't hit this path again.
			 */
			if (ret == -EBADF)
				sd->vmsplice_to_user = 0;
		}
		if (!sd->vmsplice_to_user)
			ret = fio_splice_read_old(td, io_u);
	} else if (io_u->ddir == DDIR_WRITE)
		ret = fio_splice_write(td, io_u);
	else
		ret = fsync(io_u->file->fd);

	if (ret != (int) io_u->xfer_buflen) {
		if (ret >= 0) {
			io_u->resid = io_u->xfer_buflen - ret;
			io_u->error = 0;
			return FIO_Q_COMPLETED;
		} else
			io_u->error = errno;
	}

	if (io_u->error) {
		td_verror(td, io_u->error, "xfer");
		if (io_u->error == EINVAL)
			log_err("fio: looks like splice doesn't work on this"
					" file system\n");
	}

	return FIO_Q_COMPLETED;
}

static void fio_spliceio_cleanup(struct thread_data *td)
{
	struct spliceio_data *sd = td->io_ops->data;

	if (sd) {
		close(sd->pipe[0]);
		close(sd->pipe[1]);
		free(sd);
	}
}

static int fio_spliceio_init(struct thread_data *td)
{
	struct spliceio_data *sd = malloc(sizeof(*sd));

	if (pipe(sd->pipe) < 0) {
		td_verror(td, errno, "pipe");
		free(sd);
		return 1;
	}

	/*
	 * Assume this works; we'll reset it if it doesn't
	 */
	sd->vmsplice_to_user = 1;

	/*
	 * Works with "real" vmsplice to user, eg mapping pages directly.
	 * Reset if we fail.
	 */
	sd->vmsplice_to_user_map = 1;

	/*
	 * And if vmsplice_to_user works, we definitely need aligned
	 * buffers. Just set ->odirect to force that.
	 */
	if (td_read(td))
		td->o.odirect = 1;

	td->io_ops->data = sd;
	return 0;
}

static struct ioengine_ops ioengine = {
	.name		= "splice",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_spliceio_init,
	.queue		= fio_spliceio_queue,
	.cleanup	= fio_spliceio_cleanup,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO | FIO_PIPEIO,
};

#else /* FIO_HAVE_SPLICE */

/*
 * When we have a proper configure system in place, we simply won't build
 * and install this io engine. For now, install a crippled version that
 * just complains and fails to load.
 */
static int fio_spliceio_init(struct thread_data fio_unused *td)
{
	fprintf(stderr, "fio: splice not available\n");
	return 1;
}

static struct ioengine_ops ioengine = {
	.name		= "splice",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_spliceio_init,
};

#endif

static void fio_init fio_spliceio_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_spliceio_unregister(void)
{
	unregister_ioengine(&ioengine);
}
Example #13
File: ime.c Project: arh/fio
/* This function mimics the generic_file_open function, but issues
   IME native calls instead of POSIX calls. */
static int fio_ime_open_file(struct thread_data *td, struct fio_file *f)
{
	int flags = 0;
	int ret;
	uint64_t desired_fs;
	char *ime_filename;

	dprint(FD_FILE, "fd open %s\n", f->file_name);

	if (td_trim(td)) {
		td_verror(td, EINVAL, "IME does not support TRIM operation");
		return 1;
	}

	if (td->o.oatomic) {
		td_verror(td, EINVAL, "IME does not support atomic IO");
		return 1;
	}
	if (td->o.odirect)
		flags |= O_DIRECT;
	if (td->o.sync_io)
		flags |= O_SYNC;
	if (td->o.create_on_open && td->o.allow_create)
		flags |= O_CREAT;

	if (td_write(td)) {
		if (!read_only)
			flags |= O_RDWR;

		if (td->o.allow_create)
			flags |= O_CREAT;
	} else if (td_read(td)) {
		flags |= O_RDONLY;
	} else {
		/* We should never get here. */
		td_verror(td, EINVAL, "Unsupported open mode");
		return 1;
	}

	ime_filename = fio_set_ime_filename(f->file_name);
	if (ime_filename == NULL)
		return 1;
	f->fd = ime_native_open(ime_filename, flags, 0600);
	if (f->fd == -1) {
		char buf[FIO_VERROR_SIZE];
		int __e = errno;

		snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
		td_verror(td, __e, buf);
		return 1;
	}

	/* Now we need to make sure the real file size is sufficient for FIO
	   to do its job. This is normally done before the file open function
	   is called, but because FIO would use POSIX calls there, we need to
	   do it ourselves. */
	ret = fio_ime_get_file_size(td, f);
	if (ret < 0) {
		ime_native_close(f->fd);
		td_verror(td, errno, "ime_get_file_size");
		return 1;
	}

	desired_fs = f->io_size + f->file_offset;
	if (td_write(td)) {
		dprint(FD_FILE, "Laying out file %s%s\n",
			DEFAULT_IME_FILE_PREFIX, f->file_name);
		if (!td->o.create_on_open &&
				f->real_file_size < desired_fs &&
				ime_native_ftruncate(f->fd, desired_fs) < 0) {
			ime_native_close(f->fd);
			td_verror(td, errno, "ime_native_ftruncate");
			return 1;
		}
		if (f->real_file_size < desired_fs)
			f->real_file_size = desired_fs;
	} else if (td_read(td) && f->real_file_size < desired_fs) {
		ime_native_close(f->fd);
		log_err("error: can't read %lu bytes from file with "
						"%lu bytes\n", desired_fs, f->real_file_size);
		return 1;
	}

	return 0;
}
Example #14
File: eta.c Project: rmatt/fio
/*
 * Best effort calculation of the estimated pending runtime of a job.
 */
static unsigned long thread_eta(struct thread_data *td)
{
	unsigned long long bytes_total, bytes_done;
	unsigned long eta_sec = 0;
	unsigned long elapsed;
	uint64_t timeout;

	elapsed = (mtime_since_now(&td->epoch) + 999) / 1000;
	timeout = td->o.timeout / 1000000UL;

	bytes_total = td->total_io_size;

	if (td->flags & TD_F_NO_PROGRESS)
		return -1;

	if (td->o.fill_device && td->o.size  == -1ULL) {
		if (!td->fill_device_size || td->fill_device_size == -1ULL)
			return 0;

		bytes_total = td->fill_device_size;
	}

	if (td->o.zone_size && td->o.zone_skip && bytes_total) {
		unsigned int nr_zones;
		uint64_t zone_bytes;

		zone_bytes = bytes_total + td->o.zone_size + td->o.zone_skip;
		nr_zones = (zone_bytes - 1) / (td->o.zone_size + td->o.zone_skip);
		bytes_total -= nr_zones * td->o.zone_skip;
	}

	/*
	 * if writing and verifying afterwards, bytes_total will be twice the
	 * size. In a mixed workload, verify phase will be the size of the
	 * first stage writes.
	 */
	if (td->o.do_verify && td->o.verify && td_write(td)) {
		if (td_rw(td)) {
			unsigned int perc = 50;

			if (td->o.rwmix[DDIR_WRITE])
				perc = td->o.rwmix[DDIR_WRITE];

			bytes_total += (bytes_total * perc) / 100;
		} else
			bytes_total <<= 1;
	}

	if (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING) {
		double perc, perc_t;

		bytes_done = ddir_rw_sum(td->io_bytes);

		if (bytes_total) {
			perc = (double) bytes_done / (double) bytes_total;
			if (perc > 1.0)
				perc = 1.0;
		} else
			perc = 0.0;

		if (td->o.time_based) {
			if (timeout) {
				perc_t = (double) elapsed / (double) timeout;
				if (perc_t < perc)
					perc = perc_t;
			} else {
				/*
				 * Will never hit, we can't have time_based
				 * without a timeout set.
				 */
				perc = 0.0;
			}
		}

		if (perc == 0.0) {
			eta_sec = timeout;
		} else {
			eta_sec = (unsigned long) (elapsed * (1.0 / perc)) - elapsed;
		}

		if (td->o.timeout &&
		    eta_sec > (timeout + done_secs - elapsed))
			eta_sec = timeout + done_secs - elapsed;
	} else if (td->runstate == TD_NOT_CREATED || td->runstate == TD_CREATED
			|| td->runstate == TD_INITIALIZED
			|| td->runstate == TD_SETTING_UP
			|| td->runstate == TD_RAMP
			|| td->runstate == TD_PRE_READING) {
		int64_t t_eta = 0, r_eta = 0;
		unsigned long long rate_bytes;

		/*
		 * We can only guess - assume it'll run the full timeout
		 * if given, otherwise assume it'll run at the specified rate.
		 */
		if (td->o.timeout) {
			uint64_t __timeout = td->o.timeout;
			uint64_t start_delay = td->o.start_delay;
			uint64_t ramp_time = td->o.ramp_time;

			t_eta = __timeout + start_delay;
			if (!td->ramp_time_over) {
				t_eta += ramp_time;
			}
			t_eta /= 1000000ULL;

			if ((td->runstate == TD_RAMP) && in_ramp_time(td)) {
				unsigned long ramp_left;

				ramp_left = mtime_since_now(&td->epoch);
				ramp_left = (ramp_left + 999) / 1000;
				if (ramp_left <= t_eta)
					t_eta -= ramp_left;
			}
		}
		rate_bytes = 0;
		if (td_read(td))
			rate_bytes  = td->o.rate[DDIR_READ];
		if (td_write(td))
			rate_bytes += td->o.rate[DDIR_WRITE];
		if (td_trim(td))
			rate_bytes += td->o.rate[DDIR_TRIM];

		if (rate_bytes) {
			r_eta = bytes_total / rate_bytes;
			r_eta += (td->o.start_delay / 1000000ULL);
		}

		if (r_eta && t_eta)
			eta_sec = min(r_eta, t_eta);
		else if (r_eta)
			eta_sec = r_eta;
		else if (t_eta)
			eta_sec = t_eta;
		else
			eta_sec = 0;
	} else {
		/*
		 * thread is already done or waiting for fsync
		 */
		eta_sec = 0;
	}

	return eta_sec;
}
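
The progress-based branch above reduces to eta_sec = elapsed * (1 / perc) - elapsed. A worked example with invented numbers: a running job that has moved 25% of bytes_total after 60 seconds of runtime gets an ETA of 60 * 4 - 60 = 180 seconds.

#include <stdio.h>

int main(void)
{
	unsigned long elapsed = 60;	/* seconds since the job started (made up) */
	double perc = 0.25;		/* fraction of bytes_total already done */
	unsigned long eta_sec;

	eta_sec = (unsigned long) (elapsed * (1.0 / perc)) - elapsed;

	printf("eta = %lu sec\n", eta_sec);	/* prints: eta = 180 sec */
	return 0;
}
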
Example #15
/*
 * Sets the status of the 'td' in the printed status map.
 */
static void check_str_update(struct thread_data *td)
{
    char c = __run_str[td->thread_number - 1];

    switch (td->runstate) {
    case TD_REAPED:
        if (td->error)
            c = 'X';
        else if (td->sig)
            c = 'K';
        else
            c = '_';
        break;
    case TD_EXITED:
        c = 'E';
        break;
    case TD_RAMP:
        c = '/';
        break;
    case TD_RUNNING:
        if (td_rw(td)) {
            if (td_random(td)) {
                if (td->o.rwmix[DDIR_READ] == 100)
                    c = 'r';
                else if (td->o.rwmix[DDIR_WRITE] == 100)
                    c = 'w';
                else
                    c = 'm';
            } else {
                if (td->o.rwmix[DDIR_READ] == 100)
                    c = 'R';
                else if (td->o.rwmix[DDIR_WRITE] == 100)
                    c = 'W';
                else
                    c = 'M';
            }
        } else if (td_read(td)) {
            if (td_random(td))
                c = 'r';
            else
                c = 'R';
        } else if (td_write(td)) {
            if (td_random(td))
                c = 'w';
            else
                c = 'W';
        } else {
            if (td_random(td))
                c = 'd';
            else
                c = 'D';
        }
        break;
    case TD_PRE_READING:
        c = 'p';
        break;
    case TD_VERIFYING:
        c = 'V';
        break;
    case TD_FSYNCING:
        c = 'F';
        break;
    case TD_FINISHING:
        c = 'f';
        break;
    case TD_CREATED:
        c = 'C';
        break;
    case TD_INITIALIZED:
    case TD_SETTING_UP:
        c = 'I';
        break;
    case TD_NOT_CREATED:
        c = 'P';
        break;
    default:
        log_err("state %d\n", td->runstate);
    }

    __run_str[td->thread_number - 1] = c;
    update_condensed_str(__run_str, run_str);
}
Example #16
File: init.c Project: rudyLi/fio
/*
 * Lazy way of fixing up options that depend on each other. We could also
 * define option callback handlers, but this is easier.
 */
static int fixup_options(struct thread_data *td)
{
	struct thread_options *o = &td->o;
	int ret = 0;

#ifndef FIO_HAVE_PSHARED_MUTEX
	if (!o->use_thread) {
		log_info("fio: this platform does not support process shared"
			 " mutexes, forcing use of threads. Use the 'thread'"
			 " option to get rid of this warning.\n");
		o->use_thread = 1;
		ret = warnings_fatal;
	}
#endif

	if (o->write_iolog_file && o->read_iolog_file) {
		log_err("fio: read iolog overrides write_iolog\n");
		free(o->write_iolog_file);
		o->write_iolog_file = NULL;
		ret = warnings_fatal;
	}

	/*
	 * only really works with 1 file
	 */
	if (o->zone_size && o->open_files == 1)
		o->zone_size = 0;

	/*
	 * If zone_range isn't specified, backward compatibility dictates it
	 * should be made equal to zone_size.
	 */
	if (o->zone_size && !o->zone_range)
		o->zone_range = o->zone_size;

	/*
	 * Reads can do overwrites, we always need to pre-create the file
	 */
	if (td_read(td) || td_rw(td))
		o->overwrite = 1;

	if (!o->min_bs[DDIR_READ])
		o->min_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->max_bs[DDIR_READ])
		o->max_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->min_bs[DDIR_WRITE])
		o->min_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
	if (!o->max_bs[DDIR_WRITE])
		o->max_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];

	o->rw_min_bs = min(o->min_bs[DDIR_READ], o->min_bs[DDIR_WRITE]);

	/*
	 * For random IO, allow blockalign offset other than min_bs.
	 */
	if (!o->ba[DDIR_READ] || !td_random(td))
		o->ba[DDIR_READ] = o->min_bs[DDIR_READ];
	if (!o->ba[DDIR_WRITE] || !td_random(td))
		o->ba[DDIR_WRITE] = o->min_bs[DDIR_WRITE];

	if ((o->ba[DDIR_READ] != o->min_bs[DDIR_READ] ||
	    o->ba[DDIR_WRITE] != o->min_bs[DDIR_WRITE]) &&
	    !o->norandommap) {
		log_err("fio: Any use of blockalign= turns off randommap\n");
		o->norandommap = 1;
		ret = warnings_fatal;
	}

	if (!o->file_size_high)
		o->file_size_high = o->file_size_low;

	if (o->norandommap && o->verify != VERIFY_NONE
	    && !fixed_block_size(o))  {
		log_err("fio: norandommap given for variable block sizes, "
			"verify disabled\n");
		o->verify = VERIFY_NONE;
		ret = warnings_fatal;
	}
	if (o->bs_unaligned && (o->odirect || td->io_ops->flags & FIO_RAWIO))
		log_err("fio: bs_unaligned may not work with raw io\n");

	/*
	 * thinktime_spin must be less than thinktime
	 */
	if (o->thinktime_spin > o->thinktime)
		o->thinktime_spin = o->thinktime;

	/*
	 * The low water mark cannot be bigger than the iodepth
	 */
	if (o->iodepth_low > o->iodepth || !o->iodepth_low) {
		/*
		 * syslet work around - if the workload is sequential,
		 * we want to let the queue drain all the way down to
		 * avoid seeking between async threads
		 */
		if (!strcmp(td->io_ops->name, "syslet-rw") && !td_random(td))
			o->iodepth_low = 1;
		else
			o->iodepth_low = o->iodepth;
	}

	/*
	 * If batch number isn't set, default to the same as iodepth
	 */
	if (o->iodepth_batch > o->iodepth || !o->iodepth_batch)
		o->iodepth_batch = o->iodepth;

	if (o->nr_files > td->files_index)
		o->nr_files = td->files_index;

	if (o->open_files > o->nr_files || !o->open_files)
		o->open_files = o->nr_files;

	if (((o->rate[0] + o->rate[1]) && (o->rate_iops[0] + o->rate_iops[1]))||
	    ((o->ratemin[0] + o->ratemin[1]) && (o->rate_iops_min[0] +
		o->rate_iops_min[1]))) {
		log_err("fio: rate and rate_iops are mutually exclusive\n");
		ret = 1;
	}
	if ((o->rate[0] < o->ratemin[0]) || (o->rate[1] < o->ratemin[1]) ||
	    (o->rate_iops[0] < o->rate_iops_min[0]) ||
	    (o->rate_iops[1] < o->rate_iops_min[1])) {
		log_err("fio: minimum rate exceeds rate\n");
		ret = 1;
	}

	if (!o->timeout && o->time_based) {
		log_err("fio: time_based requires a runtime/timeout setting\n");
		o->time_based = 0;
		ret = warnings_fatal;
	}

	if (o->fill_device && !o->size)
		o->size = -1ULL;

	if (o->verify != VERIFY_NONE) {
		if (td_write(td) && o->do_verify && o->numjobs > 1) {
			log_info("Multiple writers may overwrite blocks that "
				"belong to other jobs. This can cause "
				"verification failures.\n");
			ret = warnings_fatal;
		}

		o->refill_buffers = 1;
		if (o->max_bs[DDIR_WRITE] != o->min_bs[DDIR_WRITE] &&
		    !o->verify_interval)
			o->verify_interval = o->min_bs[DDIR_WRITE];
	}

	if (o->pre_read) {
		o->invalidate_cache = 0;
		if (td->io_ops->flags & FIO_PIPEIO) {
			log_info("fio: cannot pre-read files with an IO engine"
				 " that isn't seekable. Pre-read disabled.\n");
			ret = warnings_fatal;
		}
	}

#ifndef FIO_HAVE_FDATASYNC
	if (o->fdatasync_blocks) {
		log_info("fio: this platform does not support fdatasync()"
			 " falling back to using fsync().  Use the 'fsync'"
			 " option instead of 'fdatasync' to get rid of"
			 " this warning\n");
		o->fsync_blocks = o->fdatasync_blocks;
		o->fdatasync_blocks = 0;
		ret = warnings_fatal;
	}
#endif

#ifdef WIN32
	/*
	 * Windows doesn't support O_DIRECT or O_SYNC with the _open interface,
	 * so fail if we're passed those flags
	 */
	if ((td->io_ops->flags & FIO_SYNCIO) && (td->o.odirect || td->o.sync_io)) {
		log_err("fio: Windows does not support direct or non-buffered io with"
				" the synchronous ioengines. Use the 'windowsaio' ioengine"
				" with 'direct=1' and 'iodepth=1' instead.\n");
		ret = 1;
	}
#endif

	/*
	 * For fully compressible data, just zero them at init time.
	 * It's faster than repeatedly filling it.
	 */
	if (td->o.compress_percentage == 100) {
		td->o.zero_buffers = 1;
		td->o.compress_percentage = 0;
	}

	return ret;
}
Example #17
/*
 * Lazy way of fixing up options that depend on each other. We could also
 * define option callback handlers, but this is easier.
 */
static int fixup_options(struct thread_data *td)
{
	struct thread_options *o = &td->o;
	int ret = 0;

#ifndef FIO_HAVE_PSHARED_MUTEX
	if (!o->use_thread) {
		log_info("fio: this platform does not support process shared"
			 " mutexes, forcing use of threads. Use the 'thread'"
			 " option to get rid of this warning.\n");
		o->use_thread = 1;
		ret = warnings_fatal;
	}
#endif

	if (o->write_iolog_file && o->read_iolog_file) {
		log_err("fio: read iolog overrides write_iolog\n");
		free(o->write_iolog_file);
		o->write_iolog_file = NULL;
		ret = warnings_fatal;
	}

	/*
	 * only really works with 1 file
	 */
	if (o->zone_size && o->open_files > 1)
		o->zone_size = 0;

	/*
	 * If zone_range isn't specified, backward compatibility dictates it
	 * should be made equal to zone_size.
	 */
	if (o->zone_size && !o->zone_range)
		o->zone_range = o->zone_size;

	/*
	 * Reads can do overwrites, we always need to pre-create the file
	 */
	if (td_read(td) || td_rw(td))
		o->overwrite = 1;

	if (!o->min_bs[DDIR_READ])
		o->min_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->max_bs[DDIR_READ])
		o->max_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->min_bs[DDIR_WRITE])
		o->min_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
	if (!o->max_bs[DDIR_WRITE])
		o->max_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
	if (!o->min_bs[DDIR_TRIM])
		o->min_bs[DDIR_TRIM] = o->bs[DDIR_TRIM];
	if (!o->max_bs[DDIR_TRIM])
		o->max_bs[DDIR_TRIM] = o->bs[DDIR_TRIM];


	o->rw_min_bs = min(o->min_bs[DDIR_READ], o->min_bs[DDIR_WRITE]);
	o->rw_min_bs = min(o->min_bs[DDIR_TRIM], o->rw_min_bs);

	/*
	 * For random IO, allow blockalign offset other than min_bs.
	 */
	if (!o->ba[DDIR_READ] || !td_random(td))
		o->ba[DDIR_READ] = o->min_bs[DDIR_READ];
	if (!o->ba[DDIR_WRITE] || !td_random(td))
		o->ba[DDIR_WRITE] = o->min_bs[DDIR_WRITE];
	if (!o->ba[DDIR_TRIM] || !td_random(td))
		o->ba[DDIR_TRIM] = o->min_bs[DDIR_TRIM];

	if ((o->ba[DDIR_READ] != o->min_bs[DDIR_READ] ||
	    o->ba[DDIR_WRITE] != o->min_bs[DDIR_WRITE] ||
	    o->ba[DDIR_TRIM] != o->min_bs[DDIR_TRIM]) &&
	    !o->norandommap) {
		log_err("fio: Any use of blockalign= turns off randommap\n");
		o->norandommap = 1;
		ret = warnings_fatal;
	}

	if (!o->file_size_high)
		o->file_size_high = o->file_size_low;

	if (o->start_delay_high)
		o->start_delay = get_rand_start_delay(td);

	if (o->norandommap && o->verify != VERIFY_NONE
	    && !fixed_block_size(o))  {
		log_err("fio: norandommap given for variable block sizes, "
			"verify disabled\n");
		o->verify = VERIFY_NONE;
		ret = warnings_fatal;
	}
	if (o->bs_unaligned && (o->odirect || td->io_ops->flags & FIO_RAWIO))
		log_err("fio: bs_unaligned may not work with raw io\n");

	/*
	 * thinktime_spin must be less than thinktime
	 */
	if (o->thinktime_spin > o->thinktime)
		o->thinktime_spin = o->thinktime;

	/*
	 * The low water mark cannot be bigger than the iodepth
	 */
	if (o->iodepth_low > o->iodepth || !o->iodepth_low)
		o->iodepth_low = o->iodepth;

	/*
	 * If batch number isn't set, default to the same as iodepth
	 */
	if (o->iodepth_batch > o->iodepth || !o->iodepth_batch)
		o->iodepth_batch = o->iodepth;

	if (o->nr_files > td->files_index)
		o->nr_files = td->files_index;

	if (o->open_files > o->nr_files || !o->open_files)
		o->open_files = o->nr_files;

	if (((o->rate[DDIR_READ] + o->rate[DDIR_WRITE] + o->rate[DDIR_TRIM]) &&
	    (o->rate_iops[DDIR_READ] + o->rate_iops[DDIR_WRITE] + o->rate_iops[DDIR_TRIM])) ||
	    ((o->ratemin[DDIR_READ] + o->ratemin[DDIR_WRITE] + o->ratemin[DDIR_TRIM]) &&
	    (o->rate_iops_min[DDIR_READ] + o->rate_iops_min[DDIR_WRITE] + o->rate_iops_min[DDIR_TRIM]))) {
		log_err("fio: rate and rate_iops are mutually exclusive\n");
		ret = 1;
	}
	if ((o->rate[DDIR_READ] < o->ratemin[DDIR_READ]) ||
	    (o->rate[DDIR_WRITE] < o->ratemin[DDIR_WRITE]) ||
	    (o->rate[DDIR_TRIM] < o->ratemin[DDIR_TRIM]) ||
	    (o->rate_iops[DDIR_READ] < o->rate_iops_min[DDIR_READ]) ||
	    (o->rate_iops[DDIR_WRITE] < o->rate_iops_min[DDIR_WRITE]) ||
	    (o->rate_iops[DDIR_TRIM] < o->rate_iops_min[DDIR_TRIM])) {
		log_err("fio: minimum rate exceeds rate\n");
		ret = 1;
	}

	if (!o->timeout && o->time_based) {
		log_err("fio: time_based requires a runtime/timeout setting\n");
		o->time_based = 0;
		ret = warnings_fatal;
	}

	if (o->fill_device && !o->size)
		o->size = -1ULL;

	if (o->verify != VERIFY_NONE) {
		if (td_write(td) && o->do_verify && o->numjobs > 1) {
			log_info("Multiple writers may overwrite blocks that "
				"belong to other jobs. This can cause "
				"verification failures.\n");
			ret = warnings_fatal;
		}

		o->refill_buffers = 1;
		if (o->max_bs[DDIR_WRITE] != o->min_bs[DDIR_WRITE] &&
		    !o->verify_interval)
			o->verify_interval = o->min_bs[DDIR_WRITE];

		/*
		 * Verify interval must be smaller or equal to the
		 * write size.
		 */
		if (o->verify_interval > o->min_bs[DDIR_WRITE])
			o->verify_interval = o->min_bs[DDIR_WRITE];
		else if (td_read(td) && o->verify_interval > o->min_bs[DDIR_READ])
			o->verify_interval = o->min_bs[DDIR_READ];
	}

	if (o->pre_read) {
		o->invalidate_cache = 0;
		if (td->io_ops->flags & FIO_PIPEIO) {
			log_info("fio: cannot pre-read files with an IO engine"
				 " that isn't seekable. Pre-read disabled.\n");
			ret = warnings_fatal;
		}
	}

	if (!o->unit_base) {
		if (td->io_ops->flags & FIO_BIT_BASED)
			o->unit_base = 1;
		else
			o->unit_base = 8;
	}

#ifndef CONFIG_FDATASYNC
	if (o->fdatasync_blocks) {
		log_info("fio: this platform does not support fdatasync()"
			 " falling back to using fsync().  Use the 'fsync'"
			 " option instead of 'fdatasync' to get rid of"
			 " this warning\n");
		o->fsync_blocks = o->fdatasync_blocks;
		o->fdatasync_blocks = 0;
		ret = warnings_fatal;
	}
#endif

#ifdef WIN32
	/*
	 * Windows doesn't support O_DIRECT or O_SYNC with the _open interface,
	 * so fail if we're passed those flags
	 */
	if ((td->io_ops->flags & FIO_SYNCIO) && (td->o.odirect || td->o.sync_io)) {
		log_err("fio: Windows does not support direct or non-buffered io with"
				" the synchronous ioengines. Use the 'windowsaio' ioengine"
				" with 'direct=1' and 'iodepth=1' instead.\n");
		ret = 1;
	}
#endif

	/*
	 * For fully compressible data, just zero them at init time.
	 * It's faster than repeatedly filling it.
	 */
	if (td->o.compress_percentage == 100) {
		td->o.zero_buffers = 1;
		td->o.compress_percentage = 0;
	}

	/*
	 * Using a non-uniform random distribution excludes usage of
	 * a random map
	 */
	if (td->o.random_distribution != FIO_RAND_DIST_RANDOM)
		td->o.norandommap = 1;

	/*
	 * If size is set but less than the min block size, complain
	 */
	if (o->size && o->size < td_min_bs(td)) {
		log_err("fio: size too small, must be larger than the IO size: %llu\n", (unsigned long long) o->size);
		ret = 1;
	}

	/*
	 * O_ATOMIC implies O_DIRECT
	 */
	if (td->o.oatomic)
		td->o.odirect = 1;

	/*
	 * If randseed is set, that overrides randrepeat
	 */
	if (td->o.rand_seed)
		td->o.rand_repeatable = 0;

	if ((td->io_ops->flags & FIO_NOEXTEND) && td->o.file_append) {
		log_err("fio: can't append/extent with IO engine %s\n", td->io_ops->name);
		ret = 1;
	}

	return ret;
}