Example #1
/*
 * Sets the status of the 'td' in the printed status map.
 */
static void check_str_update(struct thread_data *td)
{
	char c = run_str[td->thread_number - 1];

	switch (td->runstate) {
	case TD_REAPED:
		c = '_';
		break;
	case TD_EXITED:
		c = 'E';
		break;
	case TD_RAMP:
		c = '/';
		break;
	case TD_RUNNING:
		if (td_rw(td)) {
			if (td_random(td))
				c = 'm';
			else
				c = 'M';
		} else if (td_read(td)) {
			if (td_random(td))
				c = 'r';
			else
				c = 'R';
		} else {
			if (td_random(td))
				c = 'w';
			else
				c = 'W';
		}
		break;
	case TD_PRE_READING:
		c = 'p';
		break;
	case TD_VERIFYING:
		c = 'V';
		break;
	case TD_FSYNCING:
		c = 'F';
		break;
	case TD_CREATED:
		c = 'C';
		break;
	case TD_INITIALIZED:
		c = 'I';
		break;
	case TD_NOT_CREATED:
		c = 'P';
		break;
	default:
		log_err("state %d\n", td->runstate);
	}

	run_str[td->thread_number - 1] = c;
}
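
Every example on this page branches on td_random(). For orientation, here is a minimal sketch of that predicate and its siblings; the macro bodies are reproduced from memory of fio's headers, so treat them as illustrative rather than authoritative.

/* Sketch: td_random() and friends test the job's data-direction flags. */
#define td_read(td)	((td)->o.td_ddir & TD_DDIR_READ)
#define td_write(td)	((td)->o.td_ddir & TD_DDIR_WRITE)
#define td_rw(td)	(((td)->o.td_ddir & TD_DDIR_RW) == TD_DDIR_RW)
#define td_random(td)	((td)->o.td_ddir & TD_DDIR_RAND)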
Example #2
static void td_fill_rand_seeds_os(struct thread_data *td)
{
	os_random_seed(td->rand_seeds[FIO_RAND_BS_OFF], &td->bsrange_state);
	os_random_seed(td->rand_seeds[FIO_RAND_VER_OFF], &td->verify_state);
	os_random_seed(td->rand_seeds[FIO_RAND_MIX_OFF], &td->rwmix_state);

	if (td->o.file_service_type == FIO_FSERVICE_RANDOM)
		os_random_seed(td->rand_seeds[FIO_RAND_FILE_OFF], &td->next_file_state);

	os_random_seed(td->rand_seeds[FIO_RAND_FILE_SIZE_OFF], &td->file_size_state);
	os_random_seed(td->rand_seeds[FIO_RAND_TRIM_OFF], &td->trim_state);
	os_random_seed(td->rand_seeds[FIO_RAND_START_DELAY], &td->delay_state);

	if (!td_random(td))
		return;

	if (td->o.rand_repeatable)
		td->rand_seeds[FIO_RAND_BLOCK_OFF] = FIO_RANDSEED * td->thread_number;

	os_random_seed(td->rand_seeds[FIO_RAND_BLOCK_OFF], &td->random_state);

	os_random_seed(td->rand_seeds[FIO_RAND_SEQ_RAND_READ_OFF], &td->seq_rand_state[DDIR_READ]);
	os_random_seed(td->rand_seeds[FIO_RAND_SEQ_RAND_WRITE_OFF], &td->seq_rand_state[DDIR_WRITE]);
	os_random_seed(td->rand_seeds[FIO_RAND_SEQ_RAND_TRIM_OFF], &td->seq_rand_state[DDIR_TRIM]);
}
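
The FIO_RAND_*_OFF constants above index the per-thread rand_seeds[] array. Examples #10 and #11 below are older versions of the same function that use raw indexes, which implies the mapping sketched here; the values are inferred from those snippets rather than copied from fio.h, and later fio versions append further entries such as FIO_RAND_START_DELAY and the FIO_RAND_SEQ_RAND_*_OFF trio.

/* Inferred seed-slot layout (illustrative; compare Examples #10 and #11): */
enum {
	FIO_RAND_BS_OFF		= 0,	/* block-size range state */
	FIO_RAND_VER_OFF	= 1,	/* verify state */
	FIO_RAND_MIX_OFF	= 2,	/* read/write mix state */
	FIO_RAND_FILE_OFF	= 3,	/* next-file selection state */
	FIO_RAND_BLOCK_OFF	= 4,	/* random block offset state */
	FIO_RAND_FILE_SIZE_OFF	= 5,	/* per-file size state */
	FIO_RAND_TRIM_OFF	= 6,	/* trim state */
};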
Example #3
static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	set_rw_ddir(td, io_u);

	/*
	 * fsync() or fdatasync() or trim etc, we are done
	 */
	if (!ddir_rw(io_u->ddir))
		goto out;

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) {
		td->zone_bytes = 0;
		io_u->file->file_offset += td->o.zone_range + td->o.zone_skip;
		io_u->file->last_pos = io_u->file->file_offset;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, "  off=%llu/%lu > %llu\n", io_u->offset,
				io_u->buflen, io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	log_io_u(td, io_u);
	return 0;
}
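
To make the zone-skip branch concrete, a worked example with made-up numbers (none of these values appear in the source):

/*
 * Suppose zone_size = zone_range = 64M and zone_skip = 16M. Once
 * td->zone_bytes reaches 64M, file_offset jumps ahead by
 * zone_range + zone_skip = 80M, so consecutive zones start 80M apart
 * and the 16M gap between them is counted in td->io_skip_bytes but
 * never read or written.
 */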
Example #4
File: io_u.c Project: jeefke/fio
static int get_next_block(struct thread_data *td, struct io_u *io_u,
			  enum fio_ddir ddir, int rw_seq, unsigned long long *b)
{
	struct fio_file *f = io_u->file;
	int ret;

	assert(ddir_rw(ddir));

	if (rw_seq) {
		if (td_random(td))
			ret = get_next_rand_block(td, f, ddir, b);
		else
			ret = get_next_seq_block(td, f, ddir, b);
	} else {
		io_u->flags |= IO_U_F_BUSY_OK;

		if (td->o.rw_seq == RW_SEQ_SEQ) {
			ret = get_next_seq_block(td, f, ddir, b);
			if (ret)
				ret = get_next_rand_block(td, f, ddir, b);
		} else if (td->o.rw_seq == RW_SEQ_IDENT) {
			if (f->last_start != -1ULL)
				*b = (f->last_start - f->file_offset)
					/ td->o.min_bs[ddir];
			else
				*b = 0;
			ret = 0;
		} else {
			log_err("fio: unknown rw_seq=%d\n", td->o.rw_seq);
			ret = 1;
		}
	}
	
	return ret;
}
Example #5
static int fio_mmapio_open(struct thread_data *td, struct fio_file *f)
{
	int ret, flags;

	ret = generic_open_file(td, f);
	if (ret)
		return ret;

	/*
	 * for size checkup, don't mmap anything.
	 */
	if (!f->io_size)
		return 0;

	if (td_rw(td))
		flags = PROT_READ | PROT_WRITE;
	else if (td_write(td)) {
		flags = PROT_WRITE;

		if (td->o.verify != VERIFY_NONE)
			flags |= PROT_READ;
	} else
		flags = PROT_READ;

	f->mmap = mmap(NULL, f->io_size, flags, MAP_SHARED, f->fd, f->file_offset);
	if (f->mmap == MAP_FAILED) {
		f->mmap = NULL;
		td_verror(td, errno, "mmap");
		goto err;
	}

	if (file_invalidate_cache(td, f))
		goto err;

	if (!td_random(td)) {
		if (madvise(f->mmap, f->io_size, MADV_SEQUENTIAL) < 0) {
			td_verror(td, errno, "madvise");
			goto err;
		}
	} else {
		if (madvise(f->mmap, f->io_size, MADV_RANDOM) < 0) {
			td_verror(td, errno, "madvise");
			goto err;
		}
	}

	return 0;

err:
	td->io_ops->close_file(td, f);
	return 1;
}
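
For symmetry, a sketch of the matching close path; it is written from memory of fio's mmap engine of the same era, so its exact shape (and the existence of generic_close_file() as the counterpart of generic_open_file()) should be treated as an assumption.

static int fio_mmapio_close(struct thread_data *td, struct fio_file *f)
{
	int ret = 0, ret2;

	if (f->mmap) {
		/* drop the mapping before closing the descriptor */
		if (munmap(f->mmap, f->io_size) < 0)
			ret = errno;

		f->mmap = NULL;
	}

	ret2 = generic_close_file(td, f);
	if (!ret && ret2)
		ret = ret2;

	return ret;
}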
Example #6
/*
 * Sort the reads for a verify phase in batches of verifysort_nr, if
 * specified.
 */
static inline int should_sort_io(struct thread_data *td)
{
	if (!td->o.verifysort_nr || !td->o.do_verify)
		return 0;
	if (!td_random(td))
		return 0;
	if (td->runstate != TD_VERIFYING)
		return 0;
	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE)
		return 0;

	return 1;
}
Example #7
/*
 * Sort the reads for a verify phase in batches of verifysort_nr, if
 * specified.
 */
static inline bool should_sort_io(struct thread_data *td)
{
	if (!td->o.verifysort_nr || !td->o.do_verify)
		return false;
	if (!td_random(td))
		return false;
	if (td->runstate != TD_VERIFYING)
		return false;
	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE ||
	    td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE64)
		return false;

	return true;
}
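
Neither version shows a caller. A hypothetical call site might batch up to verifysort_nr offsets and drain them in ascending order, so that a random-offset verify phase issues mostly-sequential reads; sort_verify_batch() and cmp_off() below are invented for illustration and assume <stdlib.h> for qsort():

static int cmp_off(const void *a, const void *b)
{
	unsigned long long x = *(const unsigned long long *) a;
	unsigned long long y = *(const unsigned long long *) b;

	return (x > y) - (x < y);
}

static void sort_verify_batch(struct thread_data *td,
			      unsigned long long *offs, unsigned int nr)
{
	/* only reorder when the heuristics above say it pays off */
	if (should_sort_io(td))
		qsort(offs, nr, sizeof(*offs), cmp_off);
}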
Example #8
static int get_next_block(struct thread_data *td, struct io_u *io_u,
			  enum fio_ddir ddir, int rw_seq)
{
	struct fio_file *f = io_u->file;
	unsigned long long b, offset;
	int ret;

	assert(ddir_rw(ddir));

	b = offset = -1ULL;

	if (rw_seq) {
		if (td_random(td))
			ret = get_next_rand_block(td, f, ddir, &b);
		else
			ret = get_next_seq_offset(td, f, ddir, &offset);
	} else {
		io_u->flags |= IO_U_F_BUSY_OK;

		if (td->o.rw_seq == RW_SEQ_SEQ) {
			ret = get_next_seq_offset(td, f, ddir, &offset);
			if (ret)
				ret = get_next_rand_block(td, f, ddir, &b);
		} else if (td->o.rw_seq == RW_SEQ_IDENT) {
			if (f->last_start != -1ULL)
				offset = f->last_start - f->file_offset;
			else
				offset = 0;
			ret = 0;
		} else {
			log_err("fio: unknown rw_seq=%d\n", td->o.rw_seq);
			ret = 1;
		}
	}
	
	if (!ret) {
		if (offset != -1ULL)
			io_u->offset = offset;
		else if (b != -1ULL)
			io_u->offset = b * td->o.ba[ddir];
		else {
			log_err("fio: bug in offset generation\n");
			ret = 1;
		}
	}

	return ret;
}
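
The tail of this function is the only place a random block number becomes a byte offset; a worked example with illustrative values:

/*
 * With td->o.ba[ddir] = 4096 (block alignment defaulting to min_bs, as
 * fixup_options() below arranges) and a random block number b = 10,
 * io_u->offset becomes 10 * 4096 = 40960. The sequential path skips the
 * conversion: get_next_seq_offset() already yields a byte offset, which
 * is used as-is.
 */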
Example #9
/*
 * log a successful write, so we can unwind the log for verify
 */
void log_io_piece(struct thread_data *td, struct io_u *io_u)
{
    struct rb_node **p, *parent;
    struct io_piece *ipo, *__ipo;

    ipo = malloc(sizeof(struct io_piece));
    ipo->file = io_u->file;
    ipo->offset = io_u->offset;
    ipo->len = io_u->buflen;

    /*
     * We don't need to sort the entries, if:
     *
     *	Sequential writes, or
     *	Random writes that lay out the file as it goes along
     *
     * For both these cases, just reading back data in the order we
     * wrote it out is the fastest.
     */
    if (!td_random(td) || !td->o.overwrite) {
        INIT_FLIST_HEAD(&ipo->list);
        flist_add_tail(&ipo->list, &td->io_hist_list);
        return;
    }

    RB_CLEAR_NODE(&ipo->rb_node);
    p = &td->io_hist_tree.rb_node;
    parent = NULL;

    /*
     * Sort the entry into the verification list
     */
    while (*p) {
        parent = *p;

        __ipo = rb_entry(parent, struct io_piece, rb_node);
        if (ipo->offset <= __ipo->offset)
            p = &(*p)->rb_left;
        else
            p = &(*p)->rb_right;
    }

    rb_link_node(&ipo->rb_node, parent, p);
    rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
}
Example #10
File: init.c Project: rudyLi/fio
static void td_fill_rand_seeds_internal(struct thread_data *td)
{
	init_rand_seed(&td->__bsrange_state, td->rand_seeds[0]);
	init_rand_seed(&td->__verify_state, td->rand_seeds[1]);
	init_rand_seed(&td->__rwmix_state, td->rand_seeds[2]);

	if (td->o.file_service_type == FIO_FSERVICE_RANDOM)
		init_rand_seed(&td->__next_file_state, td->rand_seeds[3]);

	init_rand_seed(&td->__file_size_state, td->rand_seeds[5]);
	init_rand_seed(&td->__trim_state, td->rand_seeds[6]);

	if (!td_random(td))
		return;

	if (td->o.rand_repeatable)
		td->rand_seeds[4] = FIO_RANDSEED * td->thread_number;

	init_rand_seed(&td->__random_state, td->rand_seeds[4]);
}
Example #11
File: init.c Project: rudyLi/fio
static void td_fill_rand_seeds_os(struct thread_data *td)
{
	os_random_seed(td->rand_seeds[0], &td->bsrange_state);
	os_random_seed(td->rand_seeds[1], &td->verify_state);
	os_random_seed(td->rand_seeds[2], &td->rwmix_state);

	if (td->o.file_service_type == FIO_FSERVICE_RANDOM)
		os_random_seed(td->rand_seeds[3], &td->next_file_state);

	os_random_seed(td->rand_seeds[5], &td->file_size_state);
	os_random_seed(td->rand_seeds[6], &td->trim_state);

	if (!td_random(td))
		return;

	if (td->o.rand_repeatable)
		td->rand_seeds[4] = FIO_RANDSEED * td->thread_number;

	os_random_seed(td->rand_seeds[4], &td->random_state);
}
Example #12
static void td_fill_rand_seeds_internal(struct thread_data *td)
{
	init_rand_seed(&td->__bsrange_state, td->rand_seeds[FIO_RAND_BS_OFF]);
	init_rand_seed(&td->__verify_state, td->rand_seeds[FIO_RAND_VER_OFF]);
	init_rand_seed(&td->__rwmix_state, td->rand_seeds[FIO_RAND_MIX_OFF]);

	if (td->o.file_service_type == FIO_FSERVICE_RANDOM)
		init_rand_seed(&td->__next_file_state, td->rand_seeds[FIO_RAND_FILE_OFF]);

	init_rand_seed(&td->__file_size_state, td->rand_seeds[FIO_RAND_FILE_SIZE_OFF]);
	init_rand_seed(&td->__trim_state, td->rand_seeds[FIO_RAND_TRIM_OFF]);

	if (!td_random(td))
		return;

	if (td->o.rand_repeatable)
		td->rand_seeds[FIO_RAND_BLOCK_OFF] = FIO_RANDSEED * td->thread_number;

	init_rand_seed(&td->__random_state, td->rand_seeds[FIO_RAND_BLOCK_OFF]);
	init_rand_seed(&td->__seq_rand_state[DDIR_READ], td->rand_seeds[FIO_RAND_SEQ_RAND_READ_OFF]);
	init_rand_seed(&td->__seq_rand_state[DDIR_WRITE], td->rand_seeds[FIO_RAND_SEQ_RAND_WRITE_OFF]);
	init_rand_seed(&td->__seq_rand_state[DDIR_TRIM], td->rand_seeds[FIO_RAND_SEQ_RAND_TRIM_OFF]);
}
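
Examples #2 and #12 are two halves of one dispatch: depending on the job options, fio seeds either the OS random generator or its internal Tausworthe generator. A sketch of the dispatcher, assuming an option flag named use_os_rand as in fio of this era:

void td_fill_rand_seeds(struct thread_data *td)
{
	if (td->o.use_os_rand)
		td_fill_rand_seeds_os(td);
	else
		td_fill_rand_seeds_internal(td);
}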
Example #13
File: mmap.c Project: arh/fio
static bool fio_madvise_file(struct thread_data *td, struct fio_file *f,
			     size_t length)

{
	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);

	if (!td->o.fadvise_hint)
		return true;

	if (!td_random(td)) {
		int err = posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_SEQUENTIAL);

		/*
		 * posix_madvise() returns the error number directly and
		 * does not set errno, so check the return value itself.
		 */
		if (err) {
			td_verror(td, err, "posix_madvise");
			return false;
		}
	} else {
		int err = posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_RANDOM);

		if (err) {
			td_verror(td, err, "posix_madvise");
			return false;
		}
	}

	return true;
}
Example #14
File: init.c Project: rudyLi/fio
/*
 * Lazy way of fixing up options that depend on each other. We could also
 * define option callback handlers, but this is easier.
 */
static int fixup_options(struct thread_data *td)
{
	struct thread_options *o = &td->o;
	int ret = 0;

#ifndef FIO_HAVE_PSHARED_MUTEX
	if (!o->use_thread) {
		log_info("fio: this platform does not support process shared"
			 " mutexes, forcing use of threads. Use the 'thread'"
			 " option to get rid of this warning.\n");
		o->use_thread = 1;
		ret = warnings_fatal;
	}
#endif

	if (o->write_iolog_file && o->read_iolog_file) {
		log_err("fio: read iolog overrides write_iolog\n");
		free(o->write_iolog_file);
		o->write_iolog_file = NULL;
		ret = warnings_fatal;
	}

	/*
	 * only really works with 1 file
	 */
	if (o->zone_size && o->open_files > 1)
		o->zone_size = 0;

	/*
	 * If zone_range isn't specified, backward compatibility dictates it
	 * should be made equal to zone_size.
	 */
	if (o->zone_size && !o->zone_range)
		o->zone_range = o->zone_size;

	/*
	 * Reads can do overwrites, we always need to pre-create the file
	 */
	if (td_read(td) || td_rw(td))
		o->overwrite = 1;

	if (!o->min_bs[DDIR_READ])
		o->min_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->max_bs[DDIR_READ])
		o->max_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->min_bs[DDIR_WRITE])
		o->min_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
	if (!o->max_bs[DDIR_WRITE])
		o->max_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];

	o->rw_min_bs = min(o->min_bs[DDIR_READ], o->min_bs[DDIR_WRITE]);

	/*
	 * For random IO, allow blockalign offset other than min_bs.
	 */
	if (!o->ba[DDIR_READ] || !td_random(td))
		o->ba[DDIR_READ] = o->min_bs[DDIR_READ];
	if (!o->ba[DDIR_WRITE] || !td_random(td))
		o->ba[DDIR_WRITE] = o->min_bs[DDIR_WRITE];

	if ((o->ba[DDIR_READ] != o->min_bs[DDIR_READ] ||
	    o->ba[DDIR_WRITE] != o->min_bs[DDIR_WRITE]) &&
	    !o->norandommap) {
		log_err("fio: Any use of blockalign= turns off randommap\n");
		o->norandommap = 1;
		ret = warnings_fatal;
	}

	if (!o->file_size_high)
		o->file_size_high = o->file_size_low;

	if (o->norandommap && o->verify != VERIFY_NONE
	    && !fixed_block_size(o))  {
		log_err("fio: norandommap given for variable block sizes, "
			"verify disabled\n");
		o->verify = VERIFY_NONE;
		ret = warnings_fatal;
	}
	if (o->bs_unaligned && (o->odirect || td->io_ops->flags & FIO_RAWIO))
		log_err("fio: bs_unaligned may not work with raw io\n");

	/*
	 * thinktime_spin must be less than thinktime
	 */
	if (o->thinktime_spin > o->thinktime)
		o->thinktime_spin = o->thinktime;

	/*
	 * The low water mark cannot be bigger than the iodepth
	 */
	if (o->iodepth_low > o->iodepth || !o->iodepth_low) {
		/*
		 * syslet work around - if the workload is sequential,
		 * we want to let the queue drain all the way down to
		 * avoid seeking between async threads
		 */
		if (!strcmp(td->io_ops->name, "syslet-rw") && !td_random(td))
			o->iodepth_low = 1;
		else
			o->iodepth_low = o->iodepth;
	}

	/*
	 * If batch number isn't set, default to the same as iodepth
	 */
	if (o->iodepth_batch > o->iodepth || !o->iodepth_batch)
		o->iodepth_batch = o->iodepth;

	if (o->nr_files > td->files_index)
		o->nr_files = td->files_index;

	if (o->open_files > o->nr_files || !o->open_files)
		o->open_files = o->nr_files;

	if (((o->rate[0] + o->rate[1]) && (o->rate_iops[0] + o->rate_iops[1])) ||
	    ((o->ratemin[0] + o->ratemin[1]) && (o->rate_iops_min[0] +
		o->rate_iops_min[1]))) {
		log_err("fio: rate and rate_iops are mutually exclusive\n");
		ret = 1;
	}
	if ((o->rate[0] < o->ratemin[0]) || (o->rate[1] < o->ratemin[1]) ||
	    (o->rate_iops[0] < o->rate_iops_min[0]) ||
	    (o->rate_iops[1] < o->rate_iops_min[1])) {
		log_err("fio: minimum rate exceeds rate\n");
		ret = 1;
	}

	if (!o->timeout && o->time_based) {
		log_err("fio: time_based requires a runtime/timeout setting\n");
		o->time_based = 0;
		ret = warnings_fatal;
	}

	if (o->fill_device && !o->size)
		o->size = -1ULL;

	if (o->verify != VERIFY_NONE) {
		if (td_write(td) && o->do_verify && o->numjobs > 1) {
			log_info("Multiple writers may overwrite blocks that "
				"belong to other jobs. This can cause "
				"verification failures.\n");
			ret = warnings_fatal;
		}

		o->refill_buffers = 1;
		if (o->max_bs[DDIR_WRITE] != o->min_bs[DDIR_WRITE] &&
		    !o->verify_interval)
			o->verify_interval = o->min_bs[DDIR_WRITE];
	}

	if (o->pre_read) {
		o->invalidate_cache = 0;
		if (td->io_ops->flags & FIO_PIPEIO) {
			log_info("fio: cannot pre-read files with an IO engine"
				 " that isn't seekable. Pre-read disabled.\n");
			ret = warnings_fatal;
		}
	}

#ifndef FIO_HAVE_FDATASYNC
	if (o->fdatasync_blocks) {
		log_info("fio: this platform does not support fdatasync()"
			 " falling back to using fsync().  Use the 'fsync'"
			 " option instead of 'fdatasync' to get rid of"
			 " this warning\n");
		o->fsync_blocks = o->fdatasync_blocks;
		o->fdatasync_blocks = 0;
		ret = warnings_fatal;
	}
#endif

#ifdef WIN32
	/*
	 * Windows doesn't support O_DIRECT or O_SYNC with the _open interface,
	 * so fail if we're passed those flags
	 */
	if ((td->io_ops->flags & FIO_SYNCIO) && (td->o.odirect || td->o.sync_io)) {
		log_err("fio: Windows does not support direct or non-buffered io with"
				" the synchronous ioengines. Use the 'windowsaio' ioengine"
				" with 'direct=1' and 'iodepth=1' instead.\n");
		ret = 1;
	}
#endif

	/*
	 * For fully compressible data, just zero them at init time.
	 * It's faster than repeatedly filling it.
	 */
	if (td->o.compress_percentage == 100) {
		td->o.zero_buffers = 1;
		td->o.compress_percentage = 0;
	}

	return ret;
}
Example #15
/*
 * Sets the status of the 'td' in the printed status map.
 */
static void check_str_update(struct thread_data *td)
{
    char c = __run_str[td->thread_number - 1];

    switch (td->runstate) {
    case TD_REAPED:
        if (td->error)
            c = 'X';
        else if (td->sig)
            c = 'K';
        else
            c = '_';
        break;
    case TD_EXITED:
        c = 'E';
        break;
    case TD_RAMP:
        c = '/';
        break;
    case TD_RUNNING:
        if (td_rw(td)) {
            if (td_random(td)) {
                if (td->o.rwmix[DDIR_READ] == 100)
                    c = 'r';
                else if (td->o.rwmix[DDIR_WRITE] == 100)
                    c = 'w';
                else
                    c = 'm';
            } else {
                if (td->o.rwmix[DDIR_READ] == 100)
                    c = 'R';
                else if (td->o.rwmix[DDIR_WRITE] == 100)
                    c = 'W';
                else
                    c = 'M';
            }
        } else if (td_read(td)) {
            if (td_random(td))
                c = 'r';
            else
                c = 'R';
        } else if (td_write(td)) {
            if (td_random(td))
                c = 'w';
            else
                c = 'W';
        } else {
            if (td_random(td))
                c = 'd';
            else
                c = 'D';
        }
        break;
    case TD_PRE_READING:
        c = 'p';
        break;
    case TD_VERIFYING:
        c = 'V';
        break;
    case TD_FSYNCING:
        c = 'F';
        break;
    case TD_FINISHING:
        c = 'f';
        break;
    case TD_CREATED:
        c = 'C';
        break;
    case TD_INITIALIZED:
    case TD_SETTING_UP:
        c = 'I';
        break;
    case TD_NOT_CREATED:
        c = 'P';
        break;
    default:
        log_err("state %d\n", td->runstate);
    }

    __run_str[td->thread_number - 1] = c;
    update_condensed_str(__run_str, run_str);
}
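
Read together, the two versions of check_str_update() give this legend for the printed status map (derived from the switch above; the d/D branch covers the one remaining data direction, trim):

/*
 * P  not yet created      C  thread created       I  initialized/setup
 * p  pre-reading          /  ramp period          V  verifying
 * R,r  seq/random reads   W,w  seq/random writes  M,m  seq/random mixed
 * D,d  seq/random trims   F  fsyncing             f  finishing
 * E  exited (unreaped)    X  exited with error    K  killed by signal
 * _  reaped
 */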
Example #16
static int fio_rdmaio_init(struct thread_data *td)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct rdmaio_options *o = td->eo;
	unsigned int max_bs;
	int ret, i;

	if (td_rw(td)) {
		log_err("fio: rdma connections must be read OR write\n");
		return 1;
	}
	if (td_random(td)) {
		log_err("fio: RDMA network IO can't be random\n");
		return 1;
	}

	if (compat_options(td))
		return 1;

	if (!o->port) {
		log_err("fio: no port has been specified which is required "
			"for the rdma engine\n");
		return 1;
	}

	if (check_set_rlimits(td))
		return 1;

	rd->rdma_protocol = o->verb;
	rd->cq_event_num = 0;

	rd->cm_channel = rdma_create_event_channel();
	if (!rd->cm_channel) {
		log_err("fio: rdma_create_event_channel fail\n");
		return 1;
	}

	ret = rdma_create_id(rd->cm_channel, &rd->cm_id, rd, RDMA_PS_TCP);
	if (ret) {
		log_err("fio: rdma_create_id fail\n");
		return 1;
	}

	if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) ||
	    (rd->rdma_protocol == FIO_RDMA_MEM_READ)) {
		rd->rmt_us =
			malloc(FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u));
		memset(rd->rmt_us, 0,
			FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u));
		rd->rmt_nr = 0;
	}

	rd->io_us_queued = malloc(td->o.iodepth * sizeof(struct io_u *));
	memset(rd->io_us_queued, 0, td->o.iodepth * sizeof(struct io_u *));
	rd->io_u_queued_nr = 0;

	rd->io_us_flight = malloc(td->o.iodepth * sizeof(struct io_u *));
	memset(rd->io_us_flight, 0, td->o.iodepth * sizeof(struct io_u *));
	rd->io_u_flight_nr = 0;

	rd->io_us_completed = malloc(td->o.iodepth * sizeof(struct io_u *));
	memset(rd->io_us_completed, 0, td->o.iodepth * sizeof(struct io_u *));
	rd->io_u_completed_nr = 0;

	if (td_read(td)) {	/* READ as the server */
		rd->is_client = 0;
		td->flags |= TD_F_NO_PROGRESS;
		/* server rd->rdma_buf_len will be setup after got request */
		ret = fio_rdmaio_setup_listen(td, o->port);
	} else {		/* WRITE as the client */
		rd->is_client = 1;
		ret = fio_rdmaio_setup_connect(td, td->o.filename, o->port);
	}

	max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
	rd->send_buf.max_bs = htonl(max_bs);

	/* register each io_u in the free list */
	for (i = 0; i < td->io_u_freelist.nr; i++) {
		struct io_u *io_u = td->io_u_freelist.io_us[i];

		io_u->engine_data = malloc(sizeof(struct rdma_io_u_data));
		memset(io_u->engine_data, 0, sizeof(struct rdma_io_u_data));
		((struct rdma_io_u_data *)io_u->engine_data)->wr_id = i;

		io_u->mr = ibv_reg_mr(rd->pd, io_u->buf, max_bs,
				      IBV_ACCESS_LOCAL_WRITE |
				      IBV_ACCESS_REMOTE_READ |
				      IBV_ACCESS_REMOTE_WRITE);
		if (io_u->mr == NULL) {
			log_err("fio: ibv_reg_mr io_u failed\n");
			return 1;
		}

		rd->send_buf.rmt_us[i].buf =
		    htonll((uint64_t) (unsigned long)io_u->buf);
		rd->send_buf.rmt_us[i].rkey = htonl(io_u->mr->rkey);
		rd->send_buf.rmt_us[i].size = htonl(max_bs);

#if 0
		log_info("fio: Send rkey %x addr %" PRIx64 " len %d to client\n", io_u->mr->rkey, io_u->buf, max_bs); */
#endif
	}

	rd->send_buf.nr = htonl(i);

	return ret;
}
Example #17
/*
 * log a successful write, so we can unwind the log for verify
 */
void log_io_piece(struct thread_data *td, struct io_u *io_u)
{
	struct rb_node **p, *parent;
	struct io_piece *ipo, *__ipo;

	ipo = malloc(sizeof(struct io_piece));
	init_ipo(ipo);
	ipo->file = io_u->file;
	ipo->offset = io_u->offset;
	ipo->len = io_u->buflen;
	ipo->numberio = io_u->numberio;
	ipo->flags = IP_F_IN_FLIGHT;

	io_u->ipo = ipo;

	if (io_u_should_trim(td, io_u)) {
		flist_add_tail(&ipo->trim_list, &td->trim_list);
		td->trim_entries++;
	}

	/*
	 * We don't need to sort the entries, if:
	 *
	 *	Sequential writes, or
	 *	Random writes that lay out the file as it goes along
	 *
	 * For both these cases, just reading back data in the order we
	 * wrote it out is the fastest.
	 *
	 * One exception is if we don't have a random map AND we are doing
	 * verifies, in that case we need to check for duplicate blocks and
	 * drop the old one, which we rely on the rb insert/lookup for
	 * handling.
	 */
	if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
	      (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
		INIT_FLIST_HEAD(&ipo->list);
		flist_add_tail(&ipo->list, &td->io_hist_list);
		ipo->flags |= IP_F_ONLIST;
		td->io_hist_len++;
		return;
	}

	RB_CLEAR_NODE(&ipo->rb_node);

	/*
	 * Sort the entry into the verification list
	 */
restart:
	p = &td->io_hist_tree.rb_node;
	parent = NULL;
	while (*p) {
		parent = *p;

		__ipo = rb_entry(parent, struct io_piece, rb_node);
		if (ipo->file < __ipo->file)
			p = &(*p)->rb_left;
		else if (ipo->file > __ipo->file)
			p = &(*p)->rb_right;
		else if (ipo->offset < __ipo->offset)
			p = &(*p)->rb_left;
		else if (ipo->offset > __ipo->offset)
			p = &(*p)->rb_right;
		else {
			dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
				__ipo->offset, __ipo->len,
				ipo->offset, ipo->len);
			td->io_hist_len--;
			rb_erase(parent, &td->io_hist_tree);
			remove_trim_entry(td, __ipo);
			free(__ipo);
			goto restart;
		}
	}

	rb_link_node(&ipo->rb_node, parent, p);
	rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
	ipo->flags |= IP_F_ONRB;
	td->io_hist_len++;
}
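
The "unwind" promised in the comment is a verify pass that pops these entries back out, rb-tree first and plain list second. A hypothetical consumer sketch, assuming fio's rbtree/flist helpers (rb_first, rb_erase, flist_entry, flist_del) behave as their Linux-kernel namesakes:

static struct io_piece *next_verify_piece(struct thread_data *td)
{
	struct io_piece *ipo = NULL;

	if (!RB_EMPTY_ROOT(&td->io_hist_tree)) {
		struct rb_node *n = rb_first(&td->io_hist_tree);

		/* lowest offset first: sorted, mostly-sequential reads */
		ipo = rb_entry(n, struct io_piece, rb_node);
		rb_erase(n, &td->io_hist_tree);
		ipo->flags &= ~IP_F_ONRB;
	} else if (!flist_empty(&td->io_hist_list)) {
		/* unsorted case: replay in the order written */
		ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
		flist_del(&ipo->list);
		ipo->flags &= ~IP_F_ONLIST;
	}

	if (ipo)
		td->io_hist_len--;
	return ipo;
}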
Example #18
/*
 * Lazy way of fixing up options that depend on each other. We could also
 * define option callback handlers, but this is easier.
 */
static int fixup_options(struct thread_data *td)
{
	struct thread_options *o = &td->o;
	int ret = 0;

#ifndef FIO_HAVE_PSHARED_MUTEX
	if (!o->use_thread) {
		log_info("fio: this platform does not support process shared"
			 " mutexes, forcing use of threads. Use the 'thread'"
			 " option to get rid of this warning.\n");
		o->use_thread = 1;
		ret = warnings_fatal;
	}
#endif

	if (o->write_iolog_file && o->read_iolog_file) {
		log_err("fio: read iolog overrides write_iolog\n");
		free(o->write_iolog_file);
		o->write_iolog_file = NULL;
		ret = warnings_fatal;
	}

	/*
	 * only really works with 1 file
	 */
	if (o->zone_size && o->open_files > 1)
		o->zone_size = 0;

	/*
	 * If zone_range isn't specified, backward compatibility dictates it
	 * should be made equal to zone_size.
	 */
	if (o->zone_size && !o->zone_range)
		o->zone_range = o->zone_size;

	/*
	 * Reads can do overwrites, we always need to pre-create the file
	 */
	if (td_read(td) || td_rw(td))
		o->overwrite = 1;

	if (!o->min_bs[DDIR_READ])
		o->min_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->max_bs[DDIR_READ])
		o->max_bs[DDIR_READ] = o->bs[DDIR_READ];
	if (!o->min_bs[DDIR_WRITE])
		o->min_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
	if (!o->max_bs[DDIR_WRITE])
		o->max_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
	if (!o->min_bs[DDIR_TRIM])
		o->min_bs[DDIR_TRIM] = o->bs[DDIR_TRIM];
	if (!o->max_bs[DDIR_TRIM])
		o->max_bs[DDIR_TRIM] = o->bs[DDIR_TRIM];

	o->rw_min_bs = min(o->min_bs[DDIR_READ], o->min_bs[DDIR_WRITE]);
	o->rw_min_bs = min(o->min_bs[DDIR_TRIM], o->rw_min_bs);

	/*
	 * For random IO, allow blockalign offset other than min_bs.
	 */
	if (!o->ba[DDIR_READ] || !td_random(td))
		o->ba[DDIR_READ] = o->min_bs[DDIR_READ];
	if (!o->ba[DDIR_WRITE] || !td_random(td))
		o->ba[DDIR_WRITE] = o->min_bs[DDIR_WRITE];
	if (!o->ba[DDIR_TRIM] || !td_random(td))
		o->ba[DDIR_TRIM] = o->min_bs[DDIR_TRIM];

	if ((o->ba[DDIR_READ] != o->min_bs[DDIR_READ] ||
	    o->ba[DDIR_WRITE] != o->min_bs[DDIR_WRITE] ||
	    o->ba[DDIR_TRIM] != o->min_bs[DDIR_TRIM]) &&
	    !o->norandommap) {
		log_err("fio: Any use of blockalign= turns off randommap\n");
		o->norandommap = 1;
		ret = warnings_fatal;
	}

	if (!o->file_size_high)
		o->file_size_high = o->file_size_low;

	if (o->start_delay_high)
		o->start_delay = get_rand_start_delay(td);

	if (o->norandommap && o->verify != VERIFY_NONE
	    && !fixed_block_size(o))  {
		log_err("fio: norandommap given for variable block sizes, "
			"verify disabled\n");
		o->verify = VERIFY_NONE;
		ret = warnings_fatal;
	}
	if (o->bs_unaligned && (o->odirect || td->io_ops->flags & FIO_RAWIO))
		log_err("fio: bs_unaligned may not work with raw io\n");

	/*
	 * thinktime_spin must be less than thinktime
	 */
	if (o->thinktime_spin > o->thinktime)
		o->thinktime_spin = o->thinktime;

	/*
	 * The low water mark cannot be bigger than the iodepth
	 */
	if (o->iodepth_low > o->iodepth || !o->iodepth_low)
		o->iodepth_low = o->iodepth;

	/*
	 * If batch number isn't set, default to the same as iodepth
	 */
	if (o->iodepth_batch > o->iodepth || !o->iodepth_batch)
		o->iodepth_batch = o->iodepth;

	if (o->nr_files > td->files_index)
		o->nr_files = td->files_index;

	if (o->open_files > o->nr_files || !o->open_files)
		o->open_files = o->nr_files;

	if (((o->rate[DDIR_READ] + o->rate[DDIR_WRITE] + o->rate[DDIR_TRIM]) &&
	    (o->rate_iops[DDIR_READ] + o->rate_iops[DDIR_WRITE] + o->rate_iops[DDIR_TRIM])) ||
	    ((o->ratemin[DDIR_READ] + o->ratemin[DDIR_WRITE] + o->ratemin[DDIR_TRIM]) &&
	    (o->rate_iops_min[DDIR_READ] + o->rate_iops_min[DDIR_WRITE] + o->rate_iops_min[DDIR_TRIM]))) {
		log_err("fio: rate and rate_iops are mutually exclusive\n");
		ret = 1;
	}
	if ((o->rate[DDIR_READ] < o->ratemin[DDIR_READ]) ||
	    (o->rate[DDIR_WRITE] < o->ratemin[DDIR_WRITE]) ||
	    (o->rate[DDIR_TRIM] < o->ratemin[DDIR_TRIM]) ||
	    (o->rate_iops[DDIR_READ] < o->rate_iops_min[DDIR_READ]) ||
	    (o->rate_iops[DDIR_WRITE] < o->rate_iops_min[DDIR_WRITE]) ||
	    (o->rate_iops[DDIR_TRIM] < o->rate_iops_min[DDIR_TRIM])) {
		log_err("fio: minimum rate exceeds rate\n");
		ret = 1;
	}

	if (!o->timeout && o->time_based) {
		log_err("fio: time_based requires a runtime/timeout setting\n");
		o->time_based = 0;
		ret = warnings_fatal;
	}

	if (o->fill_device && !o->size)
		o->size = -1ULL;

	if (o->verify != VERIFY_NONE) {
		if (td_write(td) && o->do_verify && o->numjobs > 1) {
			log_info("Multiple writers may overwrite blocks that "
				"belong to other jobs. This can cause "
				"verification failures.\n");
			ret = warnings_fatal;
		}

		o->refill_buffers = 1;
		if (o->max_bs[DDIR_WRITE] != o->min_bs[DDIR_WRITE] &&
		    !o->verify_interval)
			o->verify_interval = o->min_bs[DDIR_WRITE];

		/*
		 * Verify interval must be smaller or equal to the
		 * write size.
		 */
		if (o->verify_interval > o->min_bs[DDIR_WRITE])
			o->verify_interval = o->min_bs[DDIR_WRITE];
		else if (td_read(td) && o->verify_interval > o->min_bs[DDIR_READ])
			o->verify_interval = o->min_bs[DDIR_READ];
	}

	if (o->pre_read) {
		o->invalidate_cache = 0;
		if (td->io_ops->flags & FIO_PIPEIO) {
			log_info("fio: cannot pre-read files with an IO engine"
				 " that isn't seekable. Pre-read disabled.\n");
			ret = warnings_fatal;
		}
	}

	if (!o->unit_base) {
		if (td->io_ops->flags & FIO_BIT_BASED)
			o->unit_base = 1;
		else
			o->unit_base = 8;
	}

#ifndef CONFIG_FDATASYNC
	if (o->fdatasync_blocks) {
		log_info("fio: this platform does not support fdatasync()"
			 " falling back to using fsync().  Use the 'fsync'"
			 " option instead of 'fdatasync' to get rid of"
			 " this warning\n");
		o->fsync_blocks = o->fdatasync_blocks;
		o->fdatasync_blocks = 0;
		ret = warnings_fatal;
	}
#endif

#ifdef WIN32
	/*
	 * Windows doesn't support O_DIRECT or O_SYNC with the _open interface,
	 * so fail if we're passed those flags
	 */
	if ((td->io_ops->flags & FIO_SYNCIO) && (td->o.odirect || td->o.sync_io)) {
		log_err("fio: Windows does not support direct or non-buffered io with"
				" the synchronous ioengines. Use the 'windowsaio' ioengine"
				" with 'direct=1' and 'iodepth=1' instead.\n");
		ret = 1;
	}
#endif

	/*
	 * For fully compressible data, just zero them at init time.
	 * It's faster than repeatedly filling it.
	 */
	if (td->o.compress_percentage == 100) {
		td->o.zero_buffers = 1;
		td->o.compress_percentage = 0;
	}

	/*
	 * Using a non-uniform random distribution excludes usage of
	 * a random map
	 */
	if (td->o.random_distribution != FIO_RAND_DIST_RANDOM)
		td->o.norandommap = 1;

	/*
	 * If size is set but less than the min block size, complain
	 */
	if (o->size && o->size < td_min_bs(td)) {
		log_err("fio: size too small, must be larger than the IO size: %llu\n", (unsigned long long) o->size);
		ret = 1;
	}

	/*
	 * O_ATOMIC implies O_DIRECT
	 */
	if (td->o.oatomic)
		td->o.odirect = 1;

	/*
	 * If randseed is set, that overrides randrepeat
	 */
	if (td->o.rand_seed)
		td->o.rand_repeatable = 0;

	if ((td->io_ops->flags & FIO_NOEXTEND) && td->o.file_append) {
		log_err("fio: can't append/extent with IO engine %s\n", td->io_ops->name);
		ret = 1;
	}

	return ret;
}