Exemple #1
0
/*
 * Bring cached data up to date once a write zio has completed.
 *
 * With vc_lock held, visit every cache entry whose offset lies inside the
 * VCBS-aligned window surrounding the write.  For each such entry the part
 * of the write that overlaps it is copied from zio->io_data into ve_data.
 * An entry that still has a fill I/O attached (ve_fill_io != NULL) is not
 * copied into; it is only marked with ve_missed_update.
 */
void
vdev_cache_write(zio_t *zio)
{
	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
	vdev_cache_entry_t *ve;
	vdev_cache_entry_t ve_search;
	avl_index_t where;
	uint64_t wr_start = zio->io_offset;
	uint64_t wr_end = wr_start + zio->io_size;
	uint64_t window_lo = P2ALIGN(wr_start, VCBS);
	uint64_t window_hi = P2ROUNDUP(wr_end, VCBS);

	ASSERT(zio->io_type == ZIO_TYPE_WRITE);

	mutex_enter(&vc->vc_lock);

	/*
	 * Start at the entry sitting exactly on the aligned offset, or
	 * failing that at the first entry after that position.
	 */
	ve_search.ve_offset = window_lo;
	ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);
	if (ve == NULL)
		ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);

	for (; ve != NULL && ve->ve_offset < window_hi;
	    ve = AVL_NEXT(&vc->vc_offset_tree, ve)) {
		uint64_t lo, hi;

		if (ve->ve_fill_io != NULL) {
			/* Entry has a fill I/O attached: flag it only. */
			ve->ve_missed_update = 1;
			continue;
		}

		/* Clamp the write range to this entry's VCBS-sized span. */
		lo = MAX(ve->ve_offset, wr_start);
		hi = MIN(ve->ve_offset + VCBS, wr_end);

		abd_copy_off(ve->ve_data, zio->io_data, hi - lo,
		    lo - ve->ve_offset, lo - wr_start);
	}

	mutex_exit(&vc->vc_lock);
}
Exemple #2
0
/*
 * Done callback for an aggregate I/O.
 *
 * For a read, hand each parent zio its share of the aggregate buffer:
 * io_size bytes taken from the aggregate at the parent's offset relative
 * to the aggregate's offset.  The aggregate buffer is freed in all cases.
 */
static void
vdev_queue_agg_io_done(zio_t *aio)
{
	zio_t *parent;

	if (aio->io_type == ZIO_TYPE_READ) {
		for (parent = zio_walk_parents(aio); parent != NULL;
		    parent = zio_walk_parents(aio)) {
			/* Where this parent's data begins inside aio. */
			uint64_t src_off = parent->io_offset - aio->io_offset;

			abd_copy_off(parent->io_data, aio->io_data,
			    parent->io_size, 0, src_off);
		}
	}

	abd_free(aio->io_data, aio->io_size);
}
Exemple #3
0
/*
 * Done callback for an aggregate I/O.
 *
 * For a read, hand each parent zio its share of the aggregate buffer:
 * io_size bytes taken from the aggregate at the parent's offset relative
 * to the aggregate's offset.  The aggregate buffer is freed in all cases.
 */
static void
vdev_queue_agg_io_done(zio_t *aio)
{
	if (aio->io_type == ZIO_TYPE_READ) {
		zio_link_t *zl = NULL;	/* iteration cursor for the walk */
		zio_t *parent;

		for (parent = zio_walk_parents(aio, &zl); parent != NULL;
		    parent = zio_walk_parents(aio, &zl)) {
			/* Where this parent's data begins inside aio. */
			uint64_t src_off = parent->io_offset - aio->io_offset;

			abd_copy_off(parent->io_abd, aio->io_abd,
			    0, src_off, parent->io_size);
		}
	}

	abd_free(aio->io_abd);
}
Exemple #4
0
/*
 * Satisfy a zio from a cache entry.
 *
 * The caller must hold vc_lock and the entry must not have a fill I/O
 * attached (both are asserted).  The entry's last-used stamp is refreshed
 * if it differs from the current lbolt value, its hit counter is bumped,
 * and io_size bytes are copied from the entry, starting at the zio's
 * offset within the VCBS-sized block, into zio->io_data.
 */
static void
vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
{
	uint64_t entry_off = P2PHASE(zio->io_offset, VCBS);

	ASSERT(MUTEX_HELD(&vc->vc_lock));
	ASSERT(ve->ve_fill_io == NULL);

	if (ddi_get_lbolt() != ve->ve_lastused) {
		avl_tree_t *lastused = &vc->vc_lastused_tree;

		/*
		 * Take the entry out of the tree while its stamp changes
		 * and put it back afterwards (presumably the tree is
		 * ordered by ve_lastused -- confirm against its comparator).
		 */
		avl_remove(lastused, ve);
		ve->ve_lastused = ddi_get_lbolt();
		avl_add(lastused, ve);
	}

	ve->ve_hits += 1;

	abd_copy_off(zio->io_data, ve->ve_data, zio->io_size,
	    0, entry_off);
}
Exemple #5
0
static zio_t *
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
{
	zio_t *first, *last, *aio, *dio, *mandatory, *nio;
	uint64_t maxgap = 0;
	uint64_t size;
	boolean_t stretch = B_FALSE;
	avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
	enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;

	if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
		return (NULL);

	/*
	 * Prevent users from setting the zfs_vdev_aggregation_limit
	 * tuning larger than SPA_MAXBLOCKSIZE.
	 */
	zfs_vdev_aggregation_limit =
	    MIN(zfs_vdev_aggregation_limit, SPA_MAXBLOCKSIZE);

	first = last = zio;

	if (zio->io_type == ZIO_TYPE_READ)
		maxgap = zfs_vdev_read_gap_limit;

	/*
	 * We can aggregate I/Os that are sufficiently adjacent and of
	 * the same flavor, as expressed by the AGG_INHERIT flags.
	 * The latter requirement is necessary so that certain
	 * attributes of the I/O, such as whether it's a normal I/O
	 * or a scrub/resilver, can be preserved in the aggregate.
	 * We can include optional I/Os, but don't allow them
	 * to begin a range as they add no benefit in that situation.
	 */

	/*
	 * We keep track of the last non-optional I/O.
	 */
	mandatory = (first->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : first;

	/*
	 * Walk backwards through sufficiently contiguous I/Os
	 * recording the last non-optional I/O.
	 */
	while ((dio = AVL_PREV(t, first)) != NULL &&
	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
	    IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
	    IO_GAP(dio, first) <= maxgap &&
	    dio->io_type == zio->io_type) {
		first = dio;
		if (mandatory == NULL && !(first->io_flags & ZIO_FLAG_OPTIONAL))
			mandatory = first;
	}

	/*
	 * Skip any initial optional I/Os.
	 */
	while ((first->io_flags & ZIO_FLAG_OPTIONAL) && first != last) {
		first = AVL_NEXT(t, first);
		ASSERT(first != NULL);
	}


	/*
	 * Walk forward through sufficiently contiguous I/Os.
	 * The aggregation limit does not apply to optional i/os, so that
	 * we can issue contiguous writes even if they are larger than the
	 * aggregation limit.
	 */
	while ((dio = AVL_NEXT(t, last)) != NULL &&
	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
	    (IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit ||
	    (dio->io_flags & ZIO_FLAG_OPTIONAL)) &&
	    IO_GAP(last, dio) <= maxgap &&
	    dio->io_type == zio->io_type) {
		last = dio;
		if (!(last->io_flags & ZIO_FLAG_OPTIONAL))
			mandatory = last;
	}

	/*
	 * Now that we've established the range of the I/O aggregation
	 * we must decide what to do with trailing optional I/Os.
	 * For reads, there's nothing to do. While we are unable to
	 * aggregate further, it's possible that a trailing optional
	 * I/O would allow the underlying device to aggregate with
	 * subsequent I/Os. We must therefore determine if the next
	 * non-optional I/O is close enough to make aggregation
	 * worthwhile.
	 */
	if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) {
		zio_t *nio = last;
		while ((dio = AVL_NEXT(t, nio)) != NULL &&
		    IO_GAP(nio, dio) == 0 &&
		    IO_GAP(mandatory, dio) <= zfs_vdev_write_gap_limit) {
			nio = dio;
			if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) {
				stretch = B_TRUE;
				break;
			}
		}
	}

	if (stretch) {
		/*
		 * We are going to include an optional io in our aggregated
		 * span, thus closing the write gap.  Only mandatory i/os can
		 * start aggregated spans, so make sure that the next i/o
		 * after our span is mandatory.
		 */
		dio = AVL_NEXT(t, last);
		dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
	} else {
		/* do not include the optional i/o */
		while (last != mandatory && last != first) {
			ASSERT(last->io_flags & ZIO_FLAG_OPTIONAL);
			last = AVL_PREV(t, last);
			ASSERT(last != NULL);
		}
	}

	if (first == last)
		return (NULL);

	size = IO_SPAN(first, last);
	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);

	aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
	    abd_alloc_for_io(size, B_TRUE), size, first->io_type,
	    zio->io_priority, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
	    vdev_queue_agg_io_done, NULL);
	aio->io_timestamp = first->io_timestamp;

	nio = first;
	do {
		dio = nio;
		nio = AVL_NEXT(t, dio);
		ASSERT3U(dio->io_type, ==, aio->io_type);

		if (dio->io_flags & ZIO_FLAG_NODATA) {
			ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE);
			abd_zero_off(aio->io_abd,
			    dio->io_offset - aio->io_offset, dio->io_size);
		} else if (dio->io_type == ZIO_TYPE_WRITE) {
			abd_copy_off(aio->io_abd, dio->io_abd,
			    dio->io_offset - aio->io_offset, 0, dio->io_size);
		}

		zio_add_child(dio, aio);
		vdev_queue_io_remove(vq, dio);
		zio_vdev_io_bypass(dio);
		zio_execute(dio);
	} while (dio != last);