/*
 * Run one mirroring pass.
 *
 * Takes the next dirty offset from the dirty-bitmap iterator, grows it into
 * a run of consecutive dirty chunks, clears those dirty bits, marks the
 * chunks in s->in_flight_bitmap, and then walks the run issuing a copy,
 * write-zeroes or discard operation for each piece.
 *
 * Returns: the value of the last block_job_ratelimit_get_delay() call made
 * while walking the run, or 0 if s->ret was found to be negative before an
 * operation was issued.
 */
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->source;
    int64_t offset, first_chunk;
    uint64_t delay_ns = 0;
    /* At least the first dirty chunk is mirrored in one iteration. */
    int nb_chunks = 1;
    /* Decides below whether zero/discard operations count towards the
     * rate limit. */
    bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
    /* Upper bound applied to a single data copy in the loop further down. */
    int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);

    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    offset = bdrv_dirty_iter_next(s->dbi);
    if (offset < 0) {
        /* The iterator produced nothing: rewind it to offset 0 and try
         * again.  The assert records that a dirty chunk is expected to
         * exist at this point. */
        bdrv_set_dirty_iter(s->dbi, 0);
        offset = bdrv_dirty_iter_next(s->dbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(offset >= 0);
    }
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

    /* Do not start on a chunk that is still marked in flight. */
    first_chunk = offset / s->granularity;
    while (test_bit(first_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, offset, s->in_flight);
        mirror_wait_for_io(s);
    }

    block_job_pause_point(&s->common);

    /* Find the number of consecutive dirty chunks following the first dirty
     * one.  The run ends at the first chunk that is past the end of the
     * device, not dirty, or already in flight, and never grows to
     * s->buf_size bytes or more. */
    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    while (nb_chunks * s->granularity < s->buf_size) {
        int64_t next_dirty;
        int64_t next_offset = offset + nb_chunks * s->granularity;
        int64_t next_chunk = next_offset / s->granularity;
        if (next_offset >= s->bdev_length ||
            !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) {
            break;
        }
        if (test_bit(next_chunk, s->in_flight_bitmap)) {
            break;
        }

        /* Keep the iterator in step with the chunks being claimed. */
        next_dirty = bdrv_dirty_iter_next(s->dbi);
        if (next_dirty > next_offset || next_dirty < 0) {
            /* The bitmap iterator's cache is stale, refresh it */
            bdrv_set_dirty_iter(s->dbi, next_offset);
            next_dirty = bdrv_dirty_iter_next(s->dbi);
        }
        assert(next_dirty == next_offset);
        nb_chunks++;
    }

    /* Clear dirty bits before querying the block status, because
     * calling bdrv_block_status_above could yield - if some blocks are
     * marked dirty in this window, we need to know.
     */
    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset,
                                   nb_chunks * s->granularity);
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

    /* Claim the whole run; the loop below issues the I/O piece by piece. */
    bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
    while (nb_chunks > 0 && offset < s->bdev_length) {
        int ret;
        int64_t io_bytes;
        int64_t io_bytes_acct;
        enum MirrorMethod {
            MIRROR_METHOD_COPY,
            MIRROR_METHOD_ZERO,
            MIRROR_METHOD_DISCARD
        } mirror_method = MIRROR_METHOD_COPY;

        assert(!(offset % s->granularity));
        ret = bdrv_block_status_above(source, NULL, offset,
                                      nb_chunks * s->granularity,
                                      &io_bytes, NULL, NULL);
        if (ret < 0) {
            /* Status query failed: fall back to a bounded copy of the
             * remaining run. */
            io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
        } else if (ret & BDRV_BLOCK_DATA) {
            io_bytes = MIN(io_bytes, max_io_bytes);
        }

        /* Round down to whole chunks, but always handle at least one. */
        io_bytes -= io_bytes % s->granularity;
        if (io_bytes < s->granularity) {
            io_bytes = s->granularity;
        } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
            int64_t target_offset;
            int64_t target_bytes;
            bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
                                   &target_offset, &target_bytes);
            /* Zero/discard is chosen only when rounding to the target's
             * clusters leaves the range unchanged; otherwise the default
             * copy method stays in effect. */
            if (target_offset == offset &&
                target_bytes == io_bytes) {
                mirror_method = ret & BDRV_BLOCK_ZERO ?
                                    MIRROR_METHOD_ZERO :
                                    MIRROR_METHOD_DISCARD;
            }
        }

        /* Throttle on the number of outstanding operations. */
        while (s->in_flight >= MAX_IN_FLIGHT) {
            trace_mirror_yield_in_flight(s, offset, s->in_flight);
            mirror_wait_for_io(s);
        }

        /* NOTE(review): on this early return the chunks of the run that
         * have not been submitted yet remain set in in_flight_bitmap and
         * cleared in the dirty bitmap; presumably acceptable because the
         * job is failing - confirm against the caller. */
        if (s->ret < 0) {
            return 0;
        }

        io_bytes = mirror_clip_bytes(s, offset, io_bytes);
        switch (mirror_method) {
        case MIRROR_METHOD_COPY:
            /* mirror_do_read() reports how many bytes it actually took. */
            io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes);
            break;
        case MIRROR_METHOD_ZERO:
        case MIRROR_METHOD_DISCARD:
            mirror_do_zero_or_discard(s, offset, io_bytes,
                                      mirror_method == MIRROR_METHOD_DISCARD);
            /* Zero/discard is not charged to the rate limit when the
             * target can write zeroes with unmap. */
            if (write_zeroes_ok) {
                io_bytes_acct = 0;
            } else {
                io_bytes_acct = io_bytes;
            }
            break;
        default:
            abort();
        }
        assert(io_bytes);
        offset += io_bytes;
        nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
        delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
    }
    return delay_ns;
}
/*
 * Run one mirroring pass (sector-based variant).
 *
 * Takes the next dirty sector from the HBitmap iterator, gathers a run of
 * adjacent dirty chunks into one MirrorOp, clears their dirty bits and
 * submits a single asynchronous read, write-zeroes or discard request,
 * chosen from the source's block status.
 *
 * Returns: the last value obtained from ratelimit_calculate_delay() while
 * gathering chunks, or 0 if the rate limit was never consulted.
 */
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    uint64_t delay_ns = 0;
    MirrorOp *op;
    int pnum;
    int64_t ret;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        /* The iterator produced nothing: re-initialise it from the dirty
         * bitmap and try again.  The assert records that a dirty sector is
         * expected to exist at this point. */
        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->bdev_length / BDRV_SECTOR_SIZE;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        /* waiting_for_io brackets the yield; presumably the completion
         * path checks it before re-entering this coroutine - confirm. */
        s->waiting_for_io = true;
        qemu_coroutine_yield();
        s->waiting_for_io = false;
    }

    do {
        int added_sectors, added_chunks;

        /* Stop at the first chunk that is clean or already in flight.  The
         * assert records that this cannot be the very first chunk. */
        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            /* Rounding to the target's clusters updates next_sector and
             * added_sectors in place. */
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        /* Never run past the end of the device; the last chunk may
         * therefore be shorter than sectors_per_chunk. */
        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            s->waiting_for_io = true;
            qemu_coroutine_yield();
            s->waiting_for_io = false;
        }
        /* Once something has been gathered, submit it rather than wait
         * for more buffers. */
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }
        /* One iovec element is used per chunk, so cap the chunk count. */
        if (IOV_MAX < nb_chunks + added_chunks) {
            trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
            break;
        }

        /* We have enough free space to copy these sectors.  */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
        /* A non-zero delay ends the gathering loop (see the loop
         * condition below). */
        if (!s->synced && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
        }
    } while (delay_ns == 0 && next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        /* Bytes of the request not yet covered by the vector; keeps the
         * last element from exceeding the request size. */
        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector
            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);

    /* Choose the operation from the source's block status:
     *  - status unknown, range not uniform, or data that is not zero: read;
     *  - reads as zero: write zeroes to the target;
     *  - neither data nor zero: discard on the target.
     */
    ret = bdrv_get_block_status_above(source, NULL, sector_num,
                                      nb_sectors, &pnum);
    if (ret < 0 || pnum < nb_sectors ||
            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                       mirror_read_complete, op);
    } else if (ret & BDRV_BLOCK_ZERO) {
        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                              mirror_write_complete, op);
    } else {
        assert(!(ret & BDRV_BLOCK_DATA));
        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
                         mirror_write_complete, op);
    }
    return delay_ns;
}
/*
 * Queue an asynchronous read from the source, honouring COW alignment.
 *
 * The request is first limited to what one operation can carry: at most
 * s->buf_size bytes and at most s->granularity * s->max_iov bytes.  When a
 * COW bitmap is present, mirror_cow_align() may then move @offset and
 * resize the request.
 *
 * Returns: how many bytes from the caller's @offset onwards are covered by
 * the read, not counting bytes that alignment added in front of @offset.
 * That is @bytes when nothing had to be adjusted, and (new_end - offset)
 * when the tail was moved by alignment or by the buffer limit.
 */
static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
                               uint64_t bytes)
{
    BlockBackend *src_blk = s->common.blk;
    MirrorOp *op;
    uint64_t iov_limit = s->granularity * s->max_iov;
    uint64_t handled;
    int chunk_count;
    int i;

    /* A single operation cannot exceed the iovec limit or buf_size. */
    if (iov_limit < bytes) {
        bytes = iov_limit;
    }
    if (s->buf_size < bytes) {
        bytes = s->buf_size;
    }
    assert(bytes);
    assert(bytes < BDRV_REQUEST_MAX_BYTES);

    handled = bytes;
    if (s->cow_bitmap) {
        handled += mirror_cow_align(s, &offset, &bytes);
    }
    assert(bytes <= s->buf_size);

    /* @offset stays granularity-aligned: callers pass aligned values, and
     * mirror_cow_align is used only when the target cluster is larger. */
    assert(QEMU_IS_ALIGNED(offset, s->granularity));
    /* The length is sector-aligned because bdrv_getlength() rounds up. */
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));

    /* Wait until enough free buffers exist for the whole request. */
    chunk_count = DIV_ROUND_UP(bytes, s->granularity);
    while (s->buf_free_count < chunk_count) {
        trace_mirror_yield_in_flight(s, offset, s->in_flight);
        mirror_wait_for_io(s);
    }

    /* The MirrorOp is handed to the AIO completion callback. */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->offset = offset;
    op->bytes = bytes;

    /* Build the I/O vector out of granularity-sized buffers taken from
     * the head of s->buf_free; the final element may be shorter. */
    qemu_iovec_init(&op->qiov, chunk_count);
    for (i = 0; i < chunk_count; i++) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t left = bytes - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, left));
    }

    /* Account for the request and start the read. */
    s->in_flight++;
    s->bytes_in_flight += bytes;
    trace_mirror_one_iteration(s, offset, bytes);

    blk_aio_preadv(src_blk, offset, &op->qiov, 0, mirror_read_complete, op);
    return handled;
}
/*
 * Run one mirroring pass (original sector-based variant).
 *
 * Takes the next dirty sector from the HBitmap iterator, gathers a run of
 * adjacent dirty chunks into one MirrorOp, clears their dirty bits and
 * submits a single asynchronous read of the source.  The read's completion
 * callback is mirror_read_complete, which receives the MirrorOp.
 */
static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        /* The iterator produced nothing: re-initialise it and try again.
         * The assert records that a dirty sector is expected to exist. */
        bdrv_dirty_iter_init(source, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->common.len >> BDRV_SECTOR_BITS;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        qemu_coroutine_yield();
    }

    do {
        int added_sectors, added_chunks;

        /* Stop at the first chunk that is clean or already in flight.  The
         * assert records that this cannot be the very first chunk. */
        if (!bdrv_get_dirty(source, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            /* Rounding to the target's clusters updates next_sector and
             * added_sectors in place. */
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        /* Never run past the end of the device; the last chunk may
         * therefore be shorter than sectors_per_chunk. */
        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            qemu_coroutine_yield();
        }
        /* Once something has been gathered, submit it rather than wait
         * for more buffers. */
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }

        /* We have enough free space to copy these sectors.  */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
    } while (next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_slice_new(MirrorOp);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        /* Bytes of the request not yet covered by the vector.  The final
         * chunk can be shorter than the granularity when the run was
         * clipped to the end of the device; capping the element length
         * keeps the vector's size equal to the request size. */
        size_t remaining = ((size_t)nb_sectors << BDRV_SECTOR_BITS)
                           - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector
            && bdrv_get_dirty(source, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty(source, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
}