static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockJob *bjob = &s->common; BlockDriverState *bs = blk_bs(bjob->blk); BlockDriverState *base = s->base; Error *local_err = NULL; int ret = 0; bdrv_unfreeze_backing_chain(bs, base); s->chain_frozen = false; if (bs->backing) { const char *base_id = NULL, *base_fmt = NULL; if (base) { base_id = s->backing_file_str; if (base->drv) { base_fmt = base->drv->format_name; } } ret = bdrv_change_backing_file(bs, base_id, base_fmt); bdrv_set_backing_hd(bs, base, &local_err); if (local_err) { error_report_err(local_err); return -EPERM; } } return ret; }
static void mirror_exit(BlockJob *job, void *opaque) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); } if (s->should_complete && data->ret == 0) { BlockDriverState *to_replace = s->common.bs; if (s->to_replace) { to_replace = s->to_replace; } if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } bdrv_swap(s->target, to_replace); if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { /* drop the bs loop chain formed by the swap: break the loop then * trigger the unref from the top one */ BlockDriverState *p = s->base->backing_hd; bdrv_set_backing_hd(s->base, NULL); bdrv_unref(p); } } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); bdrv_unref(s->to_replace); } if (replace_aio_context) { aio_context_release(replace_aio_context); } g_free(s->replaces); bdrv_unref(s->target); block_job_completed(&s->common, data->ret); g_free(data); }
static void mirror_exit(BlockJob *job, void *opaque) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; BlockDriverState *src = s->source; BlockDriverState *target_bs = blk_bs(s->target); BlockDriverState *mirror_top_bs = s->mirror_top_bs; Error *local_err = NULL; bdrv_release_dirty_bitmap(src, s->dirty_bitmap); /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); bdrv_ref(mirror_top_bs); bdrv_ref(target_bs); /* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before * inserting target_bs at s->to_replace, where we might not be able to get * these permissions. * * Note that blk_unref() alone doesn't necessarily drop permissions because * we might be running nested inside mirror_drain(), which takes an extra * reference, so use an explicit blk_set_perm() first. */ blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort); blk_unref(s->target); s->target = NULL; /* We don't access the source any more. Dropping any WRITE/RESIZE is * required before it could become a backing file of target_bs. */ bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, &error_abort); if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { BlockDriverState *backing = s->is_none_mode ? src : s->base; if (backing_bs(target_bs) != backing) { bdrv_set_backing_hd(target_bs, backing, &local_err); if (local_err) { error_report_err(local_err); data->ret = -EPERM; } } } if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); } if (s->should_complete && data->ret == 0) { BlockDriverState *to_replace = src; if (s->to_replace) { to_replace = s->to_replace; } if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) { bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL); } /* The mirror job has no requests in flight any more, but we need to * drain potential other users of the BDS before changing the graph. */ bdrv_drained_begin(target_bs); bdrv_replace_node(to_replace, target_bs, &local_err); bdrv_drained_end(target_bs); if (local_err) { error_report_err(local_err); data->ret = -EPERM; } } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); bdrv_unref(s->to_replace); } if (replace_aio_context) { aio_context_release(replace_aio_context); } g_free(s->replaces); bdrv_unref(target_bs); /* Remove the mirror filter driver from the graph. Before this, get rid of * the blockers on the intermediate nodes so that the resulting state is * valid. Also give up permissions on mirror_top_bs->backing, which might * block the removal. */ block_job_remove_all_bdrv(job); bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, &error_abort); bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort); /* We just changed the BDS the job BB refers to (with either or both of the * bdrv_replace_node() calls), so switch the BB back so the cleanup does * the right thing. We don't need any permissions any more now. */ blk_remove_bs(job->blk); blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); blk_insert_bs(job->blk, mirror_top_bs, &error_abort); block_job_completed(&s->common, data->ret); g_free(data); bdrv_drained_end(src); bdrv_unref(mirror_top_bs); bdrv_unref(src); }
/* commit COW file into the raw image */ int bdrv_commit(BlockDriverState *bs) { BlockBackend *src, *backing; BlockDriverState *backing_file_bs = NULL; BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; int64_t offset, length, backing_length; int ro; int64_t n; int ret = 0; uint8_t *buf = NULL; Error *local_err = NULL; if (!drv) return -ENOMEDIUM; if (!bs->backing) { return -ENOTSUP; } if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { return -EBUSY; } ro = bs->backing->bs->read_only; if (ro) { if (bdrv_reopen_set_read_only(bs->backing->bs, false, NULL)) { return -EACCES; } } src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(src, bs, &local_err); if (ret < 0) { error_report_err(local_err); goto ro_cleanup; } /* Insert commit_top block node above backing, so we can write to it */ backing_file_bs = backing_bs(bs); commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, &local_err); if (commit_top_bs == NULL) { error_report_err(local_err); goto ro_cleanup; } bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs)); bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); ret = blk_insert_bs(backing, backing_file_bs, &local_err); if (ret < 0) { error_report_err(local_err); goto ro_cleanup; } length = blk_getlength(src); if (length < 0) { ret = length; goto ro_cleanup; } backing_length = blk_getlength(backing); if (backing_length < 0) { ret = backing_length; goto ro_cleanup; } /* If our top snapshot is larger than the backing file image, * grow the backing file image if possible. If not possible, * we must return an error */ if (length > backing_length) { ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); goto ro_cleanup; } } /* blk_try_blockalign() for src will choose an alignment that works for * backing as well, so no need to compare the alignment manually. */ buf = blk_try_blockalign(src, COMMIT_BUF_SIZE); if (buf == NULL) { ret = -ENOMEM; goto ro_cleanup; } for (offset = 0; offset < length; offset += n) { ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n); if (ret < 0) { goto ro_cleanup; } if (ret) { ret = blk_pread(src, offset, buf, n); if (ret < 0) { goto ro_cleanup; } ret = blk_pwrite(backing, offset, buf, n, 0); if (ret < 0) { goto ro_cleanup; } } } if (drv->bdrv_make_empty) { ret = drv->bdrv_make_empty(bs); if (ret < 0) { goto ro_cleanup; } blk_flush(src); } /* * Make sure all data we wrote to the backing device is actually * stable on disk. */ blk_flush(backing); ret = 0; ro_cleanup: qemu_vfree(buf); blk_unref(backing); if (backing_file_bs) { bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); } bdrv_unref(commit_top_bs); blk_unref(src); if (ro) { /* ignoring error return here */ bdrv_reopen_set_read_only(bs->backing->bs, true, NULL); } return ret; }
void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int creation_flags, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, const char *filter_node_name, Error **errp) { CommitBlockJob *s; BlockDriverState *iter; BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; int ret; assert(top != bs); if (top == base) { error_setg(errp, "Invalid files for merge: top and base are the same"); return; } s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL, speed, creation_flags, NULL, NULL, errp); if (!s) { return; } /* convert base to r/w, if necessary */ s->base_read_only = bdrv_is_read_only(base); if (s->base_read_only) { if (bdrv_reopen_set_read_only(base, false, errp) != 0) { goto fail; } } /* Insert commit_top block node above top, so we can block consistent read * on the backing chain below it */ commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0, errp); if (commit_top_bs == NULL) { goto fail; } if (!filter_node_name) { commit_top_bs->implicit = true; } commit_top_bs->total_sectors = top->total_sectors; bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top)); bdrv_set_backing_hd(commit_top_bs, top, &local_err); if (local_err) { bdrv_unref(commit_top_bs); commit_top_bs = NULL; error_propagate(errp, local_err); goto fail; } bdrv_replace_node(top, commit_top_bs, &local_err); if (local_err) { bdrv_unref(commit_top_bs); commit_top_bs = NULL; error_propagate(errp, local_err); goto fail; } s->commit_top_bs = commit_top_bs; bdrv_unref(commit_top_bs); /* Block all nodes between top and base, because they will * disappear from the chain after this operation. */ assert(bdrv_chain_contains(top, base)); for (iter = top; iter != base; iter = backing_bs(iter)) { /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves * at s->base (if writes are blocked for a node, they are also blocked * for its backing file). The other options would be a second filter * driver above s->base. */ ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, errp); if (ret < 0) { goto fail; } } if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { goto fail; } s->chain_frozen = true; ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); if (ret < 0) { goto fail; } s->base = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | BLK_PERM_WRITE_UNCHANGED); ret = blk_insert_bs(s->base, base, errp); if (ret < 0) { goto fail; } s->base_bs = base; /* Required permissions are already taken with block_job_add_bdrv() */ s->top = blk_new(0, BLK_PERM_ALL); ret = blk_insert_bs(s->top, top, errp); if (ret < 0) { goto fail; } s->backing_file_str = g_strdup(backing_file_str); s->on_error = on_error; trace_commit_start(bs, base, top, s); job_start(&s->common.job); return; fail: if (s->chain_frozen) { bdrv_unfreeze_backing_chain(commit_top_bs, base); } if (s->base) { blk_unref(s->base); } if (s->top) { blk_unref(s->top); } job_early_fail(&s->common.job); /* commit_top_bs has to be replaced after deleting the block job, * otherwise this would fail because of lack of permissions. */ if (commit_top_bs) { bdrv_replace_node(commit_top_bs, top, &error_abort); } }
static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; BlockDriverState *bs = s->common.bs; int64_t sector_num, end, sectors_per_chunk, length; uint64_t last_pause_ns; BlockDriverInfo bdi; char backing_filename[1024]; int ret = 0; int n; if (block_job_is_cancelled(&s->common)) { goto immediate_exit; } s->common.len = bdrv_getlength(bs); if (s->common.len < 0) { ret = s->common.len; goto immediate_exit; } else if (s->common.len == 0) { /* Report BLOCK_JOB_READY and wait for complete. */ block_job_event_ready(&s->common); s->synced = true; while (!block_job_is_cancelled(&s->common) && !s->should_complete) { block_job_yield(&s->common); } s->common.cancelled = false; goto immediate_exit; } length = DIV_ROUND_UP(s->common.len, s->granularity); s->in_flight_bitmap = bitmap_new(length); /* If we have no backing file yet in the destination, we cannot let * the destination do COW. Instead, we copy sectors around the * dirty data if needed. We need a bitmap to do that. */ bdrv_get_backing_filename(s->target, backing_filename, sizeof(backing_filename)); if (backing_filename[0] && !s->target->backing_hd) { ret = bdrv_get_info(s->target, &bdi); if (ret < 0) { goto immediate_exit; } if (s->granularity < bdi.cluster_size) { s->buf_size = MAX(s->buf_size, bdi.cluster_size); s->cow_bitmap = bitmap_new(length); } } end = s->common.len >> BDRV_SECTOR_BITS; s->buf = qemu_blockalign(bs, s->buf_size); sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; mirror_free_init(s); if (!s->is_none_mode) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base = s->base; for (sector_num = 0; sector_num < end; ) { int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1; ret = bdrv_is_allocated_above(bs, base, sector_num, next - sector_num, &n); if (ret < 0) { goto immediate_exit; } assert(n > 0); if (ret == 1) { bdrv_set_dirty(bs, sector_num, n); sector_num = next; } else { sector_num += n; } } } bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi); last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); for (;;) { uint64_t delay_ns = 0; int64_t cnt; bool should_complete; if (s->ret < 0) { ret = s->ret; goto immediate_exit; } cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that qemu_aio_flush() returns. * We do so every SLICE_TIME nanoseconds, or when there is an error, * or when the source is clean, whichever comes first. */ if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME && s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); qemu_coroutine_yield(); continue; } else if (cnt != 0) { delay_ns = mirror_iteration(s); if (delay_ns == 0) { continue; } } } should_complete = false; if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); ret = bdrv_flush(s->target); if (ret < 0) { if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) { goto immediate_exit; } } else { /* We're out of the streaming phase. From now on, if the job * is cancelled we will actually complete all pending I/O and * report completion. This way, block-job-cancel will leave * the target in a consistent state. */ s->common.offset = end * BDRV_SECTOR_SIZE; if (!s->synced) { block_job_event_ready(&s->common); s->synced = true; } should_complete = s->should_complete || block_job_is_cancelled(&s->common); cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); } } if (cnt == 0 && should_complete) { /* The dirty bitmap is not updated while operations are pending. * If we're about to exit, wait for pending operations before * calling bdrv_get_dirty_count(bs), or we may exit while the * source has dirty data to copy! * * Note that I/O can be submitted by the guest while * mirror_populate runs. */ trace_mirror_before_drain(s, cnt); bdrv_drain_all(); cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); } ret = 0; trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); if (!s->synced) { /* Publish progress */ s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE; block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { break; } } else if (!should_complete) { delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0); block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); } else if (cnt == 0) { /* The two disks are in sync. Exit and report successful * completion. */ assert(QLIST_EMPTY(&bs->tracked_requests)); s->common.cancelled = false; break; } last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } immediate_exit: if (s->in_flight > 0) { /* We get here only if something went wrong. Either the job failed, * or it was cancelled prematurely so that we do not guarantee that * the target is a copy of the source. */ assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common))); mirror_drain(s); } assert(s->in_flight == 0); qemu_vfree(s->buf); g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { BlockDriverState *to_replace = s->common.bs; if (s->to_replace) { to_replace = s->to_replace; } if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } bdrv_swap(s->target, to_replace); if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { /* drop the bs loop chain formed by the swap: break the loop then * trigger the unref from the top one */ BlockDriverState *p = s->base->backing_hd; bdrv_set_backing_hd(s->base, NULL); bdrv_unref(p); } } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); bdrv_unref(s->to_replace); } g_free(s->replaces); bdrv_unref(s->target); block_job_completed(&s->common, ret); }