static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockJob *bjob = &s->common; BlockDriverState *bs = blk_bs(bjob->blk); BlockDriverState *base = s->base; Error *local_err = NULL; int ret = 0; bdrv_unfreeze_backing_chain(bs, base); s->chain_frozen = false; if (bs->backing) { const char *base_id = NULL, *base_fmt = NULL; if (base) { base_id = s->backing_file_str; if (base->drv) { base_fmt = base->drv->format_name; } } ret = bdrv_change_backing_file(bs, base_id, base_fmt); bdrv_set_backing_hd(bs, base, &local_err); if (local_err) { error_report_err(local_err); return -EPERM; } } return ret; }
/* Round offset and/or bytes to target cluster if COW is needed, and * return the offset of the adjusted tail against original. */ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, uint64_t *bytes) { bool need_cow; int ret = 0; int64_t align_offset = *offset; int64_t align_bytes = *bytes; int max_bytes = s->granularity * s->max_iov; need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap); need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity, s->cow_bitmap); if (need_cow) { bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes, &align_offset, &align_bytes); } if (align_bytes > max_bytes) { align_bytes = max_bytes; if (need_cow) { align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size); } } /* Clipping may result in align_bytes unaligned to chunk boundary, but * that doesn't matter because it's already the end of source image. */ align_bytes = mirror_clip_bytes(s, align_offset, align_bytes); ret = align_offset + align_bytes - (*offset + *bytes); *offset = align_offset; *bytes = align_bytes; assert(ret >= 0); return ret; }
static void stream_abort(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); if (s->chain_frozen) { BlockJob *bjob = &s->common; bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->base); } }
/* @p_info will be set only on success. */ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, Error **errp) { BlockInfo *info = g_malloc0(sizeof(*info)); BlockDriverState *bs = blk_bs(blk); BlockDriverState *bs0; ImageInfo **p_image_info; Error *local_err = NULL; info->device = g_strdup(blk_name(blk)); info->type = g_strdup("unknown"); info->locked = blk_dev_is_medium_locked(blk); info->removable = blk_dev_has_removable_media(blk); if (blk_dev_has_removable_media(blk)) { info->has_tray_open = true; info->tray_open = blk_dev_is_tray_open(blk); } if (bdrv_iostatus_is_enabled(bs)) { info->has_io_status = true; info->io_status = bs->iostatus; } if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { info->has_dirty_bitmaps = true; info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); } if (bs->drv) { info->has_inserted = true; info->inserted = bdrv_block_device_info(bs); bs0 = bs; p_image_info = &info->inserted->image; while (1) { bdrv_query_image_info(bs0, p_image_info, &local_err); if (local_err) { error_propagate(errp, local_err); goto err; } if (bs0->drv && bs0->backing_hd) { bs0 = bs0->backing_hd; (*p_image_info)->has_backing_image = true; p_image_info = &((*p_image_info)->backing_image); } else { break; } } } *p_info = info; return; err: qapi_free_BlockInfo(info); }
/* @p_info will be set only on success. */ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, Error **errp) { BlockInfo *info = g_malloc0(sizeof(*info)); BlockDriverState *bs = blk_bs(blk); char *qdev; /* Skip automatically inserted nodes that the user isn't aware of */ while (bs && bs->drv && bs->implicit) { bs = backing_bs(bs); } info->device = g_strdup(blk_name(blk)); info->type = g_strdup("unknown"); info->locked = blk_dev_is_medium_locked(blk); info->removable = blk_dev_has_removable_media(blk); qdev = blk_get_attached_dev_id(blk); if (qdev && *qdev) { info->has_qdev = true; info->qdev = qdev; } else { g_free(qdev); } if (blk_dev_has_tray(blk)) { info->has_tray_open = true; info->tray_open = blk_dev_is_tray_open(blk); } if (blk_iostatus_is_enabled(blk)) { info->has_io_status = true; info->io_status = blk_iostatus(blk); } if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) { info->has_dirty_bitmaps = true; info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); } if (bs && bs->drv) { info->has_inserted = true; info->inserted = bdrv_block_device_info(blk, bs, errp); if (info->inserted == NULL) { goto err; } } *p_info = info; return; err: qapi_free_BlockInfo(info); }
static void test_block_job_complete(BlockJob *job, void *opaque) { BlockDriverState *bs = blk_bs(job->blk); int rc = (intptr_t)opaque; if (block_job_is_cancelled(job)) { rc = -ECANCELED; } block_job_completed(job, rc); bdrv_unref(bs); }
static char *print_drive(void *ptr) { const char *name; name = blk_name(ptr); if (!*name) { BlockDriverState *bs = blk_bs(ptr); if (bs) { name = bdrv_get_node_name(bs); } } return g_strdup(name); }
static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs, bool query_nodes) { if (query_nodes) { *bs = bdrv_next_node(*bs); return !!*bs; } *blk = blk_next(*blk); *bs = *blk ? blk_bs(*blk) : NULL; return !!*blk; }
static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); BlockDriverState *target; target = blk_bs(s->target); if (!s->synced) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; } if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { int ret; assert(!target->backing); ret = bdrv_open_backing_file(target, NULL, "backing", errp); if (ret < 0) { return; } } /* block all operations on to_replace bs */ if (s->replaces) { AioContext *replace_aio_context; s->to_replace = bdrv_find_node(s->replaces); if (!s->to_replace) { error_setg(errp, "Node name '%s' not found", s->replaces); return; } replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); /* TODO Translate this into permission system. Current definition of * GRAPH_MOD would require to request it for the parents; they might * not even be BlockDriverStates, however, so a BdrvChild can't address * them. May need redefinition of GRAPH_MOD. */ error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); bdrv_ref(s->to_replace); aio_context_release(replace_aio_context); } s->should_complete = true; block_job_enter(&s->common); }
static void stream_clean(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockJob *bjob = &s->common; BlockDriverState *bs = blk_bs(bjob->blk); /* Reopen the image back in read-only mode if necessary */ if (s->bs_read_only) { /* Give up write permissions before making it read-only */ blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort); bdrv_reopen_set_read_only(bs, true, NULL); } g_free(s->backing_file_str); }
void qdev_prop_set_drive(DeviceState *dev, const char *name, BlockBackend *value, Error **errp) { const char *ref = ""; if (value) { ref = blk_name(value); if (!*ref) { BlockDriverState *bs = blk_bs(value); if (bs) { ref = bdrv_get_node_name(bs); } } } object_property_set_str(OBJECT(dev), ref, name, errp); }
static BlockJob *do_test_id(BlockBackend *blk, const char *id, bool should_succeed) { BlockJob *job; Error *errp = NULL; job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp); if (should_succeed) { g_assert_null(errp); g_assert_nonnull(job); if (id) { g_assert_cmpstr(job->id, ==, id); } else { g_assert_cmpstr(job->id, ==, blk_name(blk)); } } else {
static void test_sync_op_blk_flush(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); int ret; /* Normal success path */ ret = blk_flush(blk); g_assert_cmpint(ret, ==, 0); /* Early success: Read-only image */ bs->read_only = true; bs->open_flags &= ~BDRV_O_RDWR; ret = blk_flush(blk); g_assert_cmpint(ret, ==, 0); bs->read_only = false; bs->open_flags |= BDRV_O_RDWR; }
BlockStatsList *qmp_query_blockstats(bool has_query_nodes, bool query_nodes, Error **errp) { BlockStatsList *head = NULL, **p_next = &head; BlockBackend *blk; BlockDriverState *bs; /* Just to be safe if query_nodes is not always initialized */ if (has_query_nodes && query_nodes) { for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) { BlockStatsList *info = g_malloc0(sizeof(*info)); AioContext *ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); info->value = bdrv_query_bds_stats(bs, false); aio_context_release(ctx); *p_next = info; p_next = &info->next; } } else { for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { BlockStatsList *info = g_malloc0(sizeof(*info)); AioContext *ctx = blk_get_aio_context(blk); BlockStats *s; aio_context_acquire(ctx); s = bdrv_query_bds_stats(blk_bs(blk), true); s->has_device = true; s->device = g_strdup(blk_name(blk)); bdrv_query_blk_stats(s->stats, blk); aio_context_release(ctx); info->value = s; *p_next = info; p_next = &info->next; } } return head; }
/* @p_info will be set only on success. */ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, Error **errp) { BlockInfo *info = g_malloc0(sizeof(*info)); BlockDriverState *bs = blk_bs(blk); info->device = g_strdup(blk_name(blk)); info->type = g_strdup("unknown"); info->locked = blk_dev_is_medium_locked(blk); info->removable = blk_dev_has_removable_media(blk); if (blk_dev_has_tray(blk)) { info->has_tray_open = true; info->tray_open = blk_dev_is_tray_open(blk); } if (blk_iostatus_is_enabled(blk)) { info->has_io_status = true; info->io_status = blk_iostatus(blk); } if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) { info->has_dirty_bitmaps = true; info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); } if (bs && bs->drv) { info->has_inserted = true; info->inserted = bdrv_block_device_info(blk, bs, errp); if (info->inserted == NULL) { goto err; } } *p_info = info; return; err: qapi_free_BlockInfo(info); }
static void commit_abort(Job *job) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); BlockDriverState *top_bs = blk_bs(s->top); if (s->chain_frozen) { bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs); } /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ bdrv_ref(top_bs); bdrv_ref(s->commit_top_bs); if (s->base) { blk_unref(s->base); } /* free the blockers on the intermediate nodes so that bdrv_replace_nodes * can succeed */ block_job_remove_all_bdrv(&s->common); /* If bdrv_drop_intermediate() failed (or was not invoked), remove the * commit filter driver from the backing chain now. Do this as the final * step so that the 'consistent read' permission can be granted. * * XXX Can (or should) we somehow keep 'consistent read' blocked even * after the failed/cancelled commit job is gone? If we already wrote * something to base, the intermediate images aren't valid any more. */ bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL, &error_abort); bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs), &error_abort); bdrv_unref(s->commit_top_bs); bdrv_unref(top_bs); }
static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; BlockDriverInfo bdi; char backing_filename[2]; /* we only need 2 characters because we are only checking for a NULL string */ int ret = 0; if (block_job_is_cancelled(&s->common)) { goto immediate_exit; } s->bdev_length = bdrv_getlength(bs); if (s->bdev_length < 0) { ret = s->bdev_length; goto immediate_exit; } /* Active commit must resize the base image if its size differs from the * active layer. */ if (s->base == blk_bs(s->target)) { int64_t base_length; base_length = blk_getlength(s->target); if (base_length < 0) { ret = base_length; goto immediate_exit; } if (s->bdev_length > base_length) { ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF, NULL); if (ret < 0) { goto immediate_exit; } } } if (s->bdev_length == 0) { /* Report BLOCK_JOB_READY and wait for complete. */ block_job_event_ready(&s->common); s->synced = true; while (!block_job_is_cancelled(&s->common) && !s->should_complete) { block_job_yield(&s->common); } s->common.cancelled = false; goto immediate_exit; } length = DIV_ROUND_UP(s->bdev_length, s->granularity); s->in_flight_bitmap = bitmap_new(length); /* If we have no backing file yet in the destination, we cannot let * the destination do COW. Instead, we copy sectors around the * dirty data if needed. We need a bitmap to do that. */ bdrv_get_backing_filename(target_bs, backing_filename, sizeof(backing_filename)); if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) { s->target_cluster_size = bdi.cluster_size; } else { s->target_cluster_size = BDRV_SECTOR_SIZE; } if (backing_filename[0] && !target_bs->backing && s->granularity < s->target_cluster_size) { s->buf_size = MAX(s->buf_size, s->target_cluster_size); s->cow_bitmap = bitmap_new(length); } s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov); s->buf = qemu_try_blockalign(bs, s->buf_size); if (s->buf == NULL) { ret = -ENOMEM; goto immediate_exit; } mirror_free_init(s); s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); if (!s->is_none_mode) { ret = mirror_dirty_init(s); if (ret < 0 || block_job_is_cancelled(&s->common)) { goto immediate_exit; } } assert(!s->dbi); s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap); for (;;) { uint64_t delay_ns = 0; int64_t cnt, delta; bool should_complete; if (s->ret < 0) { ret = s->ret; goto immediate_exit; } block_job_pause_point(&s->common); cnt = bdrv_get_dirty_count(s->dirty_bitmap); /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is * the number of bytes currently being processed; together those are * the current remaining operation length */ block_job_progress_set_remaining(&s->common, s->bytes_in_flight + cnt); /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that bdrv_drain_all() returns. * We do so every BLKOCK_JOB_SLICE_TIME nanoseconds, or when there is * an error, or when the source is clean, whichever comes first. */ delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns; if (delta < BLOCK_JOB_SLICE_TIME && s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); mirror_wait_for_io(s); continue; } else if (cnt != 0) { delay_ns = mirror_iteration(s); } } should_complete = false; if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); if (!s->synced) { if (mirror_flush(s) < 0) { /* Go check s->ret. */ continue; } /* We're out of the streaming phase. From now on, if the job * is cancelled we will actually complete all pending I/O and * report completion. This way, block-job-cancel will leave * the target in a consistent state. */ block_job_event_ready(&s->common); s->synced = true; } should_complete = s->should_complete || block_job_is_cancelled(&s->common); cnt = bdrv_get_dirty_count(s->dirty_bitmap); } if (cnt == 0 && should_complete) { /* The dirty bitmap is not updated while operations are pending. * If we're about to exit, wait for pending operations before * calling bdrv_get_dirty_count(bs), or we may exit while the * source has dirty data to copy! * * Note that I/O can be submitted by the guest while * mirror_populate runs, so pause it now. Before deciding * whether to switch to target check one last time if I/O has * come in the meanwhile, and if not flush the data to disk. */ trace_mirror_before_drain(s, cnt); bdrv_drained_begin(bs); cnt = bdrv_get_dirty_count(s->dirty_bitmap); if (cnt > 0 || mirror_flush(s) < 0) { bdrv_drained_end(bs); continue; } /* The two disks are in sync. Exit and report successful * completion. */ assert(QLIST_EMPTY(&bs->tracked_requests)); s->common.cancelled = false; need_drain = false; break; } ret = 0; if (s->synced && !should_complete) { delay_ns = (s->in_flight == 0 && cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0); } trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); block_job_sleep_ns(&s->common, delay_ns); if (block_job_is_cancelled(&s->common) && (!s->synced || s->common.force)) { break; } s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } immediate_exit: if (s->in_flight > 0) { /* We get here only if something went wrong. Either the job failed, * or it was cancelled prematurely so that we do not guarantee that * the target is a copy of the source. */ assert(ret < 0 || ((s->common.force || !s->synced) && block_job_is_cancelled(&s->common))); assert(need_drain); mirror_wait_for_all_io(s); } assert(s->in_flight == 0); qemu_vfree(s->buf); g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); bdrv_dirty_iter_free(s->dbi); data = g_malloc(sizeof(*data)); data->ret = ret; if (need_drain) { bdrv_drained_begin(bs); } block_job_defer_to_main_loop(&s->common, mirror_exit, data); }
static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t offset; BlockDriverState *base = s->base; BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); int ret; int64_t count; if (base == NULL && !bdrv_has_zero_init(target_bs)) { if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); return 0; } s->initial_zeroing_ongoing = true; for (offset = 0; offset < s->bdev_length; ) { int bytes = MIN(s->bdev_length - offset, QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); mirror_throttle(s); if (block_job_is_cancelled(&s->common)) { s->initial_zeroing_ongoing = false; return 0; } if (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, s->in_flight); mirror_wait_for_io(s); continue; } mirror_do_zero_or_discard(s, offset, bytes, false); offset += bytes; } mirror_wait_for_all_io(s); s->initial_zeroing_ongoing = false; } /* First part, loop on the sectors and initialize the dirty bitmap. */ for (offset = 0; offset < s->bdev_length; ) { /* Just to make sure we are not exceeding int limit. */ int bytes = MIN(s->bdev_length - offset, QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); mirror_throttle(s); if (block_job_is_cancelled(&s->common)) { return 0; } ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count); if (ret < 0) { return ret; } assert(count); if (ret == 1) { bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count); } offset += count; } return 0; }
static void mirror_exit(BlockJob *job, void *opaque) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; BlockDriverState *src = s->source; BlockDriverState *target_bs = blk_bs(s->target); BlockDriverState *mirror_top_bs = s->mirror_top_bs; Error *local_err = NULL; bdrv_release_dirty_bitmap(src, s->dirty_bitmap); /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); bdrv_ref(mirror_top_bs); bdrv_ref(target_bs); /* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before * inserting target_bs at s->to_replace, where we might not be able to get * these permissions. * * Note that blk_unref() alone doesn't necessarily drop permissions because * we might be running nested inside mirror_drain(), which takes an extra * reference, so use an explicit blk_set_perm() first. */ blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort); blk_unref(s->target); s->target = NULL; /* We don't access the source any more. Dropping any WRITE/RESIZE is * required before it could become a backing file of target_bs. */ bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, &error_abort); if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { BlockDriverState *backing = s->is_none_mode ? src : s->base; if (backing_bs(target_bs) != backing) { bdrv_set_backing_hd(target_bs, backing, &local_err); if (local_err) { error_report_err(local_err); data->ret = -EPERM; } } } if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); } if (s->should_complete && data->ret == 0) { BlockDriverState *to_replace = src; if (s->to_replace) { to_replace = s->to_replace; } if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) { bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL); } /* The mirror job has no requests in flight any more, but we need to * drain potential other users of the BDS before changing the graph. */ bdrv_drained_begin(target_bs); bdrv_replace_node(to_replace, target_bs, &local_err); bdrv_drained_end(target_bs); if (local_err) { error_report_err(local_err); data->ret = -EPERM; } } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); bdrv_unref(s->to_replace); } if (replace_aio_context) { aio_context_release(replace_aio_context); } g_free(s->replaces); bdrv_unref(target_bs); /* Remove the mirror filter driver from the graph. Before this, get rid of * the blockers on the intermediate nodes so that the resulting state is * valid. Also give up permissions on mirror_top_bs->backing, which might * block the removal. */ block_job_remove_all_bdrv(job); bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, &error_abort); bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort); /* We just changed the BDS the job BB refers to (with either or both of the * bdrv_replace_node() calls), so switch the BB back so the cleanup does * the right thing. We don't need any permissions any more now. */ blk_remove_bs(job->blk); blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); blk_insert_bs(job->blk, mirror_top_bs, &error_abort); block_job_completed(&s->common, data->ret); g_free(data); bdrv_drained_end(src); bdrv_unref(mirror_top_bs); bdrv_unref(src); }
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = s->source; int64_t offset, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ int nb_chunks = 1; bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); bdrv_dirty_bitmap_lock(s->dirty_bitmap); offset = bdrv_dirty_iter_next(s->dbi); if (offset < 0) { bdrv_set_dirty_iter(s->dbi, 0); offset = bdrv_dirty_iter_next(s->dbi); trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); assert(offset >= 0); } bdrv_dirty_bitmap_unlock(s->dirty_bitmap); first_chunk = offset / s->granularity; while (test_bit(first_chunk, s->in_flight_bitmap)) { trace_mirror_yield_in_flight(s, offset, s->in_flight); mirror_wait_for_io(s); } block_job_pause_point(&s->common); /* Find the number of consective dirty chunks following the first dirty * one, and wait for in flight requests in them. */ bdrv_dirty_bitmap_lock(s->dirty_bitmap); while (nb_chunks * s->granularity < s->buf_size) { int64_t next_dirty; int64_t next_offset = offset + nb_chunks * s->granularity; int64_t next_chunk = next_offset / s->granularity; if (next_offset >= s->bdev_length || !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) { break; } if (test_bit(next_chunk, s->in_flight_bitmap)) { break; } next_dirty = bdrv_dirty_iter_next(s->dbi); if (next_dirty > next_offset || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ bdrv_set_dirty_iter(s->dbi, next_offset); next_dirty = bdrv_dirty_iter_next(s->dbi); } assert(next_dirty == next_offset); nb_chunks++; } /* Clear dirty bits before querying the block status, because * calling bdrv_block_status_above could yield - if some blocks are * marked dirty in this window, we need to know. */ bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset, nb_chunks * s->granularity); bdrv_dirty_bitmap_unlock(s->dirty_bitmap); bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); while (nb_chunks > 0 && offset < s->bdev_length) { int ret; int64_t io_bytes; int64_t io_bytes_acct; enum MirrorMethod { MIRROR_METHOD_COPY, MIRROR_METHOD_ZERO, MIRROR_METHOD_DISCARD } mirror_method = MIRROR_METHOD_COPY; assert(!(offset % s->granularity)); ret = bdrv_block_status_above(source, NULL, offset, nb_chunks * s->granularity, &io_bytes, NULL, NULL); if (ret < 0) { io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes); } else if (ret & BDRV_BLOCK_DATA) { io_bytes = MIN(io_bytes, max_io_bytes); } io_bytes -= io_bytes % s->granularity; if (io_bytes < s->granularity) { io_bytes = s->granularity; } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t target_offset; int64_t target_bytes; bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, &target_offset, &target_bytes); if (target_offset == offset && target_bytes == io_bytes) { mirror_method = ret & BDRV_BLOCK_ZERO ? MIRROR_METHOD_ZERO : MIRROR_METHOD_DISCARD; } } while (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield_in_flight(s, offset, s->in_flight); mirror_wait_for_io(s); } if (s->ret < 0) { return 0; } io_bytes = mirror_clip_bytes(s, offset, io_bytes); switch (mirror_method) { case MIRROR_METHOD_COPY: io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes); break; case MIRROR_METHOD_ZERO: case MIRROR_METHOD_DISCARD: mirror_do_zero_or_discard(s, offset, io_bytes, mirror_method == MIRROR_METHOD_DISCARD); if (write_zeroes_ok) { io_bytes_acct = 0; } else { io_bytes_acct = io_bytes; } break; default: abort(); } assert(io_bytes); offset += io_bytes; nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity); delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct); } return delay_ns; }
static int blk_connect(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); int index, qflags; bool readonly = true; bool writethrough = true; int order, ring_ref; unsigned int ring_size, max_grants; unsigned int i; trace_xen_disk_connect(xendev->name); /* read-only ? */ if (blkdev->directiosafe) { qflags = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO; } else { qflags = 0; writethrough = false; } if (strcmp(blkdev->mode, "w") == 0) { qflags |= BDRV_O_RDWR; readonly = false; } if (blkdev->feature_discard) { qflags |= BDRV_O_UNMAP; } /* init qemu block driver */ index = (xendev->dev - 202 * 256) / 16; blkdev->dinfo = drive_get(IF_XEN, 0, index); if (!blkdev->dinfo) { Error *local_err = NULL; QDict *options = NULL; if (strcmp(blkdev->fileproto, "<unset>")) { options = qdict_new(); qdict_put_str(options, "driver", blkdev->fileproto); } /* setup via xenbus -> create new block driver instance */ xen_pv_printf(xendev, 2, "create new bdrv (xenbus setup)\n"); blkdev->blk = blk_new_open(blkdev->filename, NULL, options, qflags, &local_err); if (!blkdev->blk) { xen_pv_printf(xendev, 0, "error: %s\n", error_get_pretty(local_err)); error_free(local_err); return -1; } blk_set_enable_write_cache(blkdev->blk, !writethrough); } else { /* setup via qemu cmdline -> already setup for us */ xen_pv_printf(xendev, 2, "get configured bdrv (cmdline setup)\n"); blkdev->blk = blk_by_legacy_dinfo(blkdev->dinfo); if (blk_is_read_only(blkdev->blk) && !readonly) { xen_pv_printf(xendev, 0, "Unexpected read-only drive"); blkdev->blk = NULL; return -1; } /* blkdev->blk is not create by us, we get a reference * so we can blk_unref() unconditionally */ blk_ref(blkdev->blk); } blk_attach_dev_legacy(blkdev->blk, blkdev); blkdev->file_size = blk_getlength(blkdev->blk); if (blkdev->file_size < 0) { BlockDriverState *bs = blk_bs(blkdev->blk); const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL; xen_pv_printf(xendev, 1, "blk_getlength: %d (%s) | drv %s\n", (int)blkdev->file_size, strerror(-blkdev->file_size), drv_name ?: "-"); blkdev->file_size = 0; }
int main(int argc, char **argv) { BlockBackend *blk; BlockDriverState *bs; off_t dev_offset = 0; uint32_t nbdflags = 0; bool disconnect = false; const char *bindto = "0.0.0.0"; const char *port = NULL; char *sockpath = NULL; char *device = NULL; off_t fd_size; QemuOpts *sn_opts = NULL; const char *sn_id_or_name = NULL; const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:"; struct option lopt[] = { { "help", 0, NULL, 'h' }, { "version", 0, NULL, 'V' }, { "bind", 1, NULL, 'b' }, { "port", 1, NULL, 'p' }, { "socket", 1, NULL, 'k' }, { "offset", 1, NULL, 'o' }, { "read-only", 0, NULL, 'r' }, { "partition", 1, NULL, 'P' }, { "connect", 1, NULL, 'c' }, { "disconnect", 0, NULL, 'd' }, { "snapshot", 0, NULL, 's' }, { "load-snapshot", 1, NULL, 'l' }, { "nocache", 0, NULL, 'n' }, { "cache", 1, NULL, QEMU_NBD_OPT_CACHE }, { "aio", 1, NULL, QEMU_NBD_OPT_AIO }, { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD }, { "detect-zeroes", 1, NULL, QEMU_NBD_OPT_DETECT_ZEROES }, { "shared", 1, NULL, 'e' }, { "format", 1, NULL, 'f' }, { "persistent", 0, NULL, 't' }, { "verbose", 0, NULL, 'v' }, { NULL, 0, NULL, 0 } }; int ch; int opt_ind = 0; char *end; int flags = BDRV_O_RDWR; int partition = -1; int ret = 0; int fd; bool seen_cache = false; bool seen_discard = false; bool seen_aio = false; pthread_t client_thread; const char *fmt = NULL; Error *local_err = NULL; BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; QDict *options = NULL; /* The client thread uses SIGTERM to interrupt the server. A signal * handler ensures that "qemu-nbd -v -c" exits with a nice status code. */ struct sigaction sa_sigterm; memset(&sa_sigterm, 0, sizeof(sa_sigterm)); sa_sigterm.sa_handler = termsig_handler; sigaction(SIGTERM, &sa_sigterm, NULL); qemu_init_exec_dir(argv[0]); while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch (ch) { case 's': flags |= BDRV_O_SNAPSHOT; break; case 'n': optarg = (char *) "none"; /* fallthrough */ case QEMU_NBD_OPT_CACHE: if (seen_cache) { errx(EXIT_FAILURE, "-n and --cache can only be specified once"); } seen_cache = true; if (bdrv_parse_cache_flags(optarg, &flags) == -1) { errx(EXIT_FAILURE, "Invalid cache mode `%s'", optarg); } break; case QEMU_NBD_OPT_AIO: if (seen_aio) { errx(EXIT_FAILURE, "--aio can only be specified once"); } seen_aio = true; if (!strcmp(optarg, "native")) { flags |= BDRV_O_NATIVE_AIO; } else if (!strcmp(optarg, "threads")) { /* this is the default */ } else { errx(EXIT_FAILURE, "invalid aio mode `%s'", optarg); } break; case QEMU_NBD_OPT_DISCARD: if (seen_discard) { errx(EXIT_FAILURE, "--discard can only be specified once"); } seen_discard = true; if (bdrv_parse_discard_flags(optarg, &flags) == -1) { errx(EXIT_FAILURE, "Invalid discard mode `%s'", optarg); } break; case QEMU_NBD_OPT_DETECT_ZEROES: detect_zeroes = qapi_enum_parse(BlockdevDetectZeroesOptions_lookup, optarg, BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX, BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); if (local_err) { errx(EXIT_FAILURE, "Failed to parse detect_zeroes mode: %s", error_get_pretty(local_err)); } if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && !(flags & BDRV_O_UNMAP)) { errx(EXIT_FAILURE, "setting detect-zeroes to unmap is not allowed " "without setting discard operation to unmap"); } break; case 'b': bindto = optarg; break; case 'p': port = optarg; break; case 'o': dev_offset = strtoll (optarg, &end, 0); if (*end) { errx(EXIT_FAILURE, "Invalid offset `%s'", optarg); } if (dev_offset < 0) { errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg); } break; case 'l': if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) { sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts, optarg, false); if (!sn_opts) { errx(EXIT_FAILURE, "Failed in parsing snapshot param `%s'", optarg); } } else { sn_id_or_name = optarg; } /* fall through */ case 'r': nbdflags |= NBD_FLAG_READ_ONLY; flags &= ~BDRV_O_RDWR; break; case 'P': partition = strtol(optarg, &end, 0); if (*end) { errx(EXIT_FAILURE, "Invalid partition `%s'", optarg); } if (partition < 1 || partition > 8) { errx(EXIT_FAILURE, "Invalid partition %d", partition); } break; case 'k': sockpath = optarg; if (sockpath[0] != '/') { errx(EXIT_FAILURE, "socket path must be absolute\n"); } break; case 'd': disconnect = true; break; case 'c': device = optarg; break; case 'e': shared = strtol(optarg, &end, 0); if (*end) { errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg); } if (shared < 1) { errx(EXIT_FAILURE, "Shared device number must be greater than 0\n"); } break; case 'f': fmt = optarg; break; case 't': persistent = 1; break; case 'v': verbose = 1; break; case 'V': version(argv[0]); exit(0); break; case 'h': usage(argv[0]); exit(0); break; case '?': errx(EXIT_FAILURE, "Try `%s --help' for more information.", argv[0]); } } if ((argc - optind) != 1) { errx(EXIT_FAILURE, "Invalid number of argument.\n" "Try `%s --help' for more information.", argv[0]); } if (disconnect) { fd = open(argv[optind], O_RDWR); if (fd < 0) { err(EXIT_FAILURE, "Cannot open %s", argv[optind]); } nbd_disconnect(fd); close(fd); printf("%s disconnected\n", argv[optind]); return 0; } if (device && !verbose) { int stderr_fd[2]; pid_t pid; int ret; if (qemu_pipe(stderr_fd) < 0) { err(EXIT_FAILURE, "Error setting up communication pipe"); } /* Now daemonize, but keep a communication channel open to * print errors and exit with the proper status code. */ pid = fork(); if (pid < 0) { err(EXIT_FAILURE, "Failed to fork"); } else if (pid == 0) { close(stderr_fd[0]); ret = qemu_daemon(1, 0); /* Temporarily redirect stderr to the parent's pipe... */ dup2(stderr_fd[1], STDERR_FILENO); if (ret < 0) { err(EXIT_FAILURE, "Failed to daemonize"); } /* ... close the descriptor we inherited and go on. */ close(stderr_fd[1]); } else { bool errors = false; char *buf; /* In the parent. Print error messages from the child until * it closes the pipe. */ close(stderr_fd[1]); buf = g_malloc(1024); while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { errors = true; ret = qemu_write_full(STDERR_FILENO, buf, ret); if (ret < 0) { exit(EXIT_FAILURE); } } if (ret < 0) { err(EXIT_FAILURE, "Cannot read from daemon"); } /* Usually the daemon should not print any message. * Exit with zero status in that case. */ exit(errors); } } if (device != NULL && sockpath == NULL) { sockpath = g_malloc(128); snprintf(sockpath, 128, SOCKET_PATH, basename(device)); } saddr = nbd_build_socket_address(sockpath, bindto, port); if (qemu_init_main_loop(&local_err)) { error_report_err(local_err); exit(EXIT_FAILURE); } bdrv_init(); atexit(bdrv_close_all); if (fmt) { options = qdict_new(); qdict_put(options, "driver", qstring_from_str(fmt)); } srcpath = argv[optind]; blk = blk_new_open("hda", srcpath, NULL, options, flags, &local_err); if (!blk) { errx(EXIT_FAILURE, "Failed to blk_new_open '%s': %s", argv[optind], error_get_pretty(local_err)); } bs = blk_bs(blk); if (sn_opts) { ret = bdrv_snapshot_load_tmp(bs, qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID), qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME), &local_err); } else if (sn_id_or_name) { ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name, &local_err); } if (ret < 0) { errno = -ret; err(EXIT_FAILURE, "Failed to load snapshot: %s", error_get_pretty(local_err)); } bs->detect_zeroes = detect_zeroes; fd_size = blk_getlength(blk); if (fd_size < 0) { errx(EXIT_FAILURE, "Failed to determine the image length: %s", strerror(-fd_size)); } if (partition != -1) { ret = find_partition(blk, partition, &dev_offset, &fd_size); if (ret < 0) { errno = -ret; err(EXIT_FAILURE, "Could not find partition %d", partition); } } exp = nbd_export_new(blk, dev_offset, fd_size, nbdflags, nbd_export_closed, &local_err); if (!exp) { errx(EXIT_FAILURE, "%s", error_get_pretty(local_err)); } fd = socket_listen(saddr, &local_err); if (fd < 0) { error_report_err(local_err); return 1; } if (device) { int ret; ret = pthread_create(&client_thread, NULL, nbd_client_thread, device); if (ret != 0) { errx(EXIT_FAILURE, "Failed to create client thread: %s", strerror(ret)); } } else { /* Shut up GCC warnings. */ memset(&client_thread, 0, sizeof(client_thread)); } server_fd = fd; nbd_update_server_fd_handler(fd); /* now when the initialization is (almost) complete, chdir("/") * to free any busy filesystems */ if (chdir("/") < 0) { err(EXIT_FAILURE, "Could not chdir to root directory"); } state = RUNNING; do { main_loop_wait(false); if (state == TERMINATE) { state = TERMINATING; nbd_export_close(exp); nbd_export_put(exp); exp = NULL; } } while (state != TERMINATED); blk_unref(blk); if (sockpath) { unlink(sockpath); } qemu_opts_del(sn_opts); if (device) { void *ret; pthread_join(client_thread, &ret); exit(ret != NULL); } else { exit(EXIT_SUCCESS); } }
static int coroutine_fn commit_run(Job *job, Error **errp) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); int64_t offset; uint64_t delay_ns = 0; int ret = 0; int64_t n = 0; /* bytes */ void *buf = NULL; int bytes_written = 0; int64_t len, base_len; ret = len = blk_getlength(s->top); if (len < 0) { goto out; } job_progress_set_remaining(&s->common.job, len); ret = base_len = blk_getlength(s->base); if (base_len < 0) { goto out; } if (base_len < len) { ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL); if (ret) { goto out; } } buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); for (offset = 0; offset < len; offset += n) { bool copy; /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ job_sleep_ns(&s->common.job, delay_ns); if (job_is_cancelled(&s->common.job)) { break; } /* Copy if allocated above the base */ ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), offset, COMMIT_BUFFER_SIZE, &n); copy = (ret == 1); trace_commit_one_iteration(s, offset, n, ret); if (copy) { ret = commit_populate(s->top, s->base, offset, n, buf); bytes_written += n; } if (ret < 0) { BlockErrorAction action = block_job_error_action(&s->common, false, s->on_error, -ret); if (action == BLOCK_ERROR_ACTION_REPORT) { goto out; } else { n = 0; continue; } } /* Publish progress */ job_progress_update(&s->common.job, n); if (copy) { delay_ns = block_job_ratelimit_get_delay(&s->common, n); } else { delay_ns = 0; } } ret = 0; out: qemu_vfree(buf); return ret; }
static int coroutine_fn stream_run(Job *job, Error **errp) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockBackend *blk = s->common.blk; BlockDriverState *bs = blk_bs(blk); BlockDriverState *base = s->base; int64_t len; int64_t offset = 0; uint64_t delay_ns = 0; int error = 0; int ret = 0; int64_t n = 0; /* bytes */ void *buf; if (!bs->backing) { goto out; } len = bdrv_getlength(bs); if (len < 0) { ret = len; goto out; } job_progress_set_remaining(&s->common.job, len); buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE); /* Turn on copy-on-read for the whole block device so that guest read * requests help us make progress. Only do this when copying the entire * backing chain since the copy-on-read operation does not take base into * account. */ if (!base) { bdrv_enable_copy_on_read(bs); } for ( ; offset < len; offset += n) { bool copy; /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ job_sleep_ns(&s->common.job, delay_ns); if (job_is_cancelled(&s->common.job)) { break; } copy = false; ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n); if (ret == 1) { /* Allocated in the top, no need to copy. */ } else if (ret >= 0) { /* Copy if allocated in the intermediate images. Limit to the * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */ ret = bdrv_is_allocated_above(backing_bs(bs), base, offset, n, &n); /* Finish early if end of backing file has been reached */ if (ret == 0 && n == 0) { n = len - offset; } copy = (ret == 1); } trace_stream_one_iteration(s, offset, n, ret); if (copy) { ret = stream_populate(blk, offset, n, buf); } if (ret < 0) { BlockErrorAction action = block_job_error_action(&s->common, s->on_error, true, -ret); if (action == BLOCK_ERROR_ACTION_STOP) { n = 0; continue; } if (error == 0) { error = ret; } if (action == BLOCK_ERROR_ACTION_REPORT) { break; } } ret = 0; /* Publish progress */ job_progress_update(&s->common.job, n); if (copy) { delay_ns = block_job_ratelimit_get_delay(&s->common, n); } else { delay_ns = 0; } } if (!base) { bdrv_disable_copy_on_read(bs); } /* Do not remove the backing file if an error was there but ignored. */ ret = error; qemu_vfree(buf); out: /* Modify backing chain and close BDSes in main loop */ return ret; }
BlockDriverState *bs; if (qemuio_blk) { error_report("file open already, try 'help close'"); QDECREF(opts); return 1; } qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err); if (!qemuio_blk) { error_reportf_err(local_err, "can't open%s%s: ", name ? " device " : "", name ?: ""); return 1; } bs = blk_bs(qemuio_blk); if (bdrv_is_encrypted(bs)) { char password[256]; printf("Disk image '%s' is encrypted.\n", name); if (qemu_read_password(password, sizeof(password)) < 0) { error_report("No password given"); goto error; } if (bdrv_set_key(bs, password) < 0) { error_report("invalid password"); goto error; } } return 0;
int main(int argc, char **argv) { BlockBackend *blk; BlockDriverState *bs; off_t dev_offset = 0; uint16_t nbdflags = 0; bool disconnect = false; const char *bindto = NULL; const char *port = NULL; char *sockpath = NULL; char *device = NULL; off_t fd_size; QemuOpts *sn_opts = NULL; const char *sn_id_or_name = NULL; const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:D:"; struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, { "bind", required_argument, NULL, 'b' }, { "port", required_argument, NULL, 'p' }, { "socket", required_argument, NULL, 'k' }, { "offset", required_argument, NULL, 'o' }, { "read-only", no_argument, NULL, 'r' }, { "partition", required_argument, NULL, 'P' }, { "connect", required_argument, NULL, 'c' }, { "disconnect", no_argument, NULL, 'd' }, { "snapshot", no_argument, NULL, 's' }, { "load-snapshot", required_argument, NULL, 'l' }, { "nocache", no_argument, NULL, 'n' }, { "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE }, { "aio", required_argument, NULL, QEMU_NBD_OPT_AIO }, { "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD }, { "detect-zeroes", required_argument, NULL, QEMU_NBD_OPT_DETECT_ZEROES }, { "shared", required_argument, NULL, 'e' }, { "format", required_argument, NULL, 'f' }, { "persistent", no_argument, NULL, 't' }, { "verbose", no_argument, NULL, 'v' }, { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT }, { "export-name", required_argument, NULL, 'x' }, { "description", required_argument, NULL, 'D' }, { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS }, { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS }, { "trace", required_argument, NULL, 'T' }, { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK }, { NULL, 0, NULL, 0 } }; int ch; int opt_ind = 0; char *end; int flags = BDRV_O_RDWR; int partition = -1; int ret = 0; bool seen_cache = false; bool seen_discard = false; bool seen_aio = false; pthread_t client_thread; const char *fmt = NULL; Error *local_err = NULL; BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; QDict *options = NULL; const char *export_name = NULL; const char *export_description = NULL; const char *tlscredsid = NULL; bool imageOpts = false; bool writethrough = true; char *trace_file = NULL; bool fork_process = false; int old_stderr = -1; unsigned socket_activation; /* The client thread uses SIGTERM to interrupt the server. A signal * handler ensures that "qemu-nbd -v -c" exits with a nice status code. */ struct sigaction sa_sigterm; memset(&sa_sigterm, 0, sizeof(sa_sigterm)); sa_sigterm.sa_handler = termsig_handler; sigaction(SIGTERM, &sa_sigterm, NULL); #ifdef CONFIG_POSIX signal(SIGPIPE, SIG_IGN); #endif module_call_init(MODULE_INIT_TRACE); qcrypto_init(&error_fatal); module_call_init(MODULE_INIT_QOM); qemu_add_opts(&qemu_object_opts); qemu_add_opts(&qemu_trace_opts); qemu_init_exec_dir(argv[0]); while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch (ch) { case 's': flags |= BDRV_O_SNAPSHOT; break; case 'n': optarg = (char *) "none"; /* fallthrough */ case QEMU_NBD_OPT_CACHE: if (seen_cache) { error_report("-n and --cache can only be specified once"); exit(EXIT_FAILURE); } seen_cache = true; if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) == -1) { error_report("Invalid cache mode `%s'", optarg); exit(EXIT_FAILURE); } break; case QEMU_NBD_OPT_AIO: if (seen_aio) { error_report("--aio can only be specified once"); exit(EXIT_FAILURE); } seen_aio = true; if (!strcmp(optarg, "native")) { flags |= BDRV_O_NATIVE_AIO; } else if (!strcmp(optarg, "threads")) { /* this is the default */ } else { error_report("invalid aio mode `%s'", optarg); exit(EXIT_FAILURE); } break; case QEMU_NBD_OPT_DISCARD: if (seen_discard) { error_report("--discard can only be specified once"); exit(EXIT_FAILURE); } seen_discard = true; if (bdrv_parse_discard_flags(optarg, &flags) == -1) { error_report("Invalid discard mode `%s'", optarg); exit(EXIT_FAILURE); } break; case QEMU_NBD_OPT_DETECT_ZEROES: detect_zeroes = qapi_enum_parse(BlockdevDetectZeroesOptions_lookup, optarg, BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX, BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); if (local_err) { error_reportf_err(local_err, "Failed to parse detect_zeroes mode: "); exit(EXIT_FAILURE); } if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && !(flags & BDRV_O_UNMAP)) { error_report("setting detect-zeroes to unmap is not allowed " "without setting discard operation to unmap"); exit(EXIT_FAILURE); } break; case 'b': bindto = optarg; break; case 'p': port = optarg; break; case 'o': dev_offset = strtoll (optarg, &end, 0); if (*end) { error_report("Invalid offset `%s'", optarg); exit(EXIT_FAILURE); } if (dev_offset < 0) { error_report("Offset must be positive `%s'", optarg); exit(EXIT_FAILURE); } break; case 'l': if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) { sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts, optarg, false); if (!sn_opts) { error_report("Failed in parsing snapshot param `%s'", optarg); exit(EXIT_FAILURE); } } else { sn_id_or_name = optarg; } /* fall through */ case 'r': nbdflags |= NBD_FLAG_READ_ONLY; flags &= ~BDRV_O_RDWR; break; case 'P': partition = strtol(optarg, &end, 0); if (*end) { error_report("Invalid partition `%s'", optarg); exit(EXIT_FAILURE); } if (partition < 1 || partition > 8) { error_report("Invalid partition %d", partition); exit(EXIT_FAILURE); } break; case 'k': sockpath = optarg; if (sockpath[0] != '/') { error_report("socket path must be absolute"); exit(EXIT_FAILURE); } break; case 'd': disconnect = true; break; case 'c': device = optarg; break; case 'e': shared = strtol(optarg, &end, 0); if (*end) { error_report("Invalid shared device number '%s'", optarg); exit(EXIT_FAILURE); } if (shared < 1) { error_report("Shared device number must be greater than 0"); exit(EXIT_FAILURE); } break; case 'f': fmt = optarg; break; case 't': persistent = 1; break; case 'x': export_name = optarg; break; case 'D': export_description = optarg; break; case 'v': verbose = 1; break; case 'V': version(argv[0]); exit(0); break; case 'h': usage(argv[0]); exit(0); break; case '?': error_report("Try `%s --help' for more information.", argv[0]); exit(EXIT_FAILURE); case QEMU_NBD_OPT_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); if (!opts) { exit(EXIT_FAILURE); } } break; case QEMU_NBD_OPT_TLSCREDS: tlscredsid = optarg; break; case QEMU_NBD_OPT_IMAGE_OPTS: imageOpts = true; break; case 'T': g_free(trace_file); trace_file = trace_opt_parse(optarg); break; case QEMU_NBD_OPT_FORK: fork_process = true; break; } } if ((argc - optind) != 1) { error_report("Invalid number of arguments"); error_printf("Try `%s --help' for more information.\n", argv[0]); exit(EXIT_FAILURE); } if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, NULL)) { exit(EXIT_FAILURE); } if (!trace_init_backends()) { exit(1); } trace_init_file(trace_file); qemu_set_log(LOG_TRACE); socket_activation = check_socket_activation(); if (socket_activation == 0) { setup_address_and_port(&bindto, &port); } else { /* Using socket activation - check user didn't use -p etc. */ const char *err_msg = socket_activation_validate_opts(device, sockpath, bindto, port); if (err_msg != NULL) { error_report("%s", err_msg); exit(EXIT_FAILURE); } /* qemu-nbd can only listen on a single socket. */ if (socket_activation > 1) { error_report("qemu-nbd does not support socket activation with %s > 1", "LISTEN_FDS"); exit(EXIT_FAILURE); } } if (tlscredsid) { if (sockpath) { error_report("TLS is only supported with IPv4/IPv6"); exit(EXIT_FAILURE); } if (device) { error_report("TLS is not supported with a host device"); exit(EXIT_FAILURE); } if (!export_name) { /* Set the default NBD protocol export name, since * we *must* use new style protocol for TLS */ export_name = ""; } tlscreds = nbd_get_tls_creds(tlscredsid, &local_err); if (local_err) { error_report("Failed to get TLS creds %s", error_get_pretty(local_err)); exit(EXIT_FAILURE); } } if (disconnect) { int nbdfd = open(argv[optind], O_RDWR); if (nbdfd < 0) { error_report("Cannot open %s: %s", argv[optind], strerror(errno)); exit(EXIT_FAILURE); } nbd_disconnect(nbdfd); close(nbdfd); printf("%s disconnected\n", argv[optind]); return 0; } if ((device && !verbose) || fork_process) { int stderr_fd[2]; pid_t pid; int ret; if (qemu_pipe(stderr_fd) < 0) { error_report("Error setting up communication pipe: %s", strerror(errno)); exit(EXIT_FAILURE); } /* Now daemonize, but keep a communication channel open to * print errors and exit with the proper status code. */ pid = fork(); if (pid < 0) { error_report("Failed to fork: %s", strerror(errno)); exit(EXIT_FAILURE); } else if (pid == 0) { close(stderr_fd[0]); ret = qemu_daemon(1, 0); /* Temporarily redirect stderr to the parent's pipe... */ old_stderr = dup(STDERR_FILENO); dup2(stderr_fd[1], STDERR_FILENO); if (ret < 0) { error_report("Failed to daemonize: %s", strerror(errno)); exit(EXIT_FAILURE); } /* ... close the descriptor we inherited and go on. */ close(stderr_fd[1]); } else { bool errors = false; char *buf; /* In the parent. Print error messages from the child until * it closes the pipe. */ close(stderr_fd[1]); buf = g_malloc(1024); while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { errors = true; ret = qemu_write_full(STDERR_FILENO, buf, ret); if (ret < 0) { exit(EXIT_FAILURE); } } if (ret < 0) { error_report("Cannot read from daemon: %s", strerror(errno)); exit(EXIT_FAILURE); } /* Usually the daemon should not print any message. * Exit with zero status in that case. */ exit(errors); } } if (device != NULL && sockpath == NULL) { sockpath = g_malloc(128); snprintf(sockpath, 128, SOCKET_PATH, basename(device)); } if (socket_activation == 0) { server_ioc = qio_channel_socket_new(); saddr = nbd_build_socket_address(sockpath, bindto, port); if (qio_channel_socket_listen_sync(server_ioc, saddr, &local_err) < 0) { object_unref(OBJECT(server_ioc)); error_report_err(local_err); return 1; } } else { /* See comment in check_socket_activation above. */ assert(socket_activation == 1); server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD, &local_err); if (server_ioc == NULL) { error_report("Failed to use socket activation: %s", error_get_pretty(local_err)); exit(EXIT_FAILURE); } } if (qemu_init_main_loop(&local_err)) { error_report_err(local_err); exit(EXIT_FAILURE); } bdrv_init(); atexit(bdrv_close_all); srcpath = argv[optind]; if (imageOpts) { QemuOpts *opts; if (fmt) { error_report("--image-opts and -f are mutually exclusive"); exit(EXIT_FAILURE); } opts = qemu_opts_parse_noisily(&file_opts, srcpath, true); if (!opts) { qemu_opts_reset(&file_opts); exit(EXIT_FAILURE); } options = qemu_opts_to_qdict(opts, NULL); qemu_opts_reset(&file_opts); blk = blk_new_open(NULL, NULL, options, flags, &local_err); } else { if (fmt) { options = qdict_new(); qdict_put_str(options, "driver", fmt); } blk = blk_new_open(srcpath, NULL, options, flags, &local_err); } if (!blk) { error_reportf_err(local_err, "Failed to blk_new_open '%s': ", argv[optind]); exit(EXIT_FAILURE); } bs = blk_bs(blk); blk_set_enable_write_cache(blk, !writethrough); if (sn_opts) { ret = bdrv_snapshot_load_tmp(bs, qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID), qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME), &local_err); } else if (sn_id_or_name) { ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name, &local_err); } if (ret < 0) { error_reportf_err(local_err, "Failed to load snapshot: "); exit(EXIT_FAILURE); } bs->detect_zeroes = detect_zeroes; fd_size = blk_getlength(blk); if (fd_size < 0) { error_report("Failed to determine the image length: %s", strerror(-fd_size)); exit(EXIT_FAILURE); } if (dev_offset >= fd_size) { error_report("Offset (%lld) has to be smaller than the image size " "(%lld)", (long long int)dev_offset, (long long int)fd_size); exit(EXIT_FAILURE); } fd_size -= dev_offset; if (partition != -1) { ret = find_partition(blk, partition, &dev_offset, &fd_size); if (ret < 0) { error_report("Could not find partition %d: %s", partition, strerror(-ret)); exit(EXIT_FAILURE); } } exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed, writethrough, NULL, &local_err); if (!exp) { error_report_err(local_err); exit(EXIT_FAILURE); } if (export_name) { nbd_export_set_name(exp, export_name); nbd_export_set_description(exp, export_description); newproto = true; } else if (export_description) { error_report("Export description requires an export name"); exit(EXIT_FAILURE); } if (device) { int ret; ret = pthread_create(&client_thread, NULL, nbd_client_thread, device); if (ret != 0) { error_report("Failed to create client thread: %s", strerror(ret)); exit(EXIT_FAILURE); } } else { /* Shut up GCC warnings. */ memset(&client_thread, 0, sizeof(client_thread)); } nbd_update_server_watch(); /* now when the initialization is (almost) complete, chdir("/") * to free any busy filesystems */ if (chdir("/") < 0) { error_report("Could not chdir to root directory: %s", strerror(errno)); exit(EXIT_FAILURE); } if (fork_process) { dup2(old_stderr, STDERR_FILENO); close(old_stderr); } state = RUNNING; do { main_loop_wait(false); if (state == TERMINATE) { state = TERMINATING; nbd_export_close(exp); nbd_export_put(exp); exp = NULL; } } while (state != TERMINATED); blk_unref(blk); if (sockpath) { unlink(sockpath); } qemu_opts_del(sn_opts); if (device) { void *ret; pthread_join(client_thread, &ret); exit(ret != NULL); } else { exit(EXIT_SUCCESS); } }