Example #1
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ);
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }

        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE);
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync) {
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    }
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
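
The aio_inflight counter in Example #1 is a plain in-flight reference count: the function takes one reference for itself up front and one per submitted request, and the trailing qemu_aio_complete(ioreq, 0) drops the function's own reference, so the request only finishes once every submitted operation has completed. A minimal standalone sketch of the idiom (hypothetical names, not QEMU API):

#include <stdio.h>

struct request {
    int inflight; /* outstanding references, including the submitter's own */
    int errors;   /* number of failed sub-operations */
};

/* Completion callback: finish the request only when the last reference
 * is dropped. */
static void op_complete(struct request *req, int ret)
{
    if (ret != 0) {
        req->errors++;
    }
    if (--req->inflight > 0) {
        return; /* other sub-operations are still pending */
    }
    printf("request finished, errors=%d\n", req->errors);
}

static void submit(struct request *req)
{
    req->inflight = 1;   /* reference held by submit() itself */

    req->inflight++;     /* one reference per asynchronous operation */
    op_complete(req, 0); /* here the "operation" completes immediately */

    op_complete(req, 0); /* drop submit()'s own reference */
}

int main(void)
{
    struct request req = { 0, 0 };
    submit(&req);
    return 0;
}
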
Example #2
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
    BDRVQcowState *s = bs->opaque;
    int result = 0;
    int ret;
    int i;

    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);

    for (i = 0; i < c->size; i++) {
        ret = qcow2_cache_entry_flush(bs, c, i);
        if (ret < 0 && result != -ENOSPC) {
            result = ret;
        }
    }

    if (result == 0) {
        ret = bdrv_flush(bs->file);
        if (ret < 0) {
            result = ret;
        }
    }

    return result;
}
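
Note how the loop keeps flushing the remaining entries even after a failure: a later error overwrites the recorded one unless -ENOSPC has already been seen, so -ENOSPC is sticky and is what the caller ultimately gets. A standalone sketch of that aggregation pattern (hypothetical names, not the qcow2 API):

#include <errno.h>

struct cache {
    int size;
};

/* Stub standing in for qcow2_cache_entry_flush() in this sketch. */
static int flush_entry(struct cache *c, int i)
{
    (void)c;
    (void)i;
    return 0;
}

static int flush_all(struct cache *c)
{
    int result = 0;
    int i;

    for (i = 0; i < c->size; i++) {
        int ret = flush_entry(c, i);
        /* keep going on error; once -ENOSPC is recorded it wins */
        if (ret < 0 && result != -ENOSPC) {
            result = ret;
        }
    }
    return result;
}
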
Example #3
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
    BDRVQcowState *s = bs->opaque;
    int ret = 0;

    if (!c->entries[i].dirty || !c->entries[i].offset) {
        return 0;
    }

    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
                                  c == s->l2_table_cache, i);

    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
        ret = bdrv_flush(bs->file);
        if (ret >= 0) {
            c->depends_on_flush = false;
        }
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        ret = qcow2_pre_write_overlap_check(bs,
                                            QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_BLOCK,
                                            c->entries[i].offset, s->cluster_size);
    } else if (c == s->l2_table_cache) {
        ret = qcow2_pre_write_overlap_check(bs,
                                            QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2,
                                            c->entries[i].offset, s->cluster_size);
    } else {
        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
                                            c->entries[i].offset, s->cluster_size);
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
    } else if (c == s->l2_table_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
                      s->cluster_size);
    if (ret < 0) {
        return ret;
    }

    c->entries[i].dirty = false;

    return 0;
}
Example #4
static void test_sync_op_flush(BdrvChild *c)
{
    int ret;

    /* Normal success path */
    ret = bdrv_flush(c->bs);
    g_assert_cmpint(ret, ==, 0);

    /* Early success: Read-only image */
    c->bs->read_only = true;
    c->bs->open_flags &= ~BDRV_O_RDWR;

    ret = bdrv_flush(c->bs);
    g_assert_cmpint(ret, ==, 0);

    c->bs->read_only = false;
    c->bs->open_flags |= BDRV_O_RDWR;
}
Example #5
static int update_header_sync(BlockDriverState *bs)
{
    int ret;

    ret = qcow2_update_header(bs);
    if (ret < 0) {
        return ret;
    }

    return bdrv_flush(bs);
}
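
update_header_sync() is the smallest form of the write-then-flush pattern: persist the metadata update, then flush so it is stable before the caller proceeds. A rough POSIX analogue of the same ordering (hypothetical function, not QEMU code):

#include <unistd.h>

/* Write an updated header at offset 0, then fsync() so the update is
 * durable before the caller continues. */
static int update_header_posix(int fd, const void *header, size_t len)
{
    ssize_t n = pwrite(fd, header, len, 0);
    if (n < 0 || (size_t)n != len) {
        return -1;
    }
    return fsync(fd);
}
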
Example #6
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq_map(ioreq) == -1)
        goto err;

    ioreq->aio_inflight++;
    if (ioreq->presync)
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync)
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
Example #7
static void bdrv_qed_close(BlockDriverState *bs)
{
    BDRVQEDState *s = bs->opaque;

    /* Ensure writes reach stable storage */
    bdrv_flush(bs->file);

    /* Clean shutdown, no check required on next open */
    if (s->header.features & QED_F_NEED_CHECK) {
        s->header.features &= ~QED_F_NEED_CHECK;
        qed_write_header_sync(s);
    }

    qed_free_l2_cache(&s->l2_cache);
    qemu_vfree(s->l1_table);
}
Example #8
/**
 * Mark an image clean once it passes check or has been repaired
 */
static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
{
    /* Skip if there were unfixable corruptions or I/O errors */
    if (result->corruptions > 0 || result->check_errors > 0) {
        return;
    }

    /* Skip if image is already marked clean */
    if (!(s->header.features & QED_F_NEED_CHECK)) {
        return;
    }

    /* Ensure fixes reach storage before clearing check bit */
    bdrv_flush(s->bs);

    s->header.features &= ~QED_F_NEED_CHECK;
    qed_write_header_sync(s);
}
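
Examples #7 and #8 order durability the same way: flush first, and only then clear QED_F_NEED_CHECK and rewrite the header, so a crash between the two steps leaves the image still marked as needing a check rather than falsely clean. A hedged POSIX-style sketch of that ordering (names invented for illustration):

#include <stdint.h>
#include <unistd.h>

#define NEED_CHECK 0x1u /* hypothetical "image needs consistency check" flag */

struct header {
    uint32_t features;
};

static int mark_clean(int fd, struct header *h)
{
    /* 1. Make all repairs durable before touching the flag. */
    if (fsync(fd) < 0) {
        return -1;
    }

    /* 2. Only now clear the flag and persist the header. */
    h->features &= ~NEED_CHECK;
    if (pwrite(fd, h, sizeof(*h), 0) != (ssize_t)sizeof(*h)) {
        return -1;
    }
    return fsync(fd);
}
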
Example #9
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->aio_inflight > 0)
        return;
    if (ioreq->postsync)
        bdrv_flush(ioreq->blkdev->bs);

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    qemu_bh_schedule(ioreq->blkdev->bh);
}
Example #10
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
    int result = 0;
    int ret;
    int i;

    for (i = 0; i < c->size; i++) {
        ret = qcow2_cache_entry_flush(bs, c, i);
        if (ret < 0 && result != -ENOSPC) {
            result = ret;
        }
    }

    if (result == 0) {
        ret = bdrv_flush(bs->file);
        if (ret < 0) {
            result = ret;
        }
    }

    return result;
}
Example #11
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
    BDRVQcowState *s = bs->opaque;
    int ret = 0;

    if (!c->entries[i].dirty || !c->entries[i].offset) {
        return 0;
    }

    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
        ret = bdrv_flush(bs->file);
        if (ret >= 0) {
            c->depends_on_flush = false;
        }
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
    } else if (c == s->l2_table_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
        s->cluster_size);
    if (ret < 0) {
        return ret;
    }

    c->entries[i].dirty = false;

    return 0;
}
Example #12
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for a NULL string */
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->bdev_length = bdrv_getlength(bs);
    if (s->bdev_length < 0) {
        ret = s->bdev_length;
        goto immediate_exit;
    } else if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
            block_job_yield(&s->common);
        }
        s->common.cancelled = false;
        goto immediate_exit;
    }

    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        ret = bdrv_get_info(s->target, &bdi);
        if (ret < 0) {
            goto immediate_exit;
        }
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->bdev_length / BDRV_SECTOR_SIZE;
    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
        goto immediate_exit;
    }

    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (!s->is_none_mode) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base = s->base;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns = 0;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* s->common.offset contains the number of bytes already processed so
         * far, cnt is the number of dirty sectors remaining and
         * s->sectors_in_flight is the number of sectors currently being
         * processed; together those are the current total operation length */
        s->common.len = s->common.offset +
                        (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) ==
                    BLOCK_ERROR_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                if (!s->synced) {
                    block_job_event_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        if (!s->synced) {
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    bdrv_iostatus_disable(s->target);

    data = g_malloc(sizeof(*data));
    data->ret = ret;
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
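
The completion check in the main loop above is the subtle part: the job flushes the target only once nothing is in flight and the bitmap is clean, reports readiness after a successful flush, and then re-reads the dirty count because the guest may have dirtied the source while the flush ran. A condensed sketch of just that decision (stub helpers, not the real job API):

#include <stdbool.h>

struct mirror_state {
    int in_flight;
    bool synced;
};

/* Stubs standing in for the dirty bitmap and the target flush. */
static int dirty_count(struct mirror_state *m) { (void)m; return 0; }
static int flush_target(struct mirror_state *m) { (void)m; return 0; }

static bool try_complete(struct mirror_state *m)
{
    if (m->in_flight != 0 || dirty_count(m) != 0) {
        return false;
    }
    if (flush_target(m) < 0) {
        return false; /* the error policy decides what happens next */
    }
    m->synced = true; /* from here on, cancelling still drains cleanly */
    /* The guest may have written during the flush: check again. */
    return dirty_count(m) == 0;
}
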
Example #13
static int ioreq_runio_qemu_sync(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    int i, rc, len = 0;
    off_t pos;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1)
        goto err;
    if (ioreq->presync)
        bdrv_flush(blkdev->bs);

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE,
                           ioreq->v.iov[i].iov_base,
                           ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            len += ioreq->v.iov[i].iov_len;
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments)
            break;
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE,
                            ioreq->v.iov[i].iov_base,
                            ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            len += ioreq->v.iov[i].iov_len;
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync)
        bdrv_flush(blkdev->bs);
    ioreq->status = BLKIF_RSP_OKAY;

    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
Example #14
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
{
    BDRVQEDState *s = bs->opaque;
    QEDHeader le_header;
    int64_t file_size;
    int ret;

    s->bs = bs;
    QSIMPLEQ_INIT(&s->allocating_write_reqs);

    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
    }
    qed_header_le_to_cpu(&le_header, &s->header);

    if (s->header.magic != QED_MAGIC) {
        error_setg(errp, "Image not in QED format");
        return -EINVAL;
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        /* image uses unsupported feature bits */
        char buf[64];
        snprintf(buf, sizeof(buf), "%" PRIx64,
            s->header.features & ~QED_FEATURE_MASK);
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "QED", buf);
        return -ENOTSUP;
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
        return -EINVAL;
    }

    /* Round down file size to the last cluster */
    file_size = bdrv_getlength(bs->file);
    if (file_size < 0) {
        return file_size;
    }
    s->file_size = qed_start_of_cluster(s, file_size);

    if (!qed_is_table_size_valid(s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_is_image_size_valid(s->header.image_size,
                                 s->header.cluster_size,
                                 s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
        return -EINVAL;
    }

    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                      sizeof(uint64_t);
    s->l2_shift = ffs(s->header.cluster_size) - 1;
    s->l2_mask = s->table_nelems - 1;
    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;

    if ((s->header.features & QED_F_BACKING_FILE)) {
        if ((uint64_t)s->header.backing_filename_offset +
            s->header.backing_filename_size >
            s->header.cluster_size * s->header.header_size) {
            return -EINVAL;
        }

        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
                              s->header.backing_filename_size, bs->backing_file,
                              sizeof(bs->backing_file));
        if (ret < 0) {
            return ret;
        }

        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
        }
    }

    /* Reset unknown autoclear feature bits.  This is a backwards
     * compatibility mechanism that allows images to be opened by older
     * programs, which "knock out" unknown feature bits.  When an image is
     * opened by a newer program again it can detect that the autoclear
     * feature is no longer valid.
     */
    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;

        ret = qed_write_header_sync(s);
        if (ret) {
            return ret;
        }

        /* From here on only known autoclear feature bits are valid */
        bdrv_flush(bs->file);
    }

    s->l1_table = qed_alloc_table(s);
    qed_init_l2_cache(&s->l2_cache);

    ret = qed_read_l1_table_sync(s);
    if (ret) {
        goto out;
    }

    /* If image was not closed cleanly, check consistency */
    if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
        /* Read-only images cannot be fixed.  There is no risk of corruption
         * since write operations are not possible.  Therefore, allow
         * potentially inconsistent images to be opened read-only.  This can
         * aid data recovery from an otherwise inconsistent image.
         */
        if (!bdrv_is_read_only(bs->file) &&
            !(flags & BDRV_O_INCOMING)) {
            BdrvCheckResult result = {0};

            ret = qed_check(s, &result, true);
            if (ret) {
                goto out;
            }
        }
    }

    bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));

out:
    if (ret) {
        qed_free_l2_cache(&s->l2_cache);
        qemu_vfree(s->l1_table);
    }
    return ret;
}
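
The autoclear handling in bdrv_qed_open() is a compatibility idiom: a writer that does not recognise a feature bit clears it, syncs the header, and flushes, so a newer reader can later tell that whatever state the bit described is stale. A minimal sketch of the knock-out step (the mask value is invented for illustration):

#include <stdint.h>

#define KNOWN_AUTOCLEAR_MASK 0x1ULL /* bits this writer understands */

/* Drop autoclear feature bits we do not understand; the caller is then
 * expected to write the header back synchronously and flush. */
static uint64_t knock_out_unknown(uint64_t autoclear_features)
{
    return autoclear_features & KNOWN_AUTOCLEAR_MASK;
}
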
Example #15
static int raw_flush(BlockDriverState *bs)
{
    return bdrv_flush(bs->file);
}
Example #16
/* add at the end of the file a new list of snapshots */
static int qcow2_write_snapshots(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *sn;
    QCowSnapshotHeader h;
    int i, name_size, id_str_size, snapshots_size;
    uint64_t data64;
    uint32_t data32;
    int64_t offset, snapshots_offset;

    /* compute the size of the snapshots */
    offset = 0;
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        offset = align_offset(offset, 8);
        offset += sizeof(h);
        offset += strlen(sn->id_str);
        offset += strlen(sn->name);
    }
    snapshots_size = offset;

    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
    bdrv_flush(bs->file);
    offset = snapshots_offset;
    if (offset < 0) {
        return offset;
    }

    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        memset(&h, 0, sizeof(h));
        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
        h.l1_size = cpu_to_be32(sn->l1_size);
        h.vm_state_size = cpu_to_be32(sn->vm_state_size);
        h.date_sec = cpu_to_be32(sn->date_sec);
        h.date_nsec = cpu_to_be32(sn->date_nsec);
        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);

        id_str_size = strlen(sn->id_str);
        name_size = strlen(sn->name);
        h.id_str_size = cpu_to_be16(id_str_size);
        h.name_size = cpu_to_be16(name_size);
        offset = align_offset(offset, 8);
        if (bdrv_pwrite_sync(bs->file, offset, &h, sizeof(h)) < 0)
            goto fail;
        offset += sizeof(h);
        if (bdrv_pwrite_sync(bs->file, offset, sn->id_str, id_str_size) < 0)
            goto fail;
        offset += id_str_size;
        if (bdrv_pwrite_sync(bs->file, offset, sn->name, name_size) < 0)
            goto fail;
        offset += name_size;
    }

    /* update the various header fields */
    data64 = cpu_to_be64(snapshots_offset);
    if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, snapshots_offset),
                         &data64, sizeof(data64)) < 0)
        goto fail;
    data32 = cpu_to_be32(s->nb_snapshots);
    if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                         &data32, sizeof(data32)) < 0)
        goto fail;

    /* free the old snapshot table */
    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
    s->snapshots_offset = snapshots_offset;
    s->snapshots_size = snapshots_size;
    return 0;
fail:
    return -1;
}
Example #17
static int bdrv_fclose(void *opaque)
{
    return bdrv_flush(opaque);
}
Example #18
/* if no id is provided, a new one is constructed */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *snapshots1, sn1, *sn = &sn1;
    int i, ret;
    uint64_t *l1_table = NULL;
    int64_t l1_table_offset;

    memset(sn, 0, sizeof(*sn));

    if (sn_info->id_str[0] == '\0') {
        /* compute a new id */
        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
    }

    /* check that the ID is unique */
    if (find_snapshot_by_id(bs, sn_info->id_str) >= 0)
        return -ENOENT;

    sn->id_str = g_strdup(sn_info->id_str);
    if (!sn->id_str)
        goto fail;
    sn->name = g_strdup(sn_info->name);
    if (!sn->name)
        goto fail;
    sn->vm_state_size = sn_info->vm_state_size;
    sn->date_sec = sn_info->date_sec;
    sn->date_nsec = sn_info->date_nsec;
    sn->vm_clock_nsec = sn_info->vm_clock_nsec;

    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
    if (ret < 0)
        goto fail;

    /* create the L1 table of the snapshot */
    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
    if (l1_table_offset < 0) {
        goto fail;
    }
    bdrv_flush(bs->file);

    sn->l1_table_offset = l1_table_offset;
    sn->l1_size = s->l1_size;

    if (s->l1_size != 0) {
        l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
    } else {
        l1_table = NULL;
    }

    for(i = 0; i < s->l1_size; i++) {
        l1_table[i] = cpu_to_be64(s->l1_table[i]);
    }
    if (bdrv_pwrite_sync(bs->file, sn->l1_table_offset,
                         l1_table, s->l1_size * sizeof(uint64_t)) < 0)
        goto fail;
    g_free(l1_table);
    l1_table = NULL;

    snapshots1 = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
    if (s->snapshots) {
        memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
        g_free(s->snapshots);
    }
    s->snapshots = snapshots1;
    s->snapshots[s->nb_snapshots++] = *sn;

    if (qcow2_write_snapshots(bs) < 0)
        goto fail;
#ifdef DEBUG_ALLOC
    qcow2_check_refcounts(bs);
#endif
    return 0;
fail:
    g_free(sn->name);
    g_free(l1_table);
    return -1;
}
Example #19
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[1024];
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len <= 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        bdrv_get_info(s->target, &bdi);
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(bs, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(bs);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that qemu_aio_flush() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                mirror_iteration(s);
                continue;
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
            } else {
                delay_ns = 0;
            }

            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_set_dirty_tracking(bs, 0);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_unref(s->target);
    block_job_completed(&s->common, ret);
}
Example #20
/* add at the end of the file a new list of snapshots */
static int qcow2_write_snapshots(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *sn;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    int i, name_size, id_str_size, snapshots_size;
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED header_data;
    int64_t offset, snapshots_offset;
    int ret;

    /* compute the size of the snapshots */
    offset = 0;
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        offset = align_offset(offset, 8);
        offset += sizeof(h);
        offset += sizeof(extra);
        offset += strlen(sn->id_str);
        offset += strlen(sn->name);
    }
    snapshots_size = offset;

    /* Allocate space for the new snapshot list */
    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
    offset = snapshots_offset;
    if (offset < 0) {
        return offset;
    }
    ret = bdrv_flush(bs);
    if (ret < 0) {
        return ret;
    }

    /* The snapshot list position has not yet been updated, so these clusters
     * must indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
                                        s->snapshots_size);
    if (ret < 0) {
        return ret;
    }


    /* Write all snapshots to the new list */
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        memset(&h, 0, sizeof(h));
        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
        h.l1_size = cpu_to_be32(sn->l1_size);
        /* If it doesn't fit in 32 bit, older implementations should treat it
         * as a disk-only snapshot rather than truncate the VM state */
        if (sn->vm_state_size <= 0xffffffff) {
            h.vm_state_size = cpu_to_be32(sn->vm_state_size);
        }
        h.date_sec = cpu_to_be32(sn->date_sec);
        h.date_nsec = cpu_to_be32(sn->date_nsec);
        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
        h.extra_data_size = cpu_to_be32(sizeof(extra));

        memset(&extra, 0, sizeof(extra));
        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
        extra.disk_size = cpu_to_be64(sn->disk_size);

        id_str_size = strlen(sn->id_str);
        name_size = strlen(sn->name);
        h.id_str_size = cpu_to_be16(id_str_size);
        h.name_size = cpu_to_be16(name_size);
        offset = align_offset(offset, 8);

        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(h);

        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(extra);

        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;

        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
        offset += name_size;
    }

    /*
     * Update the header to point to the new snapshot table. This requires the
     * new table and its refcounts to be stable on disk.
     */
    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
                      offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));

    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);

    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                           &header_data, sizeof(header_data));
    if (ret < 0) {
        goto fail;
    }

    /* free the old snapshot table */
    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
                        QCOW2_DISCARD_SNAPSHOT);
    s->snapshots_offset = snapshots_offset;
    s->snapshots_size = snapshots_size;
    return 0;

fail:
    return ret;
}
Example #21
static int flush_f(int argc, char **argv)
{
    bdrv_flush(bs);
    return 0;
}
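
flush_f() discards the return value of bdrv_flush(), so a failed flush is silently reported as success to the qemu-io user. A hedged variant that surfaces the error, assuming the same qemu-io context (global bs) as the example above; flush_f_checked is a hypothetical name, not the upstream implementation:

static int flush_f_checked(int argc, char **argv)
{
    int ret = bdrv_flush(bs);
    if (ret < 0) {
        printf("flush failed: %s\n", strerror(-ret));
    }
    return 0;
}
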
Example #22
/* if no id is provided, a new one is constructed */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *new_snapshot_list = NULL;
    QCowSnapshot *old_snapshot_list = NULL;
    QCowSnapshot sn1, *sn = &sn1;
    int i, ret;
    uint64_t *l1_table = NULL;
    int64_t l1_table_offset;

    memset(sn, 0, sizeof(*sn));

    /* Generate an ID if it wasn't passed */
    if (sn_info->id_str[0] == '\0') {
        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
    }

    /* Check that the ID is unique */
    if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
        return -EEXIST;
    }

    /* Populate sn with passed data */
    sn->id_str = g_strdup(sn_info->id_str);
    sn->name = g_strdup(sn_info->name);

    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    sn->vm_state_size = sn_info->vm_state_size;
    sn->date_sec = sn_info->date_sec;
    sn->date_nsec = sn_info->date_nsec;
    sn->vm_clock_nsec = sn_info->vm_clock_nsec;

    /* Allocate the L1 table of the snapshot and copy the current one there. */
    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
    if (l1_table_offset < 0) {
        ret = l1_table_offset;
        goto fail;
    }

    sn->l1_table_offset = l1_table_offset;
    sn->l1_size = s->l1_size;

    l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
    for(i = 0; i < s->l1_size; i++) {
        l1_table[i] = cpu_to_be64(s->l1_table[i]);
    }

    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
                      s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
    }

    g_free(l1_table);
    l1_table = NULL;

    /*
     * Increase the refcounts of all clusters and make sure everything is
     * stable on disk before updating the snapshot table to contain a pointer
     * to the new L1 table.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Append the new snapshot to the snapshot list */
    new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
    if (s->snapshots) {
        memcpy(new_snapshot_list, s->snapshots,
               s->nb_snapshots * sizeof(QCowSnapshot));
        old_snapshot_list = s->snapshots;
    }
    s->snapshots = new_snapshot_list;
    s->snapshots[s->nb_snapshots++] = *sn;

    ret = qcow2_write_snapshots(bs);
    if (ret < 0) {
        g_free(s->snapshots);
        s->snapshots = old_snapshot_list;
        goto fail;
    }

    g_free(old_snapshot_list);

#ifdef DEBUG_ALLOC
    {
      BdrvCheckResult result = {0};
      qcow2_check_refcounts(bs, &result);
    }
#endif
    return 0;

fail:
    g_free(sn->id_str);
    g_free(sn->name);
    g_free(l1_table);

    return ret;
}