Exemple #1
0
/**
 * qemu_co_queue_run_restart:
 *
 * Enter each coroutine that was previously marked for restart by
 * qemu_co_queue_next() or qemu_co_queue_restart_all().  This function is
 * invoked by the core coroutine code when the current coroutine yields or
 * terminates.
 */
void qemu_co_queue_run_restart(Coroutine *co)
{
    Coroutine *next;

    trace_qemu_co_queue_run_restart(co);
    while ((next = QSIMPLEQ_FIRST(&co->co_queue_wakeup))) {
        QSIMPLEQ_REMOVE_HEAD(&co->co_queue_wakeup, co_queue_next);
        qemu_coroutine_enter(next);
    }
}
Exemple #2
0
void nbd_export_close(NBDExport *exp)
{
    while (!QSIMPLEQ_EMPTY(&exp->requests)) {
        NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
        qemu_vfree(first->data);
        g_free(first);
    }

    bdrv_close(exp->bs);
    g_free(exp);
}
Exemple #3
0
static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
{
    QEDAIOCB *acb;

    assert(s->allocating_write_reqs_plugged);

    s->allocating_write_reqs_plugged = false;

    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
    if (acb) {
        qed_aio_next_io(acb, 0);
    }
}
Exemple #4
0
bool qemu_co_enter_next(CoQueue *queue)
{
    Coroutine *next;

    next = QSIMPLEQ_FIRST(&queue->entries);
    if (!next) {
        return false;
    }

    QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
    qemu_coroutine_enter(next);
    return true;
}
Exemple #5
0
static void qed_need_check_timer_cb(void *opaque)
{
    BDRVQEDState *s = opaque;

    /* The timer should only fire when allocating writes have drained */
    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));

    trace_qed_need_check_timer_cb(s);

    qed_plug_allocating_write_reqs(s);

    /* Ensure writes are on disk before clearing flag */
    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
}
Exemple #6
0
static void bitmap_list_free(Qcow2BitmapList *bm_list)
{
    Qcow2Bitmap *bm;

    if (bm_list == NULL) {
        return;
    }

    while ((bm = QSIMPLEQ_FIRST(bm_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(bm_list, entry);
        bitmap_free(bm);
    }

    g_free(bm_list);
}
Exemple #7
0
static NBDRequest *nbd_request_get(NBDClient *client)
{
    NBDRequest *req;
    NBDExport *exp = client->exp;

    assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
    client->nb_requests++;

    if (QSIMPLEQ_EMPTY(&exp->requests)) {
        req = g_malloc0(sizeof(NBDRequest));
        req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
    } else {
        req = QSIMPLEQ_FIRST(&exp->requests);
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
    }
    nbd_client_get(client);
    req->client = client;
    return req;
}
Exemple #8
0
static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
{
    Coroutine *self = qemu_coroutine_self();
    Coroutine *next;

    if (QSIMPLEQ_EMPTY(&queue->entries)) {
        return false;
    }

    while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
        trace_qemu_co_queue_next(next);
        if (single) {
            break;
        }
    }
    return true;
}
Exemple #9
0
static void entropy_available(void *opaque)
{
    RndRandom *s = RNG_RANDOM(opaque);

    while (!QSIMPLEQ_EMPTY(&s->parent.requests)) {
        RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
        ssize_t len;

        len = read(s->fd, req->data, req->size);
        if (len < 0 && errno == EAGAIN) {
            return;
        }
        g_assert(len != -1);

        req->receive_entropy(req->opaque, req->data, len);

        rng_backend_finalize_request(&s->parent, req);
    }

    /* We've drained all requests, the fd handler can be reset. */
    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
}
Exemple #10
0
/* Submit async read while handling COW.
 * Returns: The number of bytes copied after and including offset,
 *          excluding any bytes copied prior to offset due to alignment.
 *          This will be @bytes if no alignment is necessary, or
 *          (new_end - offset) if tail is rounded up or down due to
 *          alignment or buffer limit.
 */
static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
                               uint64_t bytes)
{
    BlockBackend *source = s->common.blk;
    int nb_chunks;
    uint64_t ret;
    MirrorOp *op;
    uint64_t max_bytes;

    max_bytes = s->granularity * s->max_iov;

    /* We can only handle as much as buf_size at a time. */
    bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
    assert(bytes);
    assert(bytes < BDRV_REQUEST_MAX_BYTES);
    ret = bytes;

    if (s->cow_bitmap) {
        ret += mirror_cow_align(s, &offset, &bytes);
    }
    assert(bytes <= s->buf_size);
    /* The offset is granularity-aligned because:
     * 1) Caller passes in aligned values;
     * 2) mirror_cow_align is used only when target cluster is larger. */
    assert(QEMU_IS_ALIGNED(offset, s->granularity));
    /* The range is sector-aligned, since bdrv_getlength() rounds up. */
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    nb_chunks = DIV_ROUND_UP(bytes, s->granularity);

    while (s->buf_free_count < nb_chunks) {
        trace_mirror_yield_in_flight(s, offset, s->in_flight);
        mirror_wait_for_io(s);
    }

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->offset = offset;
    op->bytes = bytes;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = bytes - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
    }

    /* Copy the dirty cluster.  */
    s->in_flight++;
    s->bytes_in_flight += bytes;
    trace_mirror_one_iteration(s, offset, bytes);

    blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op);
    return ret;
}
Exemple #11
0
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    uint64_t delay_ns = 0;
    MirrorOp *op;
    int pnum;
    int64_t ret;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->bdev_length / BDRV_SECTOR_SIZE;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        s->waiting_for_io = true;
        qemu_coroutine_yield();
        s->waiting_for_io = false;
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            s->waiting_for_io = true;
            qemu_coroutine_yield();
            s->waiting_for_io = false;
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }
        if (IOV_MAX < nb_chunks + added_chunks) {
            trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
            break;
        }

        /* We have enough free space to copy these sectors.  */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
        if (!s->synced && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
        }
    } while (delay_ns == 0 && next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector
            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);

    ret = bdrv_get_block_status_above(source, NULL, sector_num,
                                      nb_sectors, &pnum);
    if (ret < 0 || pnum < nb_sectors ||
            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                       mirror_read_complete, op);
    } else if (ret & BDRV_BLOCK_ZERO) {
        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                              mirror_write_complete, op);
    } else {
        assert(!(ret & BDRV_BLOCK_DATA));
        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
                         mirror_write_complete, op);
    }
    return delay_ns;
}
Exemple #12
0
static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(source, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->common.len >> BDRV_SECTOR_BITS;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        qemu_coroutine_yield();
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            qemu_coroutine_yield();
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }

        /* We have enough free space to copy these sectors.  */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
    } while (next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_slice_new(MirrorOp);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, s->granularity);

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty(source, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
}
Exemple #13
0
bool qemu_co_queue_empty(CoQueue *queue)
{
    return QSIMPLEQ_FIRST(&queue->entries) == NULL;
}