Ejemplo n.º 1
0
int qcow2_snapshot_load_tmp(BlockDriverState *bs,
                            const char *snapshot_id,
                            const char *name,
                            Error **errp)
{
    int i, snapshot_index;
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *sn;
    uint64_t *new_l1_table;
    int new_l1_bytes;
    int ret;

    assert(bs->read_only);

    /* Search the snapshot */
    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
    if (snapshot_index < 0) {
        error_setg(errp,
                   "Can't find snapshot");
        return -ENOENT;
    }
    sn = &s->snapshots[snapshot_index];

    /* Allocate and read in the snapshot's L1 table */
    if (sn->l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
        error_setg(errp, "Snapshot L1 table too large");
        return -EFBIG;
    }
    new_l1_bytes = sn->l1_size * sizeof(uint64_t);
    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                       align_offset(new_l1_bytes, 512));
    if (new_l1_table == NULL) {
        return -ENOMEM;
    }

    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
                     new_l1_table, new_l1_bytes);
    if (ret < 0) {
        error_setg(errp, "Failed to read l1 table for snapshot");
        qemu_vfree(new_l1_table);
        return ret;
    }

    /* Switch the L1 table */
    qemu_vfree(s->l1_table);

    s->l1_size = sn->l1_size;
    s->l1_table_offset = sn->l1_table_offset;
    s->l1_table = new_l1_table;

    for(i = 0;i < s->l1_size; i++) {
        be64_to_cpus(&s->l1_table[i]);
    }

    return 0;
}
Ejemplo n.º 2
0
static void vhdx_close(BlockDriverState *bs)
{
    BDRVVHDXState *s = bs->opaque;
    qemu_vfree(s->headers[0]);
    qemu_vfree(s->headers[1]);
    qemu_vfree(s->bat);
    qemu_vfree(s->parent_entries);
    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);
}
Ejemplo n.º 3
0
Archivo: dmg.c Proyecto: heiher/qemu
static void dmg_close(BlockDriverState *bs)
{
    BDRVDMGState *s = bs->opaque;

    g_free(s->types);
    g_free(s->offsets);
    g_free(s->lengths);
    g_free(s->sectors);
    g_free(s->sectorcounts);
    qemu_vfree(s->compressed_chunk);
    qemu_vfree(s->uncompressed_chunk);

    inflateEnd(&s->zstream);
}
Ejemplo n.º 4
0
static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
{
    VirtIOBlockDataPlane *s = opaque;
    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
    struct virtio_blk_inhdr hdr;
    int len;

    if (likely(ret >= 0)) {
        hdr.status = VIRTIO_BLK_S_OK;
        len = ret;
    } else {
        hdr.status = VIRTIO_BLK_S_IOERR;
        len = 0;
    }

    trace_virtio_blk_data_plane_complete_request(s, req->head, ret);

    if (req->read_qiov) {
        assert(req->bounce_iov);
        qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
        qemu_iovec_destroy(req->read_qiov);
        g_slice_free(QEMUIOVector, req->read_qiov);
    }

    if (req->bounce_iov) {
        qemu_vfree(req->bounce_iov->iov_base);
        g_slice_free(struct iovec, req->bounce_iov);
    }
Ejemplo n.º 5
0
static void qemu_io_free(void *p)
{
    if (misalign) {
        p -= MISALIGN_OFFSET;
    }
    qemu_vfree(p);
}
Ejemplo n.º 6
0
/*
 * Completes an AIO request (calls the callback and frees the ACB).
 */
static void win32_aio_process_completion(QEMUWin32AIOState *s,
    QEMUWin32AIOCB *waiocb, DWORD count)
{
    int ret;
    s->count--;

    if (waiocb->ov.Internal != 0) {
        ret = -EIO;
    } else {
        ret = 0;
        if (count < waiocb->nbytes) {
            /* Short reads mean EOF, pad with zeros. */
            if (waiocb->is_read) {
                qemu_iovec_memset(waiocb->qiov, count, 0,
                    waiocb->qiov->size - count);
            } else {
                ret = -EINVAL;
            }
       }
    }

    if (!waiocb->is_linear) {
        if (ret == 0 && waiocb->is_read) {
            QEMUIOVector *qiov = waiocb->qiov;
            iov_from_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
        }
        qemu_vfree(waiocb->buf);
    }


    waiocb->common.cb(waiocb->common.opaque, ret);
    qemu_aio_release(waiocb);
}
Ejemplo n.º 7
0
static void qed_write_table_cb(void *opaque, int ret)
{
    QEDWriteTableCB *write_table_cb = opaque;

    trace_qed_write_table_cb(write_table_cb->s,
                             write_table_cb->orig_table,
                             write_table_cb->flush,
                             ret);

    if (ret) {
        goto out;
    }

    if (write_table_cb->flush) {
        /* We still need to flush first */
        write_table_cb->flush = false;
        bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
                       write_table_cb);
        return;
    }

out:
    qemu_vfree(write_table_cb->table);
    gencb_complete(&write_table_cb->gencb, ret);
    return;
}
Ejemplo n.º 8
0
static void qed_write_header_cb(void *opaque, int ret)
{
    QEDWriteHeaderCB *write_header_cb = opaque;

    qemu_vfree(write_header_cb->buf);
    gencb_complete(write_header_cb, ret);
}
Ejemplo n.º 9
0
static int coroutine_fn
raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                    QEMUIOVector *qiov, int flags)
{
    void *buf = NULL;
    BlockDriver *drv;
    QEMUIOVector local_qiov;
    int ret;

    if (bs->probed && sector_num == 0) {
        /* As long as these conditions are true, we can't get partial writes to
         * the probe buffer and can just directly check the request. */
        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);

        if (nb_sectors == 0) {
            /* qemu_iovec_to_buf() would fail, but we want to return success
             * instead of -EINVAL in this case. */
            return 0;
        }

        buf = qemu_try_blockalign(bs->file->bs, 512);
        if (!buf) {
            ret = -ENOMEM;
            goto fail;
        }

        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
        if (ret != 512) {
            ret = -EINVAL;
            goto fail;
        }

        drv = bdrv_probe_all(buf, 512, NULL);
        if (drv != bs->drv) {
            ret = -EPERM;
            goto fail;
        }

        /* Use the checked buffer, a malicious guest might be overwriting its
         * original buffer in the background. */
        qemu_iovec_init(&local_qiov, qiov->niov + 1);
        qemu_iovec_add(&local_qiov, buf, 512);
        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
        qiov = &local_qiov;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
    ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
                             nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);

fail:
    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(buf);
    return ret;
}
Ejemplo n.º 10
0
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    if (s->fd >= 0) {
        close(s->fd);
        s->fd = -1;
        if (s->aligned_buf != NULL)
            qemu_vfree(s->aligned_buf);
    }
}
Ejemplo n.º 11
0
Archivo: vpc.c Proyecto: binape/qemu
static void vpc_close(BlockDriverState *bs)
{
    BDRVVPCState *s = bs->opaque;
    qemu_vfree(s->pagetable);
#ifdef CACHE
    g_free(s->pageentry_u8);
#endif

    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);
}
Ejemplo n.º 12
0
Archivo: nbd.c Proyecto: marsleezm/qemu
void nbd_export_close(NBDExport *exp)
{
    while (!QSIMPLEQ_EMPTY(&exp->requests)) {
        NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
        qemu_vfree(first->data);
        g_free(first);
    }

    bdrv_close(exp->bs);
    g_free(exp);
}
Ejemplo n.º 13
0
/* Reads the log header, and subsequent descriptors (if any).  This
 * will allocate all the space for buffer, which must be NULL when
 * passed into this function. Each descriptor will also be validated,
 * and error returned if any are invalid. */
static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
                              VHDXLogEntries *log, VHDXLogDescEntries **buffer)
{
    int ret = 0;
    uint32_t desc_sectors;
    uint32_t sectors_read;
    VHDXLogEntryHeader hdr;
    VHDXLogDescEntries *desc_entries = NULL;
    int i;

    assert(*buffer == NULL);

    ret = vhdx_log_peek_hdr(bs, log, &hdr);
    if (ret < 0) {
        goto exit;
    }
    vhdx_log_entry_hdr_le_import(&hdr);
    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
        ret = -EINVAL;
        goto exit;
    }

    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
    desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);

    ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
                                desc_sectors, false);
    if (ret < 0) {
        goto free_and_exit;
    }
    if (sectors_read != desc_sectors) {
        ret = -EINVAL;
        goto free_and_exit;
    }

    /* put in proper endianness, and validate each desc */
    for (i = 0; i < hdr.descriptor_count; i++) {
        vhdx_log_desc_le_import(&desc_entries->desc[i]);
        if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
            ret = -EINVAL;
            goto free_and_exit;
        }
    }

    *buffer = desc_entries;
    goto exit;

free_and_exit:
    qemu_vfree(desc_entries);
exit:
    return ret;
}
Ejemplo n.º 14
0
static void rbd_aio_bh_cb(void *opaque)
{
    RBDAIOCB *acb = opaque;

    if (acb->cmd == RBD_AIO_READ) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;

    qemu_aio_release(acb);
}
Ejemplo n.º 15
0
int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
{
    int i;

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
        qemu_vfree(c->entries[i].table);
    }

    qemu_free(c->entries);
    qemu_free(c);

    return 0;
}
Ejemplo n.º 16
0
static void blkverify_aio_bh(void *opaque)
{
    BlkverifyAIOCB *acb = opaque;

    qemu_bh_delete(acb->bh);
    if (acb->buf) {
        qemu_iovec_destroy(&acb->raw_qiov);
        qemu_vfree(acb->buf);
    }
    acb->common.cb(acb->common.opaque, acb->ret);
    if (acb->finished) {
        *acb->finished = true;
    }
    qemu_aio_release(acb);
}
Ejemplo n.º 17
0
static void bdrv_qed_close(BlockDriverState *bs)
{
    BDRVQEDState *s = bs->opaque;

    /* Ensure writes reach stable storage */
    bdrv_flush(bs->file);

    /* Clean shutdown, no check required on next open */
    if (s->header.features & QED_F_NEED_CHECK) {
        s->header.features &= ~QED_F_NEED_CHECK;
        qed_write_header_sync(s);
    }

    qed_free_l2_cache(&s->l2_cache);
    qemu_vfree(s->l1_table);
}
Ejemplo n.º 18
0
Archivo: rbd.c Proyecto: nikunjad/qemu
/*
 * This aio completion is being called from rbd_finish_bh() and runs in qemu
 * BH context.
 */
static void qemu_rbd_complete_aio(RADOSCB *rcb)
{
    RBDAIOCB *acb = rcb->acb;
    int64_t r;

    r = rcb->ret;

    if (acb->cmd != RBD_AIO_READ) {
        if (r < 0) {
            acb->ret = r;
            acb->error = 1;
        } else if (!acb->error) {
            acb->ret = rcb->size;
        }
    } else {
        if (r < 0) {
            qemu_rbd_memset(rcb, 0);
            acb->ret = r;
            acb->error = 1;
        } else if (r < rcb->size) {
            qemu_rbd_memset(rcb, r);
            if (!acb->error) {
                acb->ret = rcb->size;
            }
        } else if (!acb->error) {
            acb->ret = r;
        }
    }

    g_free(rcb);

    if (!LIBRBD_USE_IOVEC) {
        if (acb->cmd == RBD_AIO_READ) {
            qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
        }
        qemu_vfree(acb->bounce);
    }

    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));

    qemu_aio_unref(acb);
}
Ejemplo n.º 19
0
/*
 * This aio completion is being called from rbd_finish_bh() and runs in qemu
 * BH context.
 */
static void qemu_rbd_complete_aio(RADOSCB *rcb)
{
    RBDAIOCB *acb = rcb->acb;
    int64_t r;

    r = rcb->ret;

    if (acb->cmd != RBD_AIO_READ) {
        if (r < 0) {
            acb->ret = r;
            acb->error = 1;
        } else if (!acb->error) {
            acb->ret = rcb->size;
        }
    } else {
        if (r < 0) {
            memset(rcb->buf, 0, rcb->size);
            acb->ret = r;
            acb->error = 1;
        } else if (r < rcb->size) {
            memset(rcb->buf + r, 0, rcb->size - r);
            if (!acb->error) {
                acb->ret = rcb->size;
            }
        } else if (!acb->error) {
            acb->ret = r;
        }
    }

    g_free(rcb);

    if (acb->cmd == RBD_AIO_READ) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
    acb->status = 0;

    if (!acb->cancelled) {
        qemu_aio_release(acb);
    }
}
Ejemplo n.º 20
0
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;

    c = g_new0(Qcow2Cache, 1);
    c->size = num_tables;
    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
    c->table_array = qemu_try_blockalign(bs->file,
                                         (size_t) num_tables * s->cluster_size);

    if (!c->entries || !c->table_array) {
        qemu_vfree(c->table_array);
        g_free(c->entries);
        g_free(c);
        c = NULL;
    }

    return c;
}
Ejemplo n.º 21
0
/*
 * This aio completion is being called from qemu_rbd_aio_event_reader()
 * and runs in qemu context. It schedules a bh, but just in case the aio
 * was not cancelled before.
 */
static void qemu_rbd_complete_aio(RADOSCB *rcb)
{
    RBDAIOCB *acb = rcb->acb;
    int64_t r;

    if (acb->cancelled) {
        qemu_vfree(acb->bounce);
        qemu_aio_release(acb);
        goto done;
    }

    r = rcb->ret;

    if (acb->cmd == RBD_AIO_WRITE ||
        acb->cmd == RBD_AIO_DISCARD) {
        if (r < 0) {
            acb->ret = r;
            acb->error = 1;
        } else if (!acb->error) {
            acb->ret = rcb->size;
        }
    } else {
        if (r < 0) {
            memset(rcb->buf, 0, rcb->size);
            acb->ret = r;
            acb->error = 1;
        } else if (r < rcb->size) {
            memset(rcb->buf + r, 0, rcb->size - r);
            if (!acb->error) {
                acb->ret = rcb->size;
            }
        } else if (!acb->error) {
            acb->ret = r;
        }
    }
    /* Note that acb->bh can be NULL in case where the aio was cancelled */
    acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
    qemu_bh_schedule(acb->bh);
done:
    g_free(rcb);
}
Ejemplo n.º 22
0
void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane)
{
    XenBlockRequest *request;

    if (!dataplane) {
        return;
    }

    while (!QLIST_EMPTY(&dataplane->freelist)) {
        request = QLIST_FIRST(&dataplane->freelist);
        QLIST_REMOVE(request, list);
        qemu_iovec_destroy(&request->v);
        qemu_vfree(request->buf);
        g_free(request);
    }

    qemu_bh_delete(dataplane->bh);
    if (dataplane->iothread) {
        object_unref(OBJECT(dataplane->iothread));
    }

    g_free(dataplane);
}
Ejemplo n.º 23
0
static void compare_full_images_cb (void *opaque, int ret)
{
    CompareFullCB *cf = opaque;

    if (ret) {
        /* Failed. Retry the operation. */
        bdrv_aio_readv (bs, cf->sector_num, &cf->qiov, cf->nb_sectors,
                        compare_full_images_cb, cf);
        return;
    }

    truth_io (cf->truth_buf, cf->sector_num, cf->nb_sectors, TRUE);
    verify (cf->truth_buf, cf->iov.iov_base, cf->sector_num, cf->nb_sectors);

    cf->sector_num += cf->nb_sectors;
    if (cf->sector_num >= total_sectors) {
        /* Finished. */
        free (cf->truth_buf);
        qemu_vfree (cf->iov.iov_base);
        g_free(cf);
        return;
    }

    /* Read more data to compare. */
    if (cf->sector_num + cf->max_nb_sectors > total_sectors) {
        cf->nb_sectors = total_sectors - cf->sector_num;
    } else {
        cf->nb_sectors = cf->max_nb_sectors;
    }
    cf->iov.iov_len = cf->nb_sectors * 512;
    qemu_iovec_init_external (&cf->qiov, &cf->iov, 1);
    if (!bdrv_aio_readv (bs, cf->sector_num, &cf->qiov,
                         cf->nb_sectors, compare_full_images_cb, cf)) {
        die ("bdrv_aio_readv\n");
    }
}
Ejemplo n.º 24
0
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for a NULL string */
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->bdev_length = bdrv_getlength(bs);
    if (s->bdev_length < 0) {
        ret = s->bdev_length;
        goto immediate_exit;
    } else if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
            block_job_yield(&s->common);
        }
        s->common.cancelled = false;
        goto immediate_exit;
    }

    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        ret = bdrv_get_info(s->target, &bdi);
        if (ret < 0) {
            goto immediate_exit;
        }
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->bdev_length / BDRV_SECTOR_SIZE;
    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
        goto immediate_exit;
    }

    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (!s->is_none_mode) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base = s->base;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns = 0;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* s->common.offset contains the number of bytes already processed so
         * far, cnt is the number of dirty sectors remaining and
         * s->sectors_in_flight is the number of sectors currently being
         * processed; together those are the current total operation length */
        s->common.len = s->common.offset +
                        (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) ==
                    BLOCK_ERROR_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                if (!s->synced) {
                    block_job_event_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        if (!s->synced) {
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    bdrv_iostatus_disable(s->target);

    data = g_malloc(sizeof(*data));
    data->ret = ret;
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
Ejemplo n.º 25
0
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
{
    BDRVQEDState *s = bs->opaque;
    QEDHeader le_header;
    int64_t file_size;
    int ret;

    s->bs = bs;
    QSIMPLEQ_INIT(&s->allocating_write_reqs);

    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
    }
    qed_header_le_to_cpu(&le_header, &s->header);

    if (s->header.magic != QED_MAGIC) {
        error_setg(errp, "Image not in QED format");
        return -EINVAL;
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        /* image uses unsupported feature bits */
        char buf[64];
        snprintf(buf, sizeof(buf), "%" PRIx64,
            s->header.features & ~QED_FEATURE_MASK);
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "QED", buf);
        return -ENOTSUP;
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
        return -EINVAL;
    }

    /* Round down file size to the last cluster */
    file_size = bdrv_getlength(bs->file);
    if (file_size < 0) {
        return file_size;
    }
    s->file_size = qed_start_of_cluster(s, file_size);

    if (!qed_is_table_size_valid(s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_is_image_size_valid(s->header.image_size,
                                 s->header.cluster_size,
                                 s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
        return -EINVAL;
    }

    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                      sizeof(uint64_t);
    s->l2_shift = ffs(s->header.cluster_size) - 1;
    s->l2_mask = s->table_nelems - 1;
    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;

    if ((s->header.features & QED_F_BACKING_FILE)) {
        if ((uint64_t)s->header.backing_filename_offset +
            s->header.backing_filename_size >
            s->header.cluster_size * s->header.header_size) {
            return -EINVAL;
        }

        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
                              s->header.backing_filename_size, bs->backing_file,
                              sizeof(bs->backing_file));
        if (ret < 0) {
            return ret;
        }

        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
        }
    }

    /* Reset unknown autoclear feature bits.  This is a backwards
     * compatibility mechanism that allows images to be opened by older
     * programs, which "knock out" unknown feature bits.  When an image is
     * opened by a newer program again it can detect that the autoclear
     * feature is no longer valid.
     */
    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;

        ret = qed_write_header_sync(s);
        if (ret) {
            return ret;
        }

        /* From here on only known autoclear feature bits are valid */
        bdrv_flush(bs->file);
    }

    s->l1_table = qed_alloc_table(s);
    qed_init_l2_cache(&s->l2_cache);

    ret = qed_read_l1_table_sync(s);
    if (ret) {
        goto out;
    }

    /* If image was not closed cleanly, check consistency */
    if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
        /* Read-only images cannot be fixed.  There is no risk of corruption
         * since write operations are not possible.  Therefore, allow
         * potentially inconsistent images to be opened read-only.  This can
         * aid data recovery from an otherwise inconsistent image.
         */
        if (!bdrv_is_read_only(bs->file) &&
            !(flags & BDRV_O_INCOMING)) {
            BdrvCheckResult result = {0};

            ret = qed_check(s, &result, true);
            if (ret) {
                goto out;
            }
        }
    }

    bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));

out:
    if (ret) {
        qed_free_l2_cache(&s->l2_cache);
        qemu_vfree(s->l1_table);
    }
    return ret;
}
Ejemplo n.º 26
0
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[1024];
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len <= 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        bdrv_get_info(s->target, &bdi);
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(bs, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(bs);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that qemu_aio_flush() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                mirror_iteration(s);
                continue;
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
            } else {
                delay_ns = 0;
            }

            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_set_dirty_tracking(bs, 0);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_unref(s->target);
    block_job_completed(&s->common, ret);
}
Ejemplo n.º 27
0
int main(int argc, char **argv)
{
    BlockDriverState *bs;
    off_t dev_offset = 0;
    off_t offset = 0;
    bool readonly = false;
    bool disconnect = false;
    const char *bindto = "0.0.0.0";
    int port = NBD_DEFAULT_PORT;
    struct sockaddr_in addr;
    socklen_t addr_len = sizeof(addr);
    off_t fd_size;
    char *device = NULL;
    char *socket = NULL;
    char sockpath[128];
    const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t";
    struct option lopt[] = {
        { "help", 0, NULL, 'h' },
        { "version", 0, NULL, 'V' },
        { "bind", 1, NULL, 'b' },
        { "port", 1, NULL, 'p' },
        { "socket", 1, NULL, 'k' },
        { "offset", 1, NULL, 'o' },
        { "read-only", 0, NULL, 'r' },
        { "partition", 1, NULL, 'P' },
        { "connect", 1, NULL, 'c' },
        { "disconnect", 0, NULL, 'd' },
        { "snapshot", 0, NULL, 's' },
        { "nocache", 0, NULL, 'n' },
        { "shared", 1, NULL, 'e' },
        { "persistent", 0, NULL, 't' },
        { "verbose", 0, NULL, 'v' },
        { NULL, 0, NULL, 0 }
    };
    int ch;
    int opt_ind = 0;
    int li;
    char *end;
    int flags = BDRV_O_RDWR;
    int partition = -1;
    int ret;
    int shared = 1;
    uint8_t *data;
    fd_set fds;
    int *sharing_fds;
    int fd;
    int i;
    int nb_fds = 0;
    int max_fd;
    int persistent = 0;
    uint32_t nbdflags;

    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
        switch (ch) {
        case 's':
            flags |= BDRV_O_SNAPSHOT;
            break;
        case 'n':
            flags |= BDRV_O_NOCACHE;
            break;
        case 'b':
            bindto = optarg;
            break;
        case 'p':
            li = strtol(optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid port `%s'", optarg);
            }
            if (li < 1 || li > 65535) {
                errx(EXIT_FAILURE, "Port out of range `%s'", optarg);
            }
            port = (uint16_t)li;
            break;
        case 'o':
                dev_offset = strtoll (optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid offset `%s'", optarg);
            }
            if (dev_offset < 0) {
                errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg);
            }
            break;
        case 'r':
            readonly = true;
            flags &= ~BDRV_O_RDWR;
            break;
        case 'P':
            partition = strtol(optarg, &end, 0);
            if (*end)
                errx(EXIT_FAILURE, "Invalid partition `%s'", optarg);
            if (partition < 1 || partition > 8)
                errx(EXIT_FAILURE, "Invalid partition %d", partition);
            break;
        case 'k':
            socket = optarg;
            if (socket[0] != '/')
                errx(EXIT_FAILURE, "socket path must be absolute\n");
            break;
        case 'd':
            disconnect = true;
            break;
        case 'c':
            device = optarg;
            break;
        case 'e':
            shared = strtol(optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg);
            }
            if (shared < 1) {
                errx(EXIT_FAILURE, "Shared device number must be greater than 0\n");
            }
            break;
	case 't':
	    persistent = 1;
	    break;
        case 'v':
            verbose = 1;
            break;
        case 'V':
            version(argv[0]);
            exit(0);
            break;
        case 'h':
            usage(argv[0]);
            exit(0);
            break;
        case '?':
            errx(EXIT_FAILURE, "Try `%s --help' for more information.",
                 argv[0]);
        }
    }

    if ((argc - optind) != 1) {
        errx(EXIT_FAILURE, "Invalid number of argument.\n"
             "Try `%s --help' for more information.",
             argv[0]);
    }

    if (disconnect) {
        fd = open(argv[optind], O_RDWR);
        if (fd == -1)
            err(EXIT_FAILURE, "Cannot open %s", argv[optind]);

        nbd_disconnect(fd);

        close(fd);

        printf("%s disconnected\n", argv[optind]);

	return 0;
    }

    bdrv_init();

    bs = bdrv_new("hda");
    if (bs == NULL)
        return 1;

    if ((ret = bdrv_open(bs, argv[optind], flags, NULL)) < 0) {
        errno = -ret;
        err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]);
    }

    fd_size = bs->total_sectors * 512;

    if (partition != -1 &&
        find_partition(bs, partition, &dev_offset, &fd_size))
        err(EXIT_FAILURE, "Could not find partition %d", partition);

    if (device) {
        pid_t pid;
        int sock;

        /* want to fail before daemonizing */
        if (access(device, R_OK|W_OK) == -1) {
            err(EXIT_FAILURE, "Could not access '%s'", device);
        }

        if (!verbose) {
            /* detach client and server */
            if (daemon(0, 0) == -1) {
                err(EXIT_FAILURE, "Failed to daemonize");
            }
        }

        if (socket == NULL) {
            snprintf(sockpath, sizeof(sockpath), SOCKET_PATH,
                     basename(device));
            socket = sockpath;
        }

        pid = fork();
        if (pid < 0)
            return 1;
        if (pid != 0) {
            off_t size;
            size_t blocksize;

            ret = 0;
            bdrv_close(bs);

            do {
                sock = unix_socket_outgoing(socket);
                if (sock == -1) {
                    if (errno != ENOENT && errno != ECONNREFUSED) {
                        ret = 1;
                        goto out;
                    }
                    sleep(1);	/* wait children */
                }
            } while (sock == -1);

            fd = open(device, O_RDWR);
            if (fd == -1) {
                ret = 1;
                goto out;
            }

            ret = nbd_receive_negotiate(sock, NULL, &nbdflags,
					&size, &blocksize);
            if (ret == -1) {
                ret = 1;
                goto out;
            }

            ret = nbd_init(fd, sock, size, blocksize);
            if (ret == -1) {
                ret = 1;
                goto out;
            }

            printf("NBD device %s is now connected to file %s\n",
                    device, argv[optind]);

	    /* update partition table */

            show_parts(device);

            ret = nbd_client(fd);
            if (ret) {
                ret = 1;
            }
            close(fd);
 out:
            kill(pid, SIGTERM);
            unlink(socket);

            return ret;
        }
        /* children */
    }

    sharing_fds = qemu_malloc((shared + 1) * sizeof(int));

    if (socket) {
        sharing_fds[0] = unix_socket_incoming(socket);
    } else {
        sharing_fds[0] = tcp_socket_incoming(bindto, port);
    }

    if (sharing_fds[0] == -1)
        return 1;
    max_fd = sharing_fds[0];
    nb_fds++;

    data = qemu_blockalign(bs, NBD_BUFFER_SIZE);
    if (data == NULL)
        errx(EXIT_FAILURE, "Cannot allocate data buffer");

    do {

        FD_ZERO(&fds);
        for (i = 0; i < nb_fds; i++)
            FD_SET(sharing_fds[i], &fds);

        ret = select(max_fd + 1, &fds, NULL, NULL, NULL);
        if (ret == -1)
            break;

        if (FD_ISSET(sharing_fds[0], &fds))
            ret--;
        for (i = 1; i < nb_fds && ret; i++) {
            if (FD_ISSET(sharing_fds[i], &fds)) {
                if (nbd_trip(bs, sharing_fds[i], fd_size, dev_offset,
                    &offset, readonly, data, NBD_BUFFER_SIZE) != 0) {
                    close(sharing_fds[i]);
                    nb_fds--;
                    sharing_fds[i] = sharing_fds[nb_fds];
                    i--;
                }
                ret--;
            }
        }
        /* new connection ? */
        if (FD_ISSET(sharing_fds[0], &fds)) {
            if (nb_fds < shared + 1) {
                sharing_fds[nb_fds] = accept(sharing_fds[0],
                                             (struct sockaddr *)&addr,
                                             &addr_len);
                if (sharing_fds[nb_fds] != -1 &&
                    nbd_negotiate(sharing_fds[nb_fds], fd_size) != -1) {
                        if (sharing_fds[nb_fds] > max_fd)
                            max_fd = sharing_fds[nb_fds];
                        nb_fds++;
                }
            }
        }
    } while (persistent || nb_fds > 1);
    qemu_vfree(data);

    close(sharing_fds[0]);
    bdrv_close(bs);
    qemu_free(sharing_fds);
    if (socket)
        unlink(socket);

    return 0;
}
Ejemplo n.º 28
0
Archivo: dmg.c Proyecto: heiher/qemu
static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
{
    BDRVDMGState *s = bs->opaque;
    DmgHeaderState ds;
    uint64_t rsrc_fork_offset, rsrc_fork_length;
    uint64_t plist_xml_offset, plist_xml_length;
    int64_t offset;
    int ret;

    block_module_load_one("dmg-bz2");
    bs->read_only = true;

    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
    /* used by dmg_read_mish_block to keep track of the current I/O position */
    ds.data_fork_offset = 0;
    ds.max_compressed_size = 1;
    ds.max_sectors_per_chunk = 1;

    /* locate the UDIF trailer */
    offset = dmg_find_koly_offset(bs->file, errp);
    if (offset < 0) {
        ret = offset;
        goto fail;
    }

    /* offset of data fork (DataForkOffset) */
    ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset);
    if (ret < 0) {
        goto fail;
    } else if (ds.data_fork_offset > offset) {
        ret = -EINVAL;
        goto fail;
    }

    /* offset of resource fork (RsrcForkOffset) */
    ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset);
    if (ret < 0) {
        goto fail;
    }
    ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length);
    if (ret < 0) {
        goto fail;
    }
    if (rsrc_fork_offset >= offset ||
            rsrc_fork_length > offset - rsrc_fork_offset) {
        ret = -EINVAL;
        goto fail;
    }
    /* offset of property list (XMLOffset) */
    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
    if (ret < 0) {
        goto fail;
    }
    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
    if (ret < 0) {
        goto fail;
    }
    if (plist_xml_offset >= offset ||
            plist_xml_length > offset - plist_xml_offset) {
        ret = -EINVAL;
        goto fail;
    }
    ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors);
    if (ret < 0) {
        goto fail;
    }
    if (bs->total_sectors < 0) {
        ret = -EINVAL;
        goto fail;
    }
    if (rsrc_fork_length != 0) {
        ret = dmg_read_resource_fork(bs, &ds,
                                     rsrc_fork_offset, rsrc_fork_length);
        if (ret < 0) {
            goto fail;
        }
    } else if (plist_xml_length != 0) {
        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
        if (ret < 0) {
            goto fail;
        }
    } else {
        ret = -EINVAL;
        goto fail;
    }

    /* initialize zlib engine */
    s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
                          ds.max_compressed_size + 1);
    s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
                            512 * ds.max_sectors_per_chunk);
    if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
    }

    s->current_chunk = s->n_chunks;

    qemu_co_mutex_init(&s->lock);
    return 0;

fail:
    g_free(s->types);
    g_free(s->offsets);
    g_free(s->lengths);
    g_free(s->sectors);
    g_free(s->sectorcounts);
    qemu_vfree(s->compressed_chunk);
    qemu_vfree(s->uncompressed_chunk);
    return ret;
}
Ejemplo n.º 29
0
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = s->source;
    BlockDriverState *target_bs = blk_bs(s->target);
    bool need_drain = true;
    int64_t length;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for a NULL string */
    int ret = 0;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->bdev_length = bdrv_getlength(bs);
    if (s->bdev_length < 0) {
        ret = s->bdev_length;
        goto immediate_exit;
    }

    /* Active commit must resize the base image if its size differs from the
     * active layer. */
    if (s->base == blk_bs(s->target)) {
        int64_t base_length;

        base_length = blk_getlength(s->target);
        if (base_length < 0) {
            ret = base_length;
            goto immediate_exit;
        }

        if (s->bdev_length > base_length) {
            ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF,
                               NULL);
            if (ret < 0) {
                goto immediate_exit;
            }
        }
    }

    if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
            block_job_yield(&s->common);
        }
        s->common.cancelled = false;
        goto immediate_exit;
    }

    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(target_bs, backing_filename,
                              sizeof(backing_filename));
    if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
        s->target_cluster_size = bdi.cluster_size;
    } else {
        s->target_cluster_size = BDRV_SECTOR_SIZE;
    }
    if (backing_filename[0] && !target_bs->backing &&
        s->granularity < s->target_cluster_size) {
        s->buf_size = MAX(s->buf_size, s->target_cluster_size);
        s->cow_bitmap = bitmap_new(length);
    }
    s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);

    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
        goto immediate_exit;
    }

    mirror_free_init(s);

    s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    if (!s->is_none_mode) {
        ret = mirror_dirty_init(s);
        if (ret < 0 || block_job_is_cancelled(&s->common)) {
            goto immediate_exit;
        }
    }

    assert(!s->dbi);
    s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
    for (;;) {
        uint64_t delay_ns = 0;
        int64_t cnt, delta;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        block_job_pause_point(&s->common);

        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
         * the number of bytes currently being processed; together those are
         * the current remaining operation length */
        block_job_progress_set_remaining(&s->common, s->bytes_in_flight + cnt);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
         * We do so every BLKOCK_JOB_SLICE_TIME nanoseconds, or when there is
         * an error, or when the source is clean, whichever comes first. */
        delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
        if (delta < BLOCK_JOB_SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
                mirror_wait_for_io(s);
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            if (!s->synced) {
                if (mirror_flush(s) < 0) {
                    /* Go check s->ret.  */
                    continue;
                }
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                block_job_event_ready(&s->common);
                s->synced = true;
            }

            should_complete = s->should_complete ||
                block_job_is_cancelled(&s->common);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs, so pause it now.  Before deciding
             * whether to switch to target check one last time if I/O has
             * come in the meanwhile, and if not flush the data to disk.
             */
            trace_mirror_before_drain(s, cnt);

            bdrv_drained_begin(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            if (cnt > 0 || mirror_flush(s) < 0) {
                bdrv_drained_end(bs);
                continue;
            }

            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            need_drain = false;
            break;
        }

        ret = 0;

        if (s->synced && !should_complete) {
            delay_ns = (s->in_flight == 0 &&
                        cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
        }
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        block_job_sleep_ns(&s->common, delay_ns);
        if (block_job_is_cancelled(&s->common) &&
            (!s->synced || s->common.force))
        {
            break;
        }
        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || ((s->common.force || !s->synced) &&
               block_job_is_cancelled(&s->common)));
        assert(need_drain);
        mirror_wait_for_all_io(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_dirty_iter_free(s->dbi);

    data = g_malloc(sizeof(*data));
    data->ret = ret;

    if (need_drain) {
        bdrv_drained_begin(bs);
    }
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
Ejemplo n.º 30
0
Archivo: rbd.c Proyecto: nikunjad/qemu
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
                                 int64_t off,
                                 QEMUIOVector *qiov,
                                 int64_t size,
                                 BlockCompletionFunc *cb,
                                 void *opaque,
                                 RBDAIOCmd cmd)
{
    RBDAIOCB *acb;
    RADOSCB *rcb = NULL;
    rbd_completion_t c;
    int r;

    BDRVRBDState *s = bs->opaque;

    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
    acb->cmd = cmd;
    acb->qiov = qiov;
    assert(!qiov || qiov->size == size);

    rcb = g_new(RADOSCB, 1);

    if (!LIBRBD_USE_IOVEC) {
        if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
            acb->bounce = NULL;
        } else {
            acb->bounce = qemu_try_blockalign(bs, qiov->size);
            if (acb->bounce == NULL) {
                goto failed;
            }
        }
        if (cmd == RBD_AIO_WRITE) {
            qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        }
        rcb->buf = acb->bounce;
    }

    acb->ret = 0;
    acb->error = 0;
    acb->s = s;

    rcb->acb = acb;
    rcb->s = acb->s;
    rcb->size = size;
    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
    if (r < 0) {
        goto failed;
    }

    switch (cmd) {
    case RBD_AIO_WRITE:
#ifdef LIBRBD_SUPPORTS_IOVEC
            r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
#else
            r = rbd_aio_write(s->image, off, size, rcb->buf, c);
#endif
        break;
    case RBD_AIO_READ:
#ifdef LIBRBD_SUPPORTS_IOVEC
            r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
#else
            r = rbd_aio_read(s->image, off, size, rcb->buf, c);
#endif
        break;
    case RBD_AIO_DISCARD:
        r = rbd_aio_discard_wrapper(s->image, off, size, c);
        break;
    case RBD_AIO_FLUSH:
        r = rbd_aio_flush_wrapper(s->image, c);
        break;
    default:
        r = -EINVAL;
    }

    if (r < 0) {
        goto failed_completion;
    }
    return &acb->common;

failed_completion:
    rbd_aio_release(c);
failed:
    g_free(rcb);
    if (!LIBRBD_USE_IOVEC) {
        qemu_vfree(acb->bounce);
    }

    qemu_aio_unref(acb);
    return NULL;
}