Example #1
/*
 * Copies iovecs from src to the end of dst. It starts copying after skipping
 * the given number of bytes in src and copies until src is completely copied
 * or the total size of the copied iovecs reaches size. The size of the last
 * copied iovec is changed in order to fit the specified total size if it isn't
 * a perfect fit already.
 */
void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip,
    size_t size)
{
    int i;
    size_t done;
    void *iov_base;
    uint64_t iov_len;

    assert(dst->nalloc != -1);

    done = 0;
    for (i = 0; (i < src->niov) && (done != size); i++) {
        if (skip >= src->iov[i].iov_len) {
            /* Skip the whole iov */
            skip -= src->iov[i].iov_len;
            continue;
        } else {
            /* Skip only part (or nothing) of the iov */
            iov_base = (uint8_t*) src->iov[i].iov_base + skip;
            iov_len = src->iov[i].iov_len - skip;
            skip = 0;
        }

        if (done + iov_len > size) {
            qemu_iovec_add(dst, iov_base, size - done);
            break;
        } else {
            qemu_iovec_add(dst, iov_base, iov_len);
        }
        done += iov_len;
    }
}
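Below is a minimal caller sketch, not from the original source: it assumes QEMU-tree headers that declare QEMUIOVector and the qemu_iovec_* helpers, and the helper name and the 1024/512 byte values are illustrative only.

static void copy_middle_chunk(QEMUIOVector *src)
{
    QEMUIOVector dst;

    /* dst must be created with qemu_iovec_init() so the assert above holds */
    qemu_iovec_init(&dst, src->niov);

    /* skip the first 1024 bytes of src, then copy at most 512 bytes */
    qemu_iovec_copy(&dst, src, 1024, 512);

    /* ... hand &dst to an I/O routine ... */

    qemu_iovec_destroy(&dst);
}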
Example #2
/**
 * Copy contents of I/O vector
 *
 * The relative relationships of overlapping iovecs are preserved.  This is
 * necessary to ensure identical semantics in the cloned I/O vector.
 */
void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
{
    IOVectorSortElem sortelems[src->niov];
    void *last_end;
    int i;

    /* Sort source iovecs by base address */
    for (i = 0; i < src->niov; i++) {
        sortelems[i].src_index = i;
        sortelems[i].src_iov = &src->iov[i];
    }
    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);

    /* Allocate buffer space taking into account overlapping iovecs */
    last_end = NULL;
    for (i = 0; i < src->niov; i++) {
        struct iovec *cur = sortelems[i].src_iov;
        ptrdiff_t rewind = 0;

        /* Detect overlap */
        if (last_end && last_end > cur->iov_base) {
            rewind = last_end - cur->iov_base;
        }

        sortelems[i].dest_base = buf - rewind;
        buf += cur->iov_len - MIN(rewind, cur->iov_len);
        last_end = MAX(cur->iov_base + cur->iov_len, last_end);
    }

    /* Sort by source iovec index and build destination iovec */
    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
    for (i = 0; i < src->niov; i++) {
        qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
    }
}
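A hypothetical caller sketch (assuming QEMU-tree headers and glib's g_malloc): clone the layout of an existing vector onto a freshly allocated bounce buffer. src->size bytes are enough, since overlaps can only shrink the footprint.

static void *clone_into_bounce(QEMUIOVector *dest, const QEMUIOVector *src)
{
    /* src->size is an upper bound on the space the clone can need */
    void *bounce = g_malloc(src->size);

    qemu_iovec_init(dest, src->niov);
    qemu_iovec_clone(dest, src, bounce);   /* dest now points into bounce */

    return bounce;                         /* caller g_free()s it when done */
}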
Example #3
int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
{
    DMADirection dir = (p->pid == USB_TOKEN_IN) ?
        DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
    dma_addr_t len;
    void *mem;
    int i;

    for (i = 0; i < sgl->nsg; i++) {
        len = sgl->sg[i].len;
        mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &len, dir);
        if (!mem) {
            goto err;
        }
        qemu_iovec_add(&p->iov, mem, len);
        if (len != sgl->sg[i].len) {
            goto err;
        }
    }
    return 0;

err:
    usb_packet_unmap(p, sgl);
    return -1;
}
Example #4
static int coroutine_fn
raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                    QEMUIOVector *qiov, int flags)
{
    void *buf = NULL;
    BlockDriver *drv;
    QEMUIOVector local_qiov;
    int ret;

    if (bs->probed && sector_num == 0) {
        /* As long as these conditions are true, we can't get partial writes to
         * the probe buffer and can just directly check the request. */
        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);

        if (nb_sectors == 0) {
            /* qemu_iovec_to_buf() would fail, but we want to return success
             * instead of -EINVAL in this case. */
            return 0;
        }

        buf = qemu_try_blockalign(bs->file->bs, 512);
        if (!buf) {
            ret = -ENOMEM;
            goto fail;
        }

        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
        if (ret != 512) {
            ret = -EINVAL;
            goto fail;
        }

        drv = bdrv_probe_all(buf, 512, NULL);
        if (drv != bs->drv) {
            ret = -EPERM;
            goto fail;
        }

        /* Use the checked buffer, a malicious guest might be overwriting its
         * original buffer in the background. */
        qemu_iovec_init(&local_qiov, qiov->niov + 1);
        qemu_iovec_add(&local_qiov, buf, 512);
        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
        qiov = &local_qiov;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
    ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
                             nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);

fail:
    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(buf);
    return ret;
}
Example #5
/*
 * Copies iovecs from src to the end of dst until src is completely copied or the
 * total size of the copied iovec reaches size. The size of the last copied
 * iovec is changed in order to fit the specified total size if it isn't a
 * perfect fit already.
 */
void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size)
{
    int i;
    size_t done;

    assert(dst->nalloc != -1);

    done = 0;
    for (i = 0; (i < src->niov) && (done != size); i++) {
        if (done + src->iov[i].iov_len > size) {
            qemu_iovec_add(dst, src->iov[i].iov_base, size - done);
            break;
        } else {
            qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len);
        }
        done += src->iov[i].iov_len;
    }
}
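A caller sketch for this older two-argument variant, assuming a QEMU tree that still carries this signature; the helper name and the 4 KiB cap are illustrative only.

static void append_first_4k(QEMUIOVector *dst, QEMUIOVector *src)
{
    /* dst must come from qemu_iovec_init(), not qemu_iovec_init_external() */
    qemu_iovec_concat(dst, src, 4096);     /* append at most 4 KiB of src */
}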
Example #6
static void dma_bdrv_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    target_phys_addr_t cur_addr, cur_len;
    void *mem;

    dbs->acb = NULL;
    dbs->sector_num += dbs->iov.size / 512;
    dma_bdrv_unmap(dbs);
    qemu_iovec_reset(&dbs->iov);

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dbs->common.cb(dbs->common.opaque, ret);
        qemu_iovec_destroy(&dbs->iov);
        qemu_aio_release(dbs);
        return;
    }

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->is_write);
        if (!mem)
            break;
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        cpu_register_map_client(dbs, continue_after_map_failure);
        return;
    }

    if (dbs->is_write) {
        dbs->acb = bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov,
                                   dbs->iov.size / 512, dma_bdrv_cb, dbs);
    } else {
        dbs->acb = bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov,
                                  dbs->iov.size / 512, dma_bdrv_cb, dbs);
    }
    if (!dbs->acb) {
        dma_bdrv_unmap(dbs);
        qemu_iovec_destroy(&dbs->iov);
        return;
    }
}
Example #7
static inline void flash_sync_area(Flash *s, int64_t off, int64_t len)
{
    QEMUIOVector *iov;

    if (!s->blk || blk_is_read_only(s->blk)) {
        return;
    }

    assert(!(len % BDRV_SECTOR_SIZE));
    iov = g_new(QEMUIOVector, 1);
    qemu_iovec_init(iov, 1);
    qemu_iovec_add(iov, s->storage + off, len);
    blk_aio_pwritev(s->blk, off, iov, 0, blk_sync_complete, iov);
}
Example #8
static void flash_sync_page(Flash *s, int page)
{
    if (s->bdrv) {
        int bdrv_sector, nb_sectors;
        QEMUIOVector iov;

        bdrv_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE;
        nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE);
        qemu_iovec_init(&iov, 1);
        qemu_iovec_add(&iov, s->storage + bdrv_sector * BDRV_SECTOR_SIZE,
                                                nb_sectors * BDRV_SECTOR_SIZE);
        bdrv_aio_writev(s->bdrv, bdrv_sector, &iov, nb_sectors,
                                                bdrv_sync_complete, NULL);
    }
}
Example #9
static void flash_sync_page(Flash *s, int page)
{
    QEMUIOVector *iov;

    if (!s->blk || blk_is_read_only(s->blk)) {
        return;
    }

    iov = g_new(QEMUIOVector, 1);
    qemu_iovec_init(iov, 1);
    qemu_iovec_add(iov, s->storage + page * s->pi->page_size,
                   s->pi->page_size);
    blk_aio_pwritev(s->blk, page * s->pi->page_size, iov, 0,
                    blk_sync_complete, iov);
}
Example #10
/*
 * Parse multiple length statements for vectored I/O, and construct an I/O
 * vector matching it.
 */
static void *
create_iovec(QEMUIOVector *qiov, char **argv, int nr_iov, int pattern)
{
    size_t *sizes = g_new0(size_t, nr_iov);
    size_t count = 0;
    void *buf = NULL;
    void *p;
    int i;

    for (i = 0; i < nr_iov; i++) {
        char *arg = argv[i];
        int64_t len;

        len = cvtnum(arg);
        if (len < 0) {
            printf("non-numeric length argument -- %s\n", arg);
            goto fail;
        }

        /* should be SIZE_T_MAX, but that doesn't exist */
        if (len > INT_MAX) {
            printf("too large length argument -- %s\n", arg);
            goto fail;
        }

        if (len & 0x1ff) {
            printf("length argument %" PRId64
                   " is not sector aligned\n", len);
            goto fail;
        }

        sizes[i] = len;
        count += len;
    }

    qemu_iovec_init(qiov, nr_iov);

    buf = p = qemu_io_alloc(count, pattern);

    for (i = 0; i < nr_iov; i++) {
        qemu_iovec_add(qiov, p, sizes[i]);
        p += sizes[i];
    }

fail:
    g_free(sizes);
    return buf;
}
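A hypothetical driver for the helper above, mirroring how a qemu-io style command might use it; the function name and the 0xcd fill pattern are illustrative, and the buffer is released with qemu_vfree() on the assumption that qemu_io_alloc() returns blockalign'd memory.

static void build_patterned_vector(char **argv, int nr_iov)
{
    QEMUIOVector qiov;
    void *buf;

    buf = create_iovec(&qiov, argv, nr_iov, 0xcd);
    if (buf == NULL) {
        return;
    }

    /* ... submit &qiov, e.g. with blk_aio_pwritev() ... */

    qemu_iovec_destroy(&qiov);
    qemu_vfree(buf);
}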
Example #11
static void dma_blk_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_blk_cb(dbs, ret);

    dbs->acb = NULL;
    dbs->sector_num += dbs->iov.size / 512;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }
    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
        if (!mem)
            break;
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        dbs->bh = aio_bh_new(blk_get_aio_context(dbs->blk),
                             reschedule_dma, dbs);
        cpu_register_map_client(dbs->bh);
        return;
    }

    if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
        qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
    }

    dbs->acb = dbs->io_func(dbs->blk, dbs->sector_num, &dbs->iov,
                            dbs->iov.size / 512, dma_blk_cb, dbs);
    assert(dbs->acb);
}
Example #12
static void flash_sync_page(Flash *s, int page)
{
    int blk_sector, nb_sectors;
    QEMUIOVector iov;

    if (!s->blk || blk_is_read_only(s->blk)) {
        return;
    }

    blk_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE;
    nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE);
    qemu_iovec_init(&iov, 1);
    qemu_iovec_add(&iov, s->storage + blk_sector * BDRV_SECTOR_SIZE,
                   nb_sectors * BDRV_SECTOR_SIZE);
    blk_aio_writev(s->blk, blk_sector, &iov, nb_sectors, blk_sync_complete,
                   NULL);
}
Example #13
static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
                                   int start, int num_reqs, int niov)
{
    QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
    int64_t sector_num = mrb->reqs[start]->sector_num;
    int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
    bool is_write = mrb->is_write;

    if (num_reqs > 1) {
        int i;
        struct iovec *tmp_iov = qiov->iov;
        int tmp_niov = qiov->niov;

        /* mrb->reqs[start]->qiov was initialized externally, so we can't
         * modify it here. We need to initialize it locally and then add the
         * external iovecs. */
        qemu_iovec_init(qiov, niov);

        for (i = 0; i < tmp_niov; i++) {
            qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
        }

        for (i = start + 1; i < start + num_reqs; i++) {
            qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
                              mrb->reqs[i]->qiov.size);
            mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
            nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
        }
        assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);

        trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num,
                                         nb_sectors, is_write);
        block_acct_merge_done(blk_get_stats(blk),
                              is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
                              num_reqs - 1);
    }

    if (is_write) {
        blk_aio_writev(blk, sector_num, qiov, nb_sectors,
                       virtio_blk_rw_complete, mrb->reqs[start]);
    } else {
        blk_aio_readv(blk, sector_num, qiov, nb_sectors,
                      virtio_blk_rw_complete, mrb->reqs[start]);
    }
}
Example #14
static inline void flash_sync_area(Flash *s, int64_t off, int64_t len)
{
    int64_t start, end, nb_sectors;
    QEMUIOVector iov;

    if (!s->blk || blk_is_read_only(s->blk)) {
        return;
    }

    assert(!(len % BDRV_SECTOR_SIZE));
    start = off / BDRV_SECTOR_SIZE;
    end = (off + len) / BDRV_SECTOR_SIZE;
    nb_sectors = end - start;
    qemu_iovec_init(&iov, 1);
    qemu_iovec_add(&iov, s->storage + (start * BDRV_SECTOR_SIZE),
                                        nb_sectors * BDRV_SECTOR_SIZE);
    blk_aio_writev(s->blk, start, &iov, nb_sectors, blk_sync_complete, NULL);
}
Example #15
/*
 * Concatenates (partial) iovecs from src_iov to the end of dst.
 * It starts copying after skipping `soffset' bytes at the
 * beginning of src_iov and adds individual vectors from src_iov
 * to dst, copying up to `sbytes' bytes total, or up to the end
 * of src_iov if that comes first.  This way, it is okay to specify
 * a very large value for `sbytes' to indicate "up to the end
 * of src_iov".
 * Only vector pointers are processed, not the actual data buffers.
 */
void qemu_iovec_concat_iov(QEMUIOVector *dst,
                           struct iovec *src_iov, unsigned int src_cnt,
                           size_t soffset, size_t sbytes)
{
    int i;
    size_t done;
    assert(dst->nalloc != -1);
    for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
        if (soffset < src_iov[i].iov_len) {
            size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
            qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
            done += len;
            soffset = 0;
        } else {
            soffset -= src_iov[i].iov_len;
        }
    }
    assert(soffset == 0); /* offset beyond end of src */
}
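A minimal sketch, assuming QEMU-tree headers: skip a fixed 512-byte header in a raw iovec array and append everything after it, passing SIZE_MAX as `sbytes' in the "up to the end of src_iov" sense described above.

static void append_after_header(QEMUIOVector *dst,
                                struct iovec *iov, unsigned int iovcnt)
{
    /* iov must cover at least 512 bytes, or the assert above fires */
    qemu_iovec_concat_iov(dst, iov, iovcnt, 512, SIZE_MAX);
}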
Example #16
/*
 * Concatenates (partial) iovecs from src to the end of dst.
 * It starts copying after skipping `soffset' bytes at the
 * beginning of src and adds individual vectors from src to
 * dst, copying up to `sbytes' bytes total, or up to the end
 * of src if that comes first.  This way, it is okay to specify
 * a very large value for `sbytes' to indicate "up to the end
 * of src".
 * Only vector pointers are processed, not the actual data buffers.
 */
void qemu_iovec_concat(QEMUIOVector *dst,
                       QEMUIOVector *src, size_t soffset, size_t sbytes)
{
    int i;
    size_t done;
    struct iovec *siov = src->iov;
    assert(dst->nalloc != -1);
    assert(src->size >= soffset);
    for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
        if (soffset < siov[i].iov_len) {
            size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
            qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
            done += len;
            soffset = 0;
        } else {
            soffset -= siov[i].iov_len;
        }
    }
    /* return done; */
}
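A sketch, not from the original source, of splitting one request vector in two without copying payload (only the vector pointers are duplicated); both halves must later be destroyed by the caller.

static void split_in_half(QEMUIOVector *src,
                          QEMUIOVector *head, QEMUIOVector *tail)
{
    size_t half = src->size / 2;

    qemu_iovec_init(head, src->niov);
    qemu_iovec_init(tail, src->niov);
    qemu_iovec_concat(head, src, 0, half);
    qemu_iovec_concat(tail, src, half, src->size - half);
}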
Example #17
static void dma_bdrv_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_bdrv_cb(dbs, ret);

    dbs->acb = NULL;
    dbs->sector_num += dbs->iov.size / 512;
    dma_bdrv_unmap(dbs);

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
        if (!mem)
            break;
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        cpu_register_map_client(dbs, continue_after_map_failure);
        return;
    }

    dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
                            dbs->iov.size / 512, dma_bdrv_cb, dbs);
    assert(dbs->acb);
}
Example #18
File: nvme.c Project: CTU-IIG/qemu
static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                             uint64_t prp2, uint32_t len, NvmeCtrl *n)
{
    hwaddr trans_len = n->page_size - (prp1 % n->page_size);
    trans_len = MIN(len, trans_len);
    int num_prps = (len >> n->page_bits) + 1;

    if (unlikely(!prp1)) {
        trace_nvme_err_invalid_prp();
        return NVME_INVALID_FIELD | NVME_DNR;
    } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
        qsg->nsg = 0;
        qemu_iovec_init(iov, num_prps);
        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
    } else {
        pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
        qemu_sglist_add(qsg, prp1, trans_len);
    }
    len -= trans_len;
    if (len) {
        if (unlikely(!prp2)) {
            trace_nvme_err_invalid_prp2_missing();
            goto unmap;
        }
        if (len > n->page_size) {
            uint64_t prp_list[n->max_prp_ents];
            uint32_t nents, prp_trans;
            int i = 0;

            nents = (len + n->page_size - 1) >> n->page_bits;
            prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
            nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
            while (len != 0) {
                uint64_t prp_ent = le64_to_cpu(prp_list[i]);

                if (i == n->max_prp_ents - 1 && len > n->page_size) {
                    if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                        trace_nvme_err_invalid_prplist_ent(prp_ent);
                        goto unmap;
                    }

                    i = 0;
                    nents = (len + n->page_size - 1) >> n->page_bits;
                    prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
                    nvme_addr_read(n, prp_ent, (void *)prp_list,
                        prp_trans);
                    prp_ent = le64_to_cpu(prp_list[i]);
                }

                if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                    trace_nvme_err_invalid_prplist_ent(prp_ent);
                    goto unmap;
                }

                trans_len = MIN(len, n->page_size);
                if (qsg->nsg) {
                    qemu_sglist_add(qsg, prp_ent, trans_len);
                } else {
                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
                }
                len -= trans_len;
                i++;
            }
        } else {
            if (unlikely(prp2 & (n->page_size - 1))) {
Example #19
/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
		  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
		  ioreq->req.operation, ioreq->req.nr_segments,
		  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
	ioreq->prot = PROT_WRITE; /* to memory */
	break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            ioreq->presync = 1;
            return 0;
        }
	if (!syncwrite)
	    ioreq->presync = ioreq->postsync = 1;
	/* fall through */
    case BLKIF_OP_WRITE:
	ioreq->prot = PROT_READ; /* from memory */
	if (syncwrite)
	    ioreq->postsync = 1;
	break;
    default:
	xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
		      ioreq->req.operation);
	goto err;
    };

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
	if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
	    xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
	    goto err;
	}
	if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
	    xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
	    goto err;
	}
	if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
	    xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
	    goto err;
	}

	ioreq->domids[i] = blkdev->xendev.dom;
	ioreq->refs[i]   = ioreq->req.seg[i].gref;

	mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
	len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void*)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
	xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
	goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
Example #20
void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len)
{
    qemu_iovec_add(&p->iov, ptr, len);
}
Example #21
/* Submit async read while handling COW.
 * Returns: The number of bytes copied after and including offset,
 *          excluding any bytes copied prior to offset due to alignment.
 *          This will be @bytes if no alignment is necessary, or
 *          (new_end - offset) if tail is rounded up or down due to
 *          alignment or buffer limit.
 */
static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
                               uint64_t bytes)
{
    BlockBackend *source = s->common.blk;
    int nb_chunks;
    uint64_t ret;
    MirrorOp *op;
    uint64_t max_bytes;

    max_bytes = s->granularity * s->max_iov;

    /* We can only handle as much as buf_size at a time. */
    bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
    assert(bytes);
    assert(bytes < BDRV_REQUEST_MAX_BYTES);
    ret = bytes;

    if (s->cow_bitmap) {
        ret += mirror_cow_align(s, &offset, &bytes);
    }
    assert(bytes <= s->buf_size);
    /* The offset is granularity-aligned because:
     * 1) Caller passes in aligned values;
     * 2) mirror_cow_align is used only when target cluster is larger. */
    assert(QEMU_IS_ALIGNED(offset, s->granularity));
    /* The range is sector-aligned, since bdrv_getlength() rounds up. */
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    nb_chunks = DIV_ROUND_UP(bytes, s->granularity);

    while (s->buf_free_count < nb_chunks) {
        trace_mirror_yield_in_flight(s, offset, s->in_flight);
        mirror_wait_for_io(s);
    }

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->offset = offset;
    op->bytes = bytes;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = bytes - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
    }

    /* Copy the dirty cluster.  */
    s->in_flight++;
    s->bytes_in_flight += bytes;
    trace_mirror_one_iteration(s, offset, bytes);

    blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op);
    return ret;
}
Example #22
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    ioreq->buf = qemu_memalign(XC_PAGE_SIZE, ioreq->size);
    if (ioreq->req.nr_segments &&
        (ioreq->req.operation == BLKIF_OP_WRITE ||
         ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
        ioreq_grant_copy(ioreq)) {
        qemu_vfree(ioreq->buf);
        goto err;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
        return 0;
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        qemu_iovec_add(&ioreq->v, ioreq->buf, ioreq->size);
        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
                         ioreq->v.size, BLOCK_ACCT_READ);
        ioreq->aio_inflight++;
        blk_aio_preadv(blkdev->blk, ioreq->start, &ioreq->v, 0,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!ioreq->req.nr_segments) {
            break;
        }

        qemu_iovec_add(&ioreq->v, ioreq->buf, ioreq->size);
        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
                         ioreq->v.size,
                         ioreq->req.operation == BLKIF_OP_WRITE ?
                         BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH);
        ioreq->aio_inflight++;
        blk_aio_pwritev(blkdev->blk, ioreq->start, &ioreq->v, 0,
                        qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_DISCARD:
    {
        struct blkif_request_discard *req = (void *)&ioreq->req;
        if (!blk_split_discard(ioreq, req->sector_number, req->nr_sectors)) {
            goto err;
        }
        break;
    }
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
Example #23
static coroutine_fn int
block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
                       int remaining_sectors, QEMUIOVector *qiov)
{
    BlockCrypto *crypto = bs->opaque;
    int cur_nr_sectors; /* number of sectors in current iteration */
    uint64_t bytes_done = 0;
    uint8_t *cipher_data = NULL;
    QEMUIOVector hd_qiov;
    int ret = 0;
    size_t payload_offset =
        qcrypto_block_get_payload_offset(crypto->block) / 512;

    qemu_iovec_init(&hd_qiov, qiov->niov);

    /* Bounce buffer so we have a linear mem region for
     * entire sector. XXX optimize so we avoid bounce
     * buffer in case that qiov->niov == 1
     */
    cipher_data =
        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
                                              qiov->size));
    if (cipher_data == NULL) {
        ret = -ENOMEM;
        goto cleanup;
    }

    while (remaining_sectors) {
        cur_nr_sectors = remaining_sectors;

        if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
            cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
        }

        qemu_iovec_to_buf(qiov, bytes_done,
                          cipher_data, cur_nr_sectors * 512);

        if (qcrypto_block_encrypt(crypto->block,
                                  sector_num,
                                  cipher_data, cur_nr_sectors * 512,
                                  NULL) < 0) {
            ret = -EIO;
            goto cleanup;
        }

        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);

        ret = bdrv_co_writev(bs->file,
                             payload_offset + sector_num,
                             cur_nr_sectors, &hd_qiov);
        if (ret < 0) {
            goto cleanup;
        }

        remaining_sectors -= cur_nr_sectors;
        sector_num += cur_nr_sectors;
        bytes_done += cur_nr_sectors * 512;
    }

 cleanup:
    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cipher_data);

    return ret;
}
Example #24
static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(source, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->common.len >> BDRV_SECTOR_BITS;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        qemu_coroutine_yield();
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            qemu_coroutine_yield();
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }

        /* We have enough free space to copy these sectors.  */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
    } while (next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_slice_new(MirrorOp);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, s->granularity);

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty(source, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
}
Example #25
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    uint64_t delay_ns = 0;
    MirrorOp *op;
    int pnum;
    int64_t ret;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->bdev_length / BDRV_SECTOR_SIZE;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        s->waiting_for_io = true;
        qemu_coroutine_yield();
        s->waiting_for_io = false;
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            s->waiting_for_io = true;
            qemu_coroutine_yield();
            s->waiting_for_io = false;
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }
        if (IOV_MAX < nb_chunks + added_chunks) {
            trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
            break;
        }

        /* We have enough free space to copy these sectors.  */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
        if (!s->synced && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
        }
    } while (delay_ns == 0 && next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector
            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);

    ret = bdrv_get_block_status_above(source, NULL, sector_num,
                                      nb_sectors, &pnum);
    if (ret < 0 || pnum < nb_sectors ||
            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                       mirror_read_complete, op);
    } else if (ret & BDRV_BLOCK_ZERO) {
        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                              mirror_write_complete, op);
    } else {
        assert(!(ret & BDRV_BLOCK_DATA));
        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
                         mirror_write_complete, op);
    }
    return delay_ns;
}
Example #26
static int xen_block_do_aio(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;

    if (request->req.nr_segments &&
        (request->req.operation == BLKIF_OP_WRITE ||
         request->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
        xen_block_copy_request(request)) {
        goto err;
    }

    request->aio_inflight++;
    if (request->presync) {
        blk_aio_flush(request->dataplane->blk, xen_block_complete_aio,
                      request);
        return 0;
    }

    switch (request->req.operation) {
    case BLKIF_OP_READ:
        qemu_iovec_add(&request->v, request->buf, request->size);
        block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
                         request->v.size, BLOCK_ACCT_READ);
        request->aio_inflight++;
        blk_aio_preadv(dataplane->blk, request->start, &request->v, 0,
                       xen_block_complete_aio, request);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!request->req.nr_segments) {
            break;
        }

        qemu_iovec_add(&request->v, request->buf, request->size);
        block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
                         request->v.size,
                         request->req.operation == BLKIF_OP_WRITE ?
                         BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH);
        request->aio_inflight++;
        blk_aio_pwritev(dataplane->blk, request->start, &request->v, 0,
                        xen_block_complete_aio, request);
        break;
    case BLKIF_OP_DISCARD:
    {
        struct blkif_request_discard *req = (void *)&request->req;
        if (!xen_block_split_discard(request, req->sector_number,
                                     req->nr_sectors)) {
            goto err;
        }
        break;
    }
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    xen_block_complete_aio(request, 0);

    return 0;

err:
    xen_block_finish_request(request);
    request->status = BLKIF_RSP_ERROR;
    return -1;
}