Beispiel #1
0
/**
 * Update header in-place (does not rewrite backing filename or other strings)
 *
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
 */
static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
                             void *opaque)
{
    /* We must write full sectors for O_DIRECT but cannot necessarily generate
     * the data following the header if an unrecognized compat feature is
     * active.  Therefore, first read the sectors containing the header, update
     * them, and write back.
     */

    int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
                   BDRV_SECTOR_SIZE;
    size_t len = nsectors * BDRV_SECTOR_SIZE;
    QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
                                                    cb, opaque);

    write_header_cb->s = s;
    write_header_cb->nsectors = nsectors;
    write_header_cb->buf = qemu_blockalign(s->bs, len);
    write_header_cb->iov.iov_base = write_header_cb->buf;
    write_header_cb->iov.iov_len = len;
    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);

    bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
                   qed_write_header_read_cb, write_header_cb);
}
Beispiel #2
0
static void *qemu_io_alloc(size_t len, int pattern)
{
	void *buf;

	if (misalign)
		len += MISALIGN_OFFSET;
	buf = qemu_blockalign(bs, len);
	memset(buf, pattern, len);
	if (misalign)
		buf += MISALIGN_OFFSET;
	return buf;
}
Beispiel #3
0
/* Reads the log header, and subsequent descriptors (if any).  This
 * will allocate all the space for buffer, which must be NULL when
 * passed into this function. Each descriptor will also be validated,
 * and error returned if any are invalid. */
static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
                              VHDXLogEntries *log, VHDXLogDescEntries **buffer)
{
    int ret = 0;
    uint32_t desc_sectors;
    uint32_t sectors_read;
    VHDXLogEntryHeader hdr;
    VHDXLogDescEntries *desc_entries = NULL;
    int i;

    assert(*buffer == NULL);

    ret = vhdx_log_peek_hdr(bs, log, &hdr);
    if (ret < 0) {
        goto exit;
    }
    vhdx_log_entry_hdr_le_import(&hdr);
    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
        ret = -EINVAL;
        goto exit;
    }

    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
    desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);

    ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
                                desc_sectors, false);
    if (ret < 0) {
        goto free_and_exit;
    }
    if (sectors_read != desc_sectors) {
        ret = -EINVAL;
        goto free_and_exit;
    }

    /* put in proper endianness, and validate each desc */
    for (i = 0; i < hdr.descriptor_count; i++) {
        vhdx_log_desc_le_import(&desc_entries->desc[i]);
        if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
            ret = -EINVAL;
            goto free_and_exit;
        }
    }

    *buffer = desc_entries;
    goto exit;

free_and_exit:
    qemu_vfree(desc_entries);
exit:
    return ret;
}
Beispiel #4
0
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;
    int i;

    c = g_malloc0(sizeof(*c));
    c->size = num_tables;
    c->entries = g_malloc0(sizeof(*c->entries) * num_tables);

    for (i = 0; i < c->size; i++) {
        c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
    }

    return c;
}
Beispiel #5
0
static int raw_open_common(BlockDriverState *bs, const char *filename,
                           int bdrv_flags, int open_flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, ret;

    posix_aio_init();

    s->lseek_err_cnt = 0;

    s->open_flags = open_flags | O_BINARY;
    s->open_flags &= ~O_ACCMODE;
    if ((bdrv_flags & BDRV_O_ACCESS) == BDRV_O_RDWR) {
        s->open_flags |= O_RDWR;
    } else {
        s->open_flags |= O_RDONLY;
        bs->read_only = 1;
    }

    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((bdrv_flags & BDRV_O_NOCACHE))
        s->open_flags |= O_DIRECT;
    else if (!(bdrv_flags & BDRV_O_CACHE_WB))
        s->open_flags |= O_DSYNC;

    s->fd = -1;
    fd = open(filename, s->open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
    s->aligned_buf = NULL;
    if ((bdrv_flags & BDRV_O_NOCACHE)) {
        s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            ret = -errno;
            close(fd);
            return ret;
        }
    }
    return 0;
}
Beispiel #6
0
/**
 * Write out an updated part or all of a table
 *
 * @s:          QED state
 * @offset:     Offset of table in image file, in bytes
 * @table:      Table
 * @index:      Index of first element
 * @n:          Number of elements
 * @flush:      Whether or not to sync to disk
 * @cb:         Completion function
 * @opaque:     Argument for completion function
 */
static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                            unsigned int index, unsigned int n, bool flush,
                            BlockDriverCompletionFunc *cb, void *opaque)
{
    QEDWriteTableCB *write_table_cb;
    BlockDriverAIOCB *aiocb;
    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
    unsigned int start, end, i;
    size_t len_bytes;

    trace_qed_write_table(s, offset, table, index, n);

    /* Calculate indices of the first and one after last elements */
    start = index & ~sector_mask;
    end = (index + n + sector_mask) & ~sector_mask;

    len_bytes = (end - start) * sizeof(uint64_t);

    write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
    write_table_cb->s = s;
    write_table_cb->orig_table = table;
    write_table_cb->flush = flush;
    write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
    write_table_cb->iov.iov_base = write_table_cb->table->offsets;
    write_table_cb->iov.iov_len = len_bytes;
    qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);

    /* Byteswap table */
    for (i = start; i < end; i++) {
        uint64_t le_offset = cpu_to_le64(table->offsets[i]);
        write_table_cb->table->offsets[i - start] = le_offset;
    }

    /* Adjust for offset into table */
    offset += start * sizeof(uint64_t);

    aiocb = bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
                            &write_table_cb->qiov,
                            write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
                            qed_write_table_cb, write_table_cb);
    if (!aiocb) {
        qed_write_table_cb(write_table_cb, -EIO);
    }
}
Beispiel #7
0
static NBDRequest *nbd_request_get(NBDClient *client)
{
    NBDRequest *req;
    NBDExport *exp = client->exp;

    assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
    client->nb_requests++;

    if (QSIMPLEQ_EMPTY(&exp->requests)) {
        req = g_malloc0(sizeof(NBDRequest));
        req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
    } else {
        req = QSIMPLEQ_FIRST(&exp->requests);
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
    }
    nbd_client_get(client);
    req->client = client;
    return req;
}
Beispiel #8
0
static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
                                            nb_sectors, cb, opaque);

    acb->verify = blkverify_verify_readv;
    acb->buf = qemu_blockalign(bs->file, qiov->size);
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
    blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);

    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
                   blkverify_aio_cb, acb);
    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
                   blkverify_aio_cb, acb);
    return &acb->common;
}
Beispiel #9
0
static int compare_full_images (void)
{
    CompareFullCB *cf;
    int old_copy_on_read = FALSE;

    printf ("Performing a full comparison of the truth image and "
            "the test image...\n");

    if (!strncmp (bs->drv->format_name, "fvd", 3)) {
        /* Disable copy-on-read when scanning through the entire image. */
        old_copy_on_read = fvd_get_copy_on_read (bs);
        fvd_set_copy_on_read (bs, FALSE);
    }

    cf = g_malloc(sizeof(CompareFullCB));
    cf->max_nb_sectors = 1048576L / 512;
    cf->nb_sectors = MIN (cf->max_nb_sectors, total_sectors);
    if (posix_memalign ((void **) &cf->truth_buf, 512,
                        cf->max_nb_sectors * 512) != 0) {
        die ("posix_memalign");
    }
    cf->iov.iov_base = qemu_blockalign (bs, cf->max_nb_sectors * 512);
    cf->iov.iov_len = cf->nb_sectors * 512;
    cf->sector_num = 0;
    qemu_iovec_init_external (&cf->qiov, &cf->iov, 1);
    if (!bdrv_aio_readv (bs, cf->sector_num, &cf->qiov,
                         cf->nb_sectors, compare_full_images_cb, cf)) {
        die ("bdrv_aio_readv\n");
    }

    sim_all_tasks ();

    if (!strncmp (bs->drv->format_name, "fvd", 3)) {
        fvd_set_copy_on_read (bs, old_copy_on_read);
    }

    return 0;
}
Beispiel #10
0
QEDTable *qed_alloc_table(BDRVQEDState *s)
{
    /* Honor O_DIRECT memory alignment requirements */
    return qemu_blockalign(s->bs,
                           s->header.cluster_size * s->header.table_size);
}
Beispiel #11
0
int main(int argc, char **argv)
{
    BlockDriverState *bs;
    off_t dev_offset = 0;
    off_t offset = 0;
    bool readonly = false;
    bool disconnect = false;
    const char *bindto = "0.0.0.0";
    int port = NBD_DEFAULT_PORT;
    struct sockaddr_in addr;
    socklen_t addr_len = sizeof(addr);
    off_t fd_size;
    char *device = NULL;
    char *socket = NULL;
    char sockpath[128];
    const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t";
    struct option lopt[] = {
        { "help", 0, NULL, 'h' },
        { "version", 0, NULL, 'V' },
        { "bind", 1, NULL, 'b' },
        { "port", 1, NULL, 'p' },
        { "socket", 1, NULL, 'k' },
        { "offset", 1, NULL, 'o' },
        { "read-only", 0, NULL, 'r' },
        { "partition", 1, NULL, 'P' },
        { "connect", 1, NULL, 'c' },
        { "disconnect", 0, NULL, 'd' },
        { "snapshot", 0, NULL, 's' },
        { "nocache", 0, NULL, 'n' },
        { "shared", 1, NULL, 'e' },
        { "persistent", 0, NULL, 't' },
        { "verbose", 0, NULL, 'v' },
        { NULL, 0, NULL, 0 }
    };
    int ch;
    int opt_ind = 0;
    int li;
    char *end;
    int flags = BDRV_O_RDWR;
    int partition = -1;
    int ret;
    int shared = 1;
    uint8_t *data;
    fd_set fds;
    int *sharing_fds;
    int fd;
    int i;
    int nb_fds = 0;
    int max_fd;
    int persistent = 0;
    uint32_t nbdflags;

    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
        switch (ch) {
        case 's':
            flags |= BDRV_O_SNAPSHOT;
            break;
        case 'n':
            flags |= BDRV_O_NOCACHE;
            break;
        case 'b':
            bindto = optarg;
            break;
        case 'p':
            li = strtol(optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid port `%s'", optarg);
            }
            if (li < 1 || li > 65535) {
                errx(EXIT_FAILURE, "Port out of range `%s'", optarg);
            }
            port = (uint16_t)li;
            break;
        case 'o':
                dev_offset = strtoll (optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid offset `%s'", optarg);
            }
            if (dev_offset < 0) {
                errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg);
            }
            break;
        case 'r':
            readonly = true;
            flags &= ~BDRV_O_RDWR;
            break;
        case 'P':
            partition = strtol(optarg, &end, 0);
            if (*end)
                errx(EXIT_FAILURE, "Invalid partition `%s'", optarg);
            if (partition < 1 || partition > 8)
                errx(EXIT_FAILURE, "Invalid partition %d", partition);
            break;
        case 'k':
            socket = optarg;
            if (socket[0] != '/')
                errx(EXIT_FAILURE, "socket path must be absolute\n");
            break;
        case 'd':
            disconnect = true;
            break;
        case 'c':
            device = optarg;
            break;
        case 'e':
            shared = strtol(optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg);
            }
            if (shared < 1) {
                errx(EXIT_FAILURE, "Shared device number must be greater than 0\n");
            }
            break;
	case 't':
	    persistent = 1;
	    break;
        case 'v':
            verbose = 1;
            break;
        case 'V':
            version(argv[0]);
            exit(0);
            break;
        case 'h':
            usage(argv[0]);
            exit(0);
            break;
        case '?':
            errx(EXIT_FAILURE, "Try `%s --help' for more information.",
                 argv[0]);
        }
    }

    if ((argc - optind) != 1) {
        errx(EXIT_FAILURE, "Invalid number of argument.\n"
             "Try `%s --help' for more information.",
             argv[0]);
    }

    if (disconnect) {
        fd = open(argv[optind], O_RDWR);
        if (fd == -1)
            err(EXIT_FAILURE, "Cannot open %s", argv[optind]);

        nbd_disconnect(fd);

        close(fd);

        printf("%s disconnected\n", argv[optind]);

	return 0;
    }

    bdrv_init();

    bs = bdrv_new("hda");
    if (bs == NULL)
        return 1;

    if ((ret = bdrv_open(bs, argv[optind], flags, NULL)) < 0) {
        errno = -ret;
        err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]);
    }

    fd_size = bs->total_sectors * 512;

    if (partition != -1 &&
        find_partition(bs, partition, &dev_offset, &fd_size))
        err(EXIT_FAILURE, "Could not find partition %d", partition);

    if (device) {
        pid_t pid;
        int sock;

        /* want to fail before daemonizing */
        if (access(device, R_OK|W_OK) == -1) {
            err(EXIT_FAILURE, "Could not access '%s'", device);
        }

        if (!verbose) {
            /* detach client and server */
            if (daemon(0, 0) == -1) {
                err(EXIT_FAILURE, "Failed to daemonize");
            }
        }

        if (socket == NULL) {
            snprintf(sockpath, sizeof(sockpath), SOCKET_PATH,
                     basename(device));
            socket = sockpath;
        }

        pid = fork();
        if (pid < 0)
            return 1;
        if (pid != 0) {
            off_t size;
            size_t blocksize;

            ret = 0;
            bdrv_close(bs);

            do {
                sock = unix_socket_outgoing(socket);
                if (sock == -1) {
                    if (errno != ENOENT && errno != ECONNREFUSED) {
                        ret = 1;
                        goto out;
                    }
                    sleep(1);	/* wait children */
                }
            } while (sock == -1);

            fd = open(device, O_RDWR);
            if (fd == -1) {
                ret = 1;
                goto out;
            }

            ret = nbd_receive_negotiate(sock, NULL, &nbdflags,
					&size, &blocksize);
            if (ret == -1) {
                ret = 1;
                goto out;
            }

            ret = nbd_init(fd, sock, size, blocksize);
            if (ret == -1) {
                ret = 1;
                goto out;
            }

            printf("NBD device %s is now connected to file %s\n",
                    device, argv[optind]);

	    /* update partition table */

            show_parts(device);

            ret = nbd_client(fd);
            if (ret) {
                ret = 1;
            }
            close(fd);
 out:
            kill(pid, SIGTERM);
            unlink(socket);

            return ret;
        }
        /* children */
    }

    sharing_fds = qemu_malloc((shared + 1) * sizeof(int));

    if (socket) {
        sharing_fds[0] = unix_socket_incoming(socket);
    } else {
        sharing_fds[0] = tcp_socket_incoming(bindto, port);
    }

    if (sharing_fds[0] == -1)
        return 1;
    max_fd = sharing_fds[0];
    nb_fds++;

    data = qemu_blockalign(bs, NBD_BUFFER_SIZE);
    if (data == NULL)
        errx(EXIT_FAILURE, "Cannot allocate data buffer");

    do {

        FD_ZERO(&fds);
        for (i = 0; i < nb_fds; i++)
            FD_SET(sharing_fds[i], &fds);

        ret = select(max_fd + 1, &fds, NULL, NULL, NULL);
        if (ret == -1)
            break;

        if (FD_ISSET(sharing_fds[0], &fds))
            ret--;
        for (i = 1; i < nb_fds && ret; i++) {
            if (FD_ISSET(sharing_fds[i], &fds)) {
                if (nbd_trip(bs, sharing_fds[i], fd_size, dev_offset,
                    &offset, readonly, data, NBD_BUFFER_SIZE) != 0) {
                    close(sharing_fds[i]);
                    nb_fds--;
                    sharing_fds[i] = sharing_fds[nb_fds];
                    i--;
                }
                ret--;
            }
        }
        /* new connection ? */
        if (FD_ISSET(sharing_fds[0], &fds)) {
            if (nb_fds < shared + 1) {
                sharing_fds[nb_fds] = accept(sharing_fds[0],
                                             (struct sockaddr *)&addr,
                                             &addr_len);
                if (sharing_fds[nb_fds] != -1 &&
                    nbd_negotiate(sharing_fds[nb_fds], fd_size) != -1) {
                        if (sharing_fds[nb_fds] > max_fd)
                            max_fd = sharing_fds[nb_fds];
                        nb_fds++;
                }
            }
        }
    } while (persistent || nb_fds > 1);
    qemu_vfree(data);

    close(sharing_fds[0]);
    bdrv_close(bs);
    qemu_free(sharing_fds);
    if (socket)
        unlink(socket);

    return 0;
}
static void coroutine_fn commit_run(void *opaque)
{
    CommitBlockJob *s = opaque;
    BlockDriverState *active = s->active;
    BlockDriverState *top = s->top;
    BlockDriverState *base = s->base;
    BlockDriverState *overlay_bs = NULL;
    int64_t sector_num, end;
    int ret = 0;
    int n = 0;
    void *buf;
    int bytes_written = 0;
    int64_t base_len;

    ret = s->common.len = bdrv_getlength(top);


    if (s->common.len < 0) {
        goto exit_restore_reopen;
    }

    ret = base_len = bdrv_getlength(base);
    if (base_len < 0) {
        goto exit_restore_reopen;
    }

    if (base_len < s->common.len) {
        ret = bdrv_truncate(base, s->common.len);
        if (ret) {
            goto exit_restore_reopen;
        }
    }

    overlay_bs = bdrv_find_overlay(active, top);

    end = s->common.len >> BDRV_SECTOR_BITS;
    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);

    for (sector_num = 0; sector_num < end; sector_num += n) {
        uint64_t delay_ms = 0;
        bool copy;

wait:
        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that qemu_aio_flush() returns.
         */
        block_job_sleep(&s->common, rt_clock, delay_ms);
        if (block_job_is_cancelled(&s->common)) {
            break;
        }
        /* Copy if allocated above the base */
        ret = bdrv_co_is_allocated_above(top, base, sector_num,
                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
                                         &n);
        copy = (ret == 1);
        trace_commit_one_iteration(s, sector_num, n, ret);
        if (copy) {
            if (s->common.speed) {
                delay_ms = ratelimit_calculate_delay(&s->limit, n);
                if (delay_ms > 0) {
                    goto wait;
                }
            }
            ret = commit_populate(top, base, sector_num, n, buf);
            bytes_written += n * BDRV_SECTOR_SIZE;
        }
        if (ret < 0) {
            if (s->on_error == BLOCK_ERR_STOP_ANY    ||
                s->on_error == BLOCK_ERR_REPORT      ||
                (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) {
                goto exit_free_buf;
            } else {
                n = 0;
                continue;
            }
        }
        /* Publish progress */
        s->common.offset += n * BDRV_SECTOR_SIZE;
    }

    ret = 0;

    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
        /* success */
        ret = bdrv_drop_intermediate(active, top, base);
    }

exit_free_buf:
    qemu_vfree(buf);

exit_restore_reopen:
    /* restore base open flags here if appropriate (e.g., change the base back
     * to r/o). These reopens do not need to be atomic, since we won't abort
     * even on failure here */
    if (s->base_flags != bdrv_get_flags(base)) {
        bdrv_reopen(base, s->base_flags, NULL);
    }
    if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
    }

    block_job_complete(&s->common, ret);
}
Beispiel #13
0
static int coroutine_fn stream_run(Job *job, Error **errp)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
    BlockBackend *blk = s->common.blk;
    BlockDriverState *bs = blk_bs(blk);
    BlockDriverState *base = s->base;
    int64_t len;
    int64_t offset = 0;
    uint64_t delay_ns = 0;
    int error = 0;
    int ret = 0;
    int64_t n = 0; /* bytes */
    void *buf;

    if (!bs->backing) {
        goto out;
    }

    len = bdrv_getlength(bs);
    if (len < 0) {
        ret = len;
        goto out;
    }
    job_progress_set_remaining(&s->common.job, len);

    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);

    /* Turn on copy-on-read for the whole block device so that guest read
     * requests help us make progress.  Only do this when copying the entire
     * backing chain since the copy-on-read operation does not take base into
     * account.
     */
    if (!base) {
        bdrv_enable_copy_on_read(bs);
    }

    for ( ; offset < len; offset += n) {
        bool copy;

        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
        job_sleep_ns(&s->common.job, delay_ns);
        if (job_is_cancelled(&s->common.job)) {
            break;
        }

        copy = false;

        ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n);
        if (ret == 1) {
            /* Allocated in the top, no need to copy.  */
        } else if (ret >= 0) {
            /* Copy if allocated in the intermediate images.  Limit to the
             * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).  */
            ret = bdrv_is_allocated_above(backing_bs(bs), base,
                                          offset, n, &n);

            /* Finish early if end of backing file has been reached */
            if (ret == 0 && n == 0) {
                n = len - offset;
            }

            copy = (ret == 1);
        }
        trace_stream_one_iteration(s, offset, n, ret);
        if (copy) {
            ret = stream_populate(blk, offset, n, buf);
        }
        if (ret < 0) {
            BlockErrorAction action =
                block_job_error_action(&s->common, s->on_error, true, -ret);
            if (action == BLOCK_ERROR_ACTION_STOP) {
                n = 0;
                continue;
            }
            if (error == 0) {
                error = ret;
            }
            if (action == BLOCK_ERROR_ACTION_REPORT) {
                break;
            }
        }
        ret = 0;

        /* Publish progress */
        job_progress_update(&s->common.job, n);
        if (copy) {
            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
        } else {
            delay_ns = 0;
        }
    }

    if (!base) {
        bdrv_disable_copy_on_read(bs);
    }

    /* Do not remove the backing file if an error was there but ignored.  */
    ret = error;

    qemu_vfree(buf);

out:
    /* Modify backing chain and close BDSes in main loop */
    return ret;
}
Beispiel #14
0
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[1024];
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len <= 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        bdrv_get_info(s->target, &bdi);
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(bs, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(bs);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that qemu_aio_flush() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                mirror_iteration(s);
                continue;
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
            } else {
                delay_ns = 0;
            }

            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_set_dirty_tracking(bs, 0);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_unref(s->target);
    block_job_completed(&s->common, ret);
}
Beispiel #15
0
/* Flushes the descriptor described by desc to the VHDX image file.
 * If the descriptor is a data descriptor, than 'data' must be non-NULL,
 * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
 * written.
 *
 * Verification is performed to make sure the sequence numbers of a data
 * descriptor match the sequence number in the desc.
 *
 * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
 * In this case, it should be noted that zeroes are written to disk, and the
 * image file is not extended as a sparse file.  */
static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
                               VHDXLogDataSector *data)
{
    int ret = 0;
    uint64_t seq, file_offset;
    uint32_t offset = 0;
    void *buffer = NULL;
    uint64_t count = 1;
    int i;

    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);

    if (!memcmp(&desc->signature, "desc", 4)) {
        /* data sector */
        if (data == NULL) {
            ret = -EFAULT;
            goto exit;
        }

        /* The sequence number of the data sector must match that
         * in the descriptor */
        seq = data->sequence_high;
        seq <<= 32;
        seq |= data->sequence_low & 0xffffffff;

        if (seq != desc->sequence_number) {
            ret = -EINVAL;
            goto exit;
        }

        /* Each data sector is in total 4096 bytes, however the first
         * 8 bytes, and last 4 bytes, are located in the descriptor */
        memcpy(buffer, &desc->leading_bytes, 8);
        offset += 8;

        memcpy(buffer+offset, data->data, 4084);
        offset += 4084;

        memcpy(buffer+offset, &desc->trailing_bytes, 4);

    } else if (!memcmp(&desc->signature, "zero", 4)) {
        /* write 'count' sectors of sector */
        memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
        count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
    }

    file_offset = desc->file_offset;

    /* count is only > 1 if we are writing zeroes */
    for (i = 0; i < count; i++) {
        ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
                               VHDX_LOG_SECTOR_SIZE);
        if (ret < 0) {
            goto exit;
        }
        file_offset += VHDX_LOG_SECTOR_SIZE;
    }

exit:
    qemu_vfree(buffer);
    return ret;
}
Beispiel #16
0
/* Flush the entire log (as described by 'logs') to the VHDX image
 * file, and then set the log to 'empty' status once complete.
 *
 * The log entries should be validate prior to flushing */
static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                          VHDXLogSequence *logs)
{
    int ret = 0;
    int i;
    uint32_t cnt, sectors_read;
    uint64_t new_file_size;
    void *data = NULL;
    VHDXLogDescEntries *desc_entries = NULL;
    VHDXLogEntryHeader hdr_tmp = { 0 };

    cnt = logs->count;

    data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);

    ret = vhdx_user_visible_write(bs, s);
    if (ret < 0) {
        goto exit;
    }

    /* each iteration represents one log sequence, which may span multiple
     * sectors */
    while (cnt--) {
        ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
        if (ret < 0) {
            goto exit;
        }
        /* if the log shows a FlushedFileOffset larger than our current file
         * size, then that means the file has been truncated / corrupted, and
         * we must refused to open it / use it */
        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
            ret = -EINVAL;
            goto exit;
        }

        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
        if (ret < 0) {
            goto exit;
        }

        for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
            if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
                /* data sector, so read a sector to flush */
                ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
                                            data, 1, false);
                if (ret < 0) {
                    goto exit;
                }
                if (sectors_read != 1) {
                    ret = -EINVAL;
                    goto exit;
                }
            }

            ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
            if (ret < 0) {
                goto exit;
            }
        }
        if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
            new_file_size = desc_entries->hdr.last_file_offset;
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
                bdrv_truncate(bs->file, new_file_size);
            }
        }
Beispiel #17
0
static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
{
    ssize_t nbytes;
    char *buf;

    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
        /*
         * If there is just a single buffer, and it is properly aligned
         * we can just use plain pread/pwrite without any problems.
         */
        if (aiocb->aio_niov == 1) {
             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
        }
        /*
         * We have more than one iovec, and all are properly aligned.
         *
         * Try preadv/pwritev first and fall back to linearizing the
         * buffer if it's not supported.
         */
        if (preadv_present) {
            nbytes = handle_aiocb_rw_vector(aiocb);
            if (nbytes == aiocb->aio_nbytes ||
                (nbytes < 0 && nbytes != -ENOSYS)) {
                return nbytes;
            }
            preadv_present = false;
        }

        /*
         * XXX(hch): short read/write.  no easy way to handle the reminder
         * using these interfaces.  For now retry using plain
         * pread/pwrite?
         */
    }

    /*
     * Ok, we have to do it the hard way, copy all segments into
     * a single aligned buffer.
     */
    buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
    if (aiocb->aio_type & QEMU_AIO_WRITE) {
        char *p = buf;
        int i;

        for (i = 0; i < aiocb->aio_niov; ++i) {
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
    }

    nbytes = handle_aiocb_rw_linear(aiocb, buf);
    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
        char *p = buf;
        size_t count = aiocb->aio_nbytes, copy;
        int i;

        for (i = 0; i < aiocb->aio_niov && count; ++i) {
            copy = count;
            if (copy > aiocb->aio_iov[i].iov_len) {
                copy = aiocb->aio_iov[i].iov_len;
            }
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
            p     += copy;
            count -= copy;
        }
    }
    qemu_vfree(buf);

    return nbytes;
}
Beispiel #18
0
static BlockDriverAIOCB *rbd_aio_rw_vector(BlockDriverState *bs,
                                           int64_t sector_num,
                                           QEMUIOVector *qiov,
                                           int nb_sectors,
                                           BlockDriverCompletionFunc *cb,
                                           void *opaque, int write)
{
    RBDAIOCB *acb;
    RADOSCB *rcb;
    rbd_completion_t c;
    int64_t off, size;
    char *buf;
    int r;

    BDRVRBDState *s = bs->opaque;

    acb = qemu_aio_get(&rbd_aio_pool, bs, cb, opaque);
    if (!acb) {
        return NULL;
    }
    acb->write = write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;
    acb->cancelled = 0;
    acb->bh = NULL;

    if (write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
    }

    buf = acb->bounce;

    off = sector_num * BDRV_SECTOR_SIZE;
    size = nb_sectors * BDRV_SECTOR_SIZE;

    s->qemu_aio_count++; /* All the RADOSCB */

    rcb = g_malloc(sizeof(RADOSCB));
    rcb->done = 0;
    rcb->acb = acb;
    rcb->buf = buf;
    rcb->s = acb->s;
    rcb->size = size;
    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
    if (r < 0) {
        goto failed;
    }

    if (write) {
        r = rbd_aio_write(s->image, off, size, buf, c);
    } else {
        r = rbd_aio_read(s->image, off, size, buf, c);
    }

    if (r < 0) {
        goto failed;
    }

    return &acb->common;

failed:
    g_free(rcb);
    s->qemu_aio_count--;
    qemu_aio_release(acb);
    return NULL;
}
Beispiel #19
0
static int raw_open_common(BlockDriverState *bs, const char *filename,
                           int bdrv_flags, int open_flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, ret;

    s->open_flags = open_flags | O_BINARY;
    s->open_flags &= ~O_ACCMODE;
    if (bdrv_flags & BDRV_O_RDWR) {
        s->open_flags |= O_RDWR;
    } else {
        s->open_flags |= O_RDONLY;
    }

    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((bdrv_flags & BDRV_O_NOCACHE))
        s->open_flags |= O_DIRECT;
    else if (!(bdrv_flags & BDRV_O_CACHE_WB))
        s->open_flags |= O_DSYNC;

    s->fd = -1;
    fd = qemu_open(filename, s->open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
    s->aligned_buf = NULL;

    if ((bdrv_flags & BDRV_O_NOCACHE)) {
        s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            goto out_close;
        }
    }

#ifdef CONFIG_LINUX_AIO
    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {

        /* We're falling back to POSIX AIO in some cases */
        paio_init();

        s->aio_ctx = laio_init();
        if (!s->aio_ctx) {
            goto out_free_buf;
        }
        s->use_aio = 1;
    } else
#endif
    {
        if (paio_init() < 0) {
            goto out_free_buf;
        }
#ifdef CONFIG_LINUX_AIO
        s->use_aio = 0;
#endif
    }

    return 0;

out_free_buf:
    qemu_vfree(s->aligned_buf);
out_close:
    close(fd);
    return -errno;
}
Beispiel #20
0
/* opens the specified header block from the VHDX file header section */
static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
{
    int ret = 0;
    VHDXHeader *header1;
    VHDXHeader *header2;
    bool h1_valid = false;
    bool h2_valid = false;
    uint64_t h1_seq = 0;
    uint64_t h2_seq = 0;
    uint8_t *buffer;

    header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
    header2 = qemu_blockalign(bs, sizeof(VHDXHeader));

    buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);

    s->headers[0] = header1;
    s->headers[1] = header2;

    /* We have to read the whole VHDX_HEADER_SIZE instead of
     * sizeof(VHDXHeader), because the checksum is over the whole
     * region */
    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
    if (ret < 0) {
        goto fail;
    }
    /* copy over just the relevant portion that we need */
    memcpy(header1, buffer, sizeof(VHDXHeader));
    vhdx_header_le_import(header1);

    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
        !memcmp(&header1->signature, "head", 4)             &&
        header1->version == 1) {
        h1_seq = header1->sequence_number;
        h1_valid = true;
    }

    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
    if (ret < 0) {
        goto fail;
    }
    /* copy over just the relevant portion that we need */
    memcpy(header2, buffer, sizeof(VHDXHeader));
    vhdx_header_le_import(header2);

    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
        !memcmp(&header2->signature, "head", 4)             &&
        header2->version == 1) {
        h2_seq = header2->sequence_number;
        h2_valid = true;
    }

    /* If there is only 1 valid header (or no valid headers), we
     * don't care what the sequence numbers are */
    if (h1_valid && !h2_valid) {
        s->curr_header = 0;
    } else if (!h1_valid && h2_valid) {
        s->curr_header = 1;
    } else if (!h1_valid && !h2_valid) {
        ret = -EINVAL;
        goto fail;
    } else {
        /* If both headers are valid, then we choose the active one by the
         * highest sequence number.  If the sequence numbers are equal, that is
         * invalid */
        if (h1_seq > h2_seq) {
            s->curr_header = 0;
        } else if (h2_seq > h1_seq) {
            s->curr_header = 1;
        } else {
            ret = -EINVAL;
            goto fail;
        }
    }

    ret = 0;

    goto exit;

fail:
    qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found");
    qemu_vfree(header1);
    qemu_vfree(header2);
    s->headers[0] = NULL;
    s->headers[1] = NULL;
exit:
    qemu_vfree(buffer);
    return ret;
}
Beispiel #21
0
int main(int argc, char **argv)
{
    BlockDriverState *bs;
    off_t dev_offset = 0;
    off_t offset = 0;
    uint32_t nbdflags = 0;
    bool disconnect = false;
    const char *bindto = "0.0.0.0";
    int port = NBD_DEFAULT_PORT;
    struct sockaddr_in addr;
    socklen_t addr_len = sizeof(addr);
    off_t fd_size;
    const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t";
    struct option lopt[] = {
        { "help", 0, NULL, 'h' },
        { "version", 0, NULL, 'V' },
        { "bind", 1, NULL, 'b' },
        { "port", 1, NULL, 'p' },
        { "socket", 1, NULL, 'k' },
        { "offset", 1, NULL, 'o' },
        { "read-only", 0, NULL, 'r' },
        { "partition", 1, NULL, 'P' },
        { "connect", 1, NULL, 'c' },
        { "disconnect", 0, NULL, 'd' },
        { "snapshot", 0, NULL, 's' },
        { "nocache", 0, NULL, 'n' },
        { "shared", 1, NULL, 'e' },
        { "persistent", 0, NULL, 't' },
        { "verbose", 0, NULL, 'v' },
        { NULL, 0, NULL, 0 }
    };
    int ch;
    int opt_ind = 0;
    int li;
    char *end;
    int flags = BDRV_O_RDWR;
    int partition = -1;
    int ret;
    int shared = 1;
    uint8_t *data;
    fd_set fds;
    int *sharing_fds;
    int fd;
    int i;
    int nb_fds = 0;
    int max_fd;
    int persistent = 0;
    pthread_t client_thread;

    /* The client thread uses SIGTERM to interrupt the server.  A signal
     * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
     */
    struct sigaction sa_sigterm;
    int sigterm_fd[2];
    if (qemu_pipe(sigterm_fd) == -1) {
        err(EXIT_FAILURE, "Error setting up communication pipe");
    }

    sigterm_wfd = sigterm_fd[1];
    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
    sa_sigterm.sa_handler = termsig_handler;
    sigaction(SIGTERM, &sa_sigterm, NULL);

    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
        switch (ch) {
        case 's':
            flags |= BDRV_O_SNAPSHOT;
            break;
        case 'n':
            flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
            break;
        case 'b':
            bindto = optarg;
            break;
        case 'p':
            li = strtol(optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid port `%s'", optarg);
            }
            if (li < 1 || li > 65535) {
                errx(EXIT_FAILURE, "Port out of range `%s'", optarg);
            }
            port = (uint16_t)li;
            break;
        case 'o':
                dev_offset = strtoll (optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid offset `%s'", optarg);
            }
            if (dev_offset < 0) {
                errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg);
            }
            break;
        case 'r':
            nbdflags |= NBD_FLAG_READ_ONLY;
            flags &= ~BDRV_O_RDWR;
            break;
        case 'P':
            partition = strtol(optarg, &end, 0);
            if (*end)
                errx(EXIT_FAILURE, "Invalid partition `%s'", optarg);
            if (partition < 1 || partition > 8)
                errx(EXIT_FAILURE, "Invalid partition %d", partition);
            break;
        case 'k':
            sockpath = optarg;
            if (sockpath[0] != '/')
                errx(EXIT_FAILURE, "socket path must be absolute\n");
            break;
        case 'd':
            disconnect = true;
            break;
        case 'c':
            device = optarg;
            break;
        case 'e':
            shared = strtol(optarg, &end, 0);
            if (*end) {
                errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg);
            }
            if (shared < 1) {
                errx(EXIT_FAILURE, "Shared device number must be greater than 0\n");
            }
            break;
	case 't':
	    persistent = 1;
	    break;
        case 'v':
            verbose = 1;
            break;
        case 'V':
            version(argv[0]);
            exit(0);
            break;
        case 'h':
            usage(argv[0]);
            exit(0);
            break;
        case '?':
            errx(EXIT_FAILURE, "Try `%s --help' for more information.",
                 argv[0]);
        }
    }

    if ((argc - optind) != 1) {
        errx(EXIT_FAILURE, "Invalid number of argument.\n"
             "Try `%s --help' for more information.",
             argv[0]);
    }

    if (disconnect) {
        fd = open(argv[optind], O_RDWR);
        if (fd == -1)
            err(EXIT_FAILURE, "Cannot open %s", argv[optind]);

        nbd_disconnect(fd);

        close(fd);

        printf("%s disconnected\n", argv[optind]);

	return 0;
    }

    if (device && !verbose) {
        int stderr_fd[2];
        pid_t pid;
        int ret;

        if (qemu_pipe(stderr_fd) == -1) {
            err(EXIT_FAILURE, "Error setting up communication pipe");
        }

        /* Now daemonize, but keep a communication channel open to
         * print errors and exit with the proper status code.
         */
        pid = fork();
        if (pid == 0) {
            close(stderr_fd[0]);
            ret = qemu_daemon(0, 0);

            /* Temporarily redirect stderr to the parent's pipe...  */
            dup2(stderr_fd[1], STDERR_FILENO);
            if (ret == -1) {
                err(EXIT_FAILURE, "Failed to daemonize");
            }

            /* ... close the descriptor we inherited and go on.  */
            close(stderr_fd[1]);
        } else {
            bool errors = false;
            char *buf;

            /* In the parent.  Print error messages from the child until
             * it closes the pipe.
             */
            close(stderr_fd[1]);
            buf = g_malloc(1024);
            while ((ret = read(stderr_fd[0], buf, 1024)) > 0) {
                errors = true;
                ret = qemu_write_full(STDERR_FILENO, buf, ret);
                if (ret == -1) {
                    exit(EXIT_FAILURE);
                }
            }
            if (ret == -1) {
                err(EXIT_FAILURE, "Cannot read from daemon");
            }

            /* Usually the daemon should not print any message.
             * Exit with zero status in that case.
             */
            exit(errors);
        }
    }

    if (device) {
        /* Open before spawning new threads.  In the future, we may
         * drop privileges after opening.
         */
        fd = open(device, O_RDWR);
        if (fd == -1) {
            err(EXIT_FAILURE, "Failed to open %s", device);
        }

        if (sockpath == NULL) {
            sockpath = g_malloc(128);
            snprintf(sockpath, 128, SOCKET_PATH, basename(device));
        }
    }

    bdrv_init();
    atexit(bdrv_close_all);

    bs = bdrv_new("hda");
    srcpath = argv[optind];
    if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) {
        errno = -ret;
        err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]);
    }

    fd_size = bs->total_sectors * 512;

    if (partition != -1 &&
        find_partition(bs, partition, &dev_offset, &fd_size)) {
        err(EXIT_FAILURE, "Could not find partition %d", partition);
    }

    sharing_fds = g_malloc((shared + 1) * sizeof(int));

    if (sockpath) {
        sharing_fds[0] = unix_socket_incoming(sockpath);
    } else {
        sharing_fds[0] = tcp_socket_incoming(bindto, port);
    }

    if (sharing_fds[0] == -1)
        return 1;

    if (device) {
        int ret;

        ret = pthread_create(&client_thread, NULL, nbd_client_thread, &fd);
        if (ret != 0) {
            errx(EXIT_FAILURE, "Failed to create client thread: %s",
                 strerror(ret));
        }
    } else {
        /* Shut up GCC warnings.  */
        memset(&client_thread, 0, sizeof(client_thread));
    }

    max_fd = sharing_fds[0];
    nb_fds++;

    data = qemu_blockalign(bs, NBD_BUFFER_SIZE);
    if (data == NULL) {
        errx(EXIT_FAILURE, "Cannot allocate data buffer");
    }

    do {
        FD_ZERO(&fds);
        FD_SET(sigterm_fd[0], &fds);
        for (i = 0; i < nb_fds; i++)
            FD_SET(sharing_fds[i], &fds);

        do {
            ret = select(max_fd + 1, &fds, NULL, NULL, NULL);
        } while (ret == -1 && errno == EINTR);
        if (ret == -1 || FD_ISSET(sigterm_fd[0], &fds)) {
            break;
        }

        if (FD_ISSET(sharing_fds[0], &fds))
            ret--;
        for (i = 1; i < nb_fds && ret; i++) {
            if (FD_ISSET(sharing_fds[i], &fds)) {
                if (nbd_trip(bs, sharing_fds[i], fd_size, dev_offset,
                    &offset, nbdflags, data, NBD_BUFFER_SIZE) != 0) {
                    close(sharing_fds[i]);
                    nb_fds--;
                    sharing_fds[i] = sharing_fds[nb_fds];
                    i--;
                }
                ret--;
            }
        }
        /* new connection ? */
        if (FD_ISSET(sharing_fds[0], &fds)) {
            if (nb_fds < shared + 1) {
                sharing_fds[nb_fds] = accept(sharing_fds[0],
                                             (struct sockaddr *)&addr,
                                             &addr_len);
                if (sharing_fds[nb_fds] != -1 &&
                    nbd_negotiate(sharing_fds[nb_fds], fd_size, nbdflags) != -1) {
                        if (sharing_fds[nb_fds] > max_fd)
                            max_fd = sharing_fds[nb_fds];
                        nb_fds++;
                }
            }
        }
    } while (persistent || nb_fds > 1);
    qemu_vfree(data);

    close(sharing_fds[0]);
    g_free(sharing_fds);
    if (sockpath) {
        unlink(sockpath);
    }

    if (device) {
        void *ret;
        pthread_join(client_thread, &ret);
        exit(ret != NULL);
    } else {
        exit(EXIT_SUCCESS);
    }
}
Beispiel #22
0
static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
                                       int64_t sector_num,
                                       QEMUIOVector *qiov,
                                       int nb_sectors,
                                       BlockDriverCompletionFunc *cb,
                                       void *opaque,
                                       RBDAIOCmd cmd)
{
    RBDAIOCB *acb;
    RADOSCB *rcb;
    rbd_completion_t c;
    int64_t off, size;
    char *buf;
    int r;

    BDRVRBDState *s = bs->opaque;

    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
    acb->cmd = cmd;
    acb->qiov = qiov;
    if (cmd == RBD_AIO_DISCARD) {
        acb->bounce = NULL;
    } else {
        acb->bounce = qemu_blockalign(bs, qiov->size);
    }
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;
    acb->cancelled = 0;
    acb->bh = NULL;

    if (cmd == RBD_AIO_WRITE) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
    }

    buf = acb->bounce;

    off = sector_num * BDRV_SECTOR_SIZE;
    size = nb_sectors * BDRV_SECTOR_SIZE;

    s->qemu_aio_count++; /* All the RADOSCB */

    rcb = g_malloc(sizeof(RADOSCB));
    rcb->done = 0;
    rcb->acb = acb;
    rcb->buf = buf;
    rcb->s = acb->s;
    rcb->size = size;
    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
    if (r < 0) {
        goto failed;
    }

    switch (cmd) {
    case RBD_AIO_WRITE:
        r = rbd_aio_write(s->image, off, size, buf, c);
        break;
    case RBD_AIO_READ:
        r = rbd_aio_read(s->image, off, size, buf, c);
        break;
    case RBD_AIO_DISCARD:
        r = rbd_aio_discard_wrapper(s->image, off, size, c);
        break;
    default:
        r = -EINVAL;
    }

    if (r < 0) {
        goto failed;
    }

    return &acb->common;

failed:
    g_free(rcb);
    s->qemu_aio_count--;
    qemu_aio_release(acb);
    return NULL;
}
Beispiel #23
0
static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
{
    int ret = 0;
    uint8_t *buffer;
    int offset = 0;
    VHDXRegionTableEntry rt_entry;
    uint32_t i;
    bool bat_rt_found = false;
    bool metadata_rt_found = false;

    /* We have to read the whole 64KB block, because the crc32 is over the
     * whole block */
    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);

    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE);
    if (ret < 0) {
        goto fail;
    }
    memcpy(&s->rt, buffer, sizeof(s->rt));
    le32_to_cpus(&s->rt.signature);
    le32_to_cpus(&s->rt.checksum);
    le32_to_cpus(&s->rt.entry_count);
    le32_to_cpus(&s->rt.reserved);
    offset += sizeof(s->rt);

    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
        memcmp(&s->rt.signature, "regi", 4)) {
        ret = -EINVAL;
        goto fail;
    }

    /* Per spec, maximum region table entry count is 2047 */
    if (s->rt.entry_count > 2047) {
        ret = -EINVAL;
        goto fail;
    }

    for (i = 0; i < s->rt.entry_count; i++) {
        memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
        offset += sizeof(rt_entry);

        leguid_to_cpus(&rt_entry.guid);
        le64_to_cpus(&rt_entry.file_offset);
        le32_to_cpus(&rt_entry.length);
        le32_to_cpus(&rt_entry.data_bits);

        /* see if we recognize the entry */
        if (guid_eq(rt_entry.guid, bat_guid)) {
            /* must be unique; if we have already found it this is invalid */
            if (bat_rt_found) {
                ret = -EINVAL;
                goto fail;
            }
            bat_rt_found = true;
            s->bat_rt = rt_entry;
            continue;
        }

        if (guid_eq(rt_entry.guid, metadata_guid)) {
            /* must be unique; if we have already found it this is invalid */
            if (metadata_rt_found) {
                ret = -EINVAL;
                goto fail;
            }
            metadata_rt_found = true;
            s->metadata_rt = rt_entry;
            continue;
        }

        if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
            /* cannot read vhdx file - required region table entry that
             * we do not understand.  per spec, we must fail to open */
            ret = -ENOTSUP;
            goto fail;
        }
    }
    ret = 0;

fail:
    qemu_vfree(buffer);
    return ret;
}
Beispiel #24
0
static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
{
    BDRVVHDXState *s = bs->opaque;
    int ret = 0;
    uint32_t i;
    uint64_t signature;
    uint32_t data_blocks_cnt, bitmap_blocks_cnt;


    s->bat = NULL;

    qemu_co_mutex_init(&s->lock);

    /* validate the file signature */
    ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
    }
    if (memcmp(&signature, "vhdxfile", 8)) {
        ret = -EINVAL;
        goto fail;
    }

    ret = vhdx_parse_header(bs, s);
    if (ret) {
        goto fail;
    }

    ret = vhdx_parse_log(bs, s);
    if (ret) {
        goto fail;
    }

    ret = vhdx_open_region_tables(bs, s);
    if (ret) {
        goto fail;
    }

    ret = vhdx_parse_metadata(bs, s);
    if (ret) {
        goto fail;
    }
    s->block_size = s->params.block_size;

    /* the VHDX spec dictates that virtual_disk_size is always a multiple of
     * logical_sector_size */
    bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;

    data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
    if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
        data_blocks_cnt++;
    }
    bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
    if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
        bitmap_blocks_cnt++;
    }

    if (s->parent_entries) {
        s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
    } else {
        s->bat_entries = data_blocks_cnt +
                         ((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
    }

    s->bat_offset = s->bat_rt.file_offset;

    if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) {
        /* BAT allocation is not large enough for all entries */
        ret = -EINVAL;
        goto fail;
    }

    s->bat = qemu_blockalign(bs, s->bat_rt.length);

    ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
    if (ret < 0) {
        goto fail;
    }

    for (i = 0; i < s->bat_entries; i++) {
        le64_to_cpus(&s->bat[i]);
    }

    if (flags & BDRV_O_RDWR) {
        ret = -ENOTSUP;
        goto fail;
    }

    /* TODO: differencing files, write */

    /* Disable migration when VHDX images are used */
    error_set(&s->migration_blocker,
            QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
            "vhdx", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;
fail:
    qemu_vfree(s->headers[0]);
    qemu_vfree(s->headers[1]);
    qemu_vfree(s->bat);
    qemu_vfree(s->parent_entries);
    return ret;
}
Beispiel #25
0
/* Metadata initial parser
 *
 * This loads all the metadata entry fields.  This may cause additional
 * fields to be processed (e.g. parent locator, etc..).
 *
 * There are 5 Metadata items that are always required:
 *      - File Parameters (block size, has a parent)
 *      - Virtual Disk Size (size, in bytes, of the virtual drive)
 *      - Page 83 Data (scsi page 83 guid)
 *      - Logical Sector Size (logical sector size in bytes, either 512 or
 *                             4096.  We only support 512 currently)
 *      - Physical Sector Size (512 or 4096)
 *
 * Also, if the File Parameters indicate this is a differencing file,
 * we must also look for the Parent Locator metadata item.
 */
static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
{
    int ret = 0;
    uint8_t *buffer;
    int offset = 0;
    uint32_t i = 0;
    VHDXMetadataTableEntry md_entry;

    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);

    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
                     VHDX_METADATA_TABLE_MAX_SIZE);
    if (ret < 0) {
        goto exit;
    }
    memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
    offset += sizeof(s->metadata_hdr);

    le64_to_cpus(&s->metadata_hdr.signature);
    le16_to_cpus(&s->metadata_hdr.reserved);
    le16_to_cpus(&s->metadata_hdr.entry_count);

    if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
        ret = -EINVAL;
        goto exit;
    }

    s->metadata_entries.present = 0;

    if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
        (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
        ret = -EINVAL;
        goto exit;
    }

    for (i = 0; i < s->metadata_hdr.entry_count; i++) {
        memcpy(&md_entry, buffer + offset, sizeof(md_entry));
        offset += sizeof(md_entry);

        leguid_to_cpus(&md_entry.item_id);
        le32_to_cpus(&md_entry.offset);
        le32_to_cpus(&md_entry.length);
        le32_to_cpus(&md_entry.data_bits);
        le32_to_cpus(&md_entry.reserved2);

        if (guid_eq(md_entry.item_id, file_param_guid)) {
            if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.file_parameters_entry = md_entry;
            s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, virtual_size_guid)) {
            if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.virtual_disk_size_entry = md_entry;
            s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, page83_guid)) {
            if (s->metadata_entries.present & META_PAGE_83_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.page83_data_entry = md_entry;
            s->metadata_entries.present |= META_PAGE_83_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, logical_sector_guid)) {
            if (s->metadata_entries.present &
                META_LOGICAL_SECTOR_SIZE_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.logical_sector_size_entry = md_entry;
            s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, phys_sector_guid)) {
            if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.phys_sector_size_entry = md_entry;
            s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, parent_locator_guid)) {
            if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.parent_locator_entry = md_entry;
            s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
            continue;
        }

        if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
            /* cannot read vhdx file - required region table entry that
             * we do not understand.  per spec, we must fail to open */
            ret = -ENOTSUP;
            goto exit;
        }
    }

    if (s->metadata_entries.present != META_ALL_PRESENT) {
        ret = -ENOTSUP;
        goto exit;
    }

    ret = bdrv_pread(bs->file,
                     s->metadata_entries.file_parameters_entry.offset
                                         + s->metadata_rt.file_offset,
                     &s->params,
                     sizeof(s->params));

    if (ret < 0) {
        goto exit;
    }

    le32_to_cpus(&s->params.block_size);
    le32_to_cpus(&s->params.data_bits);


    /* We now have the file parameters, so we can tell if this is a
     * differencing file (i.e.. has_parent), is dynamic or fixed
     * sized (leave_blocks_allocated), and the block size */

    /* The parent locator required iff the file parameters has_parent set */
    if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
        if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
            /* TODO: parse  parent locator fields */
            ret = -ENOTSUP; /* temp, until differencing files are supported */
            goto exit;
        } else {
            /* if has_parent is set, but there is not parent locator present,
             * then that is an invalid combination */
            ret = -EINVAL;
            goto exit;
        }
    }

    /* determine virtual disk size, logical sector size,
     * and phys sector size */

    ret = bdrv_pread(bs->file,
                     s->metadata_entries.virtual_disk_size_entry.offset
                                           + s->metadata_rt.file_offset,
                     &s->virtual_disk_size,
                     sizeof(uint64_t));
    if (ret < 0) {
        goto exit;
    }
    ret = bdrv_pread(bs->file,
                     s->metadata_entries.logical_sector_size_entry.offset
                                             + s->metadata_rt.file_offset,
                     &s->logical_sector_size,
                     sizeof(uint32_t));
    if (ret < 0) {
        goto exit;
    }
    ret = bdrv_pread(bs->file,
                     s->metadata_entries.phys_sector_size_entry.offset
                                          + s->metadata_rt.file_offset,
                     &s->physical_sector_size,
                     sizeof(uint32_t));
    if (ret < 0) {
        goto exit;
    }

    le64_to_cpus(&s->virtual_disk_size);
    le32_to_cpus(&s->logical_sector_size);
    le32_to_cpus(&s->physical_sector_size);

    if (s->logical_sector_size == 0 || s->params.block_size == 0) {
        ret = -EINVAL;
        goto exit;
    }

    /* both block_size and sector_size are guaranteed powers of 2 */
    s->sectors_per_block = s->params.block_size / s->logical_sector_size;
    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
                     (uint64_t)s->logical_sector_size /
                     (uint64_t)s->params.block_size;

    /* These values are ones we will want to use for division / multiplication
     * later on, and they are all guaranteed (per the spec) to be powers of 2,
     * so we can take advantage of that for shift operations during
     * reads/writes */
    if (s->logical_sector_size & (s->logical_sector_size - 1)) {
        ret = -EINVAL;
        goto exit;
    }
    if (s->sectors_per_block & (s->sectors_per_block - 1)) {
        ret = -EINVAL;
        goto exit;
    }
    if (s->chunk_ratio & (s->chunk_ratio - 1)) {
        ret = -EINVAL;
        goto exit;
    }
    s->block_size = s->params.block_size;
    if (s->block_size & (s->block_size - 1)) {
        ret = -EINVAL;
        goto exit;
    }

    s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
    s->sectors_per_block_bits =   31 - clz32(s->sectors_per_block);
    s->chunk_ratio_bits =         63 - clz64(s->chunk_ratio);
    s->block_size_bits =          31 - clz32(s->block_size);

    ret = 0;

exit:
    qemu_vfree(buffer);
    return ret;
}
Beispiel #26
0
static void perform_test(const char *truth_file, const char *test_file,
                         const char *format, int compare_before,
                         int compare_after)
{
    int flags, i;

    bs = bdrv_new ("hda");
    if (!bs) {
        die ("bdrv_new failed\n");
    }

    BlockDriver *drv = NULL;
    if (format) {
        drv = bdrv_find_format (format);
        if (!drv) {
            die ("Found no driver for format '%s'.\n", format);
        }
    }

    flags = BDRV_O_RDWR | BDRV_O_CACHE_WB;

    if (bdrv_open (bs, test_file, flags, drv) < 0) {
        die ("Failed to open '%s'\n", test_file);
    }

    fd = open (truth_file, O_RDWR | O_LARGEFILE, 0);
    if (fd < 0) {
        perror ("open");
        die ("Failed to open '%s'\n", truth_file);
    }

    int64_t l0 = lseek (fd, 0, SEEK_END);
    int64_t l1 = bdrv_getlength (bs);
    if (l0 < 0 || l1 < 0 || l0 < l1) {
        die ("Mismatch: truth image %s length %" PRId64 ", test image %s "
             "length %" PRId64 "\n", truth_file, l0, test_file, l1);
    }

    total_sectors = l1 / 512;
    if (total_sectors <= 1) {
        die ("Total sectors: %" PRId64 "\n", total_sectors);
    }

    io_size /= 512;
    if (io_size <= 0) {
        io_size = 1;
    } else if (io_size > total_sectors / 2) {
        io_size = total_sectors / 2;
    }

    if (compare_before) {
        if (compare_full_images ()) {
            die ("The original two files do not match.\n");
        }
    }

    if (round > 0) {
        /* Create testers. */
        testers = g_malloc(sizeof(RandomIO) * parallel);
        for (i = 0; i < parallel; i++) {
            RandomIO *r = &testers[i];
            r->test_buf = qemu_blockalign (bs, io_size * 512);
            if (posix_memalign ((void **) &r->truth_buf, 512, io_size * 512)) {
                die ("posix_memalign");
            }
            r->qiov.iov = g_malloc(sizeof(struct iovec) * max_iov);
            r->sector_num = 0;
            r->nb_sectors = 0;
            r->type = OP_READ;
            r->tester = i;
        }
        for (i = 0; i < parallel; i++) {
            perform_next_io (&testers[i]);
        }
    }

    sim_all_tasks ();        /* Run tests. */

    if (round > 0) {
        /* Create testers. */
        if (compare_after) {
            if (compare_full_images ()) {
                die ("The two files do not match after I/O operations.\n");
            }
        }

        for (i = 0; i < parallel; i++) {
            RandomIO *r = &testers[i];
            qemu_vfree (r->test_buf);
            free (r->truth_buf);
            g_free(r->qiov.iov);
        }
        g_free(testers);
    }

    printf ("Test process %d finished successfully\n", getpid ());

    int fvd = (strncmp (bs->drv->format_name, "fvd", 3) == 0);
    bdrv_delete (bs);
    if (fvd) {
        fvd_check_memory_usage ();
    }
    close (fd);
}
Beispiel #27
0
static int img_convert(int argc, char **argv)
{
    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
    int progress = 0, flags;
    const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
    BlockDriver *drv, *proto_drv;
    BlockDriverState **bs = NULL, *out_bs = NULL;
    int64_t total_sectors, nb_sectors, sector_num, bs_offset;
    uint64_t bs_sectors;
    uint8_t * buf = NULL;
    const uint8_t *buf1;
    BlockDriverInfo bdi;
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *out_baseimg_param;
    char *options = NULL;
    const char *snapshot_name = NULL;
    float local_progress;
    int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */

    fmt = NULL;
    out_fmt = "raw";
    cache = "unsafe";
    out_baseimg = NULL;
    compress = 0;
    for(;;) {
        c = getopt(argc, argv, "f:O:B:s:hce6o:pS:t:");
        if (c == -1) {
            break;
        }
        switch(c) {
        case '?':
        case 'h':
            help();
            break;
        case 'f':
            fmt = optarg;
            break;
        case 'O':
            out_fmt = optarg;
            break;
        case 'B':
            out_baseimg = optarg;
            break;
        case 'c':
            compress = 1;
            break;
        case 'e':
            error_report("option -e is deprecated, please use \'-o "
                  "encryption\' instead!");
            return 1;
        case '6':
            error_report("option -6 is deprecated, please use \'-o "
                  "compat6\' instead!");
            return 1;
        case 'o':
            options = optarg;
            break;
        case 's':
            snapshot_name = optarg;
            break;
        case 'S':
        {
            int64_t sval;
            char *end;
            sval = strtosz_suffix(optarg, &end, STRTOSZ_DEFSUFFIX_B);
            if (sval < 0 || *end) {
                error_report("Invalid minimum zero buffer size for sparse output specified");
                return 1;
            }

            min_sparse = sval / BDRV_SECTOR_SIZE;
            break;
        }
        case 'p':
            progress = 1;
            break;
        case 't':
            cache = optarg;
            break;
        }
    }

    bs_n = argc - optind - 1;
    if (bs_n < 1) {
        help();
    }

    out_filename = argv[argc - 1];

    if (options && !strcmp(options, "?")) {
        ret = print_block_option_help(out_filename, out_fmt);
        goto out;
    }

    if (bs_n > 1 && out_baseimg) {
        error_report("-B makes no sense when concatenating multiple input "
                     "images");
        ret = -1;
        goto out;
    }

    qemu_progress_init(progress, 2.0);
    qemu_progress_print(0, 100);

    bs = g_malloc0(bs_n * sizeof(BlockDriverState *));

    total_sectors = 0;
    for (bs_i = 0; bs_i < bs_n; bs_i++) {
        bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt, BDRV_O_FLAGS);
        if (!bs[bs_i]) {
            error_report("Could not open '%s'", argv[optind + bs_i]);
            ret = -1;
            goto out;
        }
        bdrv_get_geometry(bs[bs_i], &bs_sectors);
        total_sectors += bs_sectors;
    }

    if (snapshot_name != NULL) {
        if (bs_n > 1) {
            error_report("No support for concatenating multiple snapshot");
            ret = -1;
            goto out;
        }
        if (bdrv_snapshot_load_tmp(bs[0], snapshot_name) < 0) {
            error_report("Failed to load snapshot");
            ret = -1;
            goto out;
        }
    }

    /* Find driver and parse its options */
    drv = bdrv_find_format(out_fmt);
    if (!drv) {
        error_report("Unknown file format '%s'", out_fmt);
        ret = -1;
        goto out;
    }

    proto_drv = bdrv_find_protocol(out_filename);
    if (!proto_drv) {
        error_report("Unknown protocol '%s'", out_filename);
        ret = -1;
        goto out;
    }

    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_report("Invalid options for file format '%s'.", out_fmt);
            ret = -1;
            goto out;
        }
    } else {
        param = parse_option_parameters("", create_options, param);
    }

    set_option_parameter_int(param, BLOCK_OPT_SIZE, total_sectors * 512);
    ret = add_old_style_options(out_fmt, param, out_baseimg, NULL);
    if (ret < 0) {
        goto out;
    }

    /* Get backing file name if -o backing_file was used */
    out_baseimg_param = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (out_baseimg_param) {
        out_baseimg = out_baseimg_param->value.s;
    }

    /* Check if compression is supported */
    if (compress) {
        QEMUOptionParameter *encryption =
            get_option_parameter(param, BLOCK_OPT_ENCRYPT);
        QEMUOptionParameter *preallocation =
            get_option_parameter(param, BLOCK_OPT_PREALLOC);

        if (!drv->bdrv_write_compressed) {
            error_report("Compression not supported for this file format");
            ret = -1;
            goto out;
        }

        if (encryption && encryption->value.n) {
            error_report("Compression and encryption not supported at "
                         "the same time");
            ret = -1;
            goto out;
        }

        if (preallocation && preallocation->value.s
            && strcmp(preallocation->value.s, "off"))
        {
            error_report("Compression and preallocation not supported at "
                         "the same time");
            ret = -1;
            goto out;
        }
    }

    /* Create the new image */
    ret = bdrv_create(drv, out_filename, param);
    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_report("Formatting not supported for file format '%s'",
                         out_fmt);
        } else if (ret == -EFBIG) {
            error_report("The image size is too large for file format '%s'",
                         out_fmt);
        } else {
            error_report("%s: error while converting %s: %s",
                         out_filename, out_fmt, strerror(-ret));
        }
        goto out;
    }

    flags = BDRV_O_RDWR;
    ret = bdrv_parse_cache_flags(cache, &flags);
    if (ret < 0) {
        error_report("Invalid cache option: %s", cache);
        return -1;
    }

    out_bs = bdrv_new_open(out_filename, out_fmt, flags);
    if (!out_bs) {
        ret = -1;
        goto out;
    }

    bs_i = 0;
    bs_offset = 0;
    bdrv_get_geometry(bs[0], &bs_sectors);
    buf = qemu_blockalign(out_bs, IO_BUF_SIZE);

    if (compress) {
        ret = bdrv_get_info(out_bs, &bdi);
        if (ret < 0) {
            error_report("could not get block driver info");
            goto out;
        }
        cluster_size = bdi.cluster_size;
        if (cluster_size <= 0 || cluster_size > IO_BUF_SIZE) {
            error_report("invalid cluster size");
            ret = -1;
            goto out;
        }
        cluster_sectors = cluster_size >> 9;
        sector_num = 0;

        nb_sectors = total_sectors;
        local_progress = (float)100 /
            (nb_sectors / MIN(nb_sectors, cluster_sectors));

        for(;;) {
            int64_t bs_num;
            int remainder;
            uint8_t *buf2;

            nb_sectors = total_sectors - sector_num;
            if (nb_sectors <= 0)
                break;
            if (nb_sectors >= cluster_sectors)
                n = cluster_sectors;
            else
                n = nb_sectors;

            bs_num = sector_num - bs_offset;
            assert (bs_num >= 0);
            remainder = n;
            buf2 = buf;
            while (remainder > 0) {
                int nlow;
                while (bs_num == bs_sectors) {
                    bs_i++;
                    assert (bs_i < bs_n);
                    bs_offset += bs_sectors;
                    bdrv_get_geometry(bs[bs_i], &bs_sectors);
                    bs_num = 0;
                    /* printf("changing part: sector_num=%" PRId64 ", "
                       "bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
                       "\n", sector_num, bs_i, bs_offset, bs_sectors); */
                }
                assert (bs_num < bs_sectors);

                nlow = (remainder > bs_sectors - bs_num) ? bs_sectors - bs_num : remainder;

                ret = bdrv_read(bs[bs_i], bs_num, buf2, nlow);
                if (ret < 0) {
                    error_report("error while reading sector %" PRId64 ": %s",
                                 bs_num, strerror(-ret));
                    goto out;
                }

                buf2 += nlow * 512;
                bs_num += nlow;

                remainder -= nlow;
            }
            assert (remainder == 0);

            if (n < cluster_sectors) {
                memset(buf + n * 512, 0, cluster_size - n * 512);
            }
            if (!buffer_is_zero(buf, cluster_size)) {
                ret = bdrv_write_compressed(out_bs, sector_num, buf,
                                            cluster_sectors);
                if (ret != 0) {
                    error_report("error while compressing sector %" PRId64
                                 ": %s", sector_num, strerror(-ret));
                    goto out;
                }
            }
            sector_num += n;
            qemu_progress_print(local_progress, 100);
        }
        /* signal EOF to align */
        bdrv_write_compressed(out_bs, 0, NULL, 0);
    } else {