/*
 * Return the number of payload bytes exposed by the crypto node @bs.
 *
 * The value is the length of the underlying file node (bs->file->bs) minus
 * the payload offset reported by qcrypto_block_get_payload_offset().
 *
 * Returns: the payload length on success; the negative value returned by
 * bdrv_getlength() if querying the underlying node fails; -EIO if the
 * payload offset is negative or larger than the underlying file.
 */
static int64_t block_crypto_getlength(BlockDriverState *bs)
{
    BlockCrypto *crypto = bs->opaque;
    int64_t len = bdrv_getlength(bs->file->bs);
    ssize_t offset;

    if (len < 0) {
        /* Hand the error back untouched instead of offsetting it. */
        return len;
    }

    offset = qcrypto_block_get_payload_offset(crypto->block);
    if (offset < 0 || offset > len) {
        /* Subtracting would produce a meaningless negative length. */
        return -EIO;
    }

    return len - offset;
}
/*
 * Initialise a NAND flash sysbus device.
 *
 * Bus width, size and page/erase shifts are taken from the nand_flash_ids[]
 * entry selected by s->chip_id, the page-size specific init routine is run,
 * and in-memory storage is allocated (filled with 0xff) when needed.
 *
 * Returns 0 on success, -1 when the page size is unsupported or the
 * attached drive is read-only.
 */
static int nand_device_init(SysBusDevice *dev)
{
    NANDFlashState *s = FROM_SYSBUS(NANDFlashState, dev);
    int page_bytes;
    int mem_per_page;

    s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
    s->size = nand_flash_ids[s->chip_id].size << 20;

    if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP)) {
        s->page_shift = nand_flash_ids[s->chip_id].page_shift;
        s->erase_shift = nand_flash_ids[s->chip_id].erase_shift;
    } else {
        s->page_shift = 11;
        s->erase_shift = 6;
    }

    /* Pick the implementation matching the page size. */
    page_bytes = 1 << s->page_shift;
    if (page_bytes == 256) {
        nand_init_256(s);
    } else if (page_bytes == 512) {
        nand_init_512(s);
    } else if (page_bytes == 2048) {
        nand_init_2048(s);
    } else {
        error_report("Unsupported NAND block size");
        return -1;
    }

    /* By default only the OOB area is kept in memory. */
    mem_per_page = 1 << s->oob_shift;
    s->mem_oob = 1;

    if (!s->bdrv) {
        /* No drive at all: page data lives in memory as well. */
        mem_per_page += 1 << s->page_shift;
    } else {
        if (bdrv_is_read_only(s->bdrv)) {
            error_report("Can't use a read-only drive");
            return -1;
        }
        /* Drive large enough for data and OOB: nothing kept in memory. */
        if (bdrv_getlength(s->bdrv) >=
            (s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
            mem_per_page = 0;
            s->mem_oob = 0;
        }
    }

    if (mem_per_page) {
        s->storage = g_malloc(s->pages * mem_per_page);
        memset(s->storage, 0xff, s->pages * mem_per_page);
    }

    /* Give s->ioaddr a sane value in case we save state before it is used. */
    s->ioaddr = s->io;

    return 0;
}
/*
 * Allocate a zero-initialised NBD export for @bs and fill in its fields.
 *
 * @dev_offset and @nbdflags are stored as given.  A @size of -1 means
 * "use whatever bdrv_getlength(bs) reports"; any other value is stored
 * unchanged.
 *
 * Returns the newly allocated export.
 */
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
                          off_t size, uint32_t nbdflags)
{
    NBDExport *export = g_malloc0(sizeof(NBDExport));

    QSIMPLEQ_INIT(&export->requests);
    export->bs = bs;
    export->dev_offset = dev_offset;
    export->nbdflags = nbdflags;

    if (size == -1) {
        export->size = bdrv_getlength(bs);
    } else {
        export->size = size;
    }

    return export;
}
/*
 * Command handler: print the length of the file-scope block device `bs`.
 *
 * On success the length is formatted with cvtstr() and printed; on failure
 * the strerror() text for the error is printed instead.  @argc and @argv
 * are not used.  Always returns 0.
 */
static int length_f(int argc, char **argv)
{
    char text[64];
    int64_t len = bdrv_getlength(bs);

    if (len >= 0) {
        cvtstr(len, text, sizeof(text));
        printf("%s\n", text);
    } else {
        printf("getlength: %s\n", strerror(-len));
    }

    return 0;
}
/*
 * Validate one qcow2 bitmap directory entry.
 *
 * Returns 0 if the entry is acceptable, -EINVAL if any field is out of
 * range or the bitmap table is too small for the image, or the negative
 * value returned by bdrv_getlength() if the image length cannot be read.
 */
static int check_dir_entry(BlockDriverState *bs, Qcow2BitmapDirEntry *entry)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t table_bytes;
    int64_t image_len;

    /* Bitmap table must exist, be cluster aligned and not be oversized. */
    if (entry->bitmap_table_size == 0 ||
        entry->bitmap_table_offset == 0 ||
        (entry->bitmap_table_offset % s->cluster_size) ||
        entry->bitmap_table_size > BME_MAX_TABLE_SIZE) {
        return -EINVAL;
    }

    /* Granularity must lie within the supported window. */
    if (entry->granularity_bits > BME_MAX_GRANULARITY_BITS ||
        entry->granularity_bits < BME_MIN_GRANULARITY_BITS) {
        return -EINVAL;
    }

    /* No reserved flags, sane name length, and a known bitmap type. */
    if ((entry->flags & BME_RESERVED_FLAGS) ||
        entry->name_size > BME_MAX_NAME_SIZE ||
        entry->type != BT_DIRTY_TRACKING_BITMAP) {
        return -EINVAL;
    }

    table_bytes = (uint64_t)entry->bitmap_table_size * s->cluster_size;

    image_len = bdrv_getlength(bs);
    if (image_len < 0) {
        return image_len;
    }

    if (table_bytes > BME_MAX_PHYS_SIZE) {
        return -EINVAL;
    }

    /*
     * A bitmap still marked IN_USE may legitimately have a stale size (it
     * was not saved correctly after an image resize), so the size check
     * below is applied only to bitmaps without that flag.
     */
    if (entry->flags & BME_FLAG_IN_USE) {
        return 0;
    }

    /*
     * Valid bitmap (loaded, or about to be stored): the allocated bitmap
     * table must be large enough to cover the whole image.
     */
    if (image_len > ((table_bytes * 8) << entry->granularity_bits)) {
        return -EINVAL;
    }

    return 0;
}
static int check_constraints_on_bitmap(BlockDriverState *bs, const char *name, uint32_t granularity, Error **errp) { BDRVQcow2State *s = bs->opaque; int granularity_bits = ctz32(granularity); int64_t len = bdrv_getlength(bs); assert(granularity > 0); assert((granularity & (granularity - 1)) == 0); if (len < 0) { error_setg_errno(errp, -len, "Failed to get size of '%s'", bdrv_get_device_or_node_name(bs)); return len; } if (granularity_bits > BME_MAX_GRANULARITY_BITS) { error_setg(errp, "Granularity exceeds maximum (%llu bytes)", 1ULL << BME_MAX_GRANULARITY_BITS); return -EINVAL; } if (granularity_bits < BME_MIN_GRANULARITY_BITS) { error_setg(errp, "Granularity is under minimum (%llu bytes)", 1ULL << BME_MIN_GRANULARITY_BITS); return -EINVAL; } if ((len > (uint64_t)BME_MAX_PHYS_SIZE << granularity_bits) || (len > (uint64_t)BME_MAX_TABLE_SIZE * s->cluster_size << granularity_bits)) { error_setg(errp, "Too much space will be occupied by the bitmap. " "Use larger granularity"); return -EINVAL; } if (strlen(name) > BME_MAX_NAME_SIZE) { error_setg(errp, "Name length exceeds maximum (%u characters)", BME_MAX_NAME_SIZE); return -EINVAL; } return 0; }
static int64_t dmg_find_koly_offset(BdrvChild *file, Error **errp) { BlockDriverState *file_bs = file->bs; int64_t length; int64_t offset = 0; uint8_t buffer[515]; int i, ret; /* bdrv_getlength returns a multiple of block size (512), rounded up. Since * dmg images can have odd sizes, try to look for the "koly" magic which * marks the begin of the UDIF trailer (512 bytes). This magic can be found * in the last 511 bytes of the second-last sector or the first 4 bytes of * the last sector (search space: 515 bytes) */ length = bdrv_getlength(file_bs); if (length < 0) { error_setg_errno(errp, -length, "Failed to get file size while reading UDIF trailer"); return length; } else if (length < 512) { error_setg(errp, "dmg file must be at least 512 bytes long"); return -EINVAL; } if (length > 511 + 512) { offset = length - 511 - 512; } length = length < 515 ? length : 515; ret = bdrv_pread(file, offset, buffer, length); if (ret < 0) { error_setg_errno(errp, -ret, "Failed while reading UDIF trailer"); return ret; } for (i = 0; i < length - 3; i++) { if (buffer[i] == 'k' && buffer[i+1] == 'o' && buffer[i+2] == 'l' && buffer[i+3] == 'y') { return offset + i; } } error_setg(errp, "Could not locate UDIF trailer in dmg file"); return -EINVAL; }
/*
 * Initialise the sPAPR NVRAM device.
 *
 * With a drive attached, the NVRAM size is the value reported by
 * bdrv_getlength(); without one, a zeroed buffer of DEFAULT_NVRAM_SIZE
 * bytes is allocated.  The nvram-fetch and nvram-store RTAS calls are
 * registered on success.
 *
 * Returns 0 on success, -1 if the size is outside
 * [MIN_NVRAM_SIZE, MAX_NVRAM_SIZE].
 */
static int spapr_nvram_init(VIOsPAPRDevice *dev)
{
    sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);

    if (!nvram->drive) {
        nvram->size = DEFAULT_NVRAM_SIZE;
        nvram->buf = g_malloc0(nvram->size);
    } else {
        nvram->size = bdrv_getlength(nvram->drive);
    }

    if (nvram->size < MIN_NVRAM_SIZE || nvram->size > MAX_NVRAM_SIZE) {
        fprintf(stderr,
                "spapr-nvram must be between %d and %d bytes in size\n",
                MIN_NVRAM_SIZE, MAX_NVRAM_SIZE);
        return -1;
    }

    spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch);
    spapr_rtas_register(RTAS_NVRAM_STORE, "nvram-store", rtas_nvram_store);

    return 0;
}
/*
 * Coalesce internal state and copy it to pci i/o region 0.
 *
 * Builds a struct virtio_blk_config from the device configuration and the
 * backing block device, then copies it into @config.
 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *blk = VIRTIO_BLK(vdev);
    struct virtio_blk_config cfg;
    uint64_t cap;
    int lbs = blk->conf->logical_block_size;

    bdrv_get_geometry(blk->bs, &cap);

    memset(&cfg, 0, sizeof(cfg));

    /* Multi-byte fields are stored through the virtio accessors. */
    virtio_stq_p(vdev, &cfg.capacity, cap);
    virtio_stl_p(vdev, &cfg.seg_max, 128 - 2);
    virtio_stl_p(vdev, &cfg.blk_size, lbs);
    virtio_stw_p(vdev, &cfg.cylinders, blk->conf->cyls);
    virtio_stw_p(vdev, &cfg.min_io_size, blk->conf->min_io_size / lbs);
    virtio_stw_p(vdev, &cfg.opt_io_size, blk->conf->opt_io_size / lbs);
    cfg.heads = blk->conf->heads;

    /*
     * The block device capacity must be a multiple of the logical block
     * size.  When it is not, sector_mask is used to adapt the geometry so
     * that the picture is correct.  When the capacity already fits the
     * given geometry the sector value is left untouched, because some
     * devices (like s390 dasd) need a specific value: there the capacity
     * is already cyls*heads*secs*blk_size, and the sector value is not the
     * block size divided by 512 but the number of blk_size blocks per
     * track (cylinder).
     */
    cfg.sectors = blk->conf->secs;
    if (bdrv_getlength(blk->bs) / blk->conf->heads / blk->conf->secs % lbs) {
        cfg.sectors = blk->conf->secs & ~blk->sector_mask;
    }

    cfg.size_max = 0;
    cfg.physical_block_exp = get_physical_block_exp(blk->conf);
    cfg.alignment_offset = 0;
    cfg.wce = bdrv_enable_write_cache(blk->bs);

    memcpy(config, &cfg, sizeof(struct virtio_blk_config));
}
/*
 * Reserve the next chunk of data storage for the FVD image @bs.
 *
 * If the current storage cannot hold another chunk, it is grown first:
 * either by running s->add_storage_cmd through system(), or by
 * bdrv_truncate() on s->fvd_data.  The available size is then re-read
 * with bdrv_getlength().
 *
 * Returns the id of the allocated chunk, or EMPTY_TABLE if the length
 * cannot be read or there is still not enough space.
 */
static uint32_t allocate_chunk(BlockDriverState *bs)
{
    BDRVFvdState *s = bs->opaque;
    uint32_t chunk_id;

    /* Check if there is sufficient storage space. */
    if (s->used_storage + s->chunk_size > s->data_storage) {
        int64_t avail;

        if (!s->add_storage_cmd) {
            /* If the image is stored on a file system, the image file size
             * can be increased by bdrv_truncate. */
            int64_t grown = (s->data_offset + s->used_storage +
                             s->storage_grow_unit) * 512;
            bdrv_truncate(s->fvd_data, grown);
        } else if (system(s->add_storage_cmd)) {
            fprintf(stderr, "Error in executing %s\n", s->add_storage_cmd);
        }

        /* Check how much storage is available now. */
        avail = bdrv_getlength(s->fvd_data);
        if (avail < 0) {
            fprintf(stderr, "Error in bdrv_getlength(%s)\n", bs->filename);
            return EMPTY_TABLE;
        }

        s->data_storage = avail / 512 - s->data_offset;
        if (s->used_storage + s->chunk_size > s->data_storage) {
            fprintf(stderr, "Could not allocate more storage space.\n");
            return EMPTY_TABLE;
        }

        QDEBUG("Increased storage to %" PRId64 " bytes.\n", avail);
    }

    chunk_id = s->used_storage / s->chunk_size;
    s->used_storage += s->chunk_size;

    return chunk_id;
}
static int blk_init(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); int index, qflags, have_barriers, info = 0; char *h; /* read xenstore entries */ if (blkdev->params == NULL) { blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params"); h = strchr(blkdev->params, ':'); if (h != NULL) { blkdev->fileproto = blkdev->params; blkdev->filename = h+1; *h = 0; } else { blkdev->fileproto = "<unset>"; blkdev->filename = blkdev->params; } } if (blkdev->mode == NULL) blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode"); if (blkdev->type == NULL) blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type"); if (blkdev->dev == NULL) blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev"); if (blkdev->devtype == NULL) blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type"); /* do we have all we need? */ if (blkdev->params == NULL || blkdev->mode == NULL || blkdev->type == NULL || blkdev->dev == NULL) return -1; /* read-only ? */ if (strcmp(blkdev->mode, "w") == 0) { qflags = BDRV_O_RDWR; } else { qflags = 0; info |= VDISK_READONLY; } /* cdrom ? 
*/ if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) info |= VDISK_CDROM; /* init qemu block driver */ index = (blkdev->xendev.dev - 202 * 256) / 16; blkdev->dinfo = drive_get(IF_XEN, 0, index); if (!blkdev->dinfo) { /* setup via xenbus -> create new block driver instance */ xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); blkdev->bs = bdrv_new(blkdev->dev); if (bdrv_open(blkdev->bs, blkdev->filename, qflags, bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) { bdrv_delete(blkdev->bs); return -1; } } else { /* setup via qemu cmdline -> already setup for us */ xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n"); blkdev->bs = blkdev->dinfo->bdrv; } blkdev->file_blk = BLOCK_SIZE; blkdev->file_size = bdrv_getlength(blkdev->bs); if (blkdev->file_size < 0) { xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n", (int)blkdev->file_size, strerror(-blkdev->file_size), blkdev->bs->drv ? blkdev->bs->drv->format_name : "-"); blkdev->file_size = 0; } have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0; xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," " size %" PRId64 " (%" PRId64 " MB)\n", blkdev->type, blkdev->fileproto, blkdev->filename, blkdev->file_size, blkdev->file_size >> 20); /* fill info */ xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers); xenstore_write_be_int(&blkdev->xendev, "info", info); xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); xenstore_write_be_int(&blkdev->xendev, "sectors", blkdev->file_size / blkdev->file_blk); return 0; }
static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; BlockDriverState *bs = s->common.bs; int64_t sector_num, end, length; uint64_t last_pause_ns; BlockDriverInfo bdi; char backing_filename[2]; /* we only need 2 characters because we are only checking for a NULL string */ int ret = 0; int n; if (block_job_is_cancelled(&s->common)) { goto immediate_exit; } s->bdev_length = bdrv_getlength(bs); if (s->bdev_length < 0) { ret = s->bdev_length; goto immediate_exit; } else if (s->bdev_length == 0) { /* Report BLOCK_JOB_READY and wait for complete. */ block_job_event_ready(&s->common); s->synced = true; while (!block_job_is_cancelled(&s->common) && !s->should_complete) { block_job_yield(&s->common); } s->common.cancelled = false; goto immediate_exit; } length = DIV_ROUND_UP(s->bdev_length, s->granularity); s->in_flight_bitmap = bitmap_new(length); /* If we have no backing file yet in the destination, we cannot let * the destination do COW. Instead, we copy sectors around the * dirty data if needed. We need a bitmap to do that. */ bdrv_get_backing_filename(s->target, backing_filename, sizeof(backing_filename)); if (backing_filename[0] && !s->target->backing) { ret = bdrv_get_info(s->target, &bdi); if (ret < 0) { goto immediate_exit; } if (s->granularity < bdi.cluster_size) { s->buf_size = MAX(s->buf_size, bdi.cluster_size); s->cow_bitmap = bitmap_new(length); } } end = s->bdev_length / BDRV_SECTOR_SIZE; s->buf = qemu_try_blockalign(bs, s->buf_size); if (s->buf == NULL) { ret = -ENOMEM; goto immediate_exit; } mirror_free_init(s); last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); if (!s->is_none_mode) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base = s->base; bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target); for (sector_num = 0; sector_num < end; ) { /* Just to make sure we are not exceeding int limit. 
*/ int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, end - sector_num); int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); if (now - last_pause_ns > SLICE_TIME) { last_pause_ns = now; block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0); } if (block_job_is_cancelled(&s->common)) { goto immediate_exit; } ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n); if (ret < 0) { goto immediate_exit; } assert(n > 0); if (ret == 1 || mark_all_dirty) { bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); } sector_num += n; } }
static void perform_test(const char *truth_file, const char *test_file, const char *format, int compare_before, int compare_after) { int flags, i; bs = bdrv_new ("hda"); if (!bs) { die ("bdrv_new failed\n"); } BlockDriver *drv = NULL; if (format) { drv = bdrv_find_format (format); if (!drv) { die ("Found no driver for format '%s'.\n", format); } } flags = BDRV_O_RDWR | BDRV_O_CACHE_WB; if (bdrv_open (bs, test_file, flags, drv) < 0) { die ("Failed to open '%s'\n", test_file); } fd = open (truth_file, O_RDWR | O_LARGEFILE, 0); if (fd < 0) { perror ("open"); die ("Failed to open '%s'\n", truth_file); } int64_t l0 = lseek (fd, 0, SEEK_END); int64_t l1 = bdrv_getlength (bs); if (l0 < 0 || l1 < 0 || l0 < l1) { die ("Mismatch: truth image %s length %" PRId64 ", test image %s " "length %" PRId64 "\n", truth_file, l0, test_file, l1); } total_sectors = l1 / 512; if (total_sectors <= 1) { die ("Total sectors: %" PRId64 "\n", total_sectors); } io_size /= 512; if (io_size <= 0) { io_size = 1; } else if (io_size > total_sectors / 2) { io_size = total_sectors / 2; } if (compare_before) { if (compare_full_images ()) { die ("The original two files do not match.\n"); } } if (round > 0) { /* Create testers. */ testers = g_malloc(sizeof(RandomIO) * parallel); for (i = 0; i < parallel; i++) { RandomIO *r = &testers[i]; r->test_buf = qemu_blockalign (bs, io_size * 512); if (posix_memalign ((void **) &r->truth_buf, 512, io_size * 512)) { die ("posix_memalign"); } r->qiov.iov = g_malloc(sizeof(struct iovec) * max_iov); r->sector_num = 0; r->nb_sectors = 0; r->type = OP_READ; r->tester = i; } for (i = 0; i < parallel; i++) { perform_next_io (&testers[i]); } } sim_all_tasks (); /* Run tests. */ if (round > 0) { /* Create testers. 
*/ if (compare_after) { if (compare_full_images ()) { die ("The two files do not match after I/O operations.\n"); } } for (i = 0; i < parallel; i++) { RandomIO *r = &testers[i]; qemu_vfree (r->test_buf); free (r->truth_buf); g_free(r->qiov.iov); } g_free(testers); } printf ("Test process %d finished successfully\n", getpid ()); int fvd = (strncmp (bs->drv->format_name, "fvd", 3) == 0); bdrv_delete (bs); if (fvd) { fvd_check_memory_usage (); } close (fd); }
/*
 * Coroutine body of the mirror block job.
 *
 * @opaque is the MirrorBlockJob.  The function
 *   1. reads the source length and sets up the in-flight (and, if needed,
 *      copy-on-write) bitmaps and the bounce buffer;
 *   2. unless the mode is MIRROR_SYNC_MODE_NONE, marks every allocated
 *      range of the source dirty;
 *   3. loops copying dirty data via mirror_iteration() until the job is
 *      cancelled, fails, or is asked to complete while source and target
 *      are in sync;
 *   4. releases its resources, optionally swaps target and source, and
 *      reports the result through block_job_completed().
 *
 * The source length is queried exactly once and reused, and a failing
 * bdrv_get_info() on the target aborts the job instead of letting an
 * uninitialised cluster size be consulted.
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[1024];
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len <= 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    /* Reuse the length validated above rather than querying it again. */
    length = (s->common.len + s->granularity - 1) / s->granularity;
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        /* bdi is only meaningful when bdrv_get_info() succeeds. */
        ret = bdrv_get_info(s->target, &bdi);
        if (ret < 0) {
            goto immediate_exit;
        }
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(bs, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(bs);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that qemu_aio_flush() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                mirror_iteration(s);
                continue;
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
            } else {
                delay_ns = 0;
            }

            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_set_dirty_tracking(bs, 0);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_unref(s->target);
    block_job_completed(&s->common, ret);
}
/** * bdrv_query_image_info: * @bs: block device to examine * @p_info: location to store image information * @errp: location to store error information * * Store "flat" image information in @p_info. * * "Flat" means it does *not* query backing image information, * i.e. (*pinfo)->has_backing_image will be set to false and * (*pinfo)->backing_image to NULL even when the image does in fact have * a backing image. * * @p_info will be set only on success. On error, store error in @errp. */ void bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, Error **errp) { int64_t size; const char *backing_filename; BlockDriverInfo bdi; int ret; Error *err = NULL; ImageInfo *info; aio_context_acquire(bdrv_get_aio_context(bs)); size = bdrv_getlength(bs); if (size < 0) { error_setg_errno(errp, -size, "Can't get image size '%s'", bs->exact_filename); goto out; } info = g_new0(ImageInfo, 1); info->filename = g_strdup(bs->filename); info->format = g_strdup(bdrv_get_format_name(bs)); info->virtual_size = size; info->actual_size = bdrv_get_allocated_file_size(bs); info->has_actual_size = info->actual_size >= 0; if (bdrv_is_encrypted(bs)) { info->encrypted = true; info->has_encrypted = true; } if (bdrv_get_info(bs, &bdi) >= 0) { if (bdi.cluster_size != 0) { info->cluster_size = bdi.cluster_size; info->has_cluster_size = true; } info->dirty_flag = bdi.is_dirty; info->has_dirty_flag = true; } info->format_specific = bdrv_get_specific_info(bs); info->has_format_specific = info->format_specific != NULL; backing_filename = bs->backing_file; if (backing_filename[0] != '\0') { char *backing_filename2 = g_malloc0(PATH_MAX); info->backing_filename = g_strdup(backing_filename); info->has_backing_filename = true; bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err); if (err) { /* Can't reconstruct the full backing filename, so we must omit * this field and apply a Best Effort to this query. 
*/ g_free(backing_filename2); backing_filename2 = NULL; error_free(err); err = NULL; } /* Always report the full_backing_filename if present, even if it's the * same as backing_filename. That they are same is useful info. */ if (backing_filename2) { info->full_backing_filename = g_strdup(backing_filename2); info->has_full_backing_filename = true; } if (bs->backing_format[0]) { info->backing_filename_format = g_strdup(bs->backing_format); info->has_backing_filename_format = true; } g_free(backing_filename2); } ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err); switch (ret) { case 0: if (info->snapshots) { info->has_snapshots = true; } break; /* recoverable error */ case -ENOMEDIUM: case -ENOTSUP: error_free(err); break; default: error_propagate(errp, err); qapi_free_ImageInfo(info); goto out; } *p_info = info; out: aio_context_release(bdrv_get_aio_context(bs)); }
/** * bdrv_query_image_info: * @bs: block device to examine * @p_info: location to store image information * @errp: location to store error information * * Store "flat" image information in @p_info. * * "Flat" means it does *not* query backing image information, * i.e. (*pinfo)->has_backing_image will be set to false and * (*pinfo)->backing_image to NULL even when the image does in fact have * a backing image. * * @p_info will be set only on success. On error, store error in @errp. */ void bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, Error **errp) { int64_t size; const char *backing_filename; char backing_filename2[1024]; BlockDriverInfo bdi; int ret; Error *err = NULL; ImageInfo *info; #ifdef __linux__ int fd, attr; #endif size = bdrv_getlength(bs); if (size < 0) { error_setg_errno(errp, -size, "Can't get size of device '%s'", bdrv_get_device_name(bs)); return; } info = g_new0(ImageInfo, 1); info->filename = g_strdup(bs->filename); info->format = g_strdup(bdrv_get_format_name(bs)); info->virtual_size = size; info->actual_size = bdrv_get_allocated_file_size(bs); info->has_actual_size = info->actual_size >= 0; if (bdrv_is_encrypted(bs)) { info->encrypted = true; info->has_encrypted = true; } if (bdrv_get_info(bs, &bdi) >= 0) { if (bdi.cluster_size != 0) { info->cluster_size = bdi.cluster_size; info->has_cluster_size = true; } info->dirty_flag = bdi.is_dirty; info->has_dirty_flag = true; } info->format_specific = bdrv_get_specific_info(bs); info->has_format_specific = info->format_specific != NULL; #ifdef __linux__ /* get NOCOW info */ fd = qemu_open(bs->filename, O_RDONLY | O_NONBLOCK); if (fd >= 0) { if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0 && (attr & FS_NOCOW_FL)) { info->has_nocow = true; info->nocow = true; } qemu_close(fd); } #endif backing_filename = bs->backing_file; if (backing_filename[0] != '\0') { info->backing_filename = g_strdup(backing_filename); info->has_backing_filename = true; bdrv_get_full_backing_filename(bs, 
backing_filename2, sizeof(backing_filename2)); if (strcmp(backing_filename, backing_filename2) != 0) { info->full_backing_filename = g_strdup(backing_filename2); info->has_full_backing_filename = true; } if (bs->backing_format[0]) { info->backing_filename_format = g_strdup(bs->backing_format); info->has_backing_filename_format = true; } } ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err); switch (ret) { case 0: if (info->snapshots) { info->has_snapshots = true; } break; /* recoverable error */ case -ENOMEDIUM: case -ENOTSUP: error_free(err); break; default: error_propagate(errp, err); qapi_free_ImageInfo(info); return; } *p_info = info; }
/*
 * Open a DMG image and build the chunk tables in BDRVDMGState.
 *
 * Steps, as performed by the code below:
 *  - mark @bs read-only and reset the chunk count and four of the chunk
 *    arrays (offsets, lengths, sectors, sectorcounts);
 *  - read a 64-bit info_begin value located 0x1d8 bytes before the end of
 *    bs->file, then check that the 32-bit value at info_begin is 0x100 and
 *    that the following 32-bit count is non-zero;
 *  - walk the entries from info_begin + 0x100 up to info_begin + count.
 *    Each entry starts with a 32-bit count and a 32-bit type; entries of
 *    type 0x6d697368 (ASCII "mish") with count >= 244 contribute
 *    (count - 204) / 40 chunk descriptors of 40 bytes each;
 *  - for every descriptor read type, sector, sector count, offset and
 *    length.  Only chunk types 0x80000005, 1 and 2 are kept; any other
 *    type is skipped (the slot is reused), and a skipped type 0xffffffff
 *    with i > 0 additionally rebases last_in_offset/last_out_offset on the
 *    previous chunk.  Sector counts above DMG_SECTORCOUNTS_MAX and lengths
 *    above DMG_LENGTHS_MAX are rejected with -EINVAL;
 *  - allocate the compressed/uncompressed bounce buffers sized from the
 *    maxima tracked by update_max_chunk_size(), initialise zlib, and
 *    initialise the lock.
 *
 * s->types is reallocated with new_size / 2 bytes because its elements
 * are filled through read_uint32(), while the other arrays are filled
 * through read_uint64().
 *
 * Returns 0 on success.  On failure returns a negative value (a read
 * error, -EINVAL for malformed data or zlib init failure, -ENOMEM when a
 * buffer cannot be allocated) after freeing all arrays and both buffers.
 *
 * NOTE(review): s->types is not reset to NULL alongside the other arrays
 * before being passed to g_realloc()/g_free(); this presumably relies on
 * bs->opaque being zero-initialised by the caller - confirm.
 */
static int dmg_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVDMGState *s = bs->opaque; uint64_t info_begin, info_end, last_in_offset, last_out_offset; uint32_t count, tmp; uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i; int64_t offset; int ret; bs->read_only = 1; s->n_chunks = 0; s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; /* read offset of info blocks */ offset = bdrv_getlength(bs->file); if (offset < 0) { ret = offset; goto fail; } offset -= 0x1d8; ret = read_uint64(bs, offset, &info_begin); if (ret < 0) { goto fail; } else if (info_begin == 0) { ret = -EINVAL; goto fail; } ret = read_uint32(bs, info_begin, &tmp); if (ret < 0) { goto fail; } else if (tmp != 0x100) { ret = -EINVAL; goto fail; } ret = read_uint32(bs, info_begin + 4, &count); if (ret < 0) { goto fail; } else if (count == 0) { ret = -EINVAL; goto fail; } info_end = info_begin + count; offset = info_begin + 0x100; /* read offsets */ last_in_offset = last_out_offset = 0; while (offset < info_end) { uint32_t type; ret = read_uint32(bs, offset, &count); if (ret < 0) { goto fail; } else if (count == 0) { ret = -EINVAL; goto fail; } offset += 4; ret = read_uint32(bs, offset, &type); if (ret < 0) { goto fail; } if (type == 0x6d697368 && count >= 244) { size_t new_size; uint32_t chunk_count; offset += 4; offset += 200; chunk_count = (count - 204) / 40; new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count); s->types = g_realloc(s->types, new_size / 2); s->offsets = g_realloc(s->offsets, new_size); s->lengths = g_realloc(s->lengths, new_size); s->sectors = g_realloc(s->sectors, new_size); s->sectorcounts = g_realloc(s->sectorcounts, new_size); for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) { ret = read_uint32(bs, offset, &s->types[i]); if (ret < 0) { goto fail; } offset += 4; if (s->types[i] != 0x80000005 && s->types[i] != 1 && s->types[i] != 2) { if (s->types[i] == 0xffffffff && i > 0) { last_in_offset = s->offsets[i - 1] + 
s->lengths[i - 1]; last_out_offset = s->sectors[i - 1] + s->sectorcounts[i - 1]; } chunk_count--; i--; offset += 36; continue; } offset += 4; ret = read_uint64(bs, offset, &s->sectors[i]); if (ret < 0) { goto fail; } s->sectors[i] += last_out_offset; offset += 8; ret = read_uint64(bs, offset, &s->sectorcounts[i]); if (ret < 0) { goto fail; } offset += 8; if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) { error_report("sector count %" PRIu64 " for chunk %" PRIu32 " is larger than max (%u)", s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX); ret = -EINVAL; goto fail; } ret = read_uint64(bs, offset, &s->offsets[i]); if (ret < 0) { goto fail; } s->offsets[i] += last_in_offset; offset += 8; ret = read_uint64(bs, offset, &s->lengths[i]); if (ret < 0) { goto fail; } offset += 8; if (s->lengths[i] > DMG_LENGTHS_MAX) { error_report("length %" PRIu64 " for chunk %" PRIu32 " is larger than max (%u)", s->lengths[i], i, DMG_LENGTHS_MAX); ret = -EINVAL; goto fail; } update_max_chunk_size(s, i, &max_compressed_size, &max_sectors_per_chunk); } s->n_chunks += chunk_count; } } /* initialize zlib engine */ s->compressed_chunk = qemu_try_blockalign(bs->file, max_compressed_size + 1); s->uncompressed_chunk = qemu_try_blockalign(bs->file, 512 * max_sectors_per_chunk); if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) { ret = -ENOMEM; goto fail; } if (inflateInit(&s->zstream) != Z_OK) { ret = -EINVAL; goto fail; } s->current_chunk = s->n_chunks; qemu_co_mutex_init(&s->lock); return 0; fail: g_free(s->types); g_free(s->offsets); g_free(s->lengths); g_free(s->sectors); g_free(s->sectorcounts); qemu_vfree(s->compressed_chunk); qemu_vfree(s->uncompressed_chunk); return ret; }
/*
 * Coroutine body of the mirror block job.
 *
 * @opaque is the MirrorBlockJob.  The function:
 *  - determines the source length (and, when the target is the base image,
 *    grows the target if the source is larger);
 *  - handles an empty source by reporting readiness and waiting for
 *    completion or cancellation;
 *  - allocates the in-flight bitmap, optionally the COW bitmap, and the
 *    bounce buffer;
 *  - runs the main copy loop until source and target are in sync and
 *    completion was requested, the job is cancelled, or an error occurs;
 *  - releases its resources and defers mirror_exit() to the main loop,
 *    handing over the final return code in a MirrorExitData.
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = s->source;
    BlockDriverState *target_bs = blk_bs(s->target);
    /* Cleared only on the successful path, which exits already drained. */
    bool need_drain = true;
    int64_t length;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for a NULL string */
    int ret = 0;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->bdev_length = bdrv_getlength(bs);
    if (s->bdev_length < 0) {
        ret = s->bdev_length;
        goto immediate_exit;
    }

    /* Active commit must resize the base image if its size differs from the
     * active layer. */
    if (s->base == blk_bs(s->target)) {
        int64_t base_length;

        base_length = blk_getlength(s->target);
        if (base_length < 0) {
            ret = base_length;
            goto immediate_exit;
        }

        if (s->bdev_length > base_length) {
            ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF,
                               NULL);
            if (ret < 0) {
                goto immediate_exit;
            }
        }
    }

    if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
            block_job_yield(&s->common);
        }
        s->common.cancelled = false;
        goto immediate_exit;
    }

    /* Number of granularity-sized units needed to cover the source. */
    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(target_bs, backing_filename,
                              sizeof(backing_filename));
    if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
        s->target_cluster_size = bdi.cluster_size;
    } else {
        /* No usable cluster size reported: fall back to one sector. */
        s->target_cluster_size = BDRV_SECTOR_SIZE;
    }
    if (backing_filename[0] && !target_bs->backing &&
        s->granularity < s->target_cluster_size) {
        s->buf_size = MAX(s->buf_size, s->target_cluster_size);
        s->cow_bitmap = bitmap_new(length);
    }
    s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);

    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
        goto immediate_exit;
    }

    mirror_free_init(s);

    s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    if (!s->is_none_mode) {
        ret = mirror_dirty_init(s);
        if (ret < 0 || block_job_is_cancelled(&s->common)) {
            goto immediate_exit;
        }
    }

    assert(!s->dbi);
    s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
    for (;;) {
        uint64_t delay_ns = 0;
        int64_t cnt, delta;
        bool should_complete;

        /* An error recorded in s->ret ends the job. */
        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        block_job_pause_point(&s->common);

        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
         * the number of bytes currently being processed; together those are
         * the current remaining operation length */
        block_job_progress_set_remaining(&s->common, s->bytes_in_flight + cnt);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
         * We do so every BLOCK_JOB_SLICE_TIME nanoseconds, or when there is
         * an error, or when the source is clean, whichever comes first. */
        delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
        if (delta < BLOCK_JOB_SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                /* Nothing can be submitted right now: wait for I/O. */
                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
                mirror_wait_for_io(s);
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            if (!s->synced) {
                if (mirror_flush(s) < 0) {
                    /* Go check s->ret.  */
                    continue;
                }
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                block_job_event_ready(&s->common);
                s->synced = true;
            }

            should_complete = s->should_complete ||
                block_job_is_cancelled(&s->common);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs, so pause it now.  Before deciding
             * whether to switch to target check one last time if I/O has
             * come in the meanwhile, and if not flush the data to disk.
             */
            trace_mirror_before_drain(s, cnt);

            bdrv_drained_begin(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            if (cnt > 0 || mirror_flush(s) < 0) {
                bdrv_drained_end(bs);
                continue;
            }

            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            /* The drained section begun above is kept; see the exit path. */
            need_drain = false;
            break;
        }

        ret = 0;

        if (s->synced && !should_complete) {
            /* Idle and in sync: sleep a full slice instead of spinning. */
            delay_ns = (s->in_flight == 0 && cnt == 0 ?
                        BLOCK_JOB_SLICE_TIME : 0);
        }
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        block_job_sleep_ns(&s->common, delay_ns);
        if (block_job_is_cancelled(&s->common) &&
            (!s->synced || s->common.force)) {
            break;
        }
        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || ((s->common.force || !s->synced) &&
               block_job_is_cancelled(&s->common)));
        assert(need_drain);
        mirror_wait_for_all_io(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_dirty_iter_free(s->dbi);

    /* Hand the result to mirror_exit(), which runs in the main loop. */
    data = g_malloc(sizeof(*data));
    data->ret = ret;

    if (need_drain) {
        bdrv_drained_begin(bs);
    }
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
/*
 * Length of a raw image: whatever bdrv_getlength() reports for the
 * underlying bs->file.  The value (including a negative error code) is
 * passed through unchanged.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    int64_t len = bdrv_getlength(bs->file);

    return len;
}
int initNbd(char* filename) { BlockDriverState *bs; off_t dev_offset = 0; uint32_t nbdflags = 0; bool disconnect = false; const char *bindto = "0.0.0.0"; char *device = NULL; int port = NBD_DEFAULT_PORT; off_t fd_size; const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t"; struct option lopt[] = { { "help", 0, NULL, 'h' }, { "version", 0, NULL, 'V' }, { "bind", 1, NULL, 'b' }, { "port", 1, NULL, 'p' }, { "socket", 1, NULL, 'k' }, { "offset", 1, NULL, 'o' }, { "read-only", 0, NULL, 'r' }, { "partition", 1, NULL, 'P' }, { "connect", 1, NULL, 'c' }, { "disconnect", 0, NULL, 'd' }, { "snapshot", 0, NULL, 's' }, { "nocache", 0, NULL, 'n' }, { "shared", 1, NULL, 'e' }, { "persistent", 0, NULL, 't' }, { "verbose", 0, NULL, 'v' }, { NULL, 0, NULL, 0 } }; int ch; int opt_ind = 0; int li; char *end; int flags = BDRV_O_RDWR; int partition = -1; int ret; int fd; int persistent = 0; pthread_t client_thread; /* The client thread uses SIGTERM to interrupt the server. A signal * handler ensures that "qemu-nbd -v -c" exits with a nice status code. 
*/ struct sigaction sa_sigterm; memset(&sa_sigterm, 0, sizeof(sa_sigterm)); sa_sigterm.sa_handler = termsig_handler; sigaction(SIGTERM, &sa_sigterm, NULL); // while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { // switch (ch) { // case 's': // flags |= BDRV_O_SNAPSHOT; // break; // case 'n': // flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; // break; // case 'b': // bindto = optarg; // break; // case 'p': // li = strtol(optarg, &end, 0); // if (*end) { // errx(EXIT_FAILURE, "Invalid port `%s'", optarg); // } // if (li < 1 || li > 65535) { // errx(EXIT_FAILURE, "Port out of range `%s'", optarg); // } // port = (uint16_t)li; // break; // case 'o': // dev_offset = strtoll (optarg, &end, 0); // if (*end) { // errx(EXIT_FAILURE, "Invalid offset `%s'", optarg); // } // if (dev_offset < 0) { // errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg); // } // break; // case 'r': // nbdflags |= NBD_FLAG_READ_ONLY; // flags &= ~BDRV_O_RDWR; // break; // case 'P': // partition = strtol(optarg, &end, 0); // if (*end) // errx(EXIT_FAILURE, "Invalid partition `%s'", optarg); // if (partition < 1 || partition > 8) // errx(EXIT_FAILURE, "Invalid partition %d", partition); // break; // case 'k': // sockpath = optarg; // if (sockpath[0] != '/') // errx(EXIT_FAILURE, "socket path must be absolute\n"); // break; // case 'd': // disconnect = true; // break; // case 'c': // device = optarg; // break; // case 'e': // shared = strtol(optarg, &end, 0); // if (*end) { // errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg); // } // if (shared < 1) { // errx(EXIT_FAILURE, "Shared device number must be greater than 0\n"); // } // break; // case 't': // persistent = 1; // break; // case 'v': // verbose = 1; // break; // case 'V': // version(argv[0]); // exit(0); // break; // case 'h': // usage(argv[0]); // exit(0); // break; // case '?': // errx(EXIT_FAILURE, "Try `%s --help' for more information.", // argv[0]); // } // } // if ((argc - optind) != 1) { // 
errx(EXIT_FAILURE, "Invalid number of argument.\n" // "Try `%s --help' for more information.", // argv[0]); // } /* Start a daemon! Use client thread to start daemon and write errors; Use parent thread to wait for and write error messages. */ // if (device && !verbose) { // int stderr_fd[2]; // pid_t pid; // int ret; // // //Setting up Pipe, close after succeeded. // if (qemu_pipe(stderr_fd) < 0) { // err(EXIT_FAILURE, "Error setting up communication pipe"); // } // // /* Now daemonize, but keep a communication channel open to // * print errors and exit with the proper status code. // */ // //Fork returns 0 for child process and the pid of child process for the parent process // pid = fork(); // if (pid == 0) { // close(stderr_fd[0]); // ret = qemu_daemon(1, 0); // // /* Temporarily redirect stderr to the parent's pipe... */ // dup2(stderr_fd[1], STDERR_FILENO); // if (ret < 0) { // err(EXIT_FAILURE, "Failed to daemonize"); // } // // /* ... close the descriptor we inherited and go on. */ // close(stderr_fd[1]); // } else { // bool errors = false; // char *buf; // // /* In the parent. Print error messages from the child until // * it closes the pipe. // */ // close(stderr_fd[1]); // buf = g_malloc(1024); // while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { // errors = true; // ret = qemu_write_full(STDERR_FILENO, buf, ret); // if (ret < 0) { // exit(EXIT_FAILURE); // } // } // if (ret < 0) { // err(EXIT_FAILURE, "Cannot read from daemon"); // } // // /* Usually the daemon should not print any message. // * Exit with zero status in that case. // */ // exit(errors); // } // } // // //Set sock path... But what is sock path??? // if (device != NULL && sockpath == NULL) { // sockpath = g_malloc(128); // snprintf(sockpath, 128, SOCKET_PATH, basename(device)); // } //Init a block device! 
bdrv_init(); atexit(bdrv_close_all); //Malloc a new block device state bs = bdrv_new("hda"); srcpath = filename; if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) { errno = -ret; err(EXIT_FAILURE, "Failed to bdrv_open '%s'", srcpath); } fd_size = bdrv_getlength(bs); if (partition != -1) { ret = find_partition(bs, partition, &dev_offset, &fd_size); if (ret < 0) { errno = -ret; err(EXIT_FAILURE, "Could not find partition %d", partition); } } exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags); if (sockpath) { fd = unix_socket_incoming(sockpath); fprintf(stderr, "NBD device running on sock path :%s\n", sockpath); } else { fd = tcp_socket_incoming(bindto, port); fprintf(stderr, "NBD device running on port :%i\n", port); } if (fd < 0) { return 1; } if (device) { int ret; ret = pthread_create(&client_thread, NULL, nbd_client_thread, device); if (ret != 0) { errx(EXIT_FAILURE, "Failed to create client thread: %s", strerror(ret)); } } else { /* Shut up GCC warnings. */ memset(&client_thread, 0, sizeof(client_thread)); } qemu_init_main_loop(); qemu_set_fd_handler2(fd, nbd_can_accept, nbd_accept, NULL, (void *)(uintptr_t)fd); /* now when the initialization is (almost) complete, chdir("/") * to free any busy filesystems */ if (chdir("/") < 0) { err(EXIT_FAILURE, "Could not chdir to root directory"); } do { main_loop_wait(false); } while (!sigterm_reported && (persistent || !nbd_started || nb_fds > 0)); nbd_export_close(exp); if (sockpath) { unlink(sockpath); } if (device) { void *ret; pthread_join(client_thread, &ret); exit(ret != NULL); } else { exit(EXIT_SUCCESS); } }
/*
 * Length of the node: the value bdrv_getlength() reports for the node
 * behind bs->file, passed through unchanged (negative values included).
 */
static int64_t cor_getlength(BlockDriverState *bs)
{
    int64_t len = bdrv_getlength(bs->file->bs);

    return len;
}
static int vpc_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVVPCState *s = bs->opaque; int i; VHDFooter *footer; VHDDynDiskHeader *dyndisk_header; uint8_t buf[HEADER_SIZE]; uint32_t checksum; uint64_t computed_size; uint64_t pagetable_size; int disk_type = VHD_DYNAMIC; int ret; ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE); if (ret < 0) { goto fail; } footer = (VHDFooter *) s->footer_buf; if (strncmp(footer->creator, "conectix", 8)) { int64_t offset = bdrv_getlength(bs->file->bs); if (offset < 0) { ret = offset; goto fail; } else if (offset < HEADER_SIZE) { ret = -EINVAL; goto fail; } /* If a fixed disk, the footer is found only at the end of the file */ ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf, HEADER_SIZE); if (ret < 0) { goto fail; } if (strncmp(footer->creator, "conectix", 8)) { error_setg(errp, "invalid VPC image"); ret = -EINVAL; goto fail; } disk_type = VHD_FIXED; } checksum = be32_to_cpu(footer->checksum); footer->checksum = 0; if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) fprintf(stderr, "block-vpc: The header checksum of '%s' is " "incorrect.\n", bs->filename); /* Write 'checksum' back to footer, or else will leave it with zero. */ footer->checksum = cpu_to_be32(checksum); // The visible size of a image in Virtual PC depends on the geometry // rather than on the size stored in the footer (the size in the footer // is too large usually) bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; /* Images that have exactly the maximum geometry are probably bigger and * would be truncated if we adhered to the geometry for them. Rely on * footer->current_size for them. 
*/ if (bs->total_sectors == VHD_MAX_GEOMETRY) { bs->total_sectors = be64_to_cpu(footer->current_size) / BDRV_SECTOR_SIZE; } /* Allow a maximum disk size of approximately 2 TB */ if (bs->total_sectors >= VHD_MAX_SECTORS) { ret = -EFBIG; goto fail; } if (disk_type == VHD_DYNAMIC) { ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE); if (ret < 0) { goto fail; } dyndisk_header = (VHDDynDiskHeader *) buf; if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { ret = -EINVAL; goto fail; } s->block_size = be32_to_cpu(dyndisk_header->block_size); if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) { error_setg(errp, "Invalid block size %" PRIu32, s->block_size); ret = -EINVAL; goto fail; } s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) { ret = -EINVAL; goto fail; } if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) { ret = -EINVAL; goto fail; } computed_size = (uint64_t) s->max_table_entries * s->block_size; if (computed_size < bs->total_sectors * 512) { ret = -EINVAL; goto fail; } if (s->max_table_entries > SIZE_MAX / 4 || s->max_table_entries > (int) INT_MAX / 4) { error_setg(errp, "Max Table Entries too large (%" PRId32 ")", s->max_table_entries); ret = -EINVAL; goto fail; } pagetable_size = (uint64_t) s->max_table_entries * 4; s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size); if (s->pagetable == NULL) { ret = -ENOMEM; goto fail; } s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable, pagetable_size); if (ret < 0) { goto fail; } s->free_data_block_offset = ROUND_UP(s->bat_offset + pagetable_size, 512); for (i = 0; i < s->max_table_entries; i++) { be32_to_cpus(&s->pagetable[i]); if (s->pagetable[i] != 0xFFFFFFFF) { int64_t next = (512 * (int64_t) s->pagetable[i]) + s->bitmap_size + 
s->block_size; if (next > s->free_data_block_offset) { s->free_data_block_offset = next; } } } if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) { error_setg(errp, "block-vpc: free_data_block_offset points after " "the end of file. The image has been truncated."); ret = -EINVAL; goto fail; } s->last_bitmap_offset = (int64_t) -1; #ifdef CACHE s->pageentry_u8 = g_malloc(512); s->pageentry_u32 = s->pageentry_u8; s->pageentry_u16 = s->pageentry_u8; s->last_pagetable = -1; #endif } qemu_co_mutex_init(&s->lock); /* Disable migration when VHD images are used */ error_setg(&s->migration_blocker, "The vpc format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); migrate_add_blocker(s->migration_blocker); return 0; fail: qemu_vfree(s->pagetable); #ifdef CACHE g_free(s->pageentry_u8); #endif return ret; }
/*
 * Open a DMG image.
 *
 * The offset of the info block is read from a field located 0x1d8 bytes
 * before the end of the underlying file.  Every info-block entry tagged
 * 0x6d697368 ("mish") that is at least 244 bytes long contributes its
 * 40-byte chunk descriptors to the per-chunk tables in @s.  Afterwards the
 * compressed/uncompressed buffers are sized for the largest chunk and the
 * zlib stream is initialised.
 *
 * Returns 0 on success, a negative errno value on failure (all tables and
 * buffers are released in that case).
 */
static int dmg_open(BlockDriverState *bs, int flags)
{
    BDRVDMGState *s = bs->opaque;
    uint64_t info_begin, info_end, last_in_offset, last_out_offset;
    uint32_t count, tmp;
    uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
    int64_t offset;
    int ret;

    bs->read_only = 1;
    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;

    /* read offset of info blocks */
    offset = bdrv_getlength(bs->file);
    if (offset < 0) {
        ret = offset;
        goto fail;
    }
    offset -= 0x1d8;

    ret = read_uint64(bs, offset, &info_begin);
    if (ret < 0) {
        goto fail;
    } else if (info_begin == 0) {
        ret = -EINVAL;
        goto fail;
    }

    ret = read_uint32(bs, info_begin, &tmp);
    if (ret < 0) {
        goto fail;
    } else if (tmp != 0x100) {
        ret = -EINVAL;
        goto fail;
    }

    ret = read_uint32(bs, info_begin + 4, &count);
    if (ret < 0) {
        goto fail;
    } else if (count == 0) {
        ret = -EINVAL;
        goto fail;
    }
    info_end = info_begin + count;

    offset = info_begin + 0x100;

    /* read offsets */
    last_in_offset = last_out_offset = 0;
    while (offset < info_end) {
        uint32_t type;

        ret = read_uint32(bs, offset, &count);
        if (ret < 0) {
            goto fail;
        } else if (count == 0) {
            ret = -EINVAL;
            goto fail;
        }
        offset += 4;

        ret = read_uint32(bs, offset, &type);
        if (ret < 0) {
            goto fail;
        }

        if (type == 0x6d697368 && count >= 244) {
            /* Unsigned types: the values come from the image and must not
             * be able to go negative through signed overflow.
             */
            size_t new_size;
            uint32_t chunk_count;

            offset += 4;
            offset += 200;

            chunk_count = (count - 204) / 40;
            new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
            s->types = g_realloc(s->types, new_size / 2);
            s->offsets = g_realloc(s->offsets, new_size);
            s->lengths = g_realloc(s->lengths, new_size);
            s->sectors = g_realloc(s->sectors, new_size);
            s->sectorcounts = g_realloc(s->sectorcounts, new_size);

            for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
                ret = read_uint32(bs, offset, &s->types[i]);
                if (ret < 0) {
                    goto fail;
                }
                offset += 4;
                if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
                    s->types[i] != 2) {
                    /* The i > 0 guard prevents reading element [-1] when
                     * the very first descriptor is a terminator.
                     */
                    if (s->types[i] == 0xffffffff && i > 0) {
                        last_in_offset = s->offsets[i - 1] +
                                         s->lengths[i - 1];
                        last_out_offset = s->sectors[i - 1] +
                                          s->sectorcounts[i - 1];
                    }
                    /* Chunk is not stored: drop the slot and skip the
                     * remaining 36 bytes of the descriptor.
                     */
                    chunk_count--;
                    i--;
                    offset += 36;
                    continue;
                }
                offset += 4;

                ret = read_uint64(bs, offset, &s->sectors[i]);
                if (ret < 0) {
                    goto fail;
                }
                s->sectors[i] += last_out_offset;
                offset += 8;

                ret = read_uint64(bs, offset, &s->sectorcounts[i]);
                if (ret < 0) {
                    goto fail;
                }
                offset += 8;

                /* 512 * sectorcount is computed in 32 bits when sizing the
                 * uncompressed buffer; reject values that would wrap.
                 */
                if (s->sectorcounts[i] > UINT32_MAX / 512) {
                    ret = -EINVAL;
                    goto fail;
                }

                ret = read_uint64(bs, offset, &s->offsets[i]);
                if (ret < 0) {
                    goto fail;
                }
                s->offsets[i] += last_in_offset;
                offset += 8;

                ret = read_uint64(bs, offset, &s->lengths[i]);
                if (ret < 0) {
                    goto fail;
                }
                offset += 8;

                /* length + 1 is computed in 32 bits when sizing the
                 * compressed buffer; reject values that would wrap.
                 */
                if (s->lengths[i] >= UINT32_MAX) {
                    ret = -EINVAL;
                    goto fail;
                }

                if (s->lengths[i] > max_compressed_size) {
                    max_compressed_size = s->lengths[i];
                }
                if (s->sectorcounts[i] > max_sectors_per_chunk) {
                    max_sectors_per_chunk = s->sectorcounts[i];
                }
            }
            s->n_chunks += chunk_count;
        }
    }

    /* initialize zlib engine */
    s->compressed_chunk = g_malloc(max_compressed_size + 1);
    s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
    }

    s->current_chunk = s->n_chunks;

    qemu_co_mutex_init(&s->lock);
    return 0;

fail:
    g_free(s->types);
    g_free(s->offsets);
    g_free(s->lengths);
    g_free(s->sectors);
    g_free(s->sectorcounts);
    g_free(s->compressed_chunk);
    g_free(s->uncompressed_chunk);
    return ret;
}
int main(int argc, char **argv) { BlockDriverState *bs; BlockDriver *drv; off_t dev_offset = 0; uint32_t nbdflags = 0; bool disconnect = false; const char *bindto = "0.0.0.0"; char *device = NULL; int port = NBD_DEFAULT_PORT; off_t fd_size; QemuOpts *sn_opts = NULL; const char *sn_id_or_name = NULL; const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:"; struct option lopt[] = { { "help", 0, NULL, 'h' }, { "version", 0, NULL, 'V' }, { "bind", 1, NULL, 'b' }, { "port", 1, NULL, 'p' }, { "socket", 1, NULL, 'k' }, { "offset", 1, NULL, 'o' }, { "read-only", 0, NULL, 'r' }, { "partition", 1, NULL, 'P' }, { "connect", 1, NULL, 'c' }, { "disconnect", 0, NULL, 'd' }, { "snapshot", 0, NULL, 's' }, { "load-snapshot", 1, NULL, 'l' }, { "nocache", 0, NULL, 'n' }, { "cache", 1, NULL, QEMU_NBD_OPT_CACHE }, #ifdef CONFIG_LINUX_AIO { "aio", 1, NULL, QEMU_NBD_OPT_AIO }, #endif { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD }, { "shared", 1, NULL, 'e' }, { "format", 1, NULL, 'f' }, { "persistent", 0, NULL, 't' }, { "verbose", 0, NULL, 'v' }, { NULL, 0, NULL, 0 } }; int ch; int opt_ind = 0; int li; char *end; int flags = BDRV_O_RDWR; int partition = -1; int ret; int fd; bool seen_cache = false; bool seen_discard = false; #ifdef CONFIG_LINUX_AIO bool seen_aio = false; #endif pthread_t client_thread; const char *fmt = NULL; Error *local_err = NULL; /* The client thread uses SIGTERM to interrupt the server. A signal * handler ensures that "qemu-nbd -v -c" exits with a nice status code. 
*/ struct sigaction sa_sigterm; memset(&sa_sigterm, 0, sizeof(sa_sigterm)); sa_sigterm.sa_handler = termsig_handler; sigaction(SIGTERM, &sa_sigterm, NULL); qemu_init_exec_dir(argv[0]); while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch (ch) { case 's': flags |= BDRV_O_SNAPSHOT; break; case 'n': optarg = (char *) "none"; /* fallthrough */ case QEMU_NBD_OPT_CACHE: if (seen_cache) { errx(EXIT_FAILURE, "-n and --cache can only be specified once"); } seen_cache = true; if (bdrv_parse_cache_flags(optarg, &flags) == -1) { errx(EXIT_FAILURE, "Invalid cache mode `%s'", optarg); } break; #ifdef CONFIG_LINUX_AIO case QEMU_NBD_OPT_AIO: if (seen_aio) { errx(EXIT_FAILURE, "--aio can only be specified once"); } seen_aio = true; if (!strcmp(optarg, "native")) { flags |= BDRV_O_NATIVE_AIO; } else if (!strcmp(optarg, "threads")) { /* this is the default */ } else { errx(EXIT_FAILURE, "invalid aio mode `%s'", optarg); } break; #endif case QEMU_NBD_OPT_DISCARD: if (seen_discard) { errx(EXIT_FAILURE, "--discard can only be specified once"); } seen_discard = true; if (bdrv_parse_discard_flags(optarg, &flags) == -1) { errx(EXIT_FAILURE, "Invalid discard mode `%s'", optarg); } break; case 'b': bindto = optarg; break; case 'p': li = strtol(optarg, &end, 0); if (*end) { errx(EXIT_FAILURE, "Invalid port `%s'", optarg); } if (li < 1 || li > 65535) { errx(EXIT_FAILURE, "Port out of range `%s'", optarg); } port = (uint16_t)li; break; case 'o': dev_offset = strtoll (optarg, &end, 0); if (*end) { errx(EXIT_FAILURE, "Invalid offset `%s'", optarg); } if (dev_offset < 0) { errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg); } break; case 'l': if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) { sn_opts = qemu_opts_parse(&internal_snapshot_opts, optarg, 0); if (!sn_opts) { errx(EXIT_FAILURE, "Failed in parsing snapshot param `%s'", optarg); } } else { sn_id_or_name = optarg; } /* fall through */ case 'r': nbdflags |= NBD_FLAG_READ_ONLY; flags &= ~BDRV_O_RDWR; break; case 
'P': partition = strtol(optarg, &end, 0); if (*end) errx(EXIT_FAILURE, "Invalid partition `%s'", optarg); if (partition < 1 || partition > 8) errx(EXIT_FAILURE, "Invalid partition %d", partition); break; case 'k': sockpath = optarg; if (sockpath[0] != '/') errx(EXIT_FAILURE, "socket path must be absolute\n"); break; case 'd': disconnect = true; break; case 'c': device = optarg; break; case 'e': shared = strtol(optarg, &end, 0); if (*end) { errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg); } if (shared < 1) { errx(EXIT_FAILURE, "Shared device number must be greater than 0\n"); } break; case 'f': fmt = optarg; break; case 't': persistent = 1; break; case 'v': verbose = 1; break; case 'V': version(argv[0]); exit(0); break; case 'h': usage(argv[0]); exit(0); break; case '?': errx(EXIT_FAILURE, "Try `%s --help' for more information.", argv[0]); } } if ((argc - optind) != 1) { errx(EXIT_FAILURE, "Invalid number of argument.\n" "Try `%s --help' for more information.", argv[0]); } if (disconnect) { fd = open(argv[optind], O_RDWR); if (fd < 0) { err(EXIT_FAILURE, "Cannot open %s", argv[optind]); } nbd_disconnect(fd); close(fd); printf("%s disconnected\n", argv[optind]); return 0; } if (device && !verbose) { int stderr_fd[2]; pid_t pid; int ret; if (qemu_pipe(stderr_fd) < 0) { err(EXIT_FAILURE, "Error setting up communication pipe"); } /* Now daemonize, but keep a communication channel open to * print errors and exit with the proper status code. */ pid = fork(); if (pid == 0) { close(stderr_fd[0]); ret = qemu_daemon(1, 0); /* Temporarily redirect stderr to the parent's pipe... */ dup2(stderr_fd[1], STDERR_FILENO); if (ret < 0) { err(EXIT_FAILURE, "Failed to daemonize"); } /* ... close the descriptor we inherited and go on. */ close(stderr_fd[1]); } else { bool errors = false; char *buf; /* In the parent. Print error messages from the child until * it closes the pipe. 
*/ close(stderr_fd[1]); buf = g_malloc(1024); while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { errors = true; ret = qemu_write_full(STDERR_FILENO, buf, ret); if (ret < 0) { exit(EXIT_FAILURE); } } if (ret < 0) { err(EXIT_FAILURE, "Cannot read from daemon"); } /* Usually the daemon should not print any message. * Exit with zero status in that case. */ exit(errors); } } if (device != NULL && sockpath == NULL) { sockpath = g_malloc(128); snprintf(sockpath, 128, SOCKET_PATH, basename(device)); } qemu_init_main_loop(); bdrv_init(); atexit(bdrv_close_all); if (fmt) { drv = bdrv_find_format(fmt); if (!drv) { errx(EXIT_FAILURE, "Unknown file format '%s'", fmt); } } else { drv = NULL; } bs = bdrv_new("hda"); srcpath = argv[optind]; ret = bdrv_open(&bs, srcpath, NULL, NULL, flags, drv, &local_err); if (ret < 0) { errno = -ret; err(EXIT_FAILURE, "Failed to bdrv_open '%s': %s", argv[optind], error_get_pretty(local_err)); } if (sn_opts) { ret = bdrv_snapshot_load_tmp(bs, qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID), qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME), &local_err); } else if (sn_id_or_name) { ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name, &local_err); } if (ret < 0) { errno = -ret; err(EXIT_FAILURE, "Failed to load snapshot: %s", error_get_pretty(local_err)); } fd_size = bdrv_getlength(bs); if (partition != -1) { ret = find_partition(bs, partition, &dev_offset, &fd_size); if (ret < 0) { errno = -ret; err(EXIT_FAILURE, "Could not find partition %d", partition); } } exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed); if (sockpath) { fd = unix_socket_incoming(sockpath); } else { fd = tcp_socket_incoming(bindto, port); } if (fd < 0) { return 1; } if (device) { int ret; ret = pthread_create(&client_thread, NULL, nbd_client_thread, device); if (ret != 0) { errx(EXIT_FAILURE, "Failed to create client thread: %s", strerror(ret)); } } else { /* Shut up GCC warnings. 
*/ memset(&client_thread, 0, sizeof(client_thread)); } qemu_set_fd_handler2(fd, nbd_can_accept, nbd_accept, NULL, (void *)(uintptr_t)fd); /* now when the initialization is (almost) complete, chdir("/") * to free any busy filesystems */ if (chdir("/") < 0) { err(EXIT_FAILURE, "Could not chdir to root directory"); } state = RUNNING; do { main_loop_wait(false); if (state == TERMINATE) { state = TERMINATING; nbd_export_close(exp); nbd_export_put(exp); exp = NULL; } } while (state != TERMINATED); bdrv_close(bs); if (sockpath) { unlink(sockpath); } if (sn_opts) { qemu_opts_del(sn_opts); } if (device) { void *ret; pthread_join(client_thread, &ret); exit(ret != NULL); } else { exit(EXIT_SUCCESS); } }
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVQEDState *s = bs->opaque; QEDHeader le_header; int64_t file_size; int ret; s->bs = bs; QSIMPLEQ_INIT(&s->allocating_write_reqs); ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); if (ret < 0) { return ret; } qed_header_le_to_cpu(&le_header, &s->header); if (s->header.magic != QED_MAGIC) { error_setg(errp, "Image not in QED format"); return -EINVAL; } if (s->header.features & ~QED_FEATURE_MASK) { /* image uses unsupported feature bits */ char buf[64]; snprintf(buf, sizeof(buf), "%" PRIx64, s->header.features & ~QED_FEATURE_MASK); error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bs->device_name, "QED", buf); return -ENOTSUP; } if (!qed_is_cluster_size_valid(s->header.cluster_size)) { return -EINVAL; } /* Round down file size to the last cluster */ file_size = bdrv_getlength(bs->file); if (file_size < 0) { return file_size; } s->file_size = qed_start_of_cluster(s, file_size); if (!qed_is_table_size_valid(s->header.table_size)) { return -EINVAL; } if (!qed_is_image_size_valid(s->header.image_size, s->header.cluster_size, s->header.table_size)) { return -EINVAL; } if (!qed_check_table_offset(s, s->header.l1_table_offset)) { return -EINVAL; } s->table_nelems = (s->header.cluster_size * s->header.table_size) / sizeof(uint64_t); s->l2_shift = ffs(s->header.cluster_size) - 1; s->l2_mask = s->table_nelems - 1; s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1; if ((s->header.features & QED_F_BACKING_FILE)) { if ((uint64_t)s->header.backing_filename_offset + s->header.backing_filename_size > s->header.cluster_size * s->header.header_size) { return -EINVAL; } ret = qed_read_string(bs->file, s->header.backing_filename_offset, s->header.backing_filename_size, bs->backing_file, sizeof(bs->backing_file)); if (ret < 0) { return ret; } if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) { pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw"); } } /* Reset 
unknown autoclear feature bits. This is a backwards * compatibility mechanism that allows images to be opened by older * programs, which "knock out" unknown feature bits. When an image is * opened by a newer program again it can detect that the autoclear * feature is no longer valid. */ if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) { s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; ret = qed_write_header_sync(s); if (ret) { return ret; } /* From here on only known autoclear feature bits are valid */ bdrv_flush(bs->file); } s->l1_table = qed_alloc_table(s); qed_init_l2_cache(&s->l2_cache); ret = qed_read_l1_table_sync(s); if (ret) { goto out; } /* If image was not closed cleanly, check consistency */ if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) { /* Read-only images cannot be fixed. There is no risk of corruption * since write operations are not possible. Therefore, allow * potentially inconsistent images to be opened read-only. This can * aid data recovery from an otherwise inconsistent image. */ if (!bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) { BdrvCheckResult result = {0}; ret = qed_check(s, &result, true); if (ret) { goto out; } } } bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs)); out: if (ret) { qed_free_l2_cache(&s->l2_cache); qemu_vfree(s->l1_table); } return ret; }
/* Flush the entire log (as described by 'logs') to the VHDX image
 * file, and then set the log to 'empty' status once complete.
 *
 * The log entries should be validated prior to flushing.
 *
 * Every failure jumps to the "exit" label with a negative errno value
 * in 'ret'.
 *
 * NOTE(review): the definition is cut off in this file after the file-size
 * adjustment below; the rest of the loop and the "exit" label are not
 * present here. */
static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                          VHDXLogSequence *logs)
{
    int ret = 0;
    int i;
    uint32_t cnt, sectors_read;
    uint64_t new_file_size;
    int64_t file_length;
    void *data = NULL;
    VHDXLogDescEntries *desc_entries = NULL;
    VHDXLogEntryHeader hdr_tmp = { 0 };

    cnt = logs->count;

    data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);

    ret = vhdx_user_visible_write(bs, s);
    if (ret < 0) {
        goto exit;
    }

    /* each iteration represents one log sequence, which may span multiple
     * sectors */
    while (cnt--) {
        ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
        if (ret < 0) {
            goto exit;
        }

        /* bdrv_getlength() reports failure as a negative errno value; check
         * it before the unsigned comparison below, which would otherwise
         * treat it as a huge length. */
        file_length = bdrv_getlength(bs->file);
        if (file_length < 0) {
            ret = file_length;
            goto exit;
        }
        /* if the log shows a FlushedFileOffset larger than our current file
         * size, then that means the file has been truncated / corrupted, and
         * we must refuse to open it / use it */
        if (hdr_tmp.flushed_file_offset > file_length) {
            ret = -EINVAL;
            goto exit;
        }

        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
        if (ret < 0) {
            goto exit;
        }

        for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
            if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
                /* data sector, so read a sector to flush */
                ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
                                            data, 1, false);
                if (ret < 0) {
                    goto exit;
                }
                if (sectors_read != 1) {
                    ret = -EINVAL;
                    goto exit;
                }
            }

            ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
            if (ret < 0) {
                goto exit;
            }
        }

        /* Re-query the length: the descriptors flushed above may have
         * changed it. */
        file_length = bdrv_getlength(bs->file);
        if (file_length < 0) {
            ret = file_length;
            goto exit;
        }
        if (file_length < desc_entries->hdr.last_file_offset) {
            new_file_size = desc_entries->hdr.last_file_offset;
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
                ret = bdrv_truncate(bs->file, new_file_size);
                if (ret < 0) {
                    goto exit;
                }
            }
        }
static int coroutine_fn stream_run(Job *job, Error **errp) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockBackend *blk = s->common.blk; BlockDriverState *bs = blk_bs(blk); BlockDriverState *base = s->base; int64_t len; int64_t offset = 0; uint64_t delay_ns = 0; int error = 0; int ret = 0; int64_t n = 0; /* bytes */ void *buf; if (!bs->backing) { goto out; } len = bdrv_getlength(bs); if (len < 0) { ret = len; goto out; } job_progress_set_remaining(&s->common.job, len); buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE); /* Turn on copy-on-read for the whole block device so that guest read * requests help us make progress. Only do this when copying the entire * backing chain since the copy-on-read operation does not take base into * account. */ if (!base) { bdrv_enable_copy_on_read(bs); } for ( ; offset < len; offset += n) { bool copy; /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ job_sleep_ns(&s->common.job, delay_ns); if (job_is_cancelled(&s->common.job)) { break; } copy = false; ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n); if (ret == 1) { /* Allocated in the top, no need to copy. */ } else if (ret >= 0) { /* Copy if allocated in the intermediate images. Limit to the * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). 
*/ ret = bdrv_is_allocated_above(backing_bs(bs), base, offset, n, &n); /* Finish early if end of backing file has been reached */ if (ret == 0 && n == 0) { n = len - offset; } copy = (ret == 1); } trace_stream_one_iteration(s, offset, n, ret); if (copy) { ret = stream_populate(blk, offset, n, buf); } if (ret < 0) { BlockErrorAction action = block_job_error_action(&s->common, s->on_error, true, -ret); if (action == BLOCK_ERROR_ACTION_STOP) { n = 0; continue; } if (error == 0) { error = ret; } if (action == BLOCK_ERROR_ACTION_REPORT) { break; } } ret = 0; /* Publish progress */ job_progress_update(&s->common.job, n); if (copy) { delay_ns = block_job_ratelimit_get_delay(&s->common, n); } else { delay_ns = 0; } } if (!base) { bdrv_disable_copy_on_read(bs); } /* Do not remove the backing file if an error was there but ignored. */ ret = error; qemu_vfree(buf); out: /* Modify backing chain and close BDSes in main loop */ return ret; }
/*
 * Coroutine body of the mirror block job.
 *
 * @opaque: the MirrorBlockJob being run.
 *
 * Outline, as visible in this function:
 *   1. Query the source length; a zero-length source is reported as ready
 *      immediately and the job just waits for completion or cancellation.
 *   2. Allocate the in-flight bitmap, optionally a COW bitmap, and the
 *      bounce buffer.
 *   3. Unless s->is_none_mode is set, pre-populate the dirty bitmap from
 *      the allocation status of the source above s->base.
 *   4. Loop: copy dirty chunks via mirror_iteration(), flush the target
 *      once nothing is dirty or in flight, announce readiness, and exit
 *      when completion was requested and the bitmap is clean.
 *
 * Every exit path goes through immediate_exit, which releases the job's
 * resources and hands 'ret' to mirror_exit() via
 * block_job_defer_to_main_loop().
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for a NULL string */
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->bdev_length = bdrv_getlength(bs);
    if (s->bdev_length < 0) {
        /* Negative length is an error code; propagate it as the job result. */
        ret = s->bdev_length;
        goto immediate_exit;
    } else if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
            block_job_yield(&s->common);
        }
        /* Clear the cancelled flag before leaving (ret is still 0 here). */
        s->common.cancelled = false;
        goto immediate_exit;
    }

    /* Number of granularity-sized chunks needed to cover the device. */
    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        ret = bdrv_get_info(s->target, &bdi);
        if (ret < 0) {
            goto immediate_exit;
        }
        if (s->granularity < bdi.cluster_size) {
            /* The buffer must be able to hold at least one target cluster. */
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    /* Device length expressed in sectors. */
    end = s->bdev_length / BDRV_SECTOR_SIZE;
    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
        goto immediate_exit;
    }

    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (!s->is_none_mode) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base = s->base;
        for (sector_num = 0; sector_num < end; ) {
            /* First sector after the chunk containing sector_num; the bit
             * trick presumably relies on sectors_per_chunk being a power
             * of two -- TODO confirm. */
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                /* Allocated: mark dirty and skip to the next chunk. */
                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
                sector_num = next;
            } else {
                /* Not allocated: skip just the n sectors reported. */
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns = 0;
        int64_t cnt;
        bool should_complete;

        /* s->ret is set outside this function (presumably by I/O completion
         * callbacks -- TODO confirm); a negative value aborts the job. */
        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* s->common.offset contains the number of bytes already processed so
         * far, cnt is the number of dirty sectors remaining and
         * s->sectors_in_flight is the number of sectors currently being
         * processed; together those are the current total operation length */
        s->common.len = s->common.offset +
                        (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                /* No new request can or needs to be started right now:
                 * yield, then restart the loop from the top. */
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                /* Only a REPORT action ends the job; for any other action
                 * the loop keeps going. */
                if (mirror_error_action(s, false, -ret) ==
                    BLOCK_ERROR_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                if (!s->synced) {
                    block_job_event_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

        /* Any flush error that was not reported is dropped here. */
        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        if (!s->synced) {
            /* Before the ready event, cancellation ends the job at once. */
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            /* Synced but not asked to finish: sleep a full slice when idle,
             * otherwise just yield (zero delay). */
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    /* Release everything this function (or the job setup) allocated. */
    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    bdrv_iostatus_disable(s->target);

    /* Hand the result to mirror_exit(); 'data' is passed on with it
     * (presumably freed there -- TODO confirm). */
    data = g_malloc(sizeof(*data));
    data->ret = ret;
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
/*
 * Length callback for blkverify: reports whatever bdrv_getlength() returns
 * for the driver state's test_file.
 */
static int64_t blkverify_getlength(BlockDriverState *bs)
{
    const BDRVBlkverifyState *verify_state = bs->opaque;
    int64_t test_len = bdrv_getlength(verify_state->test_file);

    return test_len;
}
/*
 * Coroutine body of the commit block job.
 *
 * @opaque: the CommitBlockJob being run.
 *
 * Grows 'base' to the length of 'top' if it is shorter, then walks the
 * device in steps of at most COMMIT_BUFFER_SIZE.  Every range that
 * bdrv_co_is_allocated_above(top, base, ...) reports as allocated is passed
 * to commit_populate().  If the whole device was processed and the job was
 * not cancelled, the images between 'base' and the overlay of 'top' are
 * dropped with bdrv_drop_intermediate().
 *
 * On every exit path the original open flags of 'base' and of the overlay
 * are restored where they differ, and the job is finished with
 * block_job_complete(): 0 on success or cancellation, a negative value on
 * failure.
 */
static void coroutine_fn commit_run(void *opaque)
{
    CommitBlockJob *s = opaque;
    BlockDriverState *active = s->active;
    BlockDriverState *top = s->top;
    BlockDriverState *base = s->base;
    BlockDriverState *overlay_bs;
    int64_t sector_num, end;
    int ret = 0;
    int n = 0;
    void *buf;
    int64_t base_len;

    /*
     * Look the overlay up before anything can fail: the cleanup code at
     * exit_restore_reopen needs it on every path, including the early
     * error exits below.
     */
    overlay_bs = bdrv_find_overlay(active, top);

    s->common.len = bdrv_getlength(top);
    if (s->common.len < 0) {
        ret = s->common.len;
        goto exit_restore_reopen;
    }

    base_len = bdrv_getlength(base);
    if (base_len < 0) {
        ret = base_len;
        goto exit_restore_reopen;
    }

    /* The base must be at least as large as the image committed into it. */
    if (base_len < s->common.len) {
        ret = bdrv_truncate(base, s->common.len);
        if (ret) {
            goto exit_restore_reopen;
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);

    for (sector_num = 0; sector_num < end; sector_num += n) {
        uint64_t delay_ms = 0;
        bool copy;

wait:
        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that qemu_aio_flush() returns.
         */
        block_job_sleep(&s->common, rt_clock, delay_ms);
        if (block_job_is_cancelled(&s->common)) {
            break;
        }
        /* Copy if allocated above the base */
        ret = bdrv_co_is_allocated_above(top, base, sector_num,
                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
                                         &n);
        copy = (ret == 1);
        trace_commit_one_iteration(s, sector_num, n, ret);
        if (copy) {
            if (s->common.speed) {
                /* Rate limited: sleep first, then re-evaluate the range. */
                delay_ms = ratelimit_calculate_delay(&s->limit, n);
                if (delay_ms > 0) {
                    goto wait;
                }
            }
            ret = commit_populate(top, base, sector_num, n, buf);
        }
        if (ret < 0) {
            if (s->on_error == BLOCK_ERR_STOP_ANY ||
                s->on_error == BLOCK_ERR_REPORT ||
                (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) {
                goto exit_free_buf;
            } else {
                /* Error is ignored: do not advance, retry the same range. */
                n = 0;
                continue;
            }
        }
        /* Publish progress */
        s->common.offset += n * BDRV_SECTOR_SIZE;
    }

    ret = 0;

    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
        /* success */
        ret = bdrv_drop_intermediate(active, top, base);
    }

exit_free_buf:
    qemu_vfree(buf);

exit_restore_reopen:
    /* restore base open flags here if appropriate (e.g., change the base back
     * to r/o). These reopens do not need to be atomic, since we won't abort
     * even on failure here */
    if (s->base_flags != bdrv_get_flags(base)) {
        bdrv_reopen(base, s->base_flags, NULL);
    }
    /* overlay_bs is only touched when the lookup actually found a node. */
    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
    }

    block_job_complete(&s->common, ret);
}