/* alloc shared memory pages */
void *qemu_vmalloc(size_t size)
{
#if defined(CONFIG_KQEMU)
    if (kqemu_allowed)
        return kqemu_vmalloc(size);
#endif
#ifndef __ia64__
    return qemu_memalign(getpagesize(), size);
#else
    return qemu_memalign(65536, size);
#endif
}
static int virtio_blk_handle_write(VirtIOBlockReq *req)
{
    if (!req->buffer) {
        size_t offset = 0;
        int i;

        for (i = 1; i < req->elem.out_num; i++)
            req->size += req->elem.out_sg[i].iov_len;

        req->buffer = qemu_memalign(512, req->size);
        if (req->buffer == NULL) {
            qemu_free(req);
            return -1;
        }

        /* We copy the data from the SG list to avoid splitting up the request.
           This helps performance a lot until we can pass full sg lists as
           AIO operations */
        for (i = 1; i < req->elem.out_num; i++) {
            size_t len;

            len = MIN(req->elem.out_sg[i].iov_len,
                      req->size - offset);
            memcpy(req->buffer + offset,
                   req->elem.out_sg[i].iov_base,
                   len);
            offset += len;
        }
    }

    bdrv_aio_write(req->dev->bs, req->out->sector, req->buffer,
                   req->size / 512, virtio_blk_rw_complete, req);
    return 0;
}
int xenfb_pv_display_init(DisplayState *ds)
{
    if (!fb_path || !kbd_path)
        return -1;

    xs = qemu_mallocz(sizeof(XenFBState));
    if (!xs)
        return -1;

    init_SEMAPHORE(&xs->kbd_sem, 0);
    xs->ds = ds;

    create_thread("kbdfront", kbdfront_thread, (void*) xs);

    ds->data = xs->nonshared_vram = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
    memset(ds->data, 0, VGA_RAM_SIZE);
    ds->opaque = xs;
    ds->depth = 32;
    ds->bgr = 0;
    ds->width = 640;
    ds->height = 400;
    ds->linesize = 640 * 4;
    ds->dpy_update = xenfb_pv_update;
    ds->dpy_resize = xenfb_pv_resize;
    ds->dpy_resize_shared = xenfb_pv_resize_shared;
    ds->dpy_setdata = xenfb_pv_setdata;
    ds->dpy_refresh = xenfb_pv_refresh;
    return 0;
}
/* alloc shared memory pages */
void *qemu_vmalloc(size_t size)
{
    void *ptr;
    ptr = qemu_memalign(getpagesize(), size);
    trace_qemu_vmalloc(size, ptr);
    return ptr;
}
static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
{
    XenBlockRequest *request = NULL;

    if (QLIST_EMPTY(&dataplane->freelist)) {
        if (dataplane->requests_total >= dataplane->max_requests) {
            goto out;
        }
        /* allocate new struct */
        request = g_malloc0(sizeof(*request));
        request->dataplane = dataplane;
        /*
         * We cannot need more pages per request than this, and since we
         * re-use requests, allocate the memory once here. It will be freed
         * in xen_block_dataplane_destroy() when the request list is freed.
         */
        request->buf = qemu_memalign(XC_PAGE_SIZE,
                                     BLKIF_MAX_SEGMENTS_PER_REQUEST *
                                     XC_PAGE_SIZE);
        dataplane->requests_total++;
        qemu_iovec_init(&request->v, 1);
    } else {
        /* get one from freelist */
        request = QLIST_FIRST(&dataplane->freelist);
        QLIST_REMOVE(request, list);
    }
    QLIST_INSERT_HEAD(&dataplane->inflight, request, list);
    dataplane->requests_inflight++;

out:
    return request;
}
/* alloc shared memory pages */
void *qemu_vmalloc(size_t size)
{
#if defined(CONFIG_KQEMU)
    if (kqemu_allowed)
        return kqemu_vmalloc(size);
#endif
    return qemu_memalign(getpagesize(), size);
}
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    VirtIOBlockReq *req;

    while ((req = virtio_blk_get_request(s))) {
        int i;

        if (req->elem.out_num < 1 || req->elem.in_num < 1) {
            fprintf(stderr, "virtio-blk missing headers\n");
            exit(1);
        }

        if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
            req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
            fprintf(stderr, "virtio-blk header not in correct element\n");
            exit(1);
        }

        req->out = (void *)req->elem.out_sg[0].iov_base;
        req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;

        if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
            unsigned int len = sizeof(*req->in);

            req->in->status = VIRTIO_BLK_S_UNSUPP;
            virtqueue_push(vq, &req->elem, len);
            virtio_notify(vdev, vq);
            qemu_free(req);
        } else if (req->out->type & VIRTIO_BLK_T_OUT) {
            if (virtio_blk_handle_write(req) < 0)
                break;
        } else {
            for (i = 0; i < req->elem.in_num - 1; i++)
                req->size += req->elem.in_sg[i].iov_len;

            req->buffer = qemu_memalign(512, req->size);
            if (req->buffer == NULL) {
                qemu_free(req);
                break;
            }

            bdrv_aio_read(s->bs, req->out->sector, req->buffer,
                          req->size / 512, virtio_blk_rw_complete, req);
        }
    }
    /*
     * FIXME: Want to check for completions before returning to guest mode,
     * so cached reads and writes are reported as quickly as possible. But
     * that should be done in the generic block layer.
     */
}
static void *qemu_io_alloc(size_t len, int pattern)
{
    void *buf;

    if (misalign)
        len += MISALIGN_OFFSET;
    buf = qemu_memalign(512, len);
    memset(buf, pattern, len);
    if (misalign)
        buf += MISALIGN_OFFSET;
    return buf;
}
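/*
 * Minimal sketch of the matching free path, assuming the same MISALIGN_OFFSET
 * convention as qemu_io_alloc() above. The function name and body here are an
 * illustrative assumption, not taken from the snippets in this section: the
 * deliberate offset has to be subtracted again so that the pointer handed to
 * qemu_vfree() is the one originally returned by qemu_memalign().
 */
static void qemu_io_free(void *p)
{
    if (misalign)
        p -= MISALIGN_OFFSET;   /* undo the deliberate misalignment */
    qemu_vfree(p);              /* counterpart of qemu_memalign() */
}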
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret;

    posix_aio_init();

    s->lseek_err_cnt = 0;

    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
    if (flags & BDRV_O_CREAT)
        open_flags |= O_CREAT | O_TRUNC;

    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((flags & BDRV_O_NOCACHE))
        open_flags |= O_DIRECT;
    else if (!(flags & BDRV_O_CACHE_WB))
        open_flags |= O_DSYNC;

    s->type = FTYPE_FILE;

    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
    s->aligned_buf = NULL;
    if ((flags & BDRV_O_NOCACHE)) {
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            ret = -errno;
            close(fd);
            return ret;
        }
    }
    return 0;
}
/* alloc shared memory pages */
void *qemu_vmalloc(size_t size)
{
    void *ptr;
    size_t align = QEMU_VMALLOC_ALIGN;

#if defined(CONFIG_VALGRIND)
    if (running_on_valgrind < 0) {
        /* First call, test whether we are running on Valgrind.
           This is a substitute for RUNNING_ON_VALGRIND from valgrind.h. */
        const char *ld = getenv("LD_PRELOAD");
        running_on_valgrind = (ld != NULL && strstr(ld, "vgpreload"));
    }
#endif

    if (size < align || running_on_valgrind) {
        align = getpagesize();
    }
    ptr = qemu_memalign(align, size);
    trace_qemu_vmalloc(size, ptr);
    return ptr;
}
/* alloc shared memory pages */
void *qemu_vmalloc(size_t size)
{
    return qemu_memalign(getpagesize(), size);
}
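/*
 * Usage sketch (hypothetical caller, not part of the snippets collected here):
 * buffers obtained through qemu_vmalloc()/qemu_memalign() are released with
 * qemu_vfree(), as the raw_open_common() and handle_aiocb_rw() snippets below
 * do for their aligned buffers.
 */
static void vmalloc_usage_example(void)
{
    void *scratch = qemu_vmalloc(getpagesize());    /* page-aligned allocation */
    memset(scratch, 0, getpagesize());
    /* ... use the buffer, e.g. for O_DIRECT or shared-memory I/O ... */
    qemu_vfree(scratch);                            /* counterpart of qemu_memalign() */
}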
static int raw_open_common(BlockDriverState *bs, const char *filename,
                           int bdrv_flags, int open_flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, ret;

    ret = raw_normalize_devicepath(&filename);
    if (ret != 0) {
        return ret;
    }

    s->open_flags = open_flags | O_BINARY;
    s->open_flags &= ~O_ACCMODE;
    if (bdrv_flags & BDRV_O_RDWR) {
        s->open_flags |= O_RDWR;
    } else {
        s->open_flags |= O_RDONLY;
    }

    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((bdrv_flags & BDRV_O_NOCACHE))
        s->open_flags |= O_DIRECT;
    if (!(bdrv_flags & BDRV_O_CACHE_WB))
        s->open_flags |= O_DSYNC;

    s->fd = -1;
    fd = qemu_open(filename, s->open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
    s->aligned_buf = NULL;

    if ((bdrv_flags & BDRV_O_NOCACHE)) {
        /*
         * Allocate a buffer for read/modify/write cycles. Choose the size
         * pessimistically as we don't know the block size yet.
         */
        s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
        s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size);
        if (s->aligned_buf == NULL) {
            goto out_close;
        }
    }

#ifdef CONFIG_LINUX_AIO
    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {

        /* We're falling back to POSIX AIO in some cases */
        paio_init();

        s->aio_ctx = laio_init();
        if (!s->aio_ctx) {
            goto out_free_buf;
        }
        s->use_aio = 1;
    } else
#endif
    {
        if (paio_init() < 0) {
            goto out_free_buf;
        }
#ifdef CONFIG_LINUX_AIO
        s->use_aio = 0;
#endif
    }

#ifdef CONFIG_XFS
    if (platform_test_xfs_fd(s->fd)) {
        s->is_xfs = 1;
    }
#endif

    return 0;

out_free_buf:
    qemu_vfree(s->aligned_buf);
out_close:
    close(fd);
    return -errno;
}
void kvm_arch_save_regs(CPUState *env)
{
    struct kvm_regs regs;
    struct kvm_fpu fpu;
    struct kvm_sregs sregs;
    struct kvm_msr_entry msrs[100];
    uint32_t hflags;
    uint32_t i, n, rc, bit;

    assert(kvm_cpu_is_stopped(env) || env->thread_id == kvm_get_thread_id());

    kvm_get_regs(env, &regs);

    env->regs[R_EAX] = regs.rax;
    env->regs[R_EBX] = regs.rbx;
    env->regs[R_ECX] = regs.rcx;
    env->regs[R_EDX] = regs.rdx;
    env->regs[R_ESI] = regs.rsi;
    env->regs[R_EDI] = regs.rdi;
    env->regs[R_ESP] = regs.rsp;
    env->regs[R_EBP] = regs.rbp;
#ifdef TARGET_X86_64
    env->regs[8] = regs.r8;
    env->regs[9] = regs.r9;
    env->regs[10] = regs.r10;
    env->regs[11] = regs.r11;
    env->regs[12] = regs.r12;
    env->regs[13] = regs.r13;
    env->regs[14] = regs.r14;
    env->regs[15] = regs.r15;
#endif

    env->eflags = regs.rflags;
    env->eip = regs.rip;

#ifdef KVM_CAP_XSAVE
    if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) {
        struct kvm_xsave* xsave;
        uint16_t cwd, swd, twd, fop;

        xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
        kvm_get_xsave(env, xsave);
        cwd = (uint16_t)xsave->region[0];
        swd = (uint16_t)(xsave->region[0] >> 16);
        twd = (uint16_t)xsave->region[1];
        fop = (uint16_t)(xsave->region[1] >> 16);
        env->fpstt = (swd >> 11) & 7;
        env->fpus = swd;
        env->fpuc = cwd;
        for (i = 0; i < 8; ++i)
            env->fptags[i] = !((twd >> i) & 1);
        env->mxcsr = xsave->region[XSAVE_MXCSR];
        memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
               sizeof env->fpregs);
        memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
               sizeof env->xmm_regs);
        env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
        memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
               sizeof env->ymmh_regs);
        if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) {
            struct kvm_xcrs xcrs;
            kvm_get_xcrs(env, &xcrs);
            if (xcrs.xcrs[0].xcr == 0)
                env->xcr0 = xcrs.xcrs[0].value;
        }
        qemu_free(xsave);
    } else {
void kvm_arch_load_regs(CPUState *env, int level)
{
    struct kvm_regs regs;
    struct kvm_fpu fpu;
    struct kvm_sregs sregs;
    struct kvm_msr_entry msrs[100];
    int rc, n, i;

    assert(kvm_cpu_is_stopped(env) || env->thread_id == kvm_get_thread_id());

    regs.rax = env->regs[R_EAX];
    regs.rbx = env->regs[R_EBX];
    regs.rcx = env->regs[R_ECX];
    regs.rdx = env->regs[R_EDX];
    regs.rsi = env->regs[R_ESI];
    regs.rdi = env->regs[R_EDI];
    regs.rsp = env->regs[R_ESP];
    regs.rbp = env->regs[R_EBP];
#ifdef TARGET_X86_64
    regs.r8 = env->regs[8];
    regs.r9 = env->regs[9];
    regs.r10 = env->regs[10];
    regs.r11 = env->regs[11];
    regs.r12 = env->regs[12];
    regs.r13 = env->regs[13];
    regs.r14 = env->regs[14];
    regs.r15 = env->regs[15];
#endif

    regs.rflags = env->eflags;
    regs.rip = env->eip;

    kvm_set_regs(env, &regs);

#ifdef KVM_CAP_XSAVE
    if (kvm_check_extension(kvm_state, KVM_CAP_XSAVE)) {
        struct kvm_xsave* xsave;
        uint16_t cwd, swd, twd, fop;

        xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
        memset(xsave, 0, sizeof(struct kvm_xsave));
        cwd = swd = twd = fop = 0;
        swd = env->fpus & ~(7 << 11);
        swd |= (env->fpstt & 7) << 11;
        cwd = env->fpuc;
        for (i = 0; i < 8; ++i)
            twd |= (!env->fptags[i]) << i;
        xsave->region[0] = (uint32_t)(swd << 16) + cwd;
        xsave->region[1] = (uint32_t)(fop << 16) + twd;
        memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
               sizeof env->fpregs);
        memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
               sizeof env->xmm_regs);
        xsave->region[XSAVE_MXCSR] = env->mxcsr;
        *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
        memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
               sizeof env->ymmh_regs);
        kvm_set_xsave(env, xsave);
        if (kvm_check_extension(kvm_state, KVM_CAP_XCRS)) {
            struct kvm_xcrs xcrs;
            xcrs.nr_xcrs = 1;
            xcrs.flags = 0;
            xcrs.xcrs[0].xcr = 0;
            xcrs.xcrs[0].value = env->xcr0;
            kvm_set_xcrs(env, &xcrs);
        }
        qemu_free(xsave);
    } else {
#endif
        memset(&fpu, 0, sizeof fpu);
        fpu.fsw = env->fpus & ~(7 << 11);
        fpu.fsw |= (env->fpstt & 7) << 11;
        fpu.fcw = env->fpuc;
        for (i = 0; i < 8; ++i)
            fpu.ftwx |= (!env->fptags[i]) << i;
        memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
        memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
        fpu.mxcsr = env->mxcsr;
        kvm_set_fpu(env, &fpu);
#ifdef KVM_CAP_XSAVE
    }
#endif

    memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
    if (env->interrupt_injected >= 0) {
        sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
                (uint64_t)1 << (env->interrupt_injected % 64);
    }

    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs.ss.selector = (sregs.ss.selector & ~3) |
                    (sregs.cs.selector & 3);
            sregs.ss.dpl = sregs.ss.selector & 3;
        }
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
    sregs.apic_base = cpu_get_apic_base(env->apic_state);

    sregs.efer = env->efer;

    kvm_set_sregs(env, &sregs);

    /* msrs */
    n = 0;
    /* Remember to increase msrs size if you add new registers below */
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star)
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    if (kvm_has_vm_hsave_pa)
        kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
#ifdef TARGET_X86_64
    if (lm_capable_kernel) {
        kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
        kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
        kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
        kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    }
#endif
    if (level == KVM_PUT_FULL_STATE) {
        /*
         * KVM is yet unable to synchronize TSC values of multiple VCPUs on
         * writeback. Until this is fixed, we only write the offset to SMP
         * guests after migration, desynchronizing the VCPUs, but avoiding
         * huge jump-backs that would occur without any writeback at all.
         */
        if (smp_cpus == 1 || env->tsc != 0) {
            kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
        }
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
    }
#ifdef KVM_CAP_MCE
    if (env->mcg_cap) {
        if (level == KVM_PUT_RESET_STATE)
            kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
        else if (level == KVM_PUT_FULL_STATE) {
            kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
            kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
            for (i = 0; i < (env->mcg_cap & 0xff); i++)
                kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
        }
    }
#endif

    rc = kvm_set_msrs(env, msrs, n);
    if (rc == -1)
        perror("kvm_set_msrs FAILED");

    if (level >= KVM_PUT_RESET_STATE) {
        kvm_arch_load_mpstate(env);
        kvm_load_lapic(env);
    }
    if (level == KVM_PUT_FULL_STATE) {
        if (env->kvm_vcpu_update_vapic)
            kvm_tpr_enable_vapic(env);
    }

    kvm_put_vcpu_events(env, level);
    kvm_put_debugregs(env);

    /* must be last */
    kvm_guest_debug_workarounds(env);
}
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    ioreq->buf = qemu_memalign(XC_PAGE_SIZE, ioreq->size);
    if (ioreq->req.nr_segments &&
        (ioreq->req.operation == BLKIF_OP_WRITE ||
         ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
        ioreq_grant_copy(ioreq)) {
        qemu_vfree(ioreq->buf);
        goto err;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
        return 0;
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        qemu_iovec_add(&ioreq->v, ioreq->buf, ioreq->size);
        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
                         ioreq->v.size, BLOCK_ACCT_READ);
        ioreq->aio_inflight++;
        blk_aio_preadv(blkdev->blk, ioreq->start, &ioreq->v, 0,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!ioreq->req.nr_segments) {
            break;
        }

        qemu_iovec_add(&ioreq->v, ioreq->buf, ioreq->size);
        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
                         ioreq->v.size,
                         ioreq->req.operation == BLKIF_OP_WRITE ?
                         BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH);
        ioreq->aio_inflight++;
        blk_aio_pwritev(blkdev->blk, ioreq->start, &ioreq->v, 0,
                        qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_DISCARD:
    {
        struct blkif_request_discard *req = (void *)&ioreq->req;
        if (!blk_split_discard(ioreq, req->sector_number, req->nr_sectors)) {
            goto err;
        }
        break;
    }
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
{
    size_t nbytes;
    char *buf;

    if (!aiocb_needs_copy(aiocb)) {
        /*
         * If there is just a single buffer, and it is properly aligned
         * we can just use plain pread/pwrite without any problems.
         */
        if (aiocb->aio_niov == 1)
            return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);

        /*
         * We have more than one iovec, and all are properly aligned.
         *
         * Try preadv/pwritev first and fall back to linearizing the
         * buffer if it's not supported.
         */
        if (preadv_present) {
            nbytes = handle_aiocb_rw_vector(aiocb);
            if (nbytes == aiocb->aio_nbytes)
                return nbytes;
            if (nbytes < 0 && nbytes != -ENOSYS)
                return nbytes;
            preadv_present = 0;
        }

        /*
         * XXX(hch): short read/write. no easy way to handle the remainder
         * using these interfaces. For now retry using plain
         * pread/pwrite?
         */
    }

    /*
     * Ok, we have to do it the hard way, copy all segments into
     * a single aligned buffer.
     */
    buf = qemu_memalign(512, aiocb->aio_nbytes);
    if (aiocb->aio_type == QEMU_PAIO_WRITE) {
        char *p = buf;
        int i;

        for (i = 0; i < aiocb->aio_niov; ++i) {
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
    }

    nbytes = handle_aiocb_rw_linear(aiocb, buf);
    if (aiocb->aio_type != QEMU_PAIO_WRITE) {
        char *p = buf;
        size_t count = aiocb->aio_nbytes, copy;
        int i;

        for (i = 0; i < aiocb->aio_niov && count; ++i) {
            copy = count;
            if (copy > aiocb->aio_iov[i].iov_len)
                copy = aiocb->aio_iov[i].iov_len;
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
            p += copy;
            count -= copy;
        }
    }

    qemu_vfree(buf);
    return nbytes;
}