/* Open a window for kernel access to user-space pages by setting EFLAGS.AC
 * with stac(), i.e. temporarily suspending SMAP enforcement. */
static inline void user_access_enable(void) {
	if (pmap_smap_enabled) {
		stac();
#if DEVELOPMENT
		smaplog_add_entry(TRUE);
#endif
	}
}
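/* The matching disable helper is not part of this excerpt. The sketch below is a
 * hypothetical reconstruction of what it presumably looks like, assuming the same
 * pmap_smap_enabled flag and smaplog_add_entry() logger; it has not been verified
 * against the source tree. */
static inline void user_access_disable(void) {
	if (pmap_smap_enabled) {
		clac();		/* close the access window again by clearing EFLAGS.AC */
#if DEVELOPMENT
		smaplog_add_entry(FALSE);
#endif
	}
}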
static void load_pdptrs(const struct acrn_vcpu *vcpu)
{
	uint64_t guest_cr3 = exec_vmread(VMX_GUEST_CR3);
	/* TODO: check whether guest cr3 is valid */
	uint64_t *guest_cr3_hva = (uint64_t *)gpa2hva(vcpu->vm, guest_cr3);

	stac();
	exec_vmwrite64(VMX_GUEST_PDPTE0_FULL, get_pgentry(guest_cr3_hva + 0UL));
	exec_vmwrite64(VMX_GUEST_PDPTE1_FULL, get_pgentry(guest_cr3_hva + 1UL));
	exec_vmwrite64(VMX_GUEST_PDPTE2_FULL, get_pgentry(guest_cr3_hva + 2UL));
	exec_vmwrite64(VMX_GUEST_PDPTE3_FULL, get_pgentry(guest_cr3_hva + 3UL));
	clac();
}
static inline void enter_s3(struct acrn_vm *vm, uint32_t pm1a_cnt_val, uint32_t pm1b_cnt_val)
{
	uint32_t guest_wakeup_vec32;

	/* Save the wakeup vec set by guest OS. Will return to guest
	 * with this wakeup vec as entry.
	 */
	stac();
	guest_wakeup_vec32 = *(vm->pm.sx_state_data->wake_vector_32);
	clac();

	pause_vm(vm);	/* pause sos_vm before suspend system */
	host_enter_s3(vm->pm.sx_state_data, pm1a_cnt_val, pm1b_cnt_val);
	resume_vm_from_s3(vm, guest_wakeup_vec32);	/* jump back to vm */
}
/**
 * Hook function for the thread-preempting event.
 *
 * @param   pPreemptNotifier    Pointer to the preempt_notifier struct.
 * @param   pNext               Pointer to the task that is preempting the
 *                              current thread.
 *
 * @remarks Called with the rq (runqueue) lock held and with preemption and
 *          interrupts disabled!
 */
static void rtThreadCtxHooksLnxSchedOut(struct preempt_notifier *pPreemptNotifier, struct task_struct *pNext)
{
    PRTTHREADCTXINT pThis = RT_FROM_MEMBER(pPreemptNotifier, RTTHREADCTXINT, hPreemptNotifier);
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    RTCCUINTREG fSavedEFlags = ASMGetFlags();
    stac();
#endif

    AssertPtr(pThis);
    AssertPtr(pThis->pfnThreadCtxHook);
    Assert(pThis->fRegistered);
    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));

    pThis->pfnThreadCtxHook(RTTHREADCTXEVENT_PREEMPTING, pThis->pvUser);

#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    ASMSetFlags(fSavedEFlags);
# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 19) && defined(RT_ARCH_AMD64)
    pThis->fSavedRFlags = fSavedEFlags;
# endif
#endif
}
/**
 * Hook function for the thread-resumed event.
 *
 * @param   pPreemptNotifier    Pointer to the preempt_notifier struct.
 * @param   iCpu                The CPU this thread is scheduled on.
 *
 * @remarks Called without holding the rq (runqueue) lock and with preemption
 *          enabled!
 */
static void rtThreadCtxHooksLnxSchedIn(struct preempt_notifier *pPreemptNotifier, int iCpu)
{
    PRTTHREADCTXINT pThis = RT_FROM_MEMBER(pPreemptNotifier, RTTHREADCTXINT, hPreemptNotifier);
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    RTCCUINTREG fSavedEFlags = ASMGetFlags();
    stac();
#endif

    AssertPtr(pThis);
    AssertPtr(pThis->pfnThreadCtxHook);
    Assert(pThis->fRegistered);

    pThis->pfnThreadCtxHook(RTTHREADCTXEVENT_RESUMED, pThis->pvUser);

#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 19) && defined(RT_ARCH_AMD64)
    fSavedEFlags &= ~RT_BIT_64(18) /*X86_EFL_AC*/;
    fSavedEFlags |= pThis->fSavedRFlags & RT_BIT_64(18) /*X86_EFL_AC*/;
# endif
    ASMSetFlags(fSavedEFlags);
#endif
}
/* Pop one element from the shared ring buffer into 'data' and advance head.
 * Returns the element size, or 0 if the buffer is empty. Accesses to the
 * buffer are bracketed by stac()/clac() to temporarily lift SMAP. */
uint32_t sbuf_get(struct shared_buf *sbuf, uint8_t *data)
{
	const void *from;
	uint32_t ele_size;

	stac();
	if (sbuf_is_empty(sbuf)) {
		clac();
		/* no data available */
		return 0;
	}

	from = (void *)sbuf + SBUF_HEAD_SIZE + sbuf->head;

	(void)memcpy_s((void *)data, sbuf->ele_size, from, sbuf->ele_size);

	sbuf->head = sbuf_next_ptr(sbuf->head, sbuf->ele_size, sbuf->size);

	ele_size = sbuf->ele_size;
	clac();

	return ele_size;
}
#ifdef HAVE_UNLOCKED_IOCTL
static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
#else
static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
#endif
{
    PSUPDRVSESSION pSession = (PSUPDRVSESSION)pFilp->private_data;
    int rc;

    /*
     * Deal with the two high-speed IOCtls that take their arguments from
     * the session and uCmd, and only return a VBox status code.
     */
#ifdef HAVE_UNLOCKED_IOCTL
    if (RT_LIKELY(   (   uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_HM_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_NOP)
                  && pSession->fUnrestricted == true))
    {
        stac();
        rc = supdrvIOCtlFast(uCmd, ulArg, &g_DevExt, pSession);
        clac();
        return rc;
    }
    return VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg, pSession);
#else   /* !HAVE_UNLOCKED_IOCTL */
    unlock_kernel();
    if (RT_LIKELY(   (   uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_HM_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_NOP)
                  && pSession->fUnrestricted == true))
        rc = supdrvIOCtlFast(uCmd, ulArg, &g_DevExt, pSession);
    else
        rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg, pSession);
    lock_kernel();
    return rc;
#endif  /* !HAVE_UNLOCKED_IOCTL */
}
/* Push one element into the shared ring buffer, handling overrun accounting
 * and optional overwrite of the oldest element. Returns the element size, or
 * 0 if the buffer is full and overwrite is disabled. */
uint32_t sbuf_put(struct shared_buf *sbuf, uint8_t *data)
{
	void *to;
	uint32_t next_tail;
	uint32_t ele_size;
	bool trigger_overwrite = false;

	stac();
	next_tail = sbuf_next_ptr(sbuf->tail, sbuf->ele_size, sbuf->size);
	/* if this write would trigger overrun */
	if (next_tail == sbuf->head) {
		/* accumulate overrun count if necessary */
		sbuf->overrun_cnt += sbuf->flags & OVERRUN_CNT_EN;
		if ((sbuf->flags & OVERWRITE_EN) == 0U) {
			/* if overwrite is not enabled, return here. */
			clac();
			return 0;
		}
		trigger_overwrite = true;
	}

	to = (void *)sbuf + SBUF_HEAD_SIZE + sbuf->tail;

	(void)memcpy_s(to, sbuf->ele_size, data, sbuf->ele_size);

	if (trigger_overwrite) {
		sbuf->head = sbuf_next_ptr(sbuf->head, sbuf->ele_size, sbuf->size);
	}
	sbuf->tail = next_tail;
	ele_size = sbuf->ele_size;
	clac();

	return ele_size;
}
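/* Both sbuf_get() above and sbuf_put() advance their cursors with
 * sbuf_next_ptr(), which is not part of this excerpt. The sketch below is a
 * minimal wrap-around increment consistent with how it is called here (head
 * and tail are byte offsets into a buffer of 'size' bytes); the real helper
 * may differ in detail. */
static inline uint32_t sbuf_next_ptr(uint32_t pos, uint32_t span, uint32_t scope)
{
	/* advance by one element and wrap to the start when the end is reached */
	pos += span;
	return (pos >= scope) ? (pos - scope) : pos;
}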
/**
 * Device I/O Control entry point.
 *
 * @param   pFilp       Associated file pointer.
 * @param   uCmd        The function specified to ioctl().
 * @param   ulArg       The argument specified to ioctl().
 * @param   pSession    The session instance.
 */
static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg, PSUPDRVSESSION pSession)
{
    int                 rc;
    SUPREQHDR           Hdr;
    PSUPREQHDR          pHdr;
    uint32_t            cbBuf;

    Log6(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p pid=%d/%d\n", pFilp, uCmd, (void *)ulArg, RTProcSelf(), current->pid));

    /*
     * Read the header.
     */
    if (RT_UNLIKELY(copy_from_user(&Hdr, (void *)ulArg, sizeof(Hdr))))
    {
        Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x\n", ulArg, uCmd));
        return -EFAULT;
    }
    if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
    {
        Log(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
        return -EINVAL;
    }

    /*
     * Buffer the request.
     */
    cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
    if (RT_UNLIKELY(cbBuf > _1M*16))
    {
        Log(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
        return -E2BIG;
    }
    if (RT_UNLIKELY(_IOC_SIZE(uCmd) ? cbBuf != _IOC_SIZE(uCmd) : Hdr.cbIn < sizeof(Hdr)))
    {
        Log(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
        return -EINVAL;
    }
    pHdr = RTMemAlloc(cbBuf);
    if (RT_UNLIKELY(!pHdr))
    {
        OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x\n", cbBuf, uCmd));
        return -ENOMEM;
    }
    if (RT_UNLIKELY(copy_from_user(pHdr, (void *)ulArg, Hdr.cbIn)))
    {
        Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x\n", ulArg, Hdr.cbIn, uCmd));
        RTMemFree(pHdr);
        return -EFAULT;
    }
    if (Hdr.cbIn < cbBuf)
        RT_BZERO((uint8_t *)pHdr + Hdr.cbIn, cbBuf - Hdr.cbIn);

    /*
     * Process the IOCtl.
     */
    stac();
    rc = supdrvIOCtl(uCmd, &g_DevExt, pSession, pHdr, cbBuf);
    clac();

    /*
     * Copy ioctl data and output buffer back to user space.
     */
    if (RT_LIKELY(!rc))
    {
        uint32_t cbOut = pHdr->cbOut;
        if (RT_UNLIKELY(cbOut > cbBuf))
        {
            OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
            cbOut = cbBuf;
        }
        if (RT_UNLIKELY(copy_to_user((void *)ulArg, pHdr, cbOut)))
        {
            /* this is really bad! */
            OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
            rc = -EFAULT;
        }
    }
    else
    {
        Log(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
        rc = -EINVAL;
    }
    RTMemFree(pHdr);

    Log6(("VBoxDrvLinuxIOCtl: returns %d (pid=%d/%d)\n", rc, RTProcSelf(), current->pid));
    return rc;
}
#ifdef HAVE_UNLOCKED_IOCTL
static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
#else
static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
#endif
{
    PSUPDRVSESSION pSession = (PSUPDRVSESSION)pFilp->private_data;
    int rc;
#if defined(VBOX_STRICT) || defined(VBOX_WITH_EFLAGS_AC_SET_IN_VBOXDRV)
    RTCCUINTREG fSavedEfl;

    /*
     * Refuse all I/O control calls if we've ever detected EFLAGS.AC being cleared.
     *
     * This isn't a problem, as there is absolutely nothing in the kernel context that
     * depends on user context triggering cleanups. That would be pretty wild, right?
     */
    if (RT_UNLIKELY(g_DevExt.cBadContextCalls > 0))
    {
        SUPR0Printf("VBoxDrvLinuxIOCtl: EFLAGS.AC=0 detected %u times, refusing all I/O controls!\n", g_DevExt.cBadContextCalls);
        return ESPIPE;
    }

    fSavedEfl = ASMAddFlags(X86_EFL_AC);
#else
    stac();
#endif

    /*
     * Deal with the two high-speed IOCtls that take their arguments from
     * the session and uCmd, and only return a VBox status code.
     */
#ifdef HAVE_UNLOCKED_IOCTL
    if (RT_LIKELY(   (   uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_HM_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_NOP)
                  && pSession->fUnrestricted == true))
        rc = supdrvIOCtlFast(uCmd, ulArg, &g_DevExt, pSession);
    else
        rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg, pSession);
#else   /* !HAVE_UNLOCKED_IOCTL */
    unlock_kernel();
    if (RT_LIKELY(   (   uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_HM_RUN
                      || uCmd == SUP_IOCTL_FAST_DO_NOP)
                  && pSession->fUnrestricted == true))
        rc = supdrvIOCtlFast(uCmd, ulArg, &g_DevExt, pSession);
    else
        rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg, pSession);
    lock_kernel();
#endif  /* !HAVE_UNLOCKED_IOCTL */

#if defined(VBOX_STRICT) || defined(VBOX_WITH_EFLAGS_AC_SET_IN_VBOXDRV)
    /*
     * Before we restore AC and the rest of EFLAGS, check if the IOCtl handler code
     * accidentally modified it or some other important flag.
     */
    if (RT_UNLIKELY(   (ASMGetFlags() & (X86_EFL_AC | X86_EFL_IF | X86_EFL_DF | X86_EFL_IOPL))
                    != ((fSavedEfl    & (X86_EFL_AC | X86_EFL_IF | X86_EFL_DF | X86_EFL_IOPL)) | X86_EFL_AC) ))
    {
        char szTmp[48];
        RTStrPrintf(szTmp, sizeof(szTmp), "uCmd=%#x: %#x->%#x!", _IOC_NR(uCmd), (uint32_t)fSavedEfl, (uint32_t)ASMGetFlags());
        supdrvBadContext(&g_DevExt, "SUPDrv-linux.c", __LINE__, szTmp);
    }
    ASMSetFlags(fSavedEfl);
#else
    clac();
#endif

    return rc;
}
static int
privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
    int mode, struct thread *td)
{
	int error, i;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL: {
		struct ioctl_privcmd_hypercall *hcall;

		hcall = (struct ioctl_privcmd_hypercall *)arg;
#ifdef __amd64__
		/*
		 * The hypervisor page table walker will refuse to access
		 * user-space pages if SMAP is enabled, so temporarily disable
		 * it while performing the hypercall.
		 */
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			stac();
#endif
		error = privcmd_hypercall(hcall->op, hcall->arg[0],
		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
#ifdef __amd64__
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			clac();
#endif
		if (error >= 0) {
			hcall->retval = error;
			error = 0;
		} else {
			error = xen_translate_error(error);
			hcall->retval = 0;
		}
		break;
	}
	case IOCTL_PRIVCMD_MMAPBATCH: {
		struct ioctl_privcmd_mmapbatch *mmap;
		vm_map_t map;
		vm_map_entry_t entry;
		vm_object_t mem;
		vm_pindex_t pindex;
		vm_prot_t prot;
		boolean_t wired;
		struct xen_add_to_physmap_range add;
		xen_ulong_t *idxs;
		xen_pfn_t *gpfns;
		int *errs, index;
		struct privcmd_map *umap;
		uint16_t num;

		mmap = (struct ioctl_privcmd_mmapbatch *)arg;

		if ((mmap->num == 0) || ((mmap->addr & PAGE_MASK) != 0)) {
			error = EINVAL;
			break;
		}

		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_map_lookup(&map, mmap->addr, VM_PROT_NONE, &entry,
		    &mem, &pindex, &prot, &wired);
		if (error != KERN_SUCCESS) {
			error = EINVAL;
			break;
		}
		if ((entry->start != mmap->addr) ||
		    (entry->end != mmap->addr + (mmap->num * PAGE_SIZE))) {
			vm_map_lookup_done(map, entry);
			error = EINVAL;
			break;
		}
		vm_map_lookup_done(map, entry);

		if ((mem->type != OBJT_MGTDEVICE) ||
		    (mem->un_pager.devp.ops != &privcmd_pg_ops)) {
			error = EINVAL;
			break;
		}

		umap = mem->handle;

		add.domid = DOMID_SELF;
		add.space = XENMAPSPACE_gmfn_foreign;
		add.foreign_domid = mmap->dom;

		/*
		 * The 'size' field in the xen_add_to_physmap_range only
		 * allows for UINT16_MAX mappings in a single hypercall.
		 */
		num = MIN(mmap->num, UINT16_MAX);

		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);

		set_xen_guest_handle(add.idxs, idxs);
		set_xen_guest_handle(add.gpfns, gpfns);
		set_xen_guest_handle(add.errs, errs);

		/* Allocate a bitset to store broken page mappings. */
		umap->err = BITSET_ALLOC(mmap->num, M_PRIVCMD,
		    M_WAITOK | M_ZERO);

		for (index = 0; index < mmap->num; index += num) {
			num = MIN(mmap->num - index, UINT16_MAX);
			add.size = num;

			error = copyin(&mmap->arr[index], idxs,
			    sizeof(idxs[0]) * num);
			if (error != 0)
				goto mmap_out;

			for (i = 0; i < num; i++)
				gpfns[i] = atop(umap->phys_base_addr +
				    (i + index) * PAGE_SIZE);

			bzero(errs, sizeof(*errs) * num);

			error = HYPERVISOR_memory_op(
			    XENMEM_add_to_physmap_range, &add);
			if (error != 0) {
				error = xen_translate_error(error);
				goto mmap_out;
			}

			for (i = 0; i < num; i++) {
				if (errs[i] != 0) {
					errs[i] = xen_translate_error(errs[i]);

					/* Mark the page as invalid. */
					BIT_SET(mmap->num, index + i,
					    umap->err);
				}
			}

			error = copyout(errs, &mmap->err[index],
			    sizeof(errs[0]) * num);
			if (error != 0)
				goto mmap_out;
		}

		umap->mapped = true;

mmap_out:
		free(idxs, M_PRIVCMD);
		free(gpfns, M_PRIVCMD);
		free(errs, M_PRIVCMD);
		if (!umap->mapped)
			free(umap->err, M_PRIVCMD);

		break;
	}
	default:
		error = ENOSYS;
		break;
	}

	return (error);
}