/* * Define the code needed before returning to user mode, for trap and * syscall. */ void userret(struct thread *td, struct trapframe *frame) { struct proc *p = td->td_proc; CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid, td->td_name); KASSERT((p->p_flag & P_WEXIT) == 0, ("Exiting process returns to usermode")); #if 0 #ifdef DIAGNOSTIC /* Check that we called signotify() enough. */ PROC_LOCK(p); thread_lock(td); if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || (td->td_flags & TDF_ASTPENDING) == 0)) printf("failed to set signal flags properly for ast()\n"); thread_unlock(td); PROC_UNLOCK(p); #endif #endif #ifdef KTRACE KTRUSERRET(td); #endif /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); /* * Let the scheduler adjust our priority etc. */ sched_userret(td); KASSERT(td->td_locks == 0, ("userret: Returning with %d locks held.", td->td_locks)); #ifdef VIMAGE /* Unfortunately td_vnet_lpush needs VNET_DEBUG. */ VNET_ASSERT(curvnet == NULL, ("%s: Returning on td %p (pid %d, %s) with vnet %p set in %s", __func__, td, p->p_pid, td->td_name, curvnet, (td->td_vnet_lpush != NULL) ? td->td_vnet_lpush : "N/A")); #endif #ifdef XEN PT_UPDATES_FLUSH(); #endif }
static void cpu_initialize_context(unsigned int cpu) { /* vcpu_guest_context_t is too large to allocate on the stack. * Hence we allocate statically and protect it with a lock */ vm_page_t m[NPGPTD + 2]; static vcpu_guest_context_t ctxt; vm_offset_t boot_stack; vm_offset_t newPTD; vm_paddr_t ma[NPGPTD]; int i; /* * Page 0,[0-3] PTD * Page 1, [4] boot stack * Page [5] PDPT * */ for (i = 0; i < NPGPTD + 2; i++) { m[i] = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); pmap_zero_page(m[i]); } boot_stack = kmem_alloc_nofault(kernel_map, PAGE_SIZE); newPTD = kmem_alloc_nofault(kernel_map, NPGPTD * PAGE_SIZE); ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; #ifdef PAE pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); for (i = 0; i < NPGPTD; i++) { ((vm_paddr_t *)boot_stack)[i] = ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V; } #endif /* * Copy cpu0 IdlePTD to new IdlePTD - copying only * kernel mappings */ pmap_qenter(newPTD, m, 4); memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), nkpt*sizeof(vm_paddr_t)); pmap_qremove(newPTD, 4); kmem_free(kernel_map, newPTD, 4 * PAGE_SIZE); /* * map actual idle stack to boot_stack */ pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1])); rw_wlock(&pvh_global_lock); for (i = 0; i < 4; i++) { int pdir = (PTDPTDI + i) / NPDEPG; int curoffset = (PTDPTDI + i) % NPDEPG; xen_queue_pt_update((vm_paddr_t) ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), ma[i]); } PT_UPDATES_FLUSH(); rw_wunlock(&pvh_global_lock); memset(&ctxt, 0, sizeof(ctxt)); ctxt.flags = VGCF_IN_KERNEL; ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.eip = (unsigned long)init_secondary; ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); smp_trap_init(ctxt.trap_ctxt); ctxt.ldt_ents = 0; ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); ctxt.gdt_ents = 512; #ifdef __i386__ ctxt.user_regs.esp = boot_stack + PAGE_SIZE; ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.kernel_sp = boot_stack + PAGE_SIZE; ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]); #else /* __x86_64__ */ ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.kernel_sp = idle->thread.rsp0; ctxt.event_callback_eip = (unsigned long)hypervisor_callback; ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; ctxt.syscall_callback_eip = (unsigned long)system_call; ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); #endif printf("gdtpfn=%lx pdptpfn=%lx\n", ctxt.gdt_frames[0], ctxt.ctrlreg[3] >> PAGE_SHIFT); PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); DELAY(3000); PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); }
/* * Define the code needed before returning to user mode, for trap and * syscall. */ void userret(struct thread *td, struct trapframe *frame) { struct proc *p = td->td_proc; CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid, td->td_name); KASSERT((p->p_flag & P_WEXIT) == 0, ("Exiting process returns to usermode")); #if 0 #ifdef DIAGNOSTIC /* Check that we called signotify() enough. */ PROC_LOCK(p); thread_lock(td); if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || (td->td_flags & TDF_ASTPENDING) == 0)) printf("failed to set signal flags properly for ast()\n"); thread_unlock(td); PROC_UNLOCK(p); #endif #endif #ifdef KTRACE KTRUSERRET(td); #endif /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); /* * Let the scheduler adjust our priority etc. */ sched_userret(td); #ifdef XEN PT_UPDATES_FLUSH(); #endif /* * Check for misbehavior. * * In case there is a callchain tracing ongoing because of * hwpmc(4), skip the scheduler pinning check. * hwpmc(4) subsystem, infact, will collect callchain informations * at ast() checkpoint, which is past userret(). */ WITNESS_WARN(WARN_PANIC, NULL, "userret: returning"); KASSERT(td->td_critnest == 0, ("userret: Returning in a critical section")); KASSERT(td->td_locks == 0, ("userret: Returning with %d locks held", td->td_locks)); KASSERT(td->td_rw_rlocks == 0, ("userret: Returning with %d rwlocks held in read mode", td->td_rw_rlocks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); KASSERT(td->td_no_sleeping == 0, ("userret: Returning with sleep disabled")); KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, ("userret: Returning with with pinned thread")); KASSERT(td->td_vp_reserv == 0, ("userret: Returning while holding vnode reservation")); KASSERT((td->td_flags & TDF_SBDRY) == 0, ("userret: Returning with stop signals deferred")); #ifdef VIMAGE /* Unfortunately td_vnet_lpush needs VNET_DEBUG. */ VNET_ASSERT(curvnet == NULL, ("%s: Returning on td %p (pid %d, %s) with vnet %p set in %s", __func__, td, p->p_pid, td->td_name, curvnet, (td->td_vnet_lpush != NULL) ? td->td_vnet_lpush : "N/A")); #endif #ifdef RACCT PROC_LOCK(p); while (p->p_throttled == 1) msleep(p->p_racct, &p->p_mtx, 0, "racct", 0); PROC_UNLOCK(p); #endif }
/* Full PV mode suspension. */ static void xctrl_suspend() { int i, j, k, fpp, suspend_cancelled; unsigned long max_pfn, start_info_mfn; EVENTHANDLER_INVOKE(power_suspend); #ifdef SMP struct thread *td; cpuset_t map; u_int cpuid; /* * Bind us to CPU 0 and stop any other VCPUs. */ td = curthread; thread_lock(td); sched_bind(td, 0); thread_unlock(td); cpuid = PCPU_GET(cpuid); KASSERT(cpuid == 0, ("xen_suspend: not running on cpu 0")); map = all_cpus; CPU_CLR(cpuid, &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) stop_cpus(map); #endif /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. */ mtx_lock(&Giant); if (DEVICE_SUSPEND(root_bus) != 0) { mtx_unlock(&Giant); printf("%s: device_suspend failed\n", __func__); #ifdef SMP if (!CPU_EMPTY(&map)) restart_cpus(map); #endif return; } mtx_unlock(&Giant); local_irq_disable(); xencons_suspend(); gnttab_suspend(); intr_suspend(); max_pfn = HYPERVISOR_shared_info->arch.max_pfn; void *shared_info = HYPERVISOR_shared_info; HYPERVISOR_shared_info = NULL; pmap_kremove((vm_offset_t) shared_info); PT_UPDATES_FLUSH(); xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn); xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn); /* * We'll stop somewhere inside this hypercall. When it returns, * we'll start resuming after the restore. */ start_info_mfn = VTOMFN(xen_start_info); pmap_suspend(); suspend_cancelled = HYPERVISOR_suspend(start_info_mfn); pmap_resume(); pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info); HYPERVISOR_shared_info = shared_info; HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = VTOMFN(xen_pfn_to_mfn_frame_list_list); fpp = PAGE_SIZE/sizeof(unsigned long); for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { if ((j % fpp) == 0) { k++; xen_pfn_to_mfn_frame_list_list[k] = VTOMFN(xen_pfn_to_mfn_frame_list[k]); j = 0; } xen_pfn_to_mfn_frame_list[k][j] = VTOMFN(&xen_phys_machine[i]); } HYPERVISOR_shared_info->arch.max_pfn = max_pfn; gnttab_resume(); intr_resume(suspend_cancelled != 0); local_irq_enable(); xencons_resume(); #ifdef CONFIG_SMP for_each_cpu(i) vcpu_prepare(i); #endif /* * Only resume xenbus /after/ we've prepared our VCPUs; otherwise * the VCPU hotplug callback can race with our vcpu_prepare */ mtx_lock(&Giant); DEVICE_RESUME(root_bus); mtx_unlock(&Giant); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); if (!CPU_EMPTY(&map)) restart_cpus(map); #endif EVENTHANDLER_INVOKE(power_resume); }