/* * This is called when a task is terminated, and also on exec(). * Clear machine-dependent state that is stored on the task. */ void machine_task_terminate(task_t task) { if (task) { user_ldt_t user_ldt; void *task_debug; #if HYPERVISOR if (task->hv_task_target) { hv_callbacks.task_destroy(task->hv_task_target); task->hv_task_target = NULL; } #endif user_ldt = task->i386_ldt; if (user_ldt != 0) { task->i386_ldt = 0; user_ldt_free(user_ldt); } task_debug = task->task_debug; if (task_debug != NULL) { task->task_debug = NULL; zfree(ids_zone, task_debug); } } }
/* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); }
void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. */ if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); }
/* * Clear registers on exec * XXX copied from ia32_signal.c. */ static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_gs = _ugssel; regs->tf_fs = _ufssel; regs->tf_es = _udatasel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = imgp->ps_strings; fpstate_drop(td); /* Do full restore on return so that we can change to a different %cs */ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); td->td_retval[1] = 0; }
void cpu_lwp_exit(void) { struct thread *td = curthread; struct pcb *pcb; struct pcb_ext *ext; /* * If we were using a private TSS do a forced-switch to ourselves * to switch back to the common TSS before freeing it. */ pcb = td->td_pcb; if ((ext = pcb->pcb_ext) != NULL) { crit_enter(); pcb->pcb_ext = NULL; lwkt_switch_return(td->td_switch(td)); crit_exit(); kmem_free(&kernel_map, (vm_offset_t)ext, ctob(IOPAGES + 1)); } user_ldt_free(pcb); if (pcb->pcb_flags & PCB_DBREGS) { /* * disable all hardware breakpoints */ reset_dbregs(); pcb->pcb_flags &= ~PCB_DBREGS; } td->td_gd->gd_cnt.v_swtch++; crit_enter_quick(td); if (td->td_flags & TDF_TSLEEPQ) tsleep_remove(td); lwkt_deschedule_self(td); lwkt_remove_tdallq(td); cpu_thread_exit(); }
/* * Finish a fork operation, with lwp lp2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) { struct pcb *pcb2; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct pcb *pcb1 = lp1->lwp_thread->td_pcb; struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt; if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) { pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len); user_ldt_free(pcb1); pcb1->pcb_ldt = pcb_ldt; set_user_ldt(pcb1); } } return; } #if NNPX > 0 /* Ensure that lp1's pcb is up to date. */ if (mdcpu->gd_npxthread == lp1->lwp_thread) npxsave(lp1->lwp_thread->td_savefpu); #endif /* * Copy lp1's PCB. This really only applies to the * debug registers and FP state, but its faster to just copy the * whole thing. Because we only save the PCB at switchout time, * the register state may not be current. */ pcb2 = lp2->lwp_thread->td_pcb; *pcb2 = *lp1->lwp_thread->td_pcb; /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies the user mode register values. The * 16 byte offset saves space for vm86, and must match * common_tss.esp0 (kernel stack pointer on entry from user mode) * * pcb_esp must allocate an additional call-return pointer below * the trap frame which will be restored by cpu_restore from * PCB_EIP, and the thread's td_sp pointer must allocate an * additonal two worsd below the pcb_esp call-return pointer to * hold the LWKT restore function pointer and eflags. * * The LWKT restore function pointer must be set to cpu_restore, * which is our standard heavy weight process switch-in function. * YYY eventually we should shortcut fork_return and fork_trampoline * to use the LWKT restore function directly so we can get rid of * all the extra crap we are setting up. */ lp2->lwp_md.md_regs = (struct trapframe *)((char *)pcb2 - 16) - 1; bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs)); /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_cr3 = vtophys(vmspace_pmap(lp2->lwp_proc->p_vmspace)->pm_pdir); pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)lp2->lwp_md.md_regs - sizeof(void *); pcb2->pcb_ebx = (int)lp2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_esp - sizeof(void *)); *(u_int32_t *)lp2->lwp_thread->td_sp = PSL_USER; lp2->lwp_thread->td_sp -= sizeof(void *); *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore; /* * pcb2->pcb_ldt: duplicated below, if necessary. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above (always 0 here). * pcb2->pcb_onfault: cloned above (always NULL here). * pcb2->pcb_onfault_sp:cloned above (don't care) */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = NULL; /* Copy the LDT, if necessary. */ if (pcb2->pcb_ldt != NULL) { if (flags & RFMEM) { pcb2->pcb_ldt->ldt_refcnt++; } else { pcb2->pcb_ldt = user_ldt_alloc(pcb2, pcb2->pcb_ldt->ldt_len); } } bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls, sizeof(lp2->lwp_thread->td_tls)); /* * Now, cpu_switch() can schedule the new lwp. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new lwp's pointer. fork_trampoline() * will set up a stack to call fork_return(lp, frame); to complete * the return to user-mode. */ }
/* * Clear registers on exec */ void exec_setregs(u_long entry, u_long stack, u_long ps_strings) { struct thread *td = curthread; struct lwp *lp = td->td_lwp; struct trapframe *regs = lp->lwp_md.md_regs; struct pcb *pcb = lp->lwp_thread->td_pcb; /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = 0; regs->tf_ds = 0; regs->tf_es = 0; regs->tf_fs = 0; regs->tf_gs = 0; regs->tf_cs = 0; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == td->td_pcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ pcb->pcb_flags &= ~FP_SOFTFP; /* * note: do not set CR0_TS here. npxinit() must do it after clearing * gd_npxthread. Otherwise a preemptive interrupt thread may panic * in npxdna(). */ crit_enter(); #if 0 load_cr0(rcr0() | CR0_MP); #endif #if NNPX > 0 /* Initialize the npx (if any) for the current process. */ npxinit(); #endif crit_exit(); /* * note: linux emulator needs edx to be 0x0 on entry, which is * handled in execve simply by setting the 64 bit syscall * return value to 0. */ }
int i386_set_ldt( uint32_t *retval, uint32_t start_sel, uint32_t descs, /* out */ uint32_t num_sels) { user_ldt_t new_ldt, old_ldt; struct real_descriptor *dp; unsigned int i; unsigned int min_selector = LDTSZ_MIN; /* do not allow the system selectors to be changed */ task_t task = current_task(); unsigned int ldt_count; kern_return_t err; if (start_sel != LDT_AUTO_ALLOC && (start_sel != 0 || num_sels != 0) && (start_sel < min_selector || start_sel >= LDTSZ)) return EINVAL; if (start_sel != LDT_AUTO_ALLOC && (uint64_t)start_sel + (uint64_t)num_sels > LDTSZ) /* cast to uint64_t to detect wrap-around */ return EINVAL; task_lock(task); old_ldt = task->i386_ldt; if (start_sel == LDT_AUTO_ALLOC) { if (old_ldt) { unsigned int null_count; struct real_descriptor null_ldt; bzero(&null_ldt, sizeof(null_ldt)); /* * Look for null selectors among the already-allocated * entries. */ null_count = 0; i = 0; while (i < old_ldt->count) { if (!memcmp(&old_ldt->ldt[i++], &null_ldt, sizeof(null_ldt))) { null_count++; if (null_count == num_sels) break; /* break out of while loop */ } else { null_count = 0; } } /* * If we broke out of the while loop, i points to the selector * after num_sels null selectors. Otherwise it points to the end * of the old LDTs, and null_count is the number of null selectors * at the end. * * Either way, there are null_count null selectors just prior to * the i-indexed selector, and either null_count >= num_sels, * or we're at the end, so we can extend. */ start_sel = old_ldt->start + i - null_count; } else { start_sel = LDTSZ_MIN; } if (start_sel + num_sels > LDTSZ) { task_unlock(task); return ENOMEM; } } if (start_sel == 0 && num_sels == 0) { new_ldt = NULL; } else { /* * Allocate new LDT */ unsigned int begin_sel = start_sel; unsigned int end_sel = begin_sel + num_sels; if (old_ldt != NULL) { if (old_ldt->start < begin_sel) begin_sel = old_ldt->start; if (old_ldt->start + old_ldt->count > end_sel) end_sel = old_ldt->start + old_ldt->count; } ldt_count = end_sel - begin_sel; new_ldt = (user_ldt_t)kalloc(sizeof(struct user_ldt) + (ldt_count * sizeof(struct real_descriptor))); if (new_ldt == NULL) { task_unlock(task); return ENOMEM; } new_ldt->start = begin_sel; new_ldt->count = ldt_count; /* * Have new LDT. If there was a an old ldt, copy descriptors * from old to new. */ if (old_ldt) { bcopy(&old_ldt->ldt[0], &new_ldt->ldt[old_ldt->start - begin_sel], old_ldt->count * sizeof(struct real_descriptor)); /* * If the old and new LDTs are non-overlapping, fill the * center in with null selectors. */ if (old_ldt->start + old_ldt->count < start_sel) bzero(&new_ldt->ldt[old_ldt->count], (start_sel - (old_ldt->start + old_ldt->count)) * sizeof(struct real_descriptor)); else if (old_ldt->start > start_sel + num_sels) bzero(&new_ldt->ldt[num_sels], (old_ldt->start - (start_sel + num_sels)) * sizeof(struct real_descriptor)); } /* * Install new descriptors. */ if (descs != 0) { err = copyin(descs, (char *)&new_ldt->ldt[start_sel - begin_sel], num_sels * sizeof(struct real_descriptor)); if (err != 0) { task_unlock(task); user_ldt_free(new_ldt); return err; } } else { bzero(&new_ldt->ldt[start_sel - begin_sel], num_sels * sizeof(struct real_descriptor)); } /* * Validate descriptors. * Only allow descriptors with user priviledges. */ for (i = 0, dp = (struct real_descriptor *) &new_ldt->ldt[start_sel - begin_sel]; i < num_sels; i++, dp++) { switch (dp->access & ~ACC_A) { case 0: case ACC_P: /* valid empty descriptor */ break; case ACC_P | ACC_PL_U | ACC_DATA: case ACC_P | ACC_PL_U | ACC_DATA_W: case ACC_P | ACC_PL_U | ACC_DATA_E: case ACC_P | ACC_PL_U | ACC_DATA_EW: case ACC_P | ACC_PL_U | ACC_CODE: case ACC_P | ACC_PL_U | ACC_CODE_R: case ACC_P | ACC_PL_U | ACC_CODE_C: case ACC_P | ACC_PL_U | ACC_CODE_CR: case ACC_P | ACC_PL_U | ACC_CALL_GATE_16: case ACC_P | ACC_PL_U | ACC_CALL_GATE: break; default: task_unlock(task); user_ldt_free(new_ldt); return EACCES; } } } task->i386_ldt = new_ldt; /* new LDT for task */ /* * Switch to new LDT. We need to do this on all CPUs, since * another thread in this same task may be currently running, * and we need to make sure the new LDT is in place * throughout the task before returning to the user. */ mp_rendezvous_no_intrs(user_ldt_set_action, task); task_unlock(task); /* free old LDT. We can't do this until after we've * rendezvoused with all CPUs, in case another thread * in this task was in the process of context switching. */ if (old_ldt) user_ldt_free(old_ldt); *retval = start_sel; return 0; }