int init_fpu(struct task_struct *tsk)
{
	if (tsk_used_math(tsk)) {
		if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current)
			unlazy_fpu(tsk, task_pt_regs(tsk));
		return 0;
	}

	/*
	 * Memory allocation at the first usage of the FPU and other state.
	 */
	if (!tsk->thread.xstate) {
		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
						      GFP_KERNEL);
		if (!tsk->thread.xstate)
			return -ENOMEM;
	}

	if (boot_cpu_data.flags & CPU_HAS_FPU) {
		struct sh_fpu_hard_struct *fp = &tsk->thread.xstate->hardfpu;
		memset(fp, 0, xstate_size);
		fp->fpscr = FPSCR_INIT;
	} else {
		struct sh_fpu_soft_struct *fp = &tsk->thread.xstate->softfpu;
		memset(fp, 0, xstate_size);
		fp->fpscr = FPSCR_INIT;
	}

	set_stopped_child_used_math(tsk);
	return 0;
}
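The allocate-on-first-use pattern in init_fpu() is the core idea here: FPU context stays unallocated until a task first touches math, then is zeroed and given a sane FPSCR. A minimal standalone sketch of that pattern, using hypothetical names (task, fpu_state, task_init_fpu) rather than the kernel's:

#include <stdlib.h>
#include <string.h>

struct fpu_state {
	unsigned int fpscr;		/* stand-in for the real FPSCR word */
	unsigned char regs[128];	/* stand-in for the register file */
};

struct task {
	struct fpu_state *xstate;	/* NULL until first FPU use */
	int used_math;
};

/* Allocate and initialise FPU state on first use; later calls are no-ops. */
static int task_init_fpu(struct task *t)
{
	if (t->used_math)
		return 0;			/* context already exists */

	if (!t->xstate) {
		t->xstate = malloc(sizeof(*t->xstate));
		if (!t->xstate)
			return -1;		/* out of memory; caller decides */
	}

	memset(t->xstate, 0, sizeof(*t->xstate));
	t->xstate->fpscr = 0;			/* stand-in for FPSCR_INIT */
	t->used_math = 1;
	return 0;
}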
/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (in this case,
 * local interrupts are disabled at the call-site in entry.S).
 */
asmlinkage void math_state_restore(void)
{
	struct thread_info *thread = current_thread_info();
	struct task_struct *tsk = thread->task;

	if (!tsk_used_math(tsk)) {
		local_irq_enable();
		/*
		 * does a slab alloc which can sleep
		 */
		if (init_fpu(tsk)) {
			/*
			 * ran out of memory!
			 */
			do_group_exit(SIGKILL);
			return;
		}
		local_irq_disable();
	}

	clts();				/* Allow maths ops (or we recurse) */

	/*
	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
	 */
	if (unlikely(restore_fpu_checking(tsk))) {
		stts();
		force_sig(SIGSEGV, tsk);
		return;
	}

	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
	tsk->fpu_counter++;
}
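Note the interrupt dance around init_fpu(): the function is entered with interrupts off, but the slab allocation can sleep, so interrupts are enabled just around it and disabled again before the FPU is touched. The same shape in isolation, with stubbed helpers standing in for the kernel's (not meaningful against real hardware, of course):

#include <stdio.h>

static void local_irq_enable(void)  { puts("irqs on");  }	/* stub */
static void local_irq_disable(void) { puts("irqs off"); }	/* stub */
static int  sleeping_alloc(void)    { return 0; }		/* stub: may sleep */

/* Entered with interrupts off; must also return with interrupts off. */
static int first_use_setup(void)
{
	int err;

	local_irq_enable();		/* allocation may sleep, so allow irqs */
	err = sleeping_alloc();
	local_irq_disable();		/* restore the caller's contract */
	return err;
}

int main(void)
{
	return first_use_setup();
}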
void fpu_state_restore(struct pt_regs *regs)
{
	struct task_struct *tsk = current;

	if (unlikely(!user_mode(regs))) {
		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
		BUG();
		return;
	}

	if (!tsk_used_math(tsk)) {
		local_irq_enable();
		/*
		 * does a slab alloc which can sleep
		 */
		if (init_fpu(tsk)) {
			/*
			 * ran out of memory!
			 */
			do_group_exit(SIGKILL);
			return;
		}
		local_irq_disable();
	}

	grab_fpu(regs);
	__fpu_state_restore();
}
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
			     *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	bool preload_fpu;

	/*
	 * If the task has used the FPU for more than 5 consecutive
	 * timeslices, restore its math state eagerly rather than taking
	 * a device-not-available trap later.
	 */
	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

	__unlazy_fpu(prev_p);

	/* we're going to use this soon, after a few expensive things */
	if (preload_fpu)
		prefetch(next->xstate);

	/* Reload esp0. */
	load_sp0(tss, next);

	/* Save away %gs before loading the new TLS descriptors. */
	lazy_save_gs(prev->gs);

	/* Load the per-thread Thread-Local Storage descriptor. */
	load_TLS(next, cpu);

	/* Restore IOPL if needed (e.g. running virtualized at non-zero CPL). */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

	/* Now maybe handle debug registers and/or IO bitmaps. */
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

	/* Make sure clts is run while the CPU state updates are batched. */
	if (preload_fpu)
		clts();

	arch_end_context_switch(next_p);

	if (preload_fpu)
		__math_state_restore();

	/* Restore %gs if needed (which is common). */
	if (prev->gs | next->gs)
		lazy_load_gs(next->gs);

	percpu_write(current_task, next_p);

	return prev_p;
}
static inline int elf32_core_copy_task_xfpregs(struct task_struct *t,
					       elf_fpxregset_t *xfpu)
{
	struct pt_regs *regs = task_pt_regs(t);

	if (!tsk_used_math(t))
		return 0;
	if (t == current)
		unlazy_fpu(t);
	memcpy(xfpu, &t->thread.i387.fxsave, sizeof(elf_fpxregset_t));
	xfpu->fcs = regs->cs;
	xfpu->fos = t->thread.ds;	/* right? */
	return 1;
}
/*
 * this gets called so that we can store lazy state into memory and copy the
 * current task into the new thread.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	*dst = *src;

	dst->thread.fpu_counter = 0;
	dst->thread.fpu.has_fpu = 0;
	dst->thread.fpu.state = NULL;
	task_disable_lazy_fpu_restore(dst);

	if (tsk_used_math(src)) {
		int err = fpu_alloc(&dst->thread.fpu);

		if (err)
			return err;
		fpu_copy(dst, src);
	}
	return 0;
}
/*
 * this gets called so that we can store lazy state into memory and copy the
 * current task into the new thread.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	*dst = *src;

	dst->thread.fpu_counter = 0;
	dst->thread.fpu.has_fpu = 0;
	dst->thread.fpu.last_cpu = ~0;
	dst->thread.fpu.state = NULL;

	if (tsk_used_math(src)) {
		int err = fpu_alloc(&dst->thread.fpu);

		if (err)
			return err;
		fpu_copy(dst, src);
	}
	return 0;
}
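Both versions of arch_dup_task_struct() above have the same shape: shallow-copy the whole task, detach the FPU pointer so parent and child never share a state buffer, then deep-copy only if the parent ever used math. A minimal sketch of that shape with hypothetical types:

#include <stdlib.h>
#include <string.h>

struct fpu { void *state; size_t size; };
struct task { struct fpu fpu; int used_math; };

/* Shallow-copy the task, then give the child its own FPU buffer. */
static int dup_task(struct task *dst, const struct task *src)
{
	*dst = *src;		/* shallow copy: dst->fpu.state still aliases src's */
	dst->fpu.state = NULL;	/* detach before allocating */

	if (src->used_math) {
		dst->fpu.state = malloc(src->fpu.size);
		if (!dst->fpu.state)
			return -1;
		memcpy(dst->fpu.state, src->fpu.state, src->fpu.size);
	}
	return 0;
}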
static void init_fp_ctx(struct task_struct *target)
{
	/* If FP has been used then the target already has context */
	if (tsk_used_math(target))
		return;

	/* Begin with data registers set to all 1s... */
	memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));

	/* FCSR has been preset by `mips_set_personality_nan'. */

	/*
	 * Record that the target has "used" math, such that the context
	 * just initialised, and any modifications made by the caller,
	 * aren't discarded.
	 */
	set_stopped_child_used_math(target);
}
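The memset(..., ~0, ...) above works because memset() truncates its value argument to an unsigned char, so ~0 becomes the byte 0xff and every FP register reads back as all ones, the same -1 pattern ptrace_getfpregs() reports for tasks that never used math. A quick standalone check of that equivalence:

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint64_t r;

	memset(&r, ~0, sizeof(r));	/* ~0 truncates to the byte 0xff */
	assert(r == (uint64_t)-1);	/* all 64 bits end up set */
	return 0;
}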
static inline int elf32_core_copy_task_fpregs(struct task_struct *tsk,
					      struct pt_regs *regs,
					      elf_fpregset_t *fpu)
{
	struct _fpstate_ia32 *fpstate = (void *)fpu;
	mm_segment_t oldfs = get_fs();

	if (!tsk_used_math(tsk))
		return 0;
	if (!regs)
		regs = task_pt_regs(tsk);
	if (tsk == current)
		unlazy_fpu(tsk);
	set_fs(KERNEL_DS);
	save_i387_ia32(tsk, fpstate, regs, 1);
	/* Correct for i386 bug. It puts the fop into the upper 16 bits of
	   the tag word (like FXSAVE), not into the fcs */
	fpstate->cssel |= fpstate->tag & 0xffff0000;
	set_fs(oldfs);
	return 1;
}
int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
{
	int i;

	if (!access_ok(VERIFY_WRITE, data, 33 * 8))
		return -EIO;

	if (tsk_used_math(child)) {
		union fpureg *fregs = get_fpu_regs(child);

		for (i = 0; i < 32; i++)
			__put_user(get_fpr64(&fregs[i], 0),
				   i + (__u64 __user *)data);
	} else {
		for (i = 0; i < 32; i++)
			__put_user((__u64)-1, i + (__u64 __user *)data);
	}

	/* data is __u32 *, so indices 64 and 65 land just past the
	 * thirty-two 64-bit FP registers written above. */
	__put_user(child->thread.fpu.fcr31, data + 64);
	__put_user(boot_cpu_data.fpu_id, data + 65);

	return 0;
}
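Seen from a tracer, the buffer ptrace_getfpregs() fills is 32 64-bit FP registers followed by two 32-bit words. A hedged sketch of the tracer side, assuming a Linux MIPS target where <sys/ptrace.h> exposes PTRACE_GETFPREGS (the struct here is hypothetical, laid out to match what the kernel function writes):

#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>

struct mips_fpregs {
	uint64_t fpr[32];	/* all ones if the tracee never used math */
	uint32_t fcr31;		/* FP control/status register */
	uint32_t fpu_id;	/* FP implementation/version register */
};

/* pid must name a ptrace-stopped tracee; returns 0 on success. */
static int read_fpregs(pid_t pid, struct mips_fpregs *out)
{
	return ptrace(PTRACE_GETFPREGS, pid, NULL, out) == 0 ? 0 : -1;
}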
/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (in this case,
 * local interrupts are disabled at the call-site in entry.S).
 */
asmlinkage void math_state_restore(void)
{
	struct thread_info *thread = current_thread_info();
	struct task_struct *tsk = thread->task;

	if (!tsk_used_math(tsk)) {
		local_irq_enable();
		/*
		 * does a slab alloc which can sleep
		 */
		if (init_fpu(tsk)) {
			/*
			 * ran out of memory!
			 */
			do_group_exit(SIGKILL);
			return;
		}
		local_irq_disable();
	}

	clts();				/* Allow maths ops (or we recurse) */

	__math_state_restore();
}
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
			     *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);

	if (next_p->mm)
		load_user_cs_desc(cpu, next_p->mm);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0.
	 */
	load_sp0(tss, next);

	/*
	 * Save away %gs. No need to save %fs, as it was saved on the
	 * stack on entry. No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel. Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs. This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
	 */
	lazy_save_gs(prev->gs);

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 */
	load_TLS(next, cpu);

	/*
	 * Restore IOPL if needed. In normal use, the flags restore
	 * in the switch assembly will handle this. But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

	/*
	 * Now maybe handle debug registers and/or IO bitmaps
	 */
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();

	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		lazy_load_gs(next->gs);

	percpu_write(current_task, next_p);

	return prev_p;
}
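The fpu_counter heuristic used in both __switch_to() variants reduces to one predicate: the incoming task gets its math state restored eagerly only if it has used the FPU in more than five consecutive timeslices, trading a possibly wasted restore against a device-not-available trap. Isolated, with hypothetical names:

/* Sketch of the eager-restore policy above; not kernel code. */
struct task_sketch {
	int used_math;			/* has valid FPU context */
	unsigned char fpu_counter;	/* consecutive timeslices using the FPU */
};

static int should_preload_fpu(const struct task_sketch *next)
{
	/* Heavy recent FPU use: restoring now is likely cheaper than
	 * taking a trap on the first FP instruction after the switch. */
	return next->used_math && next->fpu_counter > 5;
}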
long arch_ptrace(struct task_struct *child, long request,
		 unsigned long addr, unsigned long data)
{
	int ret;
	void __user *addrp = (void __user *) addr;
	void __user *datavp = (void __user *) data;
	unsigned long __user *datalp = (void __user *) data;

	switch (request) {
	/* when I and D space are separate, these will need to be fixed. */
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA:
		ret = generic_ptrace_peekdata(child, addr, data);
		break;

	/* Read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		struct pt_regs *regs;
		union fpureg *fregs;
		unsigned long tmp = 0;

		regs = task_pt_regs(child);
		ret = 0;  /* Default return value. */

		switch (addr) {
		case 0 ... 31:
			tmp = regs->regs[addr];
			break;
		case FPR_BASE ... FPR_BASE + 31:
			if (!tsk_used_math(child)) {
				/* FP not yet used */
				tmp = -1;
				break;
			}
			fregs = get_fpu_regs(child);
#ifdef CONFIG_32BIT
			if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
				/*
				 * The odd registers are actually the high
				 * order bits of the values stored in the even
				 * registers.
				 */
				tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
						addr & 1);
				break;
			}
#endif
			tmp = get_fpr64(&fregs[addr - FPR_BASE], 0);
			break;
		case PC:
			tmp = regs->cp0_epc;
			break;
		case CAUSE:
			tmp = regs->cp0_cause;
			break;
		case BADVADDR:
			tmp = regs->cp0_badvaddr;
			break;
		case MMHI:
			tmp = regs->hi;
			break;
		case MMLO:
			tmp = regs->lo;
			break;
#ifdef CONFIG_CPU_HAS_SMARTMIPS
		case ACX:
			tmp = regs->acx;
			break;
#endif
		case FPC_CSR:
			tmp = child->thread.fpu.fcr31;
			break;
		case FPC_EIR:
			/* implementation / version register */
			tmp = boot_cpu_data.fpu_id;
			break;
		case DSP_BASE ... DSP_BASE + 5: {
			dspreg_t *dregs;

			if (!cpu_has_dsp) {
				tmp = 0;
				ret = -EIO;
				goto out;
			}
			dregs = __get_dsp_regs(child);
			tmp = dregs[addr - DSP_BASE];
			break;
		}
		case DSP_CONTROL:
			if (!cpu_has_dsp) {
				tmp = 0;
				ret = -EIO;
				goto out;
			}
			tmp = child->thread.dsp.dspcontrol;
			break;
		default:
			tmp = 0;
			ret = -EIO;
			goto out;
		}
		ret = put_user(tmp, datalp);
		break;
	}

	/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = generic_ptrace_pokedata(child, addr, data);
		break;

	case PTRACE_POKEUSR: {
		struct pt_regs *regs;

		ret = 0;
		regs = task_pt_regs(child);

		switch (addr) {
		case 0 ... 31:
			regs->regs[addr] = data;
			/* System call number may have been changed */
			if (addr == 2)
				mips_syscall_update_nr(child, regs);
			else if (addr == 4 &&
				 mips_syscall_is_indirect(child, regs))
				mips_syscall_update_nr(child, regs);
			break;
		case FPR_BASE ... FPR_BASE + 31: {
			union fpureg *fregs = get_fpu_regs(child);

			init_fp_ctx(child);
#ifdef CONFIG_32BIT
			if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
				/*
				 * The odd registers are actually the high
				 * order bits of the values stored in the even
				 * registers.
				 */
				set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
					  addr & 1, data);
				break;
			}
#endif
			set_fpr64(&fregs[addr - FPR_BASE], 0, data);
			break;
		}
		case PC:
			regs->cp0_epc = data;
			break;
		case MMHI:
			regs->hi = data;
			break;
		case MMLO:
			regs->lo = data;
			break;
#ifdef CONFIG_CPU_HAS_SMARTMIPS
		case ACX:
			regs->acx = data;
			break;
#endif
		case FPC_CSR:
			init_fp_ctx(child);
			ptrace_setfcr31(child, data);
			break;
		case DSP_BASE ... DSP_BASE + 5: {
			dspreg_t *dregs;

			if (!cpu_has_dsp) {
				ret = -EIO;
				break;
			}
			dregs = __get_dsp_regs(child);
			dregs[addr - DSP_BASE] = data;
			break;
		}
		case DSP_CONTROL:
			if (!cpu_has_dsp) {
				ret = -EIO;
				break;
			}
			child->thread.dsp.dspcontrol = data;
			break;
		default:
			/* The rest are not allowed. */
			ret = -EIO;
			break;
		}
		break;
	}

	case PTRACE_GETREGS:
		ret = ptrace_getregs(child, datavp);
		break;

	case PTRACE_SETREGS:
		ret = ptrace_setregs(child, datavp);
		break;

	case PTRACE_GETFPREGS:
		ret = ptrace_getfpregs(child, datavp);
		break;

	case PTRACE_SETFPREGS:
		ret = ptrace_setfpregs(child, datavp);
		break;

	case PTRACE_GET_THREAD_AREA:
		ret = put_user(task_thread_info(child)->tp_value, datalp);
		break;

	case PTRACE_GET_WATCH_REGS:
		ret = ptrace_get_watch_regs(child, addrp);
		break;

	case PTRACE_SET_WATCH_REGS:
		ret = ptrace_set_watch_regs(child, addrp);
		break;

	default:
		ret = ptrace_request(child, request, addr, data);
		break;
	}
 out:
	return ret;
}
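The PEEKUSR path above is what a tracer hits when it reads one register at a time; addr is a register index, with 0-31 the GPRs and FPR_BASE onward the FP registers. A hedged tracer-side sketch (glibc spells the request PTRACE_PEEKUSER where the kernel says PTRACE_PEEKUSR; the fpr_base value must come from the target's <asm/ptrace.h>):

#include <errno.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/*
 * Read FP register n of a stopped tracee through the USER area.
 * Returns -1 with errno set on failure; note that -1 with errno == 0
 * is also what the kernel reports for every FPR of a tracee that
 * never used math.
 */
static long peek_fpr(pid_t pid, int n, long fpr_base)
{
	errno = 0;
	return ptrace(PTRACE_PEEKUSER, pid, (void *)(fpr_base + n), NULL);
}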