/*
 * Copy the floating point context of the forked thread.
 *
 * 'lwp' is the parent and 'clwp' is the child.  The child receives the
 * parent's floating point queue and saved fprs value.  Its register
 * contents are then filled in either by fp_fksave() (when the parent is
 * the current thread and an fpu exists) or by copying the parent's saved
 * register image.  The child is always left with fpu_en set.
 */
void
fp_fork(klwp_t *lwp, klwp_t *clwp)
{
	kfpu_t *child = lwptofpu(clwp);
	kfpu_t *parent = lwptofpu(lwp);
	int n;

	/* duplicate the parent's fp queue, one entry at a time */
	child->fpu_qcnt = parent->fpu_qcnt;
	for (n = 0; n < parent->fpu_qcnt; n++)
		child->fpu_q[n] = parent->fpu_q[n];

	/* the child starts out with the parent's saved fprs */
	child->fpu_fprs = parent->fpu_fprs;

	if (ttolwp(curthread) != lwp || !fpu_exists) {
		/*
		 * The parent is not the running thread (or there is no
		 * fpu): take the register values from the parent's saved
		 * copy.
		 */
		for (n = 0; n < 32; n++)
			child->fpu_fr.fpu_regs[n] = parent->fpu_fr.fpu_regs[n];
		for (n = 16; n < 32; n++)
			child->fpu_fr.fpu_dregs[n] =
			    parent->fpu_fr.fpu_dregs[n];
	} else {
		/*
		 * The parent is the running thread: save the context of
		 * the parent into the child's fpu structure.
		 */
		fp_fksave(child);
	}

	child->fpu_en = 1;
}
/*
 * Fill in the extra register state area at 'xrp' with the specified
 * lwp's platform-dependent floating-point extra register state (the GSR).
 *
 * NOTE: 'lwp' might not correspond to 'curthread' since this is
 * called from code in /proc to get the registers of another lwp.
 */
void
xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xr = (prxregset_t *)xrp;
	kfpu_t *fpu = lwptofpu(lwp);
	uint32_t fprs_on = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t gsr_val;

	kpreempt_disable();

	/*
	 * fp_fksave() does not flush the GSR register into the lwp area,
	 * so when the lwp is the running one do that here.
	 */
	if (ttolwp(curthread) == lwp && fpu_exists) {
		fpu->fpu_fprs = _fp_read_fprs();

		/* make sure FPRS_FEF is set before touching the GSR */
		if ((fpu->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs_on);
			fpu->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs_on;
		}
		save_gsr(fpu);
	}

	/* read the value back out of the lwp's fpu structure */
	gsr_val = get_gsr(fpu);

	kpreempt_enable();

	PRXREG_GSR(xr) = gsr_val;
}
/*
 * Free lwp fpu regs.
 *
 * When 'lwp' belongs to the current thread the saved fprs is refreshed
 * from the hardware first.  fp_free() is then called only if the fpu
 * is marked enabled or FPRS_FEF is set in the saved fprs; 'isexec' is
 * passed through unchanged.
 */
void
lwp_freeregs(klwp_t *lwp, int isexec)
{
	kfpu_t *fpu = lwptofpu(lwp);

	if (lwptot(lwp) == curthread)
		fpu->fpu_fprs = _fp_read_fprs();

	/* nothing to release if floating point was never turned on */
	if (!fpu->fpu_en && !(fpu->fpu_fprs & FPRS_FEF))
		return;

	fp_free(fpu, isexec);
}
/*
 * Set the specified lwp's platform-dependent floating-point extra
 * register state (the GSR) from the register set at 'xrp'.
 *
 * The value is always stored in the lwp's fpu structure.  When 'lwp' is
 * the running lwp and an fpu exists it is also loaded into the hardware
 * via restore_gsr().
 */
void
xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xr = (prxregset_t *)xrp;
	kfpu_t *fpu = lwptofpu(lwp);
	uint32_t fprs_on = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t new_gsr = PRXREG_GSR(xr);

	kpreempt_disable();

	/* record the new value in the lwp's fpu structure */
	set_gsr(new_gsr, fpu);

	if ((lwp == ttolwp(curthread)) && fpu_exists) {
		fpu->fpu_fprs = _fp_read_fprs();

		/* make sure FPRS_FEF is set before touching the GSR */
		if ((fpu->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs_on);
			fpu->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs_on;
		}
		restore_gsr(fpu);
	}

	kpreempt_enable();
}
/*
 * Turn floating point off for 'fp', but only when 'fp' is the fpu
 * structure of the current thread's lwp; otherwise nothing is done.
 * The enable flag and saved fprs are cleared, and zero is written to
 * the hardware fprs between splhigh() and splx().
 * 'isexec' is not used.
 */
/*ARGSUSED1*/
void
fp_free(kfpu_t *fp, int isexec)
{
	uint32_t cleared = 0;
	int saved_spl;

	if (curthread->t_lwp == NULL || lwptofpu(curthread->t_lwp) != fp)
		return;

	fp->fpu_en = 0;
	fp->fpu_fprs = cleared;

	saved_spl = splhigh();
	_fp_write_fprs(cleared);
	splx(saved_spl);
}
void fp_enable(void) { klwp_id_t lwp; kfpu_t *fp; lwp = ttolwp(curthread); ASSERT(lwp != NULL); fp = lwptofpu(lwp); if (fpu_exists) { if (fp->fpu_en) { #ifdef DEBUG if (fpdispr) cmn_err(CE_NOTE, "fpu disabled, but already enabled\n"); #endif if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { fp->fpu_fprs = FPRS_FEF; #ifdef DEBUG if (fpdispr) cmn_err(CE_NOTE, "fpu disabled, saved fprs disabled\n"); #endif } _fp_write_fprs(FPRS_FEF); fp_restore(fp); } else { fp->fpu_en = 1; fp->fpu_fsr = 0; fp->fpu_fprs = FPRS_FEF; _fp_write_fprs(FPRS_FEF); fp_clearregs(fp); } } else { int i; if (!fp->fpu_en) { fp->fpu_en = 1; fp->fpu_fsr = 0; for (i = 0; i < 32; i++) fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */ for (i = 16; i < 32; i++) /* NaN */ fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1; } } }
/*
 * Set the GSR of 'lwp' from asr[ASR_GSR].
 *
 * Nothing is changed unless the lwp has fp enabled (fpu_en) or FPRS_FEF
 * is set in its fprs.  When 'lwp' is the running lwp the fprs is first
 * refreshed from the hardware, and, if an fpu exists, the new GSR is
 * also loaded into the hardware via restore_gsr().
 */
void
setfpasrs(klwp_t *lwp, asrset_t asr)
{
	kfpu_t *fpu = lwptofpu(lwp);
	uint32_t fprs_on = (FPRS_FEF|FPRS_DU|FPRS_DL);

	kpreempt_disable();

	if (ttolwp(curthread) == lwp)
		fpu->fpu_fprs = _fp_read_fprs();

	/* floating point not in use by this lwp: leave everything alone */
	if (!fpu->fpu_en && !(fpu->fpu_fprs & FPRS_FEF)) {
		kpreempt_enable();
		return;
	}

	/* record the new value in the lwp's fpu structure */
	set_gsr(asr[ASR_GSR], fpu);

	if (fpu_exists && ttolwp(curthread) == lwp) {
		/* make sure FPRS_FEF is set before touching the GSR */
		if ((fpu->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs_on);
			fpu->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs_on;
		}
		restore_gsr(fpu);
	}

	kpreempt_enable();
}
/*
 * Handle a precise floating point trap for the current lwp.
 *
 * 'rp' is the saved register state of the trapping context; rp->r_pc
 * addresses the instruction to be emulated.
 *
 * The instruction is fetched (via _fp_read_inst() for user mode, by a
 * direct load otherwise).  If the fetch fails, fp_traps() is called with
 * the failing trap type.  Otherwise a one-entry floating point queue is
 * built, the instruction is run through fpu_vis_sim(), and any resulting
 * trap is posted with fp_traps().  For user-mode traps the lwp state and
 * microstate are switched to LWP_SYS/LMS_TRAP on entry and restored on
 * exit.
 */
void
fp_precise(struct regs *rp)
{
	fp_simd_type fpsd;
	int inst_ftt;
	/* lets the fetched instruction word be viewed as fp_inst_type */
	union {
		uint_t i;
		fp_inst_type inst;
	} kluge;
	klwp_t *lwp = ttolwp(curthread);
	kfpu_t *fp = lwptofpu(lwp);
	uint64_t gsr;
	int mstate;	/* only set, and only read, when USERMODE() is true */

	/* bring the saved GSR up to date before reading it */
	if (fpu_exists)
		save_gsr(fp);
	gsr = get_gsr(fp);

	/*
	 * Get the instruction to be emulated from the pc saved by the trap.
	 * Note that the kernel is NOT prepared to handle a kernel fp
	 * exception if it can't pass successfully through the fp simulator.
	 *
	 * If the trap occurred in user mode, set lwp_state to LWP_SYS for the
	 * purposes of clock accounting and switch to the LMS_TRAP microstate.
	 */
	if (USERMODE(rp->r_tstate)) {
		inst_ftt = _fp_read_inst((uint32_t *)rp->r_pc, &kluge.i, &fpsd);
		mstate = new_mstate(curthread, LMS_TRAP);
		lwp->lwp_state = LWP_SYS;
	} else {
		kluge.i = *(uint_t *)rp->r_pc;
		inst_ftt = ftt_none;
	}

	if (inst_ftt != ftt_none) {
		/*
		 * Save the bad address and post the signal.
		 * It can only be an ftt_alignment or ftt_fault trap.
		 * XXX - How can this work w/mainsail and do_unaligned?
		 */
		fpsd.fp_trapaddr = (caddr_t)rp->r_pc;
		fp_traps(&fpsd, inst_ftt, rp);
	} else {
		/*
		 * Conjure up a floating point queue and advance the pc/npc
		 * to fake a deferred fp trap. We now run the fp simulator
		 * in fp_precise, while allowing setfpregs to call fp_runq,
		 * because this allows us to do the ugly machinations to
		 * inc/dec the pc depending on the trap type, as per
		 * bugid 1210159. fp_runq is still going to have the
		 * generic "how do I connect the fp queue to the pc/npc"
		 * problem alluded to in bugid 1192883, which is only a
		 * problem for a restorecontext of a v8 fp queue on a
		 * v9 system, which seems like the .000000001% case (on v9)!
		 */
		struct _fpq *pfpq = &fp->fpu_q->FQu.fpq;
		/*
		 * NOTE(review): this declaration shadows the function-scope
		 * fpsd; the simulator and fp_traps() calls below use this
		 * inner copy, not the one passed to _fp_read_inst().
		 */
		fp_simd_type fpsd;
		int fptrap;

		/* single queue entry describing the trapping instruction */
		pfpq->fpq_addr = (uint_t *)rp->r_pc;
		pfpq->fpq_instr = kluge.i;
		fp->fpu_qcnt = 1;
		fp->fpu_q_entrysize = sizeof (struct _fpq);

		kpreempt_disable();
		(void) flush_user_windows_to_stack(NULL);
		fptrap = fpu_vis_sim((fp_simd_type *)&fpsd,
		    (fp_inst_type *)pfpq->fpq_addr, rp,
		    (fsr_type *)&fp->fpu_fsr, gsr, kluge.i);

		/* update the hardware fp fsr state for sake of ucontext */
		if (fpu_exists)
			_fp_write_pfsr(&fp->fpu_fsr);

		if (fptrap) {
			/* back up the pc if the signal needs to be precise */
			if (fptrap != ftt_ieee) {
				fp->fpu_qcnt = 0;
			}
			/* post signal */
			fp_traps(&fpsd, fptrap, rp);

			/* decrement queue count for ieee exceptions */
			if (fptrap == ftt_ieee) {
				fp->fpu_qcnt = 0;
			}
		} else {
			/* simulated cleanly: the queue is now empty */
			fp->fpu_qcnt = 0;
		}

		/* update the software pcb copies of hardware fp registers */
		if (fpu_exists) {
			fp_save(fp);
		}
		kpreempt_enable();
	}

	/*
	 * Reset lwp_state to LWP_USER for the purposes of clock accounting,
	 * and restore the previously saved microstate.
	 */
	if (USERMODE(rp->r_tstate)) {
		(void) new_mstate(curthread, mstate);
		lwp->lwp_state = LWP_USER;
	}
}
/*
 * Process the floating point queue in lwp->lwp_pcb.
 *
 * Each entry in the floating point queue is processed in turn.
 * If processing an entry results in an exception fp_traps() is called to
 * handle the exception - this usually results in the generation of a signal
 * to be delivered to the user. There are 2 possible outcomes to this (note
 * that hardware generated signals cannot be held!):
 *
 *   1. If the signal is being ignored we continue to process the rest
 *	of the entries in the queue.
 *
 *   2. If arrangements have been made for return to a user signal handler,
 *	sendsig() will have copied the floating point queue onto the user's
 *	signal stack and zero'ed the queue count in the u_pcb. Note that
 *	this has the side effect of terminating fp_runq's processing loop.
 *	We will re-run the floating point queue on return from the user
 *	signal handler if necessary as part of normal setcontext processing.
 *
 * 'rp' is the saved register state; it is only passed through to
 * fp_traps() when an entry cannot be simulated.
 */
void
fp_runq(struct regs *rp)
{
	kfpu_t *fp = lwptofpu(curthread->t_lwp);
	struct _fq *fqp = fp->fpu_q;	/* entry currently being processed */
	fp_simd_type fpsd;
	uint64_t gsr = get_gsr(fp);

	/*
	 * don't preempt while manipulating the queue
	 */
	kpreempt_disable();

	while (fp->fpu_qcnt) {
		int fptrap;

		fptrap = fpu_simulator((fp_simd_type *)&fpsd,
		    (fp_inst_type *)fqp->FQu.fpq.fpq_addr,
		    (fsr_type *)&fp->fpu_fsr, gsr,
		    fqp->FQu.fpq.fpq_instr);
		if (fptrap) {
			/*
			 * Instruction could not be simulated so we will
			 * attempt to deliver a signal.
			 * We may be called again upon signal exit (setcontext)
			 * and can continue to process the queue then.
			 */
			if (fqp != fp->fpu_q) {
				int i;
				struct _fq *fqdp;

				/*
				 * We need to normalize the floating queue so
				 * the excepting instruction is at the head,
				 * so that the queue may be copied onto the
				 * user signal stack by sendsig().
				 */
				fqdp = fp->fpu_q;
				/* slide the remaining entries to the front */
				for (i = fp->fpu_qcnt; i; i--) {
					*fqdp++ = *fqp++;
				}
				fqp = fp->fpu_q;
			}
			fp->fpu_q_entrysize = sizeof (struct _fpq);

			/*
			 * fpu_simulator uses the fp registers directly but it
			 * uses the software copy of the fsr. We need to write
			 * that back to fpu so that fpu's state is current for
			 * ucontext.
			 */
			if (fpu_exists)
				_fp_write_pfsr(&fp->fpu_fsr);

			/* post signal */
			fp_traps(&fpsd, fptrap, rp);

			/*
			 * Break from loop to allow signal to be sent.
			 * If there are other instructions in the fp queue
			 * they will be processed when/if the user returns
			 * from the signal handler with a non-empty queue.
			 */
			break;
		}
		/* entry simulated successfully: consume it and move on */
		fp->fpu_qcnt--;
		fqp++;
	}

	/*
	 * fpu_simulator uses the fp registers directly, so we have
	 * to update the pcb copies to keep current, but it uses the
	 * software copy of the fsr, so we write that back to fpu
	 */
	if (fpu_exists) {
		int i;

		for (i = 0; i < 32; i++)
			_fp_read_pfreg(&fp->fpu_fr.fpu_regs[i], i);
		for (i = 16; i < 32; i++)
			_fp_read_pdreg(&fp->fpu_fr.fpu_dregs[i], i);
		_fp_write_pfsr(&fp->fpu_fsr);
	}

	kpreempt_enable();
}
/* * fp_disabled normally occurs when the first floating point in a non-threaded * program causes an fp_disabled trap. For threaded programs, the ILP32 threads * library calls the .setpsr fasttrap, which has been modified to also set the * appropriate bits in fpu_en and fpu_fprs, as well as to enable the %fprs, * as before. The LP64 threads library will write to the %fprs directly, * so fpu_en will never get updated for LP64 threaded programs, * although fpu_fprs will, via resume. */ void fp_disabled(struct regs *rp) { klwp_id_t lwp; kfpu_t *fp; int ftt; #ifdef SF_ERRATA_30 /* call causes fp-disabled */ /* * This code is here because sometimes the call instruction * generates an fp_disabled trap when the call offset is large. */ if (spitfire_call_bug) { uint_t instr = 0; extern void trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t mmu_fsr); if (USERMODE(rp->r_tstate)) { (void) fuword32((void *)rp->r_pc, &instr); } else { instr = *(uint_t *)(rp->r_pc); } if ((instr & 0xc0000000) == 0x40000000) { ill_fpcalls++; trap(rp, NULL, T_UNIMP_INSTR, 0); return; } } #endif /* SF_ERRATA_30 - call causes fp-disabled */ #ifdef CHEETAH_ERRATUM_109 /* interrupts not taken during fpops */ /* * UltraSPARC III will report spurious fp-disabled exceptions when * the pipe is full of fpops and an interrupt is triggered. By the * time we get here the interrupt has been taken and we just need * to return to where we came from and try again. 
*/ if (fpu_exists && _fp_read_fprs() & FPRS_FEF) return; #endif /* CHEETAH_ERRATUM_109 */ lwp = ttolwp(curthread); ASSERT(lwp != NULL); fp = lwptofpu(lwp); if (fpu_exists) { kpreempt_disable(); if (fp->fpu_en) { #ifdef DEBUG if (fpdispr) cmn_err(CE_NOTE, "fpu disabled, but already enabled\n"); #endif if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) { fp->fpu_fprs = FPRS_FEF; #ifdef DEBUG if (fpdispr) cmn_err(CE_NOTE, "fpu disabled, saved fprs disabled\n"); #endif } _fp_write_fprs(FPRS_FEF); fp_restore(fp); } else { fp->fpu_en = 1; fp->fpu_fsr = 0; fp->fpu_fprs = FPRS_FEF; _fp_write_fprs(FPRS_FEF); fp_clearregs(fp); } kpreempt_enable(); } else { fp_simd_type fpsd; int i; (void) flush_user_windows_to_stack(NULL); if (!fp->fpu_en) { fp->fpu_en = 1; fp->fpu_fsr = 0; for (i = 0; i < 32; i++) fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */ for (i = 16; i < 32; i++) /* NaN */ fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1; } if (ftt = fp_emulator(&fpsd, (fp_inst_type *)rp->r_pc, rp, (ulong_t *)rp->r_sp, fp)) { fp->fpu_q_entrysize = sizeof (struct _fpq); fp_traps(&fpsd, ftt, rp); } } }
/*
 * Copy regs from parent to child.
 *
 * 'lwp' is the parent and 'clwp' the child.  The parent's register area
 * is block-copied over the child's, after which the child-specific
 * fields that the copy overwrote (thread pointer, fpu pointer, window
 * buffer) are put back.  Any buffered register windows are copied, and
 * if the parent has floating point in use its GSR and fp context are
 * propagated to the child as well.
 */
void
lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
{
	kthread_t *child_thr, *parent_thr = lwptot(lwp);
	struct machpcb *cpcb = lwptompcb(clwp);
	struct machpcb *ppcb = lwptompcb(lwp);
	kfpu_t *child_fpu, *parent_fpu = lwptofpu(lwp);
	caddr_t child_wbuf;
	uint_t child_wstate;

	/*
	 * Stash the child's own thread, fpu, wbuf and wstate values; the
	 * bcopy below overwrites them with the parent's.
	 */
	child_thr = cpcb->mpcb_thread;
	child_fpu = cpcb->mpcb_fpu;
	child_wbuf = cpcb->mpcb_wbuf;
	child_wstate = cpcb->mpcb_wstate;

	/*
	 * Don't copy mpcb_frame since we hand-crafted it
	 * in thread_load().
	 */
	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);

	cpcb->mpcb_thread = child_thr;
	cpcb->mpcb_fpu = child_fpu;
	child_fpu->fpu_q = cpcb->mpcb_fpu_q;

	/*
	 * It is theoretically possible for the lwp's wstate to
	 * be different from its value assigned in lwp_stk_init,
	 * since lwp_stk_init assumed the data model of the process.
	 * Here, we took on the data model of the cloned lwp, so swap
	 * the window buffer for one from the matching cache.
	 */
	if (cpcb->mpcb_wstate != child_wstate) {
		if (child_wstate != WSTATE_USER32) {
			kmem_cache_free(wbuf64_cache, child_wbuf);
			child_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
			child_wstate = WSTATE_USER32;
		} else {
			kmem_cache_free(wbuf32_cache, child_wbuf);
			child_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
			child_wstate = WSTATE_USER64;
		}
	}

	cpcb->mpcb_pa = va_to_pa(cpcb);
	cpcb->mpcb_wbuf = child_wbuf;
	cpcb->mpcb_wbuf_pa = va_to_pa(child_wbuf);

	ASSERT(cpcb->mpcb_wstate == child_wstate);

	/* carry over any register windows still buffered in the parent */
	if (cpcb->mpcb_wbcnt != 0) {
		if (cpcb->mpcb_wstate == WSTATE_USER32) {
			bcopy(ppcb->mpcb_wbuf, cpcb->mpcb_wbuf,
			    cpcb->mpcb_wbcnt * sizeof (struct rwindow32));
		} else {
			bcopy(ppcb->mpcb_wbuf, cpcb->mpcb_wbuf,
			    cpcb->mpcb_wbcnt * sizeof (struct rwindow64));
		}
	}

	if (parent_thr == curthread)
		parent_fpu->fpu_fprs = _fp_read_fprs();

	/* parent is not using floating point: nothing more to copy */
	if (!parent_fpu->fpu_en && !(parent_fpu->fpu_fprs & FPRS_FEF))
		return;

	if (parent_thr == curthread && fpu_exists) {
		save_gsr(clwp->lwp_fpu);
	} else {
		uint64_t parent_gsr;

		parent_gsr = get_gsr(lwp->lwp_fpu);
		set_gsr(parent_gsr, clwp->lwp_fpu);
	}
	fp_fork(lwp, clwp);
}