/*
 * This function is for PCI IO space and memory space access.
 * It assumes that offset, bdf, acc_attr are current in prg_p.
 * It assumes that prg_p->phys_addr is the final phys addr (including offset).
 * This function modifies prg_p status and data.
 */
int
pxtool_pciiomem_access(px_t *px_p, pcitool_reg_t *prg_p,
    uint64_t *data_p, boolean_t is_write)
{
	on_trap_data_t otd;
	uint32_t io_stat = 0;
	dev_info_t *dip = px_p->px_dip;
	px_pec_t *pec_p = px_p->px_pec_p;
	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
	int rval = 0;

	/* Alignment checking. */
	if (!IS_P2ALIGNED(prg_p->offset, size)) {
		DBG(DBG_TOOLS, dip, "not aligned.\n");
		prg_p->status = PCITOOL_NOT_ALIGNED;
		return (EINVAL);
	}

	mutex_enter(&pec_p->pec_pokefault_mutex);
	pec_p->pec_ontrap_data = &otd;

	if (is_write) {
		pci_device_t bdf = PX_GET_BDF(prg_p);

		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
			*data_p = pxtool_swap_endian(*data_p, size);

		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			otd.ot_trampoline = (uintptr_t)&poke_fault;
			rval = hvio_poke(px_p->px_dev_hdl, prg_p->phys_addr,
			    size, *data_p, bdf, &io_stat);
		} else
			rval = H_EIO;

		if (otd.ot_trap & OT_DATA_ACCESS)
			rval = H_EIO;

		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", bdf:0x%x, "
		    "rval:%d, io_stat:%d\n", prg_p->phys_addr, bdf,
		    rval, io_stat);
	} else {
		*data_p = 0;

		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			otd.ot_trampoline = (uintptr_t)&peek_fault;
			rval = hvio_peek(px_p->px_dev_hdl, prg_p->phys_addr,
			    size, &io_stat, data_p);
		} else
			rval = H_EIO;

		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", "
		    "size:0x%" PRIx64 ", hdl:0x%" PRIx64 ", "
		    "rval:%d, io_stat:%d\n", prg_p->phys_addr, size,
		    px_p->px_dev_hdl, rval, io_stat);
		DBG(DBG_TOOLS, dip, "read data:0x%" PRIx64 "\n", *data_p);

		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
			*data_p = pxtool_swap_endian(*data_p, size);
	}

	/*
	 * Workaround: delay taking down safe access env.
	 * For more info, see comment where pxtool_iomem_delay_usec is
	 * declared.
	 */
	if (pxtool_iomem_delay_usec > 0)
		delay(drv_usectohz(pxtool_iomem_delay_usec));

	no_trap();
	pec_p->pec_ontrap_data = NULL;
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	mutex_exit(&pec_p->pec_pokefault_mutex);

	if (rval != SUCCESS) {
		prg_p->status = PCITOOL_INVALID_ADDRESS;
		rval = EINVAL;
	} else if (io_stat != SUCCESS) {
		prg_p->status = PCITOOL_IO_ERROR;
		rval = EIO;
	} else
		prg_p->status = PCITOOL_SUCCESS;

	return (rval);
}
/*
 * Attempt to clear a UE from a page.
 * Returns 1 if the error has been successfully cleared.
 */
static int
page_clear_transient_ue(page_t *pp)
{
	caddr_t		kaddr;
	uint8_t		rb, wb;
	uint64_t	pa;
	uint32_t	pa_hi, pa_lo;
	on_trap_data_t	otd;
	int		errors = 0;
	int		i;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(PP_PR_REQ(pp));
	ASSERT(pp->p_szc == 0);
	ASSERT(!hat_page_is_mapped(pp));

	/*
	 * Clear the page and attempt to clear the UE.  If we trap
	 * on the next access to the page, we know the UE has recurred.
	 */
	pagescrub(pp, 0, PAGESIZE);

	/*
	 * Map the page and write a bunch of bit patterns to compare
	 * what we wrote with what we read back.  This isn't a perfect
	 * test but it should be good enough to catch most of the
	 * recurring UEs.  If this fails to catch a recurrent UE, we'll
	 * retire the page the next time we see a UE on the page.
	 */
	kaddr = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)-1);

	pa = ptob((uint64_t)page_pptonum(pp));
	pa_hi = (uint32_t)(pa >> 32);
	pa_lo = (uint32_t)pa;

	/*
	 * Fill the page with each (0x00 - 0xFF] bit pattern, flushing
	 * the cache in between reading and writing.  We do this under
	 * on_trap() protection to avoid recursion.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		PR_MESSAGE(CE_WARN, 1, MSG_UE, pa);
		errors = 1;
	} else {
		for (wb = 0xff; wb > 0; wb--) {
			for (i = 0; i < PAGESIZE; i++) {
				kaddr[i] = wb;
			}

			sync_data_memory(kaddr, PAGESIZE);

			for (i = 0; i < PAGESIZE; i++) {
				rb = kaddr[i];
				if (rb != wb) {
					/*
					 * We had a mismatch without a trap.
					 * Uh-oh. Something is really wrong
					 * with this system.
					 */
					if (page_retire_messages) {
						cmn_err(CE_WARN, MSG_DM,
						    pa_hi, pa_lo, rb, wb);
					}
					errors = 1;
					goto out;	/* double break */
				}
			}
		}
	}
out:
	no_trap();
	ppmapout(kaddr);

	return (errors ? 0 : 1);
}
int
pxtool_pcicfg_access(px_t *px_p, pcitool_reg_t *prg_p,
    uint64_t *data_p, boolean_t is_write)
{
	pci_cfg_data_t data;
	on_trap_data_t otd;
	dev_info_t *dip = px_p->px_dip;
	px_pec_t *pec_p = px_p->px_pec_p;
	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
	int rval = 0;
	pci_cfgacc_req_t req;

	if ((size <= 0) || (size > 8)) {
		DBG(DBG_TOOLS, dip, "not supported size.\n");
		prg_p->status = PCITOOL_INVALID_SIZE;
		return (ENOTSUP);
	}

	/* Alignment checking. */
	if (!IS_P2ALIGNED(prg_p->offset, size)) {
		DBG(DBG_TOOLS, dip, "not aligned.\n");
		prg_p->status = PCITOOL_NOT_ALIGNED;
		return (EINVAL);
	}

	mutex_enter(&pec_p->pec_pokefault_mutex);
	pec_p->pec_ontrap_data = &otd;

	req.rcdip = dip;
	req.bdf = PCI_GETBDF(prg_p->bus_no, prg_p->dev_no, prg_p->func_no);
	req.offset = prg_p->offset;
	req.size = size;
	req.write = is_write;

	if (is_write) {
		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
			data.qw = pxtool_swap_endian(*data_p, size);
		else
			data.qw = *data_p;

		switch (size) {
		case sizeof (uint8_t):
			data.b = (uint8_t)data.qw;
			break;
		case sizeof (uint16_t):
			data.w = (uint16_t)data.qw;
			break;
		case sizeof (uint32_t):
			data.dw = (uint32_t)data.qw;
			break;
		case sizeof (uint64_t):
			break;
		}

		DBG(DBG_TOOLS, dip, "put: bdf:%d,%d,%d, off:0x%"PRIx64", size:"
		    "0x%"PRIx64", data:0x%"PRIx64"\n",
		    prg_p->bus_no, prg_p->dev_no, prg_p->func_no,
		    prg_p->offset, size, data.qw);

		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			otd.ot_trampoline = (uintptr_t)&poke_fault;
			VAL64(&req) = data.qw;
			pci_cfgacc_acc(&req);
		} else
			rval = H_EIO;

		if (otd.ot_trap & OT_DATA_ACCESS)
			rval = H_EIO;
	} else {
		data.qw = 0;

		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			otd.ot_trampoline = (uintptr_t)&peek_fault;
			pci_cfgacc_acc(&req);
			data.qw = VAL64(&req);
		} else
			rval = H_EIO;

		switch (size) {
		case sizeof (uint8_t):
			data.qw = (uint64_t)data.b;
			break;
		case sizeof (uint16_t):
			data.qw = (uint64_t)data.w;
			break;
		case sizeof (uint32_t):
			data.qw = (uint64_t)data.dw;
			break;
		case sizeof (uint64_t):
			break;
		}

		DBG(DBG_TOOLS, dip, "get: bdf:%d,%d,%d, off:0x%"PRIx64", size:"
		    "0x%"PRIx64", data:0x%"PRIx64"\n",
		    prg_p->bus_no, prg_p->dev_no, prg_p->func_no,
		    prg_p->offset, size, data.qw);

		*data_p = data.qw;

		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
			*data_p = pxtool_swap_endian(*data_p, size);
	}

	/*
	 * Workaround: delay taking down safe access env.
	 * For more info, see comments where pxtool_cfg_delay_usec is declared.
	 */
	if (pxtool_cfg_delay_usec > 0)
		drv_usecwait(pxtool_cfg_delay_usec);

	no_trap();
	pec_p->pec_ontrap_data = NULL;
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	mutex_exit(&pec_p->pec_pokefault_mutex);

	if (rval != SUCCESS) {
		prg_p->status = PCITOOL_INVALID_ADDRESS;
		rval = EINVAL;
	} else
		prg_p->status = PCITOOL_SUCCESS;

	return (rval);
}
/*
 * Safe C wrapper around the assembly language routine px_phys_peek_4u.
 *
 * Type is TRUE for big endian, FALSE for little endian.
 * Size is 1, 2, 4 or 8 bytes.
 * paddr is the physical address in IO space to read.
 * value_p is where the value is returned.
 */
static int
pxtool_safe_phys_peek(px_t *px_p, boolean_t type, size_t size, uint64_t paddr,
    uint64_t *value_p)
{
	px_pec_t *pec_p = px_p->px_pec_p;
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	on_trap_data_t otd;
	peek_poke_value_t peek_value;
	int err = DDI_SUCCESS;

	mutex_enter(&pec_p->pec_pokefault_mutex);
	pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;

	pxu_p->pcitool_addr = (caddr_t)(paddr & px_paddr_mask);

	/*
	 * Set up trap handling to make the access safe.
	 *
	 * on_trap works like setjmp.
	 * Set it up to not panic on a data access error,
	 * but to call peek_fault instead.
	 * Call px_phys_peek_4u after trap handling is set up.
	 * When on_trap returns FALSE, it has been set up.
	 * When it returns TRUE, it has caught an error.
	 */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		otd.ot_trampoline = (uintptr_t)&peek_fault;
		err = px_phys_peek_4u(size, paddr, &peek_value.u64, type);
	} else
		err = DDI_FAILURE;

	no_trap();

	/*
	 * Workaround: delay taking down safe access env.
	 * For more info, see comments where pxtool_delay_ticks is declared.
	 */
	if (pxtool_delay_ticks > 0)
		delay(pxtool_delay_ticks);

	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	pxu_p->pcitool_addr = NULL;
	mutex_exit(&pec_p->pec_pokefault_mutex);

	if (err != DDI_FAILURE) {
		switch (size) {
		case 8:
			*value_p = peek_value.u64;
			break;
		case 4:
			*value_p = (uint64_t)peek_value.u32;
			break;
		case 2:
			*value_p = (uint64_t)peek_value.u16;
			break;
		case 1:
			*value_p = (uint64_t)peek_value.u8;
			break;
		default:
			err = DDI_FAILURE;
		}
	}

	return (err);
}
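/*
 * A minimal sketch of the setjmp-style protection pattern the wrapper above
 * follows, assuming only the on_trap()/no_trap() interfaces, the peek_fault
 * trampoline, and px_phys_peek_4u() already used in this file. The helper
 * name protected_read_example() is hypothetical and for illustration only;
 * it omits the pec/pxu bookkeeping done by the real wrapper.
 */
static int
protected_read_example(uint64_t paddr, size_t size, uint64_t *value_p)
{
	on_trap_data_t otd;
	int err = DDI_SUCCESS;

	/* on_trap() returns 0 on setup; non-zero when a trap came back. */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		otd.ot_trampoline = (uintptr_t)&peek_fault;
		err = px_phys_peek_4u(size, paddr, value_p, B_FALSE);
	} else {
		err = DDI_FAILURE;	/* the access trapped */
	}
	no_trap();			/* always tear down the handler */

	return (err);
}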
/*
 * Safe C wrapper around the assembly language routine px_phys_poke_4u.
 *
 * Type is TRUE for big endian, FALSE for little endian.
 * Size is 1, 2, 4 or 8 bytes.
 * paddr is the physical address in IO space to write.
 * value contains the value to be written.
 */
static int
pxtool_safe_phys_poke(px_t *px_p, boolean_t type, size_t size, uint64_t paddr,
    uint64_t value)
{
	on_trap_data_t otd;
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	px_pec_t *pec_p = px_p->px_pec_p;
	peek_poke_value_t poke_value;
	int err = DDI_SUCCESS;

	switch (size) {
	case 8:
		poke_value.u64 = value;
		break;
	case 4:
		poke_value.u32 = (uint32_t)value;
		break;
	case 2:
		poke_value.u16 = (uint16_t)value;
		break;
	case 1:
		poke_value.u8 = (uint8_t)value;
		break;
	default:
		return (DDI_FAILURE);
	}

	mutex_enter(&pec_p->pec_pokefault_mutex);
	pec_p->pec_ontrap_data = &otd;
	pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;

	pxu_p->pcitool_addr = (caddr_t)(paddr & px_paddr_mask);

	/*
	 * on_trap works like setjmp.
	 * Set it up to not panic on a data access error,
	 * but to call poke_fault instead.
	 * Call px_phys_poke_4u after trap handling is set up.
	 * When on_trap returns FALSE, it has been set up.
	 * When it returns TRUE, it has caught an error.
	 */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		otd.ot_trampoline = (uintptr_t)&poke_fault;
		err = px_phys_poke_4u(size, paddr, &poke_value.u64, type);
	} else
		err = DDI_FAILURE;

	px_lib_clr_errs(px_p);

	if (otd.ot_trap & OT_DATA_ACCESS)
		err = DDI_FAILURE;

	/* Take down protected environment. */
	no_trap();
	pec_p->pec_ontrap_data = NULL;

	/*
	 * Workaround: delay taking down safe access env.
	 * For more info, see comments where pxtool_delay_ticks is declared.
	 */
	if (pxtool_delay_ticks > 0)
		delay(pxtool_delay_ticks);

	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	pxu_p->pcitool_addr = NULL;
	mutex_exit(&pec_p->pec_pokefault_mutex);

	return (err);
}
/*
 * Update the segment registers with new values from the pcb.
 *
 * We have to do this carefully, and in the following order,
 * in case any of the selectors points at a bogus descriptor.
 * If they do, we'll catch a trap with on_trap and return 1.
 * Returns 0 on success.
 *
 * This is particularly tricky for %gs.
 * This routine must be executed under a cli.
 */
int
update_sregs(struct regs *rp, klwp_t *lwp)
{
	pcb_t *pcb = &lwp->lwp_pcb;
	ulong_t	kgsbase;
	on_trap_data_t	otd;
	int rc = 0;

	if (!on_trap(&otd, OT_SEGMENT_ACCESS)) {

#if defined(__xpv)
		/*
		 * On the hypervisor this is easy.  The hypercall below will
		 * swapgs and load %gs with the user selector.  If the user
		 * selector is bad the hypervisor will catch the fault and
		 * load %gs with the null selector instead.  Either way the
		 * kernel's gsbase is not damaged.
		 */
		kgsbase = (ulong_t)CPU;
		if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL,
		    pcb->pcb_gs) != 0) {
			no_trap();
			return (1);
		}

		rp->r_gs = pcb->pcb_gs;
		ASSERT((cpu_t *)kgsbase == CPU);

#else	/* __xpv */

		/*
		 * A little more complicated running native.
		 */
		kgsbase = (ulong_t)CPU;
		__set_gs(pcb->pcb_gs);

		/*
		 * If __set_gs fails it's because the new %gs is a bad %gs;
		 * we'll be taking a trap but with the original %gs and %gsbase
		 * undamaged (i.e. pointing at curcpu).
		 *
		 * We've just mucked up the kernel's gsbase.  Oops.  In
		 * particular we can't take any traps at all.  Make the newly
		 * computed gsbase be the hidden gs via __swapgs, and fix
		 * the kernel's gsbase back again.  Later, when we return to
		 * userland we'll swapgs again restoring gsbase just loaded
		 * above.
		 */
		__swapgs();
		rp->r_gs = pcb->pcb_gs;

		/*
		 * Restore kernel's gsbase.
		 */
		wrmsr(MSR_AMD_GSBASE, kgsbase);

#endif	/* __xpv */

		/*
		 * Only override the descriptor base address if
		 * r_gs == LWPGS_SEL or if r_gs == NULL.  A note on
		 * NULL descriptors -- 32-bit programs take faults
		 * if they dereference NULL descriptors; however,
		 * when 64-bit programs load them into %fs or %gs,
		 * they DON'T fault -- only the base address remains
		 * whatever it was from the last load.  Urk.
		 *
		 * XXX - note that lwp_setprivate now sets %fs/%gs to the
		 * null selector for 64 bit processes, whereas before
		 * %fs/%gs were set to LWP(FS|GS)_SEL regardless of
		 * the process's data model.  For now we check for both
		 * values so that the kernel can also support the older
		 * libc.  This should be ripped out at some point in the
		 * future.
		 */
		if (pcb->pcb_gs == LWPGS_SEL || pcb->pcb_gs == 0) {
#if defined(__xpv)
			if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER,
			    pcb->pcb_gsbase)) {
				no_trap();
				return (1);
			}
#else
			wrmsr(MSR_AMD_KGSBASE, pcb->pcb_gsbase);
#endif
		}

		__set_ds(pcb->pcb_ds);
		rp->r_ds = pcb->pcb_ds;

		__set_es(pcb->pcb_es);
		rp->r_es = pcb->pcb_es;

		__set_fs(pcb->pcb_fs);
		rp->r_fs = pcb->pcb_fs;

		/*
		 * Same as for %gs.
		 */
		if (pcb->pcb_fs == LWPFS_SEL || pcb->pcb_fs == 0) {
#if defined(__xpv)
			if (HYPERVISOR_set_segment_base(SEGBASE_FS,
			    pcb->pcb_fsbase)) {
				no_trap();
				return (1);
			}
#else
			wrmsr(MSR_AMD_FSBASE, pcb->pcb_fsbase);
#endif
		}
	} else {
		cli();
		rc = 1;
	}
	no_trap();
	return (rc);
}
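/*
 * Hypothetical caller sketch (not taken from this file), illustrating the
 * contract described above: update_sregs() must be called with interrupts
 * disabled, and a non-zero return means on_trap() caught a fault from a
 * bogus selector, so the lwp's segment state needs to be repaired before
 * the user-level registers can be trusted.  The function name and recovery
 * comment are illustrative only.
 */
static void
restore_user_segregs_example(struct regs *rp, klwp_t *lwp)
{
	cli();				/* update_sregs() requires cli */
	if (update_sregs(rp, lwp) != 0) {
		/*
		 * One of the selectors pointed at a bogus descriptor; a
		 * real caller would reset the pcb's segment registers to
		 * known-good values here before retrying.
		 */
	}
	sti();				/* re-enable interrupts */
}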
/*
 * Perform I/O to a given process.  This will return EIO if we detect
 * corrupt memory and ENXIO if there is no such mapped address in the
 * user process's address space.
 */
static int
urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
{
	caddr_t addr = (caddr_t)a;
	caddr_t page;
	caddr_t vaddr;
	struct seg *seg;
	int error = 0;
	int err = 0;
	uint_t prot;
	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
	int protchanged;
	on_trap_data_t otd;
	int retrycnt;
	struct as *as = p->p_as;
	enum seg_rw rw;

	/*
	 * Locate segment containing address of interest.
	 */
	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
	retrycnt = 0;
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
retry:
	if ((seg = as_segat(as, page)) == NULL ||
	    !page_valid(seg, page)) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	SEGOP_GETPROT(seg, page, 0, &prot);

	protchanged = 0;
	if ((prot & prot_rw) == 0) {
		protchanged = 1;
		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);
		if (err == IE_RETRY) {
			protchanged = 0;
			ASSERT(retrycnt == 0);
			retrycnt++;
			goto retry;
		}
		if (err != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENXIO);
		}
	}

	/*
	 * segvn may do a copy-on-write for the F_SOFTLOCK/S_READ case to
	 * break sharing, to avoid a copy-on-write of a softlocked page by
	 * another thread.  But since we locked the address space as a writer,
	 * no other thread can cause a copy-on-write.  S_READ_NOCOW is passed
	 * as the access type to tell segvn that it's ok not to do a
	 * copy-on-write for this SOFTLOCK fault.
	 */
	if (writing)
		rw = S_WRITE;
	else if (seg->s_ops == &segvn_ops)
		rw = S_READ_NOCOW;
	else
		rw = S_READ;

	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
		if (protchanged)
			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	CPU_STATS_ADD_K(vm, softlock, 1);

	/*
	 * Make sure we're not trying to read or write off the end of the
	 * page.
	 */
	ASSERT(len <= page + PAGESIZE - addr);

	/*
	 * Map in the locked page, copy to our local buffer,
	 * then map the page out and unlock it.
	 */
	vaddr = mapin(as, addr, writing);

	/*
	 * Since we are copying memory on behalf of the user process,
	 * protect against memory error correction faults.
	 */
	if (!on_trap(&otd, OT_DATA_EC)) {
		if (seg->s_ops == &segdev_ops) {
			/*
			 * Device memory can behave strangely; invoke
			 * a segdev-specific copy operation instead.
			 */
			if (writing) {
				if (segdev_copyto(seg, addr, buf, vaddr, len))
					error = ENXIO;
			} else {
				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
					error = ENXIO;
			}
		} else {
			if (writing)
				bcopy(buf, vaddr, len);
			else
				bcopy(vaddr, buf, len);
		}
	} else {
		error = EIO;
	}
	no_trap();

	/*
	 * If we're writing to an executable page, we may need to synchronize
	 * the I$ with the modifications we made through the D$.
	 */
	if (writing && (prot & PROT_EXEC))
		sync_icache(vaddr, (uint_t)len);

	mapout(as, addr, vaddr, writing);

	if (rw == S_READ_NOCOW)
		rw = S_READ;

	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);

	if (protchanged)
		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);

	AS_LOCK_EXIT(as, &as->a_lock);

	return (error);
}