/*
 * Return a page to service by moving it from the retired_pages vnode
 * onto the freelist.
 *
 * Called from mmioctl_page_retire() on behalf of the FMA DE.
 *
 * Returns:
 *
 *   - 0 if the page is unretired,
 *   - EAGAIN if the pp can not be locked,
 *   - EINVAL if the PA is whacko, and
 *   - EIO if the pp is not retired.
 */
int
page_unretire(uint64_t pa)
{
    page_t *pp;

    pp = page_numtopp_nolock(mmu_btop(pa));
    if (pp == NULL) {
        return (page_retire_done(pp, PRD_INVALID_PA));
    }

    return (page_unretire_pp(pp, 1));
}
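/*
 * A minimal caller sketch (not from the source) showing how an ioctl
 * handler in the mmioctl_page_retire() style might dispatch to
 * page_unretire() and hand the errno back to the FMA DE. The handler
 * name is an assumption for illustration only; the return codes are
 * exactly those documented above.
 */
static int
example_unretire_ioctl(uint64_t pa)
{
    int rc = page_unretire(pa);

    switch (rc) {
    case 0:         /* page moved off retired_pages, back in service */
        break;
    case EAGAIN:    /* page_t could not be locked; caller may retry */
        break;
    case EINVAL:    /* pa did not map to a page_t */
        break;
    case EIO:       /* page was never retired */
        break;
    }
    return (rc);
}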
static void
less_pages(uint64_t base, uint64_t len)
{
    uint64_t pa, end = base + len;
    extern int kcage_on;

    for (pa = base; pa < end; pa += PAGESIZE) {
        pfn_t pfnum;
        page_t *pp;

        pfnum = (pfn_t)(pa >> PAGESHIFT);
        if ((pp = page_numtopp_nolock(pfnum)) == NULL)
            cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);

        /*
         * must break up any large pages that may have
         * constituent pages being utilized for
         * prom_alloc()'s. page_reclaim() can't handle
         * large pages.
         */
        if (pp->p_szc != 0)
            page_boot_demote(pp);

        if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
            /*
             * Ahhh yes, a prom page,
             * suck it off the freelist,
             * lock it, and hashin on prom_pages vp.
             */
            if (page_trylock(pp, SE_EXCL) == 0)
                cmn_err(CE_PANIC, "prom page locked");

            (void) page_reclaim(pp, NULL);
            /*
             * vnode offsets on the prom_ppages vnode
             * are page numbers (gack) for >32 bit
             * physical memory machines.
             */
            (void) page_hashin(pp, &promvp,
                (offset_t)pfnum, NULL);

            if (kcage_on) {
                ASSERT(pp->p_szc == 0);
                if (PP_ISNORELOC(pp) == 0) {
                    PP_SETNORELOC(pp);
                    PLCNT_XFER_NORELOC(pp);
                }
            }
            (void) page_pp_lock(pp, 0, 1);
        }
    }
}
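/*
 * Hedged sketch (not in the source): because less_pages() hashes prom
 * pages onto promvp using the pfn as the vnode offset, a prom page can
 * later be found again by its page number alone. page_find() is a hash
 * lookup that takes no page lock.
 */
static page_t *
example_find_prom_page(pfn_t pfnum)
{
    /* offset == pfn, by the convention established in less_pages() */
    return (page_find(&promvp, (u_offset_t)pfnum));
}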
/*
 * Return the page for the kpm virtual address vaddr.
 * Caller is responsible for the kpm mapping and lock
 * state of the page.
 */
page_t *
hat_kpm_vaddr2page(caddr_t vaddr)
{
    uintptr_t paddr;
    pfn_t pfn;

    ASSERT(IS_KPM_ADDR(vaddr));

    SFMMU_KPM_VTOP(vaddr, paddr);
    pfn = (pfn_t)btop(paddr);

    return (page_numtopp_nolock(pfn));
}
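/*
 * Illustrative sketch only: since the caller owns the lock state, a
 * routine that wants to hold the page across use must take the page
 * lock itself, e.g. with page_trylock(). IS_KPM_ADDR() must hold for
 * vaddr, as hat_kpm_vaddr2page() asserts.
 */
static page_t *
example_kpm_hold(caddr_t vaddr)
{
    page_t *pp = hat_kpm_vaddr2page(vaddr);

    if (pp == NULL || !page_trylock(pp, SE_SHARED))
        return (NULL);
    return (pp);    /* caller must page_unlock(pp) when done */
}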
/*ARGSUSED*/
static int
bootfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, cred_t *cr)
{
    bootfs_node_t *bnp = vp->v_data;
    page_t *pp, *fpp;
    pfn_t pfn;

    for (;;) {
        /* Easy case where the page exists */
        pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED);
        if (pp != NULL) {
            if (pl != NULL) {
                pl[0] = pp;
                pl[1] = NULL;
            } else {
                page_unlock(pp);
            }
            return (0);
        }

        pp = page_create_va(vp, off, PAGESIZE, PG_EXCL | PG_WAIT,
            seg, addr);

        /*
         * If we didn't get the page, that means someone else beat
         * us to creating this so we need to try again.
         */
        if (pp != NULL)
            break;
    }

    pfn = btop((bnp->bvn_addr + off) & PAGEMASK);
    fpp = page_numtopp_nolock(pfn);

    if (ppcopy(fpp, pp) == 0) {
        pvn_read_done(pp, B_ERROR);
        return (EIO);
    }

    if (pl != NULL) {
        pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
    } else {
        pvn_io_done(pp);
    }

    return (0);
}
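/*
 * Sketch of how a VOP_GETPAGE entry point might wrap bootfs_getapage()
 * for the single-page case. This wrapper is an assumption, not the
 * bootfs source: the real entry point presumably also validates bounds
 * and handles multi-page requests, which are elided here.
 */
static int
example_bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, cred_t *cr)
{
    if (len != PAGESIZE)
        return (EINVAL);    /* assumed: one page at a time */
    return (bootfs_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
        seg, addr, rw, cr));
}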
void
boot_mapin(caddr_t addr, size_t size)
{
    caddr_t eaddr;
    page_t *pp;
    pfn_t pfnum;

    if (page_resv(btop(size), KM_NOSLEEP) == 0)
        panic("boot_mapin: page_resv failed");

    for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
        pfnum = va_to_pfn(addr);
        if (pfnum == PFN_INVALID)
            continue;
        if ((pp = page_numtopp_nolock(pfnum)) == NULL)
            panic("boot_mapin(): No pp for pfnum = %lx", pfnum);

        /*
         * must break up any large pages that may have constituent
         * pages being utilized for BOP_ALLOC()'s before calling
         * page_numtopp(). The locking code (ie. page_reclaim())
         * can't handle them
         */
        if (pp->p_szc != 0)
            page_boot_demote(pp);

        pp = page_numtopp(pfnum, SE_EXCL);
        if (pp == NULL || PP_ISFREE(pp))
            panic("boot_alloc: pp is NULL or free");

        /*
         * If the cage is on but doesn't yet contain this page,
         * mark it as non-relocatable.
         */
        if (kcage_on && !PP_ISNORELOC(pp)) {
            PP_SETNORELOC(pp);
            PLCNT_XFER_NORELOC(pp);
        }

        (void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr, NULL);
        pp->p_lckcnt = 1;
#if defined(__x86)
        page_downgrade(pp);
#else
        page_unlock(pp);
#endif
    }
}
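/*
 * For context, a hedged reconstruction of a boot_alloc()-style caller:
 * memory is obtained from the boot allocator and then handed to
 * boot_mapin() so the backing pages get their page_t state set up.
 * Alignment handling and the sparc chunk-allocation path are
 * simplified away; treat this as a sketch, not the kernel's code.
 */
void *
example_boot_alloc(void *inaddr, size_t size, uint_t align)
{
    caddr_t addr = inaddr;

    size = ptob(btopr(size));   /* round up to whole pages */
    if (BOP_ALLOC(bootops, addr, size, align) != addr)
        panic("example_boot_alloc: BOP_ALLOC failed");
    boot_mapin((caddr_t)addr, size);
    return (addr);
}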
/*
 * Test to see if the page_t for a given PA is retired, and return the
 * hardware errors we have seen on the page if requested.
 *
 * Called from mmioctl_page_retire on behalf of the FMA DE.
 *
 * Returns:
 *
 *   - 0 if the page is retired,
 *   - EIO if the page is not retired and has no errors,
 *   - EAGAIN if the page is not retired but is pending; and
 *   - EINVAL if the PA is whacko.
 */
int
page_retire_check(uint64_t pa, uint64_t *errors)
{
    page_t *pp;

    if (errors) {
        *errors = 0;
    }

    pp = page_numtopp_nolock(mmu_btop(pa));
    if (pp == NULL) {
        return (page_retire_done(pp, PRD_INVALID_PA));
    }

    return (page_retire_check_pp(pp, errors));
}
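/*
 * Minimal sketch, not from the source: poll a PA's retire status and
 * report any recorded hardware errors. Only the errno contract from
 * the comment above is relied upon; the meaning of individual error
 * bits is left to the DE.
 */
static void
example_check_pa(uint64_t pa)
{
    uint64_t errs;

    switch (page_retire_check(pa, &errs)) {
    case 0:
        cmn_err(CE_NOTE, "pa %llx retired, errors %llx",
            (u_longlong_t)pa, (u_longlong_t)errs);
        break;
    case EAGAIN:
        cmn_err(CE_NOTE, "pa %llx retire pending", (u_longlong_t)pa);
        break;
    case EIO:
        cmn_err(CE_NOTE, "pa %llx not retired, no errors",
            (u_longlong_t)pa);
        break;
    case EINVAL:
        cmn_err(CE_WARN, "pa %llx is not memory", (u_longlong_t)pa);
        break;
    }
}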
/*
 * Return the page frame number if a valid segkpm mapping exists
 * for vaddr, otherwise return PFN_INVALID. No locks are grabbed.
 * Should only be used by other sfmmu routines.
 */
pfn_t
sfmmu_kpm_vatopfn(caddr_t vaddr)
{
    uintptr_t paddr;
    pfn_t pfn;
    page_t *pp;

    ASSERT(kpm_enable && IS_KPM_ADDR(vaddr));

    SFMMU_KPM_VTOP(vaddr, paddr);
    pfn = (pfn_t)btop(paddr);
    pp = page_numtopp_nolock(pfn);
    if (pp)
        return (pfn);
    else
        return ((pfn_t)PFN_INVALID);
}
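/*
 * Sketch only: sfmmu code can use sfmmu_kpm_vatopfn() as a cheap
 * "is this kpm address backed by a real page_t?" test, since it
 * returns PFN_INVALID when no page_t exists for the physical address.
 */
static int
example_kpm_addr_is_memory(caddr_t vaddr)
{
    return (sfmmu_kpm_vatopfn(vaddr) != PFN_INVALID);
}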
int
plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
{
    page_t *pp = page_numtopp_nolock(pfn);

    if (pp == NULL)
        return (PLAT_HOLD_FAIL);

#if !defined(__xpv)
    /*
     * Pages are locked SE_SHARED because some hypervisors
     * like xVM ESX reclaim Guest OS memory by locking
     * it SE_EXCL so we want to leave these pages alone.
     */
    if (lock == PLAT_HOLD_LOCK) {
        ASSERT(pp_ret != NULL);
        if (page_trylock(pp, SE_SHARED) == 0)
            return (PLAT_HOLD_FAIL);
    }
#else /* __xpv */
    if (lock == PLAT_HOLD_LOCK) {
        ASSERT(pp_ret != NULL);
        if (page_trylock(pp, SE_EXCL) == 0)
            return (PLAT_HOLD_FAIL);
    }

    if (mfn_list[pfn] == MFN_INVALID) {
        /* We failed - release the lock if we grabbed it earlier */
        if (lock == PLAT_HOLD_LOCK) {
            page_unlock(pp);
        }
        return (PLAT_HOLD_FAIL);
    }
#endif /* __xpv */

    if (lock == PLAT_HOLD_LOCK)
        *pp_ret = pp;

    return (PLAT_HOLD_OK);
}
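/*
 * Usage sketch (assumed, not from the source): pair plat_hold_page()
 * with plat_release_page(), which drops the page lock taken under
 * PLAT_HOLD_LOCK. The scrubbing operation in the middle is
 * hypothetical.
 */
static int
example_scrub_pfn(pfn_t pfn)
{
    page_t *pp;

    if (plat_hold_page(pfn, PLAT_HOLD_LOCK, &pp) != PLAT_HOLD_OK)
        return (-1);
    /* ... operate on the SE_SHARED-locked page here ... */
    plat_release_page(pp);
    return (0);
}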
/*
 * Map address "addr" in address space "as" into a kernel virtual address.
 * The memory is guaranteed to be resident and locked down.
 */
static caddr_t
mapin(struct as *as, caddr_t addr, int writing)
{
    page_t *pp;
    caddr_t kaddr;
    pfn_t pfnum;

    /*
     * NB: Because of past mistakes, we have bits being returned
     * by getpfnum that are actually the page type bits of the pte.
     * When the object we are trying to map is a memory page with
     * a page structure everything is ok and we can use the optimal
     * method, ppmapin. Otherwise, we have to do something special.
     */
    pfnum = hat_getpfnum(as->a_hat, addr);
    if (pf_is_memory(pfnum)) {
        pp = page_numtopp_nolock(pfnum);
        if (pp != NULL) {
            ASSERT(PAGE_LOCKED(pp));
            kaddr = ppmapin(pp, writing ?
                (PROT_READ | PROT_WRITE) : PROT_READ, (caddr_t)-1);
            return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
        }
    }

    /*
     * Oh well, we didn't have a page struct for the object we were
     * trying to map in; ppmapin doesn't handle devices, but allocating a
     * heap address allows ppmapout to free virtual space when done.
     */
    kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
    hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
        writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);
    return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
}
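/*
 * Hedged sketch of the matching teardown: addresses handed out by
 * ppmapin() go back via ppmapout(), while the device case must unload
 * the translation and free the heap_arena page. Deciding which case
 * applies by a kernelheap/ekernelheap range check is an assumption
 * here, mirroring where mapin() allocated, not a quoted implementation.
 */
static void
example_mapout(caddr_t kaddr)
{
    caddr_t vaddr = (caddr_t)((uintptr_t)kaddr & PAGEMASK);

    if (vaddr >= (caddr_t)kernelheap && vaddr < (caddr_t)ekernelheap) {
        hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
        vmem_free(heap_arena, vaddr, PAGESIZE);
    } else {
        ppmapout(vaddr);
    }
}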
/*
 * page_retire() - the front door for retiring a page.
 *
 * Ideally, page_retire() would instantly retire the requested page.
 * Unfortunately, some pages are locked or otherwise tied up and cannot be
 * retired right away. To deal with that, bits are set in p_toxic of the
 * page_t. An attempt is made to lock the page; if the attempt is successful,
 * we instantly unlock the page counting on page_unlock() to notice p_toxic
 * is nonzero and to call back into page_retire_pp(). Success is determined
 * by looking to see whether the page has been retired once it has been
 * unlocked.
 *
 * Returns:
 *
 *   - 0 on success,
 *   - EINVAL when the PA is whacko,
 *   - EIO if the page is already retired or already pending retirement, or
 *   - EAGAIN if the page could not be _immediately_ retired but is pending.
 */
int
page_retire(uint64_t pa, uchar_t reason)
{
    page_t *pp;

    ASSERT(reason & PR_REASONS);        /* there must be a reason */
    ASSERT(!(reason & ~PR_REASONS));    /* but no other bits */

    pp = page_numtopp_nolock(mmu_btop(pa));
    if (pp == NULL) {
        PR_MESSAGE(CE_WARN, 1, "Cannot schedule clearing of error on"
            " page 0x%08x.%08x; page is not relocatable memory", pa);
        return (page_retire_done(pp, PRD_INVALID_PA));
    }
    if (PP_RETIRED(pp)) {
        PR_DEBUG(prd_dup1);
        return (page_retire_done(pp, PRD_DUPLICATE));
    }

    if ((reason & PR_UE) && !PP_TOXIC(pp)) {
        PR_MESSAGE(CE_NOTE, 1, "Scheduling clearing of error on"
            " page 0x%08x.%08x", pa);
    } else if (PP_PR_REQ(pp)) {
        PR_DEBUG(prd_dup2);
        return (page_retire_done(pp, PRD_DUPLICATE));
    } else {
        PR_MESSAGE(CE_NOTE, 1, "Scheduling removal of"
            " page 0x%08x.%08x", pa);
    }

    page_settoxic(pp, reason);
    page_retire_enqueue(pp);

    /*
     * And now for some magic.
     *
     * We marked this page toxic up above. All there is left to do is
     * to try to lock the page and then unlock it. The page lock routines
     * will intercept the page and retire it if they can. If the page
     * cannot be locked, that's okay -- page_unlock() or the background
     * thread will eventually get it; until then the lock routines will
     * deny further locks on it.
     */
    if (MTBF(pr_calls, pr_mtbf) && page_trylock(pp, SE_EXCL)) {
        PR_DEBUG(prd_prlocked);
        page_unlock(pp);
    } else {
        PR_DEBUG(prd_prnotlocked);
    }

    if (PP_RETIRED(pp)) {
        PR_DEBUG(prd_prretired);
        return (0);
    } else {
        cv_signal(&pr_cv);
        PR_INCR_KSTAT(pr_failed);

        if (pp->p_toxic & PR_MSG) {
            return (page_retire_done(pp, PRD_FAILED));
        } else {
            return (page_retire_done(pp, PRD_PENDING));
        }
    }
}
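/*
 * Caller sketch (assumed): the FMA DE reaches this through
 * mmioctl_page_retire() with a single reason bit set. PR_FMA is used
 * here as a representative reason bit; consult the PR_* definitions
 * for the real values and PR_REASONS for the allowed set.
 */
static int
example_retire_for_fma(uint64_t pa)
{
    int rc = page_retire(pa, PR_FMA);

    /*
     * EAGAIN means the retire is pending and will be finished by
     * page_unlock() or the background thread; treat it as queued.
     */
    return (rc == EAGAIN ? 0 : rc);
}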
/*
 * find prom phys pages and alloc space for a tmp copy
 */
static int
i_cpr_find_ppages(void)
{
    struct page *pp;
    struct memlist *pmem;
    pgcnt_t npages, pcnt, scnt, vcnt;
    pfn_t ppn, plast, *dst;
    int mapflag;

    cpr_clear_bitmaps();
    mapflag = REGULAR_BITMAP;

    /*
     * there should be a page_t for each phys page used by the kernel;
     * set a bit for each phys page not tracked by a page_t
     */
    pcnt = 0;
    memlist_read_lock();
    for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
        npages = mmu_btop(pmem->ml_size);
        ppn = mmu_btop(pmem->ml_address);
        for (plast = ppn + npages; ppn < plast; ppn++) {
            if (page_numtopp_nolock(ppn))
                continue;
            (void) cpr_setbit(ppn, mapflag);
            pcnt++;
        }
    }
    memlist_read_unlock();

    /*
     * clear bits for phys pages in each segment
     */
    scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);

    /*
     * set bits for phys pages referenced by the promvp vnode;
     * these pages are mostly comprised of forthdebug words
     */
    vcnt = 0;
    for (pp = promvp.v_pages; pp; ) {
        if (cpr_setbit(pp->p_offset, mapflag) == 0)
            vcnt++;
        pp = pp->p_vpnext;
        if (pp == promvp.v_pages)
            break;
    }

    /*
     * the total number of prom pages is:
     * (non-page_t pages - seg pages + vnode pages)
     */
    ppage_count = pcnt - scnt + vcnt;
    CPR_DEBUG(CPR_DEBUG1,
        "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
        pcnt, scnt, vcnt, ppage_count);

    /*
     * alloc array of pfn_t to store phys page list
     */
    pphys_list_size = ppage_count * sizeof (pfn_t);
    pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
    if (pphys_list == NULL) {
        cpr_err(CE_WARN, "cannot alloc pphys_list");
        return (ENOMEM);
    }

    /*
     * phys pages referenced in the bitmap should be
     * those used by the prom; scan bitmap and save
     * a list of prom phys page numbers
     */
    dst = pphys_list;
    memlist_read_lock();
    for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
        npages = mmu_btop(pmem->ml_size);
        ppn = mmu_btop(pmem->ml_address);
        for (plast = ppn + npages; ppn < plast; ppn++) {
            if (cpr_isset(ppn, mapflag)) {
                ASSERT(dst < (pphys_list + ppage_count));
                *dst++ = ppn;
            }
        }
    }
    memlist_read_unlock();

    /*
     * allocate space to store prom pages
     */
    ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
    if (ppage_buf == NULL) {
        kmem_free(pphys_list, pphys_list_size);
        pphys_list = NULL;
        cpr_err(CE_WARN, "cannot alloc ppage_buf");
        return (ENOMEM);
    }

    return (0);
}
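/*
 * Hedged sketch of the companion save step: walk pphys_list and copy
 * each prom page into ppage_buf. Since some of these physical pages
 * have no page_t, a per-platform mapping helper is needed to reach
 * them; i_cpr_map_phys()/i_cpr_unmap_phys() below are hypothetical
 * stand-ins for whatever mapin routine the suspend code provides.
 */
static void
example_save_ppages(void)
{
    pfn_t *pf;
    char *dst = (char *)ppage_buf;

    for (pf = pphys_list; pf < pphys_list + ppage_count; pf++) {
        caddr_t src = i_cpr_map_phys(*pf);  /* hypothetical helper */

        bcopy(src, dst, MMU_PAGESIZE);
        i_cpr_unmap_phys(src);              /* hypothetical helper */
        dst += MMU_PAGESIZE;
    }
}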