/* ARGSUSED */
static int
fastboot_xc_func(fastboot_info_t *nk, xc_arg_t unused2, xc_arg_t unused3)
{
	void (*fastboot_func)(fastboot_info_t *);
	fastboot_file_t	*fb = &nk->fi_files[FASTBOOT_SWTCH];
	fastboot_func = (void (*)())(fb->fb_va);
	kthread_t *t_intr = curthread->t_intr;

	if (&kas != curproc->p_as) {
		hat_devload(curproc->p_as->a_hat, (caddr_t)fb->fb_va,
		    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
	}

	/*
	 * If we have pinned a thread, make sure the address is mapped
	 * in the address space of the pinned thread.
	 */
	if (t_intr && t_intr->t_procp->p_as->a_hat != curproc->p_as->a_hat &&
	    t_intr->t_procp->p_as != &kas)
		hat_devload(t_intr->t_procp->p_as->a_hat,
		    (caddr_t)fb->fb_va, MMU_PAGESIZE,
		    mmu_btop(fb->fb_dest_pa),
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	(*psm_shutdownf)(A_SHUTDOWN, AD_FASTREBOOT);
	(*fastboot_func)(nk);

	/*NOTREACHED*/
	return (0);
}

static caddr_t
pci_cfgacc_map(paddr_t phys_addr)
{
#ifdef __xpv
	phys_addr = pfn_to_pa(xen_assign_pfn(mmu_btop(phys_addr))) |
	    (phys_addr & MMU_PAGEOFFSET);
#endif

	if (khat_running) {
		pfn_t pfn = mmu_btop(phys_addr);

		/*
		 * pci_cfgacc_virt_base may hold address left from early
		 * boot, which points to low mem. Realloc virtual address
		 * in kernel space since it's already late in boot now.
		 * Note: no need to unmap first, clear_boot_mappings() will
		 * do that for us.
		 */
		if (pci_cfgacc_virt_base < (caddr_t)kernelbase)
			pci_cfgacc_virt_base = vmem_alloc(heap_arena,
			    MMU_PAGESIZE, VM_SLEEP);

		hat_devload(kas.a_hat, pci_cfgacc_virt_base,
		    MMU_PAGESIZE, pfn, PROT_READ | PROT_WRITE |
		    HAT_STRICTORDER, HAT_LOAD_LOCK);
	} else {
		paddr_t	pa_base = P2ALIGN(phys_addr, MMU_PAGESIZE);

		if (pci_cfgacc_virt_base == NULL)
			pci_cfgacc_virt_base =
			    (caddr_t)alloc_vaddr(MMU_PAGESIZE, MMU_PAGESIZE);

		kbm_map((uintptr_t)pci_cfgacc_virt_base, pa_base, 0, 0);
	}

	return (pci_cfgacc_virt_base + (phys_addr & MMU_PAGEOFFSET));
}

static pfn_t
pte2mfn(x86pte_t pte, uint_t level)
{
	pfn_t mfn;

	if (level > 0 && (pte & PT_PAGESIZE))
		mfn = mmu_btop(pte & PT_PADDR_LGPG);
	else
		mfn = mmu_btop(pte & PT_PADDR);

	return (mfn);
}

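/*
 * A minimal sketch (hypothetical helper, not taken from the original source)
 * of how a caller could combine pte2mfn() above with the page offset of a
 * virtual address to recover a full machine address for a base (level 0)
 * mapping. A large-page mapping would need the level's page mask instead of
 * MMU_PAGEOFFSET.
 */
static maddr_t
pte_to_maddr_level0(x86pte_t pte, uintptr_t va)
{
	/* page frame comes from the PTE, byte offset from the VA */
	return (mmu_ptob((maddr_t)pte2mfn(pte, 0)) | (va & MMU_PAGEOFFSET));
}
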
/*
 * Replacement for devmap_devmem_setup() which will map a machine address
 * instead of a register set/offset.
 */
void
gfxp_map_devmem(devmap_cookie_t dhc, gfx_maddr_t maddr, size_t length,
    ddi_device_acc_attr_t *attrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	pfn_t pfn;

#ifdef __xpv
	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
	pfn = xen_assign_pfn(mmu_btop(maddr));
#else
	pfn = mmu_btop(maddr);
#endif

	dhp->dh_pfn = pfn;
	dhp->dh_len = mmu_ptob(mmu_btopr(length));
	dhp->dh_roff = 0;

#ifndef DEVMAP_DEVMEM_COOKIE
#define	DEVMAP_DEVMEM_COOKIE	((ddi_umem_cookie_t)0x1)	/* XXPV */
#endif /* DEVMAP_DEVMEM_COOKIE */
	dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
	/*LINTED: E_EXPR_NULL_EFFECT*/
	dhp->dh_flags |= DEVMAP_DEFAULTS;
	dhp->dh_maxprot = PROT_ALL & dhp->dh_orig_maxprot;

	/* no callbacks needed */
	bzero(&dhp->dh_callbackops, sizeof (struct devmap_callback_ctl));

	switch (attrp->devacc_attr_dataorder) {
	case DDI_UNORDERED_OK_ACC:
		dhp->dh_hat_attr = HAT_UNORDERED_OK;
		break;
	case DDI_MERGING_OK_ACC:
		dhp->dh_hat_attr = HAT_MERGING_OK;
		break;
	case DDI_LOADCACHING_OK_ACC:
		dhp->dh_hat_attr = HAT_LOADCACHING_OK;
		break;
	case DDI_STORECACHING_OK_ACC:
		dhp->dh_hat_attr = HAT_STORECACHING_OK;
		break;
	case DDI_STRICTORDER_ACC:
	default:
		dhp->dh_hat_attr = HAT_STRICTORDER;
	}

	/* don't use large pages */
	dhp->dh_mmulevel = 0;
	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;

	dhp->dh_flags |= DEVMAP_SETUP_DONE;
}

/*
 * Allocate bitmaps according to the phys_install list.
 */
static int
i_cpr_bitmap_setup(void)
{
	struct memlist *pmem;
	cbd_t *dp, *tail;
	void *space;
	size_t size;

	/*
	 * The number of bitmap descriptors will be the count of
	 * phys_install ranges plus 1 for a trailing NULL struct.
	 */
	cpr_nbitmaps = 1;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next)
		cpr_nbitmaps++;

	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
		return (EFBIG);
	}

	/* Alloc an array of bitmap descriptors. */
	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
	if (dp == NULL) {
		cpr_nbitmaps = 0;
		return (ENOMEM);
	}
	tail = dp + cpr_nbitmaps;

	CPR->c_bmda = dp;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		size = BITMAP_BYTES(pmem->ml_size);
		space = kmem_zalloc(size * 2, KM_NOSLEEP);
		if (space == NULL)
			return (ENOMEM);
		ASSERT(dp < tail);
		dp->cbd_magic = CPR_BITMAP_MAGIC;
		dp->cbd_spfn = mmu_btop(pmem->ml_address);
		dp->cbd_epfn = mmu_btop(pmem->ml_address + pmem->ml_size) - 1;
		dp->cbd_size = size;
		dp->cbd_reg_bitmap = (cpr_ptr)space;
		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
		dp++;
	}

	/* set magic for the last descriptor */
	ASSERT(dp == (tail - 1));
	dp->cbd_magic = CPR_BITMAP_MAGIC;

	return (0);
}

int
xen_gdt_setprot(cpu_t *cp, uint_t prot)
{
	int err;
#if defined(__amd64)
	int pt_bits = PT_VALID;
	if (prot & PROT_WRITE)
		pt_bits |= PT_WRITABLE;
#endif

	if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt, MMU_PAGESIZE,
	    prot)) != 0)
		goto done;

#if defined(__amd64)
	err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits);
#endif

done:
	if (err) {
		cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d",
		    cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only",
		    err);
	}

	return (err);
}

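/*
 * Hedged usage sketch (hypothetical helpers, not from the original source):
 * a CPU's GDT is made read-only before the hypervisor will reference it and
 * writable again whenever the kernel needs to patch descriptors, which is
 * the toggle xen_gdt_setprot() above provides.
 */
static int
xen_gdt_make_readonly(cpu_t *cp)
{
	return (xen_gdt_setprot(cp, PROT_READ));
}

static int
xen_gdt_make_writable(cpu_t *cp)
{
	return (xen_gdt_setprot(cp, PROT_READ | PROT_WRITE));
}
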
void
mach_kpm_init()
{
	uintptr_t start, end;
	struct memlist *pmem;

	/*
	 * Map each of the memsegs into the kpm segment, coalescing
	 * adjacent memsegs to allow mapping with the largest
	 * possible pages.
	 */
	pmem = phys_install;
	start = pmem->ml_address;
	end = start + pmem->ml_size;
	for (;;) {
		if (pmem == NULL || pmem->ml_address > end) {
			hat_devload(kas.a_hat, kpm_vbase + start,
			    end - start, mmu_btop(start),
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
			if (pmem == NULL)
				break;
			start = pmem->ml_address;
		}
		end = pmem->ml_address + pmem->ml_size;
		pmem = pmem->ml_next;
	}
}

/*
 * The list of mfn pages is out of date. Recompute it.
 */
static void
rebuild_mfn_list(void)
{
	int i = 0;
	size_t sz;
	size_t off;
	pfn_t pfn;

	SUSPEND_DEBUG("rebuild_mfn_list\n");

	sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;

	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		size_t j = mmu_btop(off);
		if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
			pfn = hat_getpfnum(kas.a_hat,
			    (caddr_t)&mfn_list_pages[j]);
			mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
		}

		pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
		mfn_list_pages[j] = pfn_to_mfn(pfn);
	}

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
	    pfn_to_mfn(pfn);
}

/*
 * Add a mapping for the machine page at the given virtual address.
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	ASSERT(level == 0);

	hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
	    mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
}

static paddr_t
mdb_ma_to_pa(uint64_t ma)
{
	pfn_t pfn = mdb_mfn_to_pfn(mmu_btop(ma));
	if (pfn == -(pfn_t)1)
		return (-(paddr_t)1);

	return (mmu_ptob((paddr_t)pfn) | (ma & (MMU_PAGESIZE - 1)));
}

/*
 * Return a page to service by moving it from the retired_pages vnode
 * onto the freelist.
 *
 * Called from mmioctl_page_retire() on behalf of the FMA DE.
 *
 * Returns:
 *
 *   - 0 if the page is unretired,
 *   - EAGAIN if the pp can not be locked,
 *   - EINVAL if the PA is whacko, and
 *   - EIO if the pp is not retired.
 */
int
page_unretire(uint64_t pa)
{
	page_t	*pp;

	pp = page_numtopp_nolock(mmu_btop(pa));
	if (pp == NULL) {
		return (page_retire_done(pp, PRD_INVALID_PA));
	}

	return (page_unretire_pp(pp, 1));
}

/*
 * Jump to the fast reboot switcher. This function never returns.
 */
void
fast_reboot()
{
	processorid_t bootcpuid = 0;
	extern uintptr_t postbootkernelbase;
	extern char	fb_swtch_image[];
	fastboot_file_t	*fb;
	int i;

	postbootkernelbase = 0;

	fb = &newkernel.fi_files[FASTBOOT_SWTCH];

	/*
	 * Map the address into both the current proc's address
	 * space and the kernel's address space in case the panic
	 * is forced by kmdb.
	 */
	if (&kas != curproc->p_as) {
		hat_devload(curproc->p_as->a_hat, (caddr_t)fb->fb_va,
		    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
	}

	bcopy((void *)fb_swtch_image, (void *)fb->fb_va, fb->fb_size);

	/*
	 * Set fb_va to fake_va
	 */
	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
		newkernel.fi_files[i].fb_va = fake_va;
	}

	if (panicstr && CPU->cpu_id != bootcpuid &&
	    CPU_ACTIVE(cpu_get(bootcpuid))) {
		extern void panic_idle(void);
		cpuset_t cpuset;

		CPUSET_ZERO(cpuset);
		CPUSET_ADD(cpuset, bootcpuid);
		xc_priority((xc_arg_t)&newkernel, 0, 0, CPUSET2BV(cpuset),
		    (xc_func_t)fastboot_xc_func);

		panic_idle();
	} else
		(void) fastboot_xc_func(&newkernel, 0, 0);
}

/*
 * Test to see if the page_t for a given PA is retired, and return the
 * hardware errors we have seen on the page if requested.
 *
 * Called from mmioctl_page_retire on behalf of the FMA DE.
 *
 * Returns:
 *
 *   - 0 if the page is retired,
 *   - EIO if the page is not retired and has no errors,
 *   - EAGAIN if the page is not retired but is pending, and
 *   - EINVAL if the PA is whacko.
 */
int
page_retire_check(uint64_t pa, uint64_t *errors)
{
	page_t	*pp;

	if (errors) {
		*errors = 0;
	}

	pp = page_numtopp_nolock(mmu_btop(pa));
	if (pp == NULL) {
		return (page_retire_done(pp, PRD_INVALID_PA));
	}

	return (page_retire_check_pp(pp, errors));
}

/*
 * From a machine address, find the corresponding pseudo-physical address.
 * Pseudo-physical addresses are contiguous and run from mfn_base in each VM.
 * Machine addresses are the real underlying hardware addresses.
 * These are needed for page table entries. Note that this routine is
 * poorly protected. A bad value of "ma" will cause a page fault.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	ulong_t pgoff = ma & MMU_PAGEOFFSET;
	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
	paddr_t pa;

	if (pfn >= xen_info->nr_pages)
		return (-(paddr_t)1);
	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
#ifdef DEBUG
	if (ma != pa_to_ma(pa))
		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
#endif
	return (pa);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	pfn_t pfn;
	ulong_t mfn;

	pfn = mmu_btop(pa - mfn_base);
	if (pa < mfn_base || pfn >= xen_info->nr_pages)
		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
#ifdef DEBUG
	if (mfn_to_pfn_mapping[mfn] != pfn)
		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
#endif
	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
}

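#ifdef DEBUG
/*
 * A hedged sanity-check sketch (hypothetical, not from the original source):
 * for any in-range pseudo-physical address, pa_to_ma() followed by ma_to_pa()
 * should be the identity. This simply mirrors the DEBUG cross-checks already
 * embedded in the two routines above.
 */
static void
check_pa_ma_roundtrip(paddr_t pa)
{
	if (ma_to_pa(pa_to_ma(pa)) != pa)
		dboot_printf("pa/ma round trip failed for 0x%lx\n",
		    (ulong_t)pa);
}
#endif
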
/* ARGSUSED */
static int
impl_acc_check(dev_info_t *dip, const void *handle, const void *addr,
    const void *not_used)
{
	pfn_t pfn, fault_pfn;
	ddi_acc_hdl_t *hp;

	hp = impl_acc_hdl_get((ddi_acc_handle_t)handle);

	ASSERT(hp);

	if (addr != NULL) {
		pfn = hp->ah_pfn;
		fault_pfn = mmu_btop(*(uint64_t *)addr);
		if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum))
			return (DDI_FM_NONFATAL);
	}
	return (DDI_FM_UNKNOWN);
}

caddr_t
psm_map_phys_new(paddr_t addr, size_t len, int prot)
{
	uint_t pgoffset;
	paddr_t base;
	pgcnt_t npages;
	caddr_t cvaddr;

	if (len == 0)
		return (0);

	pgoffset = addr & MMU_PAGEOFFSET;
	base = addr - pgoffset;
	npages = mmu_btopr(len + pgoffset);
	cvaddr = device_arena_alloc(ptob(npages), VM_NOSLEEP);
	if (cvaddr == NULL)
		return (0);
	hat_devload(kas.a_hat, cvaddr, mmu_ptob(npages), mmu_btop(base),
	    prot, HAT_LOAD_LOCK);
	return (cvaddr + pgoffset);
}

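/*
 * A hedged sketch of the matching teardown path (assumed counterpart, not
 * taken from the original source): drop the locked translations installed by
 * psm_map_phys_new() and return the virtual range to the device arena.
 */
static void
psm_unmap_phys_sketch(caddr_t addr, size_t len)
{
	uint_t pgoffset;
	caddr_t base;
	pgcnt_t npages;

	if (len == 0)
		return;

	pgoffset = (uintptr_t)addr & MMU_PAGEOFFSET;
	base = addr - pgoffset;
	npages = mmu_btopr(len + pgoffset);
	hat_unload(kas.a_hat, base, mmu_ptob(npages), HAT_UNLOAD_UNLOCK);
	device_arena_free(base, ptob(npages));
}
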
int
xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
{
	int err;
	caddr_t	lva = (caddr_t)ldt;
#if defined(__amd64)
	int pt_bits = PT_VALID;
	pgcnt_t npgs;
	if (prot & PROT_WRITE)
		pt_bits |= PT_WRITABLE;
#endif	/* __amd64 */

	if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
		goto done;

#if defined(__amd64)
	ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
	npgs = mmu_btop(lsize);
	while (npgs--) {
		if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
		    pt_bits)) != 0)
			break;
		lva += PAGESIZE;
	}
#endif	/* __amd64 */

done:
	if (err) {
		cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
		    (void *)lva, (prot & PROT_WRITE) ?
		    "writable" : "read-only", err);
	}

	return (err);
}

/*
 * Report all hats that either use PFN as a page table or that map the page.
 */
static int
do_report_maps(pfn_t pfn)
{
	struct hat *hatp;
	struct hat hat;
	htable_t *ht;
	htable_t htable;
	uintptr_t base;
	int h;
	int level;
	int entry;
	x86pte_t pte;
	x86pte_t buf;
	x86pte32_t *pte32 = (x86pte32_t *)&buf;
	physaddr_t paddr;
	size_t len;

	/*
	 * The hats are kept in a list with khat at the head.
	 */
	for (hatp = khat; hatp != NULL; hatp = hat.hat_next) {
		/*
		 * read the hat and its hash table
		 */
		if (mdb_vread(&hat, sizeof (hat), (uintptr_t)hatp) == -1) {
			mdb_warn("Couldn't read struct hat\n");
			return (DCMD_ERR);
		}

		/*
		 * read the htable hashtable
		 */
		paddr = 0;
		for (h = 0; h < hat.hat_num_hash; ++h) {
			if (mdb_vread(&ht, sizeof (htable_t *),
			    (uintptr_t)(hat.hat_ht_hash + h)) == -1) {
				mdb_warn("Couldn't read htable\n");
				return (DCMD_ERR);
			}
			for (; ht != NULL; ht = htable.ht_next) {
				if (mdb_vread(&htable, sizeof (htable_t),
				    (uintptr_t)ht) == -1) {
					mdb_warn("Couldn't read htable\n");
					return (DCMD_ERR);
				}

				/*
				 * only report kernel addresses once
				 */
				if (hatp != khat &&
				    htable.ht_vaddr >= kernelbase)
					continue;

				/*
				 * Is the PFN a pagetable itself?
				 */
				if (htable.ht_pfn == pfn) {
					mdb_printf("Pagetable for "
					    "hat=%p htable=%p\n", hatp, ht);
					continue;
				}

				/*
				 * otherwise, examine page mappings
				 */
				level = htable.ht_level;
				if (level > mmu.max_page_level)
					continue;
				paddr = mmu_ptob((physaddr_t)htable.ht_pfn);
				for (entry = 0;
				    entry < HTABLE_NUM_PTES(&htable);
				    ++entry) {

					base = htable.ht_vaddr + entry *
					    mmu.level_size[level];

					/*
					 * only report kernel addresses once
					 */
					if (hatp != khat &&
					    base >= kernelbase)
						continue;

					len = mdb_pread(&buf, mmu.pte_size,
					    paddr + entry * mmu.pte_size);
					if (len != mmu.pte_size)
						return (DCMD_ERR);

					if (mmu.pte_size == sizeof (x86pte_t))
						pte = buf;
					else
						pte = *pte32;

					if ((pte & PT_VALID) == 0)
						continue;
					if (level == 0 ||
					    !(pte & PT_PAGESIZE))
						pte &= PT_PADDR;
					else
						pte &= PT_PADDR_LGPG;
					if (mmu_btop(mdb_ma_to_pa(pte)) != pfn)
						continue;
					mdb_printf("hat=%p maps addr=%p\n",
					    hatp, (caddr_t)base);
				}
			}
		}
	}

done:
	return (DCMD_OK);
}

/*
 * find prom phys pages and alloc space for a tmp copy
 */
static int
i_cpr_find_ppages(void)
{
	struct page *pp;
	struct memlist *pmem;
	pgcnt_t npages, pcnt, scnt, vcnt;
	pfn_t ppn, plast, *dst;
	int mapflag;

	cpr_clear_bitmaps();
	mapflag = REGULAR_BITMAP;

	/*
	 * there should be a page_t for each phys page used by the kernel;
	 * set a bit for each phys page not tracked by a page_t
	 */
	pcnt = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		npages = mmu_btop(pmem->ml_size);
		ppn = mmu_btop(pmem->ml_address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (page_numtopp_nolock(ppn))
				continue;
			(void) cpr_setbit(ppn, mapflag);
			pcnt++;
		}
	}
	memlist_read_unlock();

	/*
	 * clear bits for phys pages in each segment
	 */
	scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);

	/*
	 * set bits for phys pages referenced by the promvp vnode;
	 * these pages are mostly comprised of forthdebug words
	 */
	vcnt = 0;
	for (pp = promvp.v_pages; pp; ) {
		if (cpr_setbit(pp->p_offset, mapflag) == 0)
			vcnt++;
		pp = pp->p_vpnext;
		if (pp == promvp.v_pages)
			break;
	}

	/*
	 * total number of prom pages is:
	 * (non-page_t pages - seg pages + vnode pages)
	 */
	ppage_count = pcnt - scnt + vcnt;
	CPR_DEBUG(CPR_DEBUG1,
	    "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
	    pcnt, scnt, vcnt, ppage_count);

	/*
	 * alloc array of pfn_t to store phys page list
	 */
	pphys_list_size = ppage_count * sizeof (pfn_t);
	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
	if (pphys_list == NULL) {
		cpr_err(CE_WARN, "cannot alloc pphys_list");
		return (ENOMEM);
	}

	/*
	 * phys pages referenced in the bitmap should be
	 * those used by the prom; scan bitmap and save
	 * a list of prom phys page numbers
	 */
	dst = pphys_list;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		npages = mmu_btop(pmem->ml_size);
		ppn = mmu_btop(pmem->ml_address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (cpr_isset(ppn, mapflag)) {
				ASSERT(dst < (pphys_list + ppage_count));
				*dst++ = ppn;
			}
		}
	}
	memlist_read_unlock();

	/*
	 * allocate space to store prom pages
	 */
	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
	if (ppage_buf == NULL) {
		kmem_free(pphys_list, pphys_list_size);
		pphys_list = NULL;
		cpr_err(CE_WARN, "cannot alloc ppage_buf");
		return (ENOMEM);
	}

	return (0);
}

/*
 * page_retire() - the front door in to retire a page.
 *
 * Ideally, page_retire() would instantly retire the requested page.
 * Unfortunately, some pages are locked or otherwise tied up and cannot be
 * retired right away. To deal with that, bits are set in p_toxic of the
 * page_t. An attempt is made to lock the page; if the attempt is successful,
 * we instantly unlock the page counting on page_unlock() to notice p_toxic
 * is nonzero and to call back into page_retire_pp(). Success is determined
 * by looking to see whether the page has been retired once it has been
 * unlocked.
 *
 * Returns:
 *
 *   - 0 on success,
 *   - EINVAL when the PA is whacko,
 *   - EIO if the page is already retired or already pending retirement, or
 *   - EAGAIN if the page could not be _immediately_ retired but is pending.
 */
int
page_retire(uint64_t pa, uchar_t reason)
{
	page_t	*pp;

	ASSERT(reason & PR_REASONS);		/* there must be a reason */
	ASSERT(!(reason & ~PR_REASONS));	/* but no other bits */

	pp = page_numtopp_nolock(mmu_btop(pa));
	if (pp == NULL) {
		PR_MESSAGE(CE_WARN, 1, "Cannot schedule clearing of error on"
		    " page 0x%08x.%08x; page is not relocatable memory", pa);
		return (page_retire_done(pp, PRD_INVALID_PA));
	}
	if (PP_RETIRED(pp)) {
		PR_DEBUG(prd_dup1);
		return (page_retire_done(pp, PRD_DUPLICATE));
	}

	if ((reason & PR_UE) && !PP_TOXIC(pp)) {
		PR_MESSAGE(CE_NOTE, 1, "Scheduling clearing of error on"
		    " page 0x%08x.%08x", pa);
	} else if (PP_PR_REQ(pp)) {
		PR_DEBUG(prd_dup2);
		return (page_retire_done(pp, PRD_DUPLICATE));
	} else {
		PR_MESSAGE(CE_NOTE, 1, "Scheduling removal of"
		    " page 0x%08x.%08x", pa);
	}
	page_settoxic(pp, reason);
	page_retire_enqueue(pp);

	/*
	 * And now for some magic.
	 *
	 * We marked this page toxic up above. All there is left to do is
	 * to try to lock the page and then unlock it. The page lock routines
	 * will intercept the page and retire it if they can. If the page
	 * cannot be locked, that's okay -- page_unlock() or the background
	 * thread will eventually get it; until then the lock routines will
	 * deny further locks on it.
	 */
	if (MTBF(pr_calls, pr_mtbf) && page_trylock(pp, SE_EXCL)) {
		PR_DEBUG(prd_prlocked);
		page_unlock(pp);
	} else {
		PR_DEBUG(prd_prnotlocked);
	}

	if (PP_RETIRED(pp)) {
		PR_DEBUG(prd_prretired);
		return (0);
	} else {
		cv_signal(&pr_cv);
		PR_INCR_KSTAT(pr_failed);

		if (pp->p_toxic & PR_MSG) {
			return (page_retire_done(pp, PRD_FAILED));
		} else {
			return (page_retire_done(pp, PRD_PENDING));
		}
	}
}

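/*
 * A hedged illustration (hypothetical helper, not part of the original
 * source) of the call sequence the FMA DE drives through the three entry
 * points above: schedule a retire, poll its status, and later return the
 * page to service once it is believed healthy again.
 */
static void
example_retire_cycle(uint64_t pa)
{
	uint64_t errors;

	(void) page_retire(pa, PR_UE);		/* schedule retirement */
	(void) page_retire_check(pa, &errors);	/* poll status and errors */
	(void) page_unretire(pa);		/* return page to service */
}
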
/*
 * This function performs the following tasks:
 * - Read the sizes of the new kernel and boot archive.
 * - Allocate memory for the new kernel and boot archive.
 * - Allocate memory for page tables necessary for mapping the memory
 *   allocated for the files.
 * - Read the new kernel and boot archive into memory.
 * - Map in the fast reboot switcher.
 * - Load the fast reboot switcher to FASTBOOT_SWTCH_PA.
 * - Build the new multiboot_info structure
 * - Build page tables for the low 1G of physical memory.
 * - Mark the data structure as valid if all steps have succeeded.
 */
void
fastboot_load_kernel(char *mdep)
{
	void		*buf = NULL;
	int		i;
	fastboot_file_t	*fb;
	uint32_t	dboot_start_offset;
	char		kern_bootpath[OBP_MAXPATHLEN];
	extern uintptr_t postbootkernelbase;
	uintptr_t	saved_kernelbase;
	int		bootpath_len = 0;
	int		is_failsafe = 0;
	int		is_retry = 0;
	uint64_t	end_addr;

	if (!fastreboot_capable)
		return;

	if (newkernel.fi_valid)
		fastboot_free_newkernel(&newkernel);

	saved_kernelbase = postbootkernelbase;

	postbootkernelbase = 0;

	/*
	 * Initialize various HAT related fields in the data structure
	 */
	fastboot_init_fields(&newkernel);

	bzero(kern_bootpath, OBP_MAXPATHLEN);

	/*
	 * Process the boot argument
	 */
	bzero(fastboot_args, OBP_MAXPATHLEN);
	fastboot_parse_mdep(mdep, kern_bootpath, &bootpath_len, fastboot_args);

	/*
	 * Make sure we get the null character
	 */
	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_UNIX],
	    bootpath_len);
	bcopy(kern_bootfile,
	    &fastboot_filename[FASTBOOT_NAME_UNIX][bootpath_len],
	    strlen(kern_bootfile) + 1);

	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
	    bootpath_len);

	if (bcmp(kern_bootfile, FAILSAFE_BOOTFILE32,
	    (sizeof (FAILSAFE_BOOTFILE32) - 1)) == 0 ||
	    bcmp(kern_bootfile, FAILSAFE_BOOTFILE64,
	    (sizeof (FAILSAFE_BOOTFILE64) - 1)) == 0) {
		is_failsafe = 1;
	}

load_kernel_retry:
	/*
	 * Read in unix and boot_archive
	 */
	end_addr = DBOOT_ENTRY_ADDRESS;
	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
		struct _buf	*file;
		uintptr_t	va;
		uint64_t	fsize;
		size_t		fsize_roundup, pt_size;
		int		page_index;
		uintptr_t	offset;
		ddi_dma_attr_t dma_attr = fastboot_dma_attr;

		dprintf("fastboot_filename[%d] = %s\n",
		    i, fastboot_filename[i]);

		if ((file = kobj_open_file(fastboot_filename[i])) ==
		    (struct _buf *)-1) {
			cmn_err(CE_NOTE, "!Fastboot: Couldn't open %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		if (kobj_get_filesize(file, &fsize) != 0) {
			cmn_err(CE_NOTE,
			    "!Fastboot: Couldn't get filesize for %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		fsize_roundup = P2ROUNDUP_TYPED(fsize, PAGESIZE, size_t);

		/*
		 * Where the files end in physical memory after being
		 * relocated by the fast boot switcher.
		 */
		end_addr += fsize_roundup;
		if (end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_hi) {
			cmn_err(CE_NOTE, "!Fastboot: boot archive is too big");
			goto err_out;
		}

		/*
		 * Adjust dma_attr_addr_lo so that the new kernel and boot
		 * archive will not be overridden during relocation.
		 */
		if (end_addr > fastboot_dma_attr.dma_attr_addr_lo ||
		    end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_lo) {

			if (is_retry) {
				/*
				 * If we have already tried and didn't succeed,
				 * just give up.
				 */
				cmn_err(CE_NOTE,
				    "!Fastboot: boot archive is too big");
				goto err_out;
			} else {
				/* Set the flag so we don't keep retrying */
				is_retry++;

				/* Adjust dma_attr_addr_lo */
				fastboot_dma_attr.dma_attr_addr_lo = end_addr;
				fastboot_below_1G_dma_attr.dma_attr_addr_lo =
				    end_addr;

				/*
				 * Free the memory we have already allocated
				 * whose physical addresses might not fit
				 * the new lo and hi constraints.
				 */
				fastboot_free_mem(&newkernel, end_addr);
				goto load_kernel_retry;
			}
		}

		if (!fastboot_contig)
			dma_attr.dma_attr_sgllen = (fsize / PAGESIZE) +
			    (((fsize % PAGESIZE) == 0) ? 0 : 1);

		if ((buf = contig_alloc(fsize, &dma_attr, PAGESIZE, 0))
		    == NULL) {
			cmn_err(CE_NOTE, fastboot_enomem_msg, fsize, "64G");
			goto err_out;
		}

		va = P2ROUNDUP_TYPED((uintptr_t)buf, PAGESIZE, uintptr_t);

		if (kobj_read_file(file, (char *)va, fsize, 0) < 0) {
			cmn_err(CE_NOTE, "!Fastboot: Couldn't read %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		fb = &newkernel.fi_files[i];
		fb->fb_va = va;
		fb->fb_size = fsize;
		fb->fb_sectcnt = 0;

		pt_size = FASTBOOT_PTE_LIST_SIZE(fsize_roundup);

		/*
		 * If we have reserved memory but it is not enough, free it.
		 */
		if (fb->fb_pte_list_size && fb->fb_pte_list_size < pt_size) {
			contig_free((void *)fb->fb_pte_list_va,
			    fb->fb_pte_list_size);
			fb->fb_pte_list_size = 0;
		}

		if (fb->fb_pte_list_size == 0) {
			if ((fb->fb_pte_list_va =
			    (x86pte_t *)contig_alloc(pt_size,
			    &fastboot_below_1G_dma_attr, PAGESIZE, 0))
			    == NULL) {
				cmn_err(CE_NOTE, fastboot_enomem_msg,
				    (uint64_t)pt_size, "1G");
				goto err_out;
			}
			/*
			 * fb_pte_list_size must be set after the allocation
			 * succeeds as it's used to determine how much memory
			 * to free.
			 */
			fb->fb_pte_list_size = pt_size;
		}

		bzero((void *)(fb->fb_pte_list_va), fb->fb_pte_list_size);

		fb->fb_pte_list_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
		    (caddr_t)fb->fb_pte_list_va));

		for (page_index = 0, offset = 0; offset < fb->fb_size;
		    offset += PAGESIZE) {
			uint64_t paddr;

			paddr = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
			    (caddr_t)fb->fb_va + offset));

			ASSERT(paddr >= fastboot_dma_attr.dma_attr_addr_lo);

			/*
			 * Include the pte_bits so we don't have to make
			 * it in assembly.
			 */
			fb->fb_pte_list_va[page_index++] = (x86pte_t)
			    (paddr | pte_bits);
		}

		fb->fb_pte_list_va[page_index] = FASTBOOT_TERMINATE;

		if (i == FASTBOOT_UNIX) {
			Ehdr	*ehdr = (Ehdr *)va;
			int	j;

			/*
			 * Sanity checks:
			 */
			for (j = 0; j < SELFMAG; j++) {
				if (ehdr->e_ident[j] != ELFMAG[j]) {
					cmn_err(CE_NOTE, "!Fastboot: Bad ELF "
					    "signature");
					goto err_out;
				}
			}

			if (ehdr->e_ident[EI_CLASS] == ELFCLASS32 &&
			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
			    ehdr->e_machine == EM_386) {

				fb->fb_sectcnt = sizeof (fb->fb_sections) /
				    sizeof (fb->fb_sections[0]);

				if (fastboot_elf32_find_loadables((void *)va,
				    fsize, &fb->fb_sections[0],
				    &fb->fb_sectcnt, &dboot_start_offset) < 0) {
					cmn_err(CE_NOTE, "!Fastboot: ELF32 "
					    "program section failure");
					goto err_out;
				}

				if (fb->fb_sectcnt == 0) {
					cmn_err(CE_NOTE, "!Fastboot: No ELF32 "
					    "program sections found");
					goto err_out;
				}

				if (is_failsafe) {
					/* Failsafe boot_archive */
					bcopy(BOOTARCHIVE32_FAILSAFE,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE32_FAILSAFE));
				} else {
					bcopy(BOOTARCHIVE32,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE32));
				}

			} else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64 &&
			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
			    ehdr->e_machine == EM_AMD64) {

				if (fastboot_elf64_find_dboot_load_offset(
				    (void *)va, fsize,
				    &dboot_start_offset) != 0) {
					cmn_err(CE_NOTE, "!Fastboot: Couldn't "
					    "find ELF64 dboot entry offset");
					goto err_out;
				}

				if (!is_x86_feature(x86_featureset,
				    X86FSET_64) ||
				    !is_x86_feature(x86_featureset,
				    X86FSET_PAE)) {
					cmn_err(CE_NOTE, "Fastboot: Cannot "
					    "reboot to %s: "
					    "not a 64-bit capable system",
					    kern_bootfile);
					goto err_out;
				}

				if (is_failsafe) {
					/* Failsafe boot_archive */
					bcopy(BOOTARCHIVE64_FAILSAFE,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE64_FAILSAFE));
				} else {
					bcopy(BOOTARCHIVE64,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE64));
				}
			} else {
				cmn_err(CE_NOTE, "!Fastboot: Unknown ELF type");
				goto err_out;
			}

			fb->fb_dest_pa = DBOOT_ENTRY_ADDRESS -
			    dboot_start_offset;

			fb->fb_next_pa = DBOOT_ENTRY_ADDRESS + fsize_roundup;
		} else {
			fb->fb_dest_pa = newkernel.fi_files[i - 1].fb_next_pa;
			fb->fb_next_pa = fb->fb_dest_pa + fsize_roundup;
		}

		kobj_close_file(file);
	}

	/*
	 * Add the function that will switch us to 32-bit protected mode
	 */
	fb = &newkernel.fi_files[FASTBOOT_SWTCH];
	fb->fb_va = fb->fb_dest_pa = FASTBOOT_SWTCH_PA;
	fb->fb_size = MMU_PAGESIZE;

	hat_devload(kas.a_hat, (caddr_t)fb->fb_va,
	    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
	    PROT_READ | PROT_WRITE | PROT_EXEC,
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	/*
	 * Build the new multiboot_info structure
	 */
	if (fastboot_build_mbi(fastboot_args, &newkernel) != 0) {
		goto err_out;
	}

	/*
	 * Build page table for low 1G physical memory. Use big pages.
	 * Allocate 4 (5 for amd64) pages for the page tables.
	 *    1 page for PML4 (amd64)
	 *    1 page for Page-Directory-Pointer Table
	 *    2 pages for Page Directory
	 *    1 page for Page Table.
	 * The page table entry will be rewritten to map the physical
	 * address as we do the copying.
	 */
	if (newkernel.fi_has_pae) {
#ifdef	__amd64
		size_t size = MMU_PAGESIZE * 5;
#else
		size_t size = MMU_PAGESIZE * 4;
#endif	/* __amd64 */

		if (newkernel.fi_pagetable_size &&
		    newkernel.fi_pagetable_size < size) {
			contig_free((void *)newkernel.fi_pagetable_va,
			    newkernel.fi_pagetable_size);
			newkernel.fi_pagetable_size = 0;
		}

		if (newkernel.fi_pagetable_size == 0) {
			if ((newkernel.fi_pagetable_va = (uintptr_t)
			    contig_alloc(size, &fastboot_below_1G_dma_attr,
			    MMU_PAGESIZE, 0)) == NULL) {
				cmn_err(CE_NOTE, fastboot_enomem_msg,
				    (uint64_t)size, "1G");
				goto err_out;
			}
			/*
			 * fi_pagetable_size must be set after the allocation
			 * succeeds as it's used to determine how much memory
			 * to free.
			 */
			newkernel.fi_pagetable_size = size;
		}

		bzero((void *)(newkernel.fi_pagetable_va), size);

		newkernel.fi_pagetable_pa =
		    mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
		    (caddr_t)newkernel.fi_pagetable_va));

		newkernel.fi_last_table_pa = newkernel.fi_pagetable_pa +
		    size - MMU_PAGESIZE;

		newkernel.fi_next_table_va = newkernel.fi_pagetable_va +
		    MMU_PAGESIZE;
		newkernel.fi_next_table_pa = newkernel.fi_pagetable_pa +
		    MMU_PAGESIZE;

		fastboot_build_pagetables(&newkernel);
	}

	/* Generate MD5 checksums */
	fastboot_cksum_generate(&newkernel);

	/* Mark it as valid */
	newkernel.fi_valid = 1;
	newkernel.fi_magic = FASTBOOT_MAGIC;

	postbootkernelbase = saved_kernelbase;
	return;

err_out:
	postbootkernelbase = saved_kernelbase;
	newkernel.fi_valid = 0;
	fastboot_free_newkernel(&newkernel);
}

void
vpm_init()
{
	long  npages;
	struct vpmap *vpm;
	struct vpmfree *vpmflp;
	int i, ndx;
	extern void prefetch_smap_w(void *);

	if (!vpm_cache_enable) {
		return;
	}

	/*
	 * Set the size of the cache.
	 */
	vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
	if (vpm_cache_size < VPMAP_MINCACHE) {
		vpm_cache_size = VPMAP_MINCACHE;
	}

	/*
	 * Number of freelists.
	 */
	if (vpm_nfreelist == 0) {
		vpm_nfreelist = max_ncpus;
	} else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
		cmn_err(CE_WARN, "vpmap create : number of freelist "
		    "vpm_nfreelist %d using %d", vpm_nfreelist, max_ncpus);
		vpm_nfreelist = 2 * max_ncpus;
	}

	/*
	 * Round it up to the next power of 2
	 */
	if (vpm_nfreelist & (vpm_nfreelist - 1)) {
		vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
	}
	vpmd_freemsk = vpm_nfreelist - 1;

	/*
	 * Use a per cpu rotor index to spread the allocations evenly
	 * across the available vpm freelists.
	 */
	vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
	ndx = 0;
	for (i = 0; i < max_ncpus; i++) {
		vpmd_cpu[i].vfree_ndx = ndx;
		ndx = (ndx + 1) & vpmd_freemsk;
	}

	/*
	 * Allocate and initialize the freelist.
	 */
	vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
	    KM_SLEEP);
	for (i = 0; i < vpm_nfreelist; i++) {

		vpmflp = &vpmd_free[i];
		/*
		 * Set up initial queue pointers. They will get flipped
		 * back and forth.
		 */
		vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
		vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
	}

	npages = mmu_btop(vpm_cache_size);

	/*
	 * Allocate and initialize the vpmap structs.
	 */
	vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP);
	for (vpm = vpmd_vpmap; vpm <= &vpmd_vpmap[npages - 1]; vpm++) {
		struct vpmfree *vpmflp;
		union vpm_freeq *releq;
		struct vpmap *vpmapf;

		/*
		 * Use prefetch as we have to walk thru a large number of
		 * these data structures. We just use the smap's prefetch
		 * routine as it does the same. This should work fine
		 * for x64 (this needs to be modified when enabled on sparc).
		 */
		prefetch_smap_w((void *)vpm);

		vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);

		vpmflp = VPMAP2VMF(vpm);
		releq = vpmflp->vpm_releq;

		vpmapf = releq->vpmq_free;
		if (vpmapf == NULL) {
			releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		} else {
			vpm->vpm_next = vpmapf;
			vpm->vpm_prev = vpmapf->vpm_prev;
			vpmapf->vpm_prev = vpm;
			vpm->vpm_prev->vpm_next = vpm;
			releq->vpmq_free = vpm->vpm_next;
		}

		/*
		 * Indicate that the vpmap is on the releq at start
		 */
		vpm->vpm_ndxflg = VPMRELEQ;
	}
}

int
px_pec_attach(px_t *px_p)
{
	px_pec_t *pec_p;
	int i, len;
	int nrange = px_p->px_ranges_length / sizeof (px_ranges_t);
	dev_info_t *dip = px_p->px_dip;
	px_ranges_t *rangep = px_p->px_ranges_p;
	int ret;

	/*
	 * Allocate a state structure for the PEC and cross-link it
	 * to its per px node state structure.
	 */
	pec_p = kmem_zalloc(sizeof (px_pec_t), KM_SLEEP);
	px_p->px_pec_p = pec_p;
	pec_p->pec_px_p = px_p;

	len = snprintf(pec_p->pec_nameinst_str,
	    sizeof (pec_p->pec_nameinst_str), "%s%d", NAMEINST(dip));
	pec_p->pec_nameaddr_str = pec_p->pec_nameinst_str + ++len;
	(void) snprintf(pec_p->pec_nameaddr_str,
	    sizeof (pec_p->pec_nameinst_str) - len, "%s@%s", NAMEADDR(dip));

	/*
	 * Add interrupt handlers to process correctable/fatal/non fatal
	 * PCIE messages.
	 */
	if ((ret = px_pec_msg_add_intr(px_p)) != DDI_SUCCESS) {
		px_pec_msg_rem_intr(px_p);
		return (ret);
	}

	/*
	 * Get this pec's mem32 and mem64 segments to determine whether
	 * a dma object originates from this pec. i.e. dev to dev dma
	 */
	for (i = 0; i < nrange; i++, rangep++) {
		uint64_t rng_addr, rng_size, *pfnbp, *pfnlp;
		uint32_t rng_type = rangep->child_high & PCI_ADDR_MASK;

		switch (rng_type) {
		case PCI_ADDR_MEM32:
			pfnbp = &pec_p->pec_base32_pfn;
			pfnlp = &pec_p->pec_last32_pfn;
			break;

		case PCI_ADDR_MEM64:
			pfnbp = &pec_p->pec_base64_pfn;
			pfnlp = &pec_p->pec_last64_pfn;
			break;

		case PCI_ADDR_CONFIG:
		case PCI_ADDR_IO:
		default:
			continue;
		}
		rng_addr = (uint64_t)(rangep->parent_high &
		    px_ranges_phi_mask) << 32;
		rng_addr |= (uint64_t)rangep->parent_low;
		rng_size = (uint64_t)rangep->size_high << 32;
		rng_size |= (uint64_t)rangep->size_low;

		*pfnbp = mmu_btop(rng_addr);
		*pfnlp = mmu_btop(rng_addr + rng_size);
	}

	mutex_init(&pec_p->pec_pokefault_mutex, NULL, MUTEX_DRIVER,
	    (void *)px_p->px_fm_ibc);

	return (DDI_SUCCESS);
}

/*
 * Fill in the remaining CPU context and initialize it.
 */
static int
mp_set_cpu_context(vcpu_guest_context_t *vgc, cpu_t *cp)
{
	uint_t vec, iopl;

	vgc->flags = VGCF_IN_KERNEL;

	/*
	 * fpu_ctx we leave as zero; on first fault we'll store
	 * sse_initial into it anyway.
	 */

#if defined(__amd64)
	vgc->user_regs.cs = KCS_SEL | SEL_KPL;	/* force to ring 3 */
#else
	vgc->user_regs.cs = KCS_SEL;
#endif
	vgc->user_regs.ds = KDS_SEL;
	vgc->user_regs.es = KDS_SEL;
	vgc->user_regs.ss = KDS_SEL;
	vgc->kernel_ss = KDS_SEL;

	/*
	 * Allow I/O privilege level for Dom0 kernel.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		iopl = (PS_IOPL & 0x1000);	/* ring 1 */
	else
		iopl = 0;

#if defined(__amd64)
	vgc->user_regs.fs = 0;
	vgc->user_regs.gs = 0;
	vgc->user_regs.rflags = F_OFF | iopl;
#elif defined(__i386)
	vgc->user_regs.fs = KFS_SEL;
	vgc->user_regs.gs = KGS_SEL;
	vgc->user_regs.eflags = F_OFF | iopl;
	vgc->event_callback_cs = vgc->user_regs.cs;
	vgc->failsafe_callback_cs = vgc->user_regs.cs;
#endif

	/*
	 * Initialize the trap_info_t from the IDT
	 */
#if !defined(__lint)
	ASSERT(NIDT == sizeof (vgc->trap_ctxt) / sizeof (vgc->trap_ctxt[0]));
#endif
	for (vec = 0; vec < NIDT; vec++) {
		trap_info_t *ti = &vgc->trap_ctxt[vec];

		if (xen_idt_to_trap_info(vec,
		    &cp->cpu_m.mcpu_idt[vec], ti) == 0) {
			ti->cs = KCS_SEL;
			ti->vector = vec;
		}
	}

	/*
	 * No LDT
	 */

	/*
	 * (We assert in various places that the GDT is (a) aligned on a
	 * page boundary and (b) one page long, so this really should fit..)
	 */
#ifdef CRASH_XEN
	vgc->gdt_frames[0] = pa_to_ma(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#else
	vgc->gdt_frames[0] = pfn_to_mfn(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#endif
	vgc->gdt_ents = NGDT;

	vgc->ctrlreg[0] = CR0_ENABLE_FPU_FLAGS(getcr0());

#if defined(__i386)
	if (mmu.pae_hat)
		vgc->ctrlreg[3] =
		    xen_pfn_to_cr3(pfn_to_mfn(kas.a_hat->hat_htable->ht_pfn));
	else
#endif
		vgc->ctrlreg[3] =
		    pa_to_ma(mmu_ptob(kas.a_hat->hat_htable->ht_pfn));

	vgc->ctrlreg[4] = getcr4();

	vgc->event_callback_eip = (uintptr_t)xen_callback;
	vgc->failsafe_callback_eip = (uintptr_t)xen_failsafe_callback;
	vgc->flags |= VGCF_failsafe_disables_events;

#if defined(__amd64)
	/*
	 * XXPV should this be moved to init_cpu_syscall?
	 */
	vgc->syscall_callback_eip = (uintptr_t)sys_syscall;
	vgc->flags |= VGCF_syscall_disables_events;

	ASSERT(vgc->user_regs.gs == 0);
	vgc->gs_base_kernel = (uintptr_t)cp;
#endif

	return (xen_vcpu_initialize(cp->cpu_id, vgc));
}