/*
 * craft tlb info for tmp use during resume; this data gets used by
 * cprboot to install tlb entries.  we also mark each struct as tmp
 * so those tlb entries will get flushed after switching to the kernel
 * trap table.  no data needs to be recorded for vaddr when it falls
 * within the nucleus since we've already recorded nucleus ttes and
 * an 8K tte would conflict with a 4MB tte.  e.g., the cpr module
 * text/data may have been loaded into the text/data nucleus.
 */
static void
i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
{
	pfn_t ppn;
	uint_t rw;

	if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
		return;

	while ((1 << ctip->index) & ctip->skip)
		ctip->index--;
	ASSERT(ctip->index > 0);
	ASSERT(ctip->dst < ctip->tail);

	/*
	 * without any global service available to lookup
	 * a tte by vaddr, we craft our own here:
	 */
	ppn = va_to_pfn(vaddr);
	rw = (nbase == datava) ? TTE_HWWR_INT : 0;
	ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
	ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
	    TTE_CP_INT | TTE_PRIV_INT | rw;
	ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
	ctip->dst->index = ctip->index--;
	ctip->dst->tmp = 1;

	ctip->dst++;
}
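/*
 * The sketch below is not part of the source; it is a minimal user-space
 * illustration of how the ctip->skip bitmask steers the index selection
 * above: bits set in 'skip' mark TLB entries assumed to be reserved (for
 * example, already holding locked nucleus translations), so the walk
 * decrements past them before an entry is claimed.  next_free_index() and
 * the sample mask are made up for illustration.
 */
#include <stdio.h>
#include <assert.h>

static int
next_free_index(int index, unsigned int skip)
{
	/* same test i_cpr_make_tte() applies before claiming an entry */
	while ((1 << index) & skip)
		index--;
	assert(index > 0);
	return (index);
}

int
main(void)
{
	unsigned int skip = (1 << 5) | (1 << 4);	/* entries 5, 4 reserved */
	int index = next_free_index(5, skip);

	printf("first usable tlb index: %d\n", index);	/* prints 3 */
	return (0);
}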
void
boot_mapin(caddr_t addr, size_t size)
{
	caddr_t eaddr;
	page_t *pp;
	pfn_t pfnum;

	if (page_resv(btop(size), KM_NOSLEEP) == 0)
		panic("boot_mapin: page_resv failed");

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
		pfnum = va_to_pfn(addr);
		if (pfnum == PFN_INVALID)
			continue;
		if ((pp = page_numtopp_nolock(pfnum)) == NULL)
			panic("boot_mapin(): No pp for pfnum = %lx", pfnum);

		/*
		 * must break up any large pages that may have constituent
		 * pages being utilized for BOP_ALLOC()'s before calling
		 * page_numtopp().  The locking code (i.e. page_reclaim())
		 * can't handle them.
		 */
		if (pp->p_szc != 0)
			page_boot_demote(pp);

		pp = page_numtopp(pfnum, SE_EXCL);
		if (pp == NULL || PP_ISFREE(pp))
			panic("boot_mapin: pp is NULL or free");

		/*
		 * If the cage is on but doesn't yet contain this page,
		 * mark it as non-relocatable.
		 */
		if (kcage_on && !PP_ISNORELOC(pp)) {
			PP_SETNORELOC(pp);
			PLCNT_XFER_NORELOC(pp);
		}

		(void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr,
		    NULL);
		pp->p_lckcnt = 1;
#if defined(__x86)
		page_downgrade(pp);
#else
		page_unlock(pp);
#endif
	}
}
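/*
 * A minimal user-space sketch (not kernel code) of the walk pattern
 * boot_mapin() uses: step a VA range one page at a time, translate each
 * page, and skip holes where no backing page exists.  PAGESIZE,
 * PFN_INVALID and the va_to_pfn() stub below are simplified stand-ins
 * for the kernel definitions; the stub translation and sample range are
 * made up.
 */
#include <stdio.h>
#include <stdint.h>

#define	PAGESIZE	4096
#define	PFN_INVALID	((uint64_t)-1)

/* stub: pretend every other page in the range is unmapped */
static uint64_t
va_to_pfn(uintptr_t va)
{
	uint64_t pg = va / PAGESIZE;

	return ((pg & 1) ? PFN_INVALID : pg);
}

int
main(void)
{
	uintptr_t addr = 0x1000000;
	uintptr_t eaddr = addr + 4 * PAGESIZE;

	for (; addr < eaddr; addr += PAGESIZE) {
		uint64_t pfn = va_to_pfn(addr);

		if (pfn == PFN_INVALID)
			continue;	/* hole: nothing to hash in */
		printf("va 0x%lx -> pfn 0x%llx\n", (unsigned long)addr,
		    (unsigned long long)pfn);
	}
	return (0);
}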
void
kdi_tlb_page_lock(caddr_t va, int do_dtlb)
{
	tte_t tte;
	pfn_t pfn = va_to_pfn(va);
	uint64_t ret;

	sfmmu_memtte(&tte, pfn, (PROC_TEXT | HAT_NOSYNC), TTE8K);
	ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte,
	    MAP_ITLB | (do_dtlb ? MAP_DTLB : 0));

	if (ret != H_EOK) {
		cmn_err(CE_PANIC, "cpu%d: cannot set permanent mapping for "
		    "va=0x%p, hv error code 0x%lx", getprocessorid(),
		    (void *)va, ret);
	}
}
void
xen_hvm_init(void)
{
	struct cpuid_regs cp;
	uint32_t xen_signature[4], base;
	char *xen_str;
	struct xen_add_to_physmap xatp;
	xen_capabilities_info_t caps;
	pfn_t pfn;
	uint64_t msrval, val;
	extern int apix_enable;

	if (xen_hvm_inited != 0)
		return;

	xen_hvm_inited = 1;

	/*
	 * Xen's pseudo-cpuid function returns a string representing
	 * the Xen signature in %ebx, %ecx, and %edx.
	 * Loop over the base values, since the base may differ if the
	 * hypervisor has Hyper-V emulation switched on.
	 *
	 * %eax contains the maximum supported cpuid function.
	 */
	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		cp.cp_eax = base;
		(void) __cpuid_insn(&cp);
		xen_signature[0] = cp.cp_ebx;
		xen_signature[1] = cp.cp_ecx;
		xen_signature[2] = cp.cp_edx;
		xen_signature[3] = 0;
		xen_str = (char *)xen_signature;
		if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
		    cp.cp_eax >= (base + 2))
			break;
	}
	if (base >= 0x40010000)
		return;

	/*
	 * cpuid function at base + 1 returns the Xen version in %eax.  The
	 * top 16 bits are the major version, the bottom 16 are the minor
	 * version.
	 */
	cp.cp_eax = base + 1;
	(void) __cpuid_insn(&cp);
	xen_major = cp.cp_eax >> 16;
	xen_minor = cp.cp_eax & 0xffff;

	/*
	 * Below version 3.1 we can't do anything special as an HVM domain;
	 * the PV drivers don't work, many hypercalls are not available,
	 * etc.
	 */
	if (xen_major < 3 || (xen_major == 3 && xen_minor < 1))
		return;

	/*
	 * cpuid function at base + 2 returns information about the
	 * hypercall page.  %eax nominally contains the number of pages
	 * with hypercall code, but according to the Xen guys, "I'll
	 * guarantee that remains one forever more, so you can just
	 * allocate a single page and get quite upset if you ever see CPUID
	 * return more than one page."  %ebx contains an MSR we use to ask
	 * Xen to remap each page at a specific pfn.
	 */
	cp.cp_eax = base + 2;
	(void) __cpuid_insn(&cp);

	/*
	 * Let Xen know where we want the hypercall page mapped.  We
	 * already have a page allocated in the .text section to simplify
	 * the wrapper code.
	 */
	pfn = va_to_pfn(&hypercall_page);
	msrval = mmu_ptob(pfn);
	wrmsr(cp.cp_ebx, msrval);

	/* Fill in the xen_info data */
	xen_info = &__xen_info;
	(void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor);

	if (hvm_get_param(HVM_PARAM_STORE_PFN, &val) < 0)
		return;

	/*
	 * The first hypercall worked, so mark hypercalls as working.
	 */
	xen_hvm_features |= XEN_HVM_HYPERCALLS;

	xen_info->store_mfn = (mfn_t)val;
	if (hvm_get_param(HVM_PARAM_STORE_EVTCHN, &val) < 0)
		return;
	xen_info->store_evtchn = (mfn_t)val;

	/* Figure out whether the hypervisor is 32-bit or 64-bit. */
	if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0) {
		((char *)(caps))[sizeof (caps) - 1] = '\0';
		if (strstr(caps, "x86_64") != NULL)
			xen_bits = 64;
		else if (strstr(caps, "x86_32") != NULL)
			xen_bits = 32;
	}

	if (xen_bits < 0)
		return;
#ifdef __amd64
	ASSERT(xen_bits == 64);
#endif

	/*
	 * Allocate space for the shared_info page and tell Xen where it
	 * is.
	 */
	xen_shared_info_frame = va_to_pfn(&hypercall_shared_info_page);
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
		return;

	HYPERVISOR_shared_info = (void *)&hypercall_shared_info_page;

	/*
	 * A working HVM tlb flush hypercall was introduced in Xen 3.3.
	 */
	if (xen_major > 3 || (xen_major == 3 && xen_minor >= 3))
		xen_hvm_features |= XEN_HVM_TLBFLUSH;

	/* FIXME Disable apix for the time being */
	apix_enable = 0;
}
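/*
 * A user-space sketch of the same signature scan xen_hvm_init() performs
 * above, assuming GCC or clang on x86 with <cpuid.h>; it is not taken
 * from the source.  It walks the hypervisor cpuid leaves in steps of
 * 0x100, assembles %ebx/%ecx/%edx into a string, and reads the version
 * from the leaf after the matching base.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <cpuid.h>

int
main(void)
{
	uint32_t base, eax, sig[4];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		__cpuid(base, eax, sig[0], sig[1], sig[2]);
		sig[3] = 0;
		if (strcmp((char *)sig, "XenVMMXenVMM") == 0 &&
		    eax >= base + 2)
			break;
	}
	if (base >= 0x40010000) {
		printf("not running on Xen HVM\n");
		return (1);
	}

	/* leaf base + 1: major version in the top 16 bits, minor below */
	__cpuid(base + 1, eax, sig[0], sig[1], sig[2]);
	printf("Xen %u.%u found at cpuid base 0x%x\n",
	    eax >> 16, eax & 0xffff, base);
	return (0);
}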
/*
 * This routine remaps the kernel using large ttes.
 * All entries except locked ones will be removed from the tlb.
 * It assumes that the text and data segments each reside in their own
 * 4MB chunk of virtually and physically contiguous memory.  This routine
 * is only executed by the first cpu.  The remaining cpus execute
 * sfmmu_mp_startup() instead.
 * XXX It assumes that the start of the text segment is KERNELBASE.  It
 * should actually be based on start.
 */
void
sfmmu_remap_kernel(void)
{
	pfn_t pfn;
	uint_t attr;
	int flags;

	extern char end[];
	extern struct as kas;

	textva = (caddr_t)(KERNELBASE & MMU_PAGEMASK4M);
	pfn = va_to_pfn(textva);
	if (pfn == PFN_INVALID)
		prom_panic("can't find kernel text pfn");
	pfn &= TTE_PFNMASK(TTE4M);

	attr = PROC_TEXT | HAT_NOSYNC;
	flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD;
	sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M);
	/*
	 * We set the lock bit in the tte to lock the translation in
	 * the tlb.
	 */
	TTE_SET_LOCKED(&ktext_tte);
	sfmmu_tteload(kas.a_hat, &ktext_tte, textva, NULL, flags);

	datava = (caddr_t)((uintptr_t)end & MMU_PAGEMASK4M);
	pfn = va_to_pfn(datava);
	if (pfn == PFN_INVALID)
		prom_panic("can't find kernel data pfn");
	pfn &= TTE_PFNMASK(TTE4M);

	attr = PROC_DATA | HAT_NOSYNC;
	sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M);
	/*
	 * We set the lock bit in the tte to lock the translation in
	 * the tlb.  We also set the mod bit to avoid taking dirty bit
	 * traps on kernel data.
	 */
	TTE_SET_LOCKED(&kdata_tte);
	TTE_SET_LOFLAGS(&kdata_tte, 0, TTE_HWWR_INT);
	sfmmu_tteload(kas.a_hat, &kdata_tte, datava,
	    (struct page *)NULL, flags);

	/*
	 * create bigktsb ttes if necessary.
	 */
	if (enable_bigktsb) {
		int i = 0;
		caddr_t va = ktsb_base;
		size_t tsbsz = ktsb_sz;
		tte_t tte;

		ASSERT(va >= datava + MMU_PAGESIZE4M);
		ASSERT(tsbsz >= MMU_PAGESIZE4M);
		ASSERT(IS_P2ALIGNED(tsbsz, tsbsz));
		ASSERT(IS_P2ALIGNED(va, tsbsz));
		attr = PROC_DATA | HAT_NOSYNC;
		while (tsbsz != 0) {
			ASSERT(i < MAX_BIGKTSB_TTES);
			pfn = va_to_pfn(va);
			ASSERT(pfn != PFN_INVALID);
			ASSERT((pfn & ~TTE_PFNMASK(TTE4M)) == 0);
			sfmmu_memtte(&tte, pfn, attr, TTE4M);
			ASSERT(TTE_IS_MOD(&tte));
			/*
			 * No need to lock if we use physical addresses.
			 * Since we invalidate the kernel TSB using virtual
			 * addresses, it's an optimization to load them now
			 * so that we won't have to load them later.
			 */
			if (!ktsb_phys) {
				TTE_SET_LOCKED(&tte);
			}
			sfmmu_tteload(kas.a_hat, &tte, va, NULL, flags);
			bigktsb_ttes[i] = tte;
			va += MMU_PAGESIZE4M;
			tsbsz -= MMU_PAGESIZE4M;
			i++;
		}
		bigktsb_nttes = i;
	}

	sfmmu_set_tlb();
}
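/*
 * A minimal user-space sketch (simplified constants, sample addresses) of
 * the 4MB alignment arithmetic sfmmu_remap_kernel() relies on: the text
 * and data base virtual addresses are rounded down to a 4MB boundary, and
 * the pfn is masked so it names the first 8K page of its 4MB chunk.  The
 * constants below only mirror the kernel definitions, and the pfn here is
 * derived from the VA purely for illustration; the kernel gets it from
 * va_to_pfn().
 */
#include <stdio.h>
#include <stdint.h>

#define	MMU_PAGESHIFT	13			/* sun4 8K base page */
#define	MMU_PAGESIZE4M	(4UL * 1024 * 1024)
#define	MMU_PAGEMASK4M	(~(MMU_PAGESIZE4M - 1))
/* clear the pfn bits below one 4MB page's worth of 8K pages (512) */
#define	TTE4M_PFNMASK	(~((MMU_PAGESIZE4M >> MMU_PAGESHIFT) - 1))

int
main(void)
{
	uintptr_t kernelbase = 0x01000000;	/* sample KERNELBASE */
	uintptr_t end = 0x0164a2b8;		/* sample end of kernel data */
	uintptr_t textva = kernelbase & MMU_PAGEMASK4M;
	uintptr_t datava = end & MMU_PAGEMASK4M;
	uint64_t pfn = (end >> MMU_PAGESHIFT) & TTE4M_PFNMASK;

	printf("textva 0x%lx  datava 0x%lx  4M-aligned pfn 0x%llx\n",
	    (unsigned long)textva, (unsigned long)datava,
	    (unsigned long long)pfn);
	return (0);
}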