/*
 * Cross-call target that enters the fast reboot switcher.
 *
 * The switcher page (fb_va -> fb_dest_pa of the FASTBOOT_SWTCH file entry)
 * is mapped into the current process's address space and, if this thread
 * has pinned another thread, into that thread's address space as well.
 * The platform shutdown hook is then invoked and the switcher is called;
 * control is not expected to come back.
 */
/* ARGSUSED */
static int
fastboot_xc_func(fastboot_info_t *nk, xc_arg_t unused2, xc_arg_t unused3)
{
	fastboot_file_t *swtch = &nk->fi_files[FASTBOOT_SWTCH];
	void (*enter_switcher)(fastboot_info_t *) =
	    (void (*)())(swtch->fb_va);
	kthread_t *pinned = curthread->t_intr;
	struct as *cur_as = curproc->p_as;

	/* Nothing extra to map when we are running on kas itself. */
	if (cur_as != &kas) {
		hat_devload(cur_as->a_hat, (caddr_t)swtch->fb_va,
		    MMU_PAGESIZE, mmu_btop(swtch->fb_dest_pa),
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
	}

	/*
	 * If we have pinned a thread, make sure the address is mapped
	 * in the address space of the pinned thread.
	 */
	if (pinned != NULL) {
		struct as *pinned_as = pinned->t_procp->p_as;

		if (pinned_as->a_hat != cur_as->a_hat && pinned_as != &kas) {
			hat_devload(pinned_as->a_hat, (caddr_t)swtch->fb_va,
			    MMU_PAGESIZE, mmu_btop(swtch->fb_dest_pa),
			    PROT_READ | PROT_WRITE | PROT_EXEC,
			    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		}
	}

	(*psm_shutdownf)(A_SHUTDOWN, AD_FASTREBOOT);
	(*enter_switcher)(nk);
	/*NOTREACHED*/
	return (0);
}
void mach_kpm_init() { uintptr_t start, end; struct memlist *pmem; /* * Map each of the memsegs into the kpm segment, coalesing * adjacent memsegs to allow mapping with the largest * possible pages. */ pmem = phys_install; start = pmem->ml_address; end = start + pmem->ml_size; for (;;) { if (pmem == NULL || pmem->ml_address > end) { hat_devload(kas.a_hat, kpm_vbase + start, end - start, mmu_btop(start), PROT_READ | PROT_WRITE, HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); if (pmem == NULL) break; start = pmem->ml_address; } end = pmem->ml_address + pmem->ml_size; pmem = pmem->ml_next; } }
/*
 * Map the page containing phys_addr at pci_cfgacc_virt_base and return the
 * virtual address that corresponds to phys_addr (base plus page offset).
 *
 * Before the kernel HAT is running the mapping is made with kbm_map();
 * afterwards hat_devload() is used with strict ordering.
 */
static caddr_t
pci_cfgacc_map(paddr_t phys_addr)
{
	paddr_t pgoff;

#ifdef __xpv
	phys_addr = pfn_to_pa(xen_assign_pfn(mmu_btop(phys_addr))) |
	    (phys_addr & MMU_PAGEOFFSET);
#endif
	pgoff = phys_addr & MMU_PAGEOFFSET;

	if (!khat_running) {
		paddr_t page_pa = P2ALIGN(phys_addr, MMU_PAGESIZE);

		/* Early boot: grab a boot-time VA the first time through. */
		if (pci_cfgacc_virt_base == NULL) {
			pci_cfgacc_virt_base = (caddr_t)
			    alloc_vaddr(MMU_PAGESIZE, MMU_PAGESIZE);
		}

		kbm_map((uintptr_t)pci_cfgacc_virt_base, page_pa, 0, 0);
	} else {
		pfn_t page_pfn = mmu_btop(phys_addr);

		/*
		 * pci_cfgacc_virt_base may hold address left from early
		 * boot, which points to low mem. Realloc virtual address
		 * in kernel space since it's already late in boot now.
		 * Note: no need to unmap first, clear_boot_mappings() will
		 * do that for us.
		 */
		if (pci_cfgacc_virt_base < (caddr_t)kernelbase) {
			pci_cfgacc_virt_base = vmem_alloc(heap_arena,
			    MMU_PAGESIZE, VM_SLEEP);
		}

		hat_devload(kas.a_hat, pci_cfgacc_virt_base, MMU_PAGESIZE,
		    page_pfn, PROT_READ | PROT_WRITE | HAT_STRICTORDER,
		    HAT_LOAD_LOCK);
	}

	return (pci_cfgacc_virt_base + pgoff);
}
/*
 * Create the kpm mappings for a dynamically added memseg.
 *
 * nkpmpgs and kpm_pages_off are not used.  kphysm_add_memory_dynamic()
 * does not set nkpmpgs when page_t memory is externally allocated; that
 * code must properly calculate nkpmpgs in all cases if it ever needs to
 * be used here.
 */
/*ARGSUSED*/
void
hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
    offset_t kpm_pages_off)
{
	/*
	 * The meta (page_t) pages for dynamically added memory are allocated
	 * either from the incoming memory itself or from existing memory.
	 * In the former case the base of the incoming pages will be different
	 * than the base of the dynamic segment so call memseg_get_start() to
	 * get the actual base of the incoming memory for each case.
	 */
	pfn_t first_pfn = memseg_get_start(msp);
	pfn_t limit_pfn = msp->pages_end;

	hat_devload(kas.a_hat, kpm_vbase + mmu_ptob(first_pfn),
	    mmu_ptob(limit_pfn - first_pfn), first_pfn,
	    PROT_READ | PROT_WRITE,
	    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
}
/*
 * Legacy entry points from here to end of file.
 */

/*
 * Replace whatever is mapped at [addr, addr + size) in the segment's
 * address space with a locked device mapping starting at pfn.  Any existing
 * mapping is unloaded (and unlocked) first.
 */
void
segkmem_mapin(struct seg *seg, void *addr, size_t size, uint_t vprot,
    pfn_t pfn, uint_t flags)
{
	struct hat *hat = seg->s_as->a_hat;
	uint_t load_flags = flags | HAT_LOAD_LOCK;

	hat_unload(hat, addr, size, HAT_UNLOAD_UNLOCK);
	hat_devload(hat, addr, size, pfn, vprot, load_flags);
}
/*
 * Map the machine page containing "ma" read/write at kernel virtual
 * address "va".  Only level 0 mappings (a single MMU_PAGESIZE page) are
 * supported; this is asserted.
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	caddr_t vaddr = (caddr_t)va;

	ASSERT(level == 0);

	hat_devload(kas.a_hat, vaddr, MMU_PAGESIZE, mmu_btop(ma),
	    PROT_READ | PROT_WRITE, HAT_LOAD);
}
/*
 * Transfer data between the uio and the physical page "pfn".
 *
 * The page is made addressable either through kpm (when the pfn is memory
 * and kpm is enabled) or by loading a temporary mapping at mm_map.  Memory
 * pages are copied with uiomove(); non-memory pages are accessed with
 * ddi_peekpokeio() when allowio is set and fail with EIO otherwise.  The
 * whole operation runs under mm_lock.
 *
 * Returns 0 on success or an errno value.
 */
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
	int rv = 0;
	int used_devload = 0;
	int is_mem = pf_is_memory(pfn);
	/* Never transfer past the end of the page or of the current iovec. */
	size_t len = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);
	caddr_t kva = NULL;

	mutex_enter(&mm_lock);

	if (is_mem && kpm_enable) {
		kva = (pp != NULL) ? hat_kpm_mapin(pp, NULL) :
		    hat_kpm_mapin_pfn(pfn);
	}

	/* No kpm address available: fall back to the shared mm_map window. */
	if (kva == NULL) {
		uint_t prot = (uint_t)(rw == UIO_READ ?
		    PROT_READ : PROT_READ|PROT_WRITE);

		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn, prot,
		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
		kva = mm_map;
		used_devload = 1;
	}

	if (is_mem) {
		rv = uiomove(kva + pageoff, len, rw, uio);
	} else if (!allowio) {
		rv = EIO;
	} else {
		size_t iolen = uio->uio_iov->iov_len;

		if (ddi_peekpokeio(NULL, uio, rw,
		    (caddr_t)(uintptr_t)uio->uio_loffset, iolen,
		    sizeof (int32_t)) != DDI_SUCCESS) {
			rv = EFAULT;
		}
	}

	/* Undo whichever mapping path was taken above. */
	if (used_devload) {
		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	} else if (pp != NULL) {
		hat_kpm_mapout(pp, NULL, kva);
	} else {
		hat_kpm_mapout_pfn(pfn);
	}

	mutex_exit(&mm_lock);
	return (rv);
}
/* * Jump to the fast reboot switcher. This function never returns. */ void fast_reboot() { processorid_t bootcpuid = 0; extern uintptr_t postbootkernelbase; extern char fb_swtch_image[]; fastboot_file_t *fb; int i; postbootkernelbase = 0; fb = &newkernel.fi_files[FASTBOOT_SWTCH]; /* * Map the address into both the current proc's address * space and the kernel's address space in case the panic * is forced by kmdb. */ if (&kas != curproc->p_as) { hat_devload(curproc->p_as->a_hat, (caddr_t)fb->fb_va, MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa), PROT_READ | PROT_WRITE | PROT_EXEC, HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); } bcopy((void *)fb_swtch_image, (void *)fb->fb_va, fb->fb_size); /* * Set fb_va to fake_va */ for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) { newkernel.fi_files[i].fb_va = fake_va; } if (panicstr && CPU->cpu_id != bootcpuid && CPU_ACTIVE(cpu_get(bootcpuid))) { extern void panic_idle(void); cpuset_t cpuset; CPUSET_ZERO(cpuset); CPUSET_ADD(cpuset, bootcpuid); xc_priority((xc_arg_t)&newkernel, 0, 0, CPUSET2BV(cpuset), (xc_func_t)fastboot_xc_func); panic_idle(); } else (void) fastboot_xc_func(&newkernel, 0, 0); }
/*
 * xpvtap_segmf_register()
 *    Register the PTE machine address of every page in the user's guest
 *    page range with seg_mf.  Fails if the range is empty or is not fully
 *    contained in the segment found at its base address.
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct as *as = state->bt_map.um_as;
	uint_t npages = btopr(state->bt_map.um_guest_size);
	caddr_t va = state->bt_map.um_guest_pages;
	struct seg *seg;
	uint64_t pte_ma;
	uint_t pg;

	if (npages == 0)
		return (DDI_FAILURE);

	AS_LOCK_ENTER(as, RW_READER);

	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if (seg == NULL || (va + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size)) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
	for (pg = 0; pg < npages; pg++, va += PAGESIZE) {
		hat_prepare_mapping(as->a_hat, va, &pte_ma);
		hat_devload(as->a_hat, va, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, va);
		segmf_add_gref_pte(seg, va, pte_ma);
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}
/*
 * Map "len" bytes of physical address space starting at "addr" into kernel
 * virtual memory with protections "prot".
 *
 * The range is widened to whole pages, virtual space is taken from the
 * device arena without sleeping, and the mapping is loaded locked.  Returns
 * the virtual address corresponding to "addr", or NULL if len is zero or no
 * virtual space could be allocated.
 */
caddr_t
psm_map_phys_new(paddr_t addr, size_t len, int prot)
{
	uint_t page_off;
	paddr_t page_base;
	pgcnt_t pgcnt;
	caddr_t kva;

	if (len == 0)
		return (NULL);

	page_off = addr & MMU_PAGEOFFSET;
	page_base = addr - page_off;
	pgcnt = mmu_btopr(len + page_off);

	kva = device_arena_alloc(ptob(pgcnt), VM_NOSLEEP);
	if (kva != NULL) {
		hat_devload(kas.a_hat, kva, mmu_ptob(pgcnt),
		    mmu_btop(page_base), prot, HAT_LOAD_LOCK);
		return (kva + page_off);
	}

	return (NULL);
}
/*
 * Map address "addr" in address space "as" into a kernel virtual address.
 * The memory is guaranteed to be resident and locked down.
 *
 * The mapping is read-only unless "writing" is non-zero.  The returned
 * address carries the same page offset as "addr".
 */
static caddr_t
mapin(struct as *as, caddr_t addr, int writing)
{
	uint_t prot = writing ? (PROT_READ | PROT_WRITE) : PROT_READ;
	uintptr_t pgoff = (uintptr_t)addr & PAGEOFFSET;
	page_t *pp;
	caddr_t kva;
	pfn_t pfn;

	/*
	 * NB: Because of past mistakes, we have bits being returned
	 * by getpfnum that are actually the page type bits of the pte.
	 * When the object we are trying to map is a memory page with
	 * a page structure everything is ok and we can use the optimal
	 * method, ppmapin. Otherwise, we have to do something special.
	 */
	pfn = hat_getpfnum(as->a_hat, addr);
	pp = pf_is_memory(pfn) ? page_numtopp_nolock(pfn) : NULL;

	if (pp != NULL) {
		ASSERT(PAGE_LOCKED(pp));
		kva = ppmapin(pp, prot, (caddr_t)-1);
		return (kva + pgoff);
	}

	/*
	 * Oh well, we didn't have a page struct for the object we were
	 * trying to map in; ppmapin doesn't handle devices, but allocating a
	 * heap address allows ppmapout to free virtual space when done.
	 */
	kva = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_devload(kas.a_hat, kva, PAGESIZE, pfn, prot, HAT_LOAD_LOCK);

	return (kva + pgoff);
}
/* * This function performs the following tasks: * - Read the sizes of the new kernel and boot archive. * - Allocate memory for the new kernel and boot archive. * - Allocate memory for page tables necessary for mapping the memory * allocated for the files. * - Read the new kernel and boot archive into memory. * - Map in the fast reboot switcher. * - Load the fast reboot switcher to FASTBOOT_SWTCH_PA. * - Build the new multiboot_info structure * - Build page tables for the low 1G of physical memory. * - Mark the data structure as valid if all steps have succeeded. */ void fastboot_load_kernel(char *mdep) { void *buf = NULL; int i; fastboot_file_t *fb; uint32_t dboot_start_offset; char kern_bootpath[OBP_MAXPATHLEN]; extern uintptr_t postbootkernelbase; uintptr_t saved_kernelbase; int bootpath_len = 0; int is_failsafe = 0; int is_retry = 0; uint64_t end_addr; if (!fastreboot_capable) return; if (newkernel.fi_valid) fastboot_free_newkernel(&newkernel); saved_kernelbase = postbootkernelbase; postbootkernelbase = 0; /* * Initialize various HAT related fields in the data structure */ fastboot_init_fields(&newkernel); bzero(kern_bootpath, OBP_MAXPATHLEN); /* * Process the boot argument */ bzero(fastboot_args, OBP_MAXPATHLEN); fastboot_parse_mdep(mdep, kern_bootpath, &bootpath_len, fastboot_args); /* * Make sure we get the null character */ bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_UNIX], bootpath_len); bcopy(kern_bootfile, &fastboot_filename[FASTBOOT_NAME_UNIX][bootpath_len], strlen(kern_bootfile) + 1); bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE], bootpath_len); if (bcmp(kern_bootfile, FAILSAFE_BOOTFILE32, (sizeof (FAILSAFE_BOOTFILE32) - 1)) == 0 || bcmp(kern_bootfile, FAILSAFE_BOOTFILE64, (sizeof (FAILSAFE_BOOTFILE64) - 1)) == 0) { is_failsafe = 1; } load_kernel_retry: /* * Read in unix and boot_archive */ end_addr = DBOOT_ENTRY_ADDRESS; for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) { struct _buf *file; uintptr_t va; uint64_t fsize; size_t 
fsize_roundup, pt_size; int page_index; uintptr_t offset; ddi_dma_attr_t dma_attr = fastboot_dma_attr; dprintf("fastboot_filename[%d] = %s\n", i, fastboot_filename[i]); if ((file = kobj_open_file(fastboot_filename[i])) == (struct _buf *)-1) { cmn_err(CE_NOTE, "!Fastboot: Couldn't open %s", fastboot_filename[i]); goto err_out; } if (kobj_get_filesize(file, &fsize) != 0) { cmn_err(CE_NOTE, "!Fastboot: Couldn't get filesize for %s", fastboot_filename[i]); goto err_out; } fsize_roundup = P2ROUNDUP_TYPED(fsize, PAGESIZE, size_t); /* * Where the files end in physical memory after being * relocated by the fast boot switcher. */ end_addr += fsize_roundup; if (end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_hi) { cmn_err(CE_NOTE, "!Fastboot: boot archive is too big"); goto err_out; } /* * Adjust dma_attr_addr_lo so that the new kernel and boot * archive will not be overridden during relocation. */ if (end_addr > fastboot_dma_attr.dma_attr_addr_lo || end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_lo) { if (is_retry) { /* * If we have already tried and didn't succeed, * just give up. */ cmn_err(CE_NOTE, "!Fastboot: boot archive is too big"); goto err_out; } else { /* Set the flag so we don't keep retrying */ is_retry++; /* Adjust dma_attr_addr_lo */ fastboot_dma_attr.dma_attr_addr_lo = end_addr; fastboot_below_1G_dma_attr.dma_attr_addr_lo = end_addr; /* * Free the memory we have already allocated * whose physical addresses might not fit * the new lo and hi constraints. */ fastboot_free_mem(&newkernel, end_addr); goto load_kernel_retry; } } if (!fastboot_contig) dma_attr.dma_attr_sgllen = (fsize / PAGESIZE) + (((fsize % PAGESIZE) == 0) ? 
0 : 1); if ((buf = contig_alloc(fsize, &dma_attr, PAGESIZE, 0)) == NULL) { cmn_err(CE_NOTE, fastboot_enomem_msg, fsize, "64G"); goto err_out; } va = P2ROUNDUP_TYPED((uintptr_t)buf, PAGESIZE, uintptr_t); if (kobj_read_file(file, (char *)va, fsize, 0) < 0) { cmn_err(CE_NOTE, "!Fastboot: Couldn't read %s", fastboot_filename[i]); goto err_out; } fb = &newkernel.fi_files[i]; fb->fb_va = va; fb->fb_size = fsize; fb->fb_sectcnt = 0; pt_size = FASTBOOT_PTE_LIST_SIZE(fsize_roundup); /* * If we have reserved memory but it not enough, free it. */ if (fb->fb_pte_list_size && fb->fb_pte_list_size < pt_size) { contig_free((void *)fb->fb_pte_list_va, fb->fb_pte_list_size); fb->fb_pte_list_size = 0; } if (fb->fb_pte_list_size == 0) { if ((fb->fb_pte_list_va = (x86pte_t *)contig_alloc(pt_size, &fastboot_below_1G_dma_attr, PAGESIZE, 0)) == NULL) { cmn_err(CE_NOTE, fastboot_enomem_msg, (uint64_t)pt_size, "1G"); goto err_out; } /* * fb_pte_list_size must be set after the allocation * succeeds as it's used to determine how much memory to * free. */ fb->fb_pte_list_size = pt_size; } bzero((void *)(fb->fb_pte_list_va), fb->fb_pte_list_size); fb->fb_pte_list_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat, (caddr_t)fb->fb_pte_list_va)); for (page_index = 0, offset = 0; offset < fb->fb_size; offset += PAGESIZE) { uint64_t paddr; paddr = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat, (caddr_t)fb->fb_va + offset)); ASSERT(paddr >= fastboot_dma_attr.dma_attr_addr_lo); /* * Include the pte_bits so we don't have to make * it in assembly. 
*/ fb->fb_pte_list_va[page_index++] = (x86pte_t) (paddr | pte_bits); } fb->fb_pte_list_va[page_index] = FASTBOOT_TERMINATE; if (i == FASTBOOT_UNIX) { Ehdr *ehdr = (Ehdr *)va; int j; /* * Sanity checks: */ for (j = 0; j < SELFMAG; j++) { if (ehdr->e_ident[j] != ELFMAG[j]) { cmn_err(CE_NOTE, "!Fastboot: Bad ELF " "signature"); goto err_out; } } if (ehdr->e_ident[EI_CLASS] == ELFCLASS32 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB && ehdr->e_machine == EM_386) { fb->fb_sectcnt = sizeof (fb->fb_sections) / sizeof (fb->fb_sections[0]); if (fastboot_elf32_find_loadables((void *)va, fsize, &fb->fb_sections[0], &fb->fb_sectcnt, &dboot_start_offset) < 0) { cmn_err(CE_NOTE, "!Fastboot: ELF32 " "program section failure"); goto err_out; } if (fb->fb_sectcnt == 0) { cmn_err(CE_NOTE, "!Fastboot: No ELF32 " "program sections found"); goto err_out; } if (is_failsafe) { /* Failsafe boot_archive */ bcopy(BOOTARCHIVE32_FAILSAFE, &fastboot_filename [FASTBOOT_NAME_BOOTARCHIVE] [bootpath_len], sizeof (BOOTARCHIVE32_FAILSAFE)); } else { bcopy(BOOTARCHIVE32, &fastboot_filename [FASTBOOT_NAME_BOOTARCHIVE] [bootpath_len], sizeof (BOOTARCHIVE32)); } } else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB && ehdr->e_machine == EM_AMD64) { if (fastboot_elf64_find_dboot_load_offset( (void *)va, fsize, &dboot_start_offset) != 0) { cmn_err(CE_NOTE, "!Fastboot: Couldn't " "find ELF64 dboot entry offset"); goto err_out; } if (!is_x86_feature(x86_featureset, X86FSET_64) || !is_x86_feature(x86_featureset, X86FSET_PAE)) { cmn_err(CE_NOTE, "Fastboot: Cannot " "reboot to %s: " "not a 64-bit capable system", kern_bootfile); goto err_out; } if (is_failsafe) { /* Failsafe boot_archive */ bcopy(BOOTARCHIVE64_FAILSAFE, &fastboot_filename [FASTBOOT_NAME_BOOTARCHIVE] [bootpath_len], sizeof (BOOTARCHIVE64_FAILSAFE)); } else { bcopy(BOOTARCHIVE64, &fastboot_filename [FASTBOOT_NAME_BOOTARCHIVE] [bootpath_len], sizeof (BOOTARCHIVE64)); } } else { cmn_err(CE_NOTE, "!Fastboot: Unknown 
ELF type"); goto err_out; } fb->fb_dest_pa = DBOOT_ENTRY_ADDRESS - dboot_start_offset; fb->fb_next_pa = DBOOT_ENTRY_ADDRESS + fsize_roundup; } else { fb->fb_dest_pa = newkernel.fi_files[i - 1].fb_next_pa; fb->fb_next_pa = fb->fb_dest_pa + fsize_roundup; } kobj_close_file(file); } /* * Add the function that will switch us to 32-bit protected mode */ fb = &newkernel.fi_files[FASTBOOT_SWTCH]; fb->fb_va = fb->fb_dest_pa = FASTBOOT_SWTCH_PA; fb->fb_size = MMU_PAGESIZE; hat_devload(kas.a_hat, (caddr_t)fb->fb_va, MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa), PROT_READ | PROT_WRITE | PROT_EXEC, HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); /* * Build the new multiboot_info structure */ if (fastboot_build_mbi(fastboot_args, &newkernel) != 0) { goto err_out; } /* * Build page table for low 1G physical memory. Use big pages. * Allocate 4 (5 for amd64) pages for the page tables. * 1 page for PML4 (amd64) * 1 page for Page-Directory-Pointer Table * 2 pages for Page Directory * 1 page for Page Table. * The page table entry will be rewritten to map the physical * address as we do the copying. */ if (newkernel.fi_has_pae) { #ifdef __amd64 size_t size = MMU_PAGESIZE * 5; #else size_t size = MMU_PAGESIZE * 4; #endif /* __amd64 */ if (newkernel.fi_pagetable_size && newkernel.fi_pagetable_size < size) { contig_free((void *)newkernel.fi_pagetable_va, newkernel.fi_pagetable_size); newkernel.fi_pagetable_size = 0; } if (newkernel.fi_pagetable_size == 0) { if ((newkernel.fi_pagetable_va = (uintptr_t) contig_alloc(size, &fastboot_below_1G_dma_attr, MMU_PAGESIZE, 0)) == NULL) { cmn_err(CE_NOTE, fastboot_enomem_msg, (uint64_t)size, "1G"); goto err_out; } /* * fi_pagetable_size must be set after the allocation * succeeds as it's used to determine how much memory to * free. 
*/ newkernel.fi_pagetable_size = size; } bzero((void *)(newkernel.fi_pagetable_va), size); newkernel.fi_pagetable_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat, (caddr_t)newkernel.fi_pagetable_va)); newkernel.fi_last_table_pa = newkernel.fi_pagetable_pa + size - MMU_PAGESIZE; newkernel.fi_next_table_va = newkernel.fi_pagetable_va + MMU_PAGESIZE; newkernel.fi_next_table_pa = newkernel.fi_pagetable_pa + MMU_PAGESIZE; fastboot_build_pagetables(&newkernel); } /* Generate MD5 checksums */ fastboot_cksum_generate(&newkernel); /* Mark it as valid */ newkernel.fi_valid = 1; newkernel.fi_magic = FASTBOOT_MAGIC; postbootkernelbase = saved_kernelbase; return; err_out: postbootkernelbase = saved_kernelbase; newkernel.fi_valid = 0; fastboot_free_newkernel(&newkernel); }
/*
 * We want to add memory, but have no spare page_t structures.  Use some of
 * our new memory for the page_t structures.
 *
 * Somewhat similar to kphysm_add_memory_dynamic(), but simpler.
 *
 * framelist holds the incoming machine frames and count is how many of
 * them there are.  Returns the number of frames consumed as meta pages.
 */
static int
balloon_init_new_pages(mfn_t framelist[], pgcnt_t count)
{
	pgcnt_t metapgs, totalpgs, num_pages;
	paddr_t metasz;
	pfn_t meta_start;
	page_t *page_array;
	page_t *pp;
	caddr_t va;
	int i, rv, locked;
	mem_structs_t *mem;
	struct memseg *tail;

	/* Calculate the number of pages we're going to add */
	totalpgs = bln_stats.bln_new_target - bln_stats.bln_current_pages;

	/*
	 * The following calculates the number of "meta" pages -- the pages
	 * that will be required to hold page_t structures for all new pages.
	 * Proof of this calculation is left up to the reader.
	 */
	metapgs = totalpgs - (((uint64_t)(totalpgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	/*
	 * Given the number of page_t structures we need, is there also
	 * room in our meta pages for a memseg and memlist struct?
	 * If not, we'll need one more meta page.
	 */
	if ((metapgs << PAGESHIFT) <
	    (totalpgs * sizeof (page_t) + MEM_STRUCT_SIZE)) {
		metapgs++;
	}

	/*
	 * metapgs is calculated from totalpgs, which may be much larger than
	 * count.  If we don't have enough pages, all of the pages in this
	 * batch will be made meta pages, and a future trip through
	 * balloon_inc_reservation() will add the rest of the meta pages.
	 */
	if (metapgs > count) {
		metapgs = count;
	}

	/*
	 * Figure out the number of page_t structures that can fit in metapgs
	 *
	 * This will cause us to initialize more page_t structures than we
	 * need - these may be used in future memory increases.
	 */
	metasz = pfn_to_pa(metapgs);
	num_pages = (metasz - MEM_STRUCT_SIZE) / sizeof (page_t);

	DTRACE_PROBE3(balloon__alloc__stats, pgcnt_t, totalpgs, pgcnt_t,
	    num_pages, pgcnt_t, metapgs);

	/*
	 * We only increment mfn_count by count, not num_pages, to keep the
	 * space of all valid pfns contiguous.  This means we create page_t
	 * structures with invalid pagenums -- we deal with this situation
	 * in balloon_page_sub.
	 */
	mfn_count += count;

	/*
	 * Get a VA for the pages that will hold page_t and other structures.
	 * The memseg and memlist structures will go at the beginning, with
	 * the page_t structures following.
	 */
	va = (caddr_t)vmem_alloc(heap_arena, metasz, VM_SLEEP);
	/* LINTED: improper alignment */
	mem = (mem_structs_t *)va;
	page_array = mem->pages;

	meta_start = bln_stats.bln_max_pages;

	/*
	 * Set the mfn to pfn mapping for the meta pages.
	 */
	locked = balloon_lock_contig_pfnlist(metapgs);
	i = 0;
	while (i < metapgs) {
		reassign_pfn(bln_stats.bln_max_pages + i, framelist[i]);
		i++;
	}
	if (locked) {
		unlock_contig_pfnlist();
	}

	/*
	 * For our meta pages, map them in and zero the page.
	 * This will be the first time touching the new pages.
	 */
	hat_devload(kas.a_hat, va, metasz, bln_stats.bln_max_pages,
	    PROT_READ | PROT_WRITE,
	    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
	bzero(va, metasz);

	/*
	 * Initialize the page array for the new pages.
	 */
	for (i = 0; i < metapgs; i++) {
		pp = &page_array[i];
		pp->p_pagenum = bln_stats.bln_max_pages++;
		pp->p_offset = (u_offset_t)-1;
		page_iolock_init(pp);
		rv = page_lock(pp, SE_EXCL, NULL, P_NO_RECLAIM);
		ASSERT(rv == 1);
	}

	/*
	 * For the rest of the pages, initialize the page_t struct and
	 * add them to the free list
	 */
	for (i = metapgs; i < num_pages; i++) {
		pp = &page_array[i];
		pp->p_pagenum = bln_stats.bln_max_pages++;
		pp->p_offset = (u_offset_t)-1;
		page_iolock_init(pp);
		rv = page_lock(pp, SE_EXCL, NULL, P_NO_RECLAIM);
		ASSERT(rv == 1);
		balloon_page_add(pp);
	}

	/*
	 * Remember where I said that we don't call this function?  The missing
	 * code right here is why.  We need to set up kpm mappings for any new
	 * pages coming in.  However, if someone starts up a domain with small
	 * memory, then greatly increases it, we could get in some horrible
	 * deadlock situations as we steal page tables for kpm use, and
	 * userland applications take them right back before we can use them
	 * to set up our new memory.  Once a way around that is found, and a
	 * few other changes are made, we'll be able to enable this code.
	 */

	/*
	 * Update kernel structures, part 1: memsegs list
	 */
	mem->memseg.pages_base = meta_start;
	mem->memseg.pages_end = bln_stats.bln_max_pages - 1;
	mem->memseg.pages = &page_array[0];
	mem->memseg.epages = &page_array[num_pages - 1];
	mem->memseg.next = NULL;

	/* Append the new memseg to the tail of the global list. */
	memsegs_lock(1);
	tail = memsegs;
	while (tail->next != NULL) {
		tail = tail->next;
	}
	tail->next = &mem->memseg;
	memsegs_unlock(1);

	/*
	 * Update kernel structures, part 2: mem_node array
	 */
	mem_node_add_slice(meta_start, bln_stats.bln_max_pages);

	/*
	 * Update kernel structures, part 3: phys_install array
	 * (*sigh* how many of these things do we need?)
	 */
	memlist_write_lock();
	memlist_add(pfn_to_pa(meta_start), num_pages, &mem->memlist,
	    &phys_install);
	memlist_write_unlock();

	build_pfn_hash();

	return (metapgs);
}
/*
 * Set up the transmit header-copy cache for a TX queue.
 *
 * Enough pages are reserved and allocated to hold VMXNET3_HDR_COPY_SIZE
 * bytes for every descriptor in the command ring.  The pages are mapped
 * into a contiguous kernel virtual window and carved into per-descriptor
 * nodes, each recording its virtual and physical address.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE after releasing everything acquired
 * so far and zeroing num_pages and num_nodes.
 */
int
vmxnet3s_txcache_init(vmxnet3s_softc_t *dp, vmxnet3s_txq_t *txq)
{
	int i;
	int ndescrs;
	int node;
	page_t *page;
	struct seg kseg;
	vmxnet3s_txcache_t *cache = &dp->txcache;
	dev_info_t *dip = dp->dip;

	/* Round the total header-copy space up to whole pages. */
	cache->num_pages = ((txq->cmdring.size * VMXNET3_HDR_COPY_SIZE) +
	    (PAGESIZE - 1)) / PAGESIZE;

	/* Allocate pages */
	if (!page_resv(cache->num_pages, KM_SLEEP)) {
		dev_err(dip, CE_WARN, "failed to reserve %d pages",
		    cache->num_pages);
		goto out;
	}

	if (!page_create_wait(cache->num_pages, 0)) {
		dev_err(dip, CE_WARN, "failed to create %d pages",
		    cache->num_pages);
		goto unresv_pages;
	}

	cache->pages = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	cache->page_maps = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	kseg.s_as = &kas;
	for (i = 0; i < cache->num_pages; i++) {
		page = page_get_freelist(&kvp, 0, &kseg,
		    (caddr_t)(i*PAGESIZE), PAGESIZE, 0, NULL);
		if (page == NULL) {
			page = page_get_cachelist(&kvp, 0, &kseg,
			    (caddr_t)(i * PAGESIZE), 0, NULL);
			if (page == NULL)
				goto free_pages;
			if (!PP_ISAGED(page))
				page_hashout(page, NULL);
		}
		PP_CLRFREE(page);
		PP_CLRAGED(page);
		cache->pages[i] = page;
	}

	for (i = 0; i < cache->num_pages; i++)
		page_downgrade(cache->pages[i]);

	/* Allocate virtual address range for mapping pages */
	cache->window = vmem_alloc(heap_arena, ptob(cache->num_pages),
	    VM_SLEEP);
	ASSERT(cache->window);

	cache->num_nodes = txq->cmdring.size;

	/* Map pages */
	for (i = 0; i < cache->num_pages; i++) {
		cache->page_maps[i] = cache->window + ptob(i);
		hat_devload(kas.a_hat, cache->page_maps[i], ptob(1),
		    cache->pages[i]->p_pagenum,
		    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
		    HAT_LOAD_LOCK);
	}

	/* Now setup cache items */
	cache->nodes = kmem_zalloc(txq->cmdring.size *
	    sizeof (vmxnet3s_txcache_node_t), KM_SLEEP);

	ndescrs = txq->cmdring.size;
	node = 0;
	for (i = 0; i < cache->num_pages; i++) {
		caddr_t va;
		int j;
		int lim;
		uint64_t pa;

		/* The last page may hold fewer than a full page of items. */
		lim = (ndescrs <= VMXNET3_TX_CACHE_ITEMS_PER_PAGE) ? ndescrs :
		    VMXNET3_TX_CACHE_ITEMS_PER_PAGE;
		va = cache->page_maps[i];
		pa = cache->pages[i]->p_pagenum << PAGESHIFT;

		for (j = 0; j < lim; j++) {
			cache->nodes[node].pa = pa;
			cache->nodes[node].va = va;

			pa += VMXNET3_HDR_COPY_SIZE;
			va += VMXNET3_HDR_COPY_SIZE;
			node++;
		}
		ndescrs -= lim;
	}
	return (DDI_SUCCESS);

free_pages:
	/* Give back the pages never obtained, then free the ones we hold. */
	page_create_putback(cache->num_pages - i);
	while (--i >= 0) {
		if (!page_tryupgrade(cache->pages[i])) {
			page_unlock(cache->pages[i]);
			while (!page_lock(cache->pages[i], SE_EXCL, NULL,
			    P_RECLAIM))
				;
		}
		page_free(cache->pages[i], 0);
	}

	/*
	 * Free both bookkeeping arrays with exactly the sizes they were
	 * allocated with; kmem_free() requires the original size.
	 */
	kmem_free(cache->page_maps, cache->num_pages * sizeof (page_t *));
	cache->page_maps = NULL;
	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
	cache->pages = NULL;
unresv_pages:
	page_unresv(cache->num_pages);
out:
	cache->num_pages = cache->num_nodes = 0;

	return (DDI_FAILURE);
}
/*
 * Resolve a fault on one page of a segmf segment that is backed by a
 * foreign MFN.
 *
 * A placeholder mapping is loaded through the HAT and then overwritten,
 * via the hypervisor, with a PTE pointing at the foreign domain's frame.
 * For F_SOFTLOCK faults the segment's softlockcnt is bumped and the
 * mapping is loaded locked; both are undone if the hypervisor call fails.
 *
 * Returns 0 on success or FC_MAKE_ERR(EFAULT) on failure.
 */
/*ARGSUSED*/
static int
segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
    enum fault_type type, uint_t prot)
{
	struct segmf_data *data = seg->s_data;
	int softlock = (type == F_SOFTLOCK);
	uint_t load_flags = HAT_LOAD_NOCONSIST;
	uint_t unload_flags = HAT_UNLOAD_UNMAP;
	segmf_map_t *map;
	x86pte_t pte;
	mfn_t mfn;
	uint_t idx;

	idx = seg_page(seg, addr);
	map = &data->map[idx];
	ASSERT(map->t_type == SEGMF_MAP_MFN);

	mfn = map->u.m.m_mfn;

	if (softlock) {
		mutex_enter(&freemem_lock);
		data->softlockcnt++;
		mutex_exit(&freemem_lock);
		load_flags |= HAT_LOAD_LOCK;
	} else {
		load_flags |= HAT_LOAD;
	}

	if (segmf_faultpage_debug > 0) {
		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
		    (void *)addr, data->domid, mfn, prot);
		segmf_faultpage_debug--;
	}

	/*
	 * Ask the HAT to load a throwaway mapping to page zero, then
	 * overwrite it with our foreign domain mapping. It gets removed
	 * later via hat_unload()
	 */
	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
	    PROT_READ | HAT_UNORDERED_OK, load_flags);

	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
	if (prot & PROT_WRITE) {
		pte |= PT_WRITABLE;
	}

	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
	    UVMF_INVLPG | UVMF_ALL, data->domid) == 0) {
		return (0);
	}

	/* The foreign mapping failed: back out the placeholder. */
	if (softlock) {
		unload_flags |= HAT_UNLOAD_UNLOCK;
		mutex_enter(&freemem_lock);
		data->softlockcnt--;
		mutex_exit(&freemem_lock);
	}
	hat_unload(hat, addr, MMU_PAGESIZE, unload_flags);

	return (FC_MAKE_ERR(EFAULT));
}