void
vmxnet3s_txcache_release(vmxnet3s_softc_t *dp)
{
	int	i;
	int	rc;
	vmxnet3s_txcache_t *cache = &dp->txcache;

	/* Unmap pages */
	hat_unload(kas.a_hat, cache->window, ptob(cache->num_pages),
	    HAT_UNLOAD_UNLOCK);
	vmem_free(heap_arena, cache->window, ptob(cache->num_pages));

	/* Free pages */
	for (i = 0; i < cache->num_pages; i++) {
		rc = page_tryupgrade(cache->pages[i]);
		if (!rc) {
			page_unlock(cache->pages[i]);
			while (!page_lock(cache->pages[i], SE_EXCL, NULL,
			    P_RECLAIM))
				;
		}
		page_free(cache->pages[i], 0);
	}
	page_unresv(cache->num_pages);

	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->page_maps, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->nodes,
	    cache->num_nodes * sizeof (vmxnet3s_txcache_node_t));
}

/*
 * Any changes to this routine must also be carried over to
 * devmap_free_pages() in the seg_dev driver. This is because
 * we currently don't have a special kernel segment for non-paged
 * kernel memory that is exported by drivers to user space.
 */
static void
segkmem_free_vn(vmem_t *vmp, void *inaddr, size_t size, struct vnode *vp,
    void (*func)(page_t *))
{
	page_t *pp;
	caddr_t addr = inaddr;
	caddr_t eaddr;
	pgcnt_t npages = btopr(size);

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
	ASSERT(vp != NULL);

	if (kvseg.s_base == NULL) {
		segkmem_gc_list_t *gc = inaddr;
		gc->gc_arena = vmp;
		gc->gc_size = size;
		gc->gc_next = segkmem_gc_list;
		segkmem_gc_list = gc;
		return;
	}

	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
#if defined(__x86)
		pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
		if (pp == NULL)
			panic("segkmem_free: page not found");
		if (!page_tryupgrade(pp)) {
			/*
			 * Some other thread has a sharelock. Wait for
			 * it to drop the lock so we can free this page.
			 */
			page_unlock(pp);
			pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
			    SE_EXCL);
		}
#else
		pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
#endif
		if (pp == NULL)
			panic("segkmem_free: page not found");
		/* Clear p_lckcnt so page_destroy() doesn't update availrmem */
		pp->p_lckcnt = 0;
		if (func)
			func(pp);
		else
			page_destroy(pp, 0);
	}
	if (func == NULL)
		page_unresv(npages);

	if (vmp != NULL)
		vmem_free(vmp, inaddr, size);
}

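/*
 * Hedged sketch, not part of the excerpt above: the common free path in
 * seg_kmem is a thin wrapper that calls segkmem_free_vn() against the
 * kernel vnode (&kvp) with no per-page callback, so page_destroy() and
 * page_unresv() run for every page.  The wrapper below is illustrative;
 * the real segkmem_free() may differ in detail.
 */
void
segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
{
	segkmem_free_vn(vmp, inaddr, size, &kvp, NULL);
}
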
/*
 * This function is called when we want to decrease the memory reservation
 * of our domain. Allocate the memory and make a hypervisor call to give
 * it back.
 */
static spgcnt_t
balloon_dec_reservation(ulong_t debit)
{
	int	i, locked;
	long	rv;
	ulong_t	request;
	page_t	*pp;

	bzero(mfn_frames, sizeof (mfn_frames));
	bzero(pfn_frames, sizeof (pfn_frames));

	if (debit > FRAME_ARRAY_SIZE) {
		debit = FRAME_ARRAY_SIZE;
	}
	request = debit;

	/*
	 * Don't bother if there isn't a safe amount of kmem left.
	 */
	if (kmem_avail() < balloon_minkmem) {
		kmem_reap();
		if (kmem_avail() < balloon_minkmem)
			return (0);
	}

	if (page_resv(request, KM_NOSLEEP) == 0) {
		return (0);
	}
	xen_block_migrate();
	for (i = 0; i < debit; i++) {
		pp = page_get_high_mfn(new_high_mfn);
		new_high_mfn = 0;
		if (pp == NULL) {
			/*
			 * Call kmem_reap(), then try once more,
			 * but only if there is a safe amount of
			 * kmem left.
			 */
			kmem_reap();
			if (kmem_avail() < balloon_minkmem ||
			    (pp = page_get_high_mfn(0)) == NULL) {
				debit = i;
				break;
			}
		}

		ASSERT(PAGE_EXCL(pp));
		ASSERT(!hat_page_is_mapped(pp));

		balloon_page_add(pp);
		pfn_frames[i] = pp->p_pagenum;
		mfn_frames[i] = pfn_to_mfn(pp->p_pagenum);
	}
	if (debit == 0) {
		xen_allow_migrate();
		page_unresv(request);
		return (0);
	}

	/*
	 * We zero all the pages before we start reassigning them in order to
	 * minimize the time spent holding the lock on the contig pfn list.
	 */
	if (balloon_zero_memory) {
		for (i = 0; i < debit; i++) {
			pfnzero(pfn_frames[i], 0, PAGESIZE);
		}
	}

	/*
	 * Remove all mappings for the pfns from the system
	 */
	locked = balloon_lock_contig_pfnlist(debit);
	for (i = 0; i < debit; i++) {
		reassign_pfn(pfn_frames[i], MFN_INVALID);
	}
	if (locked)
		unlock_contig_pfnlist();

	rv = balloon_free_pages(debit, mfn_frames, NULL, NULL);

	if (rv < 0) {
		cmn_err(CE_WARN, "Attempt to return pages to the hypervisor "
		    "failed - up to %lu pages lost (error = %ld)", debit, rv);
		rv = 0;
	} else if (rv != debit) {
		panic("Unexpected return value (%ld) from decrease "
		    "reservation hypervisor call", rv);
	}

	xen_allow_migrate();
	if (debit != request)
		page_unresv(request - debit);
	return (rv);
}

/*
 * This function is called when our reservation is increasing. Make a
 * hypervisor call to get our new pages, then integrate them into the system.
 */
static spgcnt_t
balloon_inc_reservation(ulong_t credit)
{
	int	i, cnt, locked;
	int	meta_pg_start, meta_pg_end;
	long	rv;
	page_t	*pp;
	page_t	*new_list_front, *new_list_back;

	/* Make sure we're single-threaded. */
	ASSERT(MUTEX_HELD(&bln_mutex));

	rv = 0;
	new_list_front = new_list_back = NULL;
	meta_pg_start = meta_pg_end = 0;
	bzero(mfn_frames, PAGESIZE);

	if (credit > FRAME_ARRAY_SIZE)
		credit = FRAME_ARRAY_SIZE;

	xen_block_migrate();
	rv = balloon_alloc_pages(credit, mfn_frames);

	if (rv < 0) {
		xen_allow_migrate();
		return (0);
	}
	for (i = 0; i < rv; i++) {
		if (mfn_frames[i] > new_high_mfn)
			new_high_mfn = mfn_frames[i];

		pp = balloon_page_sub();
		if (pp == NULL) {
			/*
			 * We pass the index into the current mfn array,
			 * then move the counter past the mfns we used
			 */
			meta_pg_start = i;
			cnt = balloon_init_new_pages(&mfn_frames[i], rv - i);
			i += cnt;
			meta_pg_end = i;
			if (i < rv) {
				pp = balloon_page_sub();
			} else {
				ASSERT(i == rv);
			}
		}
		if (pp == NULL) {
			break;
		}

		if (new_list_back == NULL) {
			new_list_front = new_list_back = pp;
		} else {
			new_list_back->p_next = pp;
			new_list_back = pp;
		}

		pp->p_next = NULL;
	}
	cnt = i;
	locked = balloon_lock_contig_pfnlist(cnt);
	for (i = 0, pp = new_list_front; i < meta_pg_start;
	    i++, pp = pp->p_next) {
		reassign_pfn(pp->p_pagenum, mfn_frames[i]);
	}
	for (i = meta_pg_end; i < cnt; i++, pp = pp->p_next) {
		reassign_pfn(pp->p_pagenum, mfn_frames[i]);
	}
	if (locked)
		unlock_contig_pfnlist();

	/*
	 * Make sure we don't allow pages without pfn->mfn mappings
	 * into the system.
	 */
	ASSERT(pp == NULL);

	while (new_list_front != NULL) {
		pp = new_list_front;
		new_list_front = pp->p_next;
		page_free(pp, 1);
	}

	/*
	 * Variable review: at this point, rv contains the number of pages
	 * the hypervisor gave us.  cnt contains the number of pages for which
	 * we had page_t structures.  i contains the number of pages
	 * where we set up pfn <-> mfn mappings.  If this ASSERT trips, that
	 * means we somehow lost page_t's from our local list.
	 */
	ASSERT(cnt == i);

	if (cnt < rv) {
		/*
		 * We couldn't get page structures.
		 *
		 * This shouldn't happen, but causes no real harm if it does.
		 * On debug kernels, we'll flag it.  On all kernels, we'll
		 * give back the pages we couldn't assign.
		 *
		 * Since these pages are new to the system and haven't been
		 * used, we don't bother zeroing them.
		 */
#ifdef DEBUG
		cmn_err(CE_WARN, "Could only assign %d of %ld pages", cnt, rv);
#endif	/* DEBUG */

		(void) balloon_free_pages(rv - cnt, &mfn_frames[i], NULL,
		    NULL);

		rv = cnt;
	}

	xen_allow_migrate();
	page_unresv(rv - (meta_pg_end - meta_pg_start));
	return (rv);
}

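/*
 * Hypothetical usage sketch (the function name and loop structure here are
 * illustrative, not taken from the driver): a caller moves the domain toward
 * a new reservation target by invoking balloon_inc_reservation() and
 * balloon_dec_reservation() in a loop.  Each call handles at most
 * FRAME_ARRAY_SIZE pages, so the loop repeats until the remaining delta is
 * consumed or a call stops making progress.
 */
static void
balloon_drive_to_target(spgcnt_t delta)
{
	spgcnt_t done;

	mutex_enter(&bln_mutex);	/* both routines expect single-threading */
	while (delta != 0) {
		if (delta > 0)
			done = balloon_inc_reservation((ulong_t)delta);
		else
			done = balloon_dec_reservation((ulong_t)(-delta));
		if (done == 0)
			break;		/* no forward progress; give up for now */
		delta += (delta > 0) ? -done : done;
	}
	mutex_exit(&bln_mutex);
}
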
int
vmxnet3s_txcache_init(vmxnet3s_softc_t *dp, vmxnet3s_txq_t *txq)
{
	int		i;
	int		ndescrs;
	int		node;
	page_t		*page;
	struct seg	kseg;
	vmxnet3s_txcache_t *cache = &dp->txcache;
	dev_info_t	*dip = dp->dip;

	cache->num_pages = ((txq->cmdring.size * VMXNET3_HDR_COPY_SIZE) +
	    (PAGESIZE - 1)) / PAGESIZE;

	/* Allocate pages */
	if (!page_resv(cache->num_pages, KM_SLEEP)) {
		dev_err(dip, CE_WARN, "failed to reserve %d pages",
		    cache->num_pages);
		goto out;
	}

	if (!page_create_wait(cache->num_pages, 0)) {
		dev_err(dip, CE_WARN, "failed to create %d pages",
		    cache->num_pages);
		goto unresv_pages;
	}

	cache->pages = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);
	cache->page_maps = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	kseg.s_as = &kas;
	for (i = 0; i < cache->num_pages; i++) {
		page = page_get_freelist(&kvp, 0, &kseg,
		    (caddr_t)(i * PAGESIZE), PAGESIZE, 0, NULL);
		if (page == NULL) {
			page = page_get_cachelist(&kvp, 0, &kseg,
			    (caddr_t)(i * PAGESIZE), 0, NULL);
			if (page == NULL)
				goto free_pages;
			if (!PP_ISAGED(page))
				page_hashout(page, NULL);
		}

		PP_CLRFREE(page);
		PP_CLRAGED(page);
		cache->pages[i] = page;
	}

	for (i = 0; i < cache->num_pages; i++)
		page_downgrade(cache->pages[i]);

	/* Allocate virtual address range for mapping pages */
	cache->window = vmem_alloc(heap_arena, ptob(cache->num_pages),
	    VM_SLEEP);
	ASSERT(cache->window);

	cache->num_nodes = txq->cmdring.size;

	/* Map pages */
	for (i = 0; i < cache->num_pages; i++) {
		cache->page_maps[i] = cache->window + ptob(i);
		hat_devload(kas.a_hat, cache->page_maps[i], ptob(1),
		    cache->pages[i]->p_pagenum,
		    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
		    HAT_LOAD_LOCK);
	}

	/* Now setup cache items */
	cache->nodes = kmem_zalloc(txq->cmdring.size *
	    sizeof (vmxnet3s_txcache_node_t), KM_SLEEP);

	ndescrs = txq->cmdring.size;
	node = 0;
	for (i = 0; i < cache->num_pages; i++) {
		caddr_t		va;
		int		j;
		int		lim;
		uint64_t	pa;

		lim = (ndescrs <= VMXNET3_TX_CACHE_ITEMS_PER_PAGE) ? ndescrs :
		    VMXNET3_TX_CACHE_ITEMS_PER_PAGE;
		va = cache->page_maps[i];
		pa = cache->pages[i]->p_pagenum << PAGESHIFT;

		for (j = 0; j < lim; j++) {
			cache->nodes[node].pa = pa;
			cache->nodes[node].va = va;

			pa += VMXNET3_HDR_COPY_SIZE;
			va += VMXNET3_HDR_COPY_SIZE;
			node++;
		}
		ndescrs -= lim;
	}
	return (DDI_SUCCESS);

free_pages:
	page_create_putback(cache->num_pages - i);
	while (--i >= 0) {
		if (!page_tryupgrade(cache->pages[i])) {
			page_unlock(cache->pages[i]);
			while (!page_lock(cache->pages[i], SE_EXCL, NULL,
			    P_RECLAIM))
				;
		}
		page_free(cache->pages[i], 0);
	}
	/*
	 * Free with the same size the array was allocated with; freeing
	 * num_pages * PAGESIZE here would be a kmem_free() size mismatch.
	 */
	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
unresv_pages:
	page_unresv(cache->num_pages);
out:
	cache->num_pages = cache->num_nodes = 0;

	return (DDI_FAILURE);
}

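/*
 * Hypothetical illustration, not from the driver: how a transmit routine
 * might consume one cache node per descriptor.  A node's va is used to
 * bcopy() up to VMXNET3_HDR_COPY_SIZE header bytes out of the message, and
 * its pa is the matching physical address to program into the descriptor.
 * The function name and the txdesc_pa/txdesc_len parameters are placeholders.
 */
static void
vmxnet3s_txcache_copy_sketch(vmxnet3s_txcache_t *cache, uint_t idx,
    const void *hdr, size_t hdrlen, uint64_t *txdesc_pa, uint32_t *txdesc_len)
{
	vmxnet3s_txcache_node_t *n = &cache->nodes[idx];
	size_t copylen = MIN(hdrlen, VMXNET3_HDR_COPY_SIZE);

	bcopy(hdr, n->va, copylen);
	*txdesc_pa = n->pa;
	*txdesc_len = (uint32_t)copylen;
}
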
/*
 * Allocate pages to back the virtual address range [addr, addr + size).
 * If addr is NULL, allocate the virtual address space as well.
 */
void *
segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
    page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
{
	page_t *ppl;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	int allocflag;

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		return (NULL);

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * Under certain conditions, we need to let the HAT layer know
	 * that it cannot safely allocate memory.  Allocations from
	 * the hat_memload vmem arena always need this, to prevent
	 * infinite recursion.
	 *
	 * In addition, the x86 hat cannot safely do memory
	 * allocations while in vmem_populate(), because there
	 * is no simple bound on its usage.
	 */
	if (vmflag & VM_MEMLOAD)
		allocflag = HAT_NO_KALLOC;
#if defined(__x86)
	else if (vmem_is_populator())
		allocflag = HAT_NO_KALLOC;
#endif
	else
		allocflag = 0;

	while (ppl != NULL) {
		page_t *pp = ppl;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		ASSERT(PAGE_EXCL(pp));
		page_io_unlock(pp);
		hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
		    (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK | allocflag);
		pp->p_lckcnt = 1;
#if defined(__x86)
		page_downgrade(pp);
#else
		if (vmflag & SEGKMEM_SHARELOCKED)
			page_downgrade(pp);
		else
			page_unlock(pp);
#endif
	}

	return (addr);
}

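/*
 * Sketch only: the shape of a page_create_func callback that
 * segkmem_xalloc() expects.  A full implementation (segkmem_page_create()
 * in seg_kmem.c) maps more of the vmem flags onto page_create_va() flags;
 * this minimal version assumes only the VM_NOSLEEP distinction matters and
 * always creates pages against the kernel vnode.
 */
static page_t *
segkmem_page_create_sketch(void *addr, size_t size, int vmflag, void *arg)
{
	struct seg kseg;
	uint_t pgflags = PG_EXCL;

	if ((vmflag & VM_NOSLEEP) == 0)
		pgflags |= PG_WAIT;

	kseg.s_as = &kas;
	return (page_create_va(&kvp, (u_offset_t)(uintptr_t)addr, size,
	    pgflags, &kseg, addr));
}
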
/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
	caddr_t addr = inaddr, pa;
	size_t  lpsize = segkmem_lpsize;
	pgcnt_t npages = btopr(size);
	pgcnt_t nbpages = btop(lpsize);
	pgcnt_t nlpages = size >> segkmem_lpshift;
	size_t  ppasize = nbpages * sizeof (page_t *);
	page_t *pp, *rootpp, **ppa, *pplist = NULL;
	int i;

	vmflag |= VM_NOSLEEP;

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		return (NULL);
	}

	/*
	 * allocate an array we need for hat_memload_array.
	 * we use a separate arena to avoid recursion.
	 * we will not need this array when hat_memload_array learns pp++
	 */
	if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
		goto fail_array_alloc;
	}

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		goto fail_vmem_alloc;

	ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);

	/* create all the pages */
	for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
		if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
			goto fail_page_create;
		page_list_concat(&pplist, &pp);
	}

	/* at this point we have all the resource to complete the request */
	while ((rootpp = pplist) != NULL) {
		for (i = 0; i < nbpages; i++) {
			ASSERT(pplist != NULL);
			pp = pplist;
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
			ppa[i] = pp;
		}
		/*
		 * Load the locked entry.  It's OK to preload the entry into
		 * the TSB since we now support large mappings in the kernel
		 * TSB.
		 */
		hat_memload_array(kas.a_hat,
		    (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
		    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK);

		for (--i; i >= 0; --i) {
			ppa[i]->p_lckcnt = 1;
			page_unlock(ppa[i]);
		}
	}

	vmem_free(segkmem_ppa_arena, ppa, ppasize);
	return (addr);

fail_page_create:
	while ((rootpp = pplist) != NULL) {
		for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
			ASSERT(pp != NULL);
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
		}
		page_destroy_pages(rootpp);
	}

	if (inaddr == NULL)
		vmem_free(vmp, addr, size);

fail_vmem_alloc:
	vmem_free(segkmem_ppa_arena, ppa, ppasize);

fail_array_alloc:
	page_unresv(npages);

	return (NULL);
}

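/*
 * Hedged sketch (name and fallback policy hypothetical): a caller of
 * segkmem_xalloc_lp() is expected to round the request up to a multiple of
 * segkmem_lpsize, and since the routine forces VM_NOSLEEP internally, a
 * caller typically falls back to the base-page path when large pages are
 * unavailable.
 */
static void *
segkmem_alloc_lp_sketch(vmem_t *vmp, size_t size, int vmflag, uint_t attr,
    page_t *(*pcf)(void *, size_t, int, void *), void *pcarg)
{
	size_t asize = P2ROUNDUP(size, segkmem_lpsize);
	void *addr;

	addr = segkmem_xalloc_lp(vmp, NULL, asize, vmflag, attr, pcf, pcarg);
	if (addr == NULL) {
		/* large pages unavailable; fall back to base pages */
		addr = segkmem_xalloc(vmp, NULL, size, vmflag, attr, pcf,
		    pcarg);
	}
	return (addr);
}
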
/*
 * This does the real work of segkp allocation.
 * Return to client base addr. len must be page-aligned. A null value is
 * returned if there are no more vm resources (e.g. pages, swap). The len
 * and base recorded in the private data structure include the redzone
 * and the redzone length (if applicable). If the user requests a redzone
 * either the first or last page is left unmapped depending whether stacks
 * grow to low or high memory.
 *
 * The client may also specify a no-wait flag. If that is set then the
 * request will choose a non-blocking path when requesting resources.
 * The default is make the client wait.
 */
static caddr_t
segkp_get_internal(
	struct seg *seg,
	size_t len,
	uint_t flags,
	struct segkp_data **tkpd,
	struct anon_map *amp)
{
	struct segkp_segdata	*kpsd = (struct segkp_segdata *)seg->s_data;
	struct segkp_data	*kpd;
	caddr_t vbase = NULL;	/* always first virtual, may not be mapped */
	pgcnt_t np = 0;		/* number of pages in the resource */
	pgcnt_t segkpindex;
	long i;
	caddr_t va;
	pgcnt_t pages = 0;
	ulong_t anon_idx = 0;
	int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
	caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;

	if (len & PAGEOFFSET) {
		panic("segkp_get: len is not page-aligned");
		/*NOTREACHED*/
	}

	ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));

	/* Only allow KPD_NO_ANON if we are going to lock it down */
	if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
		return (NULL);

	if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
		return (NULL);
	/*
	 * Fix up the len to reflect the REDZONE if applicable
	 */
	if (flags & KPD_HASREDZONE)
		len += PAGESIZE;
	np = btop(len);

	vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
	if (vbase == NULL) {
		kmem_free(kpd, sizeof (struct segkp_data));
		return (NULL);
	}

	/* If locking, reserve physical memory */
	if (flags & KPD_LOCKED) {
		pages = btop(SEGKP_MAPLEN(len, flags));
		if (page_resv(pages, kmflag) == 0) {
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		if ((flags & KPD_NO_ANON) == 0)
			atomic_add_long(&anon_segkp_pages_locked, pages);
	}

	/*
	 * Reserve sufficient swap space for this vm resource.  We'll
	 * actually allocate it in the loop below, but reserving it
	 * here allows us to back out more gracefully than if we
	 * had an allocation failure in the body of the loop.
	 *
	 * Note that we don't need swap space for the red zone page.
	 */
	if (amp != NULL) {
		/*
		 * The swap reservation has been done, if required, and the
		 * anon_hdr is separate.
		 */
		anon_idx = 0;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = amp->ahp;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);
	} else if ((flags & KPD_NO_ANON) == 0) {
		if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
			if (flags & KPD_LOCKED) {
				atomic_add_long(&anon_segkp_pages_locked,
				    -pages);
				page_unresv(pages);
			}
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		atomic_add_long(&anon_segkp_pages_resv,
		    btop(SEGKP_MAPLEN(len, flags)));
		anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = kpsd->kpsd_anon;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);
	} else {