Example no. 1
0
void
vmxnet3s_txcache_release(vmxnet3s_softc_t *dp)
{
	int		i;
	int		rc;
	vmxnet3s_txcache_t *cache = &dp->txcache;

	/* Unmap pages */
	hat_unload(kas.a_hat, cache->window, ptob(cache->num_pages),
	    HAT_UNLOAD_UNLOCK);
	vmem_free(heap_arena, cache->window, ptob(cache->num_pages));

	/* Free pages */
	for (i = 0; i < cache->num_pages; i++) {
		rc = page_tryupgrade(cache->pages[i]);
		if (!rc) {
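			/*
			 * Some other thread still holds a shared lock on
			 * this page.  Drop our lock and wait until we can
			 * reacquire it exclusively so the page can be freed.
			 */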
			page_unlock(cache->pages[i]);
			while (!page_lock(cache->pages[i], SE_EXCL, NULL,
			    P_RECLAIM))
				;
		}
		page_free(cache->pages[i], 0);
	}
	page_unresv(cache->num_pages);

	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->page_maps, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->nodes,
	    cache->num_nodes * sizeof (vmxnet3s_txcache_node_t));
}
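The release path above depends on the same lock-upgrade idiom spelled out in Example no. 2 below: try to upgrade the shared page lock to an exclusive one and, if that fails, drop the lock and reacquire it exclusively. The following is a minimal userland sketch of that drop-and-reacquire idiom using POSIX rwlocks. The resource type and function names are hypothetical, and since POSIX offers no upgrade primitive, whatever state the shared lock protected has to be revalidated once the exclusive lock is held.

#include <pthread.h>
#include <stdbool.h>

typedef struct resource {
	pthread_rwlock_t	r_lock;
	bool			r_valid;	/* state guarded by r_lock */
} resource_t;

/*
 * Called with r_lock held as reader; returns with it held as writer.
 * The return value tells the caller whether the resource is still in
 * the state it observed under the shared lock.
 */
static bool
resource_make_exclusive(resource_t *rp)
{
	/* POSIX has no try-upgrade, so the shared lock is dropped first. */
	(void) pthread_rwlock_unlock(&rp->r_lock);
	(void) pthread_rwlock_wrlock(&rp->r_lock);

	/*
	 * Another writer may have run while no lock was held, so the
	 * caller must recheck its assumptions before proceeding.
	 */
	return (rp->r_valid);
}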
Example no. 2
0
/*
 * Any changes to this routine must also be carried over to
 * devmap_free_pages() in the seg_dev driver. This is because
 * we currently don't have a special kernel segment for non-paged
 * kernel memory that is exported by drivers to user space.
 */
static void
segkmem_free_vn(vmem_t *vmp, void *inaddr, size_t size, struct vnode *vp,
    void (*func)(page_t *))
{
	page_t *pp;
	caddr_t addr = inaddr;
	caddr_t eaddr;
	pgcnt_t npages = btopr(size);

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
	ASSERT(vp != NULL);

	if (kvseg.s_base == NULL) {
		segkmem_gc_list_t *gc = inaddr;
		gc->gc_arena = vmp;
		gc->gc_size = size;
		gc->gc_next = segkmem_gc_list;
		segkmem_gc_list = gc;
		return;
	}

	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
#if defined(__x86)
		pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
		if (pp == NULL)
			panic("segkmem_free: page not found");
		if (!page_tryupgrade(pp)) {
			/*
			 * Some other thread has a sharelock. Wait for
			 * it to drop the lock so we can free this page.
			 */
			page_unlock(pp);
			pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
			    SE_EXCL);
		}
#else
		pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
#endif
		if (pp == NULL)
			panic("segkmem_free: page not found");
		/* Clear p_lckcnt so page_destroy() doesn't update availrmem */
		pp->p_lckcnt = 0;
		if (func)
			func(pp);
		else
			page_destroy(pp, 0);
	}
	if (func == NULL)
		page_unresv(npages);

	if (vmp != NULL)
		vmem_free(vmp, inaddr, size);
}
Example no. 3
0
/*
 * This function is called when we want to decrease the memory reservation
 * of our domain.  Allocate the memory and make a hypervisor call to give
 * it back.
 */
static spgcnt_t
balloon_dec_reservation(ulong_t debit)
{
	int	i, locked;
	long	rv;
	ulong_t	request;
	page_t	*pp;

	bzero(mfn_frames, sizeof (mfn_frames));
	bzero(pfn_frames, sizeof (pfn_frames));

	if (debit > FRAME_ARRAY_SIZE) {
		debit = FRAME_ARRAY_SIZE;
	}
	request = debit;

	/*
	 * Don't bother if there isn't a safe amount of kmem left.
	 */
	if (kmem_avail() < balloon_minkmem) {
		kmem_reap();
		if (kmem_avail() < balloon_minkmem)
			return (0);
	}

	if (page_resv(request, KM_NOSLEEP) == 0) {
		return (0);
	}
	xen_block_migrate();
	for (i = 0; i < debit; i++) {
		pp = page_get_high_mfn(new_high_mfn);
		new_high_mfn = 0;
		if (pp == NULL) {
			/*
			 * Call kmem_reap(), then try once more,
			 * but only if there is a safe amount of
			 * kmem left.
			 */
			kmem_reap();
			if (kmem_avail() < balloon_minkmem ||
			    (pp = page_get_high_mfn(0)) == NULL) {
				debit = i;
				break;
			}
		}
		ASSERT(PAGE_EXCL(pp));
		ASSERT(!hat_page_is_mapped(pp));

		balloon_page_add(pp);
		pfn_frames[i] = pp->p_pagenum;
		mfn_frames[i] = pfn_to_mfn(pp->p_pagenum);
	}
	if (debit == 0) {
		xen_allow_migrate();
		page_unresv(request);
		return (0);
	}

	/*
	 * We zero all the pages before we start reassigning them in order to
	 * minimize the time spent holding the lock on the contig pfn list.
	 */
	if (balloon_zero_memory) {
		for (i = 0; i < debit; i++) {
			pfnzero(pfn_frames[i], 0, PAGESIZE);
		}
	}

	/*
	 * Remove all mappings for the pfns from the system
	 */
	locked = balloon_lock_contig_pfnlist(debit);
	for (i = 0; i < debit; i++) {
		reassign_pfn(pfn_frames[i], MFN_INVALID);
	}
	if (locked)
		unlock_contig_pfnlist();

	rv = balloon_free_pages(debit, mfn_frames, NULL, NULL);

	if (rv < 0) {
		cmn_err(CE_WARN, "Attempt to return pages to the hypervisor "
		    "failed - up to %lu pages lost (error = %ld)", debit, rv);
		rv = 0;
	} else if (rv != debit) {
		panic("Unexpected return value (%ld) from decrease reservation "
		    "hypervisor call", rv);
	}

	xen_allow_migrate();
	if (debit != request)
		page_unresv(request - debit);
	return (rv);
}
Example no. 4
0
/*
 * This function is called when our reservation is increasing.  Make a
 * hypervisor call to get our new pages, then integrate them into the system.
 */
static spgcnt_t
balloon_inc_reservation(ulong_t credit)
{
	int	i, cnt, locked;
	int	meta_pg_start, meta_pg_end;
	long	rv;
	page_t	*pp;
	page_t	*new_list_front, *new_list_back;

	/* Make sure we're single-threaded. */
	ASSERT(MUTEX_HELD(&bln_mutex));

	rv = 0;
	new_list_front = new_list_back = NULL;
	meta_pg_start = meta_pg_end = 0;
	bzero(mfn_frames, PAGESIZE);

	if (credit > FRAME_ARRAY_SIZE)
		credit = FRAME_ARRAY_SIZE;

	xen_block_migrate();
	rv = balloon_alloc_pages(credit, mfn_frames);

	if (rv < 0) {
		xen_allow_migrate();
		return (0);
	}
	for (i = 0; i < rv; i++) {
		if (mfn_frames[i] > new_high_mfn)
			new_high_mfn = mfn_frames[i];

		pp = balloon_page_sub();
		if (pp == NULL) {
			/*
			 * We pass the index into the current mfn array,
			 * then move the counter past the mfns we used
			 */
			meta_pg_start = i;
			cnt = balloon_init_new_pages(&mfn_frames[i], rv - i);
			i += cnt;
			meta_pg_end = i;
			if (i < rv) {
				pp = balloon_page_sub();
			} else {
				ASSERT(i == rv);
			}
		}
		if (pp == NULL) {
			break;
		}

		if (new_list_back == NULL) {
			new_list_front = new_list_back = pp;
		} else {
			new_list_back->p_next = pp;
			new_list_back = pp;
		}
		pp->p_next = NULL;
	}
	cnt = i;
	locked = balloon_lock_contig_pfnlist(cnt);
	for (i = 0, pp = new_list_front; i < meta_pg_start;
	    i++, pp = pp->p_next) {
		reassign_pfn(pp->p_pagenum, mfn_frames[i]);
	}
	for (i = meta_pg_end; i < cnt; i++, pp = pp->p_next) {
		reassign_pfn(pp->p_pagenum, mfn_frames[i]);
	}
	if (locked)
		unlock_contig_pfnlist();

	/*
	 * Make sure we don't allow pages without pfn->mfn mappings
	 * into the system.
	 */
	ASSERT(pp == NULL);

	while (new_list_front != NULL) {
		pp = new_list_front;
		new_list_front = pp->p_next;
		page_free(pp, 1);
	}

	/*
	 * Variable review: at this point, rv contains the number of pages
	 * the hypervisor gave us.  cnt contains the number of pages for which
	 * we had page_t structures.  i contains the number of pages
	 * where we set up pfn <-> mfn mappings.  If this ASSERT trips, that
	 * means we somehow lost page_t's from our local list.
	 */
	ASSERT(cnt == i);
	if (cnt < rv) {
		/*
		 * We couldn't get page structures.
		 *
		 * This shouldn't happen, but causes no real harm if it does.
		 * On debug kernels, we'll flag it.  On all kernels, we'll
		 * give back the pages we couldn't assign.
		 *
		 * Since these pages are new to the system and haven't been
		 * used, we don't bother zeroing them.
		 */
#ifdef DEBUG
		cmn_err(CE_WARN, "Could only assign %d of %ld pages", cnt, rv);
#endif	/* DEBUG */

		(void) balloon_free_pages(rv - cnt, &mfn_frames[i], NULL, NULL);

		rv = cnt;
	}

	xen_allow_migrate();
	page_unresv(rv - (meta_pg_end - meta_pg_start));
	return (rv);
}
Example no. 5
0
int
vmxnet3s_txcache_init(vmxnet3s_softc_t *dp, vmxnet3s_txq_t *txq)
{
	int		i;
	int		ndescrs;
	int		node;
	page_t		*page;
	struct seg	kseg;
	vmxnet3s_txcache_t *cache = &dp->txcache;
	dev_info_t	*dip = dp->dip;

	cache->num_pages = ((txq->cmdring.size * VMXNET3_HDR_COPY_SIZE) +
	    (PAGESIZE - 1)) / PAGESIZE;

	/* Allocate pages */
	if (!page_resv(cache->num_pages, KM_SLEEP)) {
		dev_err(dip, CE_WARN, "failed to reserve %d pages",
		    cache->num_pages);
		goto out;
	}

	if (!page_create_wait(cache->num_pages, 0)) {
		dev_err(dip, CE_WARN, "failed to create %d pages",
		    cache->num_pages);
		goto unresv_pages;
	}

	cache->pages = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	cache->page_maps = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	kseg.s_as = &kas;
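	/*
	 * Grab each backing page from the free list, falling back to the
	 * cache list.  Cache-list pages may still be hashed into a vnode,
	 * so that identity is dropped before they are reused here.
	 */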
	for (i = 0; i < cache->num_pages; i++) {
		page = page_get_freelist(&kvp, 0, &kseg,
		    (caddr_t)(i * PAGESIZE), PAGESIZE, 0, NULL);
		if (page == NULL) {
			page = page_get_cachelist(&kvp, 0, &kseg,
			    (caddr_t)(i * PAGESIZE), 0, NULL);
			if (page == NULL)
				goto free_pages;
			if (!PP_ISAGED(page))
				page_hashout(page, NULL);
		}
		PP_CLRFREE(page);
		PP_CLRAGED(page);
		cache->pages[i] = page;
	}

	/* Downgrade to shared locks; the pages stay locked while cached */
	for (i = 0; i < cache->num_pages; i++)
		page_downgrade(cache->pages[i]);

	/* Allocate virtual address range for mapping pages */
	cache->window = vmem_alloc(heap_arena, ptob(cache->num_pages),
	    VM_SLEEP);
	ASSERT(cache->window);

	cache->num_nodes = txq->cmdring.size;

	/* Map pages */
	for (i = 0; i < cache->num_pages; i++) {
		cache->page_maps[i] = cache->window + ptob(i);
		hat_devload(kas.a_hat, cache->page_maps[i], ptob(1),
		    cache->pages[i]->p_pagenum,
		    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
		    HAT_LOAD_LOCK);
	}

	/* Now setup cache items */
	cache->nodes = kmem_zalloc(txq->cmdring.size *
	    sizeof (vmxnet3s_txcache_node_t), KM_SLEEP);

	ndescrs = txq->cmdring.size;
	node = 0;
	for (i = 0; i < cache->num_pages; i++) {
		caddr_t		va;
		int		j;
		int		lim;
		uint64_t	pa;

		lim = (ndescrs <= VMXNET3_TX_CACHE_ITEMS_PER_PAGE) ? ndescrs :
		    VMXNET3_TX_CACHE_ITEMS_PER_PAGE;
		va = cache->page_maps[i];
		pa = cache->pages[i]->p_pagenum << PAGESHIFT;

		for (j = 0; j < lim; j++) {
			cache->nodes[node].pa = pa;
			cache->nodes[node].va = va;

			pa += VMXNET3_HDR_COPY_SIZE;
			va += VMXNET3_HDR_COPY_SIZE;
			node++;
		}
		ndescrs -= lim;
	}
	return (DDI_SUCCESS);

free_pages:
	page_create_putback(cache->num_pages - i);
	while (--i >= 0) {
		if (!page_tryupgrade(cache->pages[i])) {
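			/*
			 * Some other thread holds a shared lock on this
			 * page.  Drop our lock and wait for exclusive
			 * access before freeing it.
			 */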
			page_unlock(cache->pages[i]);
			while (!page_lock(cache->pages[i], SE_EXCL, NULL,
			    P_RECLAIM))
				;
		}
		page_free(cache->pages[i], 0);
	}
	kmem_free(cache->page_maps, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
unresv_pages:
	page_unresv(cache->num_pages);
out:
	cache->num_pages = cache->num_nodes = 0;

	return (DDI_FAILURE);
}
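The error handling in vmxnet3s_txcache_init() above follows a staged-unwind idiom: each failure jumps to a label that releases only what was acquired before the failing step, and the labels fall through in reverse acquisition order. Below is a minimal, self-contained sketch of that idiom; the context structure and buffer names are hypothetical stand-ins, not part of the driver.

#include <stdlib.h>

struct ctx {
	void	*buf_a;
	void	*buf_b;
};

/* Acquire two resources in order; on failure release only what was taken. */
static int
ctx_init(struct ctx *cp, size_t len)
{
	if ((cp->buf_a = malloc(len)) == NULL)
		goto out;		/* nothing acquired yet */
	if ((cp->buf_b = malloc(len)) == NULL)
		goto free_a;		/* undo the first step only */
	return (0);

free_a:
	free(cp->buf_a);
	cp->buf_a = NULL;
out:
	return (-1);
}

A caller only checks the return value; on failure nothing remains allocated, much as the driver leaves no pages reserved or locked when it returns DDI_FAILURE.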
Example no. 6
0
/*
 * Allocate pages to back the virtual address range [addr, addr + size).
 * If addr is NULL, allocate the virtual address space as well.
 */
void *
segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
    page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
{
	page_t *ppl;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	int allocflag;

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		return (NULL);

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * Under certain conditions, we need to let the HAT layer know
	 * that it cannot safely allocate memory.  Allocations from
	 * the hat_memload vmem arena always need this, to prevent
	 * infinite recursion.
	 *
	 * In addition, the x86 hat cannot safely do memory
	 * allocations while in vmem_populate(), because there
	 * is no simple bound on its usage.
	 */
	if (vmflag & VM_MEMLOAD)
		allocflag = HAT_NO_KALLOC;
#if defined(__x86)
	else if (vmem_is_populator())
		allocflag = HAT_NO_KALLOC;
#endif
	else
		allocflag = 0;

	while (ppl != NULL) {
		page_t *pp = ppl;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		ASSERT(PAGE_EXCL(pp));
		page_io_unlock(pp);
		hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
		    (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK | allocflag);
		pp->p_lckcnt = 1;
#if defined(__x86)
		page_downgrade(pp);
#else
		if (vmflag & SEGKMEM_SHARELOCKED)
			page_downgrade(pp);
		else
			page_unlock(pp);
#endif
	}

	return (addr);
}
Example no. 7
0
/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
	caddr_t addr = inaddr, pa;
	size_t  lpsize = segkmem_lpsize;
	pgcnt_t npages = btopr(size);
	pgcnt_t nbpages = btop(lpsize);
	pgcnt_t nlpages = size >> segkmem_lpshift;
	size_t  ppasize = nbpages * sizeof (page_t *);
	page_t *pp, *rootpp, **ppa, *pplist = NULL;
	int i;

	vmflag |= VM_NOSLEEP;

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		return (NULL);
	}

	/*
	 * allocate an array we need for hat_memload_array.
	 * we use a separate arena to avoid recursion.
	 * we will not need this array when hat_memload_array learns pp++
	 */
	if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
		goto fail_array_alloc;
	}

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		goto fail_vmem_alloc;

	ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);

	/* create all the pages */
	for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
		if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
			goto fail_page_create;
		page_list_concat(&pplist, &pp);
	}

	/* at this point we have all the resource to complete the request */
	while ((rootpp = pplist) != NULL) {
		for (i = 0; i < nbpages; i++) {
			ASSERT(pplist != NULL);
			pp = pplist;
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
			ppa[i] = pp;
		}
		/*
		 * Load the locked entry. It's OK to preload the entry into the
		 * TSB since we now support large mappings in the kernel TSB.
		 */
		hat_memload_array(kas.a_hat,
		    (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
		    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK);

		for (--i; i >= 0; --i) {
			ppa[i]->p_lckcnt = 1;
			page_unlock(ppa[i]);
		}
	}

	vmem_free(segkmem_ppa_arena, ppa, ppasize);
	return (addr);

fail_page_create:
	while ((rootpp = pplist) != NULL) {
		for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
			ASSERT(pp != NULL);
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
		}
		page_destroy_pages(rootpp);
	}

	if (inaddr == NULL)
		vmem_free(vmp, addr, size);

fail_vmem_alloc:
	vmem_free(segkmem_ppa_arena, ppa, ppasize);

fail_array_alloc:
	page_unresv(npages);

	return (NULL);
}
Example no. 8
0
/*
 * This does the real work of segkp allocation.
 * Return to client base addr. len must be page-aligned. A null value is
 * returned if there are no more vm resources (e.g. pages, swap). The len
 * and base recorded in the private data structure include the redzone
 * and the redzone length (if applicable). If the user requests a redzone
 * either the first or last page is left unmapped depending whether stacks
 * grow to low or high memory.
 *
 * The client may also specify a no-wait flag. If that is set then the
 * request will choose a non-blocking path when requesting resources.
 * The default is make the client wait.
 */
static caddr_t
segkp_get_internal(
	struct seg *seg,
	size_t len,
	uint_t flags,
	struct segkp_data **tkpd,
	struct anon_map *amp)
{
	struct segkp_segdata	*kpsd = (struct segkp_segdata *)seg->s_data;
	struct segkp_data	*kpd;
	caddr_t vbase = NULL;	/* always first virtual, may not be mapped */
	pgcnt_t np = 0;		/* number of pages in the resource */
	pgcnt_t segkpindex;
	long i;
	caddr_t va;
	pgcnt_t pages = 0;
	ulong_t anon_idx = 0;
	int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
	caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;

	if (len & PAGEOFFSET) {
		panic("segkp_get: len is not page-aligned");
		/*NOTREACHED*/
	}

	ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));

	/* Only allow KPD_NO_ANON if we are going to lock it down */
	if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
		return (NULL);

	if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
		return (NULL);
	/*
	 * Fix up the len to reflect the REDZONE if applicable
	 */
	if (flags & KPD_HASREDZONE)
		len += PAGESIZE;
	np = btop(len);

	vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
	if (vbase == NULL) {
		kmem_free(kpd, sizeof (struct segkp_data));
		return (NULL);
	}

	/* If locking, reserve physical memory */
	if (flags & KPD_LOCKED) {
		pages = btop(SEGKP_MAPLEN(len, flags));
		if (page_resv(pages, kmflag) == 0) {
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		if ((flags & KPD_NO_ANON) == 0)
			atomic_add_long(&anon_segkp_pages_locked, pages);
	}

	/*
	 * Reserve sufficient swap space for this vm resource.  We'll
	 * actually allocate it in the loop below, but reserving it
	 * here allows us to back out more gracefully than if we
	 * had an allocation failure in the body of the loop.
	 *
	 * Note that we don't need swap space for the red zone page.
	 */
	if (amp != NULL) {
		/*
		 * The swap reservation has been done, if required, and the
		 * anon_hdr is separate.
		 */
		anon_idx = 0;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = amp->ahp;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);

	} else if ((flags & KPD_NO_ANON) == 0) {
		if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
			if (flags & KPD_LOCKED) {
				atomic_add_long(&anon_segkp_pages_locked,
				    -pages);
				page_unresv(pages);
			}
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		atomic_add_long(&anon_segkp_pages_resv,
		    btop(SEGKP_MAPLEN(len, flags)));
		anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = kpsd->kpsd_anon;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);
	} else {