Example #1
void
boot_mapin(caddr_t addr, size_t size)
{
	caddr_t	 eaddr;
	page_t	*pp;
	pfn_t	 pfnum;

	if (page_resv(btop(size), KM_NOSLEEP) == 0)
		panic("boot_mapin: page_resv failed");

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
		pfnum = va_to_pfn(addr);
		if (pfnum == PFN_INVALID)
			continue;
		if ((pp = page_numtopp_nolock(pfnum)) == NULL)
			panic("boot_mapin(): No pp for pfnum = %lx", pfnum);

		/*
		 * Must break up any large pages that may have constituent
		 * pages being utilized for BOP_ALLOC()'s before calling
		 * page_numtopp(). The locking code (i.e. page_reclaim())
		 * can't handle them.
		 */
		if (pp->p_szc != 0)
			page_boot_demote(pp);

		pp = page_numtopp(pfnum, SE_EXCL);
		if (pp == NULL || PP_ISFREE(pp))
			panic("boot_alloc: pp is NULL or free");

		/*
		 * If the cage is on but doesn't yet contain this page,
		 * mark it as non-relocatable.
		 */
		if (kcage_on && !PP_ISNORELOC(pp)) {
			PP_SETNORELOC(pp);
			PLCNT_XFER_NORELOC(pp);
		}

		(void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr, NULL);
		pp->p_lckcnt = 1;
#if defined(__x86)
		page_downgrade(pp);
#else
		page_unlock(pp);
#endif
	}
}
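All of the examples on this page follow the same basic contract: page_resv() reserves non-pageable memory against availrmem before any pages are created or claimed, it fails (returns 0) under KM_NOSLEEP when memory is tight, and any part of the reservation that is not actually consumed must be handed back with page_unresv(). boot_mapin() above simply panics on failure because it runs at boot; the sketch below shows the ordinary error-handling shape. example_reserve_pages() and example_populate_pages() are hypothetical names, assuming the illumos <vm/page.h> interfaces; this is an illustration of the pattern, not code taken from any of the examples.

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <vm/page.h>

/* Hypothetical worker: create or claim npages pages (details not shown). */
static int
example_populate_pages(pgcnt_t npages)
{
	return (0);
}

static int
example_reserve_pages(pgcnt_t npages)
{
	/* Reserve against availrmem up front; fail rather than sleep. */
	if (page_resv(npages, KM_NOSLEEP) == 0)
		return (ENOMEM);

	if (example_populate_pages(npages) != 0) {
		/* Nothing was consumed: give the whole reservation back. */
		page_unresv(npages);
		return (ENOMEM);
	}

	/*
	 * Success: the reservation stays in place while the pages are held
	 * non-pageable and is released when they are eventually freed.
	 */
	return (0);
}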
Example #2
{
    /*
     * VM1:
     * The page freelist and cachelist both hold pages that are not mapped into any address space.
     * The cachelist does not really contain free pages, but when memory is exhausted they will be
     * moved to the freelist; the 'free' column in vmstat shows the total of the freelist and the
     * cachelist.
     *
     * VM2:
     * @todo Document what happens behind the scenes in VM2 regarding the free and cachelist.
     */

    /*
     * Non-pageable memory reservation request for _4K pages, don't sleep.
     */
    size_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
    int rc = page_resv(cPages, KM_NOSLEEP);
    if (rc)
    {
        size_t   cbPages = cPages * sizeof(page_t *);
        page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
        if (RT_LIKELY(ppPages))
        {
            /*
             * Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
             * we don't yet have the 'virtAddr' to which this memory may be mapped.
             */
            caddr_t virtAddr = 0;
            for (size_t i = 0; i < cPages; i++, virtAddr += PAGE_SIZE)
            {
                /*
                 * Get a page from the free list locked exclusively. The page will be named (hashed in)
Example #3
/*
 * This function is called when we want to decrease the memory reservation
 * of our domain.  Allocate the memory and make a hypervisor call to give
 * it back.
 */
static spgcnt_t
balloon_dec_reservation(ulong_t debit)
{
	int	i, locked;
	long	rv;
	ulong_t	request;
	page_t	*pp;

	bzero(mfn_frames, sizeof (mfn_frames));
	bzero(pfn_frames, sizeof (pfn_frames));

	if (debit > FRAME_ARRAY_SIZE) {
		debit = FRAME_ARRAY_SIZE;
	}
	request = debit;

	/*
	 * Don't bother if there isn't a safe amount of kmem left.
	 */
	if (kmem_avail() < balloon_minkmem) {
		kmem_reap();
		if (kmem_avail() < balloon_minkmem)
			return (0);
	}

	if (page_resv(request, KM_NOSLEEP) == 0) {
		return (0);
	}
	xen_block_migrate();
	for (i = 0; i < debit; i++) {
		pp = page_get_high_mfn(new_high_mfn);
		new_high_mfn = 0;
		if (pp == NULL) {
			/*
			 * Call kmem_reap(), then try once more,
			 * but only if there is a safe amount of
			 * kmem left.
			 */
			kmem_reap();
			if (kmem_avail() < balloon_minkmem ||
			    (pp = page_get_high_mfn(0)) == NULL) {
				debit = i;
				break;
			}
		}
		ASSERT(PAGE_EXCL(pp));
		ASSERT(!hat_page_is_mapped(pp));

		balloon_page_add(pp);
		pfn_frames[i] = pp->p_pagenum;
		mfn_frames[i] = pfn_to_mfn(pp->p_pagenum);
	}
	if (debit == 0) {
		xen_allow_migrate();
		page_unresv(request);
		return (0);
	}

	/*
	 * We zero all the pages before we start reassigning them in order to
	 * minimize the time spent holding the lock on the contig pfn list.
	 */
	if (balloon_zero_memory) {
		for (i = 0; i < debit; i++) {
			pfnzero(pfn_frames[i], 0, PAGESIZE);
		}
	}

	/*
	 * Remove all mappings for the pfns from the system
	 */
	locked = balloon_lock_contig_pfnlist(debit);
	for (i = 0; i < debit; i++) {
		reassign_pfn(pfn_frames[i], MFN_INVALID);
	}
	if (locked)
		unlock_contig_pfnlist();

	rv = balloon_free_pages(debit, mfn_frames, NULL, NULL);

	if (rv < 0) {
		cmn_err(CE_WARN, "Attempt to return pages to the hypervisor "
		    "failed - up to %lu pages lost (error = %ld)", debit, rv);
		rv = 0;
	} else if (rv != debit) {
		panic("Unexpected return value (%ld) from decrease reservation "
		    "hypervisor call", rv);
	}

	xen_allow_migrate();
	if (debit != request)
		page_unresv(request - debit);
	return (rv);
}
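One detail worth calling out in balloon_dec_reservation(): the reservation is taken for `request` pages, but the hypervisor may end up receiving only `debit` of them, so the unused remainder is released with page_unresv(request - debit). Below is a minimal sketch of that partial-release accounting; example_consume_page() and example_consume_up_to() are hypothetical names, with page_resv()/page_unresv() used as in the examples above.

/* Hypothetical worker: consume one reserved page, returns 0 on success. */
static int
example_consume_page(void)
{
	return (0);
}

static pgcnt_t
example_consume_up_to(pgcnt_t request)
{
	pgcnt_t consumed = 0;

	if (page_resv(request, KM_NOSLEEP) == 0)
		return (0);

	while (consumed < request && example_consume_page() == 0)
		consumed++;

	if (consumed == 0) {
		/* Nothing was consumed: release the entire reservation. */
		page_unresv(request);
	} else if (consumed != request) {
		/* Keep only the part of the reservation actually used. */
		page_unresv(request - consumed);
	}
	return (consumed);
}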
Example #4
int
vmxnet3s_txcache_init(vmxnet3s_softc_t *dp, vmxnet3s_txq_t *txq)
{
	int		i;
	int		ndescrs;
	int		node;
	page_t		*page;
	struct seg	kseg;
	vmxnet3s_txcache_t *cache = &dp->txcache;
	dev_info_t	*dip = dp->dip;

	cache->num_pages = ((txq->cmdring.size * VMXNET3_HDR_COPY_SIZE) +
	    (PAGESIZE - 1)) / PAGESIZE;

	/* Allocate pages */
	if (!page_resv(cache->num_pages, KM_SLEEP)) {
		dev_err(dip, CE_WARN, "failed to reserve %d pages",
		    cache->num_pages);
		goto out;
	}

	if (!page_create_wait(cache->num_pages, 0)) {
		dev_err(dip, CE_WARN, "failed to create %d pages",
		    cache->num_pages);
		goto unresv_pages;
	}

	cache->pages = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	cache->page_maps = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	kseg.s_as = &kas;
	for (i = 0; i < cache->num_pages; i++) {
		page = page_get_freelist(&kvp, 0, &kseg, (caddr_t)(i*PAGESIZE),
		    PAGESIZE, 0, NULL);
		if (page == NULL) {
			page = page_get_cachelist(&kvp, 0, &kseg,
			    (caddr_t)(i * PAGESIZE), 0, NULL);
			if (page == NULL)
				goto free_pages;
			if (!PP_ISAGED(page))
				page_hashout(page, NULL);
		}
		PP_CLRFREE(page);
		PP_CLRAGED(page);
		cache->pages[i] = page;
	}

	for (i = 0; i < cache->num_pages; i++)
		page_downgrade(cache->pages[i]);

	/* Allocate virtual address range for mapping pages */
	cache->window = vmem_alloc(heap_arena, ptob(cache->num_pages),
	    VM_SLEEP);
	ASSERT(cache->window);

	cache->num_nodes = txq->cmdring.size;

	/* Map pages */
	for (i = 0; i < cache->num_pages; i++) {
		cache->page_maps[i] = cache->window + ptob(i);
		hat_devload(kas.a_hat, cache->page_maps[i], ptob(1),
		    cache->pages[i]->p_pagenum,
		    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
		    HAT_LOAD_LOCK);
	}

	/* Now setup cache items */
	cache->nodes = kmem_zalloc(txq->cmdring.size *
	    sizeof (vmxnet3s_txcache_node_t), KM_SLEEP);

	ndescrs = txq->cmdring.size;
	node = 0;
	for (i = 0; i < cache->num_pages; i++) {
		caddr_t		va;
		int		j;
		int		lim;
		uint64_t	pa;

		lim = (ndescrs <= VMXNET3_TX_CACHE_ITEMS_PER_PAGE) ? ndescrs :
		    VMXNET3_TX_CACHE_ITEMS_PER_PAGE;
		va = cache->page_maps[i];
		pa = cache->pages[i]->p_pagenum << PAGESHIFT;

		for (j = 0; j < lim; j++) {
			cache->nodes[node].pa = pa;
			cache->nodes[node].va = va;

			pa += VMXNET3_HDR_COPY_SIZE;
			va += VMXNET3_HDR_COPY_SIZE;
			node++;
		}
		ndescrs -= lim;
	}
	return (DDI_SUCCESS);

free_pages:
	page_create_putback(cache->num_pages - i);
	while (--i >= 0) {
		if (!page_tryupgrade(cache->pages[i])) {
			page_unlock(cache->pages[i]);
			while (!page_lock(cache->pages[i], SE_EXCL, NULL,
			    P_RECLAIM))
				;
		}
		page_free(cache->pages[i], 0);
	}
	kmem_free(cache->page_maps, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
unresv_pages:
	page_unresv(cache->num_pages);
out:
	cache->num_pages = cache->num_nodes = 0;

	return (DDI_FAILURE);
}
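The error path above already shows the teardown order in miniature. For completeness, a hypothetical full teardown might look like the sketch below; it is not part of the original driver, it just reverses the init steps (unload the locked mappings, free the VA window, reclaim and free the pages, drop the reservation, free the bookkeeping arrays), reusing the driver's own structure fields and the usual illumos hat/page/vmem interfaces.

/* Hypothetical counterpart to vmxnet3s_txcache_init(); a sketch only. */
static void
vmxnet3s_txcache_fini(vmxnet3s_softc_t *dp)
{
	vmxnet3s_txcache_t *cache = &dp->txcache;
	int i;

	/* Unload the locked kernel mappings and give back the VA window. */
	hat_unload(kas.a_hat, cache->window, ptob(cache->num_pages),
	    HAT_UNLOAD_UNLOCK);
	vmem_free(heap_arena, cache->window, ptob(cache->num_pages));

	/* Re-acquire each (shared-locked) page exclusively and free it. */
	for (i = 0; i < cache->num_pages; i++) {
		page_t *pp = cache->pages[i];

		if (!page_tryupgrade(pp)) {
			page_unlock(pp);
			while (!page_lock(pp, SE_EXCL, NULL, P_RECLAIM))
				;
		}
		page_free(pp, 0);
	}
	page_unresv(cache->num_pages);

	kmem_free(cache->page_maps, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
	kmem_free(cache->nodes,
	    cache->num_nodes * sizeof (vmxnet3s_txcache_node_t));
	cache->num_pages = cache->num_nodes = 0;
}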
Example #5
/*
 * Allocate pages to back the virtual address range [addr, addr + size).
 * If addr is NULL, allocate the virtual address space as well.
 */
void *
segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
	page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
{
	page_t *ppl;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	int allocflag;

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		return (NULL);

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * Under certain conditions, we need to let the HAT layer know
	 * that it cannot safely allocate memory.  Allocations from
	 * the hat_memload vmem arena always need this, to prevent
	 * infinite recursion.
	 *
	 * In addition, the x86 hat cannot safely do memory
	 * allocations while in vmem_populate(), because there
	 * is no simple bound on its usage.
	 */
	if (vmflag & VM_MEMLOAD)
		allocflag = HAT_NO_KALLOC;
#if defined(__x86)
	else if (vmem_is_populator())
		allocflag = HAT_NO_KALLOC;
#endif
	else
		allocflag = 0;

	while (ppl != NULL) {
		page_t *pp = ppl;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		ASSERT(PAGE_EXCL(pp));
		page_io_unlock(pp);
		hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
		    (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK | allocflag);
		pp->p_lckcnt = 1;
#if defined(__x86)
		page_downgrade(pp);
#else
		if (vmflag & SEGKMEM_SHARELOCKED)
			page_downgrade(pp);
		else
			page_unlock(pp);
#endif
	}

	return (addr);
}
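segkmem_xalloc() leaves page creation to its caller through the page_create_func callback, which lets the same reserve/map/lock logic back different vnodes and arenas. A sketch of how a caller might wire it up follows; my_page_create() and my_kmem_backed_alloc() are hypothetical names, and the callback is modeled on the usual page_create_va() approach rather than copied from any particular source file.

/*ARGSUSED*/
static page_t *
my_page_create(void *addr, size_t size, int vmflag, void *arg)
{
	struct seg kseg;
	uint_t pgflags = PG_EXCL;

	if ((vmflag & VM_NOSLEEP) == 0)
		pgflags |= PG_WAIT;

	kseg.s_as = &kas;
	/* Name the pages by their kernel VA so segkmem_xalloc() can map them. */
	return (page_create_va(&kvp, (u_offset_t)(uintptr_t)addr, size,
	    pgflags, &kseg, (caddr_t)addr));
}

void *
my_kmem_backed_alloc(vmem_t *vmp, size_t size, int vmflag)
{
	/* Passing NULL for inaddr makes segkmem_xalloc() allocate the VA too. */
	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
	    my_page_create, NULL));
}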
Example #6
/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
	caddr_t addr = inaddr, pa;
	size_t  lpsize = segkmem_lpsize;
	pgcnt_t npages = btopr(size);
	pgcnt_t nbpages = btop(lpsize);
	pgcnt_t nlpages = size >> segkmem_lpshift;
	size_t  ppasize = nbpages * sizeof (page_t *);
	page_t *pp, *rootpp, **ppa, *pplist = NULL;
	int i;

	vmflag |= VM_NOSLEEP;

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		return (NULL);
	}

	/*
	 * Allocate an array we need for hat_memload_array.
	 * We use a separate arena to avoid recursion.
	 * We will not need this array when hat_memload_array learns pp++.
	 */
	if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
		goto fail_array_alloc;
	}

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		goto fail_vmem_alloc;

	ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);

	/* create all the pages */
	for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
		if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
			goto fail_page_create;
		page_list_concat(&pplist, &pp);
	}

	/* At this point we have all the resources to complete the request. */
	while ((rootpp = pplist) != NULL) {
		for (i = 0; i < nbpages; i++) {
			ASSERT(pplist != NULL);
			pp = pplist;
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
			ppa[i] = pp;
		}
		/*
		 * Load the locked entry. It's OK to preload the entry into the
		 * TSB since we now support large mappings in the kernel TSB.
		 */
		hat_memload_array(kas.a_hat,
		    (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
		    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK);

		for (--i; i >= 0; --i) {
			ppa[i]->p_lckcnt = 1;
			page_unlock(ppa[i]);
		}
	}

	vmem_free(segkmem_ppa_arena, ppa, ppasize);
	return (addr);

fail_page_create:
	while ((rootpp = pplist) != NULL) {
		for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
			ASSERT(pp != NULL);
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
		}
		page_destroy_pages(rootpp);
	}

	if (inaddr == NULL)
		vmem_free(vmp, addr, size);

fail_vmem_alloc:
	vmem_free(segkmem_ppa_arena, ppa, ppasize);

fail_array_alloc:
	page_unresv(npages);

	return (NULL);
}
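To make the page arithmetic at the top of segkmem_xalloc_lp() concrete, here is one worked configuration; the numbers are illustrative only and assume an 8 KB base page size, a 4 MB segkmem_lpsize, and a 16 MB request.

/*
 * Illustrative only: PAGESIZE = 8K, segkmem_lpsize = 4M, size = 16M.
 *
 *	npages  = btopr(16M)             = 2048	base pages reserved via page_resv()
 *	nbpages = btop(4M)               =  512	base pages per large page
 *	nlpages = 16M >> segkmem_lpshift =    4	large pages created by the loop
 *	ppasize = 512 * sizeof (page_t *)	scratch array handed to
 *						hat_memload_array() per large page
 */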
Example #7
/*
 * This does the real work of segkp allocation.
 * The base addr is returned to the client; len must be page-aligned. A null
 * value is returned if there are no more vm resources (e.g. pages, swap).
 * The len and base recorded in the private data structure include the
 * redzone and the redzone length (if applicable). If the user requests a
 * redzone, either the first or the last page is left unmapped, depending on
 * whether stacks grow toward low or high memory.
 *
 * The client may also specify a no-wait flag. If that is set, the request
 * takes a non-blocking path when acquiring resources. The default is to
 * make the client wait.
 */
static caddr_t
segkp_get_internal(
	struct seg *seg,
	size_t len,
	uint_t flags,
	struct segkp_data **tkpd,
	struct anon_map *amp)
{
	struct segkp_segdata	*kpsd = (struct segkp_segdata *)seg->s_data;
	struct segkp_data	*kpd;
	caddr_t vbase = NULL;	/* always first virtual, may not be mapped */
	pgcnt_t np = 0;		/* number of pages in the resource */
	pgcnt_t segkpindex;
	long i;
	caddr_t va;
	pgcnt_t pages = 0;
	ulong_t anon_idx = 0;
	int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
	caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;

	if (len & PAGEOFFSET) {
		panic("segkp_get: len is not page-aligned");
		/*NOTREACHED*/
	}

	ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));

	/* Only allow KPD_NO_ANON if we are going to lock it down */
	if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
		return (NULL);

	if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
		return (NULL);
	/*
	 * Fix up the len to reflect the REDZONE if applicable
	 */
	if (flags & KPD_HASREDZONE)
		len += PAGESIZE;
	np = btop(len);

	vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
	if (vbase == NULL) {
		kmem_free(kpd, sizeof (struct segkp_data));
		return (NULL);
	}

	/* If locking, reserve physical memory */
	if (flags & KPD_LOCKED) {
		pages = btop(SEGKP_MAPLEN(len, flags));
		if (page_resv(pages, kmflag) == 0) {
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		if ((flags & KPD_NO_ANON) == 0)
			atomic_add_long(&anon_segkp_pages_locked, pages);
	}

	/*
	 * Reserve sufficient swap space for this vm resource.  We'll
	 * actually allocate it in the loop below, but reserving it
	 * here allows us to back out more gracefully than if we
	 * had an allocation failure in the body of the loop.
	 *
	 * Note that we don't need swap space for the red zone page.
	 */
	if (amp != NULL) {
		/*
		 * The swap reservation has been done, if required, and the
		 * anon_hdr is separate.
		 */
		anon_idx = 0;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = amp->ahp;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);

	} else if ((flags & KPD_NO_ANON) == 0) {
		if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
			if (flags & KPD_LOCKED) {
				atomic_add_long(&anon_segkp_pages_locked,
				    -pages);
				page_unresv(pages);
			}
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		atomic_add_long(&anon_segkp_pages_resv,
		    btop(SEGKP_MAPLEN(len, flags)));
		anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = kpsd->kpsd_anon;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);
	} else {