/**
 * Allocates a single page backed by g_PageVnode.
 *
 * Every call takes a fresh, page-aligned vnode offset from the global
 * g_offPage counter (serialised by g_OffsetMtx) and creates the page at that
 * offset. On success the page has been passed through page_pp_lock(), its
 * i/o lock has been dropped and its page lock has been downgraded, so the
 * caller receives it share-locked.
 *
 * @param virtAddr       The virtual address to which this page may be mapped
 *                       in the future.
 *
 * @returns Pointer to the allocated page, NULL on failure.
 */
static page_t *rtR0MemObjSolPageAlloc(caddr_t virtAddr)
{
    u_offset_t      offThisPage;
    seg_t           KernelSeg;
    page_t         *pNewPage;

    /*
     * Hand out the next page-sized slot of the 64-bit offset space. That is
     * 16777215 terabytes of total memory for all VMs, or restarting
     * 8000 1GB VMs 2147483 times, before the counter wraps around.
     */
    mutex_enter(&g_OffsetMtx);
    AssertCompileSize(u_offset_t, sizeof(uint64_t)); NOREF(RTASSERTVAR);
    g_offPage   = RT_ALIGN_64(g_offPage, PAGE_SIZE) + PAGE_SIZE;
    offThisPage = g_offPage;
    mutex_exit(&g_OffsetMtx);

    /* Only the address-space member of the segment is filled in here. */
    KernelSeg.s_as = &kas;
    pNewPage = page_create_va(&g_PageVnode, offThisPage, PAGE_SIZE, PG_WAIT | PG_NORELOC, &KernelSeg, virtAddr);
    if (!RT_LIKELY(pNewPage))
        return pNewPage;

    /*
     * Pin the page "long term" so that it cannot be paged out while the page
     * lock is temporarily dropped (during free), then go from the exclusive
     * lock to a shared one so that the page cannot be relocated.
     */
    page_pp_lock(pNewPage, 0 /* COW */, 1 /* Kernel */);
    page_io_unlock(pNewPage);
    page_downgrade(pNewPage);
    Assert(PAGE_LOCKED_SE(pNewPage, SE_SHARED));

    return pNewPage;
}
Exemplo n.º 2
0
/*
 * Turn the i/o page list `pp' into the NULL-terminated page array `pl'.
 *
 * Pages in front of the one whose p_offset equals `off' are dropped (i/o lock
 * released, then page_release()).  Starting at that page, pages are moved into
 * `pl' until `plsz' bytes worth have been stored or the list runs out; each
 * stored page has its i/o lock released and, unless `rw' is S_CREATE, its
 * page lock downgraded.  Whatever is left on the list is dropped as well.
 * If no page at `off' is found, pl[0] is set to NULL.
 *
 * `io_len' is not used.
 */
/*ARGSUSED*/
void
pvn_plist_init(page_t *pp, page_t *pl[], size_t plsz,
               u_offset_t off, size_t io_len, enum seg_rw rw)
{
    page_t *cur;
    page_t **slot;
    ssize_t remaining;

    /*
     * Skip ahead to the needed page, dropping everything in front of it.
     */
    for (;;) {
        if (pp == NULL) {
            /* Ran off the end of the list: nothing to hand back. */
            pl[0] = NULL;
            return;
        }
        if (pp->p_offset == off)
            break;

        /* Unlink the page, then let go of its i/o lock and page lock. */
        cur = pp;
        page_sub(&pp, cur);
        page_io_unlock(cur);
        (void) page_release(cur, 1);
    }

    /*
     * Fill in the page array.  At least one page is always stored, no
     * matter how small plsz is.
     */
    slot = pl;
    remaining = plsz;
    for (;;) {
        cur = pp;
        *slot++ = cur;
        page_sub(&pp, cur);
        page_io_unlock(cur);
        if (rw != S_CREATE)
            page_downgrade(cur);
        remaining -= PAGESIZE;
        if (remaining <= 0 || pp == NULL)
            break;
    }
    *slot = NULL;		/* array terminator */

    /*
     * Anything still on the list did not fit into the array; drop it.
     */
    while ((cur = pp) != NULL) {
        page_sub(&pp, cur);
        page_io_unlock(cur);
        (void) page_release(cur, 1);
    }
}
Exemplo n.º 3
0
/*
 * Walk the virtual range [addr, addr + size) one page at a time and, for
 * every address that va_to_pfn() resolves to a valid pfn, enter the backing
 * page into the page hash under kvp at an offset equal to its virtual
 * address, with p_lckcnt set to 1.
 *
 * Panics if the page reservation fails, if a pfn has no page structure, or
 * if the page cannot be locked or turns out to be free.
 */
void
boot_mapin(caddr_t addr, size_t size)
{
	caddr_t	 end = addr + size;
	page_t	*pp;
	pfn_t	 pfn;

	if (page_resv(btop(size), KM_NOSLEEP) == 0)
		panic("boot_mapin: page_resv failed");

	for (; addr < end; addr += PAGESIZE) {
		pfn = va_to_pfn(addr);
		if (pfn == PFN_INVALID)
			continue;	/* nothing mapped at this address */

		pp = page_numtopp_nolock(pfn);
		if (pp == NULL)
			panic("boot_mapin(): No pp for pfnum = %lx", pfn);

		/*
		 * Large pages whose constituent pages may be in use for
		 * BOP_ALLOC()'s have to be broken up before page_numtopp()
		 * is called, because the locking code (i.e. page_reclaim())
		 * cannot handle them.
		 */
		if (pp->p_szc != 0)
			page_boot_demote(pp);

		pp = page_numtopp(pfn, SE_EXCL);
		if (pp == NULL || PP_ISFREE(pp))
			panic("boot_alloc: pp is NULL or free");

		/*
		 * With the cage enabled, a page that is not yet part of it
		 * gets marked non-relocatable.
		 */
		if (kcage_on && !PP_ISNORELOC(pp)) {
			PP_SETNORELOC(pp);
			PLCNT_XFER_NORELOC(pp);
		}

		(void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr, NULL);
		pp->p_lckcnt = 1;
#if defined(__x86)
		/* x86 keeps the page share-locked. */
		page_downgrade(pp);
#else
		/* Everywhere else the page lock is dropped completely. */
		page_unlock(pp);
#endif
	}
}
Exemplo n.º 4
0
/*
 * Decide whether a page needs to be written back and prepare it for that.
 *
 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
 * B_TRUNC, B_FORCE}.  B_DELWRI indicates that this page is part of a kluster
 * operation and is only to be considered if it doesn't involve any
 * waiting here.  B_TRUNC indicates that the file is being truncated
 * and so no i/o needs to be done. B_FORCE indicates that the page
 * must be destroyed so don't try writing it out.
 *
 * The caller must ensure that the page is locked: exclusively when
 * B_INVAL or B_FREE is set, shared otherwise (this is what the ASSERT
 * at the top of the function checks).  Returns 1, if
 * the page should be written back (the "iolock" is held in this
 * case), or 0 if the page has been dealt with or has been
 * unlocked.
 */
int
pvn_getdirty(page_t *pp, int flags)
{
    ASSERT((flags & (B_INVAL | B_FREE)) ?
           PAGE_EXCL(pp) : PAGE_SHARED(pp));
    ASSERT(PP_ISFREE(pp) == 0);

    /*
     * If trying to invalidate or free a logically `locked' page,
     * forget it.  Don't need page_struct_lock to check p_lckcnt and
     * p_cowcnt as the page is exclusively locked.
     *
     * B_TRUNC and B_FORCE override this: with either of them set the
     * page is processed even if p_lckcnt or p_cowcnt is non-zero.
     */
    if ((flags & (B_INVAL | B_FREE)) && !(flags & (B_TRUNC|B_FORCE)) &&
            (pp->p_lckcnt != 0 || pp->p_cowcnt != 0)) {
        page_unlock(pp);
        return (0);
    }

    /*
     * Now acquire the i/o lock so we can add it to the dirty
     * list (if necessary).  We avoid blocking on the i/o lock
     * in the following cases:
     *
     *	If B_DELWRI is set, which implies that this request is
     *	due to a klustering operation.
     *
     *	If this is an async (B_ASYNC) operation and we are not doing
     *	invalidation (B_INVAL) [The current i/o or fsflush will ensure
     *	that the page is written out].
     *
     * When the non-blocking attempt fails, the page lock is dropped and
     * the page is reported as dealt with.
     */
    if ((flags & B_DELWRI) || ((flags & (B_INVAL | B_ASYNC)) == B_ASYNC)) {
        if (!page_io_trylock(pp)) {
            page_unlock(pp);
            return (0);
        }
    } else {
        page_io_lock(pp);
    }

    /*
     * If we want to free or invalidate the page then
     * we need to unload it so that anyone who wants
     * it will have to take a minor fault to get it.
     * Otherwise, we're just writing the page back so we
     * need to sync up the hardware and software mod bit to
     * detect any future modifications.  We clear the
     * software mod bit when we put the page on the dirty
     * list.
     */
    if (flags & (B_INVAL | B_FREE)) {
        (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
    } else {
        (void) hat_pagesync(pp, HAT_SYNC_ZERORM);
    }

    /*
     * A page that is not modified, or any page when truncating, needs no
     * i/o.  The i/o lock is released here and the page is handed to
     * VN_DISPOSE() or page_release() depending on the flags.
     */
    if (!hat_ismod(pp) || (flags & B_TRUNC)) {
        /*
         * Don't need to add it to the
         * list after all.
         */
        page_io_unlock(pp);
        if (flags & B_INVAL) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_INVAL, 0, kcred);
        } else if (flags & B_FREE) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_FREE, (flags & B_DONTNEED), kcred);
        } else {
            /*
             * This is advisory path for the callers
             * of VOP_PUTPAGE() who prefer freeing the
             * page _only_ if no one else is accessing it.
             * E.g. segmap_release()
             *
             * The above hat_ismod() check is useless because:
             * (1) we may not be holding SE_EXCL lock;
             * (2) we've not unloaded _all_ translations
             *
             * Let page_release() do the heavy-lifting.
             */
            (void) page_release(pp, 1);
        }
        return (0);
    }

    /*
     * Page is dirty, get it ready for the write back
     * and add page to the dirty list.
     */
    hat_clrrefmod(pp);

    /*
     * If we're going to free the page when we're done
     * then we can let others try to use it starting now.
     * We'll detect the fact that they used it when the
     * i/o is done and avoid freeing the page.
     */
    if (flags & B_FREE)
        page_downgrade(pp);


    TRACE_1(TR_FAC_VM, TR_PVN_GETDIRTY, "pvn_getdirty:pp %p", pp);

    /* The i/o lock taken above is still held on this path. */
    return (1);
}
Exemplo n.º 5
0
/*
 * Build the TX header-copy cache for a transmit queue.
 *
 * Reserves and grabs enough physical pages to hold one
 * VMXNET3_HDR_COPY_SIZE-byte slot per command-ring descriptor, maps them
 * back to back into a window allocated from heap_arena, and fills
 * cache->nodes with the virtual and physical address of every slot.
 *
 * dp:	driver soft state; the cache lives in dp->txcache.
 * txq:	transmit queue whose cmdring.size determines the cache size.
 *
 * Returns DDI_SUCCESS on success.  On failure the pages, the bookkeeping
 * arrays and the page reservation acquired so far are released,
 * cache->num_pages and cache->num_nodes are reset to 0, and DDI_FAILURE is
 * returned.
 */
int
vmxnet3s_txcache_init(vmxnet3s_softc_t *dp, vmxnet3s_txq_t *txq)
{
	int		i;
	int		ndescrs;
	int		node;
	page_t		*page;
	struct seg	kseg;
	vmxnet3s_txcache_t *cache = &dp->txcache;
	dev_info_t	*dip = dp->dip;

	/* Number of pages needed for all header slots, rounded up. */
	cache->num_pages = ((txq->cmdring.size * VMXNET3_HDR_COPY_SIZE) +
	    (PAGESIZE - 1)) / PAGESIZE;

	/* Allocate pages */
	if (!page_resv(cache->num_pages, KM_SLEEP)) {
		dev_err(dip, CE_WARN, "failed to reserve %d pages",
		    cache->num_pages);
		goto out;
	}

	if (!page_create_wait(cache->num_pages, 0)) {
		dev_err(dip, CE_WARN, "failed to create %d pages",
		    cache->num_pages);
		goto unresv_pages;
	}

	cache->pages = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	cache->page_maps = kmem_zalloc(cache->num_pages * sizeof (page_t *),
	    KM_SLEEP);

	/* Take each page from the free list, else from the cache list. */
	kseg.s_as = &kas;
	for (i = 0; i < cache->num_pages; i++) {
		page = page_get_freelist(&kvp, 0, &kseg, (caddr_t)(i*PAGESIZE),
		    PAGESIZE, 0, NULL);
		if (page == NULL) {
			page = page_get_cachelist(&kvp, 0, &kseg,
			    (caddr_t)(i * PAGESIZE), 0, NULL);
			if (page == NULL)
				goto free_pages;
			if (!PP_ISAGED(page))
				page_hashout(page, NULL);
		}
		PP_CLRFREE(page);
		PP_CLRAGED(page);
		cache->pages[i] = page;
	}

	for (i = 0; i < cache->num_pages; i++)
		page_downgrade(cache->pages[i]);

	/* Allocate virtual address range for mapping pages */
	cache->window = vmem_alloc(heap_arena, ptob(cache->num_pages),
	    VM_SLEEP);
	ASSERT(cache->window);

	cache->num_nodes = txq->cmdring.size;

	/* Map pages */
	for (i = 0; i < cache->num_pages; i++) {
		cache->page_maps[i] = cache->window + ptob(i);
		hat_devload(kas.a_hat, cache->page_maps[i], ptob(1),
		    cache->pages[i]->p_pagenum,
		    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
		    HAT_LOAD_LOCK);
	}

	/* Now setup cache items */
	cache->nodes = kmem_zalloc(txq->cmdring.size *
	    sizeof (vmxnet3s_txcache_node_t), KM_SLEEP);

	ndescrs = txq->cmdring.size;
	node = 0;
	for (i = 0; i < cache->num_pages; i++) {
		caddr_t		va;
		int		j;
		int		lim;
		uint64_t	pa;

		/* The last page may hold fewer slots than a full page. */
		lim = (ndescrs <= VMXNET3_TX_CACHE_ITEMS_PER_PAGE) ? ndescrs :
		    VMXNET3_TX_CACHE_ITEMS_PER_PAGE;
		va = cache->page_maps[i];
		/*
		 * Widen the page frame number before shifting so that the
		 * shift is carried out in 64 bits whatever the width of
		 * p_pagenum is.
		 */
		pa = (uint64_t)cache->pages[i]->p_pagenum << PAGESHIFT;

		for (j = 0; j < lim; j++) {
			cache->nodes[node].pa = pa;
			cache->nodes[node].va = va;

			pa += VMXNET3_HDR_COPY_SIZE;
			va += VMXNET3_HDR_COPY_SIZE;
			node++;
		}
		ndescrs -= lim;
	}
	return (DDI_SUCCESS);

free_pages:
	/*
	 * Only the first i pages were obtained; hand back the part of the
	 * page_create_wait() request that was never used, then free the
	 * pages that were taken.
	 */
	page_create_putback(cache->num_pages - i);
	while (--i >= 0) {
		if (!page_tryupgrade(cache->pages[i])) {
			page_unlock(cache->pages[i]);
			while (!page_lock(cache->pages[i], SE_EXCL, NULL,
			    P_RECLAIM))
				;
		}
		page_free(cache->pages[i], 0);
	}
	/*
	 * Both arrays were allocated as num_pages pointers, so they must be
	 * freed with exactly that size.  Clear the pointers so that nothing
	 * can follow them after the free.
	 */
	kmem_free(cache->pages, cache->num_pages * sizeof (page_t *));
	cache->pages = NULL;
	kmem_free(cache->page_maps, cache->num_pages * sizeof (page_t *));
	cache->page_maps = NULL;
unresv_pages:
	page_unresv(cache->num_pages);
out:
	cache->num_pages = cache->num_nodes = 0;

	return (DDI_FAILURE);
}
Exemplo n.º 6
0
/*
 * Back the virtual address range [addr, addr + size) with pages.
 *
 * When inaddr is NULL the virtual range itself is first allocated from vmp
 * (and given back again if a later step fails).  The pages come from
 * page_create_func(addr, size, vmflag, pcarg) and each one is loaded into
 * the kernel hat, locked, at the address stored in its p_offset.
 *
 * Returns the start of the range, or NULL if the virtual address allocation,
 * the page reservation or the page creation failed.
 */
void *
segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
	page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
{
	pgcnt_t npages = btopr(size);
	caddr_t addr = inaddr;
	page_t *ppl;
	page_t *pp;
	int allocflag = 0;

	if (inaddr == NULL) {
		addr = vmem_alloc(vmp, size, vmflag);
		if (addr == NULL)
			return (NULL);
	}

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		/* Only give back a range that was allocated here. */
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * There are situations in which the HAT layer has to be told that
	 * it must not allocate memory.  Allocations from the hat_memload
	 * vmem arena always fall into this category, as infinite recursion
	 * would result otherwise.
	 *
	 * On top of that, the x86 hat cannot safely allocate memory while
	 * inside vmem_populate(), since its usage there has no simple bound.
	 */
	if (vmflag & VM_MEMLOAD)
		allocflag = HAT_NO_KALLOC;
#if defined(__x86)
	else if (vmem_is_populator())
		allocflag = HAT_NO_KALLOC;
#endif

	/* Take the pages off the list one by one and map each of them. */
	while ((pp = ppl) != NULL) {
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		ASSERT(PAGE_EXCL(pp));
		page_io_unlock(pp);
		hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
		    (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK | allocflag);
		pp->p_lckcnt = 1;
#if defined(__x86)
		/* x86 always keeps the page share-locked. */
		page_downgrade(pp);
#else
		/* Elsewhere the shared lock is kept only on request. */
		if (vmflag & SEGKMEM_SHARELOCKED)
			page_downgrade(pp);
		else
			page_unlock(pp);
#endif
	}

	return (addr);
}
Exemplo n.º 7
0
/*
 * Try to acquire the shared or exclusive lock (p_selock) of a page,
 * optionally reclaiming the page from the free list.
 *
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted   es flags     p_selock/SE_EWANTED  Action
 * ----------- -------------- -------------------  ---------
 * SE_EXCL        any [1][2]   unlocked/any        grant lock, clear SE_EWANTED
 * SE_EXCL        SE_EWANTED   any lock/any        deny, set SE_EWANTED
 * SE_EXCL        none         any lock/any        deny
 * SE_SHARED      n/a [2]        shared/0          grant
 * SE_SHARED      n/a [2]      unlocked/0          grant
 * SE_SHARED      n/a            shared/1          deny
 * SE_SHARED      n/a          unlocked/1          deny
 * SE_SHARED      n/a              excl/any        deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *   bit's value.  This was deemed acceptable as we are not concerned about
 *   exclusive-lock starvation. If this ever becomes an issue, a priority or
 *   fifo mechanism should also be implemented. Meantime, the thread that
 *   set SE_EWANTED should be prepared to catch this condition and reset it
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
 *       memory thread); this prevents reader-starvation of waiting
 *       writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *       retired.  Default is to deny the lock if the page is retired.
 *
 * Parameters:
 *   pp:      the page to lock.
 *   se:      SE_SHARED or SE_EXCL.
 *   lock:    optional mutex; when non-NULL it must be held on entry.  It is
 *            dropped before blocking on the page and re-acquired before
 *            returning from that path.  It is also handed to page_reclaim().
 *   reclaim: when P_RECLAIM and the page is free, the page is reclaimed
 *            through page_reclaim() once the lock has been obtained.
 *   es:      flags as described above.
 *
 * Returns 1 if the lock was granted (and, where requested, the reclaim
 * succeeded).  Returns 0 if the page is retired and SE_RETIRED was not
 * given, if the lock was denied, or if page_reclaim() failed.  When the lock
 * is denied the function blocks on the page's p_cv first (except when the
 * page is in the SE_DELETED state) and then returns 0 without trying again.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;	/* shared request was turned into excl */
	int		reclaim_it;	/* page was free and must be reclaimed */

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	/* pse is held across every inspection and update of p_selock below. */
	mutex_enter(pse);

	/* SE_EXCL_WANTED only makes sense together with an SE_EXCL request. */
	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	/*
	 * A shared request with es exactly equal to 1 on a completely
	 * unlocked page is turned into an exclusive request.  Unlike the
	 * reclaim case below this does not set `upgraded', so the lock is
	 * not downgraded again before returning.
	 */
	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		/*
		 * Shared request: granted only when p_selock is not negative
		 * and SE_EWANTED is not set.
		 */
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		/*
		 * The lock was denied.  For a page in the SE_DELETED state
		 * return right away, without waiting and without dropping
		 * `lock'.
		 */
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE:page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				/*
				 * The caller asked for a shared lock; give
				 * back the exclusive one forced above.
				 */
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}