Example #1
/*
 * vm_contig_pg_alloc:
 *
 * Allocate contiguous pages from the VM.  This function does not
 * map the allocated pages into the kernel map, otherwise it is
 * impossible to make large allocations (i.e. >2G).
 *
 * Malloc()'s data structures have been used for collection of
 * statistics and for allocations of less than a page.
 */
static int
vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high,
                   unsigned long alignment, unsigned long boundary, int mflags)
{
    int i, q, start, pass;
    vm_offset_t phys;
    vm_page_t pga = vm_page_array;
    vm_page_t m;
    int pqtype;

    size = round_page(size);
    if (size == 0)
        panic("vm_contig_pg_alloc: size must not be 0");
    if ((alignment & (alignment - 1)) != 0)
        panic("vm_contig_pg_alloc: alignment must be a power of 2");
    if ((boundary & (boundary - 1)) != 0)
        panic("vm_contig_pg_alloc: boundary must be a power of 2");

    /*
     * See if we can get the pages from the contiguous page reserve
     * alist.  The returned pages will be allocated and wired but not
     * busied.
     */
    m = vm_page_alloc_contig(low, high, alignment, boundary, size);
    if (m)
        return (m - &pga[0]);

    /*
     * Three passes (0, 1, 2).  Each pass scans the VM page list for
     * free or cached pages.  After each pass if the entire scan failed
     * we attempt to flush inactive pages and reset the start index back
     * to 0.  For passes 1 and 2 we also attempt to flush active pages.
     */
    start = 0;
    for (pass = 0; pass < 3; pass++) {
        /*
         * Find first page in array that is free, within range,
         * aligned, and such that the boundary won't be crossed.
         */
again:
        for (i = start; i < vmstats.v_page_count; i++) {
            m = &pga[i];
            phys = VM_PAGE_TO_PHYS(m);
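            /* strip the page color to get the base queue type */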
            pqtype = m->queue - m->pc;
            if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
                    (phys >= low) && (phys < high) &&
                    ((phys & (alignment - 1)) == 0) &&
                    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) &&
                    m->busy == 0 && m->wire_count == 0 &&
                    m->hold_count == 0 &&
                    (m->flags & (PG_BUSY | PG_NEED_COMMIT)) == 0)
            {
                break;
            }
        }

        /*
         * If we cannot find a page in the given range, or the candidate
         * run would exceed the upper limit, call vm_contig_pg_clean()
         * to flush out the queues and return them to a normal state.
         */
        if ((i == vmstats.v_page_count) ||
                ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {

            /*
             * Best effort flush of all inactive pages.
             * This is quite quick; for now, stall all
             * callers, even if they've specified M_NOWAIT.
             */
            for (q = 0; q < PQ_L2_SIZE; ++q) {
                vm_contig_pg_clean(PQ_INACTIVE + q,
                                   vmstats.v_inactive_count);
                lwkt_yield();
            }

            /*
             * Best effort flush of active pages.
             *
             * This is very, very slow.
             * Only do this if the caller has agreed to M_WAITOK.
             *
             * If enough pages are flushed, we may succeed on the
             * next (final) pass; if not, the caller, contigmalloc(),
             * will fail in the index < 0 case.
             */
            if (pass > 0 && (mflags & M_WAITOK)) {
                for (q = 0; q < PQ_L2_SIZE; ++q) {
                    vm_contig_pg_clean(PQ_ACTIVE + q,
                                       vmstats.v_active_count);
                }
                lwkt_yield();
            }

            /*
             * We're already too high in the address space
             * to succeed; reset the start index to 0 for the next pass.
             */
            start = 0;
            continue;	/* next pass */
        }
        start = i;

        /*
         * Check that the successive pages are contiguous and free.
         *
         * (still in critical section)
         */
        for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
            m = &pga[i];
            pqtype = m->queue - m->pc;
            if ((VM_PAGE_TO_PHYS(&m[0]) !=
                    (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) ||
                    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) ||
                    m->busy || m->wire_count ||
                    m->hold_count ||
                    (m->flags & (PG_BUSY | PG_NEED_COMMIT)))
            {
                start++;
                goto again;
            }
        }

        /*
         * Try to allocate the pages, wiring them as we go.
         *
         * (still in critical section)
         */
        for (i = start; i < (start + size / PAGE_SIZE); i++) {
            m = &pga[i];

            if (vm_page_busy_try(m, TRUE)) {
                vm_contig_pg_free(start,
                                  (i - start) * PAGE_SIZE);
                start++;
                goto again;
            }
            pqtype = m->queue - m->pc;
            if (pqtype == PQ_CACHE &&
                    m->hold_count == 0 &&
                    m->wire_count == 0 &&
                    (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0) {
                vm_page_protect(m, VM_PROT_NONE);
                KKASSERT((m->flags & PG_MAPPED) == 0);
                KKASSERT(m->dirty == 0);
                vm_page_free(m);
                --i;
                continue;	/* retry the page */
            }
            if (pqtype != PQ_FREE || m->hold_count) {
                vm_page_wakeup(m);
                vm_contig_pg_free(start,
                                  (i - start) * PAGE_SIZE);
                start++;
                goto again;
            }
            KKASSERT((m->valid & m->dirty) == 0);
            KKASSERT(m->wire_count == 0);
            KKASSERT(m->object == NULL);
            vm_page_unqueue_nowakeup(m);
            m->valid = VM_PAGE_BITS_ALL;
            if (m->flags & PG_ZERO)
                vm_page_zero_count--;
            KASSERT(m->dirty == 0,
                    ("vm_contig_pg_alloc: page %p was dirty", m));
            KKASSERT(m->wire_count == 0);
            KKASSERT(m->busy == 0);

            /*
             * Clear all flags except PG_BUSY, PG_SBUSY, PG_ZERO, and
             * PG_WANTED, then unbusy the now allocated page.
             */
            vm_page_flag_clear(m, ~(PG_BUSY | PG_SBUSY |
                                    PG_ZERO | PG_WANTED));
            vm_page_wire(m);
            vm_page_wakeup(m);
        }

        /*
         * Our job is done; return the starting index into vm_page_array.
         */
        return (start); /* aka &pga[start] */
    }

    /*
     * Failed.
     */
    return (-1);
}
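
The scan above leans on two power-of-2 bit tricks: (phys & (alignment - 1)) == 0 tests alignment, and ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0 checks that the first and last byte of the candidate run fall inside the same boundary-sized window, i.e. that the run does not cross a boundary line. Below is a minimal userland sketch of the same checks; the paddr_t typedef and the sample values in main() are illustrative, not taken from the kernel source.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t paddr_t;   /* illustrative stand-in for vm_paddr_t */

/* Power-of-2 alignment test, as used in the scan loop above. */
static bool
is_aligned(paddr_t phys, paddr_t alignment)
{
    return (phys & (alignment - 1)) == 0;
}

/*
 * True when [phys, phys + size - 1] stays within one boundary-sized
 * window: XORing the first and last address and masking off the low
 * bits leaves 0 only if the window-selecting high bits are equal.
 */
static bool
no_boundary_cross(paddr_t phys, uint64_t size, paddr_t boundary)
{
    return ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0;
}

int
main(void)
{
    /* A 64KB run at 0x1f0000 stays below the 1MB line... */
    printf("%d\n", no_boundary_cross(0x1f0000, 0x10000, 0x100000)); /* 1 */
    /* ...but the same run starting at 0x1f8000 crosses it. */
    printf("%d\n", no_boundary_cross(0x1f8000, 0x10000, 0x100000)); /* 0 */
    printf("%d\n", is_aligned(0x1f0000, 0x10000));                  /* 1 */
    return 0;
}
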
Example #2
static int
ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{
	struct ttm_buffer_object *bo = vm_obj->handle;
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_tt *ttm = NULL;
	vm_page_t m, m1, oldm;
	int ret;
	int retval = VM_PAGER_OK;
	struct ttm_mem_type_manager *man =
		&bdev->man[bo->mem.mem_type];

	vm_object_pip_add(vm_obj, 1);
	oldm = *mres;
	if (oldm != NULL) {
		vm_page_remove(oldm);
		*mres = NULL;
	} else
		oldm = NULL;
retry:
	VM_OBJECT_WUNLOCK(vm_obj);
	m = NULL;

reserve:
	ret = ttm_bo_reserve(bo, false, false, false, 0);
	if (unlikely(ret != 0)) {
		if (ret == -EBUSY) {
			lwkt_yield();
			goto reserve;
		}
	}

	if (bdev->driver->fault_reserve_notify) {
		ret = bdev->driver->fault_reserve_notify(bo);
		switch (ret) {
		case 0:
			break;
		case -EBUSY:
		case -ERESTARTSYS:
		case -EINTR:
			lwkt_yield();
			goto reserve;
		default:
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	}

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */

	lockmgr(&bdev->fence_lock, LK_EXCLUSIVE);
	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
		/*
		 * Here, the behavior differs between Linux and FreeBSD.
		 *
		 * On Linux, the wait is interruptible (3rd argument to
		 * ttm_bo_wait). There must be some mechanism to resume
		 * page fault handling, once the signal is processed.
		 *
	 * On FreeBSD, the wait is uninterruptible. This is not a
		 * problem as we can't end up with an unkillable process
		 * here, because the wait will eventually time out.
		 *
		 * An example of this situation is the Xorg process
		 * which uses SIGALRM internally. The signal could
		 * interrupt the wait, causing the page fault to fail
		 * and the process to receive SIGSEGV.
		 */
		ret = ttm_bo_wait(bo, false, false, false);
		lockmgr(&bdev->fence_lock, LK_RELEASE);
		if (unlikely(ret != 0)) {
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	} else
		lockmgr(&bdev->fence_lock, LK_RELEASE);

	ret = ttm_mem_io_lock(man, true);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_unlock;
	}
	ret = ttm_mem_io_reserve_vm(bo);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_io_unlock;
	}

	/*
	 * Strictly, we're not allowed to modify vma->vm_page_prot here,
	 * since the mmap_sem is only held in read mode. However, we
	 * modify only the caching bits of vma->vm_page_prot and
	 * consider those bits protected by
	 * the bo->mutex, as we should be the only writers.
	 * There shouldn't really be any readers of these bits except
	 * within vm_insert_mixed()? fork?
	 *
	 * TODO: Add a list of vmas to the bo, and change the
	 * vma->vm_page_prot when the object changes caching policy, with
	 * the correct locks held.
	 */
	if (!bo->mem.bus.is_iomem) {
		/* Allocate all pages at once, the most common usage */
		ttm = bo->ttm;
		if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
	}

	if (bo->mem.bus.is_iomem) {
		m = vm_phys_fictitious_to_vm_page(bo->mem.bus.base +
		    bo->mem.bus.offset + offset);
		pmap_page_set_memattr(m, ttm_io_prot(bo->mem.placement));
	} else {
		ttm = bo->ttm;
		m = ttm->pages[OFF_TO_IDX(offset)];
		if (unlikely(!m)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
		pmap_page_set_memattr(m,
		    (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
		    VM_MEMATTR_WRITE_BACK : ttm_io_prot(bo->mem.placement));
	}

	VM_OBJECT_WLOCK(vm_obj);
	if ((m->flags & PG_BUSY) != 0) {
#if 0
		vm_page_sleep(m, "ttmpbs");
#endif
		ttm_mem_io_unlock(man);
		ttm_bo_unreserve(bo);
		goto retry;
	}
	m->valid = VM_PAGE_BITS_ALL;
	*mres = m;
	m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m1 == NULL) {
		vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
	} else {
		KASSERT(m == m1,
		    ("inconsistent insert bo %p m %p m1 %p offset %jx",
		    bo, m, m1, (uintmax_t)offset));
	}
	vm_page_busy_try(m, FALSE);

	if (oldm != NULL) {
		vm_page_free(oldm);
	}

out_io_unlock1:
	ttm_mem_io_unlock(man);
out_unlock1:
	ttm_bo_unreserve(bo);
	vm_object_pip_wakeup(vm_obj);
	return (retval);

out_io_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_io_unlock1;

out_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_unlock1;
}
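
The fault handler above retries rather than fails when it loses a race: a transient -EBUSY from ttm_bo_reserve() loops back to the reserve label after lwkt_yield(), and finding the page PG_BUSY drops everything and restarts at retry. Below is a small, self-contained sketch of that retry-with-yield shape; struct resource, try_reserve() and the use of sched_yield() are illustrative stand-ins, not TTM or kernel APIs, and unlike the original the sketch simply returns other errors to its caller.

#include <errno.h>
#include <sched.h>	/* sched_yield() stands in for lwkt_yield() */

/* Hypothetical resource, for illustration only. */
struct resource {
	volatile int busy;
};

static int
try_reserve(struct resource *r)
{
	/* Pretend the resource may be transiently busy. */
	return (r->busy ? -EBUSY : 0);
}

/*
 * Same shape as the reserve loop in ttm_bo_vm_fault(): on a transient
 * -EBUSY, yield the CPU and try again instead of failing the fault.
 */
static int
reserve_with_yield(struct resource *r)
{
	int ret;

	for (;;) {
		ret = try_reserve(r);
		if (ret != -EBUSY)
			return (ret);	/* 0 on success, or a hard error */
		sched_yield();		/* let the current holder finish */
	}
}

int
main(void)
{
	struct resource r = { .busy = 0 };

	return (reserve_with_yield(&r));	/* returns 0 immediately */
}
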
Example #3
/*
 * vm_contig_pg_clean:
 *
 * Do a thorough cleanup of the specified 'queue', which can be either
 * PQ_ACTIVE or PQ_INACTIVE, by walking it.  If a page is not marked
 * dirty, it is shoved into the page cache, provided no one has
 * currently acquired it; otherwise localized action per object type
 * is taken for cleanup:
 *
 * 	In the OBJT_VNODE case, the whole page range is cleaned up
 * 	using the vm_object_page_clean() routine, by specifying a
 * 	start and end of '0'.
 *
 * 	Otherwise if the object is of any other type, the generic
 * 	pageout (daemon) flush routine is invoked.
 */
static void
vm_contig_pg_clean(int queue, int count)
{
    vm_object_t object;
    vm_page_t m, m_tmp;
    struct vm_page marker;
    struct vpgqueues *pq = &vm_page_queues[queue];

    /*
     * Set up a local marker
     */
    bzero(&marker, sizeof(marker));
    marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
    marker.queue = queue;
    marker.wire_count = 1;

    vm_page_queues_spin_lock(queue);
    TAILQ_INSERT_HEAD(&pq->pl, &marker, pageq);
    vm_page_queues_spin_unlock(queue);

    /*
     * Iterate the queue.  Note that the vm_page spinlock must be
     * acquired before the pageq spinlock so it's easiest to simply
     * not hold it in the loop iteration.
     */
    while (count-- > 0 && (m = TAILQ_NEXT(&marker, pageq)) != NULL) {
        vm_page_and_queue_spin_lock(m);
        if (m != TAILQ_NEXT(&marker, pageq)) {
            vm_page_and_queue_spin_unlock(m);
            ++count;
            continue;
        }
        KKASSERT(m->queue == queue);

        TAILQ_REMOVE(&pq->pl, &marker, pageq);
        TAILQ_INSERT_AFTER(&pq->pl, m, &marker, pageq);

        if (m->flags & PG_MARKER) {
            vm_page_and_queue_spin_unlock(m);
            continue;
        }
        if (vm_page_busy_try(m, TRUE)) {
            vm_page_and_queue_spin_unlock(m);
            continue;
        }
        vm_page_and_queue_spin_unlock(m);

        /*
         * We've successfully busied the page
         */
        if (m->queue - m->pc != queue) {
            vm_page_wakeup(m);
            continue;
        }
        if (m->wire_count || m->hold_count) {
            vm_page_wakeup(m);
            continue;
        }
        if ((object = m->object) == NULL) {
            vm_page_wakeup(m);
            continue;
        }
        vm_page_test_dirty(m);
        if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
            vm_object_hold(object);
            KKASSERT(m->object == object);

            if (object->type == OBJT_VNODE) {
                vm_page_wakeup(m);
                vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
                vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
                vn_unlock(((struct vnode *)object->handle));
            } else if (object->type == OBJT_SWAP ||
                       object->type == OBJT_DEFAULT) {
                m_tmp = m;
                vm_pageout_flush(&m_tmp, 1, 0);
            } else {
                vm_page_wakeup(m);
            }
            vm_object_drop(object);
        } else if (m->hold_count == 0) {
            vm_page_cache(m);
        } else {
            vm_page_wakeup(m);
        }
    }

    /*
     * Scrap our local marker
     */
    vm_page_queues_spin_lock(queue);
    TAILQ_REMOVE(&pq->pl, &marker, pageq);
    vm_page_queues_spin_unlock(queue);
}
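
The marker technique above is what lets the scan drop the queue spinlock between pages without losing its place: a dummy entry flagged PG_MARKER trails the scan position, is re-inserted after each page that is visited, and other scanners' markers are simply skipped. Below is a small userland sketch of the same idea using the <sys/queue.h> TAILQ macros; struct node and its is_marker flag are illustrative, not kernel types, and the sketch omits the locking.

#include <sys/queue.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
    int value;
    bool is_marker;                 /* plays the role of PG_MARKER */
    TAILQ_ENTRY(node) link;
};

TAILQ_HEAD(nodelist, node);

/*
 * Walk the list using a marker: after visiting each element the marker
 * is moved just past it, so the scan can resume from the marker even
 * if other code re-orders the list while we are not looking.
 */
static void
scan_with_marker(struct nodelist *list)
{
    struct node marker = { .is_marker = true };
    struct node *n;

    TAILQ_INSERT_HEAD(list, &marker, link);
    while ((n = TAILQ_NEXT(&marker, link)) != NULL) {
        /* Re-anchor the marker right after the element being visited. */
        TAILQ_REMOVE(list, &marker, link);
        TAILQ_INSERT_AFTER(list, n, &marker, link);

        if (n->is_marker)           /* skip other scanners' markers */
            continue;
        printf("visited %d\n", n->value);
    }
    TAILQ_REMOVE(list, &marker, link);
}

int
main(void)
{
    struct nodelist list = TAILQ_HEAD_INITIALIZER(list);
    struct node a = { .value = 1 }, b = { .value = 2 }, c = { .value = 3 };

    TAILQ_INSERT_TAIL(&list, &a, link);
    TAILQ_INSERT_TAIL(&list, &b, link);
    TAILQ_INSERT_TAIL(&list, &c, link);
    scan_with_marker(&list);        /* prints 1, 2, 3 */
    return 0;
}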