예제 #1
0
/*
 * Buffer-scan callback used during truncation: invalidate and release
 * buffers that still qualify for the current (clean or dirty) pass.
 *
 * Returns 1 so the caller's scan continues with the next buffer.
 */
static
int
nvtruncbuf_bp_trunc(struct buf *bp, void *data)
{
	struct truncbuf_info *info = data;

	/*
	 * Do not try to use a buffer we cannot immediately lock,
	 * but sleep anyway to prevent a livelock.  The code will
	 * loop until all buffers can be acted upon.
	 */
	if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
		/*
		 * Lock is contended.  Hold a reference so the buffer
		 * cannot be reused out from under us, then sleep on
		 * the lock (LK_SLEEPFAIL normally returns failure
		 * after the sleep; if we do win the lock, drop it
		 * immediately).  The outer scan will retry the buffer.
		 */
		atomic_add_int(&bp->b_refs, 1);
		if (BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL) == 0)
			BUF_UNLOCK(bp);
		atomic_subtract_int(&bp->b_refs, 1);
	} else if ((info->clean && (bp->b_flags & B_DELWRI)) ||
		   (info->clean == 0 && (bp->b_flags & B_DELWRI) == 0) ||
		   bp->b_vp != info->vp ||
		   nvtruncbuf_bp_trunc_cmp(bp, data)) {
		/*
		 * Re-check under the lock: the buffer no longer
		 * matches this pass (became dirty during a clean pass
		 * or vice versa), was reassigned to another vnode, or
		 * the cmp callback disqualified it.  Skip it.
		 */
		BUF_UNLOCK(bp);
	} else {
		/*
		 * Throw the buffer away: remove it from its queue and
		 * release it with invalidation flags so its contents
		 * are discarded rather than written back.
		 */
		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF | B_NOCACHE);
		brelse(bp);
	}
	lwkt_yield();
	return(1);
}
예제 #2
0
/*
 * VM pager fault handler for TTM buffer objects.
 *
 * Resolves a fault at 'offset' within the VM object backing the
 * ttm_buffer_object and returns the backing page through *mres.
 * Returns VM_PAGER_OK on success or VM_PAGER_ERROR on failure.
 *
 * The VM object is write-locked on entry (it is unlocked at 'retry'),
 * dropped around the blocking TTM operations, and held again on
 * return via the out_* labels.
 */
static int
ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{
	struct ttm_buffer_object *bo = vm_obj->handle;
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_tt *ttm = NULL;
	vm_page_t m, m1, oldm;
	int ret;
	int retval = VM_PAGER_OK;
	struct ttm_mem_type_manager *man =
		&bdev->man[bo->mem.mem_type];

	vm_object_pip_add(vm_obj, 1);
	/*
	 * Detach any placeholder page the pager handed us at the fault
	 * index; it is freed at the end once the real backing page has
	 * been installed.
	 */
	oldm = *mres;
	if (oldm != NULL) {
		vm_page_remove(oldm);
		*mres = NULL;
	} else
		/* NOTE(review): redundant -- oldm is already NULL here. */
		oldm = NULL;
retry:
	VM_OBJECT_WUNLOCK(vm_obj);
	m = NULL;

reserve:
	/*
	 * Reserve the buffer object, yielding and retrying while it is
	 * busy.
	 *
	 * NOTE(review): an error other than -EBUSY falls through below
	 * as if the reservation succeeded -- confirm this is the
	 * intended behavior.
	 */
	ret = ttm_bo_reserve(bo, false, false, false, 0);
	if (unlikely(ret != 0)) {
		if (ret == -EBUSY) {
			lwkt_yield();
			goto reserve;
		}
	}

	/*
	 * Give the driver a chance to veto or delay the fault.
	 * Retryable errors loop back to the reservation step.
	 */
	if (bdev->driver->fault_reserve_notify) {
		ret = bdev->driver->fault_reserve_notify(bo);
		switch (ret) {
		case 0:
			break;
		case -EBUSY:
		case -ERESTARTSYS:
		case -EINTR:
			lwkt_yield();
			goto reserve;
		default:
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	}

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */

	lockmgr(&bdev->fence_lock, LK_EXCLUSIVE);
	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
		/*
		 * Here, the behavior differs between Linux and FreeBSD.
		 *
		 * On Linux, the wait is interruptible (3rd argument to
		 * ttm_bo_wait). There must be some mechanism to resume
		 * page fault handling, once the signal is processed.
		 *
		 * On FreeBSD, the wait is uninteruptible. This is not a
		 * problem as we can't end up with an unkillable process
		 * here, because the wait will eventually time out.
		 *
		 * An example of this situation is the Xorg process
		 * which uses SIGALRM internally. The signal could
		 * interrupt the wait, causing the page fault to fail
		 * and the process to receive SIGSEGV.
		 */
		ret = ttm_bo_wait(bo, false, false, false);
		lockmgr(&bdev->fence_lock, LK_RELEASE);
		if (unlikely(ret != 0)) {
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	} else
		lockmgr(&bdev->fence_lock, LK_RELEASE);

	/*
	 * Take the per-memory-type IO lock and make sure the aperture
	 * region backing the object is reserved.
	 */
	ret = ttm_mem_io_lock(man, true);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_unlock;
	}
	ret = ttm_mem_io_reserve_vm(bo);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_io_unlock;
	}

	/*
	 * Strictly, we're not allowed to modify vma->vm_page_prot here,
	 * since the mmap_sem is only held in read mode. However, we
	 * modify only the caching bits of vma->vm_page_prot and
	 * consider those bits protected by
	 * the bo->mutex, as we should be the only writers.
	 * There shouldn't really be any readers of these bits except
	 * within vm_insert_mixed()? fork?
	 *
	 * TODO: Add a list of vmas to the bo, and change the
	 * vma->vm_page_prot when the object changes caching policy, with
	 * the correct locks held.
	 */
	if (!bo->mem.bus.is_iomem) {
		/* Allocate all page at once, most common usage */
		ttm = bo->ttm;
		if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
	}

	/*
	 * Locate the page backing the faulted offset: a fictitious
	 * page for iomem apertures, otherwise the populated ttm page.
	 * Either way the page's cache attribute is set to match the
	 * buffer's placement.
	 */
	if (bo->mem.bus.is_iomem) {
		m = vm_phys_fictitious_to_vm_page(bo->mem.bus.base +
		    bo->mem.bus.offset + offset);
		pmap_page_set_memattr(m, ttm_io_prot(bo->mem.placement));
	} else {
		ttm = bo->ttm;
		m = ttm->pages[OFF_TO_IDX(offset)];
		if (unlikely(!m)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
		pmap_page_set_memattr(m,
		    (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
		    VM_MEMATTR_WRITE_BACK : ttm_io_prot(bo->mem.placement));
	}

	VM_OBJECT_WLOCK(vm_obj);
	if ((m->flags & PG_BUSY) != 0) {
		/*
		 * Someone else holds the page busy.  Back out
		 * completely (IO lock and reservation) and restart the
		 * whole fault.
		 */
#if 0
		vm_page_sleep(m, "ttmpbs");
#endif
		ttm_mem_io_unlock(man);
		ttm_bo_unreserve(bo);
		goto retry;
	}
	/*
	 * Install the page into the VM object at the fault index
	 * (unless it is already there) and mark it fully valid.
	 */
	m->valid = VM_PAGE_BITS_ALL;
	*mres = m;
	m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m1 == NULL) {
		vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
	} else {
		KASSERT(m == m1,
		    ("inconsistent insert bo %p m %p m1 %p offset %jx",
		    bo, m, m1, (uintmax_t)offset));
	}
	/*
	 * NOTE(review): return value ignored; the PG_BUSY check above
	 * presumably guarantees this succeeds -- confirm.
	 */
	vm_page_busy_try(m, FALSE);

	if (oldm != NULL) {
		vm_page_free(oldm);
	}

	/* Success and failure paths converge here with the object locked. */
out_io_unlock1:
	ttm_mem_io_unlock(man);
out_unlock1:
	ttm_bo_unreserve(bo);
	vm_object_pip_wakeup(vm_obj);
	return (retval);

	/* Error paths: re-acquire the object lock before unwinding. */
out_io_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_io_unlock1;

out_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_unlock1;
}
예제 #3
0
/*
 * vm_contig_pg_alloc:
 *
 * Allocate contiguous pages from the VM.  This function does not
 * map the allocated pages into the kernel map, otherwise it is
 * impossible to make large allocations (i.e. >2G).
 *
 * Malloc()'s data structures have been used for collection of
 * statistics and for allocations of less than a page.
 *
 * Parameters:
 *	size		requested size in bytes (rounded up to a page)
 *	low, high	acceptable physical address range
 *	alignment	required physical alignment, power of 2
 *	boundary	physical boundary the run must not cross, power of 2
 *	mflags		M_WAITOK permits the (slow) active-queue flush
 *
 * Returns the index of the first page of the run in vm_page_array,
 * or -1 on failure.  Allocated pages are wired and not busied.
 */
static int
vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high,
                   unsigned long alignment, unsigned long boundary, int mflags)
{
    int i, q, start, pass;
    vm_offset_t phys;
    vm_page_t pga = vm_page_array;
    vm_page_t m;
    int pqtype;

    size = round_page(size);
    if (size == 0)
        panic("vm_contig_pg_alloc: size must not be 0");
    if ((alignment & (alignment - 1)) != 0)
        panic("vm_contig_pg_alloc: alignment must be a power of 2");
    if ((boundary & (boundary - 1)) != 0)
        panic("vm_contig_pg_alloc: boundary must be a power of 2");

    /*
     * See if we can get the pages from the contiguous page reserve
     * alist.  The returned pages will be allocated and wired but not
     * busied.
     */
    m = vm_page_alloc_contig(low, high, alignment, boundary, size);
    if (m)
        return (m - &pga[0]);

    /*
     * Three passes (0, 1, 2).  Each pass scans the VM page list for
     * free or cached pages.  After each pass if the entire scan failed
     * we attempt to flush inactive pages and reset the start index back
     * to 0.  For passes 1 and 2 we also attempt to flush active pages.
     */
    start = 0;
    for (pass = 0; pass < 3; pass++) {
        /*
         * Find first page in array that is free, within range,
         * aligned, and such that the boundary won't be crossed.
         */
again:
        for (i = start; i < vmstats.v_page_count; i++) {
            m = &pga[i];
            phys = VM_PAGE_TO_PHYS(m);
            pqtype = m->queue - m->pc;
            if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
                    (phys >= low) && (phys < high) &&
                    ((phys & (alignment - 1)) == 0) &&
                    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) &&
                    m->busy == 0 && m->wire_count == 0 &&
                    m->hold_count == 0 &&
                    (m->flags & (PG_BUSY | PG_NEED_COMMIT)) == 0)
            {
                break;
            }
        }

        /*
         * If we cannot find the page in the given range, or we have
         * crossed the boundary, call the vm_contig_pg_clean() function
         * for flushing out the queues, and returning it back to
         * normal state.
         */
        if ((i == vmstats.v_page_count) ||
                ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {

            /*
             * Best effort flush of all inactive pages.
             * This is quite quick, for now stall all
             * callers, even if they've specified M_NOWAIT.
             */
            for (q = 0; q < PQ_L2_SIZE; ++q) {
                vm_contig_pg_clean(PQ_INACTIVE + q,
                                   vmstats.v_inactive_count);
                lwkt_yield();
            }

            /*
             * Best effort flush of active pages.
             *
             * This is very, very slow.
             * Only do this if the caller has agreed to M_WAITOK.
             *
             * If enough pages are flushed, we may succeed on
             * next (final) pass, if not the caller, contigmalloc(),
             * will fail in the index < 0 case.
             */
            if (pass > 0 && (mflags & M_WAITOK)) {
                for (q = 0; q < PQ_L2_SIZE; ++q) {
                    vm_contig_pg_clean(PQ_ACTIVE + q,
                                       vmstats.v_active_count);
                }
                lwkt_yield();
            }

            /*
             * We're already too high in the address space
             * to succeed, reset to 0 for the next iteration.
             */
            start = 0;
            continue;	/* next pass */
        }
        start = i;

        /*
         * Check successive pages for contiguous and free.
         *
         * (still in critical section)
         */
        for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
            m = &pga[i];
            pqtype = m->queue - m->pc;
            if ((VM_PAGE_TO_PHYS(&m[0]) !=
                    (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) ||
                    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) ||
                    m->busy || m->wire_count ||
                    m->hold_count ||
                    (m->flags & (PG_BUSY | PG_NEED_COMMIT)))
            {
                /* Run broken; resume the scan past this start page. */
                start++;
                goto again;
            }
        }

        /*
         * Try to allocate the pages, wiring them as we go.
         *
         * (still in critical section)
         */
        for (i = start; i < (start + size / PAGE_SIZE); i++) {
            m = &pga[i];

            if (vm_page_busy_try(m, TRUE)) {
                /* Lost the race for this page; release what we took. */
                vm_contig_pg_free(start,
                                  (i - start) * PAGE_SIZE);
                start++;
                goto again;
            }
            pqtype = m->queue - m->pc;
            if (pqtype == PQ_CACHE &&
                    m->hold_count == 0 &&
                    m->wire_count == 0 &&
                    (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0) {
                /*
                 * Cached page: strip mappings and free it so it
                 * moves to PQ_FREE, then re-examine the same index.
                 */
                vm_page_protect(m, VM_PROT_NONE);
                KKASSERT((m->flags & PG_MAPPED) == 0);
                KKASSERT(m->dirty == 0);
                vm_page_free(m);
                --i;
                continue;	/* retry the page */
            }
            if (pqtype != PQ_FREE || m->hold_count) {
                vm_page_wakeup(m);
                vm_contig_pg_free(start,
                                  (i - start) * PAGE_SIZE);
                start++;
                goto again;
            }
            KKASSERT((m->valid & m->dirty) == 0);
            KKASSERT(m->wire_count == 0);
            KKASSERT(m->object == NULL);
            /* Claim the page: dequeue, validate, track PG_ZERO count. */
            vm_page_unqueue_nowakeup(m);
            m->valid = VM_PAGE_BITS_ALL;
            if (m->flags & PG_ZERO)
                vm_page_zero_count--;
            KASSERT(m->dirty == 0,
                    ("vm_contig_pg_alloc: page %p was dirty", m));
            KKASSERT(m->wire_count == 0);
            KKASSERT(m->busy == 0);

            /*
             * Clear all flags except PG_BUSY, PG_ZERO, and
             * PG_WANTED, then unbusy the now allocated page.
             */
            vm_page_flag_clear(m, ~(PG_BUSY | PG_SBUSY |
                                    PG_ZERO | PG_WANTED));
            vm_page_wire(m);
            vm_page_wakeup(m);
        }

        /*
         * Our job is done, return the index page of vm_page_array.
         */
        return (start); /* aka &pga[start] */
    }

    /*
     * Failed.
     */
    return (-1);
}
예제 #4
0
/*
 * Sysctl handler computing system-wide virtual memory totals
 * (struct vmtotal): per-process statistics via allproc_scan() plus
 * per-object memory usage gathered by walking vm_object_list.
 *
 * A marker object is threaded through the list so the walk can
 * periodically move the marker forward and yield/sleep without
 * losing its place.
 *
 * No requirements.
 */
static int
do_vmtotal(SYSCTL_HANDLER_ARGS)
{
	struct vmtotal total;
	struct vmtotal *totalp;
	struct vm_object marker;
	vm_object_t object;
	long collisions;
	int burst;

	bzero(&total, sizeof(total));
	totalp = &total;
	/* On-stack marker; OBJT_MARKER makes every scan skip it. */
	bzero(&marker, sizeof(marker));
	marker.type = OBJT_MARKER;
	/* Snapshot token contention so we can detect waiters later. */
	collisions = vmobj_token.t_collisions;

#if 0
	/*
	 * Mark all objects as inactive.
	 */
	lwkt_gettoken(&vmobj_token);
	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object,object_list)) {
		if (object->type == OBJT_MARKER)
			continue;
		vm_object_clear_flag(object, OBJ_ACTIVE);
	}
	lwkt_reltoken(&vmobj_token);
#endif

	/*
	 * Calculate process statistics.
	 */
	allproc_scan(do_vmtotal_callback, totalp);

	/*
	 * Calculate object memory usage statistics.
	 */
	lwkt_gettoken(&vmobj_token);
	TAILQ_INSERT_HEAD(&vm_object_list, &marker, object_list);
	burst = 0;

	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object, object_list)) {
		/*
		 * devices, like /dev/mem, will badly skew our totals.
		 * markers aren't real objects.
		 */
		if (object->type == OBJT_MARKER)
			continue;
		if (object->type == OBJT_DEVICE)
			continue;
		if (object->size >= 0x7FFFFFFF) {
			/*
			 * Probably unbounded anonymous memory (really
			 * bounded by related vm_map_entry structures which
			 * we do not have access to in this loop).
			 */
			totalp->t_vm += object->resident_page_count;
		} else {
			/*
			 * It's questionable how useful this is but...
			 */
			totalp->t_vm += object->size;
		}
		totalp->t_rm += object->resident_page_count;
		if (object->flags & OBJ_ACTIVE) {
			totalp->t_avm += object->size;
			totalp->t_arm += object->resident_page_count;
		}
		if (object->shadow_count > 1) {
			/* shared object */
			totalp->t_vmshr += object->size;
			totalp->t_rmshr += object->resident_page_count;
			if (object->flags & OBJ_ACTIVE) {
				totalp->t_avmshr += object->size;
				totalp->t_armshr += object->resident_page_count;
			}
		}

		/*
		 * Don't waste time unnecessarily
		 */
		if (++burst < 25)
			continue;
		burst = 0;

		/*
		 * Don't hog the vmobj_token if someone else wants it.
		 * Re-park the marker after the current object so the
		 * walk resumes there, then sleep briefly if the token
		 * collision count shows contention, else just yield.
		 */
		TAILQ_REMOVE(&vm_object_list, &marker, object_list);
		TAILQ_INSERT_AFTER(&vm_object_list, object,
				   &marker, object_list);
		object = &marker;
		if (collisions != vmobj_token.t_collisions) {
			tsleep(&vm_object_list, 0, "breath", 1);
			collisions = vmobj_token.t_collisions;
		} else {
			lwkt_yield();
		}
	}

	TAILQ_REMOVE(&vm_object_list, &marker, object_list);
	lwkt_reltoken(&vmobj_token);

	totalp->t_free = vmstats.v_free_count + vmstats.v_cache_count;

	return (sysctl_handle_opaque(oidp, totalp, sizeof total, req));
}
예제 #5
0
/*
 * MPSAFE thread
 */
static void
vm_pagezero(void *arg)
{
	vm_page_t m = NULL;
	struct lwbuf *lwb = NULL;
	struct lwbuf lwb_cache;
	enum zeroidle_state state = STATE_IDLE;
	char *pg = NULL;
	int npages = 0;
	int sleep_time;	
	int i = 0;
	int cpu = (int)(intptr_t)arg;
	int zero_state = 0;

	/*
	 * Adjust thread parameters before entering our loop.  The thread
	 * is started with the MP lock held and with normal kernel thread
	 * priority.
	 *
	 * Also put us on the last cpu for now.
	 *
	 * For now leave the MP lock held, the VM routines cannot be called
	 * with it released until tokenization is finished.
	 */
	lwkt_setpri_self(TDPRI_IDLE_WORK);
	lwkt_setcpu_self(globaldata_find(cpu));
	sleep_time = DEFAULT_SLEEP_TIME;

	/*
	 * Loop forever
	 */
	for (;;) {
		int zero_count;

		switch(state) {
		case STATE_IDLE:
			/*
			 * Wait for work.
			 */
			tsleep(&zero_state, 0, "pgzero", sleep_time);
			if (vm_page_zero_check(&zero_count, &zero_state))
				npages = idlezero_rate / 10;
			sleep_time = vm_page_zero_time(zero_count);
			if (npages)
				state = STATE_GET_PAGE;	/* Fallthrough */
			break;
		case STATE_GET_PAGE:
			/*
			 * Acquire page to zero
			 */
			if (--npages == 0) {
				state = STATE_IDLE;
			} else {
				m = vm_page_free_fromq_fast();
				if (m == NULL) {
					state = STATE_IDLE;
				} else {
					state = STATE_ZERO_PAGE;
					lwb = lwbuf_alloc(m, &lwb_cache);
					pg = (char *)lwbuf_kva(lwb);
					i = 0;
				}
			}
			break;
		case STATE_ZERO_PAGE:
			/*
			 * Zero-out the page
			 */
			while (i < PAGE_SIZE) {
				if (idlezero_nocache == 1)
					bzeront(&pg[i], IDLEZERO_RUN);
				else
					bzero(&pg[i], IDLEZERO_RUN);
				i += IDLEZERO_RUN;
				lwkt_yield();
			}
			state = STATE_RELEASE_PAGE;
			break;
		case STATE_RELEASE_PAGE:
			lwbuf_free(lwb);
			vm_page_flag_set(m, PG_ZERO);
			vm_page_free_toq(m);
			state = STATE_GET_PAGE;
			++idlezero_count;	/* non-locked, SMP race ok */
			break;
		}
		lwkt_yield();
	}
}