/*
 * vm_contig_pg_alloc:
 *
 * Allocate contiguous pages from the VM.  This function does not
 * map the allocated pages into the kernel map, otherwise it is
 * impossible to make large allocations (i.e. >2G).
 *
 * Malloc()'s data structures have been used for collection of
 * statistics and for allocations of less than a page.
 *
 * Arguments:
 *	size		Bytes requested; rounded up to whole pages.  Panics
 *			if the rounded size is 0.
 *	low, high	The first page must satisfy low <= phys < high and
 *			the run must not extend past 'high'.
 *	alignment	Required alignment of the first page's physical
 *			address.  Panics if not a power of 2.
 *	boundary	The run must not cross a multiple of this value.
 *			Panics if not a power of 2.
 *	mflags		Only M_WAITOK is examined here: it permits the
 *			slow flush of the active queues on passes 1 and 2.
 *
 * Returns the index into vm_page_array of the first page of the run,
 * or -1 if no suitable run could be found.  Pages obtained by the scan
 * below are returned wired and unbusied.
 */
static int
vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high,
		   unsigned long alignment, unsigned long boundary, int mflags)
{
	int i, q, start, pass;
	vm_paddr_t phys;	/* physical address, same type as low/high */
	vm_page_t pga = vm_page_array;
	vm_page_t m;
	int pqtype;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_alloc: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_contig_pg_alloc: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_contig_pg_alloc: boundary must be a power of 2");

	/*
	 * See if we can get the pages from the contiguous page reserve
	 * alist.  The returned pages will be allocated and wired but not
	 * busied.
	 */
	m = vm_page_alloc_contig(low, high, alignment, boundary, size);
	if (m)
		return (m - &pga[0]);

	/*
	 * Three passes (0, 1, 2).  Each pass scans the VM page list for
	 * free or cached pages.  After each pass if the entire scan failed
	 * we attempt to flush inactive pages and reset the start index back
	 * to 0.  For passes 1 and 2 we also attempt to flush active pages.
	 */
	start = 0;
	for (pass = 0; pass < 3; pass++) {
		/*
		 * Find first page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 *
		 * The boundary mask is computed in vm_paddr_t so that it
		 * covers every bit of the physical address.  A boundary of
		 * 0 yields an empty mask, i.e. no boundary restriction.
		 */
again:
		for (i = start; i < vmstats.v_page_count; i++) {
			m = &pga[i];
			phys = VM_PAGE_TO_PHYS(m);
			pqtype = m->queue - m->pc;
			if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) &
			      ~((vm_paddr_t)boundary - 1)) == 0) &&
			    m->busy == 0 && m->wire_count == 0 &&
			    m->hold_count == 0 &&
			    (m->flags & (PG_BUSY | PG_NEED_COMMIT)) == 0) {
				break;
			}
		}

		/*
		 * If we cannot find the page in the given range, or we have
		 * crossed the boundary, call the vm_contig_pg_clean() function
		 * for flushing out the queues, and returning it back to
		 * normal state.
		 */
		if ((i == vmstats.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			/*
			 * Best effort flush of all inactive pages.
			 * This is quite quick, for now stall all
			 * callers, even if they've specified M_NOWAIT.
			 */
			for (q = 0; q < PQ_L2_SIZE; ++q) {
				vm_contig_pg_clean(PQ_INACTIVE + q,
						   vmstats.v_inactive_count);
				lwkt_yield();
			}

			/*
			 * Best effort flush of active pages.
			 *
			 * This is very, very slow.
			 * Only do this if the caller has agreed to M_WAITOK.
			 *
			 * If enough pages are flushed, we may succeed on
			 * next (final) pass, if not the caller, contigmalloc(),
			 * will fail in the index < 0 case.
			 */
			if (pass > 0 && (mflags & M_WAITOK)) {
				for (q = 0; q < PQ_L2_SIZE; ++q) {
					vm_contig_pg_clean(PQ_ACTIVE + q,
						       vmstats.v_active_count);
				}
				lwkt_yield();
			}

			/*
			 * We're already too high in the address space
			 * to succeed, reset to 0 for the next iteration.
			 */
			start = 0;
			continue;	/* next pass */
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.  This is
		 * only an unlocked pre-check; each page is re-validated
		 * after it has been busied in the loop further below.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			m = &pga[i];
			pqtype = m->queue - m->pc;
			if ((VM_PAGE_TO_PHYS(&m[0]) !=
			    (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) ||
			    m->busy || m->wire_count ||
			    m->hold_count ||
			    (m->flags & (PG_BUSY | PG_NEED_COMMIT))) {
				start++;
				goto again;
			}
		}

		/*
		 * Try to allocate the pages, wiring them as we go.  On any
		 * failure the pages already taken, [start, i), are handed
		 * back via vm_contig_pg_free() and the scan resumes one
		 * page further on.
		 */
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			m = &pga[i];

			if (vm_page_busy_try(m, TRUE)) {
				vm_contig_pg_free(start,
						  (i - start) * PAGE_SIZE);
				start++;
				goto again;
			}
			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE &&
			    m->hold_count == 0 &&
			    m->wire_count == 0 &&
			    (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0) {
				/*
				 * Free the cached page, then step the index
				 * back so the same page is examined again.
				 */
				vm_page_protect(m, VM_PROT_NONE);
				KKASSERT((m->flags & PG_MAPPED) == 0);
				KKASSERT(m->dirty == 0);
				vm_page_free(m);
				--i;
				continue;	/* retry the page */
			}
			if (pqtype != PQ_FREE || m->hold_count) {
				vm_page_wakeup(m);
				vm_contig_pg_free(start,
						  (i - start) * PAGE_SIZE);
				start++;
				goto again;
			}
			KKASSERT((m->valid & m->dirty) == 0);
			KKASSERT(m->wire_count == 0);
			KKASSERT(m->object == NULL);
			vm_page_unqueue_nowakeup(m);
			m->valid = VM_PAGE_BITS_ALL;
			if (m->flags & PG_ZERO)
				vm_page_zero_count--;
			KASSERT(m->dirty == 0,
				("vm_contig_pg_alloc: page %p was dirty", m));
			KKASSERT(m->wire_count == 0);
			KKASSERT(m->busy == 0);

			/*
			 * Clear all flags except PG_BUSY, PG_SBUSY, PG_ZERO
			 * and PG_WANTED, then wire and unbusy the now
			 * allocated page.
			 */
			vm_page_flag_clear(m, ~(PG_BUSY | PG_SBUSY |
						PG_ZERO | PG_WANTED));
			vm_page_wire(m);
			vm_page_wakeup(m);
		}

		/*
		 * Our job is done, return the index page of vm_page_array.
		 */
		return (start);	/* aka &pga[start] */
	}

	/*
	 * Failed.
	 */
	return (-1);
}
/*
 * ttm_bo_vm_fault:
 *
 * Resolve a fault at byte 'offset' of the VM object backing a TTM
 * buffer object.
 *
 * vm_obj->handle is the ttm_buffer_object.  The object lock is dropped
 * while the bo is reserved and populated, and is re-acquired before
 * every return, so the caller is expected to hold the object write
 * lock on entry.  'prot' is not used.
 *
 * On entry *mres may hold a page supplied by the caller; it is removed
 * from its object and, on success, freed and replaced by the page that
 * backs the bo at 'offset'.  On failure *mres is left NULL.
 *
 * Returns VM_PAGER_OK on success and VM_PAGER_ERROR on failure.
 */
static int
ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{
	struct ttm_buffer_object *bo = vm_obj->handle;
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_tt *ttm = NULL;
	vm_page_t m, m1, oldm;
	int ret;
	int retval = VM_PAGER_OK;
	struct ttm_mem_type_manager *man =
		&bdev->man[bo->mem.mem_type];

	vm_object_pip_add(vm_obj, 1);
	oldm = *mres;
	if (oldm != NULL) {
		vm_page_remove(oldm);
		*mres = NULL;
	}
retry:
	VM_OBJECT_WUNLOCK(vm_obj);
	m = NULL;

reserve:
	ret = ttm_bo_reserve(bo, false, false, false, 0);
	if (unlikely(ret != 0)) {
		if (ret == -EBUSY) {
			lwkt_yield();
			goto reserve;
		}
		/*
		 * Any other failure means we do not hold the reservation.
		 * Continuing would touch the bo unprotected and later
		 * unreserve a bo we never reserved, so fail the fault.
		 */
		retval = VM_PAGER_ERROR;
		goto out_noreserve;
	}

	if (bdev->driver->fault_reserve_notify) {
		ret = bdev->driver->fault_reserve_notify(bo);
		switch (ret) {
		case 0:
			break;
		case -EBUSY:
		case -ERESTARTSYS:
		case -EINTR:
			lwkt_yield();
			goto reserve;
		default:
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	}

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */
	lockmgr(&bdev->fence_lock, LK_EXCLUSIVE);
	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
		/*
		 * Here, the behavior differs between Linux and FreeBSD.
		 *
		 * On Linux, the wait is interruptible (3rd argument to
		 * ttm_bo_wait). There must be some mechanism to resume
		 * page fault handling, once the signal is processed.
		 *
		 * On FreeBSD, the wait is uninterruptible. This is not a
		 * problem as we can't end up with an unkillable process
		 * here, because the wait will eventually time out.
		 *
		 * An example of this situation is the Xorg process
		 * which uses SIGALRM internally. The signal could
		 * interrupt the wait, causing the page fault to fail
		 * and the process to receive SIGSEGV.
		 */
		ret = ttm_bo_wait(bo, false, false, false);
		lockmgr(&bdev->fence_lock, LK_RELEASE);
		if (unlikely(ret != 0)) {
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	} else
		lockmgr(&bdev->fence_lock, LK_RELEASE);

	ret = ttm_mem_io_lock(man, true);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_unlock;
	}
	ret = ttm_mem_io_reserve_vm(bo);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_io_unlock;
	}

	/*
	 * Strictly, we're not allowed to modify vma->vm_page_prot here,
	 * since the mmap_sem is only held in read mode. However, we
	 * modify only the caching bits of vma->vm_page_prot and
	 * consider those bits protected by
	 * the bo->mutex, as we should be the only writers.
	 * There shouldn't really be any readers of these bits except
	 * within vm_insert_mixed()? fork?
	 *
	 * TODO: Add a list of vmas to the bo, and change the
	 * vma->vm_page_prot when the object changes caching policy, with
	 * the correct locks held.
	 */
	if (!bo->mem.bus.is_iomem) {
		/* Allocate all pages at once, most common usage */
		ttm = bo->ttm;
		if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
	}

	if (bo->mem.bus.is_iomem) {
		/* I/O memory: look up the fictitious page for the address. */
		m = vm_phys_fictitious_to_vm_page(bo->mem.bus.base +
		    bo->mem.bus.offset + offset);
		pmap_page_set_memattr(m, ttm_io_prot(bo->mem.placement));
	} else {
		/* System memory: take the page from the populated ttm. */
		ttm = bo->ttm;
		m = ttm->pages[OFF_TO_IDX(offset)];
		if (unlikely(!m)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
		pmap_page_set_memattr(m,
		    (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
		    VM_MEMATTR_WRITE_BACK : ttm_io_prot(bo->mem.placement));
	}

	VM_OBJECT_WLOCK(vm_obj);
	if ((m->flags & PG_BUSY) != 0) {
		/*
		 * Someone else has the page busied.  Drop everything and
		 * start over; 'retry' releases the object lock again.
		 */
#if 0
		vm_page_sleep(m, "ttmpbs");
#endif
		ttm_mem_io_unlock(man);
		ttm_bo_unreserve(bo);
		goto retry;
	}
	m->valid = VM_PAGE_BITS_ALL;
	*mres = m;
	m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m1 == NULL) {
		vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
	} else {
		KASSERT(m == m1,
		    ("inconsistent insert bo %p m %p m1 %p offset %jx",
		    bo, m, m1, (uintmax_t)offset));
	}
	/* NOTE(review): the result of this busy attempt is not checked. */
	vm_page_busy_try(m, FALSE);

	if (oldm != NULL) {
		vm_page_free(oldm);
	}

	/*
	 * Exit paths.  The "...1" labels expect the object lock to be held
	 * already; the labels below them re-acquire it first.
	 */
out_io_unlock1:
	ttm_mem_io_unlock(man);
out_unlock1:
	ttm_bo_unreserve(bo);
out_pip:
	vm_object_pip_wakeup(vm_obj);
	return (retval);

out_io_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_io_unlock1;

out_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_unlock1;

out_noreserve:
	/* The bo was never reserved: relock, but skip the unreserve. */
	VM_OBJECT_WLOCK(vm_obj);
	goto out_pip;
}
/* * vm_contig_pg_clean: * * Do a thorough cleanup of the specified 'queue', which can be either * PQ_ACTIVE or PQ_INACTIVE by doing a walkthrough. If the page is not * marked dirty, it is shoved into the page cache, provided no one has * currently aqcuired it, otherwise localized action per object type * is taken for cleanup: * * In the OBJT_VNODE case, the whole page range is cleaned up * using the vm_object_page_clean() routine, by specyfing a * start and end of '0'. * * Otherwise if the object is of any other type, the generic * pageout (daemon) flush routine is invoked. */ static void vm_contig_pg_clean(int queue, int count) { vm_object_t object; vm_page_t m, m_tmp; struct vm_page marker; struct vpgqueues *pq = &vm_page_queues[queue]; /* * Setup a local marker */ bzero(&marker, sizeof(marker)); marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; marker.queue = queue; marker.wire_count = 1; vm_page_queues_spin_lock(queue); TAILQ_INSERT_HEAD(&pq->pl, &marker, pageq); vm_page_queues_spin_unlock(queue); /* * Iterate the queue. Note that the vm_page spinlock must be * acquired before the pageq spinlock so it's easiest to simply * not hold it in the loop iteration. 
*/ while (count-- > 0 && (m = TAILQ_NEXT(&marker, pageq)) != NULL) { vm_page_and_queue_spin_lock(m); if (m != TAILQ_NEXT(&marker, pageq)) { vm_page_and_queue_spin_unlock(m); ++count; continue; } KKASSERT(m->queue == queue); TAILQ_REMOVE(&pq->pl, &marker, pageq); TAILQ_INSERT_AFTER(&pq->pl, m, &marker, pageq); if (m->flags & PG_MARKER) { vm_page_and_queue_spin_unlock(m); continue; } if (vm_page_busy_try(m, TRUE)) { vm_page_and_queue_spin_unlock(m); continue; } vm_page_and_queue_spin_unlock(m); /* * We've successfully busied the page */ if (m->queue - m->pc != queue) { vm_page_wakeup(m); continue; } if (m->wire_count || m->hold_count) { vm_page_wakeup(m); continue; } if ((object = m->object) == NULL) { vm_page_wakeup(m); continue; } vm_page_test_dirty(m); if (m->dirty || (m->flags & PG_NEED_COMMIT)) { vm_object_hold(object); KKASSERT(m->object == object); if (object->type == OBJT_VNODE) { vm_page_wakeup(m); vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY); vm_object_page_clean(object, 0, 0, OBJPC_SYNC); vn_unlock(((struct vnode *)object->handle)); } else if (object->type == OBJT_SWAP || object->type == OBJT_DEFAULT) { m_tmp = m; vm_pageout_flush(&m_tmp, 1, 0); } else { vm_page_wakeup(m); } vm_object_drop(object); } else if (m->hold_count == 0) { vm_page_cache(m); } else { vm_page_wakeup(m); } } /* * Scrap our local marker */ vm_page_queues_spin_lock(queue); TAILQ_REMOVE(&pq->pl, &marker, pageq); vm_page_queues_spin_unlock(queue); }