Example #1
static vm_page_t
os_kmem_alloc(int alloc_normal_failed) // IN
{
   vm_page_t page;
   vm_pindex_t pindex;
   os_state *state = &global_state;
   os_pmap *pmap = &state->pmap;

   pindex = os_pmap_getindex(pmap);
   if (pindex == (vm_pindex_t)-1) {
      return NULL;
   }

   /*
    * BSD's page allocator does not support flags that are similar to
    * KM_NOSLEEP and KM_SLEEP in Solaris. The main page allocation function
    * vm_page_alloc() does not sleep ever. It just returns NULL when it
    * cannot find a free (or cached-and-clean) page. Therefore, we use
    * VM_ALLOC_NORMAL and VM_ALLOC_SYSTEM loosely to mean KM_NOSLEEP
    * and KM_SLEEP respectively.
    */
   if (alloc_normal_failed) {
      page = vm_page_alloc(state->vmobject, pindex, VM_ALLOC_SYSTEM);
   } else {
      page = vm_page_alloc(state->vmobject, pindex, VM_ALLOC_NORMAL);
   }

   if (!page) {
      os_pmap_putindex(pmap, pindex);
   }

   return page;
}
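
A minimal caller sketch (not part of the example above, added for illustration): the comment explains that VM_ALLOC_NORMAL and VM_ALLOC_SYSTEM stand in loosely for Solaris' KM_NOSLEEP and KM_SLEEP, so a caller would typically make a cheap first pass and only escalate on failure. The wrapper name os_kmem_alloc_retry is hypothetical.

static vm_page_t
os_kmem_alloc_retry(void)
{
   vm_page_t page;

   /* First pass: VM_ALLOC_NORMAL, the KM_NOSLEEP-like attempt. */
   page = os_kmem_alloc(0);
   if (page == NULL) {
      /* Second pass: VM_ALLOC_SYSTEM, the KM_SLEEP-like attempt. */
      page = os_kmem_alloc(1);
   }
   return page;
}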
Example #2
void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	static vm_pindex_t colour;
	vm_page_t m;
	vm_paddr_t pa;
	void *va;
	int pflags;

	*flags = UMA_SLAB_PRIV;
	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
	else
		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;
	if (wait & M_ZERO)
		pflags |= VM_ALLOC_ZERO;
	for (;;) {
		m = vm_page_alloc(NULL, colour++, pflags | VM_ALLOC_NOOBJ);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			else
				VM_WAIT;
		} else
			break;
	}
	pa = m->phys_addr;
	dump_add_page(pa);
	va = (void *)PHYS_TO_DMAP(pa);
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		pagezero(va);
	return (va);
}
Example #3
void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	void *va;
	vm_page_t m;
	int pflags;

	*flags = UMA_SLAB_PRIV;
	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;

	for (;;) {
		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			VM_WAIT;
		} else
			break;
	}

	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		bzero(va, PAGE_SIZE);
	return (va);
}
Example #4
void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	static vm_pindex_t color;
	void *va;
	vm_page_t m;
	int pflags;

	*flags = UMA_SLAB_PRIV;
	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
	else
		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;
	if (wait & M_ZERO)
		pflags |= VM_ALLOC_ZERO;

	for (;;) {
		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			VM_WAIT;
		} else
			break;
	}

	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		bzero(va, PAGE_SIZE);
	return (va);
}
Example #5
void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
    void *va;
    vm_page_t m;
    int pflags;

    *flags = UMA_SLAB_PRIV;
    pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;

    for (;;) {
        m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
        if (m == NULL) {
            if (wait & M_NOWAIT)
                return (NULL);
            VM_WAIT;
        } else
            break;
    }

    va = (void *) VM_PAGE_TO_PHYS(m);

    if (!hw_direct_map)
        pmap_kenter((vm_offset_t)va, VM_PAGE_TO_PHYS(m));

    if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
        bzero(va, PAGE_SIZE);
    atomic_add_int(&hw_uma_mdpages, 1);

    return (va);
}
Example #6
void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_page_t m;
	vm_paddr_t pa;
	void *va;
	int pflags;

	*flags = UMA_SLAB_PRIV;
	pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
	for (;;) {
		m = vm_page_alloc(NULL, 0, pflags);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			else
				VM_WAIT;
		} else
			break;
	}
	pa = m->phys_addr;
	if ((wait & M_NODUMP) == 0)
		dump_add_page(pa);
	va = (void *)PHYS_TO_DMAP(pa);
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		pagezero(va);
	return (va);
}
Example #7
void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	void *ret;
	struct arm_small_page *sp;
	TAILQ_HEAD(,arm_small_page) *head;
	vm_page_t m;

	*flags = UMA_SLAB_PRIV;
	/*
	 * For CPUs where we setup page tables as write back, there's no
	 * need to maintain two separate pools.
	 */
	if (zone == l2zone && pte_l1_s_cache_mode != pte_l1_s_cache_mode_pt)
		head = (void *)&pages_wt;
	else
		head = (void *)&pages_normal;

	mtx_lock(&smallalloc_mtx);
	sp = TAILQ_FIRST(head);

	if (!sp) {
		int pflags;

		mtx_unlock(&smallalloc_mtx);
		if (zone == l2zone &&
		    pte_l1_s_cache_mode != pte_l1_s_cache_mode_pt) {
			*flags = UMA_SLAB_KMEM;
			ret = ((void *)kmem_malloc(kmem_arena, bytes,
			    M_NOWAIT));
			return (ret);
		}
		pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
		for (;;) {
			m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
			if (m == NULL) {
				if (wait & M_NOWAIT)
					return (NULL);
				VM_WAIT;
			} else
				break;
		}
		ret = (void *)arm_ptovirt(VM_PAGE_TO_PHYS(m));
		if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
			bzero(ret, PAGE_SIZE);
		return (ret);
	}
	TAILQ_REMOVE(head, sp, pg_list);
	TAILQ_INSERT_HEAD(&free_pgdesc, sp, pg_list);
	ret = sp->addr;
	mtx_unlock(&smallalloc_mtx);
	if ((wait & M_ZERO))
		bzero(ret, bytes);
	return (ret);
}
Example #8
/*
 *	Allocate new wired pages in an object.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_alloc_pages(
	vm_object_t	object,
	vm_offset_t	offset,
	vm_offset_t	start, 
	vm_offset_t	end,
	vm_prot_t	protection)
{
	/*
	 *	Mark the pmap region as not pageable.
	 */
	pmap_pageable(kernel_pmap, start, end, FALSE);

	while (start < end) {
	    vm_page_t	mem;

	    vm_object_lock(object);

	    /*
	     *	Allocate a page
	     */
	    while ((mem = vm_page_alloc(object, offset))
			 == VM_PAGE_NULL) {
		vm_object_unlock(object);
		VM_PAGE_WAIT((void (*)()) 0);
		vm_object_lock(object);
	    }

	    /*
	     *	Wire it down
	     */
	    vm_page_lock_queues();
	    vm_page_wire(mem);
	    vm_page_unlock_queues();
	    vm_object_unlock(object);

	    /*
	     *	Enter it in the kernel pmap
	     */
	    PMAP_ENTER(kernel_pmap, start, mem,
		       protection, TRUE);

	    vm_object_lock(object);
	    PAGE_WAKEUP_DONE(mem);
	    vm_object_unlock(object);

	    start += PAGE_SIZE;
	    offset += PAGE_SIZE;
	}
}
Example #9
int
i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct i915_hw_ppgtt *ppgtt;
	u_int first_pd_entry_in_global_pt, i;

	dev_priv = dev->dev_private;

	/*
	 * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now.
	 */
	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;

	ppgtt = malloc(sizeof(*ppgtt), DRM_I915_GEM, M_WAITOK | M_ZERO);

	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
	ppgtt->pt_pages = malloc(sizeof(vm_page_t) * ppgtt->num_pd_entries,
	    DRM_I915_GEM, M_WAITOK | M_ZERO);

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (ppgtt->pt_pages[i] == NULL) {
			dev_priv->mm.aliasing_ppgtt = ppgtt;
			i915_gem_cleanup_aliasing_ppgtt(dev);
			return (-ENOMEM);
		}
	}

	ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt.scratch_page_dma;

	i915_ppgtt_clear_range(ppgtt, 0, ppgtt->num_pd_entries *
	    I915_PPGTT_PT_ENTRIES);
	ppgtt->pd_offset = (first_pd_entry_in_global_pt) * sizeof(uint32_t);
	dev_priv->mm.aliasing_ppgtt = ppgtt;
	return (0);
}
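
A quick check of the arithmetic above (assuming I915_PPGTT_PD_ENTRIES is 512; the constant is not shown in this excerpt): first_pd_entry_in_global_pt = 512 * 1024 - 512 = 523776, so the stolen PDEs are the last 512 of the 524288 global-GTT entries, and pd_offset = 523776 * sizeof(uint32_t) = 2095104 bytes, i.e. 2 KiB short of the end of the 2 MiB global GTT page table.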
Example #10
void
kmem_init_zero_region(void)
{
	vm_offset_t addr, i;
	vm_page_t m;

	/*
	 * Map a single physical page of zeros to a larger virtual range.
	 * This requires less looping in places that want large amounts of
	 * zeros, while not using much more physical resources.
	 */
	addr = kva_alloc(ZERO_REGION_SIZE);
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
		pmap_qenter(addr + i, &m, 1);
	pmap_protect(kernel_pmap, addr, addr + ZERO_REGION_SIZE, VM_PROT_READ);

	zero_region = (const void *)addr;
}
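
A hypothetical consumer sketch (not part of the example above): code that needs a stream of zeros can copy out of zero_region in ZERO_REGION_SIZE chunks instead of zeroing a scratch buffer, which is exactly what the single shared zero page makes cheap. The helper name zero_copyout is an assumption for illustration.

static int
zero_copyout(void *udata, size_t len)
{
	size_t chunk;
	int error;

	while (len > 0) {
		/* Copy at most one zero-region's worth per iteration. */
		chunk = len < ZERO_REGION_SIZE ? len : ZERO_REGION_SIZE;
		error = copyout(zero_region, udata, chunk);
		if (error != 0)
			return (error);
		udata = (char *)udata + chunk;
		len -= chunk;
	}
	return (0);
}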
Example #11
void *
uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
{
	vm_paddr_t pa;
	vm_page_t m;
	int pflags;
	void *va;

	*flags = UMA_SLAB_PRIV;
	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;

	for (;;) {
#ifdef MIPS64_NEW_PMAP
		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
#else /* ! MIPS64_NEW_PMAP */
		m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags);
#endif /* ! MIPS64_NEW_PMAP */
#ifndef __mips_n64
		if (m == NULL && vm_page_reclaim_contig(pflags, 1,
		    0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0))
			continue;
#endif
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			else
				VM_WAIT;
		} else
			break;
	}

	pa = VM_PAGE_TO_PHYS(m);
	if ((wait & M_NODUMP) == 0)
		dump_add_page(pa);
	va = (void *)MIPS_PHYS_TO_DIRECT(pa);
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		bzero(va, PAGE_SIZE);
	return (va);
}
Example #12
void *
uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
{
	vm_paddr_t pa;
	vm_page_t m;
	int pflags;
	void *va;

	PMAP_STATS_INC(uma_nsmall_alloc);

	*flags = UMA_SLAB_PRIV;
	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;

	for (;;) {
		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			else
				VM_WAIT;
		} else
			break;
	}

	pa = VM_PAGE_TO_PHYS(m);
	if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
		KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
		    ("uma_small_alloc: free page %p still has mappings!", m));
		PMAP_STATS_INC(uma_nsmall_alloc_oc);
		m->md.color = DCACHE_COLOR(pa);
		dcache_page_inval(pa);
	}
	va = (void *)TLB_PHYS_TO_DIRECT(pa);
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		cpu_block_zero(va, PAGE_SIZE);
	return (va);
}
Example #13
static int
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
	vm_object_t object;
	vm_page_t m, ma[1];
	vm_pindex_t idx, nobjsize;
	vm_ooffset_t delta;
	int base, rv;

	object = shmfd->shm_object;
	VM_OBJECT_LOCK(object);
	if (length == shmfd->shm_size) {
		VM_OBJECT_UNLOCK(object);
		return (0);
	}
	nobjsize = OFF_TO_IDX(length + PAGE_MASK);

	/* Are we shrinking?  If so, trim the end. */
	if (length < shmfd->shm_size) {
		/*
		 * Disallow any requests to shrink the size if this
		 * object is mapped into the kernel.
		 */
		if (shmfd->shm_kmappings > 0) {
			VM_OBJECT_UNLOCK(object);
			return (EBUSY);
		}

		/*
		 * Zero the truncated part of the last page.
		 */
		base = length & PAGE_MASK;
		if (base != 0) {
			idx = OFF_TO_IDX(length);
retry:
			m = vm_page_lookup(object, idx);
			if (m != NULL) {
				if ((m->oflags & VPO_BUSY) != 0 ||
				    m->busy != 0) {
					vm_page_sleep(m, "shmtrc");
					goto retry;
				}
			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
				m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
				if (m == NULL) {
					VM_OBJECT_UNLOCK(object);
					VM_WAIT;
					VM_OBJECT_LOCK(object);
					goto retry;
				} else if (m->valid != VM_PAGE_BITS_ALL) {
					ma[0] = m;
					rv = vm_pager_get_pages(object, ma, 1,
					    0);
					m = vm_page_lookup(object, idx);
				} else
					/* A cached page was reactivated. */
					rv = VM_PAGER_OK;
				vm_page_lock(m);
				if (rv == VM_PAGER_OK) {
					vm_page_deactivate(m);
					vm_page_unlock(m);
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
					vm_page_unlock(m);
					VM_OBJECT_UNLOCK(object);
					return (EIO);
				}
			}
			if (m != NULL) {
				pmap_zero_page_area(m, base, PAGE_SIZE - base);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("shm_dotruncate: page %p is invalid", m));
				vm_page_dirty(m);
				vm_pager_page_unswapped(m);
			}
		}
		delta = ptoa(object->size - nobjsize);

		/* Toss in memory pages. */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    0);

		/* Toss pages from swap. */
		if (object->type == OBJT_SWAP)
			swap_pager_freespace(object, nobjsize, delta);

		/* Free the swap accounted for shm */
		swap_release_by_cred(delta, object->cred);
		object->charge -= delta;
	} else {
		/* Attempt to reserve the swap */
		delta = ptoa(nobjsize - object->size);
		if (!swap_reserve_by_cred(delta, object->cred)) {
			VM_OBJECT_UNLOCK(object);
			return (ENOMEM);
		}
		object->charge += delta;
	}
	shmfd->shm_size = length;
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_ctime);
	shmfd->shm_mtime = shmfd->shm_ctime;
	mtx_unlock(&shm_timestamp_lock);
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
	return (0);
}
Example #14
int
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags, vm_page_t *m_hold)
{
	vm_prot_t prot;
	long ahead, behind;
	int alloc_req, era, faultcount, nera, reqpage, result;
	boolean_t growstack, is_first_object_locked, wired;
	int map_generation;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ_MAX];
	int hardfault;
	struct faultstate fs;
	struct vnode *vp;
	int locked, error;

	hardfault = 0;
	growstack = TRUE;
	PCPU_INC(cnt.v_vm_faults);
	fs.vp = NULL;
	faultcount = reqpage = 0;

RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
	 */
	fs.map = map;
	result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
	    &fs.first_object, &fs.first_pindex, &prot, &wired);
	if (result != KERN_SUCCESS) {
		if (growstack && result == KERN_INVALID_ADDRESS &&
		    map != kernel_map) {
			result = vm_map_growstack(curproc, vaddr);
			if (result != KERN_SUCCESS)
				return (KERN_FAILURE);
			growstack = FALSE;
			goto RetryFault;
		}
		return (result);
	}

	map_generation = fs.map->timestamp;

	if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
		panic("vm_fault: fault on nofault entry, addr: %lx",
		    (u_long)vaddr);
	}

	/*
	 * Make a reference to this object to prevent its disposal while we
	 * are messing with it.  Once we have the reference, the map is free
	 * to be diddled.  Since objects reference their shadows (and copies),
	 * they will stay around as well.
	 *
	 * Bump the paging-in-progress count to prevent size changes (e.g. 
	 * truncation operations) during I/O.  This must be done after
	 * obtaining the vnode lock in order to avoid possible deadlocks.
	 */
	VM_OBJECT_WLOCK(fs.first_object);
	vm_object_reference_locked(fs.first_object);
	vm_object_pip_add(fs.first_object, 1);

	fs.lookup_still_valid = TRUE;

	if (wired)
		fault_type = prot | (fault_type & VM_PROT_COPY);

	fs.first_m = NULL;

	/*
	 * Search for the page at object/offset.
	 */
	fs.object = fs.first_object;
	fs.pindex = fs.first_pindex;
	while (TRUE) {
		/*
		 * If the object is dead, we stop here
		 */
		if (fs.object->flags & OBJ_DEAD) {
			unlock_and_deallocate(&fs);
			return (KERN_PROTECTION_FAILURE);
		}

		/*
		 * See if page is resident
		 */
		fs.m = vm_page_lookup(fs.object, fs.pindex);
		if (fs.m != NULL) {
			/* 
			 * check for page-based copy on write.
			 * We check fs.object == fs.first_object so
			 * as to ensure the legacy COW mechanism is
			 * used when the page in question is part of
			 * a shadow object.  Otherwise, vm_page_cowfault()
			 * removes the page from the backing object, 
			 * which is not what we want.
			 */
			vm_page_lock(fs.m);
			if ((fs.m->cow) && 
			    (fault_type & VM_PROT_WRITE) &&
			    (fs.object == fs.first_object)) {
				vm_page_cowfault(fs.m);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Wait/Retry if the page is busy.  We have to do this
			 * if the page is busy via either VPO_BUSY or 
			 * vm_page_t->busy because the vm_pager may be using
			 * vm_page_t->busy for pageouts ( and even pageins if
			 * it is the vnode pager ), and we could end up trying
			 * to pagein and pageout the same page simultaneously.
			 *
			 * We can theoretically allow the busy case on a read
			 * fault if the page is marked valid, but since such
			 * pages are typically already pmap'd, putting that
			 * special case in might be more effort then it is 
			 * worth.  We cannot under any circumstances mess
			 * around with a vm_page_t->busy page except, perhaps,
			 * to pmap it.
			 */
			if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it. 
				 */
				vm_page_aflag_set(fs.m, PGA_REFERENCED);
				vm_page_unlock(fs.m);
				if (fs.object != fs.first_object) {
					if (!VM_OBJECT_TRYWLOCK(
					    fs.first_object)) {
						VM_OBJECT_WUNLOCK(fs.object);
						VM_OBJECT_WLOCK(fs.first_object);
						VM_OBJECT_WLOCK(fs.object);
					}
					vm_page_lock(fs.first_m);
					vm_page_free(fs.first_m);
					vm_page_unlock(fs.first_m);
					vm_object_pip_wakeup(fs.first_object);
					VM_OBJECT_WUNLOCK(fs.first_object);
					fs.first_m = NULL;
				}
				unlock_map(&fs);
				if (fs.m == vm_page_lookup(fs.object,
				    fs.pindex)) {
					vm_page_sleep_if_busy(fs.m, TRUE,
					    "vmpfw");
				}
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_WUNLOCK(fs.object);
				PCPU_INC(cnt.v_intrans);
				vm_object_deallocate(fs.first_object);
				goto RetryFault;
			}
			vm_page_remque(fs.m);
			vm_page_unlock(fs.m);

			/*
			 * Mark page busy for other processes, and the 
			 * pagedaemon.  If it still isn't completely valid
			 * (readable), jump to readrest, else break-out ( we
			 * found the page ).
			 */
			vm_page_busy(fs.m);
			if (fs.m->valid != VM_PAGE_BITS_ALL)
				goto readrest;
			break;
		}

		/*
		 * Page is not resident, If this is the search termination
		 * or the pager might contain the page, allocate a new page.
		 */
		if (TRYPAGER || fs.object == fs.first_object) {
			if (fs.pindex >= fs.object->size) {
				unlock_and_deallocate(&fs);
				return (KERN_PROTECTION_FAILURE);
			}

			/*
			 * Allocate a new page for this object/offset pair.
			 *
			 * Unlocked read of the p_flag is harmless. At
			 * worst, the P_KILLED might be not observed
			 * there, and allocation can fail, causing
			 * restart and new reading of the p_flag.
			 */
			fs.m = NULL;
			if (!vm_page_count_severe() || P_KILLED(curproc)) {
#if VM_NRESERVLEVEL > 0
				if ((fs.object->flags & OBJ_COLORED) == 0) {
					fs.object->flags |= OBJ_COLORED;
					fs.object->pg_color = atop(vaddr) -
					    fs.pindex;
				}
#endif
				alloc_req = P_KILLED(curproc) ?
				    VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL;
				if (fs.object->type != OBJT_VNODE &&
				    fs.object->backing_object == NULL)
					alloc_req |= VM_ALLOC_ZERO;
				fs.m = vm_page_alloc(fs.object, fs.pindex,
				    alloc_req);
			}
			if (fs.m == NULL) {
				unlock_and_deallocate(&fs);
				VM_WAITPFAULT;
				goto RetryFault;
			} else if (fs.m->valid == VM_PAGE_BITS_ALL)
				break;
		}

readrest:
		/*
		 * We have found a valid page or we have allocated a new page.
		 * The page thus may not be valid or may not be entirely 
		 * valid.
		 *
		 * Attempt to fault-in the page if there is a chance that the
		 * pager has it, and potentially fault in additional pages
		 * at the same time.
		 */
		if (TRYPAGER) {
			int rv;
			u_char behavior = vm_map_entry_behavior(fs.entry);

			if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
			    P_KILLED(curproc)) {
				behind = 0;
				ahead = 0;
			} else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
				behind = 0;
				ahead = atop(fs.entry->end - vaddr) - 1;
				if (ahead > VM_FAULT_READ_AHEAD_MAX)
					ahead = VM_FAULT_READ_AHEAD_MAX;
				if (fs.pindex == fs.entry->next_read)
					vm_fault_cache_behind(&fs,
					    VM_FAULT_READ_MAX);
			} else {
				/*
				 * If this is a sequential page fault, then
				 * arithmetically increase the number of pages
				 * in the read-ahead window.  Otherwise, reset
				 * the read-ahead window to its smallest size.
				 */
				behind = atop(vaddr - fs.entry->start);
				if (behind > VM_FAULT_READ_BEHIND)
					behind = VM_FAULT_READ_BEHIND;
				ahead = atop(fs.entry->end - vaddr) - 1;
				era = fs.entry->read_ahead;
				if (fs.pindex == fs.entry->next_read) {
					nera = era + behind;
					if (nera > VM_FAULT_READ_AHEAD_MAX)
						nera = VM_FAULT_READ_AHEAD_MAX;
					behind = 0;
					if (ahead > nera)
						ahead = nera;
					if (era == VM_FAULT_READ_AHEAD_MAX)
						vm_fault_cache_behind(&fs,
						    VM_FAULT_CACHE_BEHIND);
				} else if (ahead > VM_FAULT_READ_AHEAD_MIN)
					ahead = VM_FAULT_READ_AHEAD_MIN;
				if (era != ahead)
					fs.entry->read_ahead = ahead;
			}

			/*
			 * Call the pager to retrieve the data, if any, after
			 * releasing the lock on the map.  We hold a ref on
			 * fs.object and the pages are VPO_BUSY'd.
			 */
			unlock_map(&fs);

			if (fs.object->type == OBJT_VNODE) {
				vp = fs.object->handle;
				if (vp == fs.vp)
					goto vnode_locked;
				else if (fs.vp != NULL) {
					vput(fs.vp);
					fs.vp = NULL;
				}
				locked = VOP_ISLOCKED(vp);

				if (locked != LK_EXCLUSIVE)
					locked = LK_SHARED;
				/* Do not sleep for vnode lock while fs.m is busy */
				error = vget(vp, locked | LK_CANRECURSE |
				    LK_NOWAIT, curthread);
				if (error != 0) {
					vhold(vp);
					release_page(&fs);
					unlock_and_deallocate(&fs);
					error = vget(vp, locked | LK_RETRY |
					    LK_CANRECURSE, curthread);
					vdrop(vp);
					fs.vp = vp;
					KASSERT(error == 0,
					    ("vm_fault: vget failed"));
					goto RetryFault;
				}
				fs.vp = vp;
			}
vnode_locked:
			KASSERT(fs.vp == NULL || !fs.map->system_map,
			    ("vm_fault: vnode-backed object mapped by system map"));

			/*
			 * now we find out if any other pages should be paged
			 * in at this time this routine checks to see if the
			 * pages surrounding this fault reside in the same
			 * object as the page for this fault.  If they do,
			 * then they are faulted in also into the object.  The
			 * array "marray" returned contains an array of
			 * vm_page_t structs where one of them is the
			 * vm_page_t passed to the routine.  The reqpage
			 * return value is the index into the marray for the
			 * vm_page_t passed to the routine.
			 *
			 * fs.m plus the additional pages are VPO_BUSY'd.
			 */
			faultcount = vm_fault_additional_pages(
			    fs.m, behind, ahead, marray, &reqpage);

			rv = faultcount ?
			    vm_pager_get_pages(fs.object, marray, faultcount,
				reqpage) : VM_PAGER_FAIL;

			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page. Leave it busy while we play
				 * with it.
				 */

				/*
				 * Relookup in case pager changed page. Pager
				 * is responsible for disposition of old page
				 * if moved.
				 */
				fs.m = vm_page_lookup(fs.object, fs.pindex);
				if (!fs.m) {
					unlock_and_deallocate(&fs);
					goto RetryFault;
				}

				hardfault++;
				break; /* break to PAGE HAS BEEN FOUND */
			}
			/*
			 * Remove the bogus page (which does not exist at this
			 * object/offset); before doing so, we must get back
			 * our object lock to preserve our invariant.
			 *
			 * Also wake up any other process that may want to bring
			 * in this page.
			 *
			 * If this is the top-level object, we must leave the
			 * busy page to prevent another process from rushing
			 * past us, and inserting the page in that object at
			 * the same time that we are.
			 */
			if (rv == VM_PAGER_ERROR)
				printf("vm_fault: pager read error, pid %d (%s)\n",
				    curproc->p_pid, curproc->p_comm);
			/*
			 * Data outside the range of the pager or an I/O error
			 */
			/*
			 * XXX - the check for kernel_map is a kludge to work
			 * around having the machine panic on a kernel space
			 * fault w/ I/O error.
			 */
			if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) ||
				(rv == VM_PAGER_BAD)) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				unlock_and_deallocate(&fs);
				return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
			}
			if (fs.object != fs.first_object) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}

		/*
		 * We get here if the object has default pager (or unwiring) 
		 * or the pager doesn't have the page.
		 */
		if (fs.object == fs.first_object)
			fs.first_m = fs.m;

		/*
		 * Move on to the next object.  Lock the next object before
		 * unlocking the current one.
		 */
		fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset);
		next_object = fs.object->backing_object;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page in the top
			 * object with zeros.
			 */
			if (fs.object != fs.first_object) {
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_WUNLOCK(fs.object);

				fs.object = fs.first_object;
				fs.pindex = fs.first_pindex;
				fs.m = fs.first_m;
				VM_OBJECT_WLOCK(fs.object);
			}
			fs.first_m = NULL;

			/*
			 * Zero the page if necessary and mark it valid.
			 */
			if ((fs.m->flags & PG_ZERO) == 0) {
				pmap_zero_page(fs.m);
			} else {
				PCPU_INC(cnt.v_ozfod);
			}
			PCPU_INC(cnt.v_zfod);
			fs.m->valid = VM_PAGE_BITS_ALL;
			break;	/* break to PAGE HAS BEEN FOUND */
		} else {
			KASSERT(fs.object != next_object,
			    ("object loop %p", next_object));
			VM_OBJECT_WLOCK(next_object);
			vm_object_pip_add(next_object, 1);
			if (fs.object != fs.first_object)
				vm_object_pip_wakeup(fs.object);
			VM_OBJECT_WUNLOCK(fs.object);
			fs.object = next_object;
		}
	}

	KASSERT((fs.m->oflags & VPO_BUSY) != 0,
	    ("vm_fault: not busy after main loop"));

	/*
	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	/*
	 * If the page is being written, but isn't already owned by the
	 * top-level object, we have to copy it into a new page owned by the
	 * top-level object.
	 */
	if (fs.object != fs.first_object) {
		/*
		 * We only really need to copy if we want to write it.
		 */
		if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
			/*
			 * This allows pages to be virtually copied from a 
			 * backing_object into the first_object, where the 
			 * backing object has no other refs to it, and cannot
			 * gain any more refs.  Instead of a bcopy, we just 
			 * move the page from the backing object to the 
			 * first object.  Note that we must mark the page 
			 * dirty in the first object so that it will go out 
			 * to swap when needed.
			 */
			is_first_object_locked = FALSE;
			if (
				/*
				 * Only one shadow object
				 */
				(fs.object->shadow_count == 1) &&
				/*
				 * No COW refs, except us
				 */
				(fs.object->ref_count == 1) &&
				/*
				 * No one else can look this object up
				 */
				(fs.object->handle == NULL) &&
				/*
				 * No other ways to look the object up
				 */
				((fs.object->type == OBJT_DEFAULT) ||
				 (fs.object->type == OBJT_SWAP)) &&
			    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) &&
				/*
				 * We don't chase down the shadow chain
				 */
			    fs.object == fs.first_object->backing_object) {
				/*
				 * get rid of the unnecessary page
				 */
				vm_page_lock(fs.first_m);
				vm_page_free(fs.first_m);
				vm_page_unlock(fs.first_m);
				/*
				 * grab the page and put it into the 
				 * process'es object.  The page is 
				 * automatically made dirty.
				 */
				vm_page_lock(fs.m);
				vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
				vm_page_unlock(fs.m);
				vm_page_busy(fs.m);
				fs.first_m = fs.m;
				fs.m = NULL;
				PCPU_INC(cnt.v_cow_optim);
			} else {
				/*
				 * Oh, well, lets copy it.
				 */
				pmap_copy_page(fs.m, fs.first_m);
				fs.first_m->valid = VM_PAGE_BITS_ALL;
				if (wired && (fault_flags &
				    VM_FAULT_CHANGE_WIRING) == 0) {
					vm_page_lock(fs.first_m);
					vm_page_wire(fs.first_m);
					vm_page_unlock(fs.first_m);
					
					vm_page_lock(fs.m);
					vm_page_unwire(fs.m, FALSE);
					vm_page_unlock(fs.m);
				}
				/*
				 * We no longer need the old page or object.
				 */
				release_page(&fs);
			}
			/*
			 * fs.object != fs.first_object due to above 
			 * conditional
			 */
			vm_object_pip_wakeup(fs.object);
			VM_OBJECT_WUNLOCK(fs.object);
			/*
			 * Only use the new page below...
			 */
			fs.object = fs.first_object;
			fs.pindex = fs.first_pindex;
			fs.m = fs.first_m;
			if (!is_first_object_locked)
				VM_OBJECT_WLOCK(fs.object);
			PCPU_INC(cnt.v_cow_faults);
			curthread->td_cow++;
		} else {
			prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * We must verify that the maps have not changed since our last
	 * lookup.
	 */
	if (!fs.lookup_still_valid) {
		vm_object_t retry_object;
		vm_pindex_t retry_pindex;
		vm_prot_t retry_prot;

		if (!vm_map_trylock_read(fs.map)) {
			release_page(&fs);
			unlock_and_deallocate(&fs);
			goto RetryFault;
		}
		fs.lookup_still_valid = TRUE;
		if (fs.map->timestamp != map_generation) {
			result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
			    &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);

			/*
			 * If we don't need the page any longer, put it on the inactive
			 * list (the easiest thing to do here).  If no one needs it,
			 * pageout will grab it eventually.
			 */
			if (result != KERN_SUCCESS) {
				release_page(&fs);
				unlock_and_deallocate(&fs);

				/*
				 * If retry of map lookup would have blocked then
				 * retry fault from start.
				 */
				if (result == KERN_FAILURE)
					goto RetryFault;
				return (result);
			}
			if ((retry_object != fs.first_object) ||
			    (retry_pindex != fs.first_pindex)) {
				release_page(&fs);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Check whether the protection has changed or the object has
			 * been copied while we left the map unlocked. Changing from
			 * read to write permission is OK - we leave the page
			 * write-protected, and catch the write fault. Changing from
			 * write to read permission means that we can't mark the page
			 * write-enabled after all.
			 */
			prot &= retry_prot;
		}
	}
	/*
	 * If the page was filled by a pager, update the map entry's
	 * last read offset.  Since the pager does not return the
	 * actual set of pages that it read, this update is based on
	 * the requested set.  Typically, the requested and actual
	 * sets are the same.
	 *
	 * XXX The following assignment modifies the map
	 * without holding a write lock on it.
	 */
	if (hardfault)
		fs.entry->next_read = fs.pindex + faultcount - reqpage;

	if ((prot & VM_PROT_WRITE) != 0 ||
	    (fault_flags & VM_FAULT_DIRTY) != 0) {
		vm_object_set_writeable_dirty(fs.object);

		/*
		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
		 * if the page is already dirty to prevent data written with
		 * the expectation of being synced from not being synced.
		 * Likewise if this entry does not request NOSYNC then make
		 * sure the page isn't marked NOSYNC.  Applications sharing
		 * data should use the same flags to avoid ping ponging.
		 */
		if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
			if (fs.m->dirty == 0)
				fs.m->oflags |= VPO_NOSYNC;
		} else {
			fs.m->oflags &= ~VPO_NOSYNC;
		}

		/*
		 * If the fault is a write, we know that this page is being
		 * written NOW so dirty it explicitly to save on 
		 * pmap_is_modified() calls later.
		 *
		 * Also tell the backing pager, if any, that it should remove
		 * any swap backing since the page is now dirty.
		 */
		if (((fault_type & VM_PROT_WRITE) != 0 &&
		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
		    (fault_flags & VM_FAULT_DIRTY) != 0) {
			vm_page_dirty(fs.m);
			vm_pager_page_unswapped(fs.m);
		}
	}

	/*
	 * Page had better still be busy
	 */
	KASSERT(fs.m->oflags & VPO_BUSY,
		("vm_fault: page %p not busy!", fs.m));
	/*
	 * Page must be completely valid or it is not fit to
	 * map into user space.  vm_pager_get_pages() ensures this.
	 */
	KASSERT(fs.m->valid == VM_PAGE_BITS_ALL,
	    ("vm_fault: page %p partially invalid", fs.m));
	VM_OBJECT_WUNLOCK(fs.object);

	/*
	 * Put this page into the physical map.  We had to do the unlock above
	 * because pmap_enter() may sleep.  We don't put the page
	 * back on the active queue until later so that the pageout daemon
	 * won't find it (yet).
	 */
	pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired);
	if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0)
		vm_fault_prefault(fs.map->pmap, vaddr, fs.entry);
	VM_OBJECT_WLOCK(fs.object);
	vm_page_lock(fs.m);

	/*
	 * If the page is not wired down, then put it where the pageout daemon
	 * can find it.
	 */
	if (fault_flags & VM_FAULT_CHANGE_WIRING) {
		if (wired)
			vm_page_wire(fs.m);
		else
			vm_page_unwire(fs.m, 1);
	} else
		vm_page_activate(fs.m);
	if (m_hold != NULL) {
		*m_hold = fs.m;
		vm_page_hold(fs.m);
	}
	vm_page_unlock(fs.m);
	vm_page_wakeup(fs.m);

	/*
	 * Unlock everything, and return
	 */
	unlock_and_deallocate(&fs);
	if (hardfault) {
		PCPU_INC(cnt.v_io_faults);
		curthread->td_ru.ru_majflt++;
	} else 
		curthread->td_ru.ru_minflt++;

	return (KERN_SUCCESS);
}
Example #15
static void
cpu_initialize_context(unsigned int cpu)
{
	/* vcpu_guest_context_t is too large to allocate on the stack.
	 * Hence we allocate statically and protect it with a lock */
	vm_page_t m[NPGPTD + 2];
	static vcpu_guest_context_t ctxt;
	vm_offset_t boot_stack;
	vm_offset_t newPTD;
	vm_paddr_t ma[NPGPTD];
	int i;

	/*
	 * Page 0,[0-3]	PTD
	 * Page 1, [4]	boot stack
	 * Page [5]	PDPT
	 *
	 */
	for (i = 0; i < NPGPTD + 2; i++) {
		m[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);

		pmap_zero_page(m[i]);

	}
	boot_stack = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
	newPTD = kmem_alloc_nofault(kernel_map, NPGPTD * PAGE_SIZE);
	ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V;

#ifdef PAE	
	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
	for (i = 0; i < NPGPTD; i++) {
		((vm_paddr_t *)boot_stack)[i] =
		ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V;
	}
#endif	

	/*
	 * Copy cpu0 IdlePTD to new IdlePTD - copying only
	 * kernel mappings
	 */
	pmap_qenter(newPTD, m, 4);
	
	memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
	    (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
	    nkpt*sizeof(vm_paddr_t));

	pmap_qremove(newPTD, 4);
	kmem_free(kernel_map, newPTD, 4 * PAGE_SIZE);
	/*
	 * map actual idle stack to boot_stack
	 */
	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));


	xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1]));
	rw_wlock(&pvh_global_lock);
	for (i = 0; i < 4; i++) {
		int pdir = (PTDPTDI + i) / NPDEPG;
		int curoffset = (PTDPTDI + i) % NPDEPG;
		
		xen_queue_pt_update((vm_paddr_t)
		    ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), 
		    ma[i]);
	}
	PT_UPDATES_FLUSH();
	rw_wunlock(&pvh_global_lock);
	
	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.flags = VGCF_IN_KERNEL;
	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
	ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.eip = (unsigned long)init_secondary;
	ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */

	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	smp_trap_init(ctxt.trap_ctxt);

	ctxt.ldt_ents = 0;
	ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
	ctxt.gdt_ents      = 512;

#ifdef __i386__
	ctxt.user_regs.esp = boot_stack + PAGE_SIZE;

	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = boot_stack + PAGE_SIZE;

	ctxt.event_callback_cs     = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
	ctxt.failsafe_callback_cs  = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]);
#else /* __x86_64__ */
	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = idle->thread.rsp0;

	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
	ctxt.syscall_callback_eip  = (unsigned long)system_call;

	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

	printf("gdtpfn=%lx pdptpfn=%lx\n",
	    ctxt.gdt_frames[0],
	    ctxt.ctrlreg[3] >> PAGE_SHIFT);

	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
	DELAY(3000);
	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
}
Example #16
/*
 *	Routine:
 *		vm_fault_copy_entry
 *	Function:
 *		Create new shadow object backing dst_entry with private copy of
 *		all underlying pages. When src_entry is equal to dst_entry,
 *		function implements COW for wired-down map entry. Otherwise,
 *		it forks wired entry into dst_map.
 *
 *	In/out conditions:
 *		The source and destination maps must be locked for write.
 *		The source map entry must be wired down (or be a sharing map
 *		entry corresponding to a main map entry that is wired down).
 */
void
vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
    vm_map_entry_t dst_entry, vm_map_entry_t src_entry,
    vm_ooffset_t *fork_charge)
{
	vm_object_t backing_object, dst_object, object, src_object;
	vm_pindex_t dst_pindex, pindex, src_pindex;
	vm_prot_t access, prot;
	vm_offset_t vaddr;
	vm_page_t dst_m;
	vm_page_t src_m;
	boolean_t src_readonly, upgrade;

#ifdef	lint
	src_map++;
#endif	/* lint */

	upgrade = src_entry == dst_entry;

	src_object = src_entry->object.vm_object;
	src_pindex = OFF_TO_IDX(src_entry->offset);
	src_readonly = (src_entry->protection & VM_PROT_WRITE) == 0;

	/*
	 * Create the top-level object for the destination entry. (Doesn't
	 * actually shadow anything - we copy the pages directly.)
	 */
	dst_object = vm_object_allocate(OBJT_DEFAULT,
	    OFF_TO_IDX(dst_entry->end - dst_entry->start));
#if VM_NRESERVLEVEL > 0
	dst_object->flags |= OBJ_COLORED;
	dst_object->pg_color = atop(dst_entry->start);
#endif

	VM_OBJECT_WLOCK(dst_object);
	KASSERT(upgrade || dst_entry->object.vm_object == NULL,
	    ("vm_fault_copy_entry: vm_object not NULL"));
	dst_entry->object.vm_object = dst_object;
	dst_entry->offset = 0;
	dst_object->charge = dst_entry->end - dst_entry->start;
	if (fork_charge != NULL) {
		KASSERT(dst_entry->cred == NULL,
		    ("vm_fault_copy_entry: leaked swp charge"));
		dst_object->cred = curthread->td_ucred;
		crhold(dst_object->cred);
		*fork_charge += dst_object->charge;
	} else {
		dst_object->cred = dst_entry->cred;
		dst_entry->cred = NULL;
	}
	access = prot = dst_entry->protection;
	/*
	 * If not an upgrade, then enter the mappings in the pmap as
	 * read and/or execute accesses.  Otherwise, enter them as
	 * write accesses.
	 *
	 * A writeable large page mapping is only created if all of
	 * the constituent small page mappings are modified. Marking
	 * PTEs as modified on inception allows promotion to happen
	 * without taking potentially large number of soft faults.
	 */
	if (!upgrade)
		access &= ~VM_PROT_WRITE;

	/*
	 * Loop through all of the virtual pages within the entry's
	 * range, copying each page from the source object to the
	 * destination object.  Since the source is wired, those pages
	 * must exist.  In contrast, the destination is pageable.
	 * Since the destination object does not share any backing storage
	 * with the source object, all of its pages must be dirtied,
	 * regardless of whether they can be written.
	 */
	for (vaddr = dst_entry->start, dst_pindex = 0;
	    vaddr < dst_entry->end;
	    vaddr += PAGE_SIZE, dst_pindex++) {

		/*
		 * Allocate a page in the destination object.
		 */
		do {
			dst_m = vm_page_alloc(dst_object, dst_pindex,
			    VM_ALLOC_NORMAL);
			if (dst_m == NULL) {
				VM_OBJECT_WUNLOCK(dst_object);
				VM_WAIT;
				VM_OBJECT_WLOCK(dst_object);
			}
		} while (dst_m == NULL);

		/*
		 * Find the page in the source object, and copy it in.
		 * (Because the source is wired down, the page will be in
		 * memory.)
		 */
		VM_OBJECT_WLOCK(src_object);
		object = src_object;
		pindex = src_pindex + dst_pindex;
		while ((src_m = vm_page_lookup(object, pindex)) == NULL &&
		    src_readonly &&
		    (backing_object = object->backing_object) != NULL) {
			/*
			 * Allow fallback to backing objects if we are reading.
			 */
			VM_OBJECT_WLOCK(backing_object);
			pindex += OFF_TO_IDX(object->backing_object_offset);
			VM_OBJECT_WUNLOCK(object);
			object = backing_object;
		}
		if (src_m == NULL)
			panic("vm_fault_copy_wired: page missing");
		pmap_copy_page(src_m, dst_m);
		VM_OBJECT_WUNLOCK(object);
		dst_m->valid = VM_PAGE_BITS_ALL;
		dst_m->dirty = VM_PAGE_BITS_ALL;
		VM_OBJECT_WUNLOCK(dst_object);

		/*
		 * Enter it in the pmap. If a wired, copy-on-write
		 * mapping is being replaced by a write-enabled
		 * mapping, then wire that new mapping.
		 */
		pmap_enter(dst_map->pmap, vaddr, access, dst_m, prot, upgrade);

		/*
		 * Mark it no longer busy, and put it on the active list.
		 */
		VM_OBJECT_WLOCK(dst_object);
		
		if (upgrade) {
			vm_page_lock(src_m);
			vm_page_unwire(src_m, 0);
			vm_page_unlock(src_m);

			vm_page_lock(dst_m);
			vm_page_wire(dst_m);
			vm_page_unlock(dst_m);
		} else {
			vm_page_lock(dst_m);
			vm_page_activate(dst_m);
			vm_page_unlock(dst_m);
		}
		vm_page_wakeup(dst_m);
	}
	VM_OBJECT_WUNLOCK(dst_object);
	if (upgrade) {
		dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY);
		vm_object_deallocate(src_object);
	}
}
Example #17
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
    int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	off_t foff;
#ifdef INVARIANTS
	off_t blkno0;
#endif
	int bsize, pagesperblock, *freecnt;
	int error, before, after, rbehind, rahead, poff, i;
	int bytecount, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("%s does not support devices", __func__));

	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	foff = IDX_TO_OFF(m[0]->pindex);
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;

	KASSERT(foff < object->un_pager.vnp.vnp_size,
	    ("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
	KASSERT(count <= sizeof(bp->b_pages),
	    ("%s: requested %d pages", __func__, count));

	/*
	 * The last page has valid blocks.  Invalid part can only
	 * exist at the end of file, and the page is made fully valid
	 * by zeroing in vm_pager_get_pages().
	 */
	if (m[count - 1]->valid != 0 && --count == 0) {
		if (iodone != NULL)
			iodone(arg, m, 1, 0);
		return (VM_PAGER_OK);
	}

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_old(object, m[i]);
			if (error)
				break;
		}
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		return (VM_PAGER_ERROR);
	}

	/*
	 * If the file system supports BMAP, but blocksize is smaller
	 * than a page size, then use special small filesystem code.
	 */
	if (pagesperblock == 0) {
		relpbuf(bp, freecnt);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_smlfs(object, m[i]);
			if (error)
				break;
		}
		return (error);
	}

	/*
	 * A sparse file can be encountered only for a single page request,
	 * which may not be preceded by call to vm_pager_haspage().
	 */
	if (bp->b_blkno == -1) {
		KASSERT(count == 1,
		    ("%s: array[%d] request to a sparse file %p", __func__,
		    count, vp));
		relpbuf(bp, freecnt);
		pmap_zero_page(m[0]);
		KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
		    __func__, m[0]));
		VM_OBJECT_WLOCK(object);
		m[0]->valid = VM_PAGE_BITS_ALL;
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	}

#ifdef INVARIANTS
	blkno0 = bp->b_blkno;
#endif
	bp->b_blkno += (foff % bsize) / DEV_BSIZE;

	/* Recalculate blocks available after/before to pages. */
	poff = (foff % bsize) / PAGE_SIZE;
	before *= pagesperblock;
	before += poff;
	after *= pagesperblock;
	after += pagesperblock - (poff + 1);
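	/*
	 * Worked example with illustrative numbers (not from the source): if
	 * bsize = 32768 and PAGE_SIZE = 4096, then pagesperblock = 8.  For a
	 * fault at foff = 40960, poff = (40960 % 32768) / 4096 = 2, so with
	 * VOP_BMAP reporting before = 1 and after = 2 the recalculation gives
	 * before = 1 * 8 + 2 = 10 pages and after = 2 * 8 + (8 - 3) = 21 pages.
	 */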
	if (m[0]->pindex + after >= object->size)
		after = object->size - 1 - m[0]->pindex;
	KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d",
	    __func__, count, after + 1));
	after -= count - 1;

	/* Trim requested rbehind/rahead to possible values. */   
	rbehind = a_rbehind ? *a_rbehind : 0;
	rahead = a_rahead ? *a_rahead : 0;
	rbehind = min(rbehind, before);
	rbehind = min(rbehind, m[0]->pindex);
	rahead = min(rahead, after);
	rahead = min(rahead, object->size - m[count - 1]->pindex);
	/*
	 * Check that total amount of pages fit into buf.  Trim rbehind and
	 * rahead evenly if not.
	 */
	if (rbehind + rahead + count > nitems(bp->b_pages)) {
		int trim, sum;

		trim = rbehind + rahead + count - nitems(bp->b_pages) + 1;
		sum = rbehind + rahead;
		if (rbehind == before) {
			/* Roundup rbehind trim to block size. */
			rbehind -= roundup(trim * rbehind / sum, pagesperblock);
			if (rbehind < 0)
				rbehind = 0;
		} else
			rbehind -= trim * rbehind / sum;
		rahead -= trim * rahead / sum;
	}
	KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
	    ("%s: behind %d ahead %d count %d", __func__,
	    rbehind, rahead, count));

	/*
	 * Fill in the bp->b_pages[] array with requested and optional   
	 * read behind or read ahead pages.  Read behind pages are looked
	 * up in a backward direction, down to a first cached page.  Same
	 * for read ahead pages, but there is no need to shift the array
	 * in case of encountering a cached page.
	 */
	i = bp->b_npages = 0;
	if (rbehind) {
		vm_pindex_t startpindex, tpindex;
		vm_page_t p;

		VM_OBJECT_WLOCK(object);
		startpindex = m[0]->pindex - rbehind;
		if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL &&
		    p->pindex >= startpindex)
			startpindex = p->pindex + 1;

		/* tpindex is unsigned; beware of numeric underflow. */
		for (tpindex = m[0]->pindex - 1;
		    tpindex >= startpindex && tpindex < m[0]->pindex;
		    tpindex--, i++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL) {
				/* Shift the array. */
				for (int j = 0; j < i; j++)
					bp->b_pages[j] = bp->b_pages[j + 
					    tpindex + 1 - startpindex]; 
				break;
			}
			bp->b_pages[tpindex - startpindex] = p;
		}

		bp->b_pgbefore = i;
		bp->b_npages += i;
		bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE;
	} else
		bp->b_pgbefore = 0;

	/* Requested pages. */
	for (int j = 0; j < count; j++, i++)
		bp->b_pages[i] = m[j];
	bp->b_npages += count;

	if (rahead) {
		vm_pindex_t endpindex, tpindex;
		vm_page_t p;

		if (!VM_OBJECT_WOWNED(object))
			VM_OBJECT_WLOCK(object);
		endpindex = m[count - 1]->pindex + rahead + 1;
		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
		    p->pindex < endpindex)
			endpindex = p->pindex;
		if (endpindex > object->size)
			endpindex = object->size;

		for (tpindex = m[count - 1]->pindex + 1;
		    tpindex < endpindex; i++, tpindex++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				break;
			bp->b_pages[i] = p;
		}

		bp->b_pgafter = i - bp->b_npages;
		bp->b_npages = i;
	} else
		bp->b_pgafter = 0;

	if (VM_OBJECT_WOWNED(object))
		VM_OBJECT_WUNLOCK(object);

	/* Report back actual behind/ahead read. */
	if (a_rbehind)
		*a_rbehind = bp->b_pgbefore;
	if (a_rahead)
		*a_rahead = bp->b_pgafter;

#ifdef INVARIANTS
	KASSERT(bp->b_npages <= nitems(bp->b_pages),
	    ("%s: buf %p overflowed", __func__, bp));
	for (int j = 1; j < bp->b_npages; j++)
		KASSERT(bp->b_pages[j]->pindex - 1 ==
		    bp->b_pages[j - 1]->pindex,
		    ("%s: pages array not consecutive, bp %p", __func__, bp));
#endif

	/*
	 * Recalculate first offset and bytecount with regards to read behind.
	 * Truncate bytecount to vnode real size and round up physical size
	 * for real devices.
	 */
	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
	bytecount = bp->b_npages << PAGE_SHIFT;
	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
		bytecount = object->un_pager.vnp.vnp_size - foff;
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("%s: sector size %d too large", __func__, secmask + 1));
	bytecount = (bytecount + secmask) & ~secmask;

	/*
	 * And map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
	}

	/* Build a minimal buffer header. */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount;
	bp->b_iooffset = dbtob(bp->b_blkno);
	KASSERT(IDX_TO_OFF(m[0]->pindex - bp->b_pages[0]->pindex) ==
	    (blkno0 - bp->b_blkno) * DEV_BSIZE +
	    IDX_TO_OFF(m[0]->pindex) % bsize,
	    ("wrong offsets bsize %d m[0] %ju b_pages[0] %ju "
	    "blkno0 %ju b_blkno %ju", bsize,
	    (uintmax_t)m[0]->pindex, (uintmax_t)bp->b_pages[0]->pindex,
	    (uintmax_t)blkno0, (uintmax_t)bp->b_blkno));

	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, bp->b_npages);

	if (iodone != NULL) { /* async */
		bp->b_pgiodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		return (VM_PAGER_OK);
	} else {
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
		return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
	}
}
Example #18
/*
 * Prepare a socket for DDP.  Must be called when the socket is known to be
 * open.
 */
int
t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock)
{
	int i, err = ENOMEM;
	static vm_pindex_t color;
	unsigned int nppods, kbuf_pages, idx = 0;
	struct ddp_state *p = &toep->tp_ddp_state;
	struct tom_data *d = TOM_DATA(toep->tp_toedev);

	
	if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
		return (EINVAL);

#ifdef notyet	
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif	
	kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	nppods = pages2ppods(kbuf_pages);

	p->kbuf_noinval = !!waitall;
	p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
	for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
		p->kbuf[idx] = 
		    malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
			sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
		if (p->kbuf[idx] == NULL)
			goto err;
		err = t3_alloc_ppods(d, nppods, &p->kbuf_tag[idx]);
		if (err) {
			printf("t3_alloc_ppods failed err=%d\n", err);
			goto err;
		}
		
		p->kbuf_nppods[idx] = nppods;
		p->kbuf[idx]->dgl_length = kbuf_size;
		p->kbuf[idx]->dgl_offset = 0;
		p->kbuf[idx]->dgl_nelem = kbuf_pages;

		for (i = 0; i < kbuf_pages; ++i) {
			p->kbuf[idx]->dgl_pages[i] = vm_page_alloc(NULL, color,
			    VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
			    VM_ALLOC_ZERO);
			if (p->kbuf[idx]->dgl_pages[i] == NULL) {
				p->kbuf[idx]->dgl_nelem = i;
				printf("failed to allocate kbuf pages\n");
				goto err;
			}
		}
#ifdef NEED_BUSDMA
		/*
		 * XXX we'll need this for VT-d or any platform with an iommu :-/
		 *
		 */
		for (i = 0; i < kbuf_pages; ++i)
			p->kbuf[idx]->phys_addr[i] = 
			    pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
					 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
#endif
		t3_setup_ppods(toep, p->kbuf[idx], nppods, p->kbuf_tag[idx], 
			       p->kbuf[idx]->dgl_length, 0, 0);
	}
	cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);

	t3_set_ddp_tag(toep, 0, p->kbuf_tag[0] << 6);
	t3_set_ddp_buf(toep, 0, 0, p->kbuf[0]->dgl_length);
	t3_repost_kbuf(toep, 0, 0, 1, nonblock);

	t3_set_rcv_coalesce_enable(toep, 
	    TOM_TUNABLE(toep->tp_toedev, ddp_rcvcoalesce));
	t3_set_dack_mss(toep, TOM_TUNABLE(toep->tp_toedev, delack)>>1);
	
#ifdef T3_TRACE
	T3_TRACE4(TIDTB(so),
		  "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
		   kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
#endif
	CTR4(KTR_TOM,
		  "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
		   kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
	cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
	return (0);

err:
	t3_release_ddp_resources(toep);
	t3_cleanup_ddp(toep);
	return (err);
}
Example #19
int
exec_map_first_page(struct image_params *imgp)
{
	int rv, i, after, initial_pagein;
	vm_page_t ma[VM_INITIAL_PAGEIN];
	vm_object_t object;

	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	object = imgp->vp->v_object;
	if (object == NULL)
		return (EACCES);
	VM_OBJECT_WLOCK(object);
#if VM_NRESERVLEVEL > 0
	vm_object_color(object, 0);
#endif
	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
	if (ma[0]->valid != VM_PAGE_BITS_ALL) {
		vm_page_xbusy(ma[0]);
		if (!vm_pager_has_page(object, 0, NULL, &after)) {
			vm_page_lock(ma[0]);
			vm_page_free(ma[0]);
			vm_page_unlock(ma[0]);
			VM_OBJECT_WUNLOCK(object);
			return (EIO);
		}
		initial_pagein = min(after, VM_INITIAL_PAGEIN);
		KASSERT(initial_pagein <= object->size,
		    ("%s: initial_pagein %d object->size %ju",
		    __func__, initial_pagein, (uintmax_t )object->size));
		for (i = 1; i < initial_pagein; i++) {
			if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
				if (ma[i]->valid)
					break;
				if (!vm_page_tryxbusy(ma[i]))
					break;
			} else {
				ma[i] = vm_page_alloc(object, i,
				    VM_ALLOC_NORMAL);
				if (ma[i] == NULL)
					break;
			}
		}
		initial_pagein = i;
		rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL);
		if (rv != VM_PAGER_OK) {
			for (i = 0; i < initial_pagein; i++) {
				vm_page_lock(ma[i]);
				vm_page_free(ma[i]);
				vm_page_unlock(ma[i]);
			}
			VM_OBJECT_WUNLOCK(object);
			return (EIO);
		}
		vm_page_xunbusy(ma[0]);
		for (i = 1; i < initial_pagein; i++)
			vm_page_readahead_finish(ma[i]);
	}
	vm_page_lock(ma[0]);
	vm_page_hold(ma[0]);
	vm_page_activate(ma[0]);
	vm_page_unlock(ma[0]);
	VM_OBJECT_WUNLOCK(object);

	imgp->firstpage = sf_buf_alloc(ma[0], 0);
	imgp->image_header = (char *)sf_buf_kva(imgp->firstpage);

	return (0);
}
Example #20
/* software page fault handler */
static status_t vm_soft_page_fault(addr_t addr, bool is_write, bool is_exec, bool is_user)
{
    vm_address_space_t *aspace;
    vm_mapping_t *mapping;
    vm_upage_t *upage;
    vm_page_t *page;
    unsigned long irqstate;
    status_t err;

    /* get faulted address space */
    if(is_kernel_address(addr)) {
        aspace = vm_get_kernel_aspace();
    } else {
        aspace = vm_get_current_user_aspace();
        if(!aspace)
            panic("vm_soft_page_fault: no user address space!\n");
    }

    /* increment address space faults counter */
    atomic_inc((atomic_t*)&aspace->faults_count);

    /** TODO: spinlocks are temporary solution here. **/

    /* acquire lock before touching address space */
    irqstate = spin_lock_irqsave(&aspace->lock);

    /* get faulted mapping */
    err = vm_aspace_get_mapping(aspace, addr, &mapping);
    if(err != NO_ERROR)
        panic("vm_soft_page_fault: can't get mapping at address %x, err = %x!\n", addr, err);

    /* this page fault handler deals only with mapped objects */
    if(mapping->type != VM_MAPPING_TYPE_OBJECT)
        panic("vm_soft_page_fault: wrong mapping type!\n");

    /* lock mapped object */
    spin_lock(&mapping->object->lock);

    /* get universal page for mapping its data */
    err = vm_object_get_or_add_upage(mapping->object,
                                     addr - mapping->start + mapping->offset,
                                     &upage);
    if(err != NO_ERROR)
        panic("vm_soft_page_fault: can't get upage, err = %x!\n", err);

    /* allocate new physical page or just map existing page */
    if(upage->state == VM_UPAGE_STATE_UNWIRED) {
        /* upage is not wired with physical page.
         * so... allocate new one.
         */
        page = vm_page_alloc(VM_PAGE_STATE_CLEAR);
        if(page == NULL)
           panic("vm_soft_page_fault: out of physical memory!\n");
        /* stick physical page into upage */
        upage->state = VM_UPAGE_STATE_RESIDENT;
        upage->ppn = page->ppn;
    } else if(upage->state == VM_UPAGE_STATE_RESIDENT) {
        /* upage has resident physical page.
         * just get it for further mapping.
         */
        page = vm_page_lookup(upage->ppn);
        if(page == NULL)
            panic("vm_soft_page_fault: wrong physical page number!\n");
    } else {
        /* all other upage states are not supported for now */
        panic("vm_soft_page_fault: invalid universal page state!\n");
        page = NULL; /* keep compiler happy */
    }

    /* now unlock object */
    spin_unlock(&mapping->object->lock);

    /* ... and lock translation map */
    aspace->tmap.ops->lock(&aspace->tmap);

    /* map page into address space */
    aspace->tmap.ops->map(&aspace->tmap, addr, PAGE_ADDRESS(page->ppn), mapping->protect);

    /* unlock translation map */
    aspace->tmap.ops->unlock(&aspace->tmap);

    /* .. and finally unlock address space */
    spin_unlock_irqrstor(&aspace->lock, irqstate);

    /* return address space to the kernel */
    vm_put_aspace(aspace);

    /* page fault handled successfully */
    return NO_ERROR;
}