Example No. 1
/*
 * Given an address range, finds the first watched area which overlaps some or
 * all of the range.
 */
watched_area_t *
pr_find_watched_area(proc_t *p, watched_area_t *pwa, avl_index_t *where)
{
	caddr_t vaddr = pwa->wa_vaddr;
	caddr_t eaddr = pwa->wa_eaddr;
	watched_area_t *wap;
	avl_index_t real_where;

	/* First, check if there is an exact match.  */
	wap = avl_find(&p->p_warea, pwa, &real_where);
	/* Check to see if we overlap with the previous area.  */
	if (wap == NULL) {
		wap = avl_nearest(&p->p_warea, real_where, AVL_BEFORE);
		if (wap != NULL &&
		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
			wap = NULL;
	}

	/* Try the next area.  */
	if (wap == NULL) {
		wap = avl_nearest(&p->p_warea, real_where, AVL_AFTER);
		if (wap != NULL &&
		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
			wap = NULL;
	}

	if (where)
		*where = real_where;

	return (wap);
}
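
Every example in this listing leans on the same two-step idiom: avl_find() either returns an exact match or fills an insertion cursor (avl_index_t), and avl_nearest() converts that cursor into the in-order neighbor on either side (AVL_BEFORE / AVL_AFTER). Below is a minimal standalone sketch of the floor-lookup flavor, assuming the illumos <sys/avl.h> interfaces (userland libavl, link with -lavl); the knode_t type, knode_compare(), and knode_floor() are hypothetical names, not part of any of the sources quoted here.

#include <sys/avl.h>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical node keyed by a single integer. */
typedef struct knode {
	uint64_t	kn_key;
	avl_node_t	kn_link;
} knode_t;

/* illumos AVL comparators must return exactly -1, 0, or +1. */
static int
knode_compare(const void *a, const void *b)
{
	const knode_t *ka = a;
	const knode_t *kb = b;

	if (ka->kn_key < kb->kn_key)
		return (-1);
	if (ka->kn_key > kb->kn_key)
		return (1);
	return (0);
}

/*
 * Floor lookup: the node with the greatest key <= key, or NULL.
 * This is the shape zcrypt_keychain_find() uses in Example No. 5;
 * swapping AVL_BEFORE for AVL_AFTER gives a ceiling lookup instead.
 */
static knode_t *
knode_floor(avl_tree_t *tree, uint64_t key)
{
	knode_t search, *kn;
	avl_index_t where;

	search.kn_key = key;
	kn = avl_find(tree, &search, &where);
	if (kn == NULL)			/* no exact match: predecessor */
		kn = avl_nearest(tree, where, AVL_BEFORE);
	return (kn);
}

int
main(void)
{
	avl_tree_t tree;
	uint64_t keys[] = { 10, 20, 30 };
	knode_t *kn;

	avl_create(&tree, knode_compare, sizeof (knode_t),
	    offsetof(knode_t, kn_link));
	for (int i = 0; i < 3; i++) {
		kn = malloc(sizeof (*kn));
		kn->kn_key = keys[i];
		avl_add(&tree, kn);
	}

	kn = knode_floor(&tree, 25);
	assert(kn != NULL && kn->kn_key == 20);
	printf("floor(25) = %llu\n", (unsigned long long)kn->kn_key);
	return (0);
}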
Example No. 2
/*
 * Queue the free of the range [start, end) for TRIM in txg.  If the range
 * overlaps an already-queued segment (the tree's comparator, not shown,
 * treats overlapping segments as equal), recurse on the uncovered head and
 * tail.  Otherwise coalesce with any adjacent segment from the same txg.
 */
static void
trim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
{
	avl_index_t where;
	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
	boolean_t merge_before, merge_after;

	ASSERT(MUTEX_HELD(&tm->tm_lock));
	VERIFY(start < end);

	tsearch.ts_start = start;
	tsearch.ts_end = end;

	ts = avl_find(&tm->tm_queued_frees, &tsearch, &where);
	if (ts != NULL) {
		if (start < ts->ts_start)
			trim_map_segment_add(tm, start, ts->ts_start, txg);
		if (end > ts->ts_end)
			trim_map_segment_add(tm, ts->ts_end, end, txg);
		return;
	}

	ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
	ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);

	merge_before = (ts_before != NULL && ts_before->ts_end == start &&
	    ts_before->ts_txg == txg);
	merge_after = (ts_after != NULL && ts_after->ts_start == end &&
	    ts_after->ts_txg == txg);

	if (merge_before && merge_after) {
		avl_remove(&tm->tm_queued_frees, ts_before);
		list_remove(&tm->tm_head, ts_before);
		ts_after->ts_start = ts_before->ts_start;
		kmem_free(ts_before, sizeof (*ts_before));
	} else if (merge_before) {
		ts_before->ts_end = end;
	} else if (merge_after) {
		ts_after->ts_start = start;
	} else {
		ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
		ts->ts_start = start;
		ts->ts_end = end;
		ts->ts_txg = txg;
		avl_insert(&tm->tm_queued_frees, ts, where);
		list_insert_tail(&tm->tm_head, ts);
	}
}
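
When nothing overlaps, the function inspects both neighbors of the insertion cursor and merges with any that touch an endpoint (and share a txg), so the tree always holds maximal disjoint extents; range_tree_add() in Example No. 6 uses the same pattern. Here is a minimal sketch of the coalescing idiom under the same <sys/avl.h> assumption as above; seg_t, seg_compare(), and seg_add() are hypothetical names, and the txg bookkeeping and list maintenance are left out.

#include <sys/avl.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* Hypothetical half-open segment [s_start, s_end). */
typedef struct seg {
	uint64_t	s_start;
	uint64_t	s_end;
	avl_node_t	s_link;
} seg_t;

/* Order by start offset; the caller keeps segments disjoint. */
static int
seg_compare(const void *a, const void *b)
{
	const seg_t *sa = a;
	const seg_t *sb = b;

	if (sa->s_start < sb->s_start)
		return (-1);
	if (sa->s_start > sb->s_start)
		return (1);
	return (0);
}

/*
 * Insert [start, end), merging with any neighbor that touches an
 * endpoint so the tree always holds maximal disjoint segments.
 */
static void
seg_add(avl_tree_t *tree, uint64_t start, uint64_t end)
{
	seg_t search, *before, *after, *s;
	avl_index_t where;

	search.s_start = start;
	s = avl_find(tree, &search, &where);
	assert(s == NULL);		/* no duplicate start offsets */

	before = avl_nearest(tree, where, AVL_BEFORE);
	after = avl_nearest(tree, where, AVL_AFTER);

	if (before != NULL && before->s_end == start &&
	    after != NULL && after->s_start == end) {
		/* New segment bridges the gap: fold 'before' into 'after'. */
		avl_remove(tree, before);
		after->s_start = before->s_start;
		free(before);
	} else if (before != NULL && before->s_end == start) {
		before->s_end = end;		/* extend rightward */
	} else if (after != NULL && after->s_start == end) {
		after->s_start = start;		/* extend leftward */
	} else {
		s = malloc(sizeof (*s));
		s->s_start = start;
		s->s_end = end;
		avl_insert(tree, s, where);
	}
}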
Example No. 3
/*
 * Update cache contents upon write completion.
 */
void
vdev_cache_write(zio_t *zio)
{
	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
	vdev_cache_entry_t *ve, ve_search;
	uint64_t io_start = zio->io_offset;
	uint64_t io_end = io_start + zio->io_size;
	uint64_t min_offset = P2ALIGN(io_start, VCBS);
	uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
	avl_index_t where;

	ASSERT(zio->io_type == ZIO_TYPE_WRITE);

	mutex_enter(&vc->vc_lock);

	ve_search.ve_offset = min_offset;
	ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);

	if (ve == NULL)
		ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);

	while (ve != NULL && ve->ve_offset < max_offset) {
		uint64_t start = MAX(ve->ve_offset, io_start);
		uint64_t end = MIN(ve->ve_offset + VCBS, io_end);

		if (ve->ve_fill_io != NULL) {
			ve->ve_missed_update = 1;
		} else {
			bcopy((char *)zio->io_data + start - io_start,
			    ve->ve_data + start - ve->ve_offset, end - start);
		}
		ve = AVL_NEXT(&vc->vc_offset_tree, ve);
	}
	mutex_exit(&vc->vc_lock);
}
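
The window probed in the offset tree is the write range widened outward to cache-block (VCBS) boundaries. A small sketch of what the two alignment macros compute, using their illumos <sys/sysmacros.h> definitions; the 64K block size below is only an example value.

#include <stdint.h>
#include <stdio.h>

/* Power-of-two alignment macros, as defined in illumos <sys/sysmacros.h>. */
#define	P2ALIGN(x, align)	((x) & -(align))	/* round down */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))	/* round up */

int
main(void)
{
	uint64_t vcbs = 1ULL << 16;	/* e.g. a 64K cache block */
	uint64_t io_start = 70000, io_end = 140000;

	/* The probed window covers every cache block the write touches. */
	printf("min_offset = %llu\n",
	    (unsigned long long)P2ALIGN(io_start, vcbs));	/* 65536 */
	printf("max_offset = %llu\n",
	    (unsigned long long)P2ROUNDUP(io_end, vcbs));	/* 196608 */
	return (0);
}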
Example No. 4
/*
 * Look up a name in a microzap.  Entries are ordered by (hash, cd), so
 * searching for cd 0 positions us at the first entry with this hash; we
 * then walk every colliding entry, comparing names.
 */
static mzap_ent_t *
mze_find(zap_t *zap, const char *name, uint64_t hash)
{
	mzap_ent_t mze_tofind;
	mzap_ent_t *mze;
	avl_index_t idx;
	avl_tree_t *avl = &zap->zap_m.zap_avl;

	ASSERT(zap->zap_ismicro);
	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
	ASSERT3U(zap_hash(zap, name), ==, hash);

	if (strlen(name) >= sizeof (mze_tofind.mze_phys.mze_name))
		return (NULL);

	mze_tofind.mze_hash = hash;
	mze_tofind.mze_phys.mze_cd = 0;

	mze = avl_find(avl, &mze_tofind, &idx);
	if (mze == NULL)
		mze = avl_nearest(avl, idx, AVL_AFTER);
	for (; mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
		if (strcmp(name, mze->mze_phys.mze_name) == 0)
			return (mze);
	}
	return (NULL);
}
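
The positioning step above relies on the compound key (mze_hash, mze_cd): a search key with cd 0 lands on the first entry with that hash, or on the insertion cursor just before it, and AVL_NEXT() then visits every colliding entry in cd order. A compact sketch of that step, again over <sys/avl.h>; ent_t and both function names are hypothetical, loosely mirroring mzap_ent_t.

#include <sys/avl.h>
#include <stdint.h>

/* Hypothetical entry keyed by (e_hash, e_cd). */
typedef struct ent {
	uint64_t	e_hash;
	uint32_t	e_cd;
	avl_node_t	e_link;
} ent_t;

/* Order by hash, then by collision differentiator. */
static int
ent_compare(const void *a, const void *b)
{
	const ent_t *ea = a;
	const ent_t *eb = b;

	if (ea->e_hash < eb->e_hash)
		return (-1);
	if (ea->e_hash > eb->e_hash)
		return (1);
	if (ea->e_cd < eb->e_cd)
		return (-1);
	if (ea->e_cd > eb->e_cd)
		return (1);
	return (0);
}

/* Position at the first entry with the given hash, or NULL. */
static ent_t *
ent_first_with_hash(avl_tree_t *tree, uint64_t hash)
{
	ent_t search, *e;
	avl_index_t idx;

	search.e_hash = hash;
	search.e_cd = 0;	/* cd 0 sorts first among collisions */
	e = avl_find(tree, &search, &idx);
	if (e == NULL)
		e = avl_nearest(tree, idx, AVL_AFTER);
	if (e != NULL && e->e_hash != hash)
		e = NULL;	/* successor has a different hash */
	return (e);
}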
Example No. 5
/*
 * Find the keychain node in effect at txg: the node with the greatest
 * dkn_txg that is <= txg, or NULL if txg predates every node.
 */
static zcrypt_keychain_node_t *
zcrypt_keychain_find(avl_tree_t keychain, uint64_t txg)
{
	zcrypt_keychain_node_t search_dkn;
	zcrypt_keychain_node_t *found_dkn;
	avl_index_t where;

	search_dkn.dkn_txg = txg;
	found_dkn = avl_find(&keychain, &search_dkn, &where);
	if (found_dkn == NULL) {
		found_dkn = avl_nearest(&keychain, where, AVL_BEFORE);
	}

	return (found_dkn);
}
Example No. 6
/*
 * Add the segment [start, start + size) to the range tree, merging it
 * with adjacent segments when they touch.
 */
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY(rs == NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}
Example No. 7
/*
 * Common code for pr_mappage() and pr_unmappage().
 */
static int
pr_do_mappage(caddr_t addr, size_t size, int mapin, enum seg_rw rw, int kernel)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	char *eaddr = addr + size;
	int prot_rw = rw_to_prot(rw);
	int xrw = rw_to_index(rw);
	int rv = 0;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_index_t where;
	uint_t prot;

	ASSERT(as != &kas);

startover:
	ASSERT(rv == 0);
	if (avl_numnodes(&as->a_wpage) == 0)
		return (0);

	/*
	 * as->a_wpage can only be changed while the process is totally stopped.
	 * Don't grab p_lock here.  Holding p_lock while grabbing the address
	 * space lock leads to deadlocks with the clock thread.  Note that if an
	 * as_fault() is servicing a fault to a watched page on behalf of an
	 * XHAT provider, watchpoint will be temporarily cleared (and wp_prot
	 * will be set to wp_oprot).  Since this is done while holding as writer
	 * lock, we need to grab as lock (reader lock is good enough).
	 *
	 * p_maplock prevents simultaneous execution of this function.  Under
	 * normal circumstances, holdwatch() will stop all other threads, so the
	 * lock isn't really needed.  But there may be multiple threads within
	 * stop() when SWATCHOK is set, so we need to handle multiple threads
	 * at once.  See holdwatch() for the details of this dance.
	 */

	mutex_enter(&p->p_maplock);
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);

	for (; pwp != NULL && pwp->wp_vaddr < eaddr;
		pwp = AVL_NEXT(&as->a_wpage, pwp)) {

		/*
		 * If the requested protection has not been
		 * removed, we need not remap this page.
		 */
		prot = pwp->wp_prot;
		if (kernel || (prot & PROT_USER))
			if (prot & prot_rw)
				continue;
		/*
		 * If the requested access does not exist in the page's
		 * original protections, we need not remap this page.
		 * If the page does not exist yet, we can't test it.
		 */
		if ((prot = pwp->wp_oprot) != 0) {
			if (!(kernel || (prot & PROT_USER)))
				continue;
			if (!(prot & prot_rw))
				continue;
		}

		if (mapin) {
			/*
			 * Before mapping the page in, ensure that
			 * all other lwps are held in the kernel.
			 */
			if (p->p_mapcnt == 0) {
				/*
				 * Release as lock while in holdwatch()
				 * in case other threads need to grab it.
				 */
				AS_LOCK_EXIT(as, &as->a_lock);
				mutex_exit(&p->p_maplock);
				if (holdwatch() != 0) {
					/*
					 * We stopped in holdwatch().
					 * Start all over again because the
					 * watched page list may have changed.
					 */
					goto startover;
				}
				mutex_enter(&p->p_maplock);
				AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			}
			p->p_mapcnt++;
		}

		addr = pwp->wp_vaddr;
		rv++;

		prot = pwp->wp_prot;
		if (mapin) {
			if (kernel)
				pwp->wp_kmap[xrw]++;
			else
				pwp->wp_umap[xrw]++;
			pwp->wp_flags |= WP_NOWATCH;
			if (pwp->wp_kmap[X] + pwp->wp_umap[X])
				/* cannot have exec-only protection */
				prot |= PROT_READ|PROT_EXEC;
			if (pwp->wp_kmap[R] + pwp->wp_umap[R])
				prot |= PROT_READ;
			if (pwp->wp_kmap[W] + pwp->wp_umap[W])
				/* cannot have write-only protection */
				prot |= PROT_READ|PROT_WRITE;
#if 0	/* damned broken mmu feature! */
			if (sum(pwp->wp_umap) == 0)
				prot &= ~PROT_USER;
#endif
		} else {
			ASSERT(pwp->wp_flags & WP_NOWATCH);
			if (kernel) {
				ASSERT(pwp->wp_kmap[xrw] != 0);
				--pwp->wp_kmap[xrw];
			} else {
				ASSERT(pwp->wp_umap[xrw] != 0);
				--pwp->wp_umap[xrw];
			}
			if (sum(pwp->wp_kmap) + sum(pwp->wp_umap) == 0)
				pwp->wp_flags &= ~WP_NOWATCH;
			else {
				if (pwp->wp_kmap[X] + pwp->wp_umap[X])
					/* cannot have exec-only protection */
					prot |= PROT_READ|PROT_EXEC;
				if (pwp->wp_kmap[R] + pwp->wp_umap[R])
					prot |= PROT_READ;
				if (pwp->wp_kmap[W] + pwp->wp_umap[W])
					/* cannot have write-only protection */
					prot |= PROT_READ|PROT_WRITE;
#if 0	/* damned broken mmu feature! */
				if (sum(pwp->wp_umap) == 0)
					prot &= ~PROT_USER;
#endif
			}
		}


		if (pwp->wp_oprot != 0) {	/* if page exists */
			struct seg *seg;
			uint_t oprot;
			int err, retrycnt = 0;

			AS_LOCK_EXIT(as, &as->a_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		retry:
			seg = as_segat(as, addr);
			ASSERT(seg != NULL);
			SEGOP_GETPROT(seg, addr, 0, &oprot);
			if (prot != oprot) {
				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
				if (err == IE_RETRY) {
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;
				}
			}
			AS_LOCK_EXIT(as, &as->a_lock);
		} else
			AS_LOCK_EXIT(as, &as->a_lock);

		/*
		 * When all pages are mapped back to their normal state,
		 * continue the other lwps.
		 */
		if (!mapin) {
			ASSERT(p->p_mapcnt > 0);
			p->p_mapcnt--;
			if (p->p_mapcnt == 0) {
				mutex_exit(&p->p_maplock);
				mutex_enter(&p->p_lock);
				continuelwps(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&p->p_maplock);
			}
		}

		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	}

	AS_LOCK_EXIT(as, &as->a_lock);
	mutex_exit(&p->p_maplock);

	return (rv);
}