/*
 * Given an address range, finds the first watched area which overlaps some or
 * all of the range.
 */
watched_area_t *
pr_find_watched_area(proc_t *p, watched_area_t *pwa, avl_index_t *where)
{
	caddr_t vaddr = pwa->wa_vaddr;
	caddr_t eaddr = pwa->wa_eaddr;
	watched_area_t *wap;
	avl_index_t real_where;

	/* First, check if there is an exact match. */
	wap = avl_find(&p->p_warea, pwa, &real_where);

	/* Check to see if we overlap with the previous area. */
	if (wap == NULL) {
		wap = avl_nearest(&p->p_warea, real_where, AVL_BEFORE);
		if (wap != NULL &&
		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
			wap = NULL;
	}

	/* Try the next area. */
	if (wap == NULL) {
		wap = avl_nearest(&p->p_warea, real_where, AVL_AFTER);
		if (wap != NULL &&
		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
			wap = NULL;
	}

	if (where)
		*where = real_where;

	return (wap);
}
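/*
 * A minimal sketch of the find-or-probe-neighbors idiom that
 * pr_find_watched_area() and the functions below all rely on: when
 * avl_find() misses, the avl_index_t it fills in still identifies the
 * would-be insertion point, so the only possible overlap candidates are
 * the two neighbors reachable through avl_nearest().  Everything here
 * (ival_t, its fields, ival_compare, ival_overlap) is hypothetical and
 * exists only for this sketch; the <sys/avl.h> API is the real illumos
 * one.
 */
#include <sys/avl.h>

typedef struct ival {
	uint64_t	iv_start;	/* inclusive */
	uint64_t	iv_end;		/* exclusive */
	avl_node_t	iv_node;	/* linkage; offset given to avl_create() */
} ival_t;

/* Order intervals by start offset only. */
static int
ival_compare(const void *a, const void *b)
{
	const ival_t *l = a;
	const ival_t *r = b;

	if (l->iv_start < r->iv_start)
		return (-1);
	return (l->iv_start > r->iv_start);
}

/* Return an interval overlapping [start, end), or NULL if none does. */
static ival_t *
ival_overlap(avl_tree_t *tree, uint64_t start, uint64_t end)
{
	ival_t search;
	avl_index_t where;
	ival_t *iv;

	search.iv_start = start;
	search.iv_end = end;
	if ((iv = avl_find(tree, &search, &where)) != NULL)
		return (iv);			/* exact start match */

	iv = avl_nearest(tree, where, AVL_BEFORE);
	if (iv != NULL && iv->iv_end > start)	/* predecessor runs into us */
		return (iv);

	iv = avl_nearest(tree, where, AVL_AFTER);
	if (iv != NULL && iv->iv_start < end)	/* successor starts inside us */
		return (iv);

	return (NULL);
}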
/*
 * Add the segment [start, end) to the queue of segments to TRIM, merging
 * it with adjacent segments from the same txg.  If it overlaps a segment
 * already queued, recurse on the uncovered pieces.
 */
static void
trim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
{
	avl_index_t where;
	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
	boolean_t merge_before, merge_after;

	ASSERT(MUTEX_HELD(&tm->tm_lock));
	VERIFY(start < end);

	tsearch.ts_start = start;
	tsearch.ts_end = end;

	ts = avl_find(&tm->tm_queued_frees, &tsearch, &where);
	if (ts != NULL) {
		if (start < ts->ts_start)
			trim_map_segment_add(tm, start, ts->ts_start, txg);
		if (end > ts->ts_end)
			trim_map_segment_add(tm, ts->ts_end, end, txg);
		return;
	}

	ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
	ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);

	merge_before = (ts_before != NULL && ts_before->ts_end == start &&
	    ts_before->ts_txg == txg);
	merge_after = (ts_after != NULL && ts_after->ts_start == end &&
	    ts_after->ts_txg == txg);

	if (merge_before && merge_after) {
		avl_remove(&tm->tm_queued_frees, ts_before);
		list_remove(&tm->tm_head, ts_before);
		ts_after->ts_start = ts_before->ts_start;
		kmem_free(ts_before, sizeof (*ts_before));
	} else if (merge_before) {
		ts_before->ts_end = end;
	} else if (merge_after) {
		ts_after->ts_start = start;
	} else {
		ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
		ts->ts_start = start;
		ts->ts_end = end;
		ts->ts_txg = txg;
		avl_insert(&tm->tm_queued_frees, ts, where);
		list_insert_tail(&tm->tm_head, ts);
	}
}
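/*
 * A minimal sketch of the coalescing insert that trim_map_segment_add()
 * performs (and that range_tree_add() below repeats with extra
 * bookkeeping): after a failed avl_find(), the AVL_BEFORE/AVL_AFTER
 * neighbors are the only segments that can abut the new one, so at most
 * two merges are ever needed.  Reuses the hypothetical ival_t and
 * ival_compare from the sketch above; userland malloc()/assert() stand
 * in for the kernel allocator and VERIFY().
 */
#include <assert.h>
#include <stdlib.h>

static void
ival_add(avl_tree_t *tree, uint64_t start, uint64_t end)
{
	ival_t search;
	avl_index_t where;
	ival_t *before, *after, *iv;

	search.iv_start = start;
	search.iv_end = end;
	assert(avl_find(tree, &search, &where) == NULL);

	before = avl_nearest(tree, where, AVL_BEFORE);
	after = avl_nearest(tree, where, AVL_AFTER);

	if (before != NULL && before->iv_end != start)
		before = NULL;			/* not adjacent */
	if (after != NULL && after->iv_start != end)
		after = NULL;			/* not adjacent */

	if (before != NULL && after != NULL) {
		/* New segment bridges the gap: fold `before` into `after`. */
		avl_remove(tree, before);
		after->iv_start = before->iv_start;
		free(before);
	} else if (before != NULL) {
		before->iv_end = end;		/* extend predecessor */
	} else if (after != NULL) {
		after->iv_start = start;	/* extend successor */
	} else {
		iv = malloc(sizeof (*iv));
		assert(iv != NULL);
		iv->iv_start = start;
		iv->iv_end = end;
		avl_insert(tree, iv, where);	/* reuse the miss point */
	}
}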
/*
 * Update cache contents upon write completion.
 */
void
vdev_cache_write(zio_t *zio)
{
	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
	vdev_cache_entry_t *ve, ve_search;
	uint64_t io_start = zio->io_offset;
	uint64_t io_end = io_start + zio->io_size;
	uint64_t min_offset = P2ALIGN(io_start, VCBS);
	uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
	avl_index_t where;

	ASSERT(zio->io_type == ZIO_TYPE_WRITE);

	mutex_enter(&vc->vc_lock);

	ve_search.ve_offset = min_offset;
	ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);

	if (ve == NULL)
		ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);

	while (ve != NULL && ve->ve_offset < max_offset) {
		uint64_t start = MAX(ve->ve_offset, io_start);
		uint64_t end = MIN(ve->ve_offset + VCBS, io_end);

		if (ve->ve_fill_io != NULL) {
			ve->ve_missed_update = 1;
		} else {
			bcopy((char *)zio->io_data + start - io_start,
			    ve->ve_data + start - ve->ve_offset, end - start);
		}
		ve = AVL_NEXT(&vc->vc_offset_tree, ve);
	}
	mutex_exit(&vc->vc_lock);
}
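/*
 * The loop in vdev_cache_write() is an instance of a bounded range walk:
 * seed the cursor with avl_find()/avl_nearest(..., AVL_AFTER), then step
 * with AVL_NEXT() until the key leaves the window.  mze_find() below
 * uses the same shape, bounded by hash equality instead of an offset.
 * A sketch, again over the hypothetical ival_t; `visit` is a placeholder
 * callback.
 */
static void
ival_walk(avl_tree_t *tree, uint64_t lo, uint64_t hi,
    void (*visit)(ival_t *))
{
	ival_t search;
	avl_index_t where;
	ival_t *iv;

	search.iv_start = lo;
	search.iv_end = lo;

	/* Position at the first interval whose start is >= lo. */
	if ((iv = avl_find(tree, &search, &where)) == NULL)
		iv = avl_nearest(tree, where, AVL_AFTER);

	/* In-order successors come out sorted, so stop at the first miss. */
	for (; iv != NULL && iv->iv_start < hi; iv = AVL_NEXT(tree, iv))
		visit(iv);
}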
/*
 * Look up a name in the microzap's in-core AVL tree: position at the
 * first entry with the given hash (collision differentiator 0), then
 * scan all entries sharing that hash for a name match.
 */
static mzap_ent_t *
mze_find(zap_t *zap, const char *name, uint64_t hash)
{
	mzap_ent_t mze_tofind;
	mzap_ent_t *mze;
	avl_index_t idx;
	avl_tree_t *avl = &zap->zap_m.zap_avl;

	ASSERT(zap->zap_ismicro);
	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
	ASSERT3U(zap_hash(zap, name), ==, hash);

	if (strlen(name) >= sizeof (mze_tofind.mze_phys.mze_name))
		return (NULL);

	mze_tofind.mze_hash = hash;
	mze_tofind.mze_phys.mze_cd = 0;

	mze = avl_find(avl, &mze_tofind, &idx);
	if (mze == NULL)
		mze = avl_nearest(avl, idx, AVL_AFTER);
	for (; mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
		if (strcmp(name, mze->mze_phys.mze_name) == 0)
			return (mze);
	}
	return (NULL);
}
/*
 * Find the keychain entry in effect at the given txg: an exact match if
 * one exists, otherwise the nearest entry with a smaller txg.
 */
static zcrypt_keychain_node_t *
zcrypt_keychain_find(avl_tree_t keychain, uint64_t txg)
{
	zcrypt_keychain_node_t search_dkn;
	zcrypt_keychain_node_t *found_dkn;
	avl_index_t where;

	search_dkn.dkn_txg = txg;
	found_dkn = avl_find(&keychain, &search_dkn, &where);
	if (found_dkn == NULL) {
		found_dkn = avl_nearest(&keychain, where, AVL_BEFORE);
	}

	return (found_dkn);
}
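/*
 * zcrypt_keychain_find() is the floor-lookup variant of the idiom: on a
 * miss, AVL_BEFORE yields the in-order predecessor, i.e. the largest key
 * less than the one sought.  With (hypothetical) keychain nodes at txg
 * 10, 20 and 30, a lookup for txg 25 returns the txg-20 node: the newest
 * key already in effect at that txg.  The same shape, over ival_t:
 */
static ival_t *
ival_floor(avl_tree_t *tree, uint64_t start)
{
	ival_t search;
	avl_index_t where;
	ival_t *iv;

	search.iv_start = start;
	search.iv_end = start;
	if ((iv = avl_find(tree, &search, &where)) != NULL)
		return (iv);		/* exact match */
	return (avl_nearest(tree, where, AVL_BEFORE));	/* else predecessor */
}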
/*
 * Add the segment [start, start + size) to the range tree, coalescing
 * with adjacent segments where possible.
 */
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY(rs == NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}
/*
 * Common code for pr_mappage() and pr_unmappage().
 */
static int
pr_do_mappage(caddr_t addr, size_t size, int mapin, enum seg_rw rw, int kernel)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	char *eaddr = addr + size;
	int prot_rw = rw_to_prot(rw);
	int xrw = rw_to_index(rw);
	int rv = 0;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_index_t where;
	uint_t prot;

	ASSERT(as != &kas);

startover:
	ASSERT(rv == 0);
	if (avl_numnodes(&as->a_wpage) == 0)
		return (0);

	/*
	 * as->a_wpage can only be changed while the process is totally
	 * stopped.  Don't grab p_lock here.  Holding p_lock while grabbing
	 * the address space lock leads to deadlocks with the clock thread.
	 * Note that if an as_fault() is servicing a fault to a watched page
	 * on behalf of an XHAT provider, the watchpoint will be temporarily
	 * cleared (and wp_prot will be set to wp_oprot).  Since this is done
	 * while holding the as writer lock, we need to grab the as lock
	 * (reader lock is good enough).
	 *
	 * p_maplock prevents simultaneous execution of this function.  Under
	 * normal circumstances, holdwatch() will stop all other threads, so
	 * the lock isn't really needed.  But there may be multiple threads
	 * within stop() when SWATCHOK is set, so we need to handle multiple
	 * threads at once.  See holdwatch() for the details of this dance.
	 */

	mutex_enter(&p->p_maplock);
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);

	for (; pwp != NULL && pwp->wp_vaddr < eaddr;
	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {

		/*
		 * If the requested protection has not been
		 * removed, we need not remap this page.
		 */
		prot = pwp->wp_prot;
		if (kernel || (prot & PROT_USER))
			if (prot & prot_rw)
				continue;

		/*
		 * If the requested access does not exist in the page's
		 * original protections, we need not remap this page.
		 * If the page does not exist yet, we can't test it.
		 */
		if ((prot = pwp->wp_oprot) != 0) {
			if (!(kernel || (prot & PROT_USER)))
				continue;
			if (!(prot & prot_rw))
				continue;
		}

		if (mapin) {
			/*
			 * Before mapping the page in, ensure that
			 * all other lwps are held in the kernel.
			 */
			if (p->p_mapcnt == 0) {
				/*
				 * Release as lock while in holdwatch()
				 * in case other threads need to grab it.
				 */
				AS_LOCK_EXIT(as, &as->a_lock);
				mutex_exit(&p->p_maplock);
				if (holdwatch() != 0) {
					/*
					 * We stopped in holdwatch().
					 * Start all over again because the
					 * watched page list may have changed.
					 */
					goto startover;
				}
				mutex_enter(&p->p_maplock);
				AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			}
			p->p_mapcnt++;
		}

		addr = pwp->wp_vaddr;
		rv++;

		prot = pwp->wp_prot;
		if (mapin) {
			if (kernel)
				pwp->wp_kmap[xrw]++;
			else
				pwp->wp_umap[xrw]++;
			pwp->wp_flags |= WP_NOWATCH;
			if (pwp->wp_kmap[X] + pwp->wp_umap[X])
				/* cannot have exec-only protection */
				prot |= PROT_READ|PROT_EXEC;
			if (pwp->wp_kmap[R] + pwp->wp_umap[R])
				prot |= PROT_READ;
			if (pwp->wp_kmap[W] + pwp->wp_umap[W])
				/* cannot have write-only protection */
				prot |= PROT_READ|PROT_WRITE;
#if 0	/* damned broken mmu feature! */
			if (sum(pwp->wp_umap) == 0)
				prot &= ~PROT_USER;
#endif
		} else {
			ASSERT(pwp->wp_flags & WP_NOWATCH);
			if (kernel) {
				ASSERT(pwp->wp_kmap[xrw] != 0);
				--pwp->wp_kmap[xrw];
			} else {
				ASSERT(pwp->wp_umap[xrw] != 0);
				--pwp->wp_umap[xrw];
			}
			if (sum(pwp->wp_kmap) + sum(pwp->wp_umap) == 0)
				pwp->wp_flags &= ~WP_NOWATCH;
			else {
				if (pwp->wp_kmap[X] + pwp->wp_umap[X])
					/* cannot have exec-only protection */
					prot |= PROT_READ|PROT_EXEC;
				if (pwp->wp_kmap[R] + pwp->wp_umap[R])
					prot |= PROT_READ;
				if (pwp->wp_kmap[W] + pwp->wp_umap[W])
					/* cannot have write-only protection */
					prot |= PROT_READ|PROT_WRITE;
#if 0	/* damned broken mmu feature! */
				if (sum(pwp->wp_umap) == 0)
					prot &= ~PROT_USER;
#endif
			}
		}

		if (pwp->wp_oprot != 0) {	/* if page exists */
			struct seg *seg;
			uint_t oprot;
			int err, retrycnt = 0;

			AS_LOCK_EXIT(as, &as->a_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		retry:
			seg = as_segat(as, addr);
			ASSERT(seg != NULL);
			SEGOP_GETPROT(seg, addr, 0, &oprot);
			if (prot != oprot) {
				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
				if (err == IE_RETRY) {
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;
				}
			}
			AS_LOCK_EXIT(as, &as->a_lock);
		} else
			AS_LOCK_EXIT(as, &as->a_lock);

		/*
		 * When all pages are mapped back to their normal state,
		 * continue the other lwps.
		 */
		if (!mapin) {
			ASSERT(p->p_mapcnt > 0);
			p->p_mapcnt--;
			if (p->p_mapcnt == 0) {
				mutex_exit(&p->p_maplock);
				mutex_enter(&p->p_lock);
				continuelwps(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&p->p_maplock);
			}
		}

		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	}

	AS_LOCK_EXIT(as, &as->a_lock);
	mutex_exit(&p->p_maplock);

	return (rv);
}