static int
splat_mutex_test4(struct file *file, void *arg)
{
    kmutex_t mtx;
    kthread_t *owner;
    int rc = 0;

    mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL);

    /*
     * Verify the mutex owner is cleared after the lock is dropped.
     * Depending on how the kernel is built this behavior changes, so
     * ensure the SPL mutex implementation detects it properly.
     */
    mutex_enter(&mtx);
    msleep(100);
    mutex_exit(&mtx);
    if (MUTEX_HELD(&mtx)) {
        splat_vprint(file, SPLAT_MUTEX_TEST4_NAME, "Mutex should "
            "not be held, but is held by %p\n", mutex_owner(&mtx));
        rc = -EINVAL;
        goto out;
    }

    mutex_enter(&mtx);

    /* Mutex should be owned by current */
    owner = mutex_owner(&mtx);
    if (current != owner) {
        splat_vprint(file, SPLAT_MUTEX_TEST4_NAME, "Mutex should "
            "be owned by pid %d but is owned by pid %d\n",
            current->pid, owner ? owner->pid : -1);
        rc = -EINVAL;
        goto out;
    }

    mutex_exit(&mtx);

    /* Mutex should not be owned by any task */
    owner = mutex_owner(&mtx);
    if (owner) {
        splat_vprint(file, SPLAT_MUTEX_TEST4_NAME, "Mutex should not "
            "be owned but is owned by pid %d\n", owner->pid);
        rc = -EINVAL;
        goto out;
    }

    splat_vprint(file, SPLAT_MUTEX_TEST4_NAME, "%s",
        "Correct mutex_owner() behavior\n");
out:
    mutex_destroy(&mtx);

    return rc;
}
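The test above exercises MUTEX_HELD() and mutex_owner() as owner-tracking queries. A minimal sketch of how such helpers are commonly layered on an owner field follows; the m_owner field name and struct layout are illustrative assumptions, not the SPL's actual header.

/* Sketch only: owner-tracking helpers of the kind splat_mutex_test4
 * relies on.  The real SPL may store and read the owner differently. */
typedef struct {
    struct task_struct *m_owner;   /* task currently holding the lock */
    /* ... underlying lock state ... */
} sketch_kmutex_t;

#define sketch_mutex_owner(mp)  ((mp)->m_owner)
#define sketch_mutex_owned(mp)  (sketch_mutex_owner(mp) == current)
#define SKETCH_MUTEX_HELD(mp)   sketch_mutex_owned(mp)

The point the test checks is that the owner query returns the current task while the lock is held and NULL otherwise, regardless of kernel configuration.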
void
cv_wait(kcondvar_t *cv, kmutex_t *mp)
{
    ASSERT(mutex_owner(mp) == curthread);
    mp->m_owner = NULL;
    VERIFY(pthread_cond_wait(cv, &mp->m_lock) == 0);
    mp->m_owner = curthread;
}
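This userland cv_wait() clears m_owner before blocking so that mutex_owner() reports no owner while the caller sleeps, then restores it once pthread_cond_wait() returns with the lock reacquired. A hedged sketch of the usual caller pattern, with illustrative names (q_lock, q_cv, q_len):

/* Caller sketch: the predicate is always rechecked after cv_wait()
 * returns, because the mutex is dropped while sleeping and other
 * threads may have changed the state in the meantime. */
mutex_enter(&q->q_lock);
while (q->q_len == 0)                    /* predicate, not a one-shot wait */
    cv_wait(&q->q_cv, &q->q_lock);
item = list_remove_head(&q->q_list);     /* lock reacquired, owner set again */
mutex_exit(&q->q_lock);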
void
mutex_exit(kmutex_t *mp)
{
    ASSERT(mutex_owner(mp) == curthread);
    mp->m_owner = NULL;
#ifndef __native_client__
    VERIFY(pthread_mutex_unlock(&mp->m_lock) == 0);
#endif
}
static int
splat_condvar_test2(struct file *file, void *arg)
{
    int i, count = 0, rc = 0;
    condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT];
    condvar_priv_t cv;

    cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC;
    cv.cv_file = file;
    mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL);
    cv_init(&cv.cv_condvar, NULL, CV_DEFAULT, NULL);

    /* Create some threads, the exact number isn't important just as
     * long as we know how many we managed to create and should expect. */
    for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
        ct[i].ct_cvp = &cv;
        ct[i].ct_name = SPLAT_CONDVAR_TEST2_NAME;
        ct[i].ct_rc = 0;
        ct[i].ct_thread = spl_kthread_create(splat_condvar_test12_thread,
            &ct[i], "%s/%d", SPLAT_CONDVAR_TEST_NAME, i);

        if (!IS_ERR(ct[i].ct_thread)) {
            wake_up_process(ct[i].ct_thread);
            count++;
        }
    }

    /* Wait until all threads are waiting on the condition variable */
    while (atomic_read(&cv.cv_condvar.cv_waiters) != count)
        schedule();

    /* Wake all threads waiting on the condition variable */
    cv_broadcast(&cv.cv_condvar);

    /* Wait until all threads have exited */
    while ((atomic_read(&cv.cv_condvar.cv_waiters) > 0) ||
           mutex_owner(&cv.cv_mtx))
        schedule();

    splat_vprint(file, SPLAT_CONDVAR_TEST2_NAME, "Correctly woke all "
        "%d sleeping threads at once\n", count);

    /* Wake everything for the failure case */
    cv_destroy(&cv.cv_condvar);

    /* wait for threads to exit */
    for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
        if (!IS_ERR(ct[i].ct_thread))
            kthread_stop(ct[i].ct_thread);
    }
    mutex_destroy(&cv.cv_mtx);

    return rc;
}
/*
 * This routine is a special form of xc_attention().  It ensures that
 * prom functions are callable while the cpus are at attention.
 */
void
promsafe_xc_attention(cpuset_t cpuset)
{
    xc_attention(cpuset);

    /* If some other cpu is entering or is in the prom, spin */
    while (prom_cpu || mutex_owner(&prom_mutex)) {

        xc_dismissed(cpuset);
        mutex_enter(&prom_mutex);

        /* Wait for other cpu to exit prom */
        while (prom_cpu)
            cv_wait(&prom_cv, &prom_mutex);

        mutex_exit(&prom_mutex);
        xc_attention(cpuset);
    }

    /* At this point all cpus are paused and none are in the prom */
}
/*
 * This routine is a special form of pause_cpus().  It ensures that
 * prom functions are callable while the cpus are paused.
 */
void
promsafe_pause_cpus(void)
{
    pause_cpus(NULL);

    /* If some other cpu is entering or is in the prom, spin */
    while (prom_cpu || mutex_owner(&prom_mutex)) {

        start_cpus();
        mutex_enter(&prom_mutex);

        /* Wait for other cpu to exit prom */
        while (prom_cpu)
            cv_wait(&prom_cv, &prom_mutex);

        mutex_exit(&prom_mutex);
        pause_cpus(NULL);
    }

    /* At this point all cpus are paused and none are in the prom */
}
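Both promsafe_* helpers spin whenever prom_cpu is set or prom_mutex has an owner, i.e. whenever some cpu is inside or entering the PROM. For context, a hedged sketch of the entry/exit side they coordinate with; this is illustrative only and not the actual PROM entry code:

/* Sketch of a PROM entry/exit protocol that would satisfy the
 * promsafe_* checks above: a cpu announces itself under prom_mutex
 * before calling PROM services and wakes waiters when it is done. */
mutex_enter(&prom_mutex);
while (prom_cpu != NULL && prom_cpu != CPU)
    cv_wait(&prom_cv, &prom_mutex);
prom_cpu = CPU;
mutex_exit(&prom_mutex);

/* ... call PROM services ... */

mutex_enter(&prom_mutex);
prom_cpu = NULL;
cv_broadcast(&prom_cv);
mutex_exit(&prom_mutex);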
clock_t
cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
{
    int error;
    struct timespec ts, tv;
    clock_t delta;

top:
    delta = abstime - lbolt;
    if (delta <= 0)
        return (-1);

    VERIFY(clock_gettime(CLOCK_MONOTONIC, &tv) == 0);

    ts.tv_sec = tv.tv_sec + delta / hz;
    ts.tv_nsec = tv.tv_nsec + (delta % hz) * (NANOSEC / hz);
    ASSERT(ts.tv_nsec >= 0);

    if (ts.tv_nsec >= NANOSEC) {
        ts.tv_sec++;
        ts.tv_nsec -= NANOSEC;
    }

    ASSERT(mutex_owner(mp) == curthread);
    mp->m_owner = NULL;
    error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
    mp->m_owner = curthread;

    if (error == EINTR)
        goto top;

    if (error == ETIMEDOUT)
        return (-1);

    ASSERT(error == 0);

    return (1);
}
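This cv_timedwait() takes an absolute deadline in lbolt ticks, converts the remaining ticks to a CLOCK_MONOTONIC timespec, and retries on EINTR; -1 means the deadline has passed. A hedged caller sketch with illustrative names (q_lock, q_cv, q_len):

/* Caller sketch: wait up to five seconds worth of ticks for the
 * condition, rechecking the predicate on every wakeup. */
clock_t deadline = lbolt + 5 * hz;

mutex_enter(&q->q_lock);
while (q->q_len == 0) {
    if (cv_timedwait(&q->q_cv, &q->q_lock, deadline) == -1)
        break;                  /* -1: deadline passed, give up waiting */
}
mutex_exit(&q->q_lock);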
void
zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
    dmu_buf_t *db = sa_get_db(hdl);
    znode_t *zp = sa_get_userdata(hdl);
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    sa_bulk_attr_t bulk[20];
    int count = 0;
    sa_bulk_attr_t sa_attrs[20] = { { 0 } };
    zfs_acl_locator_cb_t locate = { 0 };
    uint64_t uid, gid, mode, rdev, xattr, parent;
    uint64_t crtime[2], mtime[2], ctime[2];
    zfs_acl_phys_t znode_acl;
    char scanstamp[AV_SCANSTAMP_SZ];
    boolean_t drop_lock = B_FALSE;

    /*
     * No upgrade if the ACL isn't cached, since we won't know which
     * locks are held and reading the ACL would require special
     * "locked" interfaces that would be messy.
     */
    if (zp->z_acl_cached == NULL || vnode_islnk(ZTOV(zp)))
        return;

    /*
     * If z_lock is held and we aren't the owner, just return since
     * we don't want to deadlock trying to update the status of
     * z_is_sa.  This file can then be upgraded at a later time.
     *
     * Otherwise, we know we are doing the sa_update() that caused
     * us to enter this function.
     */
    if (mutex_owner(&zp->z_lock) != curthread) {
        if (mutex_tryenter(&zp->z_lock) == 0)
            return;
        else
            drop_lock = B_TRUE;
    }

    /* First do a bulk query of the attributes that aren't cached */
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
        &znode_acl, 88);

    if (sa_bulk_lookup_locked(hdl, bulk, count) != 0)
        goto done;

    /*
     * While the order here doesn't matter, it's best to try and
     * organize it in such a way as to pick up an already existing
     * layout number.
     */
    count = 0;
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
        &zp->z_size, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs), NULL,
        &zp->z_gen, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs), NULL,
        &parent, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
        &zp->z_pflags, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
        zp->z_atime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
        &mtime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
        &ctime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
        &crtime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
        &zp->z_links, 8);
    if (vnode_isblk(zp->z_vnode) || vnode_islnk(zp->z_vnode))
        SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
            &rdev, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
        &zp->z_acl_cached->z_acl_count, 8);

    if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
        zfs_acl_xform(zp, zp->z_acl_cached, CRED());

    locate.cb_aclp = zp->z_acl_cached;
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
        zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);

    if (xattr)
        SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs), NULL,
            &xattr, 8);

    /* if scanstamp then add scanstamp */
    if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
        bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
            scanstamp, AV_SCANSTAMP_SZ);
        SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs), NULL,
            scanstamp, AV_SCANSTAMP_SZ);
        zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
    }

    VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
    VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
        count, tx) == 0);
    if (znode_acl.z_acl_extern_obj)
        VERIFY(0 == dmu_object_free(zfsvfs->z_os,
            znode_acl.z_acl_extern_obj, tx));

    zp->z_is_sa = B_TRUE;
done:
    if (drop_lock)
        mutex_exit(&zp->z_lock);
}
void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

    mutex_owner(mtx, (void **)lp);
}
int
mutex_owned(kmutex_t *mtx)
{

    return mutex_owner(mtx) == curlwp;
}
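A mutex_owned() built on mutex_owner() like this is mostly consumed by assertions: routines that require the caller to already hold a lock assert ownership rather than taking the lock themselves. A minimal sketch; foo_t, f_lock, and foo_insert_locked() are illustrative names, not an existing API:

/* Sketch: assert lock ownership in a "caller must hold the lock" helper. */
static void
foo_insert_locked(struct foo *fp, struct item *ip)
{
    KASSERT(mutex_owned(&fp->f_lock));  /* caller must hold f_lock */
    LIST_INSERT_HEAD(&fp->f_items, ip, i_link);
}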
int
turnstile_block(turnstile_t *ts, int qnum, void *sobj, sobj_ops_t *sobj_ops,
    kmutex_t *mp, lwp_timer_t *lwptp)
{
    kthread_t *owner;
    kthread_t *t = curthread;
    proc_t *p = ttoproc(t);
    klwp_t *lwp = ttolwp(t);
    turnstile_chain_t *tc = &TURNSTILE_CHAIN(sobj);
    int error = 0;
    int loser = 0;

    ASSERT(DISP_LOCK_HELD(&tc->tc_lock));
    ASSERT(mp == NULL || IS_UPI(mp));
    ASSERT((SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) ^ (mp == NULL));

    thread_lock_high(t);

    if (ts == NULL) {
        /*
         * This is the first thread to block on this sobj.
         * Take its attached turnstile and add it to the hash chain.
         */
        ts = t->t_ts;
        ts->ts_sobj = sobj;
        ts->ts_next = tc->tc_first;
        tc->tc_first = ts;
        ASSERT(ts->ts_waiters == 0);
    } else {
        /*
         * Another thread has already donated its turnstile
         * to block on this sobj, so ours isn't needed.
         * Stash it on the active turnstile's freelist.
         */
        turnstile_t *myts = t->t_ts;
        myts->ts_free = ts->ts_free;
        ts->ts_free = myts;
        t->t_ts = ts;
        ASSERT(ts->ts_sobj == sobj);
        ASSERT(ts->ts_waiters > 0);
    }

    /*
     * Put the thread to sleep.
     */
    ASSERT(t != CPU->cpu_idle_thread);
    ASSERT(CPU_ON_INTR(CPU) == 0);
    ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
    ASSERT(t->t_state == TS_ONPROC);

    if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
        curthread->t_flag |= T_WAKEABLE;
    }
    CL_SLEEP(t);                /* assign kernel priority */
    THREAD_SLEEP(t, &tc->tc_lock);
    t->t_wchan = sobj;
    t->t_sobj_ops = sobj_ops;
    DTRACE_SCHED(sleep);

    if (lwp != NULL) {
        lwp->lwp_ru.nvcsw++;
        (void) new_mstate(t, LMS_SLEEP);
        if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
            lwp->lwp_asleep = 1;
            lwp->lwp_sysabort = 0;
            /*
             * make wchan0 non-zero to conform to the rule that
             * threads blocking for user-level objects have a
             * non-zero wchan0: this prevents spurious wake-ups
             * by, for example, /proc.
             */
            t->t_wchan0 = (caddr_t)1;
        }
    }
    ts->ts_waiters++;
    sleepq_insert(&ts->ts_sleepq[qnum], t);

    if (SOBJ_TYPE(sobj_ops) == SOBJ_MUTEX &&
        SOBJ_OWNER(sobj_ops, sobj) == NULL)
        panic("turnstile_block(%p): unowned mutex", (void *)ts);

    /*
     * Follow the blocking chain to its end, willing our priority to
     * everyone who's in our way.
     */
    while (t->t_sobj_ops != NULL &&
        (owner = SOBJ_OWNER(t->t_sobj_ops, t->t_wchan)) != NULL) {
        if (owner == curthread) {
            if (SOBJ_TYPE(sobj_ops) != SOBJ_USER_PI) {
                panic("Deadlock: cycle in blocking chain");
            }
            /*
             * If the cycle we've encountered ends in mp,
             * then we know it isn't a 'real' cycle because
             * we're going to drop mp before we go to sleep.
             * Moreover, since we've come full circle we know
             * that we must have willed priority to everyone
             * in our way.  Therefore, we can break out now.
             */
            if (t->t_wchan == (void *)mp)
                break;

            if (loser)
                lock_clear(&turnstile_loser_lock);
            /*
             * For SOBJ_USER_PI, a cycle is an application
             * deadlock which needs to be communicated
             * back to the application.
             */
            thread_unlock_nopreempt(t);
            mutex_exit(mp);
            setrun(curthread);
            swtch();            /* necessary to transition state */
            curthread->t_flag &= ~T_WAKEABLE;
            if (lwptp->lwpt_id != 0)
                (void) lwp_timer_dequeue(lwptp);
            setallwatch();
            lwp->lwp_asleep = 0;
            lwp->lwp_sysabort = 0;
            return (EDEADLK);
        }
        if (!turnstile_interlock(t->t_lockp, &owner->t_lockp)) {
            /*
             * If we failed to grab the owner's thread lock,
             * turnstile_interlock() will have dropped t's
             * thread lock, so at this point we don't even know
             * that 't' exists anymore.  The simplest solution
             * is to restart the entire priority inheritance dance
             * from the beginning of the blocking chain, since
             * we *do* know that 'curthread' still exists.
             * Application of priority inheritance is idempotent,
             * so it's OK that we're doing it more than once.
             * Note also that since we've dropped our thread lock,
             * we may already have been woken up; if so, our
             * t_sobj_ops will be NULL, the loop will terminate,
             * and the call to swtch() will be a no-op.  Phew.
             *
             * There is one further complication: if two (or more)
             * threads keep trying to grab the turnstile locks out
             * of order and keep losing the race to another thread,
             * these "dueling losers" can livelock the system.
             * Therefore, once we get into this rare situation,
             * we serialize all the losers.
             */
            if (loser == 0) {
                loser = 1;
                lock_set(&turnstile_loser_lock);
            }
            t = curthread;
            thread_lock_high(t);
            continue;
        }

        /*
         * We now have the owner's thread lock.  If we are traversing
         * from non-SOBJ_USER_PI ops to SOBJ_USER_PI ops, then we know
         * that we have caught the thread while in the TS_SLEEP state,
         * but holding mp.  We know that this situation is transient
         * (mp will be dropped before the holder actually sleeps on
         * the SOBJ_USER_PI sobj), so we will spin waiting for mp to
         * be dropped.  Then, as in the turnstile_interlock() failure
         * case, we will restart the priority inheritance dance.
         */
        if (SOBJ_TYPE(t->t_sobj_ops) != SOBJ_USER_PI &&
            owner->t_sobj_ops != NULL &&
            SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_USER_PI) {
            kmutex_t *upi_lock = (kmutex_t *)t->t_wchan;

            ASSERT(IS_UPI(upi_lock));
            ASSERT(SOBJ_TYPE(t->t_sobj_ops) == SOBJ_MUTEX);

            if (t->t_lockp != owner->t_lockp)
                thread_unlock_high(owner);
            thread_unlock_high(t);
            if (loser)
                lock_clear(&turnstile_loser_lock);

            while (mutex_owner(upi_lock) == owner) {
                SMT_PAUSE();
                continue;
            }

            if (loser)
                lock_set(&turnstile_loser_lock);
            t = curthread;
            thread_lock_high(t);
            continue;
        }

        turnstile_pi_inherit(t->t_ts, owner, DISP_PRIO(t));
        if (t->t_lockp != owner->t_lockp)
            thread_unlock_high(t);
        t = owner;
    }
    if (loser)
        lock_clear(&turnstile_loser_lock);

    /*
     * Note: 't' and 'curthread' were synonymous before the loop above,
     * but now they may be different.  ('t' is now the last thread in
     * the blocking chain.)
     */
    if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
        ushort_t s = curthread->t_oldspl;
        int timedwait = 0;
        uint_t imm_timeout = 0;
        clock_t tim = -1;

        thread_unlock_high(t);
        if (lwptp->lwpt_id != 0) {
            /*
             * We enqueued a timeout.  If it has already fired,
             * lwptp->lwpt_imm_timeout has been set with cas,
             * so fetch it with cas.
             */
            timedwait = 1;
            imm_timeout =
                atomic_cas_uint(&lwptp->lwpt_imm_timeout, 0, 0);
        }
        mutex_exit(mp);
        splx(s);

        if (ISSIG(curthread, JUSTLOOKING) ||
            MUSTRETURN(p, curthread) || imm_timeout)
            setrun(curthread);
        swtch();
        curthread->t_flag &= ~T_WAKEABLE;
        if (timedwait)
            tim = lwp_timer_dequeue(lwptp);
        setallwatch();
        if (ISSIG(curthread, FORREAL) || lwp->lwp_sysabort ||
            MUSTRETURN(p, curthread))
            error = EINTR;
        else if (imm_timeout || (timedwait && tim == -1))
            error = ETIME;
        lwp->lwp_sysabort = 0;
        lwp->lwp_asleep = 0;
    } else {
        thread_unlock_nopreempt(t);
        swtch();
    }

    return (error);
}
static int
splat_condvar_test1(struct file *file, void *arg)
{
    int i, count = 0, rc = 0;
    condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT];
    condvar_priv_t cv;

    cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC;
    cv.cv_file = file;
    mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL);
    cv_init(&cv.cv_condvar, NULL, CV_DEFAULT, NULL);

    /* Create some threads, the exact number isn't important just as
     * long as we know how many we managed to create and should expect. */
    for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
        ct[i].ct_cvp = &cv;
        ct[i].ct_name = SPLAT_CONDVAR_TEST1_NAME;
        ct[i].ct_rc = 0;
        ct[i].ct_thread = spl_kthread_create(splat_condvar_test12_thread,
            &ct[i], "%s/%d", SPLAT_CONDVAR_TEST_NAME, i);

        if (!IS_ERR(ct[i].ct_thread)) {
            wake_up_process(ct[i].ct_thread);
            count++;
        }
    }

    /* Wait until all threads are waiting on the condition variable */
    while (atomic_read(&cv.cv_condvar.cv_waiters) != count)
        schedule();

    /* Wake a single thread at a time, wait until it exits */
    for (i = 1; i <= count; i++) {
        cv_signal(&cv.cv_condvar);

        while (atomic_read(&cv.cv_condvar.cv_waiters) > (count - i))
            schedule();

        /* Correct behavior: 1 thread woken */
        if (atomic_read(&cv.cv_condvar.cv_waiters) == (count - i))
            continue;

        splat_vprint(file, SPLAT_CONDVAR_TEST1_NAME, "Attempted to "
            "wake %d thread but %d threads woke\n", 1,
            count - atomic_read(&cv.cv_condvar.cv_waiters));
        rc = -EINVAL;
        break;
    }

    if (!rc)
        splat_vprint(file, SPLAT_CONDVAR_TEST1_NAME, "Correctly woke "
            "%d sleeping threads %d at a time\n", count, 1);

    /* Wait until the last mutex is dropped */
    while (mutex_owner(&cv.cv_mtx))
        schedule();

    /* Wake everything for the failure case */
    cv_broadcast(&cv.cv_condvar);
    cv_destroy(&cv.cv_condvar);

    /* wait for threads to exit */
    for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) {
        if (!IS_ERR(ct[i].ct_thread))
            kthread_stop(ct[i].ct_thread);
    }
    mutex_destroy(&cv.cv_mtx);

    return rc;
}