Code Example #1
File: condvar.c  Project: BjoKaSH/mac-zfs
void
cv_signal(kcondvar_t *cvp)
{
	condvar_impl_t *cp = (condvar_impl_t *)cvp;

	/* make sure the cv_waiters field looks sane */
	ASSERT(cp->cv_waiters <= CV_MAX_WAITERS);
	if (cp->cv_waiters > 0) {
		sleepq_head_t *sqh = SQHASH(cp);
		disp_lock_enter(&sqh->sq_lock);
		ASSERT(CPU_ON_INTR(CPU) == 0);
		if (cp->cv_waiters & CV_WAITERS_MASK) {
			kthread_t *t;
			cp->cv_waiters--;
			t = sleepq_wakeone_chan(&sqh->sq_queue, cp);
			/*
			 * If cv_waiters is non-zero (and less than
			 * CV_MAX_WAITERS) there should be a thread
			 * in the queue.
			 */
			ASSERT(t != NULL);
		} else if (sleepq_wakeone_chan(&sqh->sq_queue, cp) == NULL) {
			cp->cv_waiters = 0;
		}
		disp_lock_exit(&sqh->sq_lock);
	}
}
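
For context, cv_signal() is normally called with the mutex that protects the waited-on condition held, paired with a cv_wait() loop on the consumer side. A minimal usage sketch (kernel context, <sys/ksynch.h> types; the xx_* names here are hypothetical and not part of the example above):

/* Hypothetical driver state: xx_lock protects xx_ready and xx_cv. */
static kmutex_t		xx_lock;
static kcondvar_t	xx_cv;
static int		xx_ready;

static void
xx_wait_until_ready(void)
{
	mutex_enter(&xx_lock);
	while (!xx_ready)			/* re-check: wakeups may be spurious */
		cv_wait(&xx_cv, &xx_lock);
	mutex_exit(&xx_lock);
}

static void
xx_set_ready(void)
{
	mutex_enter(&xx_lock);
	xx_ready = 1;
	cv_signal(&xx_cv);			/* wakes at most one waiter, as above */
	mutex_exit(&xx_lock);
}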
Code Example #2
File: condvar.c  Project: BjoKaSH/mac-zfs
void
cv_broadcast(kcondvar_t *cvp)
{
	condvar_impl_t *cp = (condvar_impl_t *)cvp;

	/* make sure the cv_waiters field looks sane */
	ASSERT(cp->cv_waiters <= CV_MAX_WAITERS);
	if (cp->cv_waiters > 0) {
		sleepq_head_t *sqh = SQHASH(cp);
		disp_lock_enter(&sqh->sq_lock);
		ASSERT(CPU_ON_INTR(CPU) == 0);
		sleepq_wakeall_chan(&sqh->sq_queue, cp);
		cp->cv_waiters = 0;
		disp_lock_exit(&sqh->sq_lock);
	}
}
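
cv_broadcast() differs from cv_signal() only in waking every waiter (and zeroing cv_waiters), so it is the right call when all waiters must re-evaluate their predicate. A short sketch reusing the hypothetical xx_* names from the previous example:

static void
xx_shutdown(void)
{
	mutex_enter(&xx_lock);
	xx_ready = -1;				/* hypothetical "shutting down" value */
	cv_broadcast(&xx_cv);			/* every waiter wakes and re-checks */
	mutex_exit(&xx_lock);
}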
Code Example #3
File: condvar.c  Project: BjoKaSH/mac-zfs
/*
 * The cv_block() function blocks a thread on a condition variable
 * by putting it in a hashed sleep queue associated with the
 * synchronization object.
 *
 * Threads are taken off the hashed sleep queues via calls to
 * cv_signal(), cv_broadcast(), or cv_unsleep().
 */
static void
cv_block(condvar_impl_t *cvp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t != CPU->cpu_idle_thread);
	ASSERT(CPU_ON_INTR(CPU) == 0);
	ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
	ASSERT(t->t_state == TS_ONPROC);

	t->t_schedflag &= ~TS_SIGNALLED;
	CL_SLEEP(t);			/* assign kernel priority */
	t->t_wchan = (caddr_t)cvp;
	t->t_sobj_ops = &cv_sobj_ops;
	DTRACE_SCHED(sleep);

	/*
	/*
	 * The check for t_intr is to avoid doing the
	 * accounting for an interrupt thread on the still-pinned
	 * lwp's statistics.
	 */
	if (lwp != NULL && t->t_intr == NULL) {
		lwp->lwp_ru.nvcsw++;
		(void) new_mstate(t, LMS_SLEEP);
	}

	sqh = SQHASH(cvp);
	disp_lock_enter_high(&sqh->sq_lock);
	if (cvp->cv_waiters < CV_MAX_WAITERS)
		cvp->cv_waiters++;
	ASSERT(cvp->cv_waiters <= CV_MAX_WAITERS);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	/*
	 * THREAD_SLEEP() moves curthread->t_lockp to point to the
	 * lock sqh->sq_lock. This lock is later released by the caller
	 * when it calls thread_unlock() on curthread.
	 */
}
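
cv_block() only enqueues the caller; the caller performs the actual context switch and releases sqh->sq_lock via thread_unlock(). A rough sketch (not the verbatim kernel source) of how cv_wait() is expected to drive it:

/*
 * Sketch only: the real cv_wait() also handles panic and tracing
 * details.  The point is the lock hand-off described in the comment
 * above: thread_unlock_nopreempt() drops sqh->sq_lock, and swtch()
 * performs the sleep.
 */
void
cv_wait_sketch(kcondvar_t *cvp, kmutex_t *mp)
{
	kthread_t *t = curthread;

	thread_lock(t);				/* take t->t_lockp */
	cv_block((condvar_impl_t *)cvp);
	thread_unlock_nopreempt(t);		/* drops sqh->sq_lock */
	mutex_exit(mp);				/* safe: we are already on the sleep queue */
	swtch();				/* sleep until cv_signal()/cv_broadcast() */
	mutex_enter(mp);			/* re-acquire on wakeup */
}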
Code Example #4
File: dtrace_isa.c  Project: JianchengZh/linux
/*ARGSUSED*/
int
dtrace_getstackdepth(int aframes)
{
# if 1
    TODO();
    return 0;
# else
    struct frame *fp = (struct frame *)dtrace_getfp();
    struct frame *nextfp, *minfp, *stacktop;
    int depth = 0;
    int is_intr = 0;
    int on_intr;
    uintptr_t pc;

    if ((on_intr = CPU_ON_INTR(CPU)) != 0)
        stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME));
    else
        stacktop = (struct frame *)curthread->t_stk;
    minfp = fp;

    aframes++;

    for (;;) {
        depth++;

        if (is_intr) {
            struct regs *rp = (struct regs *)fp;
            nextfp = (struct frame *)rp->r_fp;
            pc = rp->r_pc;
        } else {
            nextfp = (struct frame *)fp->fr_savfp;
            pc = fp->fr_savpc;
        }

        if (nextfp <= minfp || nextfp >= stacktop) {
            if (on_intr) {
                /*
                 * Hop from interrupt stack to thread stack.
                 */
                stacktop = (struct frame *)curthread->t_stk;
                minfp = (struct frame *)curthread->t_stkbase;
                on_intr = 0;
                continue;
            }
            break;
        }

        is_intr = pc - (uintptr_t)_interrupt < _interrupt_size ||
                  pc - (uintptr_t)_allsyscalls < _allsyscalls_size ||
                  pc - (uintptr_t)_cmntrap < _cmntrap_size;

        fp = nextfp;
        minfp = fp;
    }

    if (depth <= aframes)
        return (0);

    return (depth - aframes);
# endif
}
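
The walk in the #else branch relies on the classic struct frame record from <sys/frame.h>. As a reminder, the two fields it dereferences look roughly like this (x86-style shape; the SPARC definition carries additional register-window fields):

/* Assumed shape of the frame record the walker follows. */
struct frame {
	greg_t	fr_savfp;	/* saved frame pointer -> previous frame */
	greg_t	fr_savpc;	/* saved return address in the caller */
};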
Code Example #5
File: msacct.c  Project: bahamas10/openzfs
void
new_cpu_mstate(int cmstate, hrtime_t curtime)
{
	cpu_t *cpu = CPU;
	uint16_t gen;

	ASSERT(cpu->cpu_mstate != CMS_DISABLED);
	ASSERT(cmstate < NCMSTATES);
	ASSERT(cmstate != CMS_DISABLED);

	/*
	 * This function cannot be re-entrant on a given CPU. As such,
	 * we ASSERT and panic if we are called on behalf of an interrupt.
	 * The one exception is for an interrupt which has previously
	 * blocked. Such an interrupt is being scheduled by the dispatcher
	 * just like a normal thread, and as such cannot arrive here
	 * in a re-entrant manner.
	 */

	ASSERT(!CPU_ON_INTR(cpu) && curthread->t_intr == NULL);
	ASSERT(curthread->t_preempt > 0 || curthread == cpu->cpu_idle_thread);

	/*
	 * LOCKING, or lack thereof:
	 *
	 * Updates to CPU mstate can only be made by the CPU
	 * itself, and the above check to ignore interrupts
	 * should prevent recursion into this function on a given
	 * processor. i.e. no possible write contention.
	 *
	 * However, reads of CPU mstate can occur at any time
	 * from any CPU. Any locking added to this code path
	 * would seriously impact syscall performance. So,
	 * instead we have a best-effort protection for readers.
	 * The reader will want to account for any time between
	 * cpu_mstate_start and the present time. This requires
	 * some guarantees that the reader is getting coherent
	 * information.
	 *
	 * We use a generation counter, which is set to 0 before
	 * we start making changes, and is set to a new value
	 * after we're done. Someone reading the CPU mstate
	 * should check for the same non-zero value of this
	 * counter both before and after reading all state. The
	 * important point is that the reader is not a
	 * performance-critical path, but this function is.
	 *
	 * The ordering of writes is critical. cpu_mstate_gen must
	 * be visibly zero on all CPUs before we change cpu_mstate
	 * and cpu_mstate_start. Additionally, cpu_mstate_gen must
	 * not be restored to oldgen+1 until after all of the other
	 * writes have become visible.
	 *
	 * Normally one would insert membar_producer() calls to accomplish
	 * this. Unfortunately this routine is extremely performance
	 * critical (esp. in syscall_mstate below) and we cannot
	 * afford the additional time, particularly on some x86
	 * architectures with extremely slow sfence calls. On a
	 * CPU which guarantees write ordering (including sparc, x86,
	 * and amd64) this is not a problem. The compiler could still
	 * reorder the writes, so we make the four cpu fields
	 * volatile to prevent this.
	 *
	 * TSO warning: should we port to a non-TSO (or equivalent)
	 * CPU, this will break.
	 *
	 * The reader still needs the membar_consumer() calls because,
	 * although the volatiles prevent the compiler from reordering
	 * loads, the CPU can still do so.
	 */

	NEW_CPU_MSTATE(cmstate);
}
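
The comment above describes a seqlock-style protocol whose writer side is hidden inside the NEW_CPU_MSTATE() macro. A sketch of the reader it implies (field names are the ones referenced in the comment; the real consumer in msacct.c does more bookkeeping):

/*
 * Sketch of a reader following the generation-counter protocol:
 * observe the same non-zero cpu_mstate_gen before and after reading,
 * otherwise retry.  membar_consumer() orders the loads, as noted above.
 */
static void
read_cpu_mstate_sketch(cpu_t *cpu, int *mstate, hrtime_t *start)
{
	uint16_t gen;

	do {
		while ((gen = cpu->cpu_mstate_gen) == 0)
			SMT_PAUSE();		/* writer is mid-update */
		membar_consumer();
		*mstate = cpu->cpu_mstate;
		*start = cpu->cpu_mstate_start;
		membar_consumer();
	} while (gen != cpu->cpu_mstate_gen);	/* writer ran; retry */
}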
Code Example #6
File: dtrace_isa.c  Project: 0xffea/xnu
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
		  uint32_t *intrpc)
{
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	struct frame *nextfp, *minfp, *stacktop;
	int depth = 0;
	int last = 0;
	uintptr_t pc;
	uintptr_t caller = CPU->cpu_dtrace_caller;
	int on_intr;

	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
	else
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);

	minfp = fp;

	aframes++;

	if (intrpc != NULL && depth < pcstack_limit)
		pcstack[depth++] = (pc_t)intrpc;

	while (depth < pcstack_limit) {
		nextfp = *(struct frame **)fp;
#if defined(__x86_64__)
		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64);
#else
		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET);
#endif

		if (nextfp <= minfp || nextfp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + kernel_stack_size);

				on_intr = 0;
				continue;
			}
			/*
			 * This is the last frame we can process; indicate
			 * that we should return after processing this frame.
			 */
			last = 1;
		}

		if (aframes > 0) {
			if (--aframes == 0 && caller != 0) {
				/*
				 * We've just run out of artificial frames,
				 * and we have a valid caller -- fill it in
				 * now.
				 */
				ASSERT(depth < pcstack_limit);
				pcstack[depth++] = (pc_t)caller;
				caller = 0;
			}
		} else {
			if (depth < pcstack_limit)
				pcstack[depth++] = (pc_t)pc;
		}

		if (last) {
			while (depth < pcstack_limit)
				pcstack[depth++] = 0;
			return;
		}

		fp = nextfp;
		minfp = fp;
	}
}
Code Example #7
void
panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
{
	int s = spl8();
	kthread_t *t = curthread;
	cpu_t *cp = CPU;

	caddr_t intr_stack = NULL;
	uint_t intr_actv;

	ushort_t schedflag = t->t_schedflag;
	cpu_t *bound_cpu = t->t_bound_cpu;
	char preempt = t->t_preempt;

	(void) setjmp(&t->t_pcb);
	t->t_flag |= T_PANIC;

	t->t_schedflag |= TS_DONT_SWAP;
	t->t_bound_cpu = cp;
	t->t_preempt++;

	/*
	 * Switch lbolt to event driven mode.
	 */
	lbolt_hybrid = lbolt_event_driven;

	panic_enter_hw(s);

	/*
	 * If we're on the interrupt stack and an interrupt thread is available
	 * in this CPU's pool, preserve the interrupt stack by detaching an
	 * interrupt thread and making its stack the intr_stack.
	 */
	if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
		kthread_t *it = cp->cpu_intr_thread;

		intr_stack = cp->cpu_intr_stack;
		intr_actv = cp->cpu_intr_actv;

		cp->cpu_intr_stack = thread_stk_init(it->t_stk);
		cp->cpu_intr_thread = it->t_link;

		/*
		 * Clear only the high level bits of cpu_intr_actv.
		 * We want to indicate that high-level interrupts are
		 * not active without destroying the low-level interrupt
		 * information stored there.
		 */
		cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
	}

	/*
	 * Record one-time panic information and quiesce the other CPUs.
	 * Then print out the panic message and stack trace.
	 */
	if (on_panic_stack) {
		panic_data_t *pdp = (panic_data_t *)panicbuf;

		pdp->pd_version = PANICBUFVERS;
		pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);

		if (t->t_panic_trap != NULL)
			panic_savetrap(pdp, t->t_panic_trap);
		else
			panic_saveregs(pdp, rp);

		(void) vsnprintf(&panicbuf[pdp->pd_msgoff],
		    PANICBUFSIZE - pdp->pd_msgoff, format, alist);

		/*
		 * Call into the platform code to stop the other CPUs.
		 * We currently have all interrupts blocked, and expect that
		 * the platform code will lower ipl only as far as needed to
		 * perform cross-calls, and will acquire as *few* locks as
		 * possible -- panicstr is not set so we can still deadlock.
		 */
		panic_stopcpus(cp, t, s);

		panicstr = (char *)format;
		va_copy(panicargs, alist);
		panic_lbolt = LBOLT_NO_ACCOUNT;
		panic_lbolt64 = LBOLT_NO_ACCOUNT64;
		panic_hrestime = hrestime;
		panic_hrtime = gethrtime_waitfree();
		panic_thread = t;
		panic_regs = t->t_pcb;
		panic_reg = rp;
		panic_cpu = *cp;
		panic_ipl = spltoipl(s);
		panic_schedflag = schedflag;
		panic_bound_cpu = bound_cpu;
		panic_preempt = preempt;

		if (intr_stack != NULL) {
			panic_cpu.cpu_intr_stack = intr_stack;
			panic_cpu.cpu_intr_actv = intr_actv;
		}

		/*
		 * Lower ipl to 10 to keep clock() from running, but allow
		 * keyboard interrupts to enter the debugger.  These callbacks
		 * are executed with panicstr set so they can bypass locks.
		 */
		splx(ipltospl(CLOCK_LEVEL));
		panic_quiesce_hw(pdp);
		(void) FTRACE_STOP();
		(void) callb_execute_class(CB_CL_PANIC, NULL);

		if (log_intrq != NULL)
			log_flushq(log_intrq);

		/*
		 * If log_consq has been initialized and syslogd has started,
		 * print any messages in log_consq that haven't been consumed.
		 */
		if (log_consq != NULL && log_consq != log_backlogq)
			log_printq(log_consq);

		fm_banner();

#if defined(__x86)
		/*
		 * A hypervisor panic originates outside of Solaris, so we
		 * don't want to prepend the panic message with misleading
		 * pointers from within Solaris.
		 */
		if (!IN_XPV_PANIC())
#endif
			printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id,
			    (void *)t);
		vprintf(format, alist);
		printf("\n\n");

		if (t->t_panic_trap != NULL) {
			panic_showtrap(t->t_panic_trap);
			printf("\n");
		}

		traceregs(rp);
		printf("\n");

		if (((boothowto & RB_DEBUG) || obpdebug) &&
		    !nopanicdebug && !panic_forced) {
			if (dumpvp != NULL) {
				debug_enter("panic: entering debugger "
				    "(continue to save dump)");
			} else {
				debug_enter("panic: entering debugger "
				    "(no dump device, continue to reboot)");
			}
		}

	} else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
		printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
		vprintf(format, alist);
		printf("\n");
	} else
		goto spin;

	/*
	 * Prior to performing sync or dump, we make sure that do_polled_io is
	 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
	 * will re-enter panic if we are not making progress with sync or dump.
	 */

	/*
	 * Sync the filesystems.  Reset t_cred if not set because much of
	 * the filesystem code depends on CRED() being valid.
	 */
	if (!in_sync && panic_trigger(&panic_sync)) {
		if (t->t_cred == NULL)
			t->t_cred = kcred;
		splx(ipltospl(CLOCK_LEVEL));
		do_polled_io = 1;
		vfs_syncall();
	}

	/*
	 * Take the crash dump.  If the dump trigger is already set, try to
	 * enter the debugger again before rebooting the system.
	 */
	if (panic_trigger(&panic_dump)) {
		panic_dump_hw(s);
		splx(ipltospl(CLOCK_LEVEL));
		errorq_panic();
		do_polled_io = 1;
		dumpsys();
	} else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
		debug_enter("panic: entering debugger (continue to reboot)");
	} else
		printf("dump aborted: please record the above information!\n");

	if (halt_on_panic)
		mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE);
	else
		mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE);
spin:
	/*
	 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
	 * and unable to jump into the debugger.
	 */
	splx(MIN(s, ipltospl(CLOCK_LEVEL)));
	for (;;)
		;
}
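
panic_trigger(), called above with &panic_sync and &panic_dump, is what keeps the sync and dump phases one-shot across re-entries: only the first caller to trip a given trigger gets a nonzero return. A sketch of that contract, assuming an atomic-swap implementation (the real routine may use a different marker value):

/* Sketch: returns 1 only for the first caller to fire this trigger. */
int
panic_trigger_sketch(int *tp)
{
	return (atomic_swap_32((uint32_t *)tp, 1) == 0);
}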
Code Example #8
static void
clock_tick_process(cpu_t *cp, clock_t mylbolt, int pending)
{
	kthread_t	*t;
	kmutex_t	*plockp;
	int		notick, intr;
	klwp_id_t	lwp;

	/*
	 * The locking here is rather tricky. thread_free_prevent()
	 * prevents the thread returned from being freed while we
	 * are looking at it. We can then check if the thread
	 * is exiting and get the appropriate p_lock if it
	 * is not.  We have to be careful, though, because
	 * the _process_ can still be freed while we've
	 * prevented thread free.  To avoid touching the
	 * proc structure we put a pointer to the p_lock in the
	 * thread structure.  The p_lock is persistent so we
	 * can acquire it even if the process is gone.  At that
	 * point we can check (again) if the thread is exiting
	 * and either drop the lock or do the tick processing.
	 */
	t = cp->cpu_thread;	/* Current running thread */
	if (CPU == cp) {
		/*
		 * 't' will be the tick processing thread on this
		 * CPU.  Use the pinned thread (if any) on this CPU
		 * as the target of the clock tick.
		 */
		if (t->t_intr != NULL)
			t = t->t_intr;
	}

	/*
	 * We use thread_free_prevent to keep the currently running
	 * thread from being freed or recycled while we're
	 * looking at it.
	 */
	thread_free_prevent(t);
	/*
	 * We cannot hold the cpu_lock to prevent the
	 * cpu_active from changing in the clock interrupt.
	 * As long as we don't block (or don't get pre-empted)
	 * the cpu_list will not change (all threads are paused
	 * before list modification).
	 */
	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
		thread_free_allow(t);
		return;
	}

	/*
	 * Make sure the thread is still on the CPU.
	 */
	if ((t != cp->cpu_thread) &&
	    ((cp != CPU) || (t != cp->cpu_thread->t_intr))) {
		/*
		 * We could not locate the thread. Skip this CPU. Race
		 * conditions while performing these checks are benign.
		 * These checks are not perfect and they don't need
		 * to be.
		 */
		thread_free_allow(t);
		return;
	}

	intr = t->t_flag & T_INTR_THREAD;
	lwp = ttolwp(t);
	if (lwp == NULL || (t->t_proc_flag & TP_LWPEXIT) || intr) {
		/*
		 * Thread is exiting (or uninteresting) so don't
		 * do tick processing.
		 */
		thread_free_allow(t);
		return;
	}

	/*
	 * OK, try to grab the process lock.  See
	 * comments above for why we're not using
	 * ttoproc(t)->p_lockp here.
	 */
	plockp = t->t_plockp;
	mutex_enter(plockp);
	/* See above comment. */
	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
		mutex_exit(plockp);
		thread_free_allow(t);
		return;
	}

	/*
	 * The thread may have exited between when we
	 * checked above, and when we got the p_lock.
	 */
	if (t->t_proc_flag & TP_LWPEXIT) {
		mutex_exit(plockp);
		thread_free_allow(t);
		return;
	}

	/*
	 * Either we have the p_lock for the thread's process,
	 * or we don't care about the thread structure any more.
	 * Either way we can allow thread free.
	 */
	thread_free_allow(t);

	/*
	 * If we haven't done tick processing for this
	 * lwp, then do it now. Since we don't hold the
	 * lwp down on a CPU it can migrate and show up
	 * more than once, hence the lbolt check. mylbolt
	 * is copied at the time of tick scheduling to prevent
	 * lbolt mismatches.
	 *
	 * Also, make sure that it's okay to perform the
	 * tick processing before calling clock_tick.
	 * Setting notick to a TRUE value (i.e. not 0)
	 * results in tick processing not being performed for
	 * that thread.
	 */
	notick = ((cp->cpu_flags & CPU_QUIESCED) || CPU_ON_INTR(cp) ||
	    (cp->cpu_dispthread == cp->cpu_idle_thread));

	if ((!notick) && (t->t_lbolt < mylbolt)) {
		t->t_lbolt = mylbolt;
		clock_tick(t, pending);
	}

	mutex_exit(plockp);
}
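
Condensed, the locking protocol described in the comment at the top of this function looks like the sketch below (the names are the real ones; the full function also re-checks CPU offlining and handles the pinned-thread case):

/*
 * Sketch of the thread_free_prevent()/t_plockp dance: pin the thread,
 * take the persistent p_lock through t_plockp, then re-check TP_LWPEXIT
 * before trusting the lwp.
 */
static kmutex_t *
tick_pin_thread_sketch(kthread_t *t)
{
	kmutex_t *plockp;

	thread_free_prevent(t);			/* t cannot be freed/recycled */
	if (ttolwp(t) == NULL || (t->t_proc_flag & TP_LWPEXIT)) {
		thread_free_allow(t);
		return (NULL);			/* exiting or uninteresting */
	}
	plockp = t->t_plockp;			/* persists even if the proc goes away */
	mutex_enter(plockp);
	if (t->t_proc_flag & TP_LWPEXIT) {	/* re-check under p_lock */
		mutex_exit(plockp);
		thread_free_allow(t);
		return (NULL);
	}
	thread_free_allow(t);			/* p_lock now covers what we need */
	return (plockp);			/* caller drops it when done */
}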
Code Example #9
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
	kthread_id_t	owner;
	hrtime_t	sleep_time = 0;	/* how long we slept */
	uint_t		spin_count = 0;	/* how many times we spun */
	cpu_t 		*cpup, *last_cpu;
	extern cpu_t	*cpu_list;
	turnstile_t	*ts;
	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
	int		backoff;	/* current backoff */
	int		backctr;	/* ctr for backoff */
	int		sleep_count = 0;

	ASSERT_STACK_ALIGNED();

	if (MUTEX_TYPE_SPIN(lp)) {
		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
		    &lp->m_spin.m_oldspl);
		return;
	}

	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
		mutex_panic("mutex_enter: bad mutex", lp);
		return;
	}

	/*
	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
	 * so we must verify by disabling preemption and loading CPU again.
	 */
	cpup = CPU;
	if (CPU_ON_INTR(cpup) && !panicstr) {
		kpreempt_disable();
		if (CPU_ON_INTR(CPU))
			mutex_panic("mutex_enter: adaptive at high PIL", lp);
		kpreempt_enable();
	}

	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

	if (&plat_lock_delay) {
		backoff = 0;
	} else {
		backoff = BACKOFF_BASE;
	}

	for (;;) {
spin:
		spin_count++;
		/*
		 * Add an exponential backoff delay before trying again
		 * to touch the mutex data structure.
		 * The spin_count test and call to nulldev are to prevent
		 * the compiler optimizer from eliminating the delay loop.
		 */
		if (&plat_lock_delay) {
			plat_lock_delay(&backoff);
		} else {
			for (backctr = backoff; backctr; backctr--) {
				if (!spin_count) (void) nulldev();
			}	/* delay */
			backoff = backoff << 1;			/* double it */
			if (backoff > BACKOFF_CAP) {
				backoff = BACKOFF_CAP;
			}

			SMT_PAUSE();
		}

		if (panicstr)
			return;

		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
			if (mutex_adaptive_tryenter(lp))
				break;
			continue;
		}

		if (owner == curthread)
			mutex_panic("recursive mutex_enter", lp);

		/*
		 * If lock is held but owner is not yet set, spin.
		 * (Only relevant for platforms that don't have cas.)
		 */
		if (owner == MUTEX_NO_OWNER)
			continue;

		/*
		 * When searching the other CPUs, start with the one where
		 * we last saw the owner thread.  If owner is running, spin.
		 *
		 * We must disable preemption at this point to guarantee
		 * that the list doesn't change while we traverse it
		 * without the cpu_lock mutex.  While preemption is
		 * disabled, we must revalidate our cached cpu pointer.
		 */
		kpreempt_disable();
		if (cpup->cpu_next == NULL)
			cpup = cpu_list;
		last_cpu = cpup;	/* mark end of search */
		do {
			if (cpup->cpu_thread == owner) {
				kpreempt_enable();
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		kpreempt_enable();

		/*
		 * The owner appears not to be running, so block.
		 * See the Big Theory Statement for memory ordering issues.
		 */
		ts = turnstile_lookup(lp);
		MUTEX_SET_WAITERS(lp);
		membar_enter();

		/*
		 * Recheck whether owner is running after waiters bit hits
		 * global visibility (above).  If owner is running, spin.
		 *
		 * Since we are at ipl DISP_LEVEL, kernel preemption is
		 * disabled, however we still need to revalidate our cached
		 * cpu pointer to make sure the cpu hasn't been deleted.
		 */
		if (cpup->cpu_next == NULL)
			last_cpu = cpup = cpu_list;
		do {
			if (cpup->cpu_thread == owner) {
				turnstile_exit(lp);
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		membar_consumer();

		/*
		 * If owner and waiters bit are unchanged, block.
		 */
		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
			sleep_time -= gethrtime();
			(void) turnstile_block(ts, TS_WRITER_Q, lp,
			    &mutex_sobj_ops, NULL, NULL);
			sleep_time += gethrtime();
			sleep_count++;
		} else {
			turnstile_exit(lp);
		}
	}

	ASSERT(MUTEX_OWNER(lp) == curthread);

	if (sleep_time != 0) {
		/*
		 * Note, sleep time is the sum of all the sleeping we
		 * did.
		 */
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
	}

	/*
	 * We do not count a sleep as a spin.
	 */
	if (spin_count > sleep_count)
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
		    spin_count - sleep_count);

	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
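
The delay logic in the spin loop is easier to read in isolation. A generic sketch of capped exponential backoff, roughly the shape of the non-plat_lock_delay() branch above (the nulldev() call in the original exists only to defeat compiler optimization):

/* Generic capped exponential backoff between lock probes. */
static void
spin_backoff_sketch(int *backoffp)
{
	int i;

	for (i = *backoffp; i > 0; i--)
		SMT_PAUSE();			/* brief pause per iteration */
	*backoffp <<= 1;			/* double the next delay ... */
	if (*backoffp > BACKOFF_CAP)
		*backoffp = BACKOFF_CAP;	/* ... up to a fixed cap */
}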
Code Example #10
int
turnstile_block(turnstile_t *ts, int qnum, void *sobj, sobj_ops_t *sobj_ops,
    kmutex_t *mp, lwp_timer_t *lwptp)
{
	kthread_t *owner;
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(sobj);
	int error = 0;
	int loser = 0;

	ASSERT(DISP_LOCK_HELD(&tc->tc_lock));
	ASSERT(mp == NULL || IS_UPI(mp));
	ASSERT((SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) ^ (mp == NULL));

	thread_lock_high(t);

	if (ts == NULL) {
		/*
		 * This is the first thread to block on this sobj.
		 * Take its attached turnstile and add it to the hash chain.
		 */
		ts = t->t_ts;
		ts->ts_sobj = sobj;
		ts->ts_next = tc->tc_first;
		tc->tc_first = ts;
		ASSERT(ts->ts_waiters == 0);
	} else {
		/*
		 * Another thread has already donated its turnstile
		 * to block on this sobj, so ours isn't needed.
		 * Stash it on the active turnstile's freelist.
		 */
		turnstile_t *myts = t->t_ts;
		myts->ts_free = ts->ts_free;
		ts->ts_free = myts;
		t->t_ts = ts;
		ASSERT(ts->ts_sobj == sobj);
		ASSERT(ts->ts_waiters > 0);
	}

	/*
	 * Put the thread to sleep.
	 */
	ASSERT(t != CPU->cpu_idle_thread);
	ASSERT(CPU_ON_INTR(CPU) == 0);
	ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
	ASSERT(t->t_state == TS_ONPROC);

	if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
		curthread->t_flag |= T_WAKEABLE;
	}
	CL_SLEEP(t);		/* assign kernel priority */
	THREAD_SLEEP(t, &tc->tc_lock);
	t->t_wchan = sobj;
	t->t_sobj_ops = sobj_ops;
	DTRACE_SCHED(sleep);

	if (lwp != NULL) {
		lwp->lwp_ru.nvcsw++;
		(void) new_mstate(t, LMS_SLEEP);
		if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
			lwp->lwp_asleep = 1;
			lwp->lwp_sysabort = 0;
			/*
			 * make wchan0 non-zero to conform to the rule that
			 * threads blocking for user-level objects have a
			 * non-zero wchan0: this prevents spurious wake-ups
			 * by, for example, /proc.
			 */
			t->t_wchan0 = (caddr_t)1;
		}
	}
	ts->ts_waiters++;
	sleepq_insert(&ts->ts_sleepq[qnum], t);

	if (SOBJ_TYPE(sobj_ops) == SOBJ_MUTEX &&
	    SOBJ_OWNER(sobj_ops, sobj) == NULL)
		panic("turnstile_block(%p): unowned mutex", (void *)ts);

	/*
	 * Follow the blocking chain to its end, willing our priority to
	 * everyone who's in our way.
	 */
	while (t->t_sobj_ops != NULL &&
	    (owner = SOBJ_OWNER(t->t_sobj_ops, t->t_wchan)) != NULL) {
		if (owner == curthread) {
			if (SOBJ_TYPE(sobj_ops) != SOBJ_USER_PI) {
				panic("Deadlock: cycle in blocking chain");
			}
			/*
			 * If the cycle we've encountered ends in mp,
			 * then we know it isn't a 'real' cycle because
			 * we're going to drop mp before we go to sleep.
			 * Moreover, since we've come full circle we know
			 * that we must have willed priority to everyone
			 * in our way.  Therefore, we can break out now.
			 */
			if (t->t_wchan == (void *)mp)
				break;

			if (loser)
				lock_clear(&turnstile_loser_lock);
			/*
			 * For SOBJ_USER_PI, a cycle is an application
			 * deadlock which needs to be communicated
			 * back to the application.
			 */
			thread_unlock_nopreempt(t);
			mutex_exit(mp);
			setrun(curthread);
			swtch(); /* necessary to transition state */
			curthread->t_flag &= ~T_WAKEABLE;
			if (lwptp->lwpt_id != 0)
				(void) lwp_timer_dequeue(lwptp);
			setallwatch();
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			return (EDEADLK);
		}
		if (!turnstile_interlock(t->t_lockp, &owner->t_lockp)) {
			/*
			 * If we failed to grab the owner's thread lock,
			 * turnstile_interlock() will have dropped t's
			 * thread lock, so at this point we don't even know
			 * that 't' exists anymore.  The simplest solution
			 * is to restart the entire priority inheritance dance
			 * from the beginning of the blocking chain, since
			 * we *do* know that 'curthread' still exists.
			 * Application of priority inheritance is idempotent,
			 * so it's OK that we're doing it more than once.
			 * Note also that since we've dropped our thread lock,
			 * we may already have been woken up; if so, our
			 * t_sobj_ops will be NULL, the loop will terminate,
			 * and the call to swtch() will be a no-op.  Phew.
			 *
			 * There is one further complication: if two (or more)
			 * threads keep trying to grab the turnstile locks out
			 * of order and keep losing the race to another thread,
			 * these "dueling losers" can livelock the system.
			 * Therefore, once we get into this rare situation,
			 * we serialize all the losers.
			 */
			if (loser == 0) {
				loser = 1;
				lock_set(&turnstile_loser_lock);
			}
			t = curthread;
			thread_lock_high(t);
			continue;
		}

		/*
		 * We now have the owner's thread lock.  If we are traversing
		 * from non-SOBJ_USER_PI ops to SOBJ_USER_PI ops, then we know
		 * that we have caught the thread while in the TS_SLEEP state,
		 * but holding mp.  We know that this situation is transient
		 * (mp will be dropped before the holder actually sleeps on
		 * the SOBJ_USER_PI sobj), so we will spin waiting for mp to
		 * be dropped.  Then, as in the turnstile_interlock() failure
		 * case, we will restart the priority inheritance dance.
		 */
		if (SOBJ_TYPE(t->t_sobj_ops) != SOBJ_USER_PI &&
		    owner->t_sobj_ops != NULL &&
		    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_USER_PI) {
			kmutex_t *upi_lock = (kmutex_t *)t->t_wchan;

			ASSERT(IS_UPI(upi_lock));
			ASSERT(SOBJ_TYPE(t->t_sobj_ops) == SOBJ_MUTEX);

			if (t->t_lockp != owner->t_lockp)
				thread_unlock_high(owner);
			thread_unlock_high(t);
			if (loser)
				lock_clear(&turnstile_loser_lock);

			while (mutex_owner(upi_lock) == owner) {
				SMT_PAUSE();
				continue;
			}

			if (loser)
				lock_set(&turnstile_loser_lock);
			t = curthread;
			thread_lock_high(t);
			continue;
		}

		turnstile_pi_inherit(t->t_ts, owner, DISP_PRIO(t));
		if (t->t_lockp != owner->t_lockp)
			thread_unlock_high(t);
		t = owner;
	}

	if (loser)
		lock_clear(&turnstile_loser_lock);

	/*
	 * Note: 't' and 'curthread' were synonymous before the loop above,
	 * but now they may be different.  ('t' is now the last thread in
	 * the blocking chain.)
	 */
	if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
		ushort_t s = curthread->t_oldspl;
		int timedwait = 0;
		uint_t imm_timeout = 0;
		clock_t tim = -1;

		thread_unlock_high(t);
		if (lwptp->lwpt_id != 0) {
			/*
			 * We enqueued a timeout.  If it has already fired,
			 * lwptp->lwpt_imm_timeout has been set with cas,
			 * so fetch it with cas.
			 */
			timedwait = 1;
			imm_timeout =
			    atomic_cas_uint(&lwptp->lwpt_imm_timeout, 0, 0);
		}
		mutex_exit(mp);
		splx(s);

		if (ISSIG(curthread, JUSTLOOKING) ||
		    MUSTRETURN(p, curthread) || imm_timeout)
			setrun(curthread);
		swtch();
		curthread->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(lwptp);
		setallwatch();
		if (ISSIG(curthread, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, curthread))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_sysabort = 0;
		lwp->lwp_asleep = 0;
	} else {
		thread_unlock_nopreempt(t);
		swtch();
	}

	return (error);
}