Example #1
int
xb_read(void *data, unsigned len)
{
	volatile struct xenstore_domain_interface *intf =
	    xs_domain_interface(xb_addr);
	XENSTORE_RING_IDX cons, prod;
	extern int do_polled_io;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		mutex_enter(&xb_wait_lock);
		while (intf->rsp_cons == intf->rsp_prod) {
			if (interrupts_unleashed && !do_polled_io) {
				if (cv_wait_sig(&xb_wait_cv,
				    &xb_wait_lock) == 0) {
					mutex_exit(&xb_wait_lock);
					return (EINTR);
				}
			} else { /* polled mode needed for early probes */
				(void) HYPERVISOR_yield();
			}
		}
		mutex_exit(&xb_wait_lock);
		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		membar_enter();
		if (!check_indexes(cons, prod))
			return (EIO);

		src = get_input_chunk(cons, prod, (char *)intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* We must read header before we read data. */
		membar_consumer();

		(void) memcpy(data, src, avail);
		data = (void *)((uintptr_t)data + avail);
		len -= avail;

		/* Other side must not see free space until we've copied out */
		membar_enter();
		intf->rsp_cons += avail;

		/* Implies mb(): they will see new header. */
		ec_notify_via_evtchn(xen_info->store_evtchn);
	}

	return (0);
}
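
A minimal userspace analogue of the consumer side above, written with C11 atomics instead of the illumos membar_* primitives: acquire the producer index before reading the data, and release the consumer index only after the copy is done. The ring layout, RING_SIZE and function names are invented for illustration; this is not the xenstore interface.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define RING_SIZE 1024u			/* hypothetical, power of two */

struct ring {
	char buf[RING_SIZE];
	_Atomic uint32_t prod;		/* written only by the producer */
	_Atomic uint32_t cons;		/* written only by the consumer */
};

/* Copy up to len bytes out of the ring; returns the number copied. */
size_t
ring_read(struct ring *r, void *data, size_t len)
{
	uint32_t cons = atomic_load_explicit(&r->cons, memory_order_relaxed);
	/* Acquire pairs with the producer's release store of prod, so the
	 * bytes written before that store are visible to the copy below. */
	uint32_t prod = atomic_load_explicit(&r->prod, memory_order_acquire);
	size_t avail = prod - cons;

	if (avail == 0)
		return (0);
	if (avail > len)
		avail = len;

	for (size_t i = 0; i < avail; i++)
		((char *)data)[i] = r->buf[(cons + i) % RING_SIZE];

	/* Release: the producer must not see the freed space until the
	 * copy above has completed. */
	atomic_store_explicit(&r->cons, cons + (uint32_t)avail,
	    memory_order_release);
	return (avail);
}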
Example #2
int
pthread_rwlock_tryrdlock(pthread_rwlock_t *ptr)
{
	uintptr_t owner, next;

	if (__predict_false(__uselibcstub))
		return __libc_rwlock_tryrdlock_stub(ptr);

#ifdef ERRORCHECK
	if (ptr->ptr_magic != _PT_RWLOCK_MAGIC)
		return EINVAL;
#endif

	/*
	 * Don't get a readlock if there is a writer or if there are waiting
	 * writers; i.e. prefer writers to readers. This strategy is dictated
	 * by SUSv3.
	 */
	for (owner = (uintptr_t)ptr->ptr_owner;; owner = next) {
		if ((owner & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) != 0)
			return EBUSY;
		next = rw_cas(ptr, owner, owner + RW_READ_INCR);
		if (owner == next) {
			/* Got it! */
#ifndef PTHREAD__ATOMIC_IS_MEMBAR
			membar_enter();
#endif
			return 0;
		}
	}
}
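
The try-read path reduces to a compare-and-swap that bumps a reader count, plus an acquire barrier on success, which is the role membar_enter() plays here. A hedged C11 sketch; the flag values and the bare _Atomic owner word are assumptions for illustration, not NetBSD's rwlock layout.

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>

#define RW_WRITE_LOCKED	0x1u
#define RW_WRITE_WANTED	0x2u
#define RW_READ_INCR	0x4u		/* readers counted above the flag bits */

/* Try to take a read hold; fail at once if a writer holds or wants the lock. */
int
rwlock_tryrdlock(_Atomic uint32_t *owner)
{
	uint32_t cur = atomic_load_explicit(owner, memory_order_relaxed);

	for (;;) {
		if (cur & (RW_WRITE_LOCKED | RW_WRITE_WANTED))
			return (EBUSY);
		/* Acquire on success stands in for membar_enter(): reads in
		 * the critical section cannot move before the CAS. */
		if (atomic_compare_exchange_weak_explicit(owner, &cur,
		    cur + RW_READ_INCR, memory_order_acquire,
		    memory_order_relaxed))
			return (0);
		/* cur was reloaded by the failed CAS; just retry. */
	}
}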
Example #3
static int
systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	systrace_probe = (void (*)())dtrace_probe;
	membar_enter();

	if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
	    &systrace_pops, NULL, &systrace_id) != 0) {
		systrace_probe = systrace_stub;
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	systrace_devi = devi;

	return (DDI_SUCCESS);
}
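
systrace_attach() stores the real probe function and issues membar_enter() before registering with the framework, so a probe that fires immediately never sees a half-published hook. A small hypothetical C11 analogue of that publish-before-enable ordering; every name below is invented.

#include <stdatomic.h>

typedef void (*probe_fn_t)(int);

static void probe_stub(int id) { (void)id; }	/* placeholder hook */
static void probe_real(int id) { (void)id; }	/* stand-in for the real probe */

static _Atomic probe_fn_t probe_hook = probe_stub;
static _Atomic int provider_enabled;

/* Attach: publish the hook, then enable the provider. */
void
provider_attach(void)
{
	atomic_store_explicit(&probe_hook, probe_real, memory_order_relaxed);
	/* Release: the hook store above is ordered before the enable flag,
	 * the same job membar_enter() does before dtrace_register(). */
	atomic_store_explicit(&provider_enabled, 1, memory_order_release);
}

/* Fire path: acquire the flag, then call whatever hook is published. */
void
provider_fire(int id)
{
	if (atomic_load_explicit(&provider_enabled, memory_order_acquire))
		atomic_load_explicit(&probe_hook, memory_order_relaxed)(id);
}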
Example #4
int
mtx_enter_try(struct mutex *mtx)
{
	struct cpu_info *owner, *ci = curcpu();
	int s;

	if (mtx->mtx_wantipl != IPL_NONE)
		s = splraise(mtx->mtx_wantipl);

	owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci);
#ifdef DIAGNOSTIC
	if (__predict_false(owner == ci))
		panic("mtx %p: locking against myself", mtx);
#endif
	if (owner == NULL) {
		if (mtx->mtx_wantipl != IPL_NONE)
			mtx->mtx_oldipl = s;
#ifdef DIAGNOSTIC
		ci->ci_mutex_level++;
#endif
		membar_enter();
		return (1);
	}

	if (mtx->mtx_wantipl != IPL_NONE)
		splx(s);

	return (0);
}
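
Stripped of the interrupt-priority handling, mtx_enter_try() is a CAS on the owner field followed by membar_enter() so the critical section cannot float above the acquisition. A userspace sketch with C11 atomics; the structure and names are assumptions for illustration.

#include <stdatomic.h>
#include <stddef.h>

struct try_mutex {
	_Atomic(void *) owner;		/* NULL while unlocked */
};

/* Returns 1 if the lock was taken, 0 if somebody else owns it. */
int
mtx_try_enter(struct try_mutex *m, void *self)
{
	void *expected = NULL;

	/* Acquire on success plays the part of membar_enter(). */
	if (atomic_compare_exchange_strong_explicit(&m->owner, &expected,
	    self, memory_order_acquire, memory_order_relaxed))
		return (1);
	return (0);
}

/* Release pairs with the next owner's acquire. */
void
mtx_leave(struct try_mutex *m)
{
	atomic_store_explicit(&m->owner, NULL, memory_order_release);
}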
Example #5
void mcs_rwlock::downgrade() 
{
    membar_exit();  // this is for all intents and purposes, a release
    w_assert1(*&_holders == WRITER);
    *&_holders = READER;
    membar_enter(); // but it's also an acquire
}
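
The downgrade brackets one store with membar_exit() and membar_enter(): a release so that everything written under the write lock is visible to readers, then an acquire so later reads cannot move above the downgrade. A C11 rendering under an assumed WRITER/READER encoding (the values are invented, not Shore-MT's).

#include <assert.h>
#include <stdatomic.h>

#define READER	1u			/* hypothetical encoding */
#define WRITER	2u

static _Atomic unsigned int holders;

/* Turn an exclusive hold into a shared one without ever dropping it. */
void
rwlock_downgrade(void)
{
	assert(atomic_load_explicit(&holders, memory_order_relaxed) == WRITER);
	/* Release: writes made while holding the write lock become visible
	 * to any reader that observes the new value. */
	atomic_store_explicit(&holders, READER, memory_order_release);
	/* Acquire: reads that follow stay after the downgrade point. */
	atomic_thread_fence(memory_order_acquire);
}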
Example #6
int
pthread_rwlock_trywrlock(pthread_rwlock_t *ptr)
{
	uintptr_t owner, next;
	pthread_t self;

	if (__predict_false(__uselibcstub))
		return __libc_rwlock_trywrlock_stub(ptr);

#ifdef ERRORCHECK
	if (ptr->ptr_magic != _PT_RWLOCK_MAGIC)
		return EINVAL;
#endif

	self = pthread__self();

	for (owner = (uintptr_t)ptr->ptr_owner;; owner = next) {
		if (owner != 0)
			return EBUSY;
		next = rw_cas(ptr, owner, (uintptr_t)self | RW_WRITE_LOCKED);
		if (owner == next) {
			/* Got it! */
#ifndef PTHREAD__ATOMIC_IS_MEMBAR
			membar_enter();
#endif
			return 0;
		}
	}
}
Example #7
bool mcs_rwlock::attempt_read() 
{
    unsigned int old_value = *&_holders;
    if(old_value & WRITER || 
        old_value != atomic_cas_32(&_holders, old_value, old_value+READER))
        return false;

    membar_enter();
    return true;
}
Example #8
/*
 * Called by a CPU which has just been onlined.  It is expected that the CPU
 * performing the online operation will call tsc_sync_master().
 *
 * TSC sync is disabled in the context of virtualization. See comments
 * above tsc_sync_master.
 */
void
tsc_sync_slave(void)
{
	ulong_t flags;
	hrtime_t s1;
	tsc_sync_t *tsc = tscp;
	int cnt;
	int hwtype;

	hwtype = get_hwenv();
	if (!tsc_master_slave_sync_needed || hwtype == HW_XEN_HVM ||
	    hwtype == HW_VMWARE)
		return;

	flags = clear_int_flag();

	for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) {
		/* Re-fill the cache line */
		s1 = tsc->master_tsc;
		membar_enter();
		tsc_sync_go = TSC_SYNC_GO;
		do {
			/*
			 * Do not put an SMT_PAUSE here. For instance,
			 * if the master and slave are really the same
			 * hyper-threaded CPU, then you want the master
			 * to yield to the slave as quickly as possible here,
			 * but not the other way.
			 */
			s1 = tsc_read();
		} while (tsc->master_tsc == 0);
		tsc->slave_tsc = s1;
		membar_enter();
		tsc_sync_go = TSC_SYNC_DONE;

		while (tsc_sync_go != TSC_SYNC_STOP)
			SMT_PAUSE();
	}

	restore_int_flag(flags);
}
Example #9
/*
 * Spin until either start_cpus() wakes us up, or we get a request to
 * enter the safe phase (followed by a later start_cpus()).
 */
void
mach_cpu_pause(volatile char *safe)
{
	*safe = PAUSE_WAIT;
	membar_enter();

	while (*safe != PAUSE_IDLE) {
		if (cpu_phase[CPU->cpu_id] == CPU_PHASE_WAIT_SAFE)
			enter_safe_phase();
		SMT_PAUSE();
	}
}
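
mach_cpu_pause() publishes PAUSE_WAIT and uses membar_enter() to make that store visible before the CPU starts polling for its release. A reduced two-thread rendezvous in C11 showing the same publish-then-spin shape; the constants and helpers are hypothetical.

#include <stdatomic.h>

enum { PAUSE_IDLE, PAUSE_WAIT };

static _Atomic int pause_state = PAUSE_IDLE;

/* Paused side: announce that we are parked, then wait to be released. */
void
cpu_pause_wait(void)
{
	/* A seq_cst store stands in for the store + membar_enter() pair:
	 * the controller must observe PAUSE_WAIT before we begin spinning. */
	atomic_store_explicit(&pause_state, PAUSE_WAIT, memory_order_seq_cst);

	while (atomic_load_explicit(&pause_state, memory_order_acquire)
	    != PAUSE_IDLE)
		;	/* a real kernel would insert SMT_PAUSE() here */
}

/* Controller side: release the paused thread. */
void
cpu_pause_release(void)
{
	atomic_store_explicit(&pause_state, PAUSE_IDLE, memory_order_release);
}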
Example #10
int
xb_write(const void *data, unsigned len)
{
	volatile struct xenstore_domain_interface *intf =
	    xs_domain_interface(xb_addr);
	XENSTORE_RING_IDX cons, prod;
	extern int do_polled_io;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		mutex_enter(&xb_wait_lock);
		while ((intf->req_prod - intf->req_cons) ==
		    XENSTORE_RING_SIZE) {
			if (interrupts_unleashed && !do_polled_io) {
				if (cv_wait_sig(&xb_wait_cv,
				    &xb_wait_lock) == 0) {
					mutex_exit(&xb_wait_lock);
					return (EINTR);
				}
			} else { /* polled mode needed for early probes */
				(void) HYPERVISOR_yield();
			}
		}
		mutex_exit(&xb_wait_lock);
		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		membar_enter();
		if (!check_indexes(cons, prod))
			return (EIO);

		dst = get_output_chunk(cons, prod, (char *)intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		(void) memcpy(dst, data, avail);
		data = (void *)((uintptr_t)data + avail);
		len -= avail;

		/* Other side must not see new header until data is there. */
		membar_producer();
		intf->req_prod += avail;

		/* This implies mb() before other side sees interrupt. */
		ec_notify_via_evtchn(xen_info->store_evtchn);
	}

	return (0);
}
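
xb_write() is the producer half of the ring in Example #1: copy the payload first, then publish the new producer index behind membar_producer(). A C11 counterpart to the consumer sketch shown after Example #1, using the same invented ring layout.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define RING_SIZE 1024u			/* hypothetical, power of two */

struct ring {
	char buf[RING_SIZE];
	_Atomic uint32_t prod;
	_Atomic uint32_t cons;
};

/* Copy up to len bytes into the ring; returns the number copied. */
size_t
ring_write(struct ring *r, const void *data, size_t len)
{
	uint32_t prod = atomic_load_explicit(&r->prod, memory_order_relaxed);
	/* Acquire pairs with the consumer's release of cons: the slots it
	 * freed are really free before we overwrite them. */
	uint32_t cons = atomic_load_explicit(&r->cons, memory_order_acquire);
	size_t space = RING_SIZE - (prod - cons);

	if (space == 0)
		return (0);
	if (space > len)
		space = len;

	for (size_t i = 0; i < space; i++)
		r->buf[(prod + i) % RING_SIZE] = ((const char *)data)[i];

	/* Release, like membar_producer(): the consumer must not see the
	 * new prod index before the data is in place. */
	atomic_store_explicit(&r->prod, prod + (uint32_t)space,
	    memory_order_release);
	return (space);
}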
Example #11
void
mach_cpu_pause(volatile char *safe)
{
	/*
	 * This cpu is now safe.
	 */
	*safe = PAUSE_WAIT;
	membar_enter(); /* make sure stores are flushed */

	/*
	 * Now we wait.  When we are allowed to continue, safe
	 * will be set to PAUSE_IDLE.
	 */
	while (*safe != PAUSE_IDLE)
		SMT_PAUSE();
}
Example #12
/*
 * This locking needs work and will misbehave severely if:
 * 1) the backing memory has to be paged in
 * 2) some lockholder exits while holding the lock
 */
static void
shmif_lockbus(struct shmif_mem *busmem)
{
	int i = 0;

	while (__predict_false(atomic_cas_32(&busmem->shm_lock,
	    LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) {
		if (__predict_false(++i > LOCK_COOLDOWN)) {
			/* wait 1ms */
			rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL,
			    0, 1000*1000);
			i = 0;
		}
		continue;
	}
	membar_enter();
}
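
shmif_lockbus() spins on a CAS and relies on the trailing membar_enter() for acquire semantics once the lock word flips. A hedged userspace version in C11; the cooldown constant and the nanosleep() backoff are assumptions standing in for LOCK_COOLDOWN and rumpuser_clock_sleep().

#include <stdatomic.h>
#include <stdint.h>
#include <time.h>

#define LOCK_UNLOCKED	0u
#define LOCK_LOCKED	1u
#define LOCK_COOLDOWN	1000		/* spins before backing off, hypothetical */

/* Spin until the lock word flips from UNLOCKED to LOCKED under us. */
void
buslock_acquire(_Atomic uint32_t *lock)
{
	int i = 0;

	for (;;) {
		uint32_t expected = LOCK_UNLOCKED;

		/* Acquire on success takes the place of the trailing
		 * membar_enter() in shmif_lockbus(). */
		if (atomic_compare_exchange_weak_explicit(lock, &expected,
		    LOCK_LOCKED, memory_order_acquire, memory_order_relaxed))
			return;
		if (++i > LOCK_COOLDOWN) {
			/* back off for 1ms instead of hammering the line */
			struct timespec ts = { 0, 1000 * 1000 };
			(void) nanosleep(&ts, NULL);
			i = 0;
		}
	}
}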
Example #13
void mcs_rwlock::acquire_read() 
{
    /* attempt to CAS first. If no writers around, or no intervening
     * add'l readers, we're done
     */
    if(!attempt_read()) {
        /* There seem to be writers around, or other readers intervened in our
         * attempt_read() above.
         * Join the queue and wait for them to leave 
         */
        {
            CRITICAL_SECTION(cs, (parent_lock*) this);
            _add_when_writer_leaves(READER);
        }
        membar_enter();
    }
}
Example #14
void mcs_rwlock::acquire_write() 
{
    /* always join the queue first.
     *
     * 1. We don't want to race with other writers
     *
     * 2. We don't want to make readers deal with the gap between
     * us updating _holders and actually acquiring the MCS lock.
     */
    CRITICAL_SECTION(cs, (parent_lock*) this);
    _add_when_writer_leaves(WRITER);
    w_assert1(has_writer()); // me!

    // now wait for existing readers to clear out
    if(has_reader()) _spin_on_readers();

    // done!
    membar_enter();
}
Example #15
void occ_rwlock::acquire_read()
{
    int count = atomic_add_32_nv(&_active_count, READER);
    while(count & WRITER) {
        // block
        count = atomic_add_32_nv(&_active_count, -READER);
        {
            CRITICAL_SECTION(cs, _read_write_mutex);
            
            // nasty race: we could have fooled a writer into sleeping...
            if(count == WRITER)
                DO_PTHREAD(pthread_cond_signal(&_write_cond));
            
            while(*&_active_count & WRITER) {
                DO_PTHREAD(pthread_cond_wait(&_read_cond, &_read_write_mutex));
            }
        }
        count = atomic_add_32_nv(&_active_count, READER);
    }
    membar_enter();
}
Example #16
bool mcs_rwlock::_attempt_write(unsigned int expected) 
{
    /* succeeds iff we are the only reader (if expected==READER)
     * or if there are no readers or writers (if expected==0)
     *
     * How do we know that the only reader is us?
     * A: we rely on the fact that this is called with expected==READER only
     * from attempt_upgrade(), which is called from latch only in the case
     * in which we hold the latch in LATCH_SH mode and are requesting it in
     * LATCH_EX mode.
     *
     * If there is a writer waiting we have to get in line like everyone else.
     * No need for a membar because we already hold the latch.
    */
    ext_qnode me = QUEUE_EXT_QNODE_INITIALIZER;
    if(*&_holders != expected || !attempt(&me))
        return false;
    // at this point, we've called mcs_lock::attempt(&me), and
    // have acquired the parent/mcs lock
    // The following line replaces our reader bit with a writer bit.
    bool result = (expected == atomic_cas_32(&_holders, expected, WRITER));
    release(me); // parent/mcs lock
    membar_enter();
    return result;
}
Example #17
static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t	*ncp, *newlist;
	kthread_t *t;
	int	move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t 	*p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;
	boolean_t unbind_all_threads = (forced != 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		/*
		 * The last CPU is removed from a partition which has threads
		 * running in it. Some of these threads may be bound to this
		 * CPU.
		 *
		 * Attempt to unbind threads from the CPU and from the processor
		 * set. Note that no threads should be bound to this CPU since
		 * cpupart_move_threads will refuse to move bound threads to
		 * other CPUs.
		 */
		(void) cpu_unbind(oldpp->cp_cpulist->cpu_id, B_FALSE);
		(void) cpupart_unbind_threads(oldpp, B_FALSE);

		if (!disp_bound_partition(cp, 0)) {
			/*
			 * No bound threads in this partition any more
			 */
			move_threads = 0;
		} else {
			/*
			 * There are still threads bound to the partition
			 */
			cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
			return (EBUSY);
		}
	}

	/*
	 * If forced flag is set unbind any threads from this CPU.
	 * Otherwise unbind soft-bound threads only.
	 */
	if ((ret = cpu_unbind(cp->cpu_id, unbind_all_threads)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads weak binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Notify the Processor Groups subsystem that the CPU
	 * will be moving cpu partitions. This is done before
	 * CPUs are paused to provide an opportunity for any
	 * needed memory allocations.
	 */
	pg_cpupart_out(cp, oldpp);
	pg_cpupart_in(cp, newpp);

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strong or weak bound to this CPU.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				pg_cpupart_out(cp, newpp);
				pg_cpupart_in(cp, oldpp);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		pg_cpupart_out(cp, newpp);
		pg_cpupart_in(cp, oldpp);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}

	}

	/*
	 * Now that CPUs are paused, let the PG subsystem perform
	 * any necessary data structure updates.
	 */
	pg_cpupart_move(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;
	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {

		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(bitset_is_null(&newpp->cp_haltset));
	ASSERT(bitset_is_null(&oldpp->cp_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk thru the active process list to look for
		 * threads that need to have a new home lgroup,
		 * or the last CPU they run on is the same CPU
		 * being moved out of the partition.
		 */

		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {

				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different lpl
				 */

				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				    t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose best lgroup for home when
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
			    t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}
Example #18
/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_TIMER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and whe WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner; ;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				membar_enter();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(panicstr != NULL)) {
			kpreempt_enable();
			return;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(rw, __func__, "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}
Example #19
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
	kthread_id_t	owner;
	hrtime_t	sleep_time = 0;	/* how long we slept */
	uint_t		spin_count = 0;	/* how many times we spun */
	cpu_t 		*cpup, *last_cpu;
	extern cpu_t	*cpu_list;
	turnstile_t	*ts;
	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
	int		backoff;	/* current backoff */
	int		backctr;	/* ctr for backoff */
	int		sleep_count = 0;

	ASSERT_STACK_ALIGNED();

	if (MUTEX_TYPE_SPIN(lp)) {
		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
		    &lp->m_spin.m_oldspl);
		return;
	}

	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
		mutex_panic("mutex_enter: bad mutex", lp);
		return;
	}

	/*
	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
	 * so we must verify by disabling preemption and loading CPU again.
	 */
	cpup = CPU;
	if (CPU_ON_INTR(cpup) && !panicstr) {
		kpreempt_disable();
		if (CPU_ON_INTR(CPU))
			mutex_panic("mutex_enter: adaptive at high PIL", lp);
		kpreempt_enable();
	}

	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

	if (&plat_lock_delay) {
		backoff = 0;
	} else {
		backoff = BACKOFF_BASE;
	}

	for (;;) {
spin:
		spin_count++;
		/*
		 * Add an exponential backoff delay before trying again
		 * to touch the mutex data structure.
		 * The spin_count test and call to nulldev are to prevent
		 * the compiler optimizer from eliminating the delay loop.
		 */
		if (&plat_lock_delay) {
			plat_lock_delay(&backoff);
		} else {
			for (backctr = backoff; backctr; backctr--) {
				if (!spin_count) (void) nulldev();
			};    /* delay */
			backoff = backoff << 1;			/* double it */
			if (backoff > BACKOFF_CAP) {
				backoff = BACKOFF_CAP;
			}

			SMT_PAUSE();
		}

		if (panicstr)
			return;

		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
			if (mutex_adaptive_tryenter(lp))
				break;
			continue;
		}

		if (owner == curthread)
			mutex_panic("recursive mutex_enter", lp);

		/*
		 * If lock is held but owner is not yet set, spin.
		 * (Only relevant for platforms that don't have cas.)
		 */
		if (owner == MUTEX_NO_OWNER)
			continue;

		/*
		 * When searching the other CPUs, start with the one where
		 * we last saw the owner thread.  If owner is running, spin.
		 *
		 * We must disable preemption at this point to guarantee
		 * that the list doesn't change while we traverse it
		 * without the cpu_lock mutex.  While preemption is
		 * disabled, we must revalidate our cached cpu pointer.
		 */
		kpreempt_disable();
		if (cpup->cpu_next == NULL)
			cpup = cpu_list;
		last_cpu = cpup;	/* mark end of search */
		do {
			if (cpup->cpu_thread == owner) {
				kpreempt_enable();
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		kpreempt_enable();

		/*
		 * The owner appears not to be running, so block.
		 * See the Big Theory Statement for memory ordering issues.
		 */
		ts = turnstile_lookup(lp);
		MUTEX_SET_WAITERS(lp);
		membar_enter();

		/*
		 * Recheck whether owner is running after waiters bit hits
		 * global visibility (above).  If owner is running, spin.
		 *
		 * Since we are at ipl DISP_LEVEL, kernel preemption is
		 * disabled, however we still need to revalidate our cached
		 * cpu pointer to make sure the cpu hasn't been deleted.
		 */
		if (cpup->cpu_next == NULL)
			last_cpu = cpup = cpu_list;
		do {
			if (cpup->cpu_thread == owner) {
				turnstile_exit(lp);
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		membar_consumer();

		/*
		 * If owner and waiters bit are unchanged, block.
		 */
		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
			sleep_time -= gethrtime();
			(void) turnstile_block(ts, TS_WRITER_Q, lp,
			    &mutex_sobj_ops, NULL, NULL);
			sleep_time += gethrtime();
			sleep_count++;
		} else {
			turnstile_exit(lp);
		}
	}

	ASSERT(MUTEX_OWNER(lp) == curthread);

	if (sleep_time != 0) {
		/*
		 * Note, sleep time is the sum of all the sleeping we
		 * did.
		 */
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
	}

	/*
	 * We do not count a sleep as a spin.
	 */
	if (spin_count > sleep_count)
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
		    spin_count - sleep_count);

	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
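
The blocking path above is a store-then-load pattern: set the waiters bit, force it to global visibility with membar_enter(), and only then re-check whether the owner is still running on a CPU. A reduced C11 sketch of that ordering; the mutex word and the owner_is_running()/block_on() helpers are hypothetical stand-ins, not the illumos API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define WAITERS_BIT	0x1u

/* hypothetical stand-ins for "is the owner on a CPU?" and "go to sleep" */
static bool owner_is_running(uintptr_t owner) { (void)owner; return false; }
static void block_on(void *wchan) { (void)wchan; }

/* Decide between spinning and sleeping once the lock has been seen held. */
void
mutex_block_slowpath(_Atomic uintptr_t *m)
{
	for (;;) {
		uintptr_t owner = atomic_load_explicit(m, memory_order_relaxed);

		/* Publish the waiters bit with seq_cst, the role played by
		 * membar_enter(): the store must be globally visible before
		 * the owner-running recheck below, a store->load ordering
		 * that plain release/acquire would not provide. */
		atomic_fetch_or_explicit(m, WAITERS_BIT, memory_order_seq_cst);

		if (owner_is_running(owner & ~(uintptr_t)WAITERS_BIT))
			continue;	/* owner still on a CPU: spin again */
		block_on((void *)m);	/* owner not running: go to sleep */
		return;
	}
}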
Example #20
int atomicGet(volatile atomic_t& val)
{
    long temp = val.l;
    membar_enter();
    return temp;
}
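
atomicGet() reads a plain long and then issues membar_enter(), i.e. a load with acquire semantics. In C11 that collapses into a single acquire load; the atomic_t layout below is an assumed analogue of the original.

#include <stdatomic.h>

typedef struct { _Atomic long l; } atomic_t;	/* hypothetical layout */

/* Equivalent of "read, then membar_enter()": one acquire load. */
long
atomic_get(const atomic_t *val)
{
	return atomic_load_explicit(&val->l, memory_order_acquire);
}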
Example #21
/*
 * Called by the master in the TSC sync operation (usually the boot CPU).
 * If the slave is discovered to have a skew, gethrtimef will be changed to
 * point to tsc_gethrtime_delta(). Calculating skews is precise only when
 * the master and slave TSCs are read simultaneously; however, there is no
 * algorithm that can read both CPUs in perfect simultaneity. The proposed
 * algorithm is an approximate method based on the behaviour of cache
 * management. The slave CPU continuously reads TSC and then reads a global
 * variable which the master CPU updates. The moment the master's update reaches
 * the slave's visibility (being forced by an mfence operation) we use the TSC
 * reading taken on the slave. A corresponding TSC read will be taken on the
 * master as soon as possible after finishing the mfence operation. But the
 * delay between causing the slave to notice the invalid cache line and the
 * completion of the mfence is not repeatable. This error is heuristically
 * assumed to be 1/4th of the total write time, as measured by the two TSC
 * reads on the master sandwiching the mfence. Furthermore, due to the nature of
 * bus arbitration, contention on memory bus, etc., the time taken for the write
 * to reflect globally can vary a lot. So instead of taking a single reading,
 * a set of readings are taken and the one with least write time is chosen
 * to calculate the final skew.
 *
 * TSC sync is disabled in the context of virtualization because the CPUs
 * assigned to the guest are virtual CPUs which means the real CPUs on which
 * guest runs keep changing during life time of guest OS. So we would end up
 * calculating TSC skews for a set of CPUs during boot whereas the guest
 * might migrate to a different set of physical CPUs at a later point of
 * time.
 */
void
tsc_sync_master(processorid_t slave)
{
	ulong_t flags, source, min_write_time = ~0UL;
	hrtime_t write_time, x, mtsc_after, tdelta;
	tsc_sync_t *tsc = tscp;
	int cnt;
	int hwtype;

	hwtype = get_hwenv();
	if (!tsc_master_slave_sync_needed || hwtype == HW_XEN_HVM ||
	    hwtype == HW_VMWARE)
		return;

	flags = clear_int_flag();
	source = CPU->cpu_id;

	for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) {
		while (tsc_sync_go != TSC_SYNC_GO)
			SMT_PAUSE();

		tsc->master_tsc = tsc_read();
		membar_enter();
		mtsc_after = tsc_read();
		while (tsc_sync_go != TSC_SYNC_DONE)
			SMT_PAUSE();
		write_time =  mtsc_after - tsc->master_tsc;
		if (write_time <= min_write_time) {
			min_write_time = write_time;
			/*
			 * Apply heuristic adjustment only if the calculated
			 * delta is > 1/4th of the write time.
			 */
			x = tsc->slave_tsc - mtsc_after;
			if (x < 0)
				x = -x;
			if (x > (min_write_time/4))
				/*
				 * Subtract 1/4th of the measured write time
				 * from the master's TSC value, as an estimate
				 * of how late the mfence completion came
				 * after the slave noticed the cache line
				 * change.
				 */
				tdelta = tsc->slave_tsc -
				    (mtsc_after - (min_write_time/4));
			else
				tdelta = tsc->slave_tsc - mtsc_after;
			tsc_sync_tick_delta[slave] =
			    tsc_sync_tick_delta[source] - tdelta;
		}

		tsc->master_tsc = tsc->slave_tsc = write_time = 0;
		membar_enter();
		tsc_sync_go = TSC_SYNC_STOP;
	}
	if (tdelta < 0)
		tdelta = -tdelta;
	if (tdelta > largest_tsc_delta)
		largest_tsc_delta = tdelta;
	if (min_write_time < shortest_write_time)
		shortest_write_time = min_write_time;
	/*
	 * Enable delta variants of tsc functions if the largest of all chosen
	 * deltas is > smallest of the write time.
	 */
	if (largest_tsc_delta > shortest_write_time) {
		gethrtimef = tsc_gethrtime_delta;
		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
	}
	restore_int_flag(flags);
}
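
The skew estimate described in the comment reduces to a small per-sample calculation: take the sample with the smallest write time, and if the slave/master gap exceeds a quarter of that write time, assume the mfence completed about write_time/4 late and discount that from the master's reading. A standalone sketch of just that arithmetic; the field names mirror the code above, but this is an illustration, not the illumos implementation.

#include <stdint.h>

/* One master/slave sample from the sync loop. */
struct tsc_sample {
	int64_t master_tsc;	/* master's TSC just before the mfence */
	int64_t mtsc_after;	/* master's TSC just after the mfence  */
	int64_t slave_tsc;	/* slave's TSC when it saw the update  */
};

/* Estimated slave-minus-master offset for one sample. */
static int64_t
tsc_sample_delta(const struct tsc_sample *s)
{
	int64_t write_time = s->mtsc_after - s->master_tsc;
	int64_t x = s->slave_tsc - s->mtsc_after;

	if (x < 0)
		x = -x;
	/* If the apparent gap is more than 1/4 of the write time, assume
	 * the mfence completed about write_time/4 after the slave noticed
	 * the cache-line change and discount that much. */
	if (x > write_time / 4)
		return (s->slave_tsc - (s->mtsc_after - write_time / 4));
	return (s->slave_tsc - s->mtsc_after);
}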
Example #22
static int
machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	switch (cmd) {
		case DDI_ATTACH:
			break;
		case DDI_RESUME:
			return (DDI_SUCCESS);
		default:
			return (DDI_FAILURE);
	}

#if !defined(__APPLE__)
	machtrace_probe = (void (*)())dtrace_probe;
	membar_enter();

	if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
				DDI_PSEUDO, NULL) == DDI_FAILURE ||
			dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
				&machtrace_pops, NULL, &machtrace_id) != 0) {
		machtrace_probe = systrace_stub;
#else
	machtrace_probe = dtrace_probe;
	membar_enter();
	
	if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
				DDI_PSEUDO, 0) == DDI_FAILURE ||
			dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
				&machtrace_pops, NULL, &machtrace_id) != 0) {
                machtrace_probe = (void (*))&systrace_stub;
#endif /* __APPLE__ */		
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	machtrace_devi = devi;

	return (DDI_SUCCESS);
}

d_open_t _systrace_open;

int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

#define SYSTRACE_MAJOR  -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw systrace_cdevsw =
{
	_systrace_open,		/* open */
	eno_opcl,		/* close */
	eno_rdwrt,			/* read */
	eno_rdwrt,			/* write */
	eno_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev, /* stop */
	(reset_fcn_t *)nulldev, /* reset */
	NULL,				/* tty's */
	eno_select,			/* select */
	eno_mmap,			/* mmap */
	eno_strat,			/* strategy */
	eno_getc,			/* getc */
	eno_putc,			/* putc */
	0					/* type */
};

static int gSysTraceInited = 0;

void systrace_init( void );

void systrace_init( void )
{
	if (0 == gSysTraceInited) {
		int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);

		if (majdevno < 0) {
			printf("systrace_init: failed to allocate a major number!\n");
			gSysTraceInited = 0;
			return;
		}

		systrace_attach( (dev_info_t	*)(uintptr_t)majdevno, DDI_ATTACH );
		machtrace_attach( (dev_info_t	*)(uintptr_t)majdevno, DDI_ATTACH );

		gSysTraceInited = 1;
	} else
		panic("systrace_init: called twice!\n");
}
#undef SYSTRACE_MAJOR
#endif /* __APPLE__ */

static uint64_t
systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
{
#pragma unused(arg,id,parg,aframes)     /* __APPLE__ */
	uint64_t val = 0;
	syscall_arg_t *stack = (syscall_arg_t *)NULL;

	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());	

	if (uthread)
		stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;

	if (!stack)
		return(0);

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	/* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
	val = (uint64_t)*(stack+argno);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	return (val);
}
Example #23
static int
pthread__rwlock_wrlock(pthread_rwlock_t *ptr, const struct timespec *ts)
{
	uintptr_t owner, next;
	pthread_mutex_t *interlock;
	pthread_t self;
	int error;

	self = pthread__self();

#ifdef ERRORCHECK
	if (ptr->ptr_magic != _PT_RWLOCK_MAGIC)
		return EINVAL;
#endif

	for (owner = (uintptr_t)ptr->ptr_owner;; owner = next) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & RW_THREAD) == 0) {
			next = rw_cas(ptr, owner,
			    (uintptr_t)self | RW_WRITE_LOCKED);
			if (owner == next) {
				/* Got it! */
#ifndef PTHREAD__ATOMIC_IS_MEMBAR
				membar_enter();
#endif
				return 0;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		if ((owner & RW_THREAD) == (uintptr_t)self)
			return EDEADLK;

		/* If held write locked and no waiters, spin. */
		if (pthread__rwlock_spin(owner)) {
			while (pthread__rwlock_spin(owner)) {
				owner = (uintptr_t)ptr->ptr_owner;
			}
			next = owner;
			continue;
		}

		/*
		 * Grab the interlock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		interlock = pthread__hashlock(ptr);
		pthread_mutex_lock(interlock);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		next = rw_cas(ptr, owner,
		    owner | RW_HAS_WAITERS | RW_WRITE_WANTED);
		if (owner != next) {
			pthread_mutex_unlock(interlock);
			continue;
		}

		/* The waiters bit is set - it's safe to sleep. */
		PTQ_INSERT_TAIL(&ptr->ptr_wblocked, self, pt_sleep);
		self->pt_rwlocked = _RW_WANT_WRITE;
		self->pt_sleepobj = &ptr->ptr_wblocked;
		self->pt_early = pthread__rwlock_early;
		error = pthread__park(self, interlock, &ptr->ptr_wblocked,
		    ts, 0, &ptr->ptr_wblocked);

		/* Did we get the lock? */
		if (self->pt_rwlocked == _RW_LOCKED) {
#ifndef PTHREAD__ATOMIC_IS_MEMBAR
			membar_enter();
#endif
			return 0;
		}
		if (error != 0)
			return error;

		pthread__errorfunc(__FILE__, __LINE__, __func__,
		    "direct handoff failure");
	}
}