Example #1
/*
 * Release a token that we hold.
 */
static __inline
void
_lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
{
	lwkt_token_t tok;
	long count;

	tok = ref->tr_tok;
	for (;;) {
		count = tok->t_count;
		cpu_ccfence();
		if (tok->t_ref == ref) {
			/*
			 * We are an exclusive holder.  We must clear tr_ref
			 * before we clear the TOK_EXCLUSIVE bit.  If we are
			 * unable to clear the bit we must restore
			 * tok->t_ref.
			 */
			KKASSERT(count & TOK_EXCLUSIVE);
			tok->t_ref = NULL;
			if (atomic_cmpset_long(&tok->t_count, count,
					       count & ~TOK_EXCLUSIVE)) {
				return;
			}
			tok->t_ref = ref;
			/* retry */
		} else {
			/*
			 * We are a shared holder
			 */
			KKASSERT(count & TOK_COUNTMASK);
			if (atomic_cmpset_long(&tok->t_count, count,
					       count - TOK_INCR)) {
				return;
			}
			/* retry */
		}
		/* retry */
	}
}
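
The release path above is a classic compare-and-swap retry loop over a packed count word: an exclusive holder clears TOK_EXCLUSIVE, a shared holder subtracts TOK_INCR, and either path retries if the word changed underneath it. Below is a minimal user-space sketch of that pattern using C11 <stdatomic.h> instead of the kernel's atomic_cmpset_long(); the identifiers tok_count, TOK_EXCLUSIVE and TOK_INCR are hypothetical stand-ins that only mirror the layout used above.

#include <stdatomic.h>
#include <stdbool.h>

#define TOK_EXCLUSIVE	0x1UL
#define TOK_INCR	0x2UL		/* shared count packed above the flag bit */

static _Atomic unsigned long tok_count;	/* hypothetical stand-in for tok->t_count */

static void
tok_release(bool exclusive)
{
	unsigned long count;

	for (;;) {
		count = atomic_load(&tok_count);
		if (exclusive) {
			/* Exclusive holder: clear the exclusive bit. */
			if (atomic_compare_exchange_weak(&tok_count, &count,
							 count & ~TOK_EXCLUSIVE))
				return;
		} else {
			/* Shared holder: drop one reference. */
			if (atomic_compare_exchange_weak(&tok_count, &count,
							 count - TOK_INCR))
				return;
		}
		/* lost the race; retry with fresh state */
	}
}
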
Example #2
/*
 * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 * FALSE on failure.
 *
 * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 * token, otherwise we are attempting to get a shared token.
 *
 * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 * it is a non-blocking operation (for both exclusive and shared acquisitions).
 */
static __inline
int
_lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
{
	lwkt_token_t tok;
	lwkt_tokref_t oref;
	long count;

	tok = ref->tr_tok;
	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
		td->td_gd->gd_intr_nesting_level == 0 ||
		panic_cpu_gd == mycpu),
		("Attempt to acquire token %p not already "
		"held in hard code section", tok));

	if (mode & TOK_EXCLUSIVE) {
		/*
		 * Attempt to get an exclusive token
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & ~TOK_EXCLREQ) == 0) {
				/*
				 * It is possible to get the exclusive bit.
				 * We must clear TOK_EXCLREQ on successful
				 * acquisition.
				 */
				if (atomic_cmpset_long(&tok->t_count, count,
						       (count & ~TOK_EXCLREQ) |
						       TOK_EXCLUSIVE)) {
					KKASSERT(tok->t_ref == NULL);
					tok->t_ref = ref;
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * Our thread already holds the exclusive
				 * bit, we treat this tokref as a shared
				 * token (sorta) to make the token release
				 * code easier.
				 *
				 * NOTE: oref cannot race above if it
				 *	 happens to be ours, so we're good.
				 *	 But we must still have a stable
				 *	 variable for both parts of the
				 *	 comparison.
				 *
				 * NOTE: Since we already have an exclusive
				 *	 lock and don't need to check EXCLREQ
				 *	 we can just use an atomic_add here
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				ref->tr_count &= ~TOK_EXCLUSIVE;
				return TRUE;
			} else if ((mode & TOK_EXCLREQ) &&
				   (count & TOK_EXCLREQ) == 0) {
				/*
				 * Unable to get the exclusive bit but being
				 * asked to set the exclusive-request bit.
				 * Since we are going to retry anyway just
				 * set the bit unconditionally.
				 */
				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
				return FALSE;
			} else {
				/*
				 * Unable to get the exclusive bit and not
				 * being asked to set the exclusive-request
				 * (aka lwkt_trytoken()), or EXCLREQ was
				 * already set.
				 */
				cpu_pause();
				return FALSE;
			}
			/* retry */
		}
	} else {
		/*
		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
		 * for shared tokens simply means the caller intends to
		 * block.  We never actually set the bit in tok->t_count.
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) {
				/*
				 * It may be possible to get the token shared.
				 */
				if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) &
				     TOK_EXCLUSIVE) == 0) {
					return TRUE;
				}
				atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * We own the exclusive bit on the token so
				 * we can in fact also get it shared.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				return TRUE;
			} else {
				/*
				 * We failed to get the token shared
				 */
				return FALSE;
			}
			/* retry */
		}
	}
}
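
The try-acquire above packs three pieces of state into one word: the exclusive bit, a shared-holder count, and an exclusive-request hint. A minimal sketch of the two core cases with C11 atomics follows, omitting the recursive-holder and EXCLREQ handling; the identifiers here are hypothetical and only mirror the bit layout used above.

#include <stdatomic.h>
#include <stdbool.h>

#define TOK_EXCLUSIVE	0x1UL
#define TOK_INCR	0x2UL

static _Atomic unsigned long tok_count;

/* Exclusive try-acquire: succeeds only when there are no holders (0 -> EXCLUSIVE). */
static bool
tok_try_exclusive(void)
{
	unsigned long expect = 0;

	return atomic_compare_exchange_strong(&tok_count, &expect,
					      TOK_EXCLUSIVE);
}

/*
 * Shared try-acquire: optimistically add a reference, then back it out if
 * an exclusive holder turned out to be present, mirroring the fetchadd/undo
 * idiom in the shared path above.
 */
static bool
tok_try_shared(void)
{
	unsigned long count;

	count = atomic_fetch_add(&tok_count, TOK_INCR);
	if ((count & TOK_EXCLUSIVE) == 0)
		return true;
	atomic_fetch_sub(&tok_count, TOK_INCR);
	return false;
}
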
Example #3
/*
 * Called with a critical section held and interrupts enabled.
 */
int
pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
{
    globaldata_t gd = mycpu;
    pmap_inval_info_t *info;
    int loopme = 0;
    int cpu;
    cpumask_t cpumask;

    /*
     * Check all cpus for invalidations we may need to service.
     */
    cpu_ccfence();
    cpu = gd->gd_cpuid;
    cpumask = *cpumaskp;

    while (CPUMASK_TESTNZERO(cpumask)) {
        int n = BSFCPUMASK(cpumask);

#ifdef LOOPRECOVER
        KKASSERT(n >= 0 && n < MAXCPU);
#endif

        CPUMASK_NANDBIT(cpumask, n);
        info = &invinfo[n];

        /*
         * Due to interrupts/races we can catch a new operation
         * in an older interrupt.  A fence is needed once we detect
         * the (not) done bit.
         */
        if (!CPUMASK_TESTBIT(info->done, cpu))
            continue;
        cpu_lfence();
#ifdef LOOPRECOVER
        if (toolong) {
            kprintf("pminvl %d->%d %08jx %08jx mode=%d\n",
                    cpu, n, info->done.ary[0], info->mask.ary[0],
                    info->mode);
        }
#endif

        /*
         * info->mask and info->done always contain the originating
         * cpu until the originator is done.  Targets may still be
         * present in info->done after the originator is done (they
         * will be finishing up their loops).
         *
         * Clear info->mask bits on other cpus to indicate that they
         * have quiesced (entered the loop).  Once the other mask bits
         * are clear we can execute the operation on the original,
         * then clear the mask and done bits on the originator.  The
         * targets will then finish up their side and clear their
         * done bits.
         *
         * The command is considered 100% done when all done bits have
         * been cleared.
         */
        if (n != cpu) {
            /*
             * Command state machine for 'other' cpus.
             */
            if (CPUMASK_TESTBIT(info->mask, cpu)) {
                /*
                 * Other cpu indicates to the originator that it
                 * has quiesced.
                 */
                ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                loopme = 1;
            } else if (info->ptep &&
                       CPUMASK_TESTBIT(info->mask, n)) {
                /*
                 * Other cpu must wait for the originator (n)
                 * to complete its command if ptep is not NULL.
                 */
                loopme = 1;
            } else {
                /*
                 * Other cpu detects that the originator has
                 * completed its command, or there was no
                 * command.
                 *
                 * Now that the page table entry has changed,
                 * we can follow up with our own invalidation.
                 */
                vm_offset_t va = info->va;
                int npgs;

                if (va == (vm_offset_t)-1 ||
                        info->npgs > MAX_INVAL_PAGES) {
                    cpu_invltlb();
                } else {
                    for (npgs = info->npgs; npgs; --npgs) {
                        cpu_invlpg((void *)va);
                        va += PAGE_SIZE;
                    }
                }
                ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
                /* info invalid now */
                /* loopme left alone */
            }
        } else if (CPUMASK_TESTBIT(info->mask, cpu)) {
            /*
             * Originator is waiting for other cpus
             */
            if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
                /*
                 * Originator waits for other cpus to enter
                 * their loop (aka quiesce).
                 *
                 * If this bugs out the IPI may have been lost,
                 * try to reissue by resetting our own
                 * reentrancy bit and clearing the smurf mask
                 * for the cpus that did not respond, then
                 * reissuing the IPI.
                 */
                loopme = 1;
#ifdef LOOPRECOVER
                if (loopwdog(info)) {
                    info->failed = 1;
                    loopdebug("C", info);
                    /* XXX recover from possible bug */
                    mdcpu->gd_xinvaltlb = 0;
                    ATOMIC_CPUMASK_NANDMASK(smp_smurf_mask,
                                            info->mask);
                    cpu_disable_intr();
                    smp_invlpg(&smp_active_mask);

                    /*
                     * Force outer-loop retest of Xinvltlb
                     * requests (see mp_machdep.c).
                     */
                    mdcpu->gd_xinvaltlb = 2;
                    cpu_enable_intr();
                }
#endif
            } else {
                /*
                 * Originator executes operation and clears
                 * mask to allow other cpus to finish.
                 */
                KKASSERT(info->mode != INVDONE);
                if (info->mode == INVSTORE) {
                    if (info->ptep)
                        info->opte = atomic_swap_long(info->ptep, info->npte);
                    CHECKSIGMASK(info);
                    ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                    CHECKSIGMASK(info);
                } else {
                    if (atomic_cmpset_long(info->ptep,
                                           info->opte, info->npte)) {
                        info->success = 1;
                    } else {
                        info->success = 0;
                    }
                    CHECKSIGMASK(info);
                    ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                    CHECKSIGMASK(info);
                }
                loopme = 1;
            }
        } else {
            /*
             * Originator does not have to wait for the other
             * cpus to finish.  It clears its done bit.  A new
             * command will not be initiated by the originator
             * until the other cpus have cleared their done bits
             * (asynchronously).
             */
            vm_offset_t va = info->va;
            int npgs;

            if (va == (vm_offset_t)-1 ||
                    info->npgs > MAX_INVAL_PAGES) {
                cpu_invltlb();
            } else {
                for (npgs = info->npgs; npgs; --npgs) {
                    cpu_invlpg((void *)va);
                    va += PAGE_SIZE;
                }
            }

            /* leave loopme alone */
            /* other cpus may still be finishing up */
            /* can't race originator since that's us */
            info->mode = INVDONE;
            ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
        }
    }
    return loopme;
}
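
The outer loop above repeatedly finds the lowest set bit in the pending-cpu mask with BSFCPUMASK(), clears it with CPUMASK_NANDBIT(), and services that cpu. A minimal sketch of that bit-scan idiom on a plain 64-bit mask is shown below, with the GCC/Clang builtin __builtin_ctzll() standing in for BSFCPUMASK and service_cpu() as a hypothetical placeholder for the per-cpu state machine.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical placeholder for the per-cpu work done in the loop above. */
static void
service_cpu(int n)
{
	printf("servicing cpu %d\n", n);
}

static void
scan_mask(uint64_t cpumask)
{
	while (cpumask != 0) {
		int n = __builtin_ctzll(cpumask);	/* index of lowest set bit */

		cpumask &= ~(1ULL << n);		/* clear it, then service the cpu */
		service_cpu(n);
	}
}
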
Example #4
/*
 * Stop a running timer and ensure that any running callout completes before
 * returning.  If the timer is running on another cpu this function may block
 * to interlock against the callout.  If the callout is currently executing
 * or blocked in another thread this function may also block to interlock
 * against the callout.
 *
 * The caller must be careful to avoid deadlocks, either by using
 * callout_init_lk() (which uses the lockmgr lock cancelation feature),
 * by using tokens and dealing with breaks in the serialization, or by using
 * the lockmgr lock cancelation feature yourself in the callout callback
 * function.
 *
 * callout_stop() returns non-zero if the callout was pending.
 */
static int
_callout_stop(struct callout *c, int issync)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;
	int flags;
	int nflags;
	int rc;
	int cpuid;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Fast path operations:
	 *
	 * If ARMED and owned by our cpu, or not ARMED, and other simple
	 * conditions are met, we can just clear ACTIVE and EXECUTED
	 * and we are done.
	 */
	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();

		cpuid = CALLOUT_FLAGS_TO_CPU(flags);

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count for the IPI we intend to send and break out of
		 * the fast path to enter the slow path.
		 */
		if (flags & CALLOUT_ARMED) {
			if (gd->gd_cpuid != cpuid) {
				nflags = flags + 1;
				if (atomic_cmpset_int(&c->c_flags,
						      flags, nflags)) {
					/* break to slow path */
					break;
				}
				continue;	/* retry */
			}
		} else {
			cpuid = gd->gd_cpuid;
			KKASSERT((flags & CALLOUT_IPI_MASK) == 0);
			KKASSERT((flags & CALLOUT_PENDING) == 0);
		}

		/*
		 * Process pending IPIs and retry (only if not called from
		 * an IPI).
		 */
		if (flags & CALLOUT_IPI_MASK) {
			lwkt_process_ipiq();
			continue;	/* retry */
		}

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status.  If pending we cannot clear ARMED until after
		 * we have removed (c) from the callwheel.
		 *
		 * NOTE: The callout might already not be armed but in this
		 *	 case it should also not be pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE |
				   CALLOUT_EXECUTED |
				   CALLOUT_WAITING |
				   CALLOUT_PENDING);

		/* NOTE: IPI_MASK already tested */
		if ((flags & CALLOUT_PENDING) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);

			goto skip_slow;
		}
		/* retry */
	}

	/*
	 * Slow path (and not called via an IPI).
	 *
	 * When ARMED to a different cpu the stop must be processed on that
	 * cpu.  Issue the IPI and wait for completion.  We have already
	 * incremented the IPI count.
	 */
	tgd = globaldata_find(cpuid);
	lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);

	for (;;) {
		int flags;
		int nflags;

		flags = c->c_flags;
		cpu_ccfence();
		if ((flags & CALLOUT_IPI_MASK) == 0)	/* fast path */
			break;
		nflags = flags | CALLOUT_WAITING;
		tsleep_interlock(c, 0);
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			tsleep(c, PINTERLOCKED, "cstp1", 0);
		}
	}

skip_slow:

	/*
	 * If (issync) we must also wait for any in-progress callbacks to
	 * complete, unless the stop is being executed from the callback
	 * itself.  The EXECUTED flag is set prior to the callback
	 * being made so our existing flags status already has it.
	 *
	 * If auto-lock mode is being used, this is where we cancel any
	 * blocked lock that is potentially preventing the target cpu
	 * from completing the callback.
	 */
	while (issync) {
		intptr_t *runp;
		intptr_t runco;

		sc = &softclock_pcpu_ary[cpuid];
		if (gd->gd_curthread == &sc->thread)	/* stop from cb */
			break;
		runp = &sc->running;
		runco = *runp;
		cpu_ccfence();
		if ((runco & ~(intptr_t)1) != (intptr_t)c)
			break;
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_BEG);
		tsleep_interlock(c, 0);
		if (atomic_cmpset_long(runp, runco, runco | 1))
			tsleep(c, PINTERLOCKED, "cstp3", 0);
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_END);
	}

	crit_exit_gd(gd);
	rc = (flags & CALLOUT_EXECUTED) != 0;

	return rc;
}
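
The fast path above is another single-word state machine: read c_flags, compute the stopped-state flags, and commit them with one compare-and-swap, retrying whenever the word changed in between. A minimal sketch of that transition with C11 atomics follows; the flag names echo the kernel's, but the structure and return value here are hypothetical simplifications (no ARMED/IPI bookkeeping and no callwheel removal).

#include <stdatomic.h>
#include <stdbool.h>

#define CO_ACTIVE	0x1
#define CO_EXECUTED	0x2
#define CO_PENDING	0x4

struct mini_callout {
	_Atomic int flags;
};

/* Returns true if the callout had executed before we stopped it. */
static bool
mini_callout_stop(struct mini_callout *c)
{
	int flags, nflags;

	for (;;) {
		flags = atomic_load(&c->flags);
		nflags = flags & ~(CO_ACTIVE | CO_EXECUTED | CO_PENDING);
		if (atomic_compare_exchange_weak(&c->flags, &flags, nflags))
			return (flags & CO_EXECUTED) != 0;
		/* lost the race; retry with fresh flags */
	}
}
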
Example #5
/*
 * API function - invalidate the pte at (va) and replace *ptep with npte
 * atomically only if *ptep equals opte, across the pmap's active cpus.
 *
 * Returns 1 on success, 0 on failure (caller typically retries).
 */
int
pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
                      pt_entry_t opte, pt_entry_t npte)
{
    globaldata_t gd = mycpu;
    pmap_inval_info_t *info;
    int success;
    int cpu = gd->gd_cpuid;
    cpumask_t tmpmask;
    unsigned long rflags;

    /*
     * Initialize invalidation for pmap and enter critical section.
     */
    if (pmap == NULL)
        pmap = &kernel_pmap;
    pmap_inval_init(pmap);

    /*
     * Shortcut single-cpu case if possible.
     */
    if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
        if (atomic_cmpset_long(ptep, opte, npte)) {
            if (va == (vm_offset_t)-1)
                cpu_invltlb();
            else
                cpu_invlpg((void *)va);
            pmap_inval_done(pmap);
            return 1;
        } else {
            pmap_inval_done(pmap);
            return 0;
        }
    }

    /*
     * We need a critical section to prevent getting preempted while
     * we set up our command.  A preemption might execute its own
     * pmap_inval*() command and create confusion below.
     */
    info = &invinfo[cpu];
    info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);

    /*
     * We must wait for other cpus which may still be finishing
     * up a prior operation.
     */
    while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
        if (loopwdog(info)) {
            info->failed = 1;
            loopdebug("B", info);
            /* XXX recover from possible bug */
            CPUMASK_ASSZERO(info->done);
        }
#endif
        cpu_pause();
    }
    KKASSERT(info->mode == INVDONE);

    /*
     * Must set our cpu in the invalidation scan mask before
     * any possibility of [partial] execution (remember, XINVLTLB
     * can interrupt a critical section).
     */
    ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

    info->va = va;
    info->npgs = 1;			/* unused */
    info->ptep = ptep;
    info->npte = npte;
    info->opte = opte;
#ifdef LOOPRECOVER
    info->failed = 0;
#endif
    info->mode = INVCMPSET;
    info->success = 0;

    tmpmask = pmap->pm_active;	/* volatile */
    cpu_ccfence();
    CPUMASK_ANDMASK(tmpmask, smp_active_mask);
    CPUMASK_ORBIT(tmpmask, cpu);
    info->mask = tmpmask;

    /*
     * The command may start executing the moment 'done' is initialized,
     * so disable interrupts on the current cpu to prevent the 'done'
     * field from changing (other cpus can't clear done bits until the
     * originating cpu clears its mask bit).
     */
#ifdef LOOPRECOVER
    info->sigmask = tmpmask;
    CHECKSIGMASK(info);
#endif
    cpu_sfence();
    rflags = read_rflags();
    cpu_disable_intr();

    ATOMIC_CPUMASK_COPY(info->done, tmpmask);

    /*
     * Pass our copy of the done bits (so they don't change out from
     * under us) to generate the Xinvltlb interrupt on the targets.
     */
    smp_invlpg(&tmpmask);
    success = info->success;
    KKASSERT(info->mode == INVDONE);

    ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
    write_rflags(rflags);
    pmap_inval_done(pmap);

    return success;
}
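
The cross-cpu path above is a publish/acknowledge rendezvous: the originator fills in the command, copies the target mask into info->done, sends the Xinvltlb IPIs, and each target clears its done bit once its local invalidation is finished. A minimal user-space sketch of that idiom with a plain 64-bit atomic mask is shown below; the busy-wait stands in for the interrupt-driven loop in pmap_inval_intr(), and every identifier here is a hypothetical stand-in for the kernel primitives.

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t done_mask;	/* one ack bit per target cpu */

/* Originator: publish work for the cpus in 'targets' and wait for acks. */
static void
originator_run(uint64_t targets)
{
	atomic_store(&done_mask, targets);
	/* ...send IPIs to the target cpus here... */
	while (atomic_load(&done_mask) != 0)
		;			/* spin until every target has acked */
}

/* Target: called on cpu 'n' after it has performed its local invalidation. */
static void
target_ack(int n)
{
	atomic_fetch_and(&done_mask, ~(UINT64_C(1) << n));
}
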