Example #1
int ti_threadgroup_fork(ti_threadgroup_t *tg, int16_t ext_tid,
                        void **bcast_val)
{
    if (tg->tid_map[ext_tid] == 0) {
        tg->envelope = bcast_val ? *bcast_val : NULL;
        cpu_sfence();
        tg->forked = 1;
        tg->group_sense = tg->thread_sense[0]->sense;

        // if it's possible that threads are sleeping, signal them
        if (tg->sleep_threshold) {
            uv_mutex_lock(&tg->alarm_lock);
            uv_cond_broadcast(&tg->alarm);
            uv_mutex_unlock(&tg->alarm_lock);
        }
    }
    else {
        // spin up to threshold cycles (count sheep), then sleep
        uint64_t spin_cycles, spin_start = rdtsc();
        while (tg->group_sense !=
               tg->thread_sense[tg->tid_map[ext_tid]]->sense) {
            if (tg->sleep_threshold) {
                spin_cycles = rdtsc() - spin_start;
                if (spin_cycles >= tg->sleep_threshold) {
                    uv_mutex_lock(&tg->alarm_lock);
                    if (tg->group_sense !=
                        tg->thread_sense[tg->tid_map[ext_tid]]->sense) {
                        uv_cond_wait(&tg->alarm, &tg->alarm_lock);
                    }
                    uv_mutex_unlock(&tg->alarm_lock);
                    spin_start = rdtsc();
                    continue;
                }
            }
            cpu_pause();
        }
        cpu_lfence();
        if (bcast_val)
            *bcast_val = tg->envelope;
    }

    return 0;
}
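The leader/worker handoff above (and its revised form in Example #5) pairs cpu_sfence() on the producer side with cpu_lfence() on the consumer side: payload first, flag second, and the reverse order on the reader. A minimal userspace sketch of that pairing, using C11 fences as portable stand-ins; the names publish/consume/envelope/forked are illustrative, not Julia's API.

#include <stdatomic.h>

static void *envelope;             /* payload, written before the flag */
static atomic_int forked;          /* publication flag */

/* Leader: store the payload, fence, then raise the flag. */
static void publish(void *val)
{
    envelope = val;
    atomic_thread_fence(memory_order_release);   /* plays the role of cpu_sfence() */
    atomic_store_explicit(&forked, 1, memory_order_relaxed);
}

/* Worker: wait for the flag, fence, then read the payload. */
static void *consume(void)
{
    while (atomic_load_explicit(&forked, memory_order_relaxed) == 0)
        ;                                        /* the real code spins, then sleeps */
    atomic_thread_fence(memory_order_acquire);   /* plays the role of cpu_lfence() */
    return envelope;
}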
Example #2
int
__gettimeofday(struct timeval *tv, struct timezone *tz)
{
	struct timespec ts;
	int res;
	int w;

	if (fast_clock == 0 && fast_count++ >= 10) {
		__kpmap_map(&upticksp, &fast_clock, KPTYPE_UPTICKS);
		__kpmap_map(&ts_realtime, &fast_clock, KPTYPE_TS_REALTIME);
		__kpmap_map(&fast_gtod, &fast_clock, KPTYPE_FAST_GTOD);
		__kpmap_map(NULL, &fast_clock, 0);
	}
	if (fast_clock > 0 && *fast_gtod && tz == NULL) {
		do {
			w = *upticksp;
			cpu_lfence();
			ts = ts_realtime[w & 1];
			cpu_lfence();
			w = *upticksp - w;
		} while (w > 1);
		res = 0;
		if (tv) {
			tv->tv_sec = ts.tv_sec;
			tv->tv_usec = ts.tv_nsec / 1000;
		}
	} else {
		res = __sys_gettimeofday(tv, tz);
	}
	return res;
}
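The do/while above is a seqlock-style read: copy one slot of the double buffer, then retry if the tick counter moved far enough that the slot may have been overwritten. A minimal reader-side sketch, assuming a writer that bumps upticks once per update and fills the slot it is not currently publishing; C11 acquire ordering stands in for the two cpu_lfence() calls.

#include <stdatomic.h>
#include <time.h>

/* Double-buffered timestamp published by a writer (the kernel in the example). */
static struct timespec ts_realtime[2];
static atomic_uint upticks;        /* bumped by the writer once per update */

/* Reader: copy a slot, then retry if the writer advanced past it meanwhile. */
static struct timespec read_realtime(void)
{
    struct timespec ts;
    unsigned w;

    do {
        w = atomic_load_explicit(&upticks, memory_order_acquire);
        ts = ts_realtime[w & 1];
        atomic_thread_fence(memory_order_acquire);
    } while (atomic_load_explicit(&upticks, memory_order_relaxed) - w > 1);

    return ts;
}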
Example #3
/*
 * Get SMP fully working before we start initializing devices.
 */
static
void
ap_finish(void)
{
	int i;
	cpumask_t ncpus_mask = 0;

	for (i = 1; i <= ncpus; i++)
		ncpus_mask |= CPUMASK(i);

	mp_finish = 1;
	if (bootverbose)
		kprintf("Finish MP startup\n");

	/* build our map of 'other' CPUs */
	mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid);

	/*
	 * Let the other cpus finish initializing and build their map
	 * of 'other' CPUs.
	 */
	rel_mplock();
	while (smp_active_mask != smp_startup_mask) {
		DELAY(100000);
		cpu_lfence();
	}

	while (try_mplock() == 0)
		DELAY(100000);
	if (bootverbose)
		kprintf("Active CPU Mask: %08x\n", smp_active_mask);
}
Example #4
/*
 * Get SMP fully working before we start initializing devices.
 */
static
void
ap_finish(void)
{
	mp_finish = 1;
	if (bootverbose)
		kprintf("Finish MP startup\n");

	/* build our map of 'other' CPUs */
	mycpu->gd_other_cpus = smp_startup_mask;
	CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid);

	/*
	 * Let the other cpus finish initializing and build their map
	 * of 'other' CPUs.
	 */
	rel_mplock();
	while (CPUMASK_CMPMASKNEQ(smp_active_mask, smp_startup_mask)) {
		DELAY(100000);
		cpu_lfence();
	}

	while (try_mplock() == 0)
		DELAY(100000);
	if (bootverbose)
		kprintf("Active CPU Mask: %08lx\n",
			(long)CPUMASK_LOWMASK(smp_active_mask));
}
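Examples #3 and #4 poll a shared CPU mask until it matches the startup mask, with cpu_lfence() keeping each iteration from acting on a stale, speculatively issued load. A hedged userspace sketch of the same wait loop; cpumask_word_t, wait_for_active and the sched_yield() back-off are assumptions, not kernel API.

#include <stdatomic.h>
#include <stdint.h>
#include <sched.h>

/* Hypothetical stand-in for the kernel's cpumask_t. */
typedef atomic_uint_fast64_t cpumask_word_t;

/* Spin until every CPU in startup_mask has marked itself active.  The
 * acquire load takes the place of the cpu_lfence() in ap_finish(): each
 * iteration re-reads the mask with proper ordering instead of reusing a
 * stale value. */
static void wait_for_active(cpumask_word_t *active_mask, uint_fast64_t startup_mask)
{
    while (atomic_load_explicit(active_mask, memory_order_acquire) != startup_mask)
        sched_yield();             /* the kernel busy-waits with DELAY(100000) */
}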
Example #5
int ti_threadgroup_fork(ti_threadgroup_t *tg, int16_t ext_tid,
                        void **bcast_val)
{
    if (tg->tid_map[ext_tid] == 0) {
        tg->envelope = bcast_val ? *bcast_val : NULL;
        cpu_sfence();
        tg->forked = 1;
        tg->group_sense = tg->thread_sense[0]->sense;

        // if it's possible that threads are sleeping, signal them
        if (tg->sleep_threshold) {
            uv_mutex_lock(&tg->alarm_lock);
            uv_cond_broadcast(&tg->alarm);
            uv_mutex_unlock(&tg->alarm_lock);
        }
    }
    else {
        // spin up to threshold ns (count sheep), then sleep
        uint64_t spin_ns;
        uint64_t spin_start = 0;
        while (tg->group_sense !=
                tg->thread_sense[tg->tid_map[ext_tid]]->sense) {
            if (tg->sleep_threshold) {
                if (!spin_start) {
                    // Lazily initialize spin_start since uv_hrtime is expensive
                    spin_start = uv_hrtime();
                    continue;
                }
                spin_ns = uv_hrtime() - spin_start;
                // In case uv_hrtime is not monotonic, we'll sleep earlier
                if (spin_ns >= tg->sleep_threshold) {
                    uv_mutex_lock(&tg->alarm_lock);
                    if (tg->group_sense !=
                            tg->thread_sense[tg->tid_map[ext_tid]]->sense) {
                        uv_cond_wait(&tg->alarm, &tg->alarm_lock);
                    }
                    uv_mutex_unlock(&tg->alarm_lock);
                    spin_start = 0;
                    continue;
                }
            }
            cpu_pause();
        }
        cpu_lfence();
        if (bcast_val)
            *bcast_val = tg->envelope;
    }

    return 0;
}
Example #6
static
void
loopdebug(const char *msg, pmap_inval_info_t *info)
{
    int p;
    int cpu = mycpu->gd_cpuid;

    /*
     * Don't kprintf() anything if the pmap inval watchdog gets hit.
     * DRM can cause an occasional watchdog hit (at least with a 1/16
     * second watchdog), and attempting to kprintf to the KVM frame buffer
     * from Xinvltlb, which ignores critical sections, can implode the
     * system.
     */
    if (pmap_inval_watchdog_print == 0)
        return;

    cpu_lfence();
#ifdef LOOPRECOVER
    atomic_add_long(&smp_smurf_mask.ary[0], 0);
#endif
    kprintf("ipilost-%s! %d mode=%d m=%08jx d=%08jx "
#ifdef LOOPRECOVER
            "s=%08jx "
#endif
#ifdef LOOPMASK_IN
            "in=%08jx "
#endif
#ifdef LOOPRECOVER
            "smurf=%08jx\n"
#endif
            , msg, cpu, info->mode,
            info->mask.ary[0],
            info->done.ary[0]
#ifdef LOOPRECOVER
            , info->sigmask.ary[0]
#endif
#ifdef LOOPMASK_IN
            , smp_in_mask.ary[0]
#endif
#ifdef LOOPRECOVER
            , smp_smurf_mask.ary[0]
#endif
           );
    kprintf("mdglob ");
    for (p = 0; p < ncpus; ++p)
        kprintf(" %d", CPU_prvspace[p]->mdglobaldata.gd_xinvaltlb);
    kprintf("\n");
}
Example #7
static int
sysctl_get_basetime(SYSCTL_HANDLER_ARGS)
{
	struct timespec *bt;
	int error;
	int index;

	/*
	 * Because basetime data and index may be updated by another cpu,
	 * a load fence is required to ensure that the data we read has
	 * not been speculatively read relative to a possibly updated index.
	 */
	index = basetime_index;
	cpu_lfence();
	bt = &basetime[index];
	error = SYSCTL_OUT(req, bt, sizeof(*bt));
	return (error);
}
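The comment spells out the ordering requirement: the index must be read before any of the basetime data, so the data copy cannot be satisfied by a speculative load made against a stale index. A minimal sketch of that reader, assuming a writer that fully fills a slot before publishing its index with a release store; the slot count and the names basetime_buf, basetime_idx and get_basetime are illustrative.

#include <stdatomic.h>
#include <time.h>

#define BASETIME_SLOTS 2           /* slot count here is an assumption */

static struct timespec basetime_buf[BASETIME_SLOTS];
static atomic_int basetime_idx;    /* writer fills a free slot, then publishes its index */

/* Reader: the acquire load of the index is the analog of the cpu_lfence()
 * above -- the copy of the slot cannot be served by loads issued before
 * the index was observed. */
static struct timespec get_basetime(void)
{
    int idx = atomic_load_explicit(&basetime_idx, memory_order_acquire);
    return basetime_buf[idx];
}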
Example #8
/*
 * (Frontend) collect a response from a running cluster op.
 *
 * Responses are fed from all appropriate nodes concurrently
 * and collected into a cohesive response >= collect_key.
 *
 * The collector will return the instant quorum or other requirements
 * are met, even if some nodes get behind or become non-responsive.
 *
 * HAMMER2_XOP_COLLECT_NOWAIT	- Used to 'poll' a completed collection,
 *				  usually called synchronously from the
 *				  node XOPs for the strategy code to
 *				  fake the frontend collection and complete
 *				  the BIO as soon as possible.
 *
 * HAMMER2_XOP_SYNCHRONIZER	- Request synchronization with a particular
 *				  cluster index, prevents looping when that
 *				  index is out of sync so caller can act on
 *				  the out of sync element.  ESRCH and EDEADLK
 *				  can be returned if this flag is specified.
 *
 * Returns 0 on success plus a filled out xop->cluster structure.
 * Return ENOENT on normal termination.
 * Otherwise return an error.
 */
int
hammer2_xop_collect(hammer2_xop_head_t *xop, int flags)
{
	hammer2_xop_fifo_t *fifo;
	hammer2_chain_t *chain;
	hammer2_key_t lokey;
	int error;
	int keynull;
	int adv;		/* advance the element */
	int i;
	uint32_t check_counter;

loop:
	/*
	 * First loop tries to advance pieces of the cluster which
	 * are out of sync.
	 */
	lokey = HAMMER2_KEY_MAX;
	keynull = HAMMER2_CHECK_NULL;
	check_counter = xop->check_counter;
	cpu_lfence();

	for (i = 0; i < xop->cluster.nchains; ++i) {
		chain = xop->cluster.array[i].chain;
		if (chain == NULL) {
			adv = 1;
		} else if (chain->bref.key < xop->collect_key) {
			adv = 1;
		} else {
			keynull &= ~HAMMER2_CHECK_NULL;
			if (lokey > chain->bref.key)
				lokey = chain->bref.key;
			adv = 0;
		}
		if (adv == 0)
			continue;

		/*
		 * Advance element if possible, advanced element may be NULL.
		 */
		if (chain) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
		}
		fifo = &xop->collect[i];
		if (fifo->ri != fifo->wi) {
			cpu_lfence();
			chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
			++fifo->ri;
			xop->cluster.array[i].chain = chain;
			if (chain == NULL) {
				/* XXX */
				xop->cluster.array[i].flags |=
							HAMMER2_CITEM_NULL;
			}
			if (fifo->wi - fifo->ri < HAMMER2_XOPFIFO / 2)
				wakeup(xop);	/* XXX optimize */
			--i;		/* loop on same index */
		} else {
			/*
			 * Retain CITEM_NULL flag.  If set just repeat EOF.
			 * If not, the NULL,0 combination indicates an
			 * operation in-progress.
			 */
			xop->cluster.array[i].chain = NULL;
			/* retain any CITEM_NULL setting */
		}
	}

	/*
	 * Determine whether the lowest collected key meets clustering
	 * requirements.  Returns:
	 *
	 * 0	 	 - key valid, cluster can be returned.
	 *
	 * ENOENT	 - normal end of scan, return ENOENT.
	 *
	 * ESRCH	 - sufficient elements collected, quorum agreement
	 *		   that lokey is not a valid element and should be
	 *		   skipped.
	 *
	 * EDEADLK	 - sufficient elements collected, no quorum agreement
	 *		   (and no agreement possible).  In this situation a
	 *		   repair is needed, for now we loop.
	 *
	 * EINPROGRESS	 - insufficient elements collected to resolve, wait
	 *		   for event and loop.
	 */
	if ((flags & HAMMER2_XOP_COLLECT_WAITALL) &&
	    xop->run_mask != HAMMER2_XOPMASK_VOP) {
		error = EINPROGRESS;
	} else {
		error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
	}
	if (error == EINPROGRESS) {
		if (xop->check_counter == check_counter) {
			if (flags & HAMMER2_XOP_COLLECT_NOWAIT)
				goto done;
			tsleep_interlock(&xop->check_counter, 0);
			cpu_lfence();
			if (xop->check_counter == check_counter) {
				tsleep(&xop->check_counter, PINTERLOCKED,
					"h2coll", hz*60);
			}
		}
		goto loop;
	}
	if (error == ESRCH) {
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}
	if (error == EDEADLK) {
		kprintf("hammer2: no quorum possible lokey %016jx\n",
			lokey);
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}
	if (lokey == HAMMER2_KEY_MAX)
		xop->collect_key = lokey;
	else
		xop->collect_key = lokey + 1;
done:
	return error;
}
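The EINPROGRESS path above avoids a lost wakeup by snapshotting xop->check_counter before the scan and only sleeping if it is still unchanged after tsleep_interlock() and the cpu_lfence(). A rough userspace sketch of that counter-recheck idea; check_counter, work_ready and the sched_yield() back-off are assumptions standing in for the kernel primitives.

#include <stdatomic.h>
#include <sched.h>

static atomic_uint check_counter;  /* bumped (release) by producers after publishing work */
static atomic_int  work_ready;     /* stands in for "the scan found something" */

/* Snapshot the counter, scan, and only back off if the counter is still what
 * we scanned against; otherwise rescan immediately.  This mirrors the
 * check_counter/tsleep_interlock dance in hammer2_xop_collect(). */
static void wait_for_work(void)
{
    for (;;) {
        unsigned seen = atomic_load_explicit(&check_counter, memory_order_acquire);
        if (atomic_load_explicit(&work_ready, memory_order_acquire))
            return;
        if (atomic_load_explicit(&check_counter, memory_order_acquire) == seen)
            sched_yield();         /* the kernel sleeps with tsleep(..., PINTERLOCKED, ...) */
    }
}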
Example #9
/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	indefinite_info_t info;
	int error;

	indefinite_init(&info, mtx->mtx_ident, 1,
			((link->state & MTX_LINK_LINKED_SH) ? 'm' : 'M'));

	/*
	 * Sleep.  Handle false wakeups, interruptions, etc.
	 * The link may also have been aborted.  The LINKED
	 * bit was set by this cpu so we can test it without
	 * fences.
	 */
	error = 0;
	while (link->state & MTX_LINK_LINKED) {
		tsleep_interlock(link, 0);
		cpu_lfence();
		if (link->state & MTX_LINK_LINKED) {
			error = tsleep(link, flags | PINTERLOCKED,
				       mtx->mtx_ident, to);
			if (error)
				break;
		}
		if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
			indefinite_check(&info);
	}

	/*
	 * We need at least an lfence (load fence) to ensure our cpu does not
	 * reorder loads (of data outside the lock structure) prior to the
	 * remote cpu's release, since the above test may have run without
	 * any atomic interactions.
	 *
	 * If we do not do this then state updated by the other cpu before
	 * releasing its lock may not be read cleanly by our cpu when this
	 * function returns.  Even though the other cpu ordered its stores,
	 * our loads can still be out of order.
	 */
	cpu_mfence();

	/*
	 * We are done, make sure the link structure is unlinked.
	 * It may still be on the list due to e.g. EINTR or
	 * EWOULDBLOCK.
	 *
	 * It is possible for the tsleep to race an ABORT and cause
	 * error to be 0.
	 *
	 * The tsleep() can be woken up for numerous reasons and error
	 * might be zero in situations where we intend to return an error.
	 *
	 * (This is the synchronous case so state cannot be CALLEDBACK)
	 */
	switch(link->state) {
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		error = 0;
		break;
	case MTX_LINK_ABORTED:
		error = ENOLCK;
		break;
	case MTX_LINK_LINKED_EX:
	case MTX_LINK_LINKED_SH:
		mtx_delete_link(mtx, link);
		/* fall through */
	default:
		if (error == 0)
			error = EWOULDBLOCK;
		break;
	}

	/*
	 * Clear state on status returned.
	 */
	link->state = MTX_LINK_IDLE;

	if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
		indefinite_done(&info);

	return error;
}
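The block comment argues that a load fence is required after the unfenced wait, so that loads of data published by the releasing cpu are not satisfied ahead of observing the release (the code actually uses the stronger cpu_mfence()). A small sketch of that handoff with an acquire fence as the analog; struct handoff and wait_handoff are hypothetical, not the mtx API.

#include <stdatomic.h>

enum { H_LINKED = 1, H_ACQUIRED = 2, H_ABORTED = 3 };

struct handoff {
    atomic_int state;              /* flipped by the remote side on release */
    int        payload;            /* written by the remote side before that */
};

/* Wait until the remote side moves state away from LINKED, then fence before
 * touching anything it wrote.  Without the acquire fence, loads of payload
 * could be reordered ahead of the state test that observed the release. */
static int wait_handoff(struct handoff *h)
{
    while (atomic_load_explicit(&h->state, memory_order_relaxed) == H_LINKED)
        ;                          /* the kernel sleeps via tsleep_interlock()/tsleep() */
    atomic_thread_fence(memory_order_acquire);
    return h->payload;
}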
Example #10
/*
 * Called with a critical section held and interrupts enabled.
 */
int
pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
{
    globaldata_t gd = mycpu;
    pmap_inval_info_t *info;
    int loopme = 0;
    int cpu;
    cpumask_t cpumask;

    /*
     * Check all cpus for invalidations we may need to service.
     */
    cpu_ccfence();
    cpu = gd->gd_cpuid;
    cpumask = *cpumaskp;

    while (CPUMASK_TESTNZERO(cpumask)) {
        int n = BSFCPUMASK(cpumask);

#ifdef LOOPRECOVER
        KKASSERT(n >= 0 && n < MAXCPU);
#endif

        CPUMASK_NANDBIT(cpumask, n);
        info = &invinfo[n];

        /*
         * Due to interrupts/races we can catch a new operation
         * in an older interrupt.  A fence is needed once we detect
         * the (not) done bit.
         */
        if (!CPUMASK_TESTBIT(info->done, cpu))
            continue;
        cpu_lfence();
#ifdef LOOPRECOVER
        if (toolong) {
            kprintf("pminvl %d->%d %08jx %08jx mode=%d\n",
                    cpu, n, info->done.ary[0], info->mask.ary[0],
                    info->mode);
        }
#endif

        /*
         * info->mask and info->done always contain the originating
         * cpu until the originator is done.  Targets may still be
         * present in info->done after the originator is done (they
         * will be finishing up their loops).
         *
         * Clear info->mask bits on other cpus to indicate that they
         * have quiesced (entered the loop).  Once the other mask bits
         * are clear we can execute the operation on the original,
         * then clear the mask and done bits on the originator.  The
         * targets will then finish up their side and clear their
         * done bits.
         *
         * The command is considered 100% done when all done bits have
         * been cleared.
         */
        if (n != cpu) {
            /*
             * Command state machine for 'other' cpus.
             */
            if (CPUMASK_TESTBIT(info->mask, cpu)) {
                /*
                 * Other cpus indicate to the originator that
                 * they are quiesced.
                 */
                ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                loopme = 1;
            } else if (info->ptep &&
                       CPUMASK_TESTBIT(info->mask, n)) {
                /*
                 * Other cpu must wait for the originator (n)
                 * to complete its command if ptep is not NULL.
                 */
                loopme = 1;
            } else {
                /*
                 * Other cpu detects that the originator has
                 * completed its command, or there was no
                 * command.
                 *
                 * Now that the page table entry has changed,
                 * we can follow up with our own invalidation.
                 */
                vm_offset_t va = info->va;
                int npgs;

                if (va == (vm_offset_t)-1 ||
                        info->npgs > MAX_INVAL_PAGES) {
                    cpu_invltlb();
                } else {
                    for (npgs = info->npgs; npgs; --npgs) {
                        cpu_invlpg((void *)va);
                        va += PAGE_SIZE;
                    }
                }
                ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
                /* info invalid now */
                /* loopme left alone */
            }
        } else if (CPUMASK_TESTBIT(info->mask, cpu)) {
            /*
             * Originator is waiting for other cpus
             */
            if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
                /*
                 * Originator waits for other cpus to enter
                 * their loop (aka quiesce).
                 *
                 * If this bugs out the IPI may have been lost,
                 * try to reissue by resetting our own
                 * reentrancy bit and clearing the smurf mask
                 * for the cpus that did not respond, then
                 * reissuing the IPI.
                 */
                loopme = 1;
#ifdef LOOPRECOVER
                if (loopwdog(info)) {
                    info->failed = 1;
                    loopdebug("C", info);
                    /* XXX recover from possible bug */
                    mdcpu->gd_xinvaltlb = 0;
                    ATOMIC_CPUMASK_NANDMASK(smp_smurf_mask,
                                            info->mask);
                    cpu_disable_intr();
                    smp_invlpg(&smp_active_mask);

                    /*
                     * Force outer-loop retest of Xinvltlb
                     * requests (see mp_machdep.c).
                     */
                    mdcpu->gd_xinvaltlb = 2;
                    cpu_enable_intr();
                }
#endif
            } else {
                /*
                 * Originator executes operation and clears
                 * mask to allow other cpus to finish.
                 */
                KKASSERT(info->mode != INVDONE);
                if (info->mode == INVSTORE) {
                    if (info->ptep)
                        info->opte = atomic_swap_long(info->ptep, info->npte);
                    CHECKSIGMASK(info);
                    ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                    CHECKSIGMASK(info);
                } else {
                    if (atomic_cmpset_long(info->ptep,
                                           info->opte, info->npte)) {
                        info->success = 1;
                    } else {
                        info->success = 0;
                    }
                    CHECKSIGMASK(info);
                    ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                    CHECKSIGMASK(info);
                }
                loopme = 1;
            }
        } else {
            /*
             * Originator does not have to wait for the other
             * cpus to finish.  It clears its done bit.  A new
             * command will not be initiated by the originator
             * until the other cpus have cleared their done bits
             * (asynchronously).
             */
            vm_offset_t va = info->va;
            int npgs;

            if (va == (vm_offset_t)-1 ||
                    info->npgs > MAX_INVAL_PAGES) {
                cpu_invltlb();
            } else {
                for (npgs = info->npgs; npgs; --npgs) {
                    cpu_invlpg((void *)va);
                    va += PAGE_SIZE;
                }
            }

            /* leave loopme alone */
            /* other cpus may still be finishing up */
            /* can't race originator since that's us */
            info->mode = INVDONE;
            ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
        }
    }
    return loopme;
}
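The key ordering in this handler sits at the top of the loop: test the per-cpu bit in info->done first, and only after cpu_lfence() read the rest of the command in info. A compact sketch of that test-then-fence-then-read step; the inval_info fields and fetch_command are assumptions for illustration, not the kernel's actual layout.

#include <stdatomic.h>
#include <stdint.h>

struct inval_info {
    atomic_uint_fast64_t done;     /* bit per cpu; set by the originator last */
    uintptr_t            va;       /* command fields, written before done */
    int                  npgs;
};

/* Only look at the command fields after our done bit has been observed and an
 * acquire fence has been issued -- the same ordering pmap_inval_intr() gets
 * from testing info->done and then calling cpu_lfence(). */
static int fetch_command(struct inval_info *info, int cpu, uintptr_t *va, int *npgs)
{
    if (!(atomic_load_explicit(&info->done, memory_order_relaxed) &
          ((uint_fast64_t)1 << cpu)))
        return 0;                  /* nothing posted for us yet */
    atomic_thread_fence(memory_order_acquire);   /* analog of cpu_lfence() */
    *va = info->va;
    *npgs = info->npgs;
    return 1;
}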