Example #1
File: vcore.c Project: 7perl/akaros
/* Returns -1 with errno set on error, or 0 on success.  This does not return
 * the number of cores actually granted (though some parts of the kernel do so
 * internally).
 *
 * This tries to get "more vcores", based on the number we currently have.
 * We'll probably need smarter 2LSs in the future that just directly set
 * amt_wanted.  What happens is we can have a bunch of 2LS vcore contexts
 * trying to get "another vcore", which currently means more than num_vcores().
 * If you have someone ask for two more, and then someone else ask for one more,
 * how many you ultimately ask for depends on if the kernel heard you and
 * adjusted num_vcores in between the two calls.  Or maybe your amt_wanted
 * already was num_vcores + 5, so neither call is telling the kernel anything
 * new.  It comes down to "one more than I have" vs "one more than I've already
 * asked for".
 *
 * So for now, this will keep the older behavior (one more than I have).  It
 * will try to accumulate any concurrent requests, and adjust amt_wanted up.
 * Interleaving, repetitive calls (everyone asking for one more) may get
 * ignored.
 *
 * Note this doesn't block or anything (even though the minimum number
 * requested is 1), since the kernel won't block the call.
 *
 * There are a few concurrency concerns.  We have _max_vcores_ever_wanted,
 * initialization of new vcore stacks/TLSs, making sure we don't ask for too
 * many (minor point), and most importantly not asking the kernel for too much
 * or otherwise miscommunicating our desires to the kernel.  Remember, the
 * kernel wants just one answer from the process about what it wants, and it is
 * up to the process to figure that out.
 *
 * So we basically have one thread do the submitting/prepping/bookkeeping, and
 * other threads that come in just update the number wanted and make sure
 * someone is sorting things out.  This will perform a bit better too, since
 * only one
 * vcore makes syscalls (which hammer the proc_lock).  This essentially has
 * cores submit work, and one core does the work (like Eric's old delta
 * functions).
 *
 * There's a slight semantic change: this will return 0 (success) for the
 * non-submitters, and 0 if we submitted.  -1 only if the submitter had some
 * non-kernel failure.
 *
 * Also, beware that this (like the old version) doesn't protect against races
 * on num_vcores().  num_vcores() is how many you have now or very soon
 * (accounting for messages in flight that will take your cores), not how many
 * you told the
 * kernel you want. */
int vcore_request(long nr_new_vcores)
{
	long nr_to_prep_now, nr_vcores_wanted;

	assert(vc_initialized);
	/* Early sanity checks */
	if ((nr_new_vcores < 0) || (nr_new_vcores + num_vcores() > max_vcores()))
		return -1;	/* consider ERRNO */
	/* Post our desires (ROS atomic_add() conflicts with glibc) */
	atomic_fetch_and_add(&nr_new_vcores_wanted, nr_new_vcores);
try_handle_it:
	cmb();	/* inc before swap.  the atomic is a CPU mb() */
	if (atomic_swap(&vc_req_being_handled, 1)) {
		/* We got a 1 back, so someone else is already working on it */
		return 0;
	}
	/* So now we're the ones supposed to handle things.  This does things in the
	 * "increment based on the number we have" style, vs "increment based on the
	 * number we said we want".
	 *
	 * Figure out how many we have, though this is racy.  Yields/preempts/grants
	 * will change this over time, and we may end up asking for less than we
	 * had. */
	nr_vcores_wanted = num_vcores();
	/* Pull all of the vcores wanted into our local variable, where we'll deal
	 * with prepping/requesting that many vcores.  Keep doing this til we think
	 * no more are wanted. */
	while ((nr_to_prep_now = atomic_swap(&nr_new_vcores_wanted, 0))) {
		nr_vcores_wanted += nr_to_prep_now;
		/* Don't bother prepping or asking for more than we can ever get */
		nr_vcores_wanted = MIN(nr_vcores_wanted, max_vcores());
		/* Make sure all we might ask for are prepped */
		for (long i = _max_vcores_ever_wanted; i < nr_vcores_wanted; i++) {
			if (allocate_transition_stack(i) || allocate_transition_tls(i)) {
				atomic_set(&vc_req_being_handled, 0);	/* unlock and bail out*/
				return -1;
			}
			_max_vcores_ever_wanted++;	/* done in the loop to handle failures*/
		}
	}
	cmb();	/* force a reread of num_vcores() */
	/* Update amt_wanted if we now want *more* than what the kernel already
	 * knows.  See notes in the func doc. */
	if (nr_vcores_wanted > __procdata.res_req[RES_CORES].amt_wanted)
		__procdata.res_req[RES_CORES].amt_wanted = nr_vcores_wanted;
	/* If num_vcores isn't what we want, we can poke the ksched.  Due to some
	 * races with yield, our desires may be old.  Not a big deal; any vcores
	 * that pop up will just end up yielding (or get preempt messages).  */
	if (nr_vcores_wanted > num_vcores())
		sys_poke_ksched(0, RES_CORES);	/* 0 -> poke for ourselves */
	/* Unlock, (which lets someone else work), and check to see if more work
	 * needs to be done.  If so, we'll make sure it gets handled. */
	atomic_set(&vc_req_being_handled, 0);	/* unlock, to allow others to try */
	wrmb();
	/* check for any that might have come in while we were out */
	if (atomic_read(&nr_new_vcores_wanted))
		goto try_handle_it;
	return 0;
}
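
The "post your request, then let a single handler work the batch" shape above generalizes beyond vcores. Below is a minimal standalone sketch of that pattern using C11 atomics instead of parlib's atomics; all of the names here (pending_requests, handling, do_request_work, submit_request) are made up for illustration and are not part of Akaros.

#include <stdatomic.h>

static atomic_long pending_requests;	/* plays the role of nr_new_vcores_wanted */
static atomic_int  handling;		/* plays the role of vc_req_being_handled */

/* Stand-in for the prep/amt_wanted/poke-the-ksched work. */
static void do_request_work(long amt)
{
	(void)amt;
}

/* Every caller posts its amount; at most one caller at a time does the work. */
static int submit_request(long amt)
{
	long batch;

	atomic_fetch_add(&pending_requests, amt);
	do {
		/* the seq_cst exchange doubles as the barrier that the
		 * cmb()/atomic_swap() pair provides in vcore_request() */
		if (atomic_exchange(&handling, 1))
			return 0;	/* someone else is the handler */
		while ((batch = atomic_exchange(&pending_requests, 0)))
			do_request_work(batch);
		atomic_store(&handling, 0);	/* unlock */
		/* a request may have been posted after our last swap but
		 * before we dropped the 'lock', so check again */
	} while (atomic_load(&pending_requests));
	return 0;
}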
Example #2
/* Enables notifs, and deals with missed notifs by self notifying.  This should
 * be rare, so the syscall overhead isn't a big deal.  The other alternative
 * would be to uthread_yield(), which would require us to revert some uthread
 * interface changes. */
void enable_notifs(uint32_t vcoreid)
{
    __enable_notifs(vcoreid);
    wrmb();	/* need to read after the write that enabled notifs */
    /* Note we could get migrated before executing this.  If that happens, our
     * vcore had gone into vcore context (which is what we wanted), and this
     * self_notify to our old vcore is spurious and harmless. */
    if (vcpd_of(vcoreid)->notif_pending)
        sys_self_notify(vcoreid, EV_NONE, 0, TRUE);
}
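
The ordering trick here (enable notifs, barrier, then re-read notif_pending) is the classic "store, then load the other side's flag" handshake. A hedged sketch of just that shape with C11 atomics; the fake VCPD layout and fake_self_notify() are stand-ins for the real vcpd_of()/sys_self_notify(), not the Akaros API.

#include <stdatomic.h>
#include <stdbool.h>

struct fake_vcpd {
	atomic_bool notif_disabled;
	atomic_bool notif_pending;
};

static void fake_self_notify(void)
{
	/* stand-in for sys_self_notify(vcoreid, EV_NONE, 0, TRUE) */
}

static void enable_and_recheck(struct fake_vcpd *vcpd)
{
	atomic_store(&vcpd->notif_disabled, false);
	/* the wrmb(): the store above must be visible before the load below */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load(&vcpd->notif_pending))
		fake_self_notify();	/* eat any notif we might have missed */
}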
Example #3
File: ceq.c Project: anandab/akaros
/* Consumer side, returns TRUE on success and fills *msg with the ev_msg.  If
 * the ceq appears empty, it will return FALSE.  Messages that arrived after we
 * started consuming may not be received. */
bool get_ceq_msg(struct ceq *ceq, struct event_msg *msg)
{
	int32_t idx = get_ring_idx(ceq);
	if (idx == -1) {
		if (!ceq->ring_overflowed)
			return FALSE;
		/* We didn't get anything via the ring, but if we're overflowed, then we
		 * need to look in the array directly.  Note that we only handle
		 * overflow when we failed to get something.  Eventually, we'll deal
		 * with overflow (which should be very rare).  Also note that while we
		 * are dealing with overflow, the kernel could be producing and using
		 * the ring, and we could have consumers consuming from the ring.
		 *
		 * Overall, we need to clear the overflow flag, make sure the list is
		 * empty, and turn the flag back on if it isn't.  That'll make sure
		 * overflow is set if there's a chance there is a message in the array
		 * that doesn't have an idx in the ring.
		 *
		 * However, if we do that, there's a time when overflow isn't set and
		 * the ring is empty.  A concurrent consumer could think that the ring
		 * is empty, when in fact it isn't.  That's bad, since we could miss a
		 * message (i.e. sleep when we have a message we needed).  So we'll need
		 * to deal with concurrent consumers, and whatever we do will also need
		 * to deal with concurrent consumers who handle overflow too.  Easiest
		 * thing is to just lock.  If the lock is set, then that also means the
		 * mailbox isn't empty. */
		spin_pdr_lock((struct spin_pdr_lock*)&ceq->u_lock);
		/* Check again - someone may have handled it while we were waiting on
		 * the lock */
		if (!ceq->ring_overflowed) {
			spin_pdr_unlock((struct spin_pdr_lock*)&ceq->u_lock);
			return FALSE;
		}
		ceq->ring_overflowed = FALSE;
		wrmb(); /* clear overflowed before reading event entries */
		for (int i = 0; i < ceq->nr_events; i++) {
			if (extract_ceq_msg(ceq, i, msg)) {
				/* We found something.  There might be more, but a future
				 * consumer will have to deal with it, or verify there isn't. */
				ceq->ring_overflowed = TRUE;
				spin_pdr_unlock((struct spin_pdr_lock*)&ceq->u_lock);
				return TRUE;
			}
		}
		/* made it to the end, looks like there was no overflow left.  there
		 * could be new ones added behind us (they'd be in the ring or overflow
		 * would be turned on again), but those messages were added after we
		 * started consuming, and therefore not our obligation to extract. */
		spin_pdr_unlock((struct spin_pdr_lock*)&ceq->u_lock);
		return FALSE;
	}
	if (!extract_ceq_msg(ceq, idx, msg))
		return FALSE;
	return TRUE;
}
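
For context, a typical consumer just calls this in a loop until it reports empty. A hedged usage sketch, assuming the get_ceq_msg() above and the struct ceq / struct event_msg definitions from the Akaros headers; handle_one() is a hypothetical application callback, not part of the CEQ API.

static void handle_one(struct event_msg *msg)
{
	(void)msg;	/* application-specific handling would go here */
}

static void drain_ceq(struct ceq *ceq)
{
	struct event_msg msg;

	/* FALSE means "appeared empty"; per the comment above, anything that
	 * arrives after we started consuming is a later pass's obligation. */
	while (get_ceq_msg(ceq, &msg))
		handle_one(&msg);
}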
Example #4
/* Attempts to message a vcore that may or may not have VC_CAN_RCV_MSG set.  If
 * so, we'll post the message and the message will eventually get dealt with
 * (when the vcore runs or when it is preempt-recovered). */
static bool try_spam_vcore(struct proc *p, uint32_t vcoreid,
                           struct event_msg *ev_msg, int ev_flags)
{
	/* Not sure if we can or not, so check before spamming.  Technically, the
	 * only critical part is that we __alert, then check can_alert. */
	if (can_msg_vcore(vcoreid)) {
		spam_vcore(p, vcoreid, ev_msg, ev_flags);
		wrmb();	/* prev write (notif_pending) must come before following reads*/
		if (can_msg_vcore(vcoreid))
			return TRUE;
	}
	return FALSE;
}
Example #5
/* Helper: will try to message (INDIR/IPI) a list member (lists of vcores).  We
 * use this on the online and bulk_preempted vcore lists.  If this succeeds in
 * alerting a vcore on the list, it'll return TRUE.  We need to be careful here,
 * since we're reading a list that could be concurrently modified.  The
 * important thing is that we can always fail if we're unsure (such as with
 * lists being temporarily empty).  The caller will be able to deal with it via
 * the ultimate fallback. */
static bool spam_list_member(struct vcore_tailq *list, struct proc *p,
                             struct event_msg *ev_msg, int ev_flags)
{
	struct vcore *vc, *vc_first;
	uint32_t vcoreid;
	int loops = 0;
	vc = TAILQ_FIRST(list);
	/* If the list appears empty, we'll bail out (failing) after the loop. */
	while (vc) {
		vcoreid = vcore2vcoreid(p, vc);
		/* post the alert.  Not using the try_spam_vcore() helper since I want
		 * something more customized for the lists. */
		spam_vcore(p, vcoreid, ev_msg, ev_flags);
		wrmb();	/* prev write (notif_pending) must come before following reads*/
		/* if they are still alertable after we sent the msg, then they'll get
		 * it before yielding (racing with userspace yield here).  This check is
		 * not as critical as the next one, but will allow us to alert vcores
		 * that happen to concurrently be moved from the active to the
		 * bulk_preempt list. */
		if (can_msg_vcore(vcoreid))
			return TRUE;
		/* As a backup, if they are still the first on the list, then they are
		 * still going to get the message.  For the online list, proc_yield()
		 * will return them to userspace (where they will get the message)
		 * because __alert_vcore() set notif_pending.  For the BP list, they
		 * will either be turned on later, or have a preempt message sent about
		 * their demise.
		 *
		 * We race on list membership (and not exclusively on VC_CAN_RCV_MSG), so
		 * that when it fails we can get a new vcore to try (or know WHP there
		 * are none). */
		vc_first = TAILQ_FIRST(list);
		if (vc == vc_first)
			return TRUE;
		/* At this point, the list has changed and the vcore we tried yielded,
		 * so we try the *new* list head.  Track loops for sanity reasons. */
		if (loops++ > 10) {
			warn("Too many (%d) attempts to find a vcore, failing!", loops);
			return FALSE;	/* always safe to fail! */
		}
		/* Get set up for your attack run! */
		vc = vc_first;
	}
	return FALSE;
}
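
The core idiom in spam_list_member() is: deliver, re-check, and if the list head moved, retry from the new head with a bounded loop count. A standalone sketch of that idiom with a toy atomic list head; struct node, try_deliver(), list_head, and deliver_to_someone() are all hypothetical stand-ins, not Akaros types.

#include <stdatomic.h>
#include <stdbool.h>

struct node {
	int id;
	struct node *next;	/* unused here; marks this as a list element */
};

static _Atomic(struct node *) list_head;

/* Stand-in for spam_vcore() + can_msg_vcore(); returns true only if the
 * target is known to have gotten the message. */
static bool try_deliver(struct node *n)
{
	(void)n;
	return false;
}

static bool deliver_to_someone(void)
{
	struct node *vc = atomic_load(&list_head);
	struct node *first;
	int loops = 0;

	while (vc) {
		if (try_deliver(vc))
			return true;
		/* If the head hasn't changed, our target is still first and
		 * will still see the message; otherwise retry with the new
		 * head, but only a bounded number of times. */
		first = atomic_load(&list_head);
		if (vc == first)
			return true;
		if (loops++ > 10)
			return false;	/* always safe to fail */
		vc = first;
	}
	return false;	/* list appeared empty */
}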
Example #6
/* Glibc initial blockon, usable before parlib code can init things (or if it
 * never can, like for RTLD).  MCPs will need the 'uthread-aware' blockon. */
void __ros_scp_syscall_blockon(struct syscall *sysc)
{
	/* Need to disable notifs before registering, so we don't take an __notify
	 * that drops us into VC ctx and forces us to eat the notif_pending that was
	 * meant to prevent us from yielding if the syscall completed early. */
	__procdata.vcore_preempt_data[0].notif_disabled = TRUE;
	/* Ask for a SYSCALL event when the sysc is done.  We don't need a handler,
	 * we just need the kernel to restart us from proc_yield.  If register
	 * fails, we're already done. */
	if (register_evq(sysc, &__ros_scp_simple_evq)) {
		/* Sending false for now - we want to signal proc code that we want to
		 * wait (piggybacking on the MCP meaning of this variable) */
		__ros_syscall_noerrno(SYS_yield, FALSE, 0, 0, 0, 0, 0);
	}
	/* Manually doing an enable_notifs for VC 0 */
	__procdata.vcore_preempt_data[0].notif_disabled = FALSE;
	wrmb();	/* need to read after the write that enabled notifs */
	if (__procdata.vcore_preempt_data[0].notif_pending)
		__ros_syscall_noerrno(SYS_self_notify, 0, EV_NONE, 0, TRUE, 0, 0);
}
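
Stripped of the VCPD and event-queue details, the blockon above has the shape "arm a wakeup, then block only if the work is still outstanding". A standalone sketch under made-up names (fake_sysc, arm_wakeup, fake_block); the real code arms the wakeup by registering an ev_q and blocks by yielding to the kernel.

#include <stdatomic.h>
#include <stdbool.h>

struct fake_sysc {
	atomic_bool done;
	atomic_bool wakeup_armed;
};

/* Returns false if the work already completed, so the caller must not block. */
static bool arm_wakeup(struct fake_sysc *s)
{
	atomic_store(&s->wakeup_armed, true);
	/* re-check after arming; a completion before this point would
	 * otherwise be missed while we sleep */
	return !atomic_load(&s->done);
}

static void fake_block(void)
{
	/* stand-in for the SYS_yield; any completion after arm_wakeup()
	 * succeeded will wake us back up */
}

static void block_on(struct fake_sysc *s)
{
	if (arm_wakeup(s))
		fake_block();
	/* either the wakeup fired, or the work was already done */
}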
Example #7
/* Helper, from u/p/uthread.c.  Keep it in sync.  (don't want to move this into
 * glibc yet). */
static bool register_evq(struct syscall *sysc, struct event_queue *ev_q)
{
	int old_flags;
	sysc->ev_q = ev_q;
	wrmb();	/* don't let that write pass any future reads (flags) */
	/* Try and set the SC_UEVENT flag (so the kernel knows to look at ev_q) */
	do {
		/* no cmb() needed, the atomic_read will reread flags */
		old_flags = atomic_read(&sysc->flags);
		/* Spin if the kernel is mucking with syscall flags */
		while (old_flags & SC_K_LOCK)
			old_flags = atomic_read(&sysc->flags);
		/* If the kernel finishes while we are trying to sign up for an event,
		 * we need to bail out */
		if (old_flags & (SC_DONE | SC_PROGRESS)) {
			sysc->ev_q = 0;		/* not necessary, but might help with bugs */
			return FALSE;
		}
	} while (!atomic_cas(&sysc->flags, old_flags, old_flags | SC_UEVENT));
	return TRUE;
}
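
The loop above is a common CAS pattern: spin while a lock bit is held, bail out if the operation already finished, otherwise CAS the new flag in. A standalone sketch with C11 atomics and made-up flag bits; F_KLOCK, F_DONE, and F_UEVENT stand in for SC_K_LOCK, SC_DONE | SC_PROGRESS, and SC_UEVENT and are not the real ros/syscall.h values.

#include <stdatomic.h>
#include <stdbool.h>

#define F_KLOCK		(1 << 0)	/* "the other side is mucking with flags" */
#define F_DONE		(1 << 1)	/* "operation already finished" */
#define F_UEVENT	(1 << 2)	/* "we want an event on completion" */

static bool set_uevent(atomic_int *flags)
{
	int old;

	do {
		old = atomic_load(flags);
		/* spin while the flags are locked by the other side */
		while (old & F_KLOCK)
			old = atomic_load(flags);
		/* too late to sign up for an event; bail out */
		if (old & F_DONE)
			return false;
	} while (!atomic_compare_exchange_weak(flags, &old, old | F_UEVENT));
	return true;
}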
Example #8
File: poke.c Project: brho/akaros
/* This is the 'post (work) and poke' style of sync.  We make sure the poke
 * tracker's function runs.  Once this returns, the func either has run or is
 * currently running (in case someone else is running now).  We won't wait or
 * spin or anything, and it is safe to call this recursively (deeper in the
 * call-graph).
 *
 * It's up to the caller to somehow post its work.  We'll also pass arg to the
 * func, ONLY IF the caller is the one to execute it - so there's no guarantee
 * the func(specific_arg) combo will actually run.  It's more for info
 * purposes/optimizations/etc.  If no one uses it, I'll get rid of it. */
void poke(struct poke_tracker *tracker, void *arg)
{
	atomic_set(&tracker->need_to_run, TRUE);
	/* will need to repeatedly do it if someone keeps posting work */
	do {
		/* want a wrmb() between posting work/need_to_run and in_progress.
		 * the swap provides the HW mb. just need a cmb, which we do in
		 * the loop to cover the iterations (even though i can't imagine
		 * the compiler reordering the check it needed to do for the
		 * branch).. */
		cmb();
		/* poke / make sure someone does it.  if we get a TRUE (1) back,
		 * someone is already running and will deal with the posted
		 * work.  (probably on their next loop).  if we got a 0 back, we
		 * won the race and have the 'lock'. */
		if (atomic_swap(&tracker->run_in_progress, TRUE))
			return;
		/* if we're here, then we're the one who needs to run the func.
		 * */
		/* clear the 'need to run', since we're running it now.  new
		 * users will set it again.  this write needs to be wmb()'d
		 * after in_progress.  the swap provided the HW mb(). */
		cmb();
		/* no internal HW mb */
		atomic_set(&tracker->need_to_run, FALSE);
		/* run the actual function.  the poke sync makes sure only one
		 * caller is in that func at a time. */
		assert(tracker->func);
		tracker->func(arg);
		/* ensure the in_prog write comes after the run_again. */
		wmb();
		/* no internal HW mb */
		atomic_set(&tracker->run_in_progress, FALSE);
		/* in_prog write must come before run_again read */
		wrmb();
	} while (atomic_read(&tracker->need_to_run));
}
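
A standalone sketch of the same post-and-poke shape with C11 atomics; struct my_poke_tracker mirrors what the comments describe but is not the real struct poke_tracker, and seq_cst operations stand in for the cmb()/wmb()/wrmb() calls above.

#include <stdatomic.h>
#include <stdbool.h>

struct my_poke_tracker {
	atomic_bool need_to_run;
	atomic_bool run_in_progress;
	void (*func)(void *arg);
};

static void my_poke(struct my_poke_tracker *t, void *arg)
{
	/* post: the caller's work (and this flag) must be visible first */
	atomic_store(&t->need_to_run, true);
	do {
		/* the seq_cst exchange is the "HW mb" the comments mention */
		if (atomic_exchange(&t->run_in_progress, true))
			return;	/* the current runner will pick up our post */
		/* we won the race: clear the flag, then run the func */
		atomic_store(&t->need_to_run, false);
		t->func(arg);	/* poke sync: only one caller in here at a time */
		atomic_store(&t->run_in_progress, false);
		/* the in_progress store above and the load below are both
		 * seq_cst, which keeps them ordered (the wrmb()) */
	} while (atomic_load(&t->need_to_run));
}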
Example #9
/* Send an event to ev_q, based on the parameters in ev_q's flags.  We don't
 * accept null ev_qs, since the caller ought to be checking before bothering to
 * make a msg and send it to the event_q.  Vcoreid is who the kernel thinks the
 * message ought to go to (for IPIs).  Appropriate for things like
 * EV_PREEMPT_PENDING, where we tell the affected vcore.  To have the message go
 * where the kernel suggests, set EVENT_VCORE_APPRO(priate). */
void send_event(struct proc *p, struct event_queue *ev_q, struct event_msg *msg,
                uint32_t vcoreid)
{
	struct proc *old_proc;
	struct event_mbox *ev_mbox = 0;
	assert(p);
	printd("[kernel] sending msg to proc %p, ev_q %p\n", p, ev_q);
	if (!ev_q) {
		warn("[kernel] Null ev_q - kernel code should check before sending!");
		return;
	}
	if (!is_user_rwaddr(ev_q, sizeof(struct event_queue))) {
		/* Ought to kill them, just warn for now */
		printk("[kernel] Illegal addr for ev_q\n");
		return;
	}
	/* This should be caught by "future technology" that can tell when the
	 * kernel PFs on the user's behalf.  For now, we catch common userspace bugs
	 * (had this happen a few times). */
	if (!PTE_ADDR(ev_q)) {
		printk("[kernel] Bad addr %p for ev_q\n", ev_q);
		return;
	}
	/* ev_q is a user pointer, so we need to make sure we're in the right
	 * address space */
	old_proc = switch_to(p);
	/* If we're an _S, just spam vcore0, and wake up if necessary. */
	if (!__proc_is_mcp(p)) {
		spam_vcore(p, 0, msg, ev_q->ev_flags);
		wrmb();	/* don't let the notif_pending write pass the state read */
		/* using the same pattern as in spam_public (which can have multiple
		 * unblock callbacks) */
		if (p->state == PROC_WAITING)
			proc_wakeup(p);
		goto out;
	}
	/* Get the vcoreid that we'll message (if appropriate).  For INDIR and
	 * SPAMMING, this is the first choice of a vcore, but other vcores might get
	 * it.  Common case is !APPRO and !ROUNDROBIN.  Note we are clobbering the
	 * vcoreid parameter. */
	if (!(ev_q->ev_flags & EVENT_VCORE_APPRO))
		vcoreid = ev_q->ev_vcore;	/* use the ev_q's vcoreid */
	/* Note that RR overwrites APPRO */
	if (ev_q->ev_flags & EVENT_ROUNDROBIN) {
		/* Pick a vcore, round-robin style.  Assuming ev_vcore was the previous
		 * one used.  Note that round-robin overrides the passed-in vcoreid.
		 * Also note this may be 'wrong' if num_vcores changes. */
		vcoreid = (ev_q->ev_vcore + 1) % p->procinfo->num_vcores;
		ev_q->ev_vcore = vcoreid;
	}
	if (!vcoreid_is_safe(vcoreid)) {
		/* Ought to kill them, just warn for now */
		printk("[kernel] Vcoreid %d unsafe! (too big?)\n", vcoreid);
		goto out;
	}
	/* If we're a SPAM_PUBLIC, they just want us to spam the message.  Note we
	 * don't care about the mbox, since it'll go to VCPD public mboxes, and
	 * we'll prefer to send it to whatever vcoreid we determined at this point
	 * (via APPRO or whatever). */
	if (ev_q->ev_flags & EVENT_SPAM_PUBLIC) {
		spam_public_msg(p, msg, vcoreid, ev_q->ev_flags & EVENT_SPAM_FLAGS);
		goto out;
	}
	/* We aren't spamming and we know the default vcore, and now we need to
	 * figure out which mbox to use.  If they provided an mbox, we'll use it.
	 * If not, we'll use a VCPD mbox (public or private, depending on the
	 * flags). */
	ev_mbox = ev_q->ev_mbox;
	if (!ev_mbox)
		ev_mbox = get_vcpd_mbox(vcoreid, ev_q->ev_flags);
	/* At this point, we ought to have the right mbox to send the msg to, and
	 * which vcore to alert (IPI/INDIR) (if applicable).  The mbox could be the
	 * vcore's vcpd ev_mbox. */
	if (!ev_mbox) {
		/* This shouldn't happen any more, this is more for sanity's sake */
		warn("[kernel] ought to have an mbox by now!");
		goto out;
	}
	/* Even if we're using an mbox in procdata (VCPD), we want a user pointer */
	if (!is_user_rwaddr(ev_mbox, sizeof(struct event_mbox))) {
		/* Ought to kill them, just warn for now */
		printk("[kernel] Illegal addr for ev_mbox\n");
		goto out;
	}
	/* We used to support no msgs, but quit being lazy and send a 'msg'.  If the
	 * ev_q is a NOMSG, we won't actually memcpy or anything, it'll just be a
	 * vehicle for sending the ev_type. */
	assert(msg);
	post_ev_msg(p, ev_mbox, msg, ev_q->ev_flags);
	wmb();	/* ensure ev_msg write is before alerting the vcore */
	/* Prod/alert a vcore with an IPI or INDIR, if desired.  INDIR will also
	 * call try_notify (IPI) later */
	if (ev_q->ev_flags & EVENT_INDIR) {
		send_indir(p, ev_q, vcoreid);
	} else {
		/* they may want an IPI despite not wanting an INDIR */
		try_notify(p, vcoreid, ev_q->ev_flags);
	}
	/* Fall through */
out:
	/* Return to the old address space. */
	switch_back(p, old_proc);
}
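
The EVENT_ROUNDROBIN branch reduces to "advance from the previously used index, modulo the current count." A tiny hedged sketch of just that arithmetic, with no Akaros types; rr_pick() and its arguments are hypothetical, and the caller is assumed to guarantee num_vcores > 0 (the kernel code has the same caveat about num_vcores changing underneath it).

static unsigned int rr_pick(unsigned int *prev, unsigned int num_vcores)
{
	unsigned int next = (*prev + 1) % num_vcores;

	*prev = next;	/* remember for the next send, like ev_q->ev_vcore */
	return next;
}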