Example #1
/* Returns -1 with errno set on error, or 0 on success.  This does not return
 * the number of cores actually granted (though some parts of the kernel do
 * internally).
 *
 * This tries to get "more vcores", based on the number we currently have.
 * We'll probably need smarter 2LSs in the future that just directly set
 * amt_wanted.  What happens is we can have a bunch of 2LS vcore contexts
 * trying to get "another vcore", which currently means more than num_vcores().
 * If you have someone ask for two more, and then someone else ask for one more,
 * how many you ultimately ask for depends on if the kernel heard you and
 * adjusted num_vcores in between the two calls.  Or maybe your amt_wanted
 * already was num_vcores + 5, so neither call is telling the kernel anything
 * new.  It comes down to "one more than I have" vs "one more than I've already
 * asked for".
 *
 * So for now, this will keep the older behavior (one more than I have).  It
 * will try to accumulate any concurrent requests, and adjust amt_wanted up.
 * Interleaving, repetitive calls (everyone asking for one more) may get
 * ignored.
 *
 * Note this doesn't block or anything (despite the minimum number requested
 * being 1), since the kernel won't block the call.
 *
 * There are a few concurrency concerns.  We have _max_vcores_ever_wanted,
 * initialization of new vcore stacks/TLSs, making sure we don't ask for too
 * many (minor point), and most importantly not asking the kernel for too much
 * or otherwise miscommunicating our desires to the kernel.  Remember, the
 * kernel wants just one answer from the process about what it wants, and it is
 * up to the process to figure that out.
 *
 * So we basically have one thread do the submitting/prepping/bookkeeping, and
 * other threads just come in, update the number wanted, and make sure someone
 * is sorting things out.  This will perform a bit better too, since only one
 * vcore makes syscalls (which hammer the proc_lock).  This essentially has
 * cores submit work, and one core does the work (like Eric's old delta
 * functions).
 *
 * There's a slight semantic change: this will return 0 (success) for the
 * non-submitters, and 0 if we submitted; it returns -1 only if the submitter
 * had some non-kernel failure.
 *
 * Also, beware that this (like the old version) doesn't protect against races
 * on num_vcores().  num_vcores() is how many you have now or very soon
 * (accounting for messages in flight that will take your cores), not how many
 * you told the kernel you want. */
int vcore_request(long nr_new_vcores)
{
	long nr_to_prep_now, nr_vcores_wanted;

	assert(vc_initialized);
	/* Early sanity checks */
	if ((nr_new_vcores < 0) || (nr_new_vcores + num_vcores() > max_vcores()))
		return -1;	/* consider ERRNO */
	/* Post our desires (ROS atomic_add() conflicts with glibc) */
	atomic_fetch_and_add(&nr_new_vcores_wanted, nr_new_vcores);
try_handle_it:
	cmb();	/* inc before swap.  the atomic is a CPU mb() */
	if (atomic_swap(&vc_req_being_handled, 1)) {
		/* We got a 1 back, so someone else is already working on it */
		return 0;
	}
	/* So now we're the ones supposed to handle things.  This works in the
	 * "increment based on the number we have" style, vs "increment based on
	 * the number we said we want".
	 *
	 * Figure out how many we have, though this is racy.  Yields/preempts/grants
	 * will change this over time, and we may end up asking for less than we
	 * had. */
	nr_vcores_wanted = num_vcores();
	/* Pull all of the vcores wanted into our local variable, where we'll deal
	 * with prepping/requesting that many vcores.  Keep doing this til we think
	 * no more are wanted. */
	while ((nr_to_prep_now = atomic_swap(&nr_new_vcores_wanted, 0))) {
		nr_vcores_wanted += nr_to_prep_now;
		/* Don't bother prepping or asking for more than we can ever get */
		nr_vcores_wanted = MIN(nr_vcores_wanted, max_vcores());
		/* Make sure all we might ask for are prepped */
		for (long i = _max_vcores_ever_wanted; i < nr_vcores_wanted; i++) {
			if (allocate_transition_stack(i) || allocate_transition_tls(i)) {
				atomic_set(&vc_req_being_handled, 0);	/* unlock and bail out */
				return -1;
			}
			_max_vcores_ever_wanted++;	/* done in the loop to handle failures */
		}
	}
	cmb();	/* force a reread of num_vcores() */
	/* Update amt_wanted if we now want *more* than what the kernel already
	 * knows.  See notes in the func doc. */
	if (nr_vcores_wanted > __procdata.res_req[RES_CORES].amt_wanted)
		__procdata.res_req[RES_CORES].amt_wanted = nr_vcores_wanted;
	/* If num_vcores isn't what we want, we can poke the ksched.  Due to some
	 * races with yield, our desires may be old.  Not a big deal; any vcores
	 * that pop up will just end up yielding (or get preempt messages). */
	if (nr_vcores_wanted > num_vcores())
		sys_poke_ksched(0, RES_CORES);	/* 0 -> poke for ourselves */
	/* Unlock, (which lets someone else work), and check to see if more work
	 * needs to be done.  If so, we'll make sure it gets handled. */
	atomic_set(&vc_req_being_handled, 0);	/* unlock, to allow others to try */
	wrmb();
	/* check for any that might have come in while we were out */
	if (atomic_read(&nr_new_vcores_wanted))
		goto try_handle_it;
	return 0;
}
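
A note on the pattern: every caller atomically posts its delta to nr_new_vcores_wanted, one caller wins the swap on vc_req_being_handled and drains the posted amount, and the winner rechecks after unlocking so nothing gets stranded.  That scheme is general enough to sketch on its own.  Below is a minimal, hypothetical sketch using C11 atomics instead of the ROS atomic_swap()/atomic_fetch_and_add()/cmb() primitives; the names pending, handling, and do_request are illustrative and not part of the vcore API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_long pending;	/* deltas posted by any thread */
static atomic_bool handling;	/* true while some thread is the handler */

/* Hypothetical stand-in for the single syscall/bookkeeping step. */
static void do_request(long amt)
{
	printf("handling a batch of %ld\n", amt);
}

void submit(long amt)
{
	/* Everyone posts their delta; cheap and contention-free. */
	atomic_fetch_add(&pending, amt);
	for (;;) {
		/* Try to become the single handler.  Losers just return, since
		 * the current handler is guaranteed to see the delta posted
		 * above, either in its drain or in its recheck. */
		if (atomic_exchange(&handling, true))
			return;
		/* Drain everything posted so far and act on it once. */
		long batch = atomic_exchange(&pending, 0);
		if (batch)
			do_request(batch);
		/* Unlock, then recheck: work posted between the drain and the
		 * unlock must not be stranded. */
		atomic_store(&handling, false);
		if (!atomic_load(&pending))
			return;
	}
}

The final load of pending after clearing handling plays the same role as the goto try_handle_it recheck above: anything posted between the drain and the unlock is either handled on the next pass or picked up by whoever posted it.
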
Example #2
int main(int argc, char** argv)
{
	pthread_t *my_threads = malloc(sizeof(pthread_t) * max_vcores());

	/* set up to receive the PREEMPT_PENDING event.  EVENT_VCORE_APPRO tells the
	 * kernel to send the msg to whichever vcore is appropriate.  Pthread code
	 * will see the preemption and yield. */
	struct event_queue *ev_q = get_event_q();
	ev_q->ev_flags = EVENT_IPI | EVENT_NOMSG | EVENT_VCORE_APPRO;
	register_kevent_q(ev_q, EV_PREEMPT_PENDING);

	/* actually only need one less, since the _S will be pthread 0 */
	for (int i = 0; i < max_vcores() - 1; i++)
		pthread_create(&my_threads[i], NULL, &while_thread, NULL);

	assert(num_vcores() == max_vcores());
	while (1);

	/* should never make it here */
	return -1;
}
Example #3
int bthread_create(bthread_t* thread, const bthread_attr_t* attr,
                   void *(*start_routine)(void *), void* arg)
{
  struct mcs_lock_qnode local_qn = {0};
  bthread_once(&init_once, &_bthread_init);

  *thread = (bthread_t)malloc(sizeof(work_queue_t));
  (*thread)->start_routine = start_routine;
  (*thread)->arg = arg;
  (*thread)->next = 0;
  (*thread)->finished = 0;
  (*thread)->detached = 0;
  mcs_lock_lock(&work_queue_lock, &local_qn);
  {
    threads_active++;
    queue_insert(&work_queue_head, &work_queue_tail, *thread);
    // don't return until we get a vcore
    while (threads_active > num_vcores() && vcore_request(1));
  }
  mcs_lock_unlock(&work_queue_lock, &local_qn);

  return 0;
}