/* Returns -1 with errno set on error, or 0 on success.  This does not return
 * the number of cores actually granted (though some parts of the kernel do
 * internally).
 *
 * This tries to get "more vcores", based on the number we currently have.
 * We'll probably need smarter 2LSs in the future that just directly set
 * amt_wanted.  What happens is we can have a bunch of 2LS vcore contexts
 * trying to get "another vcore", which currently means more than num_vcores().
 * If you have someone ask for two more, and then someone else ask for one
 * more, how many you ultimately ask for depends on if the kernel heard you and
 * adjusted num_vcores in between the two calls.  Or maybe your amt_wanted
 * already was num_vcores + 5, so neither call is telling the kernel anything
 * new.  It comes down to "one more than I have" vs "one more than I've
 * already asked for".
 *
 * So for now, this will keep the older behavior (one more than I have).  It
 * will try to accumulate any concurrent requests, and adjust amt_wanted up.
 * Interleaving, repetitive calls (everyone asking for one more) may get
 * ignored.
 *
 * Note the doesn't block or anything (despite the min number requested is
 * 1), since the kernel won't block the call.
 *
 * There are a few concurrency concerns.  We have _max_vcores_ever_wanted,
 * initialization of new vcore stacks/TLSs, making sure we don't ask for too
 * many (minor point), and most importantly not asking the kernel for too much
 * or otherwise miscommunicating our desires to the kernel.  Remember, the
 * kernel wants just one answer from the process about what it wants, and it
 * is up to the process to figure that out.
 *
 * So we basically have one thread do the submitting/prepping/bookkeeping, and
 * other threads come in just update the number wanted and make sure someone
 * is sorting things out.  This will perform a bit better too, since only one
 * vcore makes syscalls (which hammer the proc_lock).  This essentially has
 * cores submit work, and one core does the work (like Eric's old delta
 * functions).
 *
 * There's a slight semantic change: this will return 0 (success) for the
 * non-submitters, and 0 if we submitted.  -1 only if the submitter had some
 * non-kernel failure.
 *
 * Also, beware that this (like the old version) doesn't protect with races on
 * num_vcores().  num_vcores() is how many you have now or very soon
 * (accounting for messages in flight that will take your cores), not how many
 * you told the kernel you want. */
int vcore_request(long nr_new_vcores)
{
	long nr_to_prep_now, nr_vcores_wanted;

	assert(vc_initialized);
	/* Early sanity checks: reject negative requests and requests that would
	 * exceed the machine's max, even transiently. */
	if ((nr_new_vcores < 0) || (nr_new_vcores + num_vcores() > max_vcores()))
		return -1;	/* consider ERRNO */
	/* Post our desires (ROS atomic_add() conflicts with glibc).  Any thread
	 * may do this; the single handler below will collect it. */
	atomic_fetch_and_add(&nr_new_vcores_wanted, nr_new_vcores);
try_handle_it:
	cmb();	/* inc before swap.  the atomic is a CPU mb() */
	/* vc_req_being_handled acts as a try-lock: only one thread at a time
	 * does the prepping/syscall work. */
	if (atomic_swap(&vc_req_being_handled, 1)) {
		/* We got a 1 back, so someone else is already working on it */
		return 0;
	}
	/* So now we're the ones supposed to handle things.  This does things in
	 * the "increment based on the number we have", vs "increment on the
	 * number we said we want".
	 *
	 * Figure out how many we have, though this is racy.  Yields/preempts/
	 * grants will change this over time, and we may end up asking for less
	 * than we had. */
	nr_vcores_wanted = num_vcores();
	/* Pull all of the vcores wanted into our local variable, where we'll
	 * deal with prepping/requesting that many vcores.  Keep doing this til
	 * we think no more are wanted. */
	while ((nr_to_prep_now = atomic_swap(&nr_new_vcores_wanted, 0))) {
		nr_vcores_wanted += nr_to_prep_now;
		/* Don't bother prepping or asking for more than we can ever get */
		nr_vcores_wanted = MIN(nr_vcores_wanted, max_vcores());
		/* Make sure all we might ask for are prepped.  Stacks/TLSs are only
		 * ever allocated once per vcore id, tracked by the high-water mark
		 * _max_vcores_ever_wanted. */
		for (long i = _max_vcores_ever_wanted; i < nr_vcores_wanted; i++) {
			if (allocate_transition_stack(i) ||
			    allocate_transition_tls(i)) {
				atomic_set(&vc_req_being_handled, 0);	/* unlock and bail out*/
				return -1;
			}
			_max_vcores_ever_wanted++;	/* done in the loop to handle failures*/
		}
	}
	cmb();	/* force a reread of num_vcores() */
	/* Update amt_wanted if we now want *more* than what the kernel already
	 * knows.  See notes in the func doc. */
	if (nr_vcores_wanted > __procdata.res_req[RES_CORES].amt_wanted)
		__procdata.res_req[RES_CORES].amt_wanted = nr_vcores_wanted;
	/* If num_vcores isn't what we want, we can poke the ksched.  Due to some
	 * races with yield, our desires may be old.  Not a big deal; any vcores
	 * that pop up will just end up yielding (or get preempt messages.) */
	if (nr_vcores_wanted > num_vcores())
		sys_poke_ksched(0, RES_CORES);	/* 0 -> poke for ourselves */
	/* Unlock, (which lets someone else work), and check to see if more work
	 * needs to be done.  If so, we'll make sure it gets handled. */
	atomic_set(&vc_req_being_handled, 0);	/* unlock, to allow others to try */
	wrmb();	/* check for any that might have come in while we were out */
	if (atomic_read(&nr_new_vcores_wanted))
		goto try_handle_it;
	return 0;
}
int main(int argc, char** argv) { pthread_t *my_threads = malloc(sizeof(pthread_t) * max_vcores()); /* set up to receive the PREEMPT_PENDING event. EVENT_VCORE_APPRO tells the * kernel to send the msg to whichever vcore is appropriate. Pthread code * will see the preemption and yield. */ struct event_queue *ev_q = get_event_q(); ev_q->ev_flags = EVENT_IPI | EVENT_NOMSG | EVENT_VCORE_APPRO; register_kevent_q(ev_q, EV_PREEMPT_PENDING); /* actually only need one less, since the _S will be pthread 0 */ for (int i = 0; i < max_vcores() - 1; i++) pthread_create(&my_threads[i], NULL, &while_thread, NULL); assert(num_vcores() == max_vcores()); while (1); /* should never make it here */ return -1; }
/* Creates a new bthread running start_routine(arg).  The handle is stored in
 * *thread; attr is currently ignored.  The new thread is queued on the global
 * work queue under the mcs work_queue_lock, and we spin requesting vcores
 * until there are at least as many vcores as active threads.
 *
 * Returns 0 on success, -1 if the thread structure could not be allocated
 * (the original dereferenced a NULL malloc() result). */
int bthread_create(bthread_t* thread, const bthread_attr_t* attr,
                   void *(*start_routine)(void *), void* arg)
{
	struct mcs_lock_qnode local_qn = {0};

	/* One-time library init, safe to call from every creator. */
	bthread_once(&init_once, &_bthread_init);
	*thread = (bthread_t)malloc(sizeof(work_queue_t));
	if (!*thread)
		return -1;	/* consider ENOMEM/errno */
	(*thread)->start_routine = start_routine;
	(*thread)->arg = arg;
	(*thread)->next = 0;
	(*thread)->finished = 0;
	(*thread)->detached = 0;
	mcs_lock_lock(&work_queue_lock, &local_qn);
	{
		threads_active++;
		queue_insert(&work_queue_head, &work_queue_tail, *thread);
		// don't return until we get a vcore
		while (threads_active > num_vcores() && vcore_request(1));
	}
	mcs_lock_unlock(&work_queue_lock, &local_qn);
	return 0;
}