/* Returns -1 with errno set on error, or 0 on success. This does not return
 * the number of cores actually granted (though some parts of the kernel do
 * internally).
 *
 * This tries to get "more vcores", based on the number we currently have.
 * We'll probably need smarter 2LSs in the future that just directly set
 * amt_wanted. What happens is we can have a bunch of 2LS vcore contexts
 * trying to get "another vcore", which currently means more than num_vcores().
 * If you have someone ask for two more, and then someone else ask for one
 * more, how many you ultimately ask for depends on whether the kernel heard
 * you and adjusted num_vcores in between the two calls. Or maybe your
 * amt_wanted already was num_vcores + 5, so neither call is telling the
 * kernel anything new. It comes down to "one more than I have" vs "one more
 * than I've already asked for".
 *
 * So for now, this will keep the older behavior (one more than I have). It
 * will try to accumulate any concurrent requests, and adjust amt_wanted up.
 * Interleaving, repetitive calls (everyone asking for one more) may get
 * ignored.
 *
 * Note this doesn't block or anything (even though the minimum number
 * requested is 1), since the kernel won't block the call.
 *
 * There are a few concurrency concerns. We have _max_vcores_ever_wanted,
 * initialization of new vcore stacks/TLSs, making sure we don't ask for too
 * many (minor point), and most importantly not asking the kernel for too much
 * or otherwise miscommunicating our desires to the kernel. Remember, the
 * kernel wants just one answer from the process about what it wants, and it
 * is up to the process to figure that out.
 *
 * So we basically have one thread do the submitting/prepping/bookkeeping, and
 * other threads come in, just update the number wanted, and make sure someone
 * is sorting things out. This will perform a bit better too, since only one
 * vcore makes syscalls (which hammer the proc_lock). This essentially has
 * cores submit work, and one core does the work (like Eric's old delta
 * functions).
 *
 * There's a slight semantic change: this will return 0 (success) for the
 * non-submitters, and 0 if we submitted; -1 only if the submitter had some
 * non-kernel failure.
 *
 * Also, beware that this (like the old version) doesn't protect against races
 * on num_vcores(). num_vcores() is how many you have now or very soon
 * (accounting for messages in flight that will take your cores), not how many
 * you told the kernel you want. */
int vcore_request(long nr_new_vcores)
{
	long nr_to_prep_now, nr_vcores_wanted;

	/* Early sanity checks */
	if ((nr_new_vcores < 0) || (nr_new_vcores + num_vcores() > max_vcores()))
		return -1;	/* consider ERRNO */
	/* Post our desires (ROS atomic_add() conflicts with glibc) */
	atomic_fetch_and_add(&nr_new_vcores_wanted, nr_new_vcores);
try_handle_it:
	cmb();	/* inc before swap. the atomic is a CPU mb() */
	if (atomic_swap(&vc_req_being_handled, 1)) {
		/* We got a 1 back, so someone else is already working on it */
		return 0;
	}
	/* So now we're the ones supposed to handle things. This works in the
	 * "increment based on the number we have" style, vs "increment based
	 * on the number we said we want".
	 *
	 * Figure out how many we have, though this is racy.
	 * Yields/preempts/grants will change this over time, and we may end up
	 * asking for less than we had. */
	nr_vcores_wanted = num_vcores();
	/* Pull all of the vcores wanted into our local variable, where we'll
	 * deal with prepping/requesting that many vcores. Keep doing this til
	 * we think no more are wanted. */
	while ((nr_to_prep_now = atomic_swap(&nr_new_vcores_wanted, 0))) {
		nr_vcores_wanted += nr_to_prep_now;
		/* Don't bother prepping or asking for more than we can ever
		 * get */
		nr_vcores_wanted = MIN(nr_vcores_wanted, max_vcores());
		/* Make sure all we might ask for are prepped */
		for (long i = _max_vcores_ever_wanted; i < nr_vcores_wanted; i++) {
			if (allocate_vcore_stack(i) || allocate_transition_tls(i)) {
				atomic_set(&vc_req_being_handled, 0);	/* unlock and bail out */
				return -1;
			}
			_max_vcores_ever_wanted++;	/* done in the loop to handle failures */
		}
	}
	cmb();	/* force a reread of num_vcores() */
	/* Update amt_wanted if we now want *more* than what the kernel already
	 * knows. See notes in the func doc. */
	if (nr_vcores_wanted > __procdata.res_req[RES_CORES].amt_wanted)
		__procdata.res_req[RES_CORES].amt_wanted = nr_vcores_wanted;
	/* If num_vcores isn't what we want, we can poke the ksched. Due to
	 * some races with yield, our desires may be old. Not a big deal; any
	 * vcores that pop up will just end up yielding (or get preempt
	 * messages.) */
	if (nr_vcores_wanted > num_vcores())
		sys_poke_ksched(0, RES_CORES);	/* 0 -> poke for ourselves */
	/* Unlock (which lets someone else work), and check to see if more work
	 * needs to be done. If so, we'll make sure it gets handled. */
	atomic_set(&vc_req_being_handled, 0);	/* unlock, to allow others to try */
	wrmb();	/* check for any that might have come in while we were out */
	if (atomic_read(&nr_new_vcores_wanted))
		goto try_handle_it;
	return 0;
}
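/* Usage sketch (illustrative only; `upthread_create`, `handle_error`, and the
 * ready queue are hypothetical 2LS code, not part of this file). Callers just
 * post their desire and return; whichever context wins the handler swap above
 * does the actual prepping and syscalls:
 *
 *	static void upthread_create(void (*func)(void *), void *arg)
 *	{
 *		// ... allocate the thread, put it on the ready queue ...
 *		// Ask for one more vcore than we currently have. This
 *		// returns 0 even if some other context ends up submitting.
 *		if (vcore_request(1))
 *			handle_error();	// only the submitter can see -1
 *	}
 */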
void __attribute__((constructor)) vcore_lib_init(void)
{
	uintptr_t mmap_block;

	/* Note this is racy, but okay. The first time through, we are _S.
	 * Also, this is the "lowest" level constructor for now, so we don't
	 * need to call any other init functions after our run_once() call.
	 * This may change in the future. */
	init_once_racy(return);
	/* Need to alloc vcore0's transition stuff here (technically, just the
	 * TLS) so that schedulers can use vcore0's transition TLS before it
	 * comes up in vcore_entry() */
	if (allocate_vcore_stack(0) || allocate_transition_tls(0))
		goto vcore_lib_init_fail;
	/* Initialize our VCPD event queues' ucqs, two pages per ucq, four per
	 * vcore */
	mmap_block = (uintptr_t)mmap(0, PGSIZE * 4 * max_vcores(),
	                             PROT_WRITE | PROT_READ,
	                             MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
	/* Yeah, this doesn't fit in the error-handling scheme, but this whole
	 * system doesn't really handle failure, and needs a rewrite involving
	 * fewer mmaps/munmaps. */
	assert(mmap_block && mmap_block != (uintptr_t)MAP_FAILED);
	/* Note we may end up doing vcore 0's elsewhere, for _Ss, or else have
	 * a separate ev_q for that. */
	for (int i = 0; i < max_vcores(); i++) {
		/* four pages total for both ucqs from the big block (two pages
		 * each) */
		ucq_init_raw(&vcpd_of(i)->ev_mbox_public.ev_msgs,
		             mmap_block + (4 * i) * PGSIZE,
		             mmap_block + (4 * i + 1) * PGSIZE);
		ucq_init_raw(&vcpd_of(i)->ev_mbox_private.ev_msgs,
		             mmap_block + (4 * i + 2) * PGSIZE,
		             mmap_block + (4 * i + 3) * PGSIZE);
		/* Set the lowest level entry point for each vcore. */
		vcpd_of(i)->vcore_entry = (uintptr_t)__kernel_vcore_entry;
	}
	atomic_init(&vc_req_being_handled, 0);
	assert(!in_vcore_context());
	vcore_libc_init();
	return;
vcore_lib_init_fail:
	assert(0);
}
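/* For reference (derived from the init loop above, not new behavior), the
 * slice of mmap_block used for vcore i:
 *
 *	mmap_block + (4 * i + 0) * PGSIZE  \_ two pages for the public ucq
 *	mmap_block + (4 * i + 1) * PGSIZE  /
 *	mmap_block + (4 * i + 2) * PGSIZE  \_ two pages for the private ucq
 *	mmap_block + (4 * i + 3) * PGSIZE  /
 */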
/* Helper: prepares a vcore for use. Takes a block of pages for the UCQs.
 *
 * Vcores need certain things, such as a stack and TLS. These are determined
 * by userspace. Every vcore needs these set up before we drop into vcore
 * context on that vcore. This means we need to prep before asking the kernel
 * for those vcores.
 *
 * We could have this function do its own mmap, at the expense of O(n)
 * syscalls when we prepare the extra vcores. */
static void __prep_vcore(int vcoreid, uintptr_t mmap_block)
{
	struct preempt_data *vcpd = vcpd_of(vcoreid);
	int ret;

	ret = allocate_vcore_stack(vcoreid);
	assert(!ret);
	ret = allocate_transition_tls(vcoreid);
	assert(!ret);

	vcpd->ev_mbox_public.type = EV_MBOX_UCQ;
	ucq_init_raw(&vcpd->ev_mbox_public.ucq,
	             mmap_block + 0 * PGSIZE,
	             mmap_block + 1 * PGSIZE);
	vcpd->ev_mbox_private.type = EV_MBOX_UCQ;
	ucq_init_raw(&vcpd->ev_mbox_private.ucq,
	             mmap_block + 2 * PGSIZE,
	             mmap_block + 3 * PGSIZE);

	/* Set the lowest level entry point for each vcore. */
	vcpd->vcore_entry = (uintptr_t)__kernel_vcore_entry;
}
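/* Caller sketch (an assumption about intended use, mirroring the
 * four-pages-per-vcore layout in vcore_lib_init(); `nr_vcores` is a
 * hypothetical count). The caller mmaps one big block and hands each vcore
 * its four-page slice, avoiding the O(n) syscalls mentioned above:
 *
 *	uintptr_t block = (uintptr_t)mmap(0, 4 * PGSIZE * nr_vcores,
 *	                                  PROT_WRITE | PROT_READ,
 *	                                  MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
 *	assert(block != (uintptr_t)MAP_FAILED);
 *	for (int i = 0; i < nr_vcores; i++)
 *		__prep_vcore(i, block + 4 * i * PGSIZE);
 */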