/* Run the C++ scheduler entry point recorded in the global state,
 * trapping any exception that attempts to propagate out of the Cilk
 * runtime.  An escaping exception is treated as a fatal error.
 *
 * w - the worker on whose behalf the scheduler runs.
 */
void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w)
{
    global_state_t *const g = cilkg_get_global_state();
    CILK_ASSERT(g->scheduler);

    cpp_scheduler_t *const run = (cpp_scheduler_t *) g->scheduler;
    try {
        (*run)(w);
    } catch (...) {
        /* No exception may cross the Cilk runtime boundary. */
        __cilkrts_bug("Exception escaped Cilk context");
    }
}
/* Shut down the Cilk runtime.
 *
 * The whole teardown is performed while holding the global OS mutex so
 * that no other thread can bind to the runtime mid-shutdown.  Shutting
 * down while workers are queued, or while the calling thread is still
 * bound to a worker, is a usage error and aborts via __cilkrts_bug().
 */
CILK_API_VOID __cilkrts_end_cilk(void)
{
    global_os_mutex_lock();

    if (cilkg_is_published()) {
        global_state_t *const state = cilkg_get_global_state();

        /* Cilk must be fully quiescent before we tear it down. */
        if (state->Q || __cilkrts_get_tls_worker())
            __cilkrts_bug("Attempt to shut down Cilk while Cilk is still "
                          "running");

        __cilkrts_stop_workers(state);
        __cilkrts_deinit_internal(state);
    }

    global_os_mutex_unlock();
}
static inline int grainsize(int req, count_t count) { // A positive requested grain size comes from the user. A very high grain // size risks losing parallelism, but the user told us what they want for // grainsize. Who are we to argue? if (req > 0) return req; // At present, a negative requested grain size is treated the same way as // a zero grain size, i.e., the runtime computes the actual grainsize // using a hueristic. In the future, the compiler may give us additional // information about the size of the cilk_for body by passing a negative // grain size. // Avoid generating a zero grainsize, even for empty loops. if (count < 1) return 1; global_state_t* g = cilkg_get_global_state(); if (g->under_ptool) { // Grainsize = 1, when running under PIN, and when the grainsize has // not explicitly been set by the user. return 1; } else { // Divide loop count by 8 times the worker count and round up. const int Px8 = g->P * 8; count_t n = (count + Px8 - 1) / Px8; // 2K should be enough to amortize the cost of the cilk_for. Any // larger grainsize risks losing parallelism. if (n > 2048) return 2048; return (int) n; // n <= 2048, so no loss of precision on cast to int } }