/* * With the addition of lightweight context switching, worker creation becomes a * bit more complicated because we need all task creation and finish scopes to * be performed from beneath an explicitly created context, rather than from a * pthread context. To do this, we start worker_routine by creating a proxy * context to switch from and create a lightweight context to switch to, which * enters crt_work_loop immediately, moving into the main work loop, eventually * swapping back to the proxy task * to clean up this worker thread when the worker thread is signaled to exit. */ static void *worker_routine(void *args) { const int wid = *((int *)args); set_current_worker(wid); hclib_worker_state *ws = CURRENT_WS_INTERNAL; set_up_worker_thread_affinities(wid); // Create proxy original context to switch from LiteCtx *currentCtx = LiteCtx_proxy_create(__func__); ws->root_ctx = currentCtx; /* * Create the new proxy we will be switching to, which will start with * crt_work_loop at the top of the stack. */ LiteCtx *newCtx = LiteCtx_create(crt_work_loop); newCtx->arg1 = args; #ifdef HCLIB_STATS worker_stats[CURRENT_WS_INTERNAL->id].count_ctx_creates++; #endif // Swap in the newCtx lite context ctx_swap(currentCtx, newCtx, __func__); #ifdef VERBOSE fprintf(stderr, "worker_routine: worker %d exiting, cleaning up proxy %p " "and lite ctx %p\n", hclib_get_current_worker(), currentCtx, newCtx); #endif // free resources LiteCtx_destroy(currentCtx->prev); LiteCtx_proxy_destroy(currentCtx); return NULL; }
void task::exec_internal() { task_state READY_STATE = TASK_STATE_READY; task_state RUNNING_STATE = TASK_STATE_RUNNING; if (_state.compare_exchange_strong(READY_STATE, TASK_STATE_RUNNING)) { task* parent_task = nullptr; if (tls_task_info.magic == 0xdeadbeef) { parent_task = tls_task_info.current_task; } else { set_current_worker(nullptr); } tls_task_info.current_task = this; _spec->on_task_begin.execute(this); exec(); if (_state.compare_exchange_strong(RUNNING_STATE, TASK_STATE_FINISHED)) { _spec->on_task_end.execute(this); // signal_waiters(); [ // inline for performance void* evt = _wait_event.load(); if (evt != nullptr) { auto nevt = (utils::notify_event*)evt; nevt->notify(); } // ] } // for timer else { if (!_wait_for_cancel) { _spec->on_task_end.execute(this); enqueue(); } else { _state.compare_exchange_strong(READY_STATE, TASK_STATE_CANCELLED); _spec->on_task_end.execute(this); // signal_waiters(); [ // inline for performance void* evt = _wait_event.load(); if (evt != nullptr) { auto nevt = (utils::notify_event*)evt; nevt->notify(); } // ] } } tls_task_info.current_task = parent_task; } if (!_spec->allow_inline && !_is_null) { service::lock_checker::check_dangling_lock(); } }
static void hclib_entrypoint(const char **module_dependencies, const int n_module_dependencies, const int instrument) { /* * Assert that the completion flag structures are each on separate cache * lines. */ HASSERT(sizeof(worker_done_t) == 64); load_dependencies(module_dependencies, n_module_dependencies); hclib_call_module_pre_init_functions(); srand(0); hc_context = (hclib_context *)malloc(sizeof(hclib_context)); HASSERT(hc_context); /* * Parse the platform description from the HPT configuration file and load * it into the hclib_context. */ hclib_global_init(); // Initialize any registered modules hclib_call_module_post_init_functions(); // init timer stats hclib_init_stats(0, hc_context->nworkers); if (instrument) { initialize_instrumentation(hc_context->nworkers); } /* Create key to store per thread worker_state */ if (pthread_key_create(&ws_key, NULL) != 0) { log_die("Cannot create ws_key for worker-specific data"); } /* * set pthread's concurrency. Doesn't seem to do much on Linux, only * relevant when there are more pthreads than hardware cores to schedule * them on. */ pthread_setconcurrency(hc_context->nworkers); #ifdef HCLIB_STATS worker_stats = (per_worker_stats *)calloc(hc_context->nworkers, sizeof(*worker_stats)); HASSERT(worker_stats); for (int i = 0; i < hc_context->nworkers; i++) { worker_stats[i].stolen_tasks_per_thread = (size_t *)calloc( hc_context->nworkers, sizeof(size_t)); HASSERT(worker_stats[i].stolen_tasks_per_thread); } #endif // Launch the worker threads pthread_attr_t attr; if (pthread_attr_init(&attr) != 0) { fprintf(stderr, "Error in pthread_attr_init\n"); exit(3); } create_hwloc_cpusets(); // Start workers for (int i = 1; i < hc_context->nworkers; i++) { if (pthread_create(&hc_context->workers[i]->t, &attr, worker_routine, &hc_context->workers[i]->id) != 0) { fprintf(stderr, "Error launching thread\n"); exit(4); } } set_current_worker(0); set_up_worker_thread_affinities(0); const unsigned dist_id = hclib_register_dist_func(default_dist_func); HASSERT(dist_id == HCLIB_DEFAULT_LOOP_DIST); // allocate root finish hclib_start_finish(); }