Exemple #1
0
/*
 * With the addition of lightweight context switching, worker creation becomes a
 * bit more complicated because we need all task creation and finish scopes to
 * be performed from beneath an explicitly created context, rather than from a
 * pthread context. To do this, we start worker_routine by creating a proxy
 * context to switch from and create a lightweight context to switch to, which
 * enters crt_work_loop immediately, moving into the main work loop, eventually
 * swapping back to the proxy task
 * to clean up this worker thread when the worker thread is signaled to exit.
 */
static void *worker_routine(void *args) {
    const int wid = *((int *)args);
    set_current_worker(wid);
    hclib_worker_state *ws = CURRENT_WS_INTERNAL;

    set_up_worker_thread_affinities(wid);

    // Create proxy original context to switch from
    LiteCtx *currentCtx = LiteCtx_proxy_create(__func__);
    ws->root_ctx = currentCtx;

    /*
     * Create the new proxy we will be switching to, which will start with
     * crt_work_loop at the top of the stack.
     */
    LiteCtx *newCtx = LiteCtx_create(crt_work_loop);
    newCtx->arg1 = args;
#ifdef HCLIB_STATS
    worker_stats[CURRENT_WS_INTERNAL->id].count_ctx_creates++;
#endif

    // Swap in the newCtx lite context
    ctx_swap(currentCtx, newCtx, __func__);

#ifdef VERBOSE
    fprintf(stderr, "worker_routine: worker %d exiting, cleaning up proxy %p "
            "and lite ctx %p\n", hclib_get_current_worker(), currentCtx, newCtx);
#endif

    // free resources
    LiteCtx_destroy(currentCtx->prev);
    LiteCtx_proxy_destroy(currentCtx);
    return NULL;
}
Exemple #2
0
void task::exec_internal()
{
    task_state READY_STATE = TASK_STATE_READY;
    task_state RUNNING_STATE = TASK_STATE_RUNNING;

    if (_state.compare_exchange_strong(READY_STATE, TASK_STATE_RUNNING))
    {
        task* parent_task = nullptr;
        if (tls_task_info.magic == 0xdeadbeef)
        {
            parent_task = tls_task_info.current_task;
        }
        else
        {
            set_current_worker(nullptr);
        }
        
        tls_task_info.current_task = this;

        _spec->on_task_begin.execute(this);

        exec();
        
        if (_state.compare_exchange_strong(RUNNING_STATE, TASK_STATE_FINISHED))
        {
            _spec->on_task_end.execute(this);

            // signal_waiters(); [
            // inline for performance
            void* evt = _wait_event.load();
            if (evt != nullptr)
            {
                auto nevt = (utils::notify_event*)evt;
                nevt->notify();
            }
            // ]
        }

        // for timer
        else
        {
            if (!_wait_for_cancel)
            {
                _spec->on_task_end.execute(this);
                enqueue();
            }   
            else
            {
                _state.compare_exchange_strong(READY_STATE, TASK_STATE_CANCELLED);
                _spec->on_task_end.execute(this);

                // signal_waiters(); [
                // inline for performance
                void* evt = _wait_event.load();
                if (evt != nullptr)
                {
                    auto nevt = (utils::notify_event*)evt;
                    nevt->notify();
                }
                // ]
            }
        }
        
        tls_task_info.current_task = parent_task;
    }

    if (!_spec->allow_inline && !_is_null)
    {
        service::lock_checker::check_dangling_lock();
    }
}
Exemple #3
0
static void hclib_entrypoint(const char **module_dependencies,
        const int n_module_dependencies, const int instrument) {
    /*
     * Assert that the completion flag structures are each on separate cache
     * lines.
     */
    HASSERT(sizeof(worker_done_t) == 64);

    load_dependencies(module_dependencies, n_module_dependencies);

    hclib_call_module_pre_init_functions();

    srand(0);

    hc_context = (hclib_context *)malloc(sizeof(hclib_context));
    HASSERT(hc_context);

    /*
     * Parse the platform description from the HPT configuration file and load
     * it into the hclib_context.
     */
    hclib_global_init();

    // Initialize any registered modules
    hclib_call_module_post_init_functions();

    // init timer stats
    hclib_init_stats(0, hc_context->nworkers);

    if (instrument) {
        initialize_instrumentation(hc_context->nworkers);
    }

    /* Create key to store per thread worker_state */
    if (pthread_key_create(&ws_key, NULL) != 0) {
        log_die("Cannot create ws_key for worker-specific data");
    }

    /*
     * set pthread's concurrency. Doesn't seem to do much on Linux, only
     * relevant when there are more pthreads than hardware cores to schedule
     * them on. */
    pthread_setconcurrency(hc_context->nworkers);

#ifdef HCLIB_STATS
    worker_stats = (per_worker_stats *)calloc(hc_context->nworkers,
            sizeof(*worker_stats));
    HASSERT(worker_stats);
    for (int i = 0; i < hc_context->nworkers; i++) {
        worker_stats[i].stolen_tasks_per_thread = (size_t *)calloc(
                hc_context->nworkers, sizeof(size_t));
        HASSERT(worker_stats[i].stolen_tasks_per_thread);
    }
#endif

    // Launch the worker threads
    pthread_attr_t attr;
    if (pthread_attr_init(&attr) != 0) {
        fprintf(stderr, "Error in pthread_attr_init\n");
        exit(3);
    }

    create_hwloc_cpusets();

    // Start workers
    for (int i = 1; i < hc_context->nworkers; i++) {
        if (pthread_create(&hc_context->workers[i]->t, &attr, worker_routine,
                           &hc_context->workers[i]->id) != 0) {
            fprintf(stderr, "Error launching thread\n");
            exit(4);
        }

    }
    set_current_worker(0);

    set_up_worker_thread_affinities(0);

    const unsigned dist_id = hclib_register_dist_func(default_dist_func);
    HASSERT(dist_id == HCLIB_DEFAULT_LOOP_DIST);

    // allocate root finish
    hclib_start_finish();
}