/*
 * Thread entry point.
 *
 * Blocks on the no_first_process condition for as long as
 * first_process_active is set, then prints a message and sets
 * first_process_active itself.
 *
 * arg: unused.
 * Returns NULL.
 */
void *entry_pint(void *arg)
{
	(void)arg;

	PTHREAD_MUTEX_LOCK(mutex);

	/* Re-check the predicate after every wake-up: condition waits may
	 * return spuriously. */
	while (first_process_active)
		PTHREAD_COND_WAIT(no_first_process, mutex);

	printf("IT'S ALIVE!");
	first_process_active = true;

	/* Release the mutex before the thread terminates; otherwise it stays
	 * locked forever and nobody can ever clear first_process_active or
	 * signal no_first_process. */
	PTHREAD_MUTEX_UNLOCK(mutex);

	/* A void * thread routine must return a value. */
	return NULL;
}
void *_starpu_gordon_worker(void *arg) { struct starpu_worker_set_s *gordon_set_arg = arg; _starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid); /* TODO set_local_memory_node per SPU */ gordon_init(gordon_set_arg->nworkers); /* NB: On SPUs, the worker_key is set to NULL since there is no point * in associating the PPU thread with a specific SPU (worker) while * it's handling multiple processing units. */ _starpu_set_local_worker_key(NULL); /* TODO set workers' name field */ unsigned spu; for (spu = 0; spu < gordon_set_arg->nworkers; spu++) { struct starpu_worker_s *worker = &gordon_set_arg->workers[spu]; snprintf(worker->name, 32, "SPU %d", worker->id); } /* * To take advantage of PPE being hyperthreaded, we should have 2 threads * for the gordon driver : one injects works, the other makes sure that * gordon is progressing (and performs the callbacks). */ /* launch the progression thread */ PTHREAD_MUTEX_INIT(&progress_mutex, NULL); PTHREAD_COND_INIT(&progress_cond, NULL); pthread_create(&progress_thread, NULL, gordon_worker_progress, gordon_set_arg); /* wait for the progression thread to be ready */ PTHREAD_MUTEX_LOCK(&progress_mutex); while (!progress_thread_is_inited) PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); PTHREAD_MUTEX_UNLOCK(&progress_mutex); _STARPU_DEBUG("progress thread is running ... \n"); /* tell the core that gordon is ready */ PTHREAD_MUTEX_LOCK(&gordon_set_arg->mutex); gordon_set_arg->set_is_initialized = 1; PTHREAD_COND_SIGNAL(&gordon_set_arg->ready_cond); PTHREAD_MUTEX_UNLOCK(&gordon_set_arg->mutex); gordon_worker_inject(gordon_set_arg); _STARPU_DEBUG("gordon deinit...\n"); gordon_deinit(); _STARPU_DEBUG("gordon was deinited\n"); pthread_exit((void *)0x42); }
/**
 * \brief Block the caller until the given job has reached the DONE state.
 *
 * The job's own lock is held while its state is inspected; the wait itself
 * happens on the queue-wide job_done condition.
 *
 * \param threadqueue  queue whose job_done condition is waited on
 * \param job          job to wait for
 *
 * \return 1 once the job is done. (The historical contract also mentions
 *         0 on failure; no such path is visible in this body, so it is
 *         presumably produced inside the PTHREAD_* macros -- to be confirmed.)
 */
int kvz_threadqueue_waitfor(threadqueue_queue_t * threadqueue, threadqueue_job_t * job)
{
  PTHREAD_LOCK(&job->lock);

  for (;;) {
    // Re-check the state after every wake-up.
    if (job->state == THREADQUEUE_JOB_STATE_DONE) {
      break;
    }
    PTHREAD_COND_WAIT(&threadqueue->job_done, &job->lock);
  }

  PTHREAD_UNLOCK(&job->lock);

  return 1;
}
/* Workers may block when there is no work to do at all. We assume that the * mutex is hold when that function is called. */ void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex) { struct timespec start_time, end_time; STARPU_TRACE_WORKER_SLEEP_START _starpu_worker_set_status(workerid, STATUS_SLEEPING); starpu_clock_gettime(&start_time); _starpu_worker_register_sleeping_start_date(workerid, &start_time); PTHREAD_COND_WAIT(cond, mutex); _starpu_worker_set_status(workerid, STATUS_UNKNOWN); STARPU_TRACE_WORKER_SLEEP_END starpu_clock_gettime(&end_time); int profiling = starpu_profiling_status_get(); if (profiling) { struct timespec sleeping_time; starpu_timespec_sub(&end_time, &start_time, &sleeping_time); _starpu_worker_update_profiling_info_sleeping(workerid, &start_time, &end_time); } }
/*
 * Fetch a piece of data onto a memory node with the given access mode.
 *
 * handle: data to fetch (must not be NULL).
 * node:   destination memory node.
 * async:  forwarded to _starpu_fetch_data_on_node(); when it is zero, the
 *         data dependencies are notified right after an immediate fetch.
 * mode:   access mode used for the request.
 *
 * Returns 0 on completion, or -EDEADLK when the calling context is not
 * allowed to perform blocking calls (e.g. a callback or a codelet).
 */
int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle handle, unsigned node, unsigned async, starpu_access_mode mode)
{
	STARPU_ASSERT(handle);

	/* Calling this from a callback or a codelet is forbidden. */
	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
		return -EDEADLK;

	struct user_interaction_wrapper wrapper =
	{
		.handle = handle,
		.node = node,
		.async = async,
		.cond = PTHREAD_COND_INITIALIZER,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.finished = 0
	};

	if (_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _prefetch_data_on_node, &wrapper))
	{
		/* The request could not be served right away: sleep until
		 * wrapper.finished is set (presumably by
		 * _prefetch_data_on_node -- confirm against its definition). */
		PTHREAD_MUTEX_LOCK(&wrapper.lock);
		while (!wrapper.finished)
		{
			PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
		}
		PTHREAD_MUTEX_UNLOCK(&wrapper.lock);

		return 0;
	}

	/* We can proceed immediately. */
	_starpu_fetch_data_on_node(handle, node, mode, async, NULL, NULL);

	if (!async)
	{
		/* Drop the "lock"/reference that was taken on the data. */
		_starpu_spin_lock(&handle->header_lock);
		_starpu_notify_data_dependencies(handle);
		_starpu_spin_unlock(&handle->header_lock);
	}

	return 0;
}

/*
 * Read-only variant of _starpu_prefetch_data_on_node_with_mode(): the
 * request is issued with the STARPU_R access mode and its result is returned
 * unchanged.
 */
int starpu_data_prefetch_on_node(starpu_data_handle handle, unsigned node, unsigned async)
{
	return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R);
}

/*
 * It is possible to specify that a piece of data can be discarded without
 * impacting the application.
 *
 * handle:       data whose importance is changed.
 * is_important: non-zero marks the data as important, zero as discardable
 *               (stored inverted in handle->is_not_important).
 *
 * The handle's header lock is held for the whole update, including the
 * recursive visit of its children.
 */
void starpu_data_advise_as_important(starpu_data_handle handle, unsigned is_important)
{
	_starpu_spin_lock(&handle->header_lock);

	/* Visit the children in order; each recursive call takes the
	 * corresponding child's own header lock. */
	unsigned idx;
	for (idx = 0; idx < handle->nchildren; idx++)
	{
		struct starpu_data_state_t *sub = &handle->children[idx];

		/* Only intermediate children (those that have children of
		 * their own) are advised.
		 * NOTE(review): leaf children are left untouched -- confirm
		 * that this is intended. */
		if (sub->nchildren == 0)
			continue;

		starpu_data_advise_as_important(sub, is_important);
	}

	handle->is_not_important = !is_important;

	/* The parent may be used again from here on, so release its lock. */
	_starpu_spin_unlock(&handle->header_lock);
}
/*
 * Acquire a piece of data from the application with the given access mode.
 * The data must be released by calling starpu_data_release later on.
 *
 * handle: data to acquire (must not be NULL).
 * mode:   requested access mode.
 *
 * Returns 0 once the data has been acquired, or -EDEADLK when the calling
 * context is not allowed to perform blocking calls (e.g. a callback or a
 * codelet).
 */
int starpu_data_acquire(starpu_data_handle handle, starpu_access_mode mode)
{
	STARPU_ASSERT(handle);

	/* Calling this from a callback or a codelet is forbidden. */
	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
		return -EDEADLK;

	struct user_interaction_wrapper wrapper =
	{
		.handle = handle,
		.mode = mode,
		.node = 0, /* unused */
		.cond = PTHREAD_COND_INITIALIZER,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.finished = 0
	};

	/* Implicit dependencies are set up while the handle's
	 * sequential-consistency mutex is held. */
	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
	int seq_cons_enabled = handle->sequential_consistency;
	if (seq_cons_enabled)
	{
		wrapper.pre_sync_task = starpu_task_create();
		wrapper.post_sync_task = starpu_task_create();

#ifdef STARPU_USE_FXT
		starpu_job_t job = _starpu_get_job_associated_to_task(wrapper.pre_sync_task);
		job->model_name = "acquire_pre";
		job = _starpu_get_job_associated_to_task(wrapper.post_sync_task);
		job->model_name = "acquire_post";
#endif

		_starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, wrapper.post_sync_task, handle, mode);
	}
	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

	if (seq_cons_enabled)
	{
		/* The "pre" sync task is submitted outside of the mutex, as a
		 * synchronous task. */
		/* TODO detect if this is superfluous */
		wrapper.pre_sync_task->synchronous = 1;
		int ret = starpu_task_submit(wrapper.pre_sync_task, NULL);
		STARPU_ASSERT(!ret);
	}

	/* Try to get the data. On immediate success the data is fetched
	 * directly; otherwise a continuation is registered that runs
	 * automatically once the data becomes available again, and we wait
	 * for wrapper.finished. */
	if (_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper))
	{
		PTHREAD_MUTEX_LOCK(&wrapper.lock);
		while (!wrapper.finished)
		{
			PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
		}
		PTHREAD_MUTEX_UNLOCK(&wrapper.lock);
	}
	else
	{
		/* Nobody has locked this data yet, so proceed immediately. */
		int ret = _starpu_fetch_data_on_node(handle, 0, mode, 0, NULL, NULL);
		STARPU_ASSERT(!ret);
	}

	/* The caller now holds a reference to the piece of data. The "post"
	 * sync task is queued on the handle so that starpu_data_release
	 * submits it. */
	_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);

	return 0;
}

/*
 * Release a piece of data previously obtained with starpu_data_acquire.
 * This function must be called after starpu_data_acquire so that the
 * application releases the data.
 *
 * handle: data to release (must not be NULL).
 */
void starpu_data_release(starpu_data_handle handle)
{
	STARPU_ASSERT(handle);

	/* The application gives the rw-lock back. */
	_starpu_release_data_on_node(handle, 0, 0);

	/* If implicit dependencies were recorded, unlock the "post sync"
	 * tasks now. */
	_starpu_unlock_post_sync_tasks(handle);
}
/** * \brief Function executed by worker threads. */ static void* threadqueue_worker(void* threadqueue_opaque) { threadqueue_queue_t * const threadqueue = (threadqueue_queue_t *) threadqueue_opaque; PTHREAD_LOCK(&threadqueue->lock); for (;;) { while (!threadqueue->stop && threadqueue->first == NULL) { // Wait until there is something to do in the queue. PTHREAD_COND_WAIT(&threadqueue->job_available, &threadqueue->lock); } if (threadqueue->stop) { break; } // Get a job and remove it from the queue. threadqueue_job_t *job = threadqueue_pop_job(threadqueue); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_READY); job->state = THREADQUEUE_JOB_STATE_RUNNING; PTHREAD_UNLOCK(&job->lock); PTHREAD_UNLOCK(&threadqueue->lock); job->fptr(job->arg); PTHREAD_LOCK(&threadqueue->lock); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_RUNNING); job->state = THREADQUEUE_JOB_STATE_DONE; PTHREAD_COND_SIGNAL(&threadqueue->job_done); // Go through all the jobs that depend on this one, decreasing their // ndepends. Count how many jobs can now start executing so we know how // many threads to wake up. int num_new_jobs = 0; for (int i = 0; i < job->rdepends_count; ++i) { threadqueue_job_t * const depjob = job->rdepends[i]; // The dependency (job) is locked before the job depending on it. // This must be the same order as in kvz_threadqueue_job_dep_add. PTHREAD_LOCK(&depjob->lock); assert(depjob->state == THREADQUEUE_JOB_STATE_WAITING || depjob->state == THREADQUEUE_JOB_STATE_PAUSED); assert(depjob->ndepends > 0); depjob->ndepends--; if (depjob->ndepends == 0 && depjob->state == THREADQUEUE_JOB_STATE_WAITING) { // Move the job to ready jobs. threadqueue_push_job(threadqueue, kvz_threadqueue_copy_ref(depjob)); num_new_jobs++; } // Clear this reference to the job. 
PTHREAD_UNLOCK(&depjob->lock); kvz_threadqueue_free_job(&job->rdepends[i]); } job->rdepends_count = 0; PTHREAD_UNLOCK(&job->lock); kvz_threadqueue_free_job(&job); // The current thread will process one of the new jobs so we wake up // one threads less than the the number of new jobs. for (int i = 0; i < num_new_jobs - 1; i++) { pthread_cond_signal(&threadqueue->job_available); } } threadqueue->thread_running_count--; PTHREAD_UNLOCK(&threadqueue->lock); return NULL; }