void *gordon_worker_progress(void *arg) { _STARPU_DEBUG("gordon_worker_progress\n"); /* fix the thread on the correct cpu */ struct starpu_worker_set_s *gordon_set_arg = arg; unsigned prog_thread_bind_id = (gordon_set_arg->workers[0].bindid + 1)%(gordon_set_arg->config->nhwcores); _starpu_bind_thread_on_cpu(gordon_set_arg->config, prog_thread_bind_id); PTHREAD_MUTEX_LOCK(&progress_mutex); progress_thread_is_inited = 1; PTHREAD_COND_SIGNAL(&progress_cond); PTHREAD_MUTEX_UNLOCK(&progress_mutex); while (1) { /* the Gordon runtime needs to make sure that we poll it * so that we handle jobs that are done */ /* wait for one task termination */ int ret = gordon_wait(0); if (ret) { /* possibly wake the thread that injects work */ starpu_wake_all_blocked_workers(); } } return NULL; }
void *_starpu_gordon_worker(void *arg) { struct starpu_worker_set_s *gordon_set_arg = arg; _starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid); /* TODO set_local_memory_node per SPU */ gordon_init(gordon_set_arg->nworkers); /* NB: On SPUs, the worker_key is set to NULL since there is no point * in associating the PPU thread with a specific SPU (worker) while * it's handling multiple processing units. */ _starpu_set_local_worker_key(NULL); /* TODO set workers' name field */ unsigned spu; for (spu = 0; spu < gordon_set_arg->nworkers; spu++) { struct starpu_worker_s *worker = &gordon_set_arg->workers[spu]; snprintf(worker->name, 32, "SPU %d", worker->id); } /* * To take advantage of PPE being hyperthreaded, we should have 2 threads * for the gordon driver : one injects works, the other makes sure that * gordon is progressing (and performs the callbacks). */ /* launch the progression thread */ PTHREAD_MUTEX_INIT(&progress_mutex, NULL); PTHREAD_COND_INIT(&progress_cond, NULL); pthread_create(&progress_thread, NULL, gordon_worker_progress, gordon_set_arg); /* wait for the progression thread to be ready */ PTHREAD_MUTEX_LOCK(&progress_mutex); while (!progress_thread_is_inited) PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); PTHREAD_MUTEX_UNLOCK(&progress_mutex); _STARPU_DEBUG("progress thread is running ... \n"); /* tell the core that gordon is ready */ PTHREAD_MUTEX_LOCK(&gordon_set_arg->mutex); gordon_set_arg->set_is_initialized = 1; PTHREAD_COND_SIGNAL(&gordon_set_arg->ready_cond); PTHREAD_MUTEX_UNLOCK(&gordon_set_arg->mutex); gordon_worker_inject(gordon_set_arg); _STARPU_DEBUG("gordon deinit...\n"); gordon_deinit(); _STARPU_DEBUG("gordon was deinited\n"); pthread_exit((void *)0x42); }
/*
 * Push a job at the front of a stack job queue.
 *
 * stack_queue: queue receiving the job.
 * sched_mutex: mutex held for the whole update of the queue and counters.
 * sched_cond:  condition signalled once the job has been queued.
 * task:        job to push.
 *
 * Besides queueing the job, this bumps the global total_number_of_jobs
 * counter as well as both the njobs and nprocessed counters of the queue.
 */
void _starpu_stack_push_task(struct starpu_stack_jobq_s *stack_queue, pthread_mutex_t *sched_mutex, pthread_cond_t *sched_cond, starpu_job_t task)
{
    PTHREAD_MUTEX_LOCK(sched_mutex);

    total_number_of_jobs += 1;

    /* record the push, then put the job on top of the stack */
    STARPU_TRACE_JOB_PUSH(task, 0);
    starpu_job_list_push_front(stack_queue->jobq, task);

    /* per-queue accounting */
    stack_queue->njobs += 1;
    stack_queue->nprocessed += 1;

    /* a waiter on sched_cond may now pick the job up */
    PTHREAD_COND_SIGNAL(sched_cond);

    PTHREAD_MUTEX_UNLOCK(sched_mutex);
}
/*
 * Block data request from application
 */

/*
 * Continuation of starpu_data_acquire.
 *
 * arg is a struct user_interaction_wrapper * carrying the data handle and the
 * requested access mode. The data is fetched on node 0 with the requested
 * mode, then the thread waiting on the wrapper is told that the request is
 * finished.
 *
 * The handle must not be NULL, and the fetch is expected to succeed: its
 * return value is checked with STARPU_ASSERT, like _prefetch_data_on_node
 * does for the same call.
 */
static inline void _starpu_data_acquire_continuation(void *arg)
{
    struct user_interaction_wrapper *wrapper = arg;

    starpu_data_handle handle = wrapper->handle;

    STARPU_ASSERT(handle);

    /* do not silently ignore a failed fetch: the waiter would otherwise be
     * told that the data is available while it may not be */
    int ret = _starpu_fetch_data_on_node(handle, 0, wrapper->mode, 0, NULL, NULL);
    STARPU_ASSERT(!ret);
    (void) ret; /* silence "unused" warnings if assertions are compiled out */

    /* continuation of starpu_data_acquire */
    PTHREAD_MUTEX_LOCK(&wrapper->lock);
    wrapper->finished = 1;
    PTHREAD_COND_SIGNAL(&wrapper->cond);
    PTHREAD_MUTEX_UNLOCK(&wrapper->lock);
}
static void _prefetch_data_on_node(void *arg) { struct user_interaction_wrapper *wrapper = arg; int ret; ret = _starpu_fetch_data_on_node(wrapper->handle, wrapper->node, STARPU_R, wrapper->async, NULL, NULL); STARPU_ASSERT(!ret); PTHREAD_MUTEX_LOCK(&wrapper->lock); wrapper->finished = 1; PTHREAD_COND_SIGNAL(&wrapper->cond); PTHREAD_MUTEX_UNLOCK(&wrapper->lock); if (!wrapper->async) { _starpu_spin_lock(&wrapper->handle->header_lock); _starpu_notify_data_dependencies(wrapper->handle); _starpu_spin_unlock(&wrapper->handle->header_lock); } }
void *_starpu_cpu_worker(void *arg) { struct starpu_worker_s *cpu_arg = arg; unsigned memnode = cpu_arg->memory_node; int workerid = cpu_arg->workerid; int devid = cpu_arg->devid; #ifdef STARPU_USE_FXT _starpu_fxt_register_thread(cpu_arg->bindid); #endif STARPU_TRACE_WORKER_INIT_START(STARPU_FUT_CPU_KEY, devid, memnode); _starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid); _STARPU_DEBUG("cpu worker %d is ready on logical cpu %d\n", devid, cpu_arg->bindid); _starpu_set_local_memory_node_key(&memnode); _starpu_set_local_worker_key(cpu_arg); snprintf(cpu_arg->name, 32, "CPU %d", devid); cpu_arg->status = STATUS_UNKNOWN; STARPU_TRACE_WORKER_INIT_END /* tell the main thread that we are ready */ PTHREAD_MUTEX_LOCK(&cpu_arg->mutex); cpu_arg->worker_is_initialized = 1; PTHREAD_COND_SIGNAL(&cpu_arg->ready_cond); PTHREAD_MUTEX_UNLOCK(&cpu_arg->mutex); starpu_job_t j; int res; while (_starpu_machine_is_running()) { STARPU_TRACE_START_PROGRESS(memnode); _starpu_datawizard_progress(memnode, 1); STARPU_TRACE_END_PROGRESS(memnode); _starpu_execute_registered_progression_hooks(); PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex); /* perhaps there is some local task to be executed first */ j = _starpu_pop_local_task(cpu_arg); /* otherwise ask a task to the scheduler */ if (!j) { struct starpu_task *task = _starpu_pop_task(); if (task) j = _starpu_get_job_associated_to_task(task); } if (j == NULL) { if (_starpu_worker_can_block(memnode)) _starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex); PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex); continue; }; PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex); /* can a cpu perform that task ? */ if (!STARPU_CPU_MAY_PERFORM(j)) { /* put it and the end of the queue ... 
XXX */ _starpu_push_task(j, 0); continue; } _starpu_set_current_task(j->task); res = execute_job_on_cpu(j, cpu_arg); _starpu_set_current_task(NULL); if (res) { switch (res) { case -EAGAIN: _starpu_push_task(j, 0); continue; default: assert(0); } } _starpu_handle_job_termination(j, 0); } STARPU_TRACE_WORKER_DEINIT_START /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY); pthread_exit(NULL); }
/** * \brief Function executed by worker threads. */ static void* threadqueue_worker(void* threadqueue_opaque) { threadqueue_queue_t * const threadqueue = (threadqueue_queue_t *) threadqueue_opaque; PTHREAD_LOCK(&threadqueue->lock); for (;;) { while (!threadqueue->stop && threadqueue->first == NULL) { // Wait until there is something to do in the queue. PTHREAD_COND_WAIT(&threadqueue->job_available, &threadqueue->lock); } if (threadqueue->stop) { break; } // Get a job and remove it from the queue. threadqueue_job_t *job = threadqueue_pop_job(threadqueue); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_READY); job->state = THREADQUEUE_JOB_STATE_RUNNING; PTHREAD_UNLOCK(&job->lock); PTHREAD_UNLOCK(&threadqueue->lock); job->fptr(job->arg); PTHREAD_LOCK(&threadqueue->lock); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_RUNNING); job->state = THREADQUEUE_JOB_STATE_DONE; PTHREAD_COND_SIGNAL(&threadqueue->job_done); // Go through all the jobs that depend on this one, decreasing their // ndepends. Count how many jobs can now start executing so we know how // many threads to wake up. int num_new_jobs = 0; for (int i = 0; i < job->rdepends_count; ++i) { threadqueue_job_t * const depjob = job->rdepends[i]; // The dependency (job) is locked before the job depending on it. // This must be the same order as in kvz_threadqueue_job_dep_add. PTHREAD_LOCK(&depjob->lock); assert(depjob->state == THREADQUEUE_JOB_STATE_WAITING || depjob->state == THREADQUEUE_JOB_STATE_PAUSED); assert(depjob->ndepends > 0); depjob->ndepends--; if (depjob->ndepends == 0 && depjob->state == THREADQUEUE_JOB_STATE_WAITING) { // Move the job to ready jobs. threadqueue_push_job(threadqueue, kvz_threadqueue_copy_ref(depjob)); num_new_jobs++; } // Clear this reference to the job. 
PTHREAD_UNLOCK(&depjob->lock); kvz_threadqueue_free_job(&job->rdepends[i]); } job->rdepends_count = 0; PTHREAD_UNLOCK(&job->lock); kvz_threadqueue_free_job(&job); // The current thread will process one of the new jobs so we wake up // one threads less than the the number of new jobs. for (int i = 0; i < num_new_jobs - 1; i++) { pthread_cond_signal(&threadqueue->job_available); } } threadqueue->thread_running_count--; PTHREAD_UNLOCK(&threadqueue->lock); return NULL; }