/**
 * Runs work-groups of the kernel run command `k` on the calling thread.
 *
 * Ranges of work-group indices are requested from get_wg_index_range()
 * one after another; every index of a range is translated to a 3D group
 * id and executed through k->workgroup.  The loop ends when
 * get_wg_index_range() returns zero.
 *
 * \param k           kernel run command whose work-groups are executed.
 * \param thread_data data of the calling pool thread (not referenced by
 *                    this function).
 * \return 0 when no index range could be obtained at all (nothing was
 *         executed), 1 when at least one range was processed.
 */
static int
work_group_scheduler (kernel_run_command *k,
                      struct pool_thread_data *thread_data)
{
  void *arguments[k->kernel->num_args + k->kernel->num_locals];
  struct pocl_context pc;
  unsigned i;
  unsigned start_index;
  unsigned end_index;
  char last_wgs = 0;

  if (!get_wg_index_range (k, &start_index, &end_index, &last_wgs))
    return 0;

  setup_kernel_arg_array ((void**)&arguments, k);
  /* Work on a private copy of the context; group_id is rewritten for
     every work-group below.  */
  memcpy (&pc, &k->pc, sizeof (struct pocl_context));

  do
    {
      /* get_wg_index_range() raised last_wgs (presumably: this is the
         final range of the kernel), so take the command off the shared
         kernel queue before executing the range.  */
      if (last_wgs)
        {
          PTHREAD_LOCK (&scheduler.wq_lock, NULL);
          LL_DELETE (scheduler.kernel_queue, k);
          PTHREAD_UNLOCK (&scheduler.wq_lock);
        }

      /* Both bounds of the range are inclusive.  */
      for (i = start_index; i <= end_index; ++i)
        {
          translate_wg_index_to_3d_index (k, i, (size_t*)&pc.group_id);
#ifdef DEBUG_MT
          /* group_id is handled as size_t above; "%d" would be a
             format/argument mismatch, so print with "%zu".  */
          printf ("### exec_wg: gid_x %zu, gid_y %zu, gid_z %zu\n",
                  (size_t)pc.group_id[0], (size_t)pc.group_id[1],
                  (size_t)pc.group_id[2]);
#endif
          k->workgroup (arguments, &pc);
        }
    }
  while (get_wg_index_range (k, &start_index, &end_index, &last_wgs));

  free_kernel_arg_array (arguments, k);

  return 1;
}
/**
 * Gives the calling worker thread something to do.
 *
 * First, if a kernel run command is at the head of scheduler.kernel_queue,
 * the caller takes a reference on it, runs work_group_scheduler() on it and
 * drops the reference again; whoever drops the last reference calls
 * finalize_kernel_command().  Afterwards the head of scheduler.work_queue
 * (if any) is removed and handed back to the caller.
 *
 * \param td      data of the calling thread, forwarded to the helpers.
 * \param cmd_ptr receives the dequeued command, or NULL if the work queue
 *                was empty.
 * \return 0 if a command was stored in *cmd_ptr, 1 otherwise.
 */
int
pthread_scheduler_get_work (thread_data *td, _cl_command_node **cmd_ptr)
{
  kernel_run_command *pending_kernel;
  _cl_command_node *next_cmd;
  int dropped_last_ref;

  /* Step 1: help out with a running kernel, if there is one.  The
     reference keeps the command alive while we work on it unlocked.  */
  PTHREAD_LOCK (&scheduler.wq_lock, NULL);
  pending_kernel = scheduler.kernel_queue;
  if (pending_kernel != NULL)
    {
      ++pending_kernel->ref_count;
    }
  PTHREAD_UNLOCK (&scheduler.wq_lock);

  if (pending_kernel != NULL)
    {
      work_group_scheduler (pending_kernel, td);

      PTHREAD_LOCK (&scheduler.wq_lock, NULL);
      dropped_last_ref = (--pending_kernel->ref_count == 0);
      PTHREAD_UNLOCK (&scheduler.wq_lock);

      /* Finalization happens outside of the queue lock.  */
      if (dropped_last_ref)
        {
          finalize_kernel_command (td, pending_kernel);
        }
    }

  /* Step 2: pick up an ordinary command, if there is one.  */
  PTHREAD_LOCK (&scheduler.wq_lock, NULL);
  next_cmd = scheduler.work_queue;
  if (next_cmd != NULL)
    {
      DL_DELETE (scheduler.work_queue, next_cmd);
    }
  PTHREAD_UNLOCK (&scheduler.wq_lock);

  *cmd_ptr = next_cmd;
  return (next_cmd == NULL) ? 1 : 0;
}
/**
 * Signals scheduler.cq_finished_cond.
 *
 * The signal is sent while scheduler.cq_finished_lock is held.
 * NOTE(review): judging by the name, the waiter is the host thread
 * blocked until a command queue has finished -- confirm against the
 * code that waits on cq_finished_cond.
 */
void
pthread_scheduler_release_host ()
{
  PTHREAD_LOCK (&scheduler.cq_finished_lock, NULL);

  pthread_cond_signal (&scheduler.cq_finished_cond);

  PTHREAD_UNLOCK (&scheduler.cq_finished_lock);
}
/** * \brief Function executed by worker threads. */ static void* threadqueue_worker(void* threadqueue_opaque) { threadqueue_queue_t * const threadqueue = (threadqueue_queue_t *) threadqueue_opaque; PTHREAD_LOCK(&threadqueue->lock); for (;;) { while (!threadqueue->stop && threadqueue->first == NULL) { // Wait until there is something to do in the queue. PTHREAD_COND_WAIT(&threadqueue->job_available, &threadqueue->lock); } if (threadqueue->stop) { break; } // Get a job and remove it from the queue. threadqueue_job_t *job = threadqueue_pop_job(threadqueue); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_READY); job->state = THREADQUEUE_JOB_STATE_RUNNING; PTHREAD_UNLOCK(&job->lock); PTHREAD_UNLOCK(&threadqueue->lock); job->fptr(job->arg); PTHREAD_LOCK(&threadqueue->lock); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_RUNNING); job->state = THREADQUEUE_JOB_STATE_DONE; PTHREAD_COND_SIGNAL(&threadqueue->job_done); // Go through all the jobs that depend on this one, decreasing their // ndepends. Count how many jobs can now start executing so we know how // many threads to wake up. int num_new_jobs = 0; for (int i = 0; i < job->rdepends_count; ++i) { threadqueue_job_t * const depjob = job->rdepends[i]; // The dependency (job) is locked before the job depending on it. // This must be the same order as in kvz_threadqueue_job_dep_add. PTHREAD_LOCK(&depjob->lock); assert(depjob->state == THREADQUEUE_JOB_STATE_WAITING || depjob->state == THREADQUEUE_JOB_STATE_PAUSED); assert(depjob->ndepends > 0); depjob->ndepends--; if (depjob->ndepends == 0 && depjob->state == THREADQUEUE_JOB_STATE_WAITING) { // Move the job to ready jobs. threadqueue_push_job(threadqueue, kvz_threadqueue_copy_ref(depjob)); num_new_jobs++; } // Clear this reference to the job. 
PTHREAD_UNLOCK(&depjob->lock); kvz_threadqueue_free_job(&job->rdepends[i]); } job->rdepends_count = 0; PTHREAD_UNLOCK(&job->lock); kvz_threadqueue_free_job(&job); // The current thread will process one of the new jobs so we wake up // one threads less than the the number of new jobs. for (int i = 0; i < num_new_jobs - 1; i++) { pthread_cond_signal(&threadqueue->job_available); } } threadqueue->thread_running_count--; PTHREAD_UNLOCK(&threadqueue->lock); return NULL; }