/**
 * \brief Add a dependency between two jobs.
 *
 * Appends a new reference to \p job to the reverse dependency list of
 * \p dependency and increments the pending dependency counter of \p job.
 * If \p dependency is already in the DONE state nothing is recorded.
 *
 * \param job           job that should be executed after dependency
 * \param dependency    job that should be executed before job
 *
 * \return 1 on success, 0 on failure (the reverse dependency list could not
 *         be grown; neither job is modified in that case)
 */
int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *dependency)
{
  // Lock the dependency first and then the job depending on it.
  // This must be the same order as in threadqueue_worker.
  PTHREAD_LOCK(&dependency->lock);

  if (dependency->state == THREADQUEUE_JOB_STATE_DONE) {
    // The dependency has been completed already so there is nothing to do.
    PTHREAD_UNLOCK(&dependency->lock);
    return 1;
  }

  // Make room for the reverse dependency before touching job, so that an
  // allocation failure leaves both jobs exactly as they were.
  if (dependency->rdepends_count >= dependency->rdepends_size) {
    size_t bytes = (dependency->rdepends_size + THREADQUEUE_LIST_REALLOC_SIZE)
                   * sizeof(threadqueue_job_t*);

    // Go through a temporary: on failure realloc returns NULL and the old
    // list is still valid and still owned by dependency.
    void *grown = realloc(dependency->rdepends, bytes);
    if (!grown) {
      fprintf(stderr, "Could not realloc dependency->rdepends!\n");
      PTHREAD_UNLOCK(&dependency->lock);
      return 0;
    }

    dependency->rdepends = grown;
    dependency->rdepends_size += THREADQUEUE_LIST_REALLOC_SIZE;
  }

  PTHREAD_LOCK(&job->lock);
  job->ndepends++;
  PTHREAD_UNLOCK(&job->lock);

  // Add the reverse dependency.
  dependency->rdepends[dependency->rdepends_count++] = kvz_threadqueue_copy_ref(job);

  PTHREAD_UNLOCK(&dependency->lock);

  return 1;
}
/* Append RUN_CMD to the scheduler's kernel queue and broadcast on the
   wake_pool condition variable.  Both steps happen with wq_lock held.  */
void
pthread_scheduler_push_kernel (kernel_run_command *run_cmd)
{
  PTHREAD_LOCK (&scheduler.wq_lock, NULL);

  LL_APPEND (scheduler.kernel_queue, run_cmd);
  pthread_cond_broadcast (&scheduler.wake_pool);

  PTHREAD_UNLOCK (&scheduler.wq_lock);
}
/* Append CMD to the scheduler's work queue and broadcast on the wake_pool
   condition variable.  Both steps happen with wq_lock held.  */
void
pthread_scheduler_push_command (_cl_command_node *cmd)
{
  PTHREAD_LOCK (&scheduler.wq_lock, NULL);

  DL_APPEND (scheduler.work_queue, cmd);
  pthread_cond_broadcast (&scheduler.wake_pool);

  PTHREAD_UNLOCK (&scheduler.wq_lock);
}
/* Hand out the next batch of work-group indices of kernel command K.

   On success *START_INDEX and *END_INDEX receive the inclusive index range
   of the batch, and *LAST_WGS is set to 1 when this batch used up the
   remaining work-groups (it is 0 otherwise).  The batch size is bounded by
   POCL_PTHREAD_MAX_WGS and by the number of work-groups left.

   Returns 1 when a batch was handed out, 0 when K has no work-groups
   remaining (the index outputs are left untouched in that case).  All
   bookkeeping is done with k->lock held.  */
static int
get_wg_index_range (kernel_run_command *k, unsigned *start_index,
                    unsigned *end_index, char *last_wgs)
{
  unsigned batch;
  int handed_out = 0;

  *last_wgs = 0;

  PTHREAD_LOCK (&k->lock, NULL);
  if (k->remaining_wgs != 0)
    {
      batch = min (POCL_PTHREAD_MAX_WGS,
                   (1 + k->remaining_wgs / scheduler.num_threads));
      batch = min (batch, k->remaining_wgs);

      *start_index = k->wgs_dealt;
      *end_index = k->wgs_dealt + batch - 1;

      k->remaining_wgs -= batch;
      k->wgs_dealt += batch;

      if (k->remaining_wgs == 0)
        *last_wgs = 1;

      handed_out = 1;
    }
  PTHREAD_UNLOCK (&k->lock);

  return handed_out;
}
/**
 * \brief Ask every worker thread to stop and join them.
 *
 * Sets the stop flag, wakes all threads waiting for jobs and then blocks
 * until each thread in the queue has been joined. If the stop flag was
 * already set, returns immediately.
 *
 * \param threadqueue   queue whose threads should be stopped
 *
 * \return 1 on success, 0 if joining a thread failed
 */
int kvz_threadqueue_stop(threadqueue_queue_t * const threadqueue)
{
  PTHREAD_LOCK(&threadqueue->lock);

  const bool was_stopped = threadqueue->stop;
  if (was_stopped) {
    // A previous call has already stopped the queue, so no thread
    // should be running anymore.
    assert(threadqueue->thread_running_count == 0);
  } else {
    // Raise the stop flag and wake every thread waiting for a job.
    threadqueue->stop = true;
    PTHREAD_COND_BROADCAST(&threadqueue->job_available);
  }

  PTHREAD_UNLOCK(&threadqueue->lock);

  if (was_stopped) {
    return 1;
  }

  // Join the threads one by one.
  int index = 0;
  while (index < threadqueue->thread_count) {
    if (pthread_join(threadqueue->threads[index], NULL) != 0) {
      fprintf(stderr, "pthread_join failed!\n");
      return 0;
    }
    index++;
  }

  return 1;
}
/**
 * \brief Block until a job has reached the DONE state.
 *
 * Waits on the queue's job_done condition variable, using the job's own
 * lock as the associated mutex, and re-checks the job state after every
 * wakeup.
 *
 * \param threadqueue   queue whose job_done condition is waited on
 * \param job           job to wait for
 *
 * \return always 1
 */
int kvz_threadqueue_waitfor(threadqueue_queue_t * threadqueue, threadqueue_job_t * job)
{
  PTHREAD_LOCK(&job->lock);

  for (;;) {
    if (job->state == THREADQUEUE_JOB_STATE_DONE) {
      break;
    }
    PTHREAD_COND_WAIT(&threadqueue->job_done, &job->lock);
  }

  PTHREAD_UNLOCK(&job->lock);

  return 1;
}
/* Sleep until the wake_pool condition variable is signalled or roughly five
   seconds have passed.  The wait is skipped entirely when either the work
   queue or the kernel queue is non-empty at the time wq_lock is taken.  */
static void
pthread_scheduler_sleep (void)
{
  /* The deadline lives on the stack.  A static object here would be written
     without any lock held, which is a data race as soon as two threads call
     this function at the same time.  */
  struct timespec time_to_wait = {0, 0};
  time_to_wait.tv_sec = time (NULL) + 5;

  PTHREAD_LOCK (&scheduler.wq_lock, NULL);
  if (scheduler.work_queue == NULL && scheduler.kernel_queue == NULL)
    pthread_cond_timedwait (&scheduler.wake_pool, &scheduler.wq_lock,
                            &time_to_wait);
  PTHREAD_UNLOCK (&scheduler.wq_lock);
}
/**
 * \brief Submit a paused job to the queue.
 *
 * The job must be in the PAUSED state. Depending on the queue and the job:
 *  - with zero threads in the queue, the job function is called right here
 *    and the job is marked DONE;
 *  - with no pending dependencies, a new reference to the job is pushed to
 *    the queue and one thread waiting for jobs is signalled;
 *  - otherwise the job is marked WAITING.
 *
 * Both the queue lock and the job lock are held for the whole call.
 *
 * \param threadqueue   queue to submit to
 * \param job           job to submit
 *
 * \return always 1
 */
int kvz_threadqueue_submit(threadqueue_queue_t * const threadqueue, threadqueue_job_t *job)
{
  PTHREAD_LOCK(&threadqueue->lock);
  PTHREAD_LOCK(&job->lock);

  assert(job->state == THREADQUEUE_JOB_STATE_PAUSED);

  const bool no_threads = (threadqueue->thread_count == 0);

  if (no_threads) {
    // Nobody else could run the job, so execute it in the calling thread.
    job->fptr(job->arg);
    job->state = THREADQUEUE_JOB_STATE_DONE;
  } else if (job->ndepends != 0) {
    // Some dependencies are still pending.
    job->state = THREADQUEUE_JOB_STATE_WAITING;
  } else {
    // Nothing to wait for: queue the job and wake up one thread.
    threadqueue_push_job(threadqueue, kvz_threadqueue_copy_ref(job));
    pthread_cond_signal(&threadqueue->job_available);
  }

  PTHREAD_UNLOCK(&job->lock);
  PTHREAD_UNLOCK(&threadqueue->lock);

  return 1;
}
/** * \brief Initialize the queue. * * \return 1 on success, 0 on failure */ threadqueue_queue_t * kvz_threadqueue_init(int thread_count) { threadqueue_queue_t *threadqueue = MALLOC(threadqueue_queue_t, 1); if (!threadqueue) { goto failed; } if (pthread_mutex_init(&threadqueue->lock, NULL) != 0) { fprintf(stderr, "pthread_mutex_init failed!\n"); goto failed; } if (pthread_cond_init(&threadqueue->job_available, NULL) != 0) { fprintf(stderr, "pthread_cond_init failed!\n"); goto failed; } if (pthread_cond_init(&threadqueue->job_done, NULL) != 0) { fprintf(stderr, "pthread_cond_init failed!\n"); goto failed; } threadqueue->threads = MALLOC(pthread_t, thread_count); if (!threadqueue->threads) { fprintf(stderr, "Could not malloc threadqueue->threads!\n"); goto failed; } threadqueue->thread_count = 0; threadqueue->thread_running_count = 0; threadqueue->stop = false; threadqueue->first = NULL; threadqueue->last = NULL; // Lock the queue before creating threads, to ensure they all have correct information. PTHREAD_LOCK(&threadqueue->lock); for (int i = 0; i < thread_count; i++) { if (pthread_create(&threadqueue->threads[i], NULL, threadqueue_worker, threadqueue) != 0) { fprintf(stderr, "pthread_create failed!\n"); goto failed; } threadqueue->thread_count++; threadqueue->thread_running_count++; } PTHREAD_UNLOCK(&threadqueue->lock); return threadqueue; failed: kvz_threadqueue_free(threadqueue); return NULL; }
/* Do one round of scheduling for the pool thread described by TD.

   First, if the kernel queue is non-empty, its head is run through
   work_group_scheduler.  A reference is taken on the kernel command for the
   duration of that call; whichever caller drops the count back to zero
   finalizes the command.

   Second, the head of the work queue (if any) is removed from the queue and
   stored in *CMD_PTR.

   Returns 0 when a command was stored in *CMD_PTR, 1 when the work queue
   was empty (*CMD_PTR is then set to NULL).  */
int
pthread_scheduler_get_work (thread_data *td, _cl_command_node **cmd_ptr)
{
  kernel_run_command *kernel_cmd;
  _cl_command_node *node;
  int dropped_last_ref;

  /* Kernel part: run work-groups of the kernel at the head of the queue.  */
  PTHREAD_LOCK (&scheduler.wq_lock, NULL);
  kernel_cmd = scheduler.kernel_queue;
  if (kernel_cmd != NULL)
    {
      ++kernel_cmd->ref_count;
      PTHREAD_UNLOCK (&scheduler.wq_lock);

      work_group_scheduler (kernel_cmd, td);

      PTHREAD_LOCK (&scheduler.wq_lock, NULL);
      dropped_last_ref = (--kernel_cmd->ref_count == 0);
      PTHREAD_UNLOCK (&scheduler.wq_lock);

      /* Finalization runs without wq_lock held.  */
      if (dropped_last_ref)
        finalize_kernel_command (td, kernel_cmd);
    }
  else
    {
      PTHREAD_UNLOCK (&scheduler.wq_lock);
    }

  /* Command part: take the head of the work queue, if there is one.  */
  PTHREAD_LOCK (&scheduler.wq_lock, NULL);
  node = scheduler.work_queue;
  if (node != NULL)
    {
      DL_DELETE (scheduler.work_queue, node);
    }
  PTHREAD_UNLOCK (&scheduler.wq_lock);

  *cmd_ptr = node;
  return (node != NULL) ? 0 : 1;
}
/* Execute work-groups of kernel command K on the calling thread.

   Batches of work-group indices are requested with get_wg_index_range until
   none are left.  For each index the 3D group id is computed into a private
   copy of the kernel's pocl_context and the work-group function is called.
   When a batch is flagged as the last one, K is removed from the
   scheduler's kernel queue (under wq_lock) before the batch is executed.

   THREAD_DATA is not used by this function.

   Returns 0 if no work-groups were available on entry, 1 otherwise.  */
static int
work_group_scheduler (kernel_run_command *k,
                      struct pool_thread_data *thread_data)
{
  void *arguments[k->kernel->num_args + k->kernel->num_locals];
  struct pocl_context pc;
  unsigned i;
  unsigned start_index;
  unsigned end_index;
  char last_wgs = 0;

  /* Bail out before setting up any arguments if there is nothing to run.  */
  if (!get_wg_index_range (k, &start_index, &end_index, &last_wgs))
    return 0;

  setup_kernel_arg_array ((void**)&arguments, k);
  memcpy (&pc, &k->pc, sizeof (struct pocl_context));

  do
    {
      if (last_wgs)
        {
          PTHREAD_LOCK (&scheduler.wq_lock, NULL);
          LL_DELETE (scheduler.kernel_queue, k);
          PTHREAD_UNLOCK (&scheduler.wq_lock);
        }

      for (i = start_index; i <= end_index; ++i)
        {
          translate_wg_index_to_3d_index (k, i, (size_t*)&pc.group_id);

#ifdef DEBUG_MT
          /* The ids are converted explicitly so that the arguments always
             match the %zu conversions.  */
          printf("### exec_wg: gid_x %zu, gid_y %zu, gid_z %zu\n",
                 (size_t)pc.group_id[0],
                 (size_t)pc.group_id[1],
                 (size_t)pc.group_id[2]);
#endif
          k->workgroup (arguments, &pc);
        }
    }
  while (get_wg_index_range (k, &start_index, &end_index, &last_wgs));

  free_kernel_arg_array (arguments, k);

  return 1;
}
/* Signal the cq_finished condition variable while holding cq_finished_lock.
   NOTE(review): presumably this wakes a host thread waiting for a command
   queue to finish; the waiting side is not visible here.  */
void
pthread_scheduler_release_host ()
{
  PTHREAD_LOCK (&scheduler.cq_finished_lock, NULL);

  pthread_cond_signal (&scheduler.cq_finished_cond);

  PTHREAD_UNLOCK (&scheduler.cq_finished_lock);
}
/** * \brief Function executed by worker threads. */ static void* threadqueue_worker(void* threadqueue_opaque) { threadqueue_queue_t * const threadqueue = (threadqueue_queue_t *) threadqueue_opaque; PTHREAD_LOCK(&threadqueue->lock); for (;;) { while (!threadqueue->stop && threadqueue->first == NULL) { // Wait until there is something to do in the queue. PTHREAD_COND_WAIT(&threadqueue->job_available, &threadqueue->lock); } if (threadqueue->stop) { break; } // Get a job and remove it from the queue. threadqueue_job_t *job = threadqueue_pop_job(threadqueue); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_READY); job->state = THREADQUEUE_JOB_STATE_RUNNING; PTHREAD_UNLOCK(&job->lock); PTHREAD_UNLOCK(&threadqueue->lock); job->fptr(job->arg); PTHREAD_LOCK(&threadqueue->lock); PTHREAD_LOCK(&job->lock); assert(job->state == THREADQUEUE_JOB_STATE_RUNNING); job->state = THREADQUEUE_JOB_STATE_DONE; PTHREAD_COND_SIGNAL(&threadqueue->job_done); // Go through all the jobs that depend on this one, decreasing their // ndepends. Count how many jobs can now start executing so we know how // many threads to wake up. int num_new_jobs = 0; for (int i = 0; i < job->rdepends_count; ++i) { threadqueue_job_t * const depjob = job->rdepends[i]; // The dependency (job) is locked before the job depending on it. // This must be the same order as in kvz_threadqueue_job_dep_add. PTHREAD_LOCK(&depjob->lock); assert(depjob->state == THREADQUEUE_JOB_STATE_WAITING || depjob->state == THREADQUEUE_JOB_STATE_PAUSED); assert(depjob->ndepends > 0); depjob->ndepends--; if (depjob->ndepends == 0 && depjob->state == THREADQUEUE_JOB_STATE_WAITING) { // Move the job to ready jobs. threadqueue_push_job(threadqueue, kvz_threadqueue_copy_ref(depjob)); num_new_jobs++; } // Clear this reference to the job. 
PTHREAD_UNLOCK(&depjob->lock); kvz_threadqueue_free_job(&job->rdepends[i]); } job->rdepends_count = 0; PTHREAD_UNLOCK(&job->lock); kvz_threadqueue_free_job(&job); // The current thread will process one of the new jobs so we wake up // one threads less than the the number of new jobs. for (int i = 0; i < num_new_jobs - 1; i++) { pthread_cond_signal(&threadqueue->job_available); } } threadqueue->thread_running_count--; PTHREAD_UNLOCK(&threadqueue->lock); return NULL; }