void *_starpu_cpu_worker(void *arg)
{
	struct starpu_worker_s *cpu_arg = arg;

	unsigned memnode = cpu_arg->memory_node;
	int workerid = cpu_arg->workerid;
	int devid = cpu_arg->devid;

#ifdef STARPU_USE_FXT
	_starpu_fxt_register_thread(cpu_arg->bindid);
#endif
	STARPU_TRACE_WORKER_INIT_START(STARPU_FUT_CPU_KEY, devid, memnode);

	_starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid);

	_STARPU_DEBUG("cpu worker %d is ready on logical cpu %d\n", devid, cpu_arg->bindid);

	_starpu_set_local_memory_node_key(&memnode);

	_starpu_set_local_worker_key(cpu_arg);

	snprintf(cpu_arg->name, 32, "CPU %d", devid);

	cpu_arg->status = STATUS_UNKNOWN;

	STARPU_TRACE_WORKER_INIT_END

	/* tell the main thread that we are ready */
	PTHREAD_MUTEX_LOCK(&cpu_arg->mutex);
	cpu_arg->worker_is_initialized = 1;
	PTHREAD_COND_SIGNAL(&cpu_arg->ready_cond);
	PTHREAD_MUTEX_UNLOCK(&cpu_arg->mutex);

	starpu_job_t j;
	int res;

	while (_starpu_machine_is_running())
	{
		STARPU_TRACE_START_PROGRESS(memnode);
		_starpu_datawizard_progress(memnode, 1);
		STARPU_TRACE_END_PROGRESS(memnode);

		_starpu_execute_registered_progression_hooks();

		PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex);

		/* perhaps there is some local task to be executed first */
		j = _starpu_pop_local_task(cpu_arg);

		/* otherwise ask a task to the scheduler */
		if (!j)
		{
			struct starpu_task *task = _starpu_pop_task();
			if (task)
				j = _starpu_get_job_associated_to_task(task);
		}

		if (j == NULL)
		{
			if (_starpu_worker_can_block(memnode))
				_starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex);

			PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);

			continue;
		}

		PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);

		/* can a cpu perform that task ? */
		if (!STARPU_CPU_MAY_PERFORM(j))
		{
			/* put it at the end of the queue ... XXX */
			_starpu_push_task(j, 0);
			continue;
		}

		_starpu_set_current_task(j->task);

		res = execute_job_on_cpu(j, cpu_arg);

		_starpu_set_current_task(NULL);

		if (res)
		{
			switch (res)
			{
				case -EAGAIN:
					_starpu_push_task(j, 0);
					continue;
				default:
					assert(0);
			}
		}

		_starpu_handle_job_termination(j, 0);
	}

	STARPU_TRACE_WORKER_DEINIT_START

	/* In case there remains some memory that was automatically
	 * allocated by StarPU, we release it now. Note that data
	 * coherency is not maintained anymore at that point ! */
	_starpu_free_all_automatically_allocated_buffers(memnode);

	STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY);

	pthread_exit(NULL);
}
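For context, the launcher side of the init handshake above (the mutex/condition pair around worker_is_initialized) follows the standard pthreads pattern sketched below. struct worker_sync and wait_for_worker are hypothetical names for illustration, not StarPU API; the while loop is what guards against spurious wakeups.

#include <pthread.h>

struct worker_sync {
	pthread_mutex_t mutex;
	pthread_cond_t ready_cond;
	int worker_is_initialized;
};

/* Block until the worker thread has set worker_is_initialized and
 * signalled ready_cond. Re-checking the flag in a loop makes the
 * wait robust against spurious wakeups. */
static void wait_for_worker(struct worker_sync *w)
{
	pthread_mutex_lock(&w->mutex);
	while (!w->worker_is_initialized)
		pthread_cond_wait(&w->ready_cond, &w->mutex);
	pthread_mutex_unlock(&w->mutex);
}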
/* This is called when a task is finished with a piece of data
 * (or on starpu_data_release)
 *
 * The header lock must already be taken by the caller.
 * This may free the handle if it was lazily unregistered (1 is returned in
 * that case). The handle pointer thus becomes invalid for the caller. */
int _starpu_notify_data_dependencies(starpu_data_handle_t handle)
{
	_starpu_spin_checklocked(&handle->header_lock);

	/* A data access has finished so we remove a reference. */
	STARPU_ASSERT(handle->refcnt > 0);
	handle->refcnt--;
	STARPU_ASSERT(handle->busy_count > 0);
	handle->busy_count--;
	if (_starpu_data_check_not_busy(handle))
		/* Handle was destroyed, nothing left to do. */
		return 1;

	if (handle->arbiter)
	{
		unsigned refcnt = handle->refcnt;
		STARPU_ASSERT(_starpu_data_requester_list_empty(&handle->req_list));
		STARPU_ASSERT(_starpu_data_requester_list_empty(&handle->reduction_req_list));
		_starpu_spin_unlock(&handle->header_lock);
		/* _starpu_notify_arbitered_dependencies will handle its own locking */
		if (!refcnt)
			_starpu_notify_arbitered_dependencies(handle);
		/* We have already unlocked */
		return 1;
	}
	STARPU_ASSERT(_starpu_data_requester_list_empty(&handle->arbitered_req_list));

	/* In case there is a pending reduction and this is the last
	 * requester, we may go back to a "normal" coherency model. */
	if (handle->reduction_refcnt > 0)
	{
		//fprintf(stderr, "NOTIFY REDUCTION TASK RED REFCNT %d\n", handle->reduction_refcnt);
		handle->reduction_refcnt--;
		if (handle->reduction_refcnt == 0)
			_starpu_data_end_reduction_mode_terminate(handle);
	}

	struct _starpu_data_requester *r;
	while ((r = may_unlock_data_req_list_head(handle)))
	{
		/* STARPU_RW accesses are treated as STARPU_W */
		enum starpu_data_access_mode r_mode = r->mode;
		if (r_mode == STARPU_RW)
			r_mode = STARPU_W;

		int put_in_list = 1;

		if ((handle->reduction_refcnt == 0) && (handle->current_mode == STARPU_REDUX) && (r_mode != STARPU_REDUX))
		{
			_starpu_data_end_reduction_mode(handle);

			/* Since we need to perform a mode change, we freeze
			 * the request if needed. */
			put_in_list = (handle->reduction_refcnt > 0);
		}
		else
		{
			put_in_list = 0;
		}

		if (put_in_list)
		{
			/* We need to put the request back because we must
			 * perform a reduction before. */
			_starpu_data_requester_list_push_front(&handle->req_list, r);
		}
		else
		{
			/* The data is now attributed to that request so we put a
			 * reference on it. */
			handle->refcnt++;
			handle->busy_count++;

			enum starpu_data_access_mode previous_mode = handle->current_mode;
			handle->current_mode = r_mode;

			/* In case we enter reduction mode, we invalidate all per-worker
			 * replicates. Note that the "per-node" replicates are kept
			 * intact because we'll reduce a valid copy of the "per-node
			 * replicate" with the per-worker replicates. */
			if ((r_mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
				_starpu_data_start_reduction_mode(handle);

			_starpu_spin_unlock(&handle->header_lock);

			if (r->is_requested_by_codelet)
			{
				if (!unlock_one_requester(r))
					_starpu_push_task(r->j);
			}
			else
			{
				STARPU_ASSERT(r->ready_data_callback);

				/* execute the callback associated with the data requester */
				r->ready_data_callback(r->argcb);
			}

			_starpu_data_requester_delete(r);

			_starpu_spin_lock(&handle->header_lock);
			STARPU_ASSERT(handle->busy_count > 0);
			handle->busy_count--;
			if (_starpu_data_check_not_busy(handle))
				return 1;
		}
	}

	return 0;
}
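Stripped of reductions and arbiters, the wake-up policy this function implements reduces to a classic reader/writer grant loop: drop one reference, then keep granting queued requests while the head of the queue is compatible with what is already running. A minimal sketch under those assumptions follows; struct data and struct request are hypothetical types, not the StarPU requester API.

#include <stdbool.h>

enum access_mode { MODE_NONE, MODE_R, MODE_W };

struct request {
	enum access_mode mode;
	struct request *next;
};

struct data {
	enum access_mode current_mode;
	unsigned refcnt;        /* requests currently granted */
	struct request *queue;  /* pending requests, FIFO head */
};

/* Grant the head request only if it can run concurrently with what is
 * already granted: any number of readers, or one writer alone. */
static bool may_grant_head(struct data *d)
{
	struct request *r = d->queue;
	if (!r)
		return false;
	if (d->refcnt == 0)
		return true;    /* data is idle: grant anything */
	return d->current_mode == MODE_R && r->mode == MODE_R;
}

/* Called when one granted access finishes. */
static void notify_release(struct data *d)
{
	d->refcnt--;
	while (may_grant_head(d)) {
		struct request *r = d->queue;
		d->queue = r->next;
		d->current_mode = r->mode;
		d->refcnt++;
		/* ... hand r over to its task or callback ... */
	}
}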
void *gordon_worker_inject(struct starpu_worker_set_s *arg)
{
	while (_starpu_machine_is_running())
	{
		if (gordon_busy_enough())
		{
			/* gordon already has enough work, wait a little TODO */
			_starpu_wait_on_sched_event();
		}
		else
		{
#ifndef NOCHAIN
			int ret = 0;
			//FIXME we should look into the local job list here !
			struct starpu_job_list_s *list = _starpu_pop_every_task(STARPU_GORDON);
			/* XXX 0 is hardcoded */
			if (list)
			{
				/* partition lists */
				unsigned size = starpu_job_list_size(list);
				unsigned nchunks = (size < 2*arg->nworkers) ? size : (2*arg->nworkers);
				//unsigned nchunks = (size < arg->nworkers) ? size : (arg->nworkers);

				/* all chunks hold size/nchunks jobs; the last one also
				 * gets the remainder, so it may be slightly larger */
				unsigned chunksize = size/nchunks;

				unsigned chunk;
				for (chunk = 0; chunk < nchunks; chunk++)
				{
					struct starpu_job_list_s *chunk_list;
					if (chunk != (nchunks - 1))
					{
						/* split the list in 2 parts : list = chunk_list | tail */
						chunk_list = starpu_job_list_new();

						/* find the end */
						chunk_list->_head = list->_head;

						starpu_job_itor_t it_j = starpu_job_list_begin(list);
						unsigned ind;
						for (ind = 0; ind < chunksize; ind++)
						{
							it_j = starpu_job_list_next(it_j);
						}

						/* it_j should be the first element of the new list (tail) */
						chunk_list->_tail = it_j->_prev;
						chunk_list->_tail->_next = NULL;
						list->_head = it_j;
						it_j->_prev = NULL;
					}
					else
					{
						/* this is the last chunk */
						chunk_list = list;
					}

					ret = inject_task_list(chunk_list, &arg->workers[0]);
				}
			}
			else
			{
				_starpu_wait_on_sched_event();
			}
#else
			/* gordon should accept a little more work */
			starpu_job_t j;
			j = _starpu_pop_task();
			// _STARPU_DEBUG("pop task %p\n", j);

			if (j)
			{
				if (STARPU_GORDON_MAY_PERFORM(j))
				{
					/* inject that task */
					/* XXX we hardcode &arg->workers[0] for now */
					inject_task(j, &arg->workers[0]);
				}
				else
				{
					_starpu_push_task(j, 0);
				}
			}
#endif
		}
	}

	return NULL;
}
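The pointer surgery in the chunk != (nchunks - 1) branch above is a front-split of a doubly-linked list: walk to the cut point, then re-stitch the head/tail pointers. The following self-contained sketch shows the same technique with hypothetical struct node / struct list types, not the generated starpu_job_list type (error checking omitted for brevity).

#include <stddef.h>
#include <stdlib.h>

struct node { struct node *prev, *next; };
struct list { struct node *head, *tail; };

/* Detach the first n nodes of src into a freshly allocated list.
 * Assumes src holds strictly more than n nodes, so the remaining
 * tail part is never empty. */
static struct list *split_front(struct list *src, unsigned n)
{
	struct list *chunk = malloc(sizeof(*chunk));
	struct node *it = src->head;
	unsigned i;

	for (i = 0; i < n; i++)
		it = it->next;          /* it = first node of the tail part */

	/* chunk takes nodes [0, n-1] ... */
	chunk->head = src->head;
	chunk->tail = it->prev;
	chunk->tail->next = NULL;

	/* ... and src keeps everything from it onwards */
	src->head = it;
	it->prev = NULL;

	return chunk;
}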