/* Acquire the header lock of every leaf in the tree rooted at handle.
 *
 * Interior nodes (nchildren != 0) are not locked themselves; the function
 * simply recurses into each of their children. For a leaf, the header lock is
 * polled with _starpu_spin_trylock, and the data wizard is given a chance to
 * make progress on the local memory node after every failed attempt. */
static void lock_all_subtree(starpu_data_handle handle)
{
	unsigned idx;

	if (handle->nchildren != 0)
	{
		/* Interior node: descend into every child subtree. */
		idx = 0;
		while (idx < handle->nchildren)
		{
			lock_all_subtree(&handle->children[idx]);
			idx++;
		}
		return;
	}

	/* Leaf: retry for as long as the trylock reports a non-zero status,
	 * driving data-wizard progress between attempts instead of spinning
	 * idly. */
	for (;;)
	{
		if (!_starpu_spin_trylock(&handle->header_lock))
			break;

		_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
	}
}
/* Wait until data request r is flagged as completed, then release the
 * caller's reference on it.
 *
 * While waiting, the data wizard is driven on the local memory node
 * (may_alloc is forwarded to it as-is). Unless STARPU_NON_BLOCKING_DRIVERS is
 * defined, blocked workers on the request's handling node are also woken up on
 * each round.
 *
 * Returns r->retval as read once the request is completed. The request is
 * destroyed here when its reference count drops to zero. */
int _starpu_wait_data_request_completion(starpu_data_request_t r, unsigned may_alloc)
{
	int retval;
	int last_reference;
	uint32_t local_node = _starpu_get_local_memory_node();

	/* Poll the completion flag under r->lock. Note that the loop is left
	 * with r->lock still held, so everything up to the unlock below runs
	 * inside the critical section. */
	for (;;)
	{
		_starpu_spin_lock(&r->lock);

		if (r->completed)
			break;

		_starpu_spin_unlock(&r->lock);

#ifndef STARPU_NON_BLOCKING_DRIVERS
		_starpu_wake_all_blocked_workers_on_node(r->handling_node);
#endif

		_starpu_datawizard_progress(local_node, may_alloc);
	}

	retval = r->retval;
	if (retval)
	{
		_STARPU_DISP("REQUEST %p COMPLETED (retval %d) !\n", r, r->retval);
	}

	/* Drop our reference; whoever drops the last one gets rid of the
	 * request. The decision is taken under the lock, the destruction
	 * itself happens after it has been released. */
	r->refcnt--;
	last_reference = (r->refcnt == 0);

	_starpu_spin_unlock(&r->lock);

	if (last_reference)
		starpu_data_request_destroy(r);

	return retval;
}
/* Try to grant access to handle in the given mode right away; when that is
 * not possible, record a requester in one of the handle's pending lists.
 *
 * Must be called with no lock held: the handle header lock is acquired and
 * released here. Handles that have an arbiter are delegated entirely to
 * _starpu_attempt_to_submit_arbitered_data_request.
 *
 * callback/argcb, j and buffer_index are only stored in the requester that is
 * created when the request has to be queued.
 *
 * Returns 0 if access was granted immediately, 1 if the request was queued. */
static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_codelet,
						       starpu_data_handle_t handle, enum starpu_data_access_mode mode,
						       void (*callback)(void *), void *argcb,
						       struct _starpu_job *j, unsigned buffer_index)
{
	if (handle->arbiter)
		return _starpu_attempt_to_submit_arbitered_data_request(request_from_codelet, handle, mode, callback, argcb, j, buffer_index);

	/* From here on a read-write access is treated as a plain write. */
	if (mode == STARPU_RW)
		mode = STARPU_W;

	/* Acquire the header lock. When the request comes from a codelet we
	 * make up to STARPU_SPIN_MAXTRY non-blocking attempts, letting the
	 * data wizard progress after each failure, before falling back to a
	 * plain blocking lock. Other callers just block on the lock. */
	if (!request_from_codelet)
	{
		_starpu_spin_lock(&handle->header_lock);
	}
	else
	{
		int attempts;

		for (attempts = 0; attempts < STARPU_SPIN_MAXTRY; attempts++)
		{
			if (!_starpu_spin_trylock(&handle->header_lock))
				break;	/* lock acquired */

			_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
		}

		/* Every attempt failed: wait for the lock for real. */
		if (attempts == STARPU_SPIN_MAXTRY)
			_starpu_spin_lock(&handle->header_lock);
	}

	/* While a reduction is pending on the handle, any request that does
	 * not come from a reduction task is frozen: it goes to the end of the
	 * normal list, whereas reduction tasks use reduction_req_list. */
	unsigned is_a_reduction_task = (request_from_codelet && j->reduction_task);
	unsigned frozen = (handle->reduction_refcnt > 0) && !is_a_reduction_task;

	/* Mode of the handle before this request possibly changes it. */
	enum starpu_data_access_mode previous_mode = handle->current_mode;

	/* The request is compatible with the current state when nobody is
	 * accessing the data, or when it is not a write and uses the same
	 * access mode as the one currently in effect. */
	unsigned compatible = (handle->refcnt == 0)
		|| ((mode != STARPU_W) && (handle->current_mode == mode));

	unsigned must_queue = 1;

	if (!frozen && compatible)
	{
		/* Is this the end of a reduction phase? We do not want several
		 * reductions of the same handle to start at the same time. */
		unsigned leaving_reduction = (handle->reduction_refcnt == 0)
			&& (previous_mode == STARPU_REDUX)
			&& (mode != STARPU_REDUX);

		if (!leaving_reduction)
		{
			must_queue = 0;
		}
		else
		{
			_starpu_data_end_reduction_mode(handle);

			/* A mode change is needed: the request stays queued if
			 * ending the reduction mode left a reduction pending. */
			must_queue = (handle->reduction_refcnt > 0);
		}
	}

	if (!must_queue)
	{
		/* Success: the access is granted right now. */
		handle->refcnt++;
		handle->busy_count++;

		/* Skip the store when the handle is already in R mode and R is
		 * requested again. This keeps helgrind quiet in the following
		 * scenario:
		 *   acquire(R) in thread A
		 *   acquire(R) in thread B
		 *   release_data_on_node() in thread A
		 * where it would otherwise complain that the last step reads
		 * current_mode unsafely. In effect this tells helgrind that
		 * this is a shared R acquisition. */
		if (!(mode == STARPU_R && handle->current_mode == mode))
			handle->current_mode = mode;

		if ((mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
			_starpu_data_start_reduction_mode(handle);
	}
	else
	{
		/* Failure: there cannot be several writers, nor a new writer
		 * while the data is in read mode, so the request is queued. */
		handle->busy_count++;

		struct _starpu_data_requester *requester = _starpu_data_requester_new();
		requester->mode = mode;
		requester->is_requested_by_codelet = request_from_codelet;
		requester->j = j;
		requester->buffer_index = buffer_index;
		requester->ready_data_callback = callback;
		requester->argcb = argcb;

		/* Reduction tasks have their own dedicated list. */
		struct _starpu_data_requester_list *target_list;
		if (is_a_reduction_task)
			target_list = &handle->reduction_req_list;
		else
			target_list = &handle->req_list;

		_starpu_data_requester_list_push_back(target_list, requester);

		/* Report the failure as exactly 1. */
		must_queue = 1;
	}

	_starpu_spin_unlock(&handle->header_lock);

	return must_queue;
}
void *_starpu_cpu_worker(void *arg) { struct starpu_worker_s *cpu_arg = arg; unsigned memnode = cpu_arg->memory_node; int workerid = cpu_arg->workerid; int devid = cpu_arg->devid; #ifdef STARPU_USE_FXT _starpu_fxt_register_thread(cpu_arg->bindid); #endif STARPU_TRACE_WORKER_INIT_START(STARPU_FUT_CPU_KEY, devid, memnode); _starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid); _STARPU_DEBUG("cpu worker %d is ready on logical cpu %d\n", devid, cpu_arg->bindid); _starpu_set_local_memory_node_key(&memnode); _starpu_set_local_worker_key(cpu_arg); snprintf(cpu_arg->name, 32, "CPU %d", devid); cpu_arg->status = STATUS_UNKNOWN; STARPU_TRACE_WORKER_INIT_END /* tell the main thread that we are ready */ PTHREAD_MUTEX_LOCK(&cpu_arg->mutex); cpu_arg->worker_is_initialized = 1; PTHREAD_COND_SIGNAL(&cpu_arg->ready_cond); PTHREAD_MUTEX_UNLOCK(&cpu_arg->mutex); starpu_job_t j; int res; while (_starpu_machine_is_running()) { STARPU_TRACE_START_PROGRESS(memnode); _starpu_datawizard_progress(memnode, 1); STARPU_TRACE_END_PROGRESS(memnode); _starpu_execute_registered_progression_hooks(); PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex); /* perhaps there is some local task to be executed first */ j = _starpu_pop_local_task(cpu_arg); /* otherwise ask a task to the scheduler */ if (!j) { struct starpu_task *task = _starpu_pop_task(); if (task) j = _starpu_get_job_associated_to_task(task); } if (j == NULL) { if (_starpu_worker_can_block(memnode)) _starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex); PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex); continue; }; PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex); /* can a cpu perform that task ? */ if (!STARPU_CPU_MAY_PERFORM(j)) { /* put it and the end of the queue ... 
XXX */ _starpu_push_task(j, 0); continue; } _starpu_set_current_task(j->task); res = execute_job_on_cpu(j, cpu_arg); _starpu_set_current_task(NULL); if (res) { switch (res) { case -EAGAIN: _starpu_push_task(j, 0); continue; default: assert(0); } } _starpu_handle_job_termination(j, 0); } STARPU_TRACE_WORKER_DEINIT_START /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY); pthread_exit(NULL); }