static int eager_calibration_push_task(struct starpu_sched_component *component, struct starpu_task *task)
{
	STARPU_ASSERT(component && task && starpu_sched_component_is_eager_calibration(component));
	STARPU_ASSERT(starpu_sched_component_can_execute_task(component, task));

	starpu_task_bundle_t bundle = task->bundle;
	int workerid;
	for (workerid = starpu_bitmap_first(component->workers_in_ctx);
	     workerid != -1;
	     workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
	{
		struct starpu_perfmodel_arch *archtype = starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id);
		int nimpl;
		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
		{
			if (starpu_worker_can_execute_task(workerid, task, nimpl)
			    || starpu_combined_worker_can_execute_task(workerid, task, nimpl))
			{
				double d;
				if (bundle)
					d = starpu_task_bundle_expected_length(bundle, archtype, nimpl);
				else
					d = starpu_task_expected_length(task, archtype, nimpl);

				if (isnan(d))
				{
					int i;
					for (i = 0; i < component->nchildren; i++)
					{
						int idworker;
						for (idworker = starpu_bitmap_first(component->children[i]->workers);
						     idworker != -1;
						     idworker = starpu_bitmap_next(component->children[i]->workers, idworker))
						{
							if (idworker == workerid)
							{
								if (starpu_sched_component_is_worker(component->children[i]))
								{
									component->children[i]->can_pull(component->children[i]);
									return 1;
								}
								else
									return component->children[i]->push_task(component->children[i], task);
							}
						}
					}
				}
			}
		}
	}
	return 1;
}
/* The data must be released by calling starpu_data_release later on */
int starpu_data_acquire_cb(starpu_data_handle handle, starpu_access_mode mode,
			   void (*callback)(void *), void *arg)
{
	STARPU_ASSERT(handle);

	struct user_interaction_wrapper *wrapper = malloc(sizeof(struct user_interaction_wrapper));
	STARPU_ASSERT(wrapper);

	wrapper->handle = handle;
	wrapper->mode = mode;
	wrapper->callback = callback;
	wrapper->callback_arg = arg;
	PTHREAD_COND_INIT(&wrapper->cond, NULL);
	PTHREAD_MUTEX_INIT(&wrapper->lock, NULL);
	wrapper->finished = 0;

	/* TODO: instead of having the is_prefetch argument, _starpu_fetch_data
	 * should consider two flags: async and detached */
	_starpu_spin_lock(&handle->header_lock);
	handle->per_node[0].refcnt++;
	_starpu_spin_unlock(&handle->header_lock);

	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
	int sequential_consistency = handle->sequential_consistency;
	if (sequential_consistency)
	{
		wrapper->pre_sync_task = starpu_task_create();
		wrapper->pre_sync_task->callback_func = starpu_data_acquire_cb_pre_sync_callback;
		wrapper->pre_sync_task->callback_arg = wrapper;

		wrapper->post_sync_task = starpu_task_create();

#ifdef STARPU_USE_FXT
		starpu_job_t job = _starpu_get_job_associated_to_task(wrapper->pre_sync_task);
		job->model_name = "acquire_cb_pre";
		job = _starpu_get_job_associated_to_task(wrapper->post_sync_task);
		job->model_name = "acquire_cb_post";
#endif

		_starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, wrapper->post_sync_task, handle, mode);
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		/* TODO detect if this is superfluous */
		int ret = starpu_task_submit(wrapper->pre_sync_task, NULL);
		STARPU_ASSERT(!ret);
	}
	else
	{
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
		starpu_data_acquire_cb_pre_sync_callback(wrapper);
	}

	return 0;
}
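/*
 * A minimal usage sketch for the asynchronous acquire/release pattern above,
 * assuming a handle has already been registered. The callback name and the
 * STARPU_R access mode are illustrative; the data is released with
 * starpu_data_release once the callback is done with it, as required by the
 * comment above.
 */
static void acquired_callback(void *arg)
{
	starpu_data_handle handle = arg;

	/* The data is now available to the application in the requested mode. */
	/* ... read or modify the buffer here ... */

	starpu_data_release(handle);
}

static void read_data_later(starpu_data_handle handle)
{
	int ret = starpu_data_acquire_cb(handle, STARPU_R, acquired_callback, handle);
	STARPU_ASSERT(!ret);
}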
int main(int argc, char **argv)
{
	starpu_init(NULL);

	fprintf(stderr, "#tasks : %d\n", ntasks);

	unsigned i;
	for (i = 0; i < ntasks; i++)
	{
		struct starpu_task *task = starpu_task_create();

		/* We check that the task is valid both from the codelet and
		 * from the callback */
		task->cl = &dummy_cl;
		task->cl_arg = task;

		task->callback_func = check_task_callback;
		task->callback_arg = task;

		int ret = starpu_task_submit(task, NULL);
		STARPU_ASSERT(!ret);
	}

	starpu_task_wait_for_all();

	fprintf(stderr, "#empty tasks : %d\n", ntasks);

	/* We repeat the same experiment with null codelets */
	for (i = 0; i < ntasks; i++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = NULL;

		/* We check that the task is valid from the callback */
		task->callback_func = check_task_callback;
		task->callback_arg = task;

		int ret = starpu_task_submit(task, NULL);
		STARPU_ASSERT(!ret);
	}

	starpu_task_wait_for_all();

	starpu_shutdown();

	return 0;
}
static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
{
	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

	struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)
		starpu_data_get_interface_on_node(handle, node);

	STARPU_ASSERT(count == variable_interface->elemsize);
	memcpy((void*)variable_interface->ptr, ptr, variable_interface->elemsize);

	return 0;
}
/* We assume that the job will not disappear under our hands */
void _starpu_notify_dependencies(struct _starpu_job *j)
{
	STARPU_ASSERT(j);
	STARPU_ASSERT(j->task);

	/* unlock tasks depending on that task */
	_starpu_notify_task_dependencies(j);

	/* unlock tags depending on that task */
	if (j->task->use_tag)
		_starpu_notify_tag_dependencies(j->tag);
}
static void _starpu_data_acquire_continuation_non_blocking(void *arg)
{
	int ret;
	struct user_interaction_wrapper *wrapper = arg;

	starpu_data_handle handle = wrapper->handle;
	STARPU_ASSERT(handle);

	ret = _starpu_fetch_data_on_node(handle, 0, wrapper->mode, 1,
					 _starpu_data_acquire_fetch_data_callback, wrapper);
	STARPU_ASSERT(!ret);
}
/*
 * compute d = r
 * descr[0] = d, descr[1] = r
 */
void cpu_codelet_func_2(void *descr[], STARPU_ATTRIBUTE_UNUSED void *arg)
{
	/* simply copy r into d */
	uint32_t nx = STARPU_VECTOR_GET_NX(descr[0]);
	size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]);

	STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1]));
	STARPU_ASSERT(STARPU_VECTOR_GET_ELEMSIZE(descr[0]) == STARPU_VECTOR_GET_ELEMSIZE(descr[1]));

	float *src = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
	float *dst = (float *)STARPU_VECTOR_GET_PTR(descr[0]);

	memcpy(dst, src, nx*elemsize);
}
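/*
 * For context, a codelet dispatching to this kernel could be declared roughly
 * as below. This is only a sketch: the exact type and fields (starpu_codelet
 * vs. struct starpu_codelet, cpu_func vs. cpu_funcs, explicit access modes)
 * depend on the StarPU version, and the codelet name is illustrative.
 */
static struct starpu_codelet cl_2 =
{
	.where = STARPU_CPU,
	.cpu_func = cpu_codelet_func_2,
	.nbuffers = 2	/* descr[0] = d (written), descr[1] = r (read) */
};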
static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
{
	char *data = ptr;

	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
		starpu_data_get_interface_on_node(handle, node);

	STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double));

	memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double));
	memcpy(complex_interface->imaginary, data + complex_interface->nx*sizeof(double),
	       complex_interface->nx*sizeof(double));

	return 0;
}
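/*
 * Sketch of the matching pack operation, under the assumption that the packed
 * layout is the real part followed by the imaginary part (as unpacked above).
 * The exact prototype of the pack method and the allocation routine to use
 * depend on the StarPU version, so this is only illustrative.
 */
static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, size_t *count)
{
	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
		starpu_data_get_interface_on_node(handle, node);

	*count = 2 * complex_interface->nx * sizeof(double);
	char *data = malloc(*count);
	STARPU_ASSERT(data);

	memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double));
	memcpy(data + complex_interface->nx*sizeof(double), complex_interface->imaginary,
	       complex_interface->nx*sizeof(double));

	*ptr = data;
	return 0;
}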
int main(int argc, char **argv)
{
	double timing;
	struct timeval start;
	struct timeval end;

	parse_args(argc, argv);

	starpu_init(NULL);

	fprintf(stderr, "#tasks : %d\n", ntasks);

	gettimeofday(&start, NULL);

	unsigned i;
	for (i = 0; i < ntasks; i++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &dummy_codelet;
		task->cl_arg = NULL;

		task->callback_func = NULL;
		task->callback_arg = NULL;

		task->destroy = 0;

		starpu_event event;
		int ret = starpu_task_submit(task, &event);
		STARPU_ASSERT(!ret);

		ret = starpu_event_wait(event);
		STARPU_ASSERT(!ret);

		starpu_event_release(event);
		starpu_task_destroy(task);
	}

	gettimeofday(&end, NULL);

	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));

	fprintf(stderr, "Total: %lf secs\n", timing/1000000);
	fprintf(stderr, "Per task: %lf usecs\n", timing/ntasks);

	starpu_shutdown();

	return 0;
}
/* We assume that r->lock is taken by the caller */
void _starpu_data_request_append_callback(starpu_data_request_t r,
					  void (*callback_func)(void *), void *callback_arg)
{
	STARPU_ASSERT(r);

	if (callback_func)
	{
		struct callback_list *link = malloc(sizeof(struct callback_list));
		STARPU_ASSERT(link);

		link->callback_func = callback_func;
		link->callback_arg = callback_arg;
		link->next = r->callbacks;
		r->callbacks = link;
	}
}
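/*
 * Sketch of the matching completion side, assuming the callback_list layout
 * used above: when the request terminates, walk the list, fire each callback
 * and free the links. The function name is illustrative; in StarPU the firing
 * is done inside the data-request handling code.
 */
static void fire_request_callbacks(starpu_data_request_t r)
{
	struct callback_list *link = r->callbacks;
	while (link)
	{
		struct callback_list *next = link->next;
		link->callback_func(link->callback_arg);
		free(link);
		link = next;
	}
	r->callbacks = NULL;
}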
int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
{
#if defined(CL_PROFILING_CLOCK_CYCLE_COUNT)||defined(CL_PROFILING_STALL_CYCLE_COUNT)||defined(CL_PROFILING_POWER_CONSUMED)
	struct starpu_task *task = starpu_task_get_current();
	struct starpu_profiling_task_info *info = task->profiling_info;
#endif

#ifdef CL_PROFILING_CLOCK_CYCLE_COUNT
	if (starpu_profiling_status_get() && info)
	{
		cl_int err;
		unsigned int clock_cycle_count;
		size_t size;

		err = clGetEventProfilingInfo(event, CL_PROFILING_CLOCK_CYCLE_COUNT,
					      sizeof(clock_cycle_count), &clock_cycle_count, &size);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
		STARPU_ASSERT(size == sizeof(clock_cycle_count));

		info->used_cycles += clock_cycle_count;
	}
#endif

#ifdef CL_PROFILING_STALL_CYCLE_COUNT
	if (starpu_profiling_status_get() && info)
	{
		cl_int err;
		unsigned int stall_cycle_count;
		size_t size;

		err = clGetEventProfilingInfo(event, CL_PROFILING_STALL_CYCLE_COUNT,
					      sizeof(stall_cycle_count), &stall_cycle_count, &size);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
		STARPU_ASSERT(size == sizeof(stall_cycle_count));

		info->stall_cycles += stall_cycle_count;
	}
#endif

#ifdef CL_PROFILING_POWER_CONSUMED
	if (info && (starpu_profiling_status_get()
		     || (task->cl && task->cl->power_model && task->cl->power_model->benchmarking)))
	{
		cl_int err;
		double power_consumed;
		size_t size;

		err = clGetEventProfilingInfo(event, CL_PROFILING_POWER_CONSUMED,
					      sizeof(power_consumed), &power_consumed, &size);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
		STARPU_ASSERT(size == sizeof(power_consumed));

		info->power_consumed += power_consumed;
	}
#endif

	return 0;
}
/* the generic interface that calls the proper underlying implementation */
int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
{
	struct starpu_task *task = j->task;

	task->status = STARPU_TASK_READY;

	/* in case there is no codelet associated to the task (that's a control
	 * task), we directly execute its callback and enforce the
	 * corresponding dependencies */
	if (task->cl == NULL)
	{
		_starpu_handle_job_termination(j, job_is_already_locked);
		return 0;
	}

	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
	{
		unsigned workerid = task->workerid;
		struct starpu_worker_s *worker = _starpu_get_worker_struct(workerid);

		if (use_prefetch)
		{
			uint32_t memory_node = starpu_worker_get_memory_node(workerid);
			_starpu_prefetch_task_input_on_node(task, memory_node);
		}

		return _starpu_push_local_task(worker, j);
	}
	else
	{
		STARPU_ASSERT(policy.push_task);
		return policy.push_task(task);
	}
}
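/*
 * Sketch: how a task ends up in the execute_on_a_specific_worker branch above.
 * The fields are the ones tested by _starpu_push_task; the codelet name and
 * the two-argument submit call follow the other snippets in this section and
 * are illustrative.
 */
static void submit_pinned_task(void)
{
	struct starpu_task *task = starpu_task_create();

	task->cl = &dummy_codelet;

	/* bypass the scheduling policy and force execution on worker 0 */
	task->execute_on_a_specific_worker = 1;
	task->workerid = 0;

	int ret = starpu_task_submit(task, NULL);
	STARPU_ASSERT(!ret);
}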
int _starpu_allocate_memory_on_node(starpu_data_handle handle, uint32_t dst_node, unsigned may_alloc)
{
	size_t allocated_memory;

	STARPU_ASSERT(handle);

	/* A buffer is already allocated on the node */
	if (handle->per_node[dst_node].allocated)
		return 0;

	if (!may_alloc)
		return ENOMEM;

	void *interface = starpu_data_get_interface_on_node(handle, dst_node);
	allocated_memory = _starpu_allocate_interface(handle, interface, dst_node);

	/* perhaps we could really not handle such capacity misses */
	if (!allocated_memory)
		return ENOMEM;

	register_mem_chunk(handle, dst_node, allocated_memory, 1);

	handle->per_node[dst_node].allocated = 1;
	handle->per_node[dst_node].automatically_allocated = 1;

	return 0;
}
int inject_task_list(struct starpu_job_list_s *list, struct starpu_worker_s *worker)
{
	/* first put back all tasks that cannot be performed by Gordon */
	unsigned nvalids = 0;
	unsigned ninvalids = 0;
	starpu_job_t j;

	// TODO !
	//
	// for (j = starpu_job_list_begin(list); j != starpu_job_list_end(list); j = starpu_job_list_next(j))
	// {
	// 	if (!STARPU_GORDON_MAY_PERFORM(j)) {
	// 		// XXX TODO
	// 		ninvalids++;
	// 		assert(0);
	// 	}
	// 	else {
	// 		nvalids++;
	// 	}
	// }

	nvalids = job_list_size(list);
	// _STARPU_DEBUG("nvalids %d \n", nvalids);

	struct gordon_task_wrapper_s *task_wrapper = malloc(sizeof(struct gordon_task_wrapper_s));
	gordon_job_t *gordon_jobs = gordon_alloc_jobs(nvalids, 0);

	task_wrapper->gordon_job = gordon_jobs;
	task_wrapper->list = list;
	task_wrapper->j = NULL;
	task_wrapper->terminated = 0;
	task_wrapper->worker = worker;

	unsigned index;
	for (j = starpu_job_list_begin(list), index = 0;
	     j != starpu_job_list_end(list);
	     j = starpu_job_list_next(j), index++)
	{
		int ret;

		struct starpu_task *task = j->task;
		ret = _starpu_fetch_task_input(task, 0);
		STARPU_ASSERT(!ret);

		gordon_jobs[index].index = task->cl->gordon_func;

		struct starpu_perfmodel_t *model = j->task->cl->model;
		if (model && model->benchmarking)
			gordon_jobs[index].flags.sampling = 1;

		/* we should not hardcode the memory node ... XXX */
		unsigned memory_node = 0;

		starpu_to_gordon_buffers(j, &gordon_jobs[index], memory_node);
	}

	gordon_pushjob(task_wrapper->gordon_job, gordon_callback_list_func, task_wrapper);

	return 0;
}
static unsigned try_to_reuse_mem_chunk(starpu_mem_chunk_t mc, unsigned node,
				       starpu_data_handle new_data, unsigned is_already_in_mc_list)
{
	unsigned success = 0;

	starpu_data_handle old_data;
	old_data = mc->data;
	STARPU_ASSERT(old_data);

	/* try to lock all the leaves of the subtree */
	lock_all_subtree(old_data);

	/* check if they are all "free" */
	if (may_free_subtree(old_data, node))
	{
		success = 1;

		/* in case there was nobody using that buffer, throw it
		 * away after writing it back to main memory */
		transfer_subtree_to_node(old_data, node, 0);

		/* now replace the previous data */
		reuse_mem_chunk(node, new_data, mc, is_already_in_mc_list);
	}

	/* unlock the leaves */
	unlock_all_subtree(old_data);

	return success;
}
static void assign_combinations_without_hwloc(struct starpu_worker_collection *worker_collection,
					      int *workers, unsigned n, int min, int max)
{
	int size, i, count = 0;

	// if the maximum number of workers is already reached
	if (worker_collection->nworkers >= STARPU_NMAXWORKERS - 1)
		return;

	for (size = min; size <= max; size *= 2)
	{
		unsigned first;
		for (first = 0; first < n; first += size)
		{
			if (first + size <= n)
			{
				int found_workerids[size];

				for (i = 0; i < size; i++)
					found_workerids[i] = workers[first + i];

				/* We register this combination */
				int newworkerid;
				newworkerid = starpu_combined_worker_assign_workerid(size, found_workerids);
				STARPU_ASSERT(newworkerid >= 0);
				count++;
				worker_collection->add(worker_collection, newworkerid);

				// if the maximum number of workers is reached, then return
				if (worker_collection->nworkers >= STARPU_NMAXWORKERS - 1)
					return;
			}
		}
	}
}
void _starpu_memory_node_get_name(unsigned node, char *name, int size)
{
	const char *prefix;
	switch (descr.nodes[node])
	{
	case STARPU_CPU_RAM:
		prefix = "RAM";
		break;
	case STARPU_CUDA_RAM:
		prefix = "CUDA";
		break;
	case STARPU_OPENCL_RAM:
		prefix = "OpenCL";
		break;
	case STARPU_DISK_RAM:
		prefix = "Disk";
		break;
	case STARPU_MIC_RAM:
		prefix = "MIC";
		break;
	case STARPU_SCC_RAM:
		prefix = "SCC_RAM";
		break;
	case STARPU_SCC_SHM:
		prefix = "SCC_shared";
		break;
	case STARPU_UNUSED:
	default:
		prefix = "unknown";
		STARPU_ASSERT(0);
	}
	snprintf(name, size, "%s %u", prefix, descr.devid[node]);
}
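/*
 * Sketch: printing the name of every configured memory node with the helper
 * above. starpu_memory_nodes_get_count() is the public counter used to bound
 * the loop; the wrapper function name is illustrative.
 */
static void dump_memory_node_names(void)
{
	char name[64];
	unsigned node;

	for (node = 0; node < starpu_memory_nodes_get_count(); node++)
	{
		_starpu_memory_node_get_name(node, name, sizeof(name));
		fprintf(stderr, "node %u: %s\n", node, name);
	}
}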
/* This function is intended to be used by external tools that should read the
 * performance model files */
int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *model)
{
	model->symbol = symbol;

	/* where is the file if it exists ? */
	char path[256];
	get_model_path(model, path, 256);

	// _STARPU_DEBUG("get_model_path -> %s\n", path);

	/* does it exist ? */
	int res;
	res = access(path, F_OK);
	if (res)
	{
		_STARPU_DISP("There is no performance model for symbol %s\n", symbol);
		return 1;
	}

	FILE *f = fopen(path, "r");
	STARPU_ASSERT(f);

	parse_model_file(f, model, 1);

	return 0;
}
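/*
 * Sketch: how an external tool could load a model for inspection with the
 * function above. The model structure is zero-initialised before loading and
 * "my_kernel" is an illustrative symbol name.
 */
static void inspect_model(void)
{
	struct starpu_perfmodel_t model;
	memset(&model, 0, sizeof(model));

	if (starpu_load_history_debug("my_kernel", &model) == 0)
	{
		/* the per-architecture history is now available in model.per_arch[...] */
	}
}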
/* registered_models_rwlock must be taken in write mode before calling this
 * function */
void _starpu_register_model(struct starpu_perfmodel_t *model)
{
	/* add the model to a linked list */
	struct starpu_model_list_t *node = malloc(sizeof(struct starpu_model_list_t));

	node->model = model;
	//model->debug_modelid = debug_modelid++;

	/* put this model at the beginning of the list */
	node->next = registered_models;
	registered_models = node;

#ifdef STARPU_MODEL_DEBUG
	_starpu_create_sampling_directory_if_needed();

	unsigned arch;
	for (arch = 0; arch < NARCH_VARIATIONS; arch++)
	{
		char debugpath[256];
		starpu_perfmodel_debugfilepath(model, arch, debugpath, 256);
		model->per_arch[arch].debug_file = fopen(debugpath, "a+");
		STARPU_ASSERT(model->per_arch[arch].debug_file);
	}
#endif

	return;
}
starpu_job_t _starpu_stack_pop_task(struct starpu_stack_jobq_s *stack_queue, pthread_mutex_t *sched_mutex)
{
	starpu_job_t j = NULL;

	if (stack_queue->njobs == 0)
		return NULL;

	if (stack_queue->njobs > 0)
	{
		/* there is a task */
		j = starpu_job_list_pop_back(stack_queue->jobq);

		STARPU_ASSERT(j);
		stack_queue->njobs--;

		STARPU_TRACE_JOB_POP(j, 0);

		/* we are sure that we got it now, so at worst, some people thought
		 * there remained some work and will soon discover it is not true */
		PTHREAD_MUTEX_LOCK(sched_mutex);
		total_number_of_jobs--;
		PTHREAD_MUTEX_UNLOCK(sched_mutex);
	}

	return j;
}
/* Post MPI send */
static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, int local_rank)
{
	struct block_description *descr = get_block_description(z);
	STARPU_ASSERT(descr->mpi_node == local_rank);

	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	int dest = neighbour->mpi_node;
	STARPU_ASSERT(neighbour->mpi_node != local_rank);

	/* Send neighbour's border copy to the neighbour */
	starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
	starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1];

	starpu_mpi_isend_detached(handle0, dest, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD,
				  send_done, (void*)(uintptr_t)z);
	starpu_mpi_isend_detached(handle1, dest, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD,
				  send_done, (void*)(uintptr_t)z);
}
/* Post MPI recv */
static void create_task_save_mpi_recv(unsigned iter, unsigned z, int dir, int local_rank)
{
	struct block_description *descr = get_block_description(z);
	STARPU_ASSERT(descr->mpi_node != local_rank);

	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	int source = descr->mpi_node;
	STARPU_ASSERT(neighbour->mpi_node == local_rank);

	/* Receive our neighbour's border in our neighbour copy */
	starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
	starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1];

	starpu_mpi_irecv_detached(handle0, source, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD,
				  recv_done, (void*)(uintptr_t)z);
	starpu_mpi_irecv_detached(handle1, source, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD,
				  recv_done, (void*)(uintptr_t)z);
}
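/*
 * Sketch of the completion callbacks passed to the detached send/recv calls
 * above. The bodies are illustrative; only the signature and the way the
 * block index is smuggled through the argument are taken from the calls above.
 */
static void send_done(void *arg)
{
	unsigned z = (unsigned)(uintptr_t) arg;
	/* the border copy for block z has been sent to the neighbour */
	(void) z;
}

static void recv_done(void *arg)
{
	unsigned z = (unsigned)(uintptr_t) arg;
	/* the neighbour's border for block z has been received */
	(void) z;
}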
/* If sequential consistency mode is enabled, this function blocks until the
 * handle is available in the requested access mode. */
int _starpu_data_wait_until_available(starpu_data_handle handle, starpu_access_mode mode)
{
	/* If sequential consistency is enabled, wait until data is available */
	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
	int sequential_consistency = handle->sequential_consistency;
	if (sequential_consistency)
	{
		struct starpu_task *sync_task;
		sync_task = starpu_task_create();
		sync_task->destroy = 1;

		/* It is not really a RW access, but we want to make sure that
		 * all previous accesses are done */
		_starpu_detect_implicit_data_deps_with_handle(sync_task, sync_task, handle, mode);
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		/* TODO detect if this is superfluous */
		starpu_event event;
		int ret = starpu_task_submit(sync_task, &event);
		STARPU_ASSERT(!ret);
		starpu_event_wait(event);
		starpu_event_release(event);
	}
	else
	{
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
	}

	return 0;
}
/* open an existing memory object on disk */
static void *starpu_unistd_o_direct_open(void *base, void *pos, size_t size)
{
	struct starpu_unistd_global_obj *obj = malloc(sizeof(struct starpu_unistd_global_obj));
	STARPU_ASSERT(obj != NULL);

	/* only the flags differ between the unistd and unistd_o_direct backends */
	obj->flags = O_RDWR | O_DIRECT | O_BINARY;
	return starpu_unistd_global_open(obj, base, pos, size);
}
/*
 * This function frees all the memory that was implicitly allocated by StarPU
 * (for the data replicates). It does not ensure data coherency, and should
 * only be called while StarPU is getting shut down.
 */
size_t _starpu_free_all_automatically_allocated_buffers(uint32_t node)
{
	int res;
	size_t freed = 0;

	res = pthread_rwlock_wrlock(&mc_rwlock[node]);
	STARPU_ASSERT(!res);

	freed += flush_memchunk_cache(node);
	freed += free_potentially_in_use_mc(node, 1);

	res = pthread_rwlock_unlock(&mc_rwlock[node]);
	STARPU_ASSERT(!res);

	return freed;
}
static void _starpu_task_add_succ(struct _starpu_job *j, struct _starpu_cg *cg)
{
	STARPU_ASSERT(j);

	if (_starpu_add_successor_to_cg_list(&j->job_successors, cg))
		/* the task has already completed, so notify the group right away */
		_starpu_notify_cg(cg);
}
void _insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle,
			      enum starpu_data_access_mode mode)
{
	STARPU_ASSERT(listp);

	struct _starpu_handle_list *list = *listp;

	/* If the list is empty, or if the handle's address is the smallest in
	 * the list, insert it as the first element */
	if (!list || list->handle > handle)
	{
		struct _starpu_handle_list *link = (struct _starpu_handle_list *) malloc(sizeof(struct _starpu_handle_list));
		STARPU_ASSERT(link);
		link->handle = handle;
		link->mode = mode;
		link->next = list;
		*listp = link;
		return;
	}

	struct _starpu_handle_list *prev = list;

	/* Look for the handle if it is already present in the list; otherwise,
	 * stop right before the smallest following handle */
	while (list && (handle >= list->handle))
	{
		prev = list;
		list = list->next;
	}

	if (prev->handle == handle)
	{
		/* The handle is already in the list, so merge both access modes */
		prev->mode = (enum starpu_data_access_mode) ((int) prev->mode | (int) mode);
	}
	else
	{
		/* The handle was not in the list, so insert it after 'prev', i.e. right
		 * before 'list', which is the smallest following handle */
		struct _starpu_handle_list *link = (struct _starpu_handle_list *) malloc(sizeof(struct _starpu_handle_list));
		STARPU_ASSERT(link);
		link->handle = handle;
		link->mode = mode;
		link->next = prev->next;
		prev->next = link;
	}
}
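/*
 * Sketch: building a sorted handle list with the helper above. Handles are
 * kept ordered by address, and inserting the same handle twice merely merges
 * the access modes; the handle names are illustrative.
 */
static void build_sorted_list_example(starpu_data_handle_t ha, starpu_data_handle_t hb)
{
	struct _starpu_handle_list *list = NULL;

	_insertion_handle_sorted(&list, ha, STARPU_R);
	_insertion_handle_sorted(&list, hb, STARPU_W);

	/* inserting ha again only upgrades its mode to STARPU_RW */
	_insertion_handle_sorted(&list, ha, STARPU_W);
}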
static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
{
	size_t freed = 0;

	STARPU_ASSERT(mc->ops);
	STARPU_ASSERT(mc->ops->free_data_on_node);

	starpu_data_handle handle = mc->data;

	/* Does this memory chunk refer to a handle that does not exist
	 * anymore ? */
	unsigned data_was_deleted = mc->data_was_deleted;

//	while (_starpu_spin_trylock(&handle->header_lock))
//		_starpu_datawizard_progress(_starpu_get_local_memory_node());

	//FIXME: can we block here ?
//	_starpu_spin_lock(&handle->header_lock);

	if (mc->automatically_allocated &&
	    (!handle || data_was_deleted || handle->per_node[node].refcnt == 0))
	{
		if (handle && !data_was_deleted)
			STARPU_ASSERT(handle->per_node[node].allocated);

		mc->ops->free_data_on_node(mc->interface, node);

		if (handle && !data_was_deleted)
		{
			handle->per_node[node].allocated = 0;

			/* XXX why do we need that ? */
			handle->per_node[node].automatically_allocated = 0;
		}

		freed = mc->size;

		if (handle && !data_was_deleted)
			STARPU_ASSERT(handle->per_node[node].refcnt == 0);
	}

//	_starpu_spin_unlock(&handle->header_lock);

	return freed;
}
uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle)
{
	unsigned node;
	node = _starpu_memory_node_get_local_key();

	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

	return STARPU_VARIABLE_GET_PTR(starpu_data_get_interface_on_node(handle, node));
}
void _starpu_opencl_discover_devices(struct _starpu_machine_config *config)
{
	/* Discover the number of OpenCL devices. Fill the result in CONFIG. */

	/* As OpenCL must have been initialized before calling this function,
	 * `nb_devices' is ensured to be correctly set. */
	STARPU_ASSERT(init_done == 1);
	config->topology.nhwopenclgpus = nb_devices;
}