int _starpu_opencl_deinit_context(int devid) { #ifdef STARPU_SIMGRID int j; for (j = 0; j < STARPU_MAX_PIPELINE; j++) { task_finished[devid][j] = 0; STARPU_PTHREAD_MUTEX_DESTROY(&task_mutex[devid][j]); STARPU_PTHREAD_COND_DESTROY(&task_cond[devid][j]); } #else /* !STARPU_SIMGRID */ cl_int err; STARPU_PTHREAD_MUTEX_LOCK(&big_lock); _STARPU_DEBUG("De-initialising context for dev %d\n", devid); err = clReleaseContext(contexts[devid]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseCommandQueue(queues[devid]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseCommandQueue(in_transfer_queues[devid]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseCommandQueue(out_transfer_queues[devid]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseCommandQueue(peer_transfer_queues[devid]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseCommandQueue(alloc_queues[devid]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); contexts[devid] = NULL; STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock); #endif return 0; }
/*
 * Dump the history-based performance model `model` into its model file.
 *
 * The file location is computed by get_model_path(); the file is
 * created if missing and truncated otherwise, then filled by
 * dump_model_file().  `model` and `model->symbol` must be non-NULL, and
 * the file must be openable (all three are enforced with STARPU_ASSERT).
 */
static void save_history_based_model(struct starpu_perfmodel_t *model)
{
	char model_path[256];
	FILE *out;

	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	/* TODO checks */

	/* filename = $STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */
	get_model_path(model, model_path, sizeof(model_path));

	_STARPU_DEBUG("Opening performance model file %s for model %s\n", model_path, model->symbol);

	/* "w+" either creates the file or discards its previous content */
	out = fopen(model_path, "w+");
	STARPU_ASSERT(out);

	dump_model_file(out, model);

	fclose(out);
}
void *_starpu_cpu_worker(void *arg) { struct starpu_worker_s *cpu_arg = arg; unsigned memnode = cpu_arg->memory_node; int workerid = cpu_arg->workerid; int devid = cpu_arg->devid; #ifdef STARPU_USE_FXT _starpu_fxt_register_thread(cpu_arg->bindid); #endif STARPU_TRACE_WORKER_INIT_START(STARPU_FUT_CPU_KEY, devid, memnode); _starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid); _STARPU_DEBUG("cpu worker %d is ready on logical cpu %d\n", devid, cpu_arg->bindid); _starpu_set_local_memory_node_key(&memnode); _starpu_set_local_worker_key(cpu_arg); snprintf(cpu_arg->name, 32, "CPU %d", devid); cpu_arg->status = STATUS_UNKNOWN; STARPU_TRACE_WORKER_INIT_END /* tell the main thread that we are ready */ PTHREAD_MUTEX_LOCK(&cpu_arg->mutex); cpu_arg->worker_is_initialized = 1; PTHREAD_COND_SIGNAL(&cpu_arg->ready_cond); PTHREAD_MUTEX_UNLOCK(&cpu_arg->mutex); starpu_job_t j; int res; while (_starpu_machine_is_running()) { STARPU_TRACE_START_PROGRESS(memnode); _starpu_datawizard_progress(memnode, 1); STARPU_TRACE_END_PROGRESS(memnode); _starpu_execute_registered_progression_hooks(); PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex); /* perhaps there is some local task to be executed first */ j = _starpu_pop_local_task(cpu_arg); /* otherwise ask a task to the scheduler */ if (!j) { struct starpu_task *task = _starpu_pop_task(); if (task) j = _starpu_get_job_associated_to_task(task); } if (j == NULL) { if (_starpu_worker_can_block(memnode)) _starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex); PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex); continue; }; PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex); /* can a cpu perform that task ? */ if (!STARPU_CPU_MAY_PERFORM(j)) { /* put it and the end of the queue ... 
XXX */ _starpu_push_task(j, 0); continue; } _starpu_set_current_task(j->task); res = execute_job_on_cpu(j, cpu_arg); _starpu_set_current_task(NULL); if (res) { switch (res) { case -EAGAIN: _starpu_push_task(j, 0); continue; default: assert(0); } } _starpu_handle_job_termination(j, 0); } STARPU_TRACE_WORKER_DEINIT_START /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY); pthread_exit(NULL); }
int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs) { unsigned int dev; unsigned int nb_devices; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { cl_device_id device; cl_context context; cl_program program; cl_int err; char *binary; char binary_file_name[1024]; size_t length; cl_int binary_status; opencl_programs->programs[dev] = NULL; starpu_opencl_get_device(dev, &device); starpu_opencl_get_context(dev, &context); if (context == NULL) { _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); continue; } // Load the binary buffer err = _starpu_opencl_get_binary_name(binary_file_name, 1024, kernel_id, dev, device); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); binary = _starpu_opencl_load_program_binary(binary_file_name, &length); // Create the compute program from the binary buffer program = clCreateProgramWithBinary(context, 1, &device, &length, (const unsigned char **) &binary, &binary_status, &err); if (!program || err != CL_SUCCESS) { _STARPU_DISP("Error: Failed to load program binary!\n"); return EXIT_FAILURE; } // Build the program executable err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); // Get the status { cl_build_status status; size_t len; static char buffer[4096] = ""; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); if (len > 2) _STARPU_DISP("Compilation output\n%s\n", buffer); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) { _STARPU_DISP("Error: Failed to build program executable!\n"); _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); return EXIT_FAILURE; } } // Store program opencl_programs->programs[dev] = program; } return 0; }
static int _starpu_opencl_compile_or_load_opencl_from_string(const char *opencl_program_source, const char* build_options, struct starpu_opencl_program *opencl_programs, const char* source_file_name) { unsigned int dev; unsigned int nb_devices; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { cl_device_id device; cl_context context; cl_program program; cl_int err; if (opencl_programs) opencl_programs->programs[dev] = NULL; starpu_opencl_get_device(dev, &device); starpu_opencl_get_context(dev, &context); if (context == NULL) { _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); continue; } // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) &opencl_program_source, NULL, &err); if (!program || err != CL_SUCCESS) { _STARPU_DISP("Error: Failed to load program source with options %s!\n", build_options); return EXIT_FAILURE; } // Build the program executable err = clBuildProgram(program, 1, &device, build_options, NULL, NULL); // Get the status { cl_build_status status; size_t len; static char buffer[4096] = ""; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); if (len > 2) _STARPU_DISP("Compilation output\n%s\n", buffer); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) { _STARPU_DISP("Error: Failed to build program executable!\n"); _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); return EXIT_FAILURE; } } // Store program if (opencl_programs) opencl_programs->programs[dev] = program; else { char binary_file_name[1024]; char *binary; size_t binary_len; FILE *fh; err = _starpu_opencl_get_binary_name(binary_file_name, 1024, source_file_name, dev, device); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clGetProgramInfo(program, 
CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_len, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); binary = malloc(binary_len); err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); fh = fopen(binary_file_name, "w"); if (fh == NULL) { _STARPU_DISP("Error: Failed to open file <%s>\n", binary_file_name); perror("fopen"); return EXIT_FAILURE; } fwrite(binary, binary_len, 1, fh); fclose(fh); free(binary); _STARPU_DEBUG("File <%s> created\n", binary_file_name); } } return EXIT_SUCCESS; }
/* We first try to grab the global lock in read mode to check whether the model * was loaded or not (this is very likely to have been already loaded). If the * model was not loaded yet, we take the lock in write mode, and if the model * is still not loaded once we have the lock, we do load it. */ static void load_history_based_model(struct starpu_perfmodel_t *model, unsigned scan_history) { STARPU_ASSERT(model); STARPU_ASSERT(model->symbol); int already_loaded; PTHREAD_RWLOCK_RDLOCK(®istered_models_rwlock); already_loaded = model->is_loaded; PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); if (already_loaded) return; /* The model is still not loaded so we grab the lock in write mode, and * if it's not loaded once we have the lock, we do load it. */ PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); /* Was the model initialized since the previous test ? */ if (model->is_loaded) { PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); return; } PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL); PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock); /* make sure the performance model directory exists (or create it) */ _starpu_create_sampling_directory_if_needed(); /* * We need to keep track of all the model that were opened so that we can * possibly update them at runtime termination ... */ _starpu_register_model(model); char path[256]; get_model_path(model, path, 256); _STARPU_DEBUG("Opening performance model file %s for model %s ... ", path, model->symbol); unsigned calibrate_flag = _starpu_get_calibrate_flag(); model->benchmarking = calibrate_flag; /* try to open an existing file and load it */ int res; res = access(path, F_OK); if (res == 0) { if (calibrate_flag == 2) { /* The user specified that the performance model should * be overwritten, so we don't load the existing file ! 
* */ _STARPU_DEBUG("Overwrite existing file\n"); initialize_model(model); } else { /* We load the available file */ _STARPU_DEBUG("File exists\n"); FILE *f; f = fopen(path, "r"); STARPU_ASSERT(f); parse_model_file(f, model, scan_history); fclose(f); } } else { _STARPU_DEBUG("File does not exists\n"); if (!calibrate_flag) { _STARPU_DISP("Warning: model %s is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol); _starpu_set_calibrate_flag(1); model->benchmarking = 1; } initialize_model(model); } model->is_loaded = 1; PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock); PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); }