/* Block the calling thread until CQ reports no outstanding commands.
 *
 * Each round takes scheduler.cq_finished_lock, samples cq->command_count
 * under the queue's own object lock, and either returns (count is zero) or
 * sleeps on scheduler.cq_finished_cond before checking again. */
void
pthread_scheduler_wait_cq (cl_command_queue cq)
{
  for (;;)
    {
      int queue_is_empty;

      pthread_mutex_lock (&scheduler.cq_finished_lock);

      /* Read the counter under the queue lock, then drop that lock before
         possibly sleeping on the condition variable. */
      POCL_LOCK_OBJ (cq);
      queue_is_empty = (cq->command_count == 0);
      POCL_UNLOCK_OBJ (cq);

      if (!queue_is_empty)
        pthread_cond_wait (&scheduler.cq_finished_cond,
                           &scheduler.cq_finished_lock);

      pthread_mutex_unlock (&scheduler.cq_finished_lock);

      if (queue_is_empty)
        return;
    }
}
/* Generate the dynamic work-group sized workgroup function and the
 * device specific code for every (device, default kernel) pair of PROGRAM.
 *
 * Devices that have a pocl binary, or that have no binary at all, are
 * skipped. The program object stays locked for the whole operation.
 *
 * Returns CL_SUCCESS, or the error code returned by
 * pocl_llvm_generate_workgroup_function() for the first kernel that fails;
 * processing stops at that point. */
cl_int
program_compile_dynamic_wg_binaries (cl_program program)
{
  unsigned kernel_idx, dev_idx;
  cl_int status = CL_SUCCESS;
  _cl_command_node cmd;

  assert (program->num_kernels);
  assert (program->build_status == CL_BUILD_SUCCESS);

  memset (&cmd, 0, sizeof cmd);
  cmd.type = CL_COMMAND_NDRANGE_KERNEL;

  /* The command node points at this buffer; it is refilled per kernel. */
  char cachedir[POCL_FILENAME_LENGTH];
  cmd.command.run.tmp_dir = cachedir;

  POCL_LOCK_OBJ (program);

  for (dev_idx = 0; dev_idx < program->num_devices; ++dev_idx)
    {
      cl_device_id device = program->devices[dev_idx];

      /* The program may not be built for some of its devices: only handle
         devices that have a binary which is not a pocl binary. */
      if (!program->pocl_binaries[dev_idx] && program->binaries[dev_idx])
        {
          cmd.device = device;

          for (kernel_idx = 0; kernel_idx < program->num_kernels;
               kernel_idx++)
            {
              pocl_cache_make_kernel_cachedir_path (
                  cachedir, program, device,
                  program->default_kernels[kernel_idx], 0, 0, 0);

              status = pocl_llvm_generate_workgroup_function (
                  device, program->default_kernels[kernel_idx], 0, 0, 0);
              if (status != CL_SUCCESS)
                {
                  POCL_MSG_ERR ("Failed to generate workgroup function for "
                                "kernel %s for device %s\n",
                                program->kernel_names[kernel_idx],
                                device->short_name);
                  POCL_UNLOCK_OBJ (program);
                  return status;
                }

              cmd.command.run.kernel = program->default_kernels[kernel_idx];
              device->ops->compile_kernel (
                  &cmd, program->default_kernels[kernel_idx], device);
            }
        }
    }

  POCL_UNLOCK_OBJ (program);
  return status;
}
/* Append NODE to the command list of COMMAND_QUEUE and mark its event as
 * queued.
 *
 * In debug builds, when the POCL_IMPLICIT_FINISH option is set, the queue
 * is finished (POclFinish) right after the command has been appended. */
void
pocl_command_enqueue (cl_command_queue command_queue, _cl_command_node *node)
{
  /* Update the event BEFORE the node is linked into the queue. Once the
     node is on the list and the queue lock is dropped, whoever drains the
     queue (including the implicit POclFinish below) can reach the node, so
     node->event must not be touched after that point: the command may
     already have been executed, and marking it "queued" afterwards would
     move its status backwards. */
  POCL_UPDATE_EVENT_QUEUED (&node->event);

  POCL_LOCK_OBJ (command_queue);
  LL_APPEND (command_queue->root, node);
  POCL_UNLOCK_OBJ (command_queue);

#ifdef POCL_DEBUG_BUILD
  if (pocl_is_option_set ("POCL_IMPLICIT_FINISH"))
    POclFinish (command_queue);
#endif
}
cl_int compile_and_link_program(int compile_program, int link_program, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, cl_uint num_input_programs, const cl_program *input_programs, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data) { char program_bc_path[POCL_FILENAME_LENGTH]; char link_options[512]; int errcode, error; int create_library = 0; int requires_cr_sqrt_div = 0; int spir_build = 0; unsigned flush_denorms = 0; uint64_t fsize; cl_device_id *unique_devlist = NULL; char *binary = NULL; unsigned device_i = 0, actually_built = 0; size_t i, j; char *temp_options = NULL; const char *extra_build_options = pocl_get_string_option ("POCL_EXTRA_BUILD_FLAGS", NULL); int build_error_code = (link_program ? CL_BUILD_PROGRAM_FAILURE : CL_COMPILE_PROGRAM_FAILURE); POCL_GOTO_LABEL_COND (PFN_NOTIFY, (program == NULL), CL_INVALID_PROGRAM); POCL_GOTO_LABEL_COND (PFN_NOTIFY, (num_devices > 0 && device_list == NULL), CL_INVALID_VALUE); POCL_GOTO_LABEL_COND (PFN_NOTIFY, (num_devices == 0 && device_list != NULL), CL_INVALID_VALUE); POCL_GOTO_LABEL_COND (PFN_NOTIFY, (pfn_notify == NULL && user_data != NULL), CL_INVALID_VALUE); POCL_GOTO_LABEL_ON (PFN_NOTIFY, program->kernels, CL_INVALID_OPERATION, "Program already has kernels\n"); POCL_GOTO_LABEL_ON (PFN_NOTIFY, (program->source == NULL && program->binaries == NULL), CL_INVALID_PROGRAM, "Program doesn't have sources or binaries! 
You need " "to call clCreateProgramWith{Binary|Source} first\n"); POCL_GOTO_LABEL_ON (PFN_NOTIFY, ((program->source == NULL) && (link_program == 0)), CL_INVALID_OPERATION, "Cannot clCompileProgram when program has no source\n"); POCL_LOCK_OBJ (program); program->main_build_log[0] = 0; /* TODO this should be somehow utilized at linking */ POCL_MEM_FREE (program->compiler_options); if (extra_build_options) { size_t len = (options != NULL) ? strlen (options) : 0; len += strlen (extra_build_options) + 2; temp_options = (char *)malloc (len); temp_options[0] = 0; if (options != NULL) { strcpy (temp_options, options); strcat (temp_options, " "); } strcat (temp_options, extra_build_options); } else temp_options = (char*) options; if (temp_options) { i = strlen (temp_options); size_t size = i + 512; /* add some space for pocl-added options */ program->compiler_options = (char *)malloc (size); errcode = process_options (temp_options, program->compiler_options, link_options, program, compile_program, link_program, &create_library, &flush_denorms, &requires_cr_sqrt_div, &spir_build, size); if (errcode != CL_SUCCESS) goto ERROR_CLEAN_OPTIONS; } POCL_MSG_PRINT_LLVM ("building program with options %s\n", program->compiler_options); program->flush_denorms = flush_denorms; #if !(defined(__x86_64__) && defined(__GNUC__)) if (flush_denorms) { POCL_MSG_WARN ("flush to zero is currently only implemented for " "x86-64 & gcc/clang, ignoring flag\n"); } #endif /* DEVICE LIST */ if (num_devices == 0) { num_devices = program->num_devices; device_list = program->devices; } else { // convert subdevices to devices and remove duplicates cl_uint real_num_devices = 0; unique_devlist = pocl_unique_device_list (device_list, num_devices, &real_num_devices); num_devices = real_num_devices; device_list = unique_devlist; } clean_program_on_rebuild (program); /* Build the fully linked non-parallel bitcode for all devices. 
*/ for (device_i = 0; device_i < program->num_devices; ++device_i) { cl_device_id device = program->devices[device_i]; /* find the device in the supplied devices-to-build-for list */ int found = 0; for (i = 0; i < num_devices; ++i) if (device_list[i] == device) found = 1; if (!found) continue; if (requires_cr_sqrt_div && !(device->single_fp_config & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { APPEND_TO_MAIN_BUILD_LOG (REQUIRES_CR_SQRT_DIV_ERR); POCL_GOTO_ERROR_ON (1, build_error_code, REQUIRES_CR_SQRT_DIV_ERR " %s\n", device->short_name); } actually_built++; /* clCreateProgramWithSource */ if (program->source) { #ifdef OCS_AVAILABLE if (device->compiler_available == CL_TRUE) { POCL_MSG_PRINT_INFO ("building from sources for device %d\n", device_i); error = pocl_llvm_build_program ( program, device_i, program->compiler_options, program_bc_path, num_input_headers, input_headers, header_include_names, (create_library ? 0 : link_program)); POCL_GOTO_ERROR_ON ((error != 0), build_error_code, "pocl_llvm_build_program() failed\n"); } else #endif { APPEND_TO_MAIN_BUILD_LOG ( "Cannot build a program from sources with pocl " "that does not have online compiler support\n"); POCL_GOTO_ERROR_ON (1, CL_COMPILER_NOT_AVAILABLE, "%s", program->main_build_log); } } /* clCreateProgramWithBinaries */ else if (program->binaries[device_i] && (program->pocl_binaries[device_i] == NULL)) { #ifdef OCS_AVAILABLE /* bitcode is now either plain LLVM IR or SPIR IR */ int spir_binary = bitcode_is_spir ((char*)program->binaries[device_i], program->binary_sizes[device_i]); if (spir_binary) POCL_MSG_PRINT_LLVM ("LLVM-SPIR binary detected\n"); else POCL_MSG_PRINT_LLVM ("building from a BC binary for device %d\n", device_i); if (spir_binary) { #ifdef ENABLE_SPIR if (!strstr (device->extensions, "cl_khr_spir")) { APPEND_TO_MAIN_BUILD_LOG (REQUIRES_SPIR_SUPPORT); POCL_GOTO_ERROR_ON (1, build_error_code, REQUIRES_SPIR_SUPPORT " %s\n", device->short_name); } if (!spir_build) POCL_MSG_WARN ( "SPIR binary 
provided, but no spir in build options\n"); /* SPIR binaries need to be explicitly linked to the kernel * library. for non-SPIR binaries this happens as part of build * process when program.bc is generated. */ error = pocl_llvm_link_program (program, device_i, program_bc_path, 0, NULL, NULL, NULL, 0, 1); POCL_GOTO_ERROR_ON (error, CL_LINK_PROGRAM_FAILURE, "Failed to link SPIR program.bc\n"); #else APPEND_TO_MAIN_BUILD_LOG (REQUIRES_SPIR_SUPPORT); POCL_GOTO_ERROR_ON (1, build_error_code, REQUIRES_SPIR_SUPPORT " %s\n", device->short_name); #endif } #else APPEND_TO_MAIN_BUILD_LOG ( "Cannot build program from LLVM IR binaries with " "pocl that does not have online compiler support\n"); POCL_GOTO_ERROR_ON (1, CL_COMPILER_NOT_AVAILABLE, "%s", program->main_build_log); #endif } else if (program->pocl_binaries[device_i]) { POCL_MSG_PRINT_INFO("having a poclbinary for device %d\n", device_i); #ifdef OCS_AVAILABLE if (program->binaries[device_i] == NULL) { POCL_MSG_WARN ( "pocl-binary for this device doesn't contain " "program.bc - you won't be able to rebuild/link it\n"); /* do not try to read program.bc or LLVM IRs * TODO maybe read LLVM IRs ?*/ continue; } #else continue; #endif } else if (link_program && (num_input_programs > 0)) { #ifdef OCS_AVAILABLE /* just link binaries. 
*/ unsigned char *cur_device_binaries[num_input_programs]; size_t cur_device_binary_sizes[num_input_programs]; void *cur_llvm_irs[num_input_programs]; for (j = 0; j < num_input_programs; j++) { assert (device == input_programs[j]->devices[device_i]); cur_device_binaries[j] = input_programs[j]->binaries[device_i]; assert (cur_device_binaries[j]); cur_device_binary_sizes[j] = input_programs[j]->binary_sizes[device_i]; if (input_programs[j]->llvm_irs[device_i] == NULL) pocl_update_program_llvm_irs (input_programs[j], device_i); cur_llvm_irs[j] = input_programs[j]->llvm_irs[device_i]; assert (cur_llvm_irs[j]); } error = pocl_llvm_link_program ( program, device_i, program_bc_path, num_input_programs, cur_device_binaries, cur_device_binary_sizes, cur_llvm_irs, create_library, 0); POCL_GOTO_ERROR_ON ((error != CL_SUCCESS), CL_LINK_PROGRAM_FAILURE, "pocl_llvm_link_program() failed\n"); #else POCL_GOTO_ERROR_ON ((1), CL_LINK_PROGRAM_FAILURE, "clCompileProgram/clLinkProgram/clBuildProgram" " require a pocl built with LLVM\n"); #endif } else { POCL_GOTO_ERROR_ON (1, CL_INVALID_BINARY, "No sources nor binaries for device %s - can't " "build the program\n", device->short_name); } #ifdef OCS_AVAILABLE /* Read binaries from program.bc to memory */ if (program->binaries[device_i] == NULL) { errcode = pocl_read_file(program_bc_path, &binary, &fsize); POCL_GOTO_ERROR_ON(errcode, CL_BUILD_ERROR, "Failed to read binaries from program.bc to " "memory: %s\n", program_bc_path); program->binary_sizes[device_i] = (size_t)fsize; program->binaries[device_i] = (unsigned char *)binary; } if (program->llvm_irs[device_i] == NULL) { pocl_update_program_llvm_irs(program, device_i); } /* Maintain a 'last_accessed' file in every program's * cache directory. 
Will be useful for cache pruning script * that flushes old directories based on LRU */ pocl_cache_update_program_last_access(program, device_i); #endif } POCL_GOTO_ERROR_ON ((actually_built < num_devices), build_error_code, "Some of the devices on the argument-supplied list are" "not available for the program, or do not exist\n"); program->build_status = CL_BUILD_SUCCESS; program->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; /* if program will be compiled using clCompileProgram its binary_type * will be set to CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT. * * if program was created by clLinkProgram which is called * with the –createlibrary link option its binary_type will be set to * CL_PROGRAM_BINARY_TYPE_LIBRARY. */ if (create_library) program->binary_type = CL_PROGRAM_BINARY_TYPE_LIBRARY; if (compile_program && !link_program) program->binary_type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; assert(program->num_kernels == 0); /* get non-device-specific kernel metadata. We can stop after finding * the first method that works.*/ for (device_i = 0; device_i < program->num_devices; device_i++) { #ifdef OCS_AVAILABLE if (program->binaries[device_i]) { program->num_kernels = pocl_llvm_get_kernel_count (program, device_i); if (program->num_kernels) { program->kernel_meta = calloc (program->num_kernels, sizeof (pocl_kernel_metadata_t)); pocl_llvm_get_kernels_metadata (program, device_i); } break; } #endif if (program->pocl_binaries[device_i]) { program->num_kernels = pocl_binary_get_kernel_count (program, device_i); if (program->num_kernels) { program->kernel_meta = calloc (program->num_kernels, sizeof (pocl_kernel_metadata_t)); pocl_binary_get_kernels_metadata (program, device_i); } break; } } POCL_GOTO_ERROR_ON ((device_i >= program->num_devices), CL_INVALID_BINARY, "Could find kernel metadata in the built program\n"); /* calculate device-specific kernel hashes. 
*/ for (j = 0; j < program->num_kernels; ++j) { program->kernel_meta[j].build_hash = calloc (program->num_devices, sizeof (pocl_kernel_hash_t)); for (device_i = 0; device_i < program->num_devices; device_i++) { pocl_calculate_kernel_hash (program, j, device_i); } } errcode = CL_SUCCESS; goto FINISH; ERROR: free_meta (program); program->kernels = NULL; for (device_i = 0; device_i < program->num_devices; device_i++) { if (program->source) { POCL_MEM_FREE (program->binaries[device_i]); program->binary_sizes[device_i] = 0; } } ERROR_CLEAN_OPTIONS: if (temp_options != options) free (temp_options); program->build_status = CL_BUILD_ERROR; FINISH: POCL_UNLOCK_OBJ (program); POCL_MEM_FREE (unique_devlist); PFN_NOTIFY: if (pfn_notify) pfn_notify (program, user_data); return errcode; }
/* Ask every device driver to compile each kernel of PROGRAM ahead of time.
 *
 * Devices that have a pocl binary, or no binary at all, are skipped. For
 * the remaining devices a temporary stack-allocated kernel object is
 * pointed at each kernel's metadata in turn and handed to the driver's
 * compile_kernel() hook twice: once for a generic work-group function and
 * once for a variant specialized for a zero global offset.
 *
 * Always returns CL_SUCCESS; the program stays locked while compiling. */
cl_int
program_compile_dynamic_wg_binaries (cl_program program)
{
  unsigned kernel_idx, dev_idx, dim;
  _cl_command_node cmd;

  assert (program->build_status == CL_BUILD_SUCCESS);
  if (program->num_kernels == 0)
    return CL_SUCCESS;

  memset (&cmd, 0, sizeof cmd);
  cmd.type = CL_COMMAND_NDRANGE_KERNEL;

  POCL_LOCK_OBJ (program);

  for (dev_idx = 0; dev_idx < program->num_devices; ++dev_idx)
    {
      cl_device_id device = program->devices[dev_idx];

      /* The program may not be built for some of its devices. */
      if (program->binaries[dev_idx] == NULL)
        continue;
      if (program->pocl_binaries[dev_idx] != NULL)
        continue;

      cmd.device = device;
      cmd.device_i = dev_idx;

      /* Stand-in kernel object; only the fields set here and in the loop
         below are populated. */
      struct _cl_kernel stub_kernel;
      memset (&stub_kernel, 0, sizeof stub_kernel);
      stub_kernel.context = program->context;
      stub_kernel.program = program;
      stub_kernel.next = NULL;

      cl_kernel kernel = &stub_kernel;

      for (kernel_idx = 0; kernel_idx < program->num_kernels; kernel_idx++)
        {
          size_t wg_size[3] = { 0, 0, 0 };

          kernel->meta = &program->kernel_meta[kernel_idx];
          kernel->name = kernel->meta->name;
          cmd.command.run.hash = kernel->meta->build_hash[dev_idx];

          /* Use the required work-group size only when all three
             dimensions are given; otherwise leave the local size at 0. */
          if (kernel->meta->reqd_wg_size[0] > 0
              && kernel->meta->reqd_wg_size[1] > 0
              && kernel->meta->reqd_wg_size[2] > 0)
            {
              for (dim = 0; dim < 3; ++dim)
                wg_size[dim] = kernel->meta->reqd_wg_size[dim];
            }

          for (dim = 0; dim < 3; ++dim)
            cmd.command.run.pc.local_size[dim] = wg_size[dim];

          cmd.command.run.kernel = kernel;

          /* First a generic work-group function, so that a general
             version always exists. */
          device->ops->compile_kernel (&cmd, kernel, device, 0);

          /* Then a specialized one for global offset 0, which is a very
             common case. */
          for (dim = 0; dim < 3; ++dim)
            cmd.command.run.pc.global_offset[dim] = 0;
          cmd.command.run.force_large_grid_wg_func = 1;
          device->ops->compile_kernel (&cmd, kernel, device, 1);
        }
    }

  POCL_UNLOCK_OBJ (program);
  return CL_SUCCESS;
}