/* Build the kernel of a run command for HSA.
 *
 * Runs pocl_llvm_codegen () on the parallel bitcode found in the command's
 * temporary directory, reads the produced object file (the AMDGPU ELF) into
 * memory and hands it to the HSA code unit loader.
 *
 * On return cmd->command.run.device_data points to a two-element array:
 *   [0] a heap-allocated hsa_amd_code_unit_t filled by hsa_ext_code_unit_load
 *   [1] a heap-allocated hsa_amd_code_t filled by hsa_ext_code_unit_get_info
 * None of these allocations is released by this function.
 *
 * Every failure is fatal and reported through POCL_ABORT, so the checks
 * remain active in NDEBUG builds (plain assert () would be compiled out).
 *
 * TODO: there's not much to do here, just build the kernel for HSA.
 * Perhaps share the same function for all WG sizes in case it's an SPMD
 * target.
 */
static void
compile (_cl_command_node *cmd)
{
  int error;
  char bytecode[POCL_FILENAME_LENGTH];
  char objfile[POCL_FILENAME_LENGTH];
  FILE *file;
  char *elf_blob;
  size_t file_size, got_size;
  hsa_runtime_caller_t caller;
  hsa_amd_code_unit_t *code_unit;
  hsa_amd_code_t *code;

  /* snprintf returns the length it *wanted* to write, so a value at or
     beyond the buffer size means the path was truncated.  */
  error = snprintf (bytecode, sizeof bytecode, "%s/%s",
                    cmd->command.run.tmp_dir, POCL_PARALLEL_BC_FILENAME);
  if (error < 0 || (size_t) error >= sizeof bytecode)
    {
      POCL_ABORT ("pocl-hsa: the kernel bitcode path is too long.");
    }

  error = snprintf (objfile, sizeof objfile, "%s/%s.o",
                    cmd->command.run.tmp_dir, POCL_PARALLEL_BC_FILENAME);
  if (error < 0 || (size_t) error >= sizeof objfile)
    {
      POCL_ABORT ("pocl-hsa: the kernel object file path is too long.");
    }

  error = pocl_llvm_codegen (cmd->command.run.kernel, cmd->device,
                             bytecode, objfile);
  if (error != 0)
    {
      POCL_ABORT ("pocl-hsa: code generation for the kernel failed.");
    }

  /* Load the built AMDGPU ELF file. */
  file = fopen (objfile, "rb");
  if (file == NULL)
    {
      POCL_ABORT ("pocl-hsa: could not open the AMD ELF.");
    }

  file_size = pocl_file_size (file);
  elf_blob = malloc (file_size);
  /* An empty object file cannot be loaded either, so a NULL result from
     malloc (0) is treated the same way as an allocation failure.  */
  if (elf_blob == NULL)
    {
      POCL_ABORT ("pocl-hsa: could not allocate a buffer for the AMD ELF.");
    }

  got_size = fread (elf_blob, 1, file_size, file);
  fclose (file);
  if (file_size != got_size)
    {
      POCL_ABORT ("pocl-hsa: could not read the AMD ELF.");
    }

  /* Storage for the code unit and the code handle handed back through
     the command's device_data slots.  */
  code_unit = malloc (sizeof *code_unit);
  code = malloc (sizeof *code);
  cmd->command.run.device_data = (void**) malloc (sizeof (void*) * 2);
  if (code_unit == NULL || code == NULL
      || cmd->command.run.device_data == NULL)
    {
      POCL_ABORT ("pocl-hsa: out of memory while preparing the kernel.");
    }
  cmd->command.run.device_data[0] = code_unit;
  cmd->command.run.device_data[1] = code;

  caller.caller = 0;
  if (hsa_ext_code_unit_load (caller, NULL, 0, elf_blob, file_size,
                              NULL, NULL, code_unit) != HSA_STATUS_SUCCESS)
    {
      POCL_ABORT ("pocl-hsa: error while loading the built AMDGPU ELF binary.");
    }

  if (hsa_ext_code_unit_get_info (*code_unit,
                                  HSA_EXT_CODE_UNIT_INFO_CODE_ENTITY_CODE,
                                  0, code) != HSA_STATUS_SUCCESS)
    {
      POCL_ABORT ("pocl-hsa: unable to get the code handle to the kernel.");
    }

  /* The blob is released only after both HSA calls, as before.  */
  free (elf_blob);
}
/* Report an unrecoverable llvm_codegen () failure on stderr and terminate. */
static void
llvm_codegen_fatal (const char *what)
{
  fprintf (stderr, "[pocl] llvm_codegen: %s\n", what);
  abort ();
}

/**
 * Generate code from the final bitcode using the LLVM
 * tools.
 *
 * Uses an existing (cached) one, if available: when "<tmpdir>/<function
 * name>.so" already exists, neither code generation nor linking is run.
 * Otherwise the bitcode is compiled to an object file with
 * pocl_llvm_codegen () and linked into the shared object by a command
 * executed through system ().
 *
 * Any failure (path too long for the buffers, out of memory, code
 * generation or link error) terminates the process. The checks are
 * explicit, so they stay active in NDEBUG builds.
 *
 * @param tmpdir The directory of the work-group function bitcode.
 * @param kernel The kernel; its function_name names the generated binary.
 * @param device The device passed on to pocl_llvm_codegen ().
 * @return the generated binary filename, in a buffer obtained from
 *         malloc () that this function does not free.
 */
const char*
llvm_codegen (const char* tmpdir, cl_kernel kernel, cl_device_id device)
{
  const char* pocl_verbose_ptr =
    pocl_get_string_option ("POCL_VERBOSE", (char*)NULL);
  int pocl_verbose = pocl_verbose_ptr && *pocl_verbose_ptr;

  char command[COMMAND_LENGTH];
  char bytecode[POCL_FILENAME_LENGTH];
  char objfile[POCL_FILENAME_LENGTH];
  char* module;
  size_t module_size;
  int error;

  /* "<tmpdir>/<name>.so" plus the terminating NUL: '/' + ".so" + NUL = 5. */
  module_size = strlen (tmpdir) + strlen (kernel->function_name) + 5;
  if (module_size > (size_t) POCL_FILENAME_LENGTH)
    llvm_codegen_fatal ("the kernel binary path is too long");

  module = (char*) malloc (module_size);
  if (module == NULL)
    llvm_codegen_fatal ("out of memory");

  /* snprintf returns the length it wanted to write; a value at or beyond
     the buffer size means the result was truncated.  */
  error = snprintf (module, module_size, "%s/%s.so",
                    tmpdir, kernel->function_name);
  if (error < 0 || (size_t) error >= module_size)
    llvm_codegen_fatal ("could not build the kernel binary path");

  if (access (module, F_OK) != 0)
    {
      /* The object file path is only needed when the binary has to be
         (re)built, so it is formed here rather than up front.  */
      error = snprintf (objfile, sizeof objfile, "%s/%s.so.o",
                        tmpdir, kernel->function_name);
      if (error < 0 || (size_t) error >= sizeof objfile)
        llvm_codegen_fatal ("the kernel object file path is too long");

      error = snprintf (bytecode, sizeof bytecode, "%s/%s", tmpdir,
                        POCL_PARALLEL_BC_FILENAME);
      if (error < 0 || (size_t) error >= sizeof bytecode)
        llvm_codegen_fatal ("the kernel bitcode path is too long");

      error = pocl_llvm_codegen (kernel, device, bytecode, objfile);
      if (error != 0)
        llvm_codegen_fatal ("code generation failed");

      /* clang is used as the linker driver in LINK_CMD */
      error = snprintf (command, sizeof command,
#ifndef ANDROID
                        LINK_CMD " " HOST_CLANG_FLAGS " " HOST_LD_FLAGS " -o %s %s",
#else
                        ANDROID_POCL_PREFIX"/bin/ld " HOST_LD_FLAGS " -o %s %s "
                        " /system/lib/crtend_so.o /system/lib/crtbegin_so.o -ldl -lc ",
#endif
                        module, objfile);
      if (error < 0 || (size_t) error >= sizeof command)
        llvm_codegen_fatal ("the link command is too long");

      if (pocl_verbose)
        {
          fprintf (stderr, "[pocl] executing [%s]\n", command);
          fflush (stderr);
        }

      error = system (command);
      if (error != 0)
        llvm_codegen_fatal ("linking the kernel binary failed");

      /* Save space in kernel cache */
      if (!pocl_get_bool_option ("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES", 0))
        {
          pocl_remove_file (objfile);
          pocl_remove_file (bytecode);
        }
    }

  return module;
}