static void *call_compiler(const char *src, size_t len, const char *arch_arg, size_t *bin_len, char **log, size_t *log_len, int *ret) { nvrtcProgram prog; void *buf = NULL; size_t buflen; const char *opts[4] = { "-arch", "" , "-G", "-lineinfo" }; nvrtcResult err, err2; opts[1] = arch_arg; err = nvrtcCreateProgram(&prog, src, NULL, 0, NULL, NULL); if (err != NVRTC_SUCCESS) FAIL(NULL, GA_SYS_ERROR); err = nvrtcCompileProgram(prog, #ifdef DEBUG 4, #else 2, #endif opts); if (log != NULL) { err2 = nvrtcGetProgramLogSize(prog, &buflen); if (err2 != NVRTC_SUCCESS) goto end2; buf = malloc(buflen); if (buf == NULL) goto end2; err2 = nvrtcGetProgramLog(prog, (char *)buf); if (err2 != NVRTC_SUCCESS) goto end2; if (log_len != NULL) *log_len = buflen; *log = (char *)buf; buf = NULL; } end2: if (err != NVRTC_SUCCESS) goto end; err = nvrtcGetPTXSize(prog, &buflen); if (err != NVRTC_SUCCESS) goto end; buf = malloc(buflen); if (buf == NULL) { nvrtcDestroyProgram(&prog); FAIL(NULL, GA_MEMORY_ERROR); } err = nvrtcGetPTX(prog, (char *)buf); if (err != NVRTC_SUCCESS) goto end; *bin_len = buflen; end: nvrtcDestroyProgram(&prog); if (err != NVRTC_SUCCESS) { free(buf); FAIL(NULL, GA_SYS_ERROR); } return buf; }
static CUmodule build_kernel_source(const char *source_file, long target_capability) { char *source; int link_dev_runtime; nvrtcProgram program; nvrtcResult rc; char arch_buf[128]; const char *options[10]; int opt_index = 0; int build_failure = 0; char *build_log; size_t build_log_len; char *ptx_image; size_t ptx_image_len; void *bin_image; size_t bin_image_len; CUmodule cuda_module; CUresult cuda_rc; source = load_kernel_source(source_file, &link_dev_runtime); rc = nvrtcCreateProgram(&program, source, NULL, 0, NULL, NULL); if (rc != NVRTC_SUCCESS) nvrtc_error(rc, "nvrtcCreateProgram"); /* * Put command line options as cuda_program.c doing */ options[opt_index++] = "-I " CUDA_INCLUDE_PATH; snprintf(arch_buf, sizeof(arch_buf), "--gpu-architecture=compute_%ld", target_capability); options[opt_index++] = arch_buf; #ifdef PGSTROM_DEBUG options[opt_index++] = "--device-debug"; options[opt_index++] = "--generate-line-info"; #endif options[opt_index++] = "--use_fast_math"; if (link_dev_runtime) options[opt_index++] = "--relocatable-device-code=true"; /* * Kick runtime compiler */ rc = nvrtcCompileProgram(program, opt_index, options); if (rc != NVRTC_SUCCESS) { if (rc == NVRTC_ERROR_COMPILATION) build_failure = 1; else nvrtc_error(rc, "nvrtcCompileProgram"); } /* * Print build log */ rc = nvrtcGetProgramLogSize(program, &build_log_len); if (rc != NVRTC_SUCCESS) nvrtc_error(rc, "nvrtcGetProgramLogSize"); build_log = malloc(build_log_len + 1); if (!build_log) { fputs("out of memory", stderr); exit(1); } rc = nvrtcGetProgramLog(program, build_log); if (rc != NVRTC_SUCCESS) nvrtc_error(rc, "nvrtcGetProgramLog"); if (build_log_len > 1) printf("build log:\n%s\n", build_log); if (build_failure) exit(1); /* * Get PTX Image */ rc = nvrtcGetPTXSize(program, &ptx_image_len); if (rc != NVRTC_SUCCESS) nvrtc_error(rc, "nvrtcGetPTXSize"); ptx_image = malloc(ptx_image_len + 1); if (!ptx_image) { fputs("out of memory", stderr); exit(1); } rc = nvrtcGetPTX(program, ptx_image); if (rc != NVRTC_SUCCESS) nvrtc_error(rc, "nvrtcGetPTX"); ptx_image[ptx_image_len] = '\0'; /* * Link device runtime if needed */ if (link_dev_runtime) { link_device_libraries(ptx_image, ptx_image_len, &bin_image, &bin_image_len, target_capability); } else { bin_image = ptx_image; bin_image_len = ptx_image_len; } cuda_rc = cuModuleLoadData(&cuda_module, bin_image); if (cuda_rc != CUDA_SUCCESS) cuda_error(rc, "cuModuleLoadData"); return cuda_module; }