/* * This function load the ptx file ptxPath and extract the kernel kName * to phKernel * @param phKernel Output kernel handle * @param ptxPath ptx file name * @param kName kernel name */ void ptxJIT(CUmodule *phModule, CUfunction *phKernel, const char *ptxPath, const char *kName) { CUlinkState cuLinkState; CUjit_option options[6]; void *optionVals[6]; float walltime; char error_log[8192], info_log[8192]; unsigned int logSize = 8192; void *cuOut; size_t outSize; int myErr = 0; // Setup linker options // Return walltime from JIT compilation options[0] = CU_JIT_WALL_TIME; optionVals[0] = (void *) &walltime; // Pass a buffer for info messages options[1] = CU_JIT_INFO_LOG_BUFFER; optionVals[1] = (void *) info_log; // Pass the size of the info buffer options[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; optionVals[2] = (void *) (long)logSize; // Pass a buffer for error message options[3] = CU_JIT_ERROR_LOG_BUFFER; optionVals[3] = (void *) error_log; // Pass the size of the error buffer options[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; optionVals[4] = (void *) (long) logSize; // Make the linker verbose options[5] = CU_JIT_LOG_VERBOSE; optionVals[5] = (void *) 1; // Create a pending linker invocation checkCudaErrors(cuLinkCreate(6,options, optionVals, &cuLinkState)); // Load the ptx from the file myErr = cuLinkAddFile(cuLinkState, CU_JIT_INPUT_PTX, ptxPath, 0, 0, 0); if (myErr != CUDA_SUCCESS){ // Errors will be put in error_log, per CU_JIT_ERROR_LOG_BUFFER option above. fprintf(stderr,"PTX Linker Error:\n%s\n",error_log); } // Complete the linker step checkCudaErrors(cuLinkComplete(cuLinkState, &cuOut, &outSize)); // Linker walltime and info_log were requested in options above. printf("CUDA Link Completed in %fms. 
Linker Output:\n%s\n", walltime, info_log); // Load resulting cuBin into module checkCudaErrors(cuModuleLoadData(phModule, cuOut)); // Locate the kernel entry point checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, kName)); // Destroy the linker invocation checkCudaErrors(cuLinkDestroy(cuLinkState)); }
// load/read kernel from 'program' file/string, compile and return the requested function CUresult ptxJIT(const char* program, const char* functionName, CUmodule *phModule, CUfunction *phKernel, CUlinkState *lState, bool bFromFile) { CUjit_option options[6]; void *optionVals[6]; float walltime(0); const unsigned logSize(8192); char error_log[logSize], info_log[logSize]; void *cuOut; size_t outSize; // Setup linker options // Return walltime from JIT compilation options[0] = CU_JIT_WALL_TIME; optionVals[0] = (void*)&walltime; // Pass a buffer for info messages options[1] = CU_JIT_INFO_LOG_BUFFER; optionVals[1] = (void*)info_log; // Pass the size of the info buffer options[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; optionVals[2] = (void*)(long)logSize; // Pass a buffer for error message options[3] = CU_JIT_ERROR_LOG_BUFFER; optionVals[3] = (void*)error_log; // Pass the size of the error buffer options[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; optionVals[4] = (void*)(long)logSize; // Make the linker verbose options[5] = CU_JIT_LOG_VERBOSE; optionVals[5] = (void*)1; // Create a pending linker invocation checkCudaErrors(cuLinkCreate(6, options, optionVals, lState)); DEBUG("Loading '%s' program", functionName); CUresult myErr; if (bFromFile) { // Load the PTX from the file (64-bit) myErr = cuLinkAddFile(*lState, CU_JIT_INPUT_PTX, program, 0, 0, 0); } else { // Load the PTX from the string myPtx (64-bit) myErr = cuLinkAddData(*lState, CU_JIT_INPUT_PTX, (void*)program, strlen(program)+1, 0, 0, 0, 0); } if (myErr != CUDA_SUCCESS) { // Errors will be put in error_log, per CU_JIT_ERROR_LOG_BUFFER option above. VERBOSE("PTX Linker Error: %s", error_log); return myErr; } // Complete the linker step checkCudaErrors(cuLinkComplete(*lState, &cuOut, &outSize)); // Linker walltime and info_log were requested in options above. 
DEBUG("CUDA link completed (%gms):\n%s", walltime, info_log); // Load resulting cuBin into module checkCudaErrors(cuModuleLoadData(phModule, cuOut)); // Locate the kernel entry point checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, functionName)); // Destroy the linker invocation checkCudaErrors(cuLinkDestroy(*lState)); return CUDA_SUCCESS; }
//------------------------------------------------------------------------------ void build(CUmodule& module, CUfunction& kernel, const std::vector< std::string >& files, const char* kernel_name) { CUjit_option options[] = {CU_JIT_WALL_TIME, CU_JIT_INFO_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_LOG_VERBOSE}; float walltime = 0.f; const unsigned bufsize = 0x10000; char error_buf[bufsize] = ""; char log_buf[bufsize] = ""; const int verbose = 1; void* option_values[] = {(void*) &walltime, (void*) log_buf, (void*) bufsize, (void*) error_buf, (void*) bufsize, (void*) verbose}; void* compiled_code = 0; size_t compiled_size = 0; int status = CUDA_SUCCESS - 1; CUlinkState link_state = CUlinkState(); const int num_options = sizeof(options) / sizeof(CUjit_option); // Create a pending linker invocation CCHECK(cuLinkCreate(num_options, options, option_values, &link_state)); for(std::vector< std::string >::const_iterator i = files.begin(); i != files.end(); ++i) { status = cuLinkAddFile(link_state, CU_JIT_INPUT_PTX, i->c_str(), 0, //num options 0, //options, 0); //option values } if( status != CUDA_SUCCESS ) { std::cerr << "PTX Linker Error:\n"<< error_buf << std::endl; exit(EXIT_FAILURE); } // Complete the linker step: compiled_code is filled with executable code //???: what do I do with the returned data ? can/should I delete it ? CCHECK(cuLinkComplete(link_state, &compiled_code, &compiled_size)); assert(compiled_size > 0); assert(compiled_code); std::cout << "CUDA Link Completed in " << walltime << " ms\n" << log_buf << std::endl; CCHECK(cuModuleLoadData(&module, compiled_code)); CCHECK(cuModuleGetFunction(&kernel, module, kernel_name)); CCHECK(cuLinkDestroy(link_state)); }
/* JIT-link the NUM_OBJS PTX images in PTX_OBJS (each given by its 'code' and
   'size' members) for target CU_TARGET_COMPUTE_30 and load the linked image
   into *MODULE.

   Any driver failure is reported through GOMP_PLUGIN_fatal (defined
   elsewhere; presumably it does not return -- confirm).  For the two link
   steps the linker's error log is emitted first via GOMP_PLUGIN_error.  */
static void
link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
	  unsigned num_objs)
{
  CUjit_option opts[7];
  void *optvals[7];
  float elapsed = 0.0;
#define LOGSIZE 8192
  /* Zero-filled so both logs are valid C strings even when the driver
     writes nothing into them (they are printed with %s below).  */
  char elog[LOGSIZE] = "";
  char ilog[LOGSIZE] = "";
  unsigned long logsize = LOGSIZE;
  CUlinkState linkstate;
  CUresult r;
  void *linkout;
  size_t linkoutsize __attribute__ ((unused));

  opts[0] = CU_JIT_WALL_TIME;
  optvals[0] = &elapsed;

  opts[1] = CU_JIT_INFO_LOG_BUFFER;
  optvals[1] = &ilog[0];

  opts[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
  optvals[2] = (void *) logsize;

  opts[3] = CU_JIT_ERROR_LOG_BUFFER;
  optvals[3] = &elog[0];

  opts[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
  optvals[4] = (void *) logsize;

  opts[5] = CU_JIT_LOG_VERBOSE;
  optvals[5] = (void *) 1;

  opts[6] = CU_JIT_TARGET;
  optvals[6] = (void *) CU_TARGET_COMPUTE_30;

  r = cuLinkCreate (7, opts, optvals, &linkstate);
  if (r != CUDA_SUCCESS)
    GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));

  for (; num_objs--; ptx_objs++)
    {
      /* cuLinkAddData's 'data' argument erroneously omits the const
	 qualifier.  */
      GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, (char*)ptx_objs->code,
			 ptx_objs->size, 0, 0, 0, 0);
      if (r != CUDA_SUCCESS)
	{
	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
	  GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s",
			     cuda_error (r));
	}
    }

  GOMP_PLUGIN_debug (0, "Linking\n");
  r = cuLinkComplete (linkstate, &linkout, &linkoutsize);

  GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
  GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);

  if (r != CUDA_SUCCESS)
    {
      /* Same treatment as a failed cuLinkAddData: show what the linker
	 complained about before giving up.  */
      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
      GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
    }

  /* Load the image while the link state is still alive; the linked image
     belongs to the link state.  */
  r = cuModuleLoadData (module, linkout);
  if (r != CUDA_SUCCESS)
    GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));

  r = cuLinkDestroy (linkstate);
  if (r != CUDA_SUCCESS)
    GOMP_PLUGIN_fatal ("cuLinkDestroy error: %s", cuda_error (r));
}