/*
 * Load the PTX file ptxPath through the CUDA driver JIT linker, load the
 * linked image into a module and look up the kernel kName in it.
 *
 * Linker info/error logs and the link wall time are requested through JIT
 * options; the info log and wall time are printed to stdout after linking.
 *
 * @param phModule    Output module handle (receives the loaded module)
 * @param phKernel    Output kernel handle
 * @param ptxPath     ptx file name
 * @param kName       kernel name
 */
void ptxJIT(CUmodule *phModule, CUfunction *phKernel, const char *ptxPath, const char *kName)
{
  CUlinkState cuLinkState;
  CUjit_option options[6];
  void *optionVals[6];
  // Initialised so that printing them is well defined even when the linker
  // never writes to them (e.g. the input file could not be opened).
  float walltime = 0.0f;
  char error_log[8192] = "";
  char info_log[8192] = "";
  const size_t logSize = sizeof(error_log);
  void *cuOut;
  size_t outSize;
  CUresult myErr;

  // Setup linker options
  // Return walltime from JIT compilation
  options[0] = CU_JIT_WALL_TIME;
  optionVals[0] = (void *) &walltime;
  // Pass a buffer for info messages
  options[1] = CU_JIT_INFO_LOG_BUFFER;
  optionVals[1] = (void *) info_log;
  // Pass the size of the info buffer (the value itself is carried in the pointer)
  options[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
  optionVals[2] = (void *) logSize;
  // Pass a buffer for error message
  options[3] = CU_JIT_ERROR_LOG_BUFFER;
  optionVals[3] = (void *) error_log;
  // Pass the size of the error buffer
  options[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
  optionVals[4] = (void *) logSize;
  // Make the linker verbose
  options[5] = CU_JIT_LOG_VERBOSE;
  optionVals[5] = (void *) 1;

  // Create a pending linker invocation
  checkCudaErrors(cuLinkCreate(6, options, optionVals, &cuLinkState));

  // Load the ptx from the file
  myErr = cuLinkAddFile(cuLinkState, CU_JIT_INPUT_PTX, ptxPath, 0, 0, 0);
  if (myErr != CUDA_SUCCESS){
    // Errors will be put in error_log, per CU_JIT_ERROR_LOG_BUFFER option above.
    fprintf(stderr,"PTX Linker Error:\n%s\n",error_log);
    // Report the real failure here instead of carrying on to link an input
    // that was never added.
    checkCudaErrors(myErr);
  }

  // Complete the linker step
  checkCudaErrors(cuLinkComplete(cuLinkState, &cuOut, &outSize));

  // Linker walltime and info_log were requested in options above.
  printf("CUDA Link Completed in %fms. Linker Output:\n%s\n", walltime, info_log);

  // Load resulting cuBin into module; this must happen before the link state
  // is destroyed below.
  checkCudaErrors(cuModuleLoadData(phModule, cuOut));

  // Locate the kernel entry point
  checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, kName));

  // Destroy the linker invocation
  checkCudaErrors(cuLinkDestroy(cuLinkState));
}
Esempio n. 2
0
/*
 * link_device_libraries
 *
 * Feeds the PTX image and "<CUDA_LIBRARY_PATH>/libcudadevrt.a" to the CUDA
 * driver linker and stores the linked image and its length into
 * *p_bin_image / *p_bin_image_len. target_capability is passed through as
 * the CU_JIT_TARGET option value. Any driver error is handed to cuda_error().
 *
 * NOTE(review): the link state created here is never passed to
 * cuLinkDestroy() within this function, and it is not returned to the
 * caller either - confirm whether that is intentional.
 */
static void
link_device_libraries(void *ptx_image, size_t ptx_image_len,
					  void **p_bin_image, size_t *p_bin_image_len,
					  long target_capability)
{
	CUjit_option	opt_keys[10];
	void		   *opt_vals[10];
	int				nopts = 0;
	CUlinkState		linker;
	CUresult		status;
	char			devrt_path[1024];

	/* linker options: target architecture, plus debug info if requested */
	opt_keys[nopts] = CU_JIT_TARGET;
	opt_vals[nopts++] = (void *)target_capability;
#ifdef PGSTROM_DEBUG
	opt_keys[nopts] = CU_JIT_GENERATE_DEBUG_INFO;
	opt_vals[nopts++] = (void *)1UL;

	opt_keys[nopts] = CU_JIT_GENERATE_LINE_INFO;
	opt_vals[nopts++] = (void *)1UL;
#endif

	/* open a pending link operation */
	status = cuLinkCreate(nopts, opt_keys, opt_vals, &linker);
	if (status != CUDA_SUCCESS)
		cuda_error(status, "cuLinkCreate");

	/* first input: the PTX image supplied by the caller */
	status = cuLinkAddData(linker, CU_JIT_INPUT_PTX,
						   ptx_image, ptx_image_len,
						   "PG-Strom", 0, NULL, NULL);
	if (status != CUDA_SUCCESS)
		cuda_error(status, "cuLinkAddData");

	/* second input: libcudadevrt.a from the CUDA library directory */
	snprintf(devrt_path, sizeof(devrt_path), "%s/libcudadevrt.a",
			 CUDA_LIBRARY_PATH);
	status = cuLinkAddFile(linker, CU_JIT_INPUT_LIBRARY, devrt_path,
						   0, NULL, NULL);
	if (status != CUDA_SUCCESS)
		cuda_error(status, "cuLinkAddFile");

	/* run the link and hand the resulting image back to the caller */
	status = cuLinkComplete(linker, p_bin_image, p_bin_image_len);
	if (status != CUDA_SUCCESS)
		cuda_error(status, "cuLinkComplete");
}
Esempio n. 3
0
// Links the given PTX text against the GPU runtime static library
// (<mapd root>/QueryEngine/cuda_mapd_rt.a) with the CUDA driver linker.
//
// ptx         non-empty PTX source text
// block_size  forwarded to fill_options() when building the JIT options
// cuda_mgr    must report at least one device; context 0 is made current
//
// Returns the linked image together with the option vectors and the link
// state. The link state is NOT destroyed here; it is handed back in the
// result (presumably so the caller can destroy it once the image has been
// loaded - verify against the caller).
//
// Throws std::runtime_error if the runtime library file does not exist.
CubinResult ptx_to_cubin(const std::string& ptx,
                         const unsigned block_size,
                         const CudaMgr_Namespace::CudaMgr* cuda_mgr) {
  CHECK(!ptx.empty());
  CHECK(cuda_mgr->getDeviceCount() > 0);
  cuda_mgr->setContext(0);

  // Resolve the runtime library before creating the link state, so that the
  // "not found" exception cannot leak a live CUlinkState.
  boost::filesystem::path gpu_rt_path{mapd_root_abs_path()};
  gpu_rt_path /= "QueryEngine";
  gpu_rt_path /= "cuda_mapd_rt.a";
  if (!boost::filesystem::exists(gpu_rt_path)) {
    throw std::runtime_error("MapD GPU runtime library not found at " + gpu_rt_path.string());
  }

  std::vector<CUjit_option> option_keys;
  std::vector<void*> option_values;
  fill_options(option_keys, option_values, block_size);
  CHECK_EQ(option_values.size(), option_keys.size());
  unsigned num_options = option_keys.size();
  CUlinkState link_state;
  checkCudaErrors(cuLinkCreate(num_options, option_keys.data(), option_values.data(), &link_state));

  // How to create a static CUDA library:
  // 1. nvcc -std=c++11 -arch=sm_30 --device-link -c [list of .cu files]
  // 2. nvcc -std=c++11 -arch=sm_30 -lib [list of .o files generated by step 1] -o [library_name.a]
  checkCudaErrors(cuLinkAddFile(
      link_state, CU_JIT_INPUT_LIBRARY, gpu_rt_path.c_str(), num_options, option_keys.data(), option_values.data()));

  // The PTX is passed including its terminating NUL (length() + 1).
  checkCudaErrors(cuLinkAddData(link_state,
                                CU_JIT_INPUT_PTX,
                                static_cast<void*>(const_cast<char*>(ptx.c_str())),
                                ptx.length() + 1,
                                0,
                                num_options,
                                option_keys.data(),
                                option_values.data()));
  void* cubin{nullptr};
  size_t cubinSize{0};
  checkCudaErrors(cuLinkComplete(link_state, &cubin, &cubinSize));
  CHECK(cubin);
  CHECK_GT(cubinSize, size_t(0));
  return {cubin, option_keys, option_values, link_state};
}
Esempio n. 4
0
// Load/read a PTX kernel from 'program', JIT-link it, load the result into
// a module and return the requested function.
//
//   program       path of a PTX file (bFromFile == true) or a NUL-terminated
//                 PTX source string (bFromFile == false)
//   functionName  name of the kernel entry point to look up
//   phModule      receives the loaded module
//   phKernel      receives the kernel handle
//   lState        receives the link state created for this invocation
//   bFromFile     selects how 'program' is interpreted
//
// Returns CUDA_SUCCESS, or the error code of the failed add-file/add-data
// step. Other driver calls are wrapped in checkCudaErrors().
//
// NOTE(review): on success *lState has already been destroyed when this
// function returns; on the early error return it is still alive, and its JIT
// options point at this function's (now dead) stack buffers. Callers should
// do nothing with it other than destroy it - confirm what callers expect.
CUresult ptxJIT(const char* program, const char* functionName, CUmodule *phModule, CUfunction *phKernel, CUlinkState *lState, bool bFromFile)
{
	CUjit_option options[6];
	void *optionVals[6];
	float walltime(0);
	const unsigned logSize(8192);
	char error_log[logSize], info_log[logSize];
	void *cuOut;
	size_t outSize;

	// Start with empty logs so that logging them below is well defined even
	// if the linker never writes to them.
	error_log[0] = '\0';
	info_log[0] = '\0';

	// Setup linker options
	// Return walltime from JIT compilation
	options[0] = CU_JIT_WALL_TIME;
	optionVals[0] = (void*)&walltime;
	// Pass a buffer for info messages
	options[1] = CU_JIT_INFO_LOG_BUFFER;
	optionVals[1] = (void*)info_log;
	// Pass the size of the info buffer (the value itself is carried in the pointer)
	options[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
	optionVals[2] = (void*)(size_t)logSize;
	// Pass a buffer for error message
	options[3] = CU_JIT_ERROR_LOG_BUFFER;
	optionVals[3] = (void*)error_log;
	// Pass the size of the error buffer
	options[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
	optionVals[4] = (void*)(size_t)logSize;
	// Make the linker verbose
	options[5] = CU_JIT_LOG_VERBOSE;
	optionVals[5] = (void*)1;

	// Create a pending linker invocation
	checkCudaErrors(cuLinkCreate(6, options, optionVals, lState));

	DEBUG("Loading '%s' program", functionName);
	CUresult myErr;
	if (bFromFile) {
		// Load the PTX from the file
		myErr = cuLinkAddFile(*lState, CU_JIT_INPUT_PTX, program, 0, 0, 0);
	} else {
		// Load the PTX from the string, including its terminating NUL
		myErr = cuLinkAddData(*lState, CU_JIT_INPUT_PTX, (void*)program, strlen(program)+1, 0, 0, 0, 0);
	}
	if (myErr != CUDA_SUCCESS) {
		// Errors will be put in error_log, per CU_JIT_ERROR_LOG_BUFFER option above.
		VERBOSE("PTX Linker Error: %s", error_log);
		return myErr;
	}

	// Complete the linker step
	checkCudaErrors(cuLinkComplete(*lState, &cuOut, &outSize));

	// Linker walltime and info_log were requested in options above.
	DEBUG("CUDA link completed (%gms):\n%s", walltime, info_log);

	// Load resulting cuBin into module; this must happen before the link
	// state is destroyed below.
	checkCudaErrors(cuModuleLoadData(phModule, cuOut));

	// Locate the kernel entry point
	checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, functionName));

	// Destroy the linker invocation
	checkCudaErrors(cuLinkDestroy(*lState));
	return CUDA_SUCCESS;
}
//------------------------------------------------------------------------------
// Links the PTX files listed in 'files' with the CUDA driver JIT linker,
// loads the linked image into 'module' and looks up 'kernel_name' in it.
//
//   module       receives the loaded module
//   kernel       receives the kernel handle
//   files        paths of the PTX files to link; an empty list is treated
//                as a link failure
//   kernel_name  name of the kernel entry point
//
// On a cuLinkAddFile failure the linker error log is written to std::cerr
// and the process exits with EXIT_FAILURE. Other driver calls are wrapped
// in CCHECK().
void build(CUmodule& module,
           CUfunction& kernel,
           const std::vector< std::string >& files,
           const char* kernel_name) {


    CUjit_option options[] = {CU_JIT_WALL_TIME,
                              CU_JIT_INFO_LOG_BUFFER,
                              CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
                              CU_JIT_ERROR_LOG_BUFFER,
                              CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
                              CU_JIT_LOG_VERBOSE};
    float walltime = 0.f;
    const unsigned bufsize = 0x10000;
    char error_buf[bufsize] = "";
    char log_buf[bufsize] = "";
    const int verbose = 1;
    // Size and flag options carry their value in the pointer itself; go
    // through size_t so the integer-to-pointer conversion is same-width.
    void* option_values[] = {(void*) &walltime,
                             (void*) log_buf,
                             (void*) (size_t) bufsize,
                             (void*) error_buf,
                             (void*) (size_t) bufsize,
                             (void*) (size_t) verbose};

    void* compiled_code = 0;
    size_t compiled_size = 0;
    // Starts as "not success" so that an empty file list is reported as a
    // failure below.
    int status = CUDA_SUCCESS - 1;

    CUlinkState link_state = CUlinkState();

    const int num_options = sizeof(options) / sizeof(CUjit_option);

    // Create a pending linker invocation
    CCHECK(cuLinkCreate(num_options,
                        options, option_values, &link_state));

    // Add every PTX file, stopping at the first failure so that a later
    // successful file cannot overwrite (and hide) an earlier error.
    for(std::vector< std::string >::const_iterator i = files.begin();
        i != files.end();
        ++i) {
        status = cuLinkAddFile(link_state,
                               CU_JIT_INPUT_PTX,
                               i->c_str(),
                               0, //num options
                               0, //options,
                               0); //option values
        if( status != CUDA_SUCCESS ) break;
    }


    if( status != CUDA_SUCCESS ) {
        std::cerr << "PTX Linker Error:\n"<< error_buf << std::endl;
        exit(EXIT_FAILURE);
    }

    // Complete the linker step: compiled_code is filled with executable code.
    // The returned image is owned by the link state: do not free it, and
    // load it into the module before cuLinkDestroy is called.
    CCHECK(cuLinkComplete(link_state, &compiled_code, &compiled_size));
    assert(compiled_size > 0);
    assert(compiled_code);

    std::cout << "CUDA Link Completed in " << walltime << " ms\n"
              << log_buf << std::endl;

    CCHECK(cuModuleLoadData(&module, compiled_code));

    CCHECK(cuModuleGetFunction(&kernel, module, kernel_name));

    CCHECK(cuLinkDestroy(link_state));
}
Esempio n. 6
0
/* Link the NUM_OBJS PTX objects in PTX_OBJS with the CUDA driver JIT
   linker and load the linked image into *MODULE.  The link targets
   CU_TARGET_COMPUTE_30 and requests verbose info/error logs and the
   elapsed link time, which are emitted through GOMP_PLUGIN_debug.  Any
   driver error is reported through GOMP_PLUGIN_fatal.  */

static void
link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
	  unsigned num_objs)
{
  CUjit_option opts[7];
  void *optvals[7];
  float elapsed = 0.0f;
#define LOGSIZE 8192
  char elog[LOGSIZE];
  char ilog[LOGSIZE];
  unsigned long logsize = LOGSIZE;
  CUlinkState linkstate;
  CUresult r;
  void *linkout;
  size_t linkoutsize __attribute__ ((unused));

  /* Start with empty logs so that printing them below is well defined
     even if the linker never writes to them.  */
  elog[0] = '\0';
  ilog[0] = '\0';

  opts[0] = CU_JIT_WALL_TIME;
  optvals[0] = &elapsed;

  opts[1] = CU_JIT_INFO_LOG_BUFFER;
  optvals[1] = &ilog[0];

  /* Size options carry their value in the pointer itself.  */
  opts[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
  optvals[2] = (void *) logsize;

  opts[3] = CU_JIT_ERROR_LOG_BUFFER;
  optvals[3] = &elog[0];

  opts[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
  optvals[4] = (void *) logsize;

  opts[5] = CU_JIT_LOG_VERBOSE;
  optvals[5] = (void *) 1;

  opts[6] = CU_JIT_TARGET;
  optvals[6] = (void *) CU_TARGET_COMPUTE_30;

  r = cuLinkCreate (7, opts, optvals, &linkstate);
  if (r != CUDA_SUCCESS)
    GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));

  for (; num_objs--; ptx_objs++)
    {
      /* cuLinkAddData's 'data' argument erroneously omits the const
	 qualifier.  */
      GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, (char*)ptx_objs->code,
			 ptx_objs->size, 0, 0, 0, 0);
      if (r != CUDA_SUCCESS)
	{
	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
	  GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s",
			     cuda_error (r));
	}
    }

  GOMP_PLUGIN_debug (0, "Linking\n");
  r = cuLinkComplete (linkstate, &linkout, &linkoutsize);

  GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
  GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);

  if (r != CUDA_SUCCESS)
    {
      /* Report the linker's own diagnostics too, as is done for
	 cuLinkAddData failures above.  */
      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
      GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
    }

  /* The linked image belongs to the link state, so it is loaded into
     the module before the link state is destroyed.  */
  r = cuModuleLoadData (module, linkout);
  if (r != CUDA_SUCCESS)
    GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));

  r = cuLinkDestroy (linkstate);
  if (r != CUDA_SUCCESS)
    GOMP_PLUGIN_fatal ("cuLinkDestroy error: %s", cuda_error (r));
}