Пример #1
0
/*
 * Compile the CUDA source string `src` to PTX using NVRTC.
 *
 * src      - program source handed to nvrtcCreateProgram().
 * len      - not used by this function.
 * arch_arg - value placed right after the "-arch" compiler option.
 * bin_len  - on success receives the size reported by nvrtcGetPTXSize().
 * log      - if non-NULL, the compiler log is fetched (whether or not the
 *            compilation succeeded) into a malloc()ed buffer stored in *log;
 *            this function does not free it. If fetching the log fails,
 *            *log is left untouched.
 * log_len  - if non-NULL (and the log was fetched), receives the log size.
 * ret      - not referenced directly here; presumably written by the FAIL
 *            macro -- TODO confirm against the macro definition.
 *
 * Returns a malloc()ed buffer holding the PTX, or leaves through FAIL(NULL, ...)
 * on error.
 */
static void *call_compiler(const char *src, size_t len, const char *arch_arg,
                           size_t *bin_len, char **log, size_t *log_len,
                           int *ret) {
  nvrtcProgram prog;
  void *buf = NULL;
  size_t buflen;
  const char *opts[4] = {
    "-arch", ""
    , "-G", "-lineinfo"
  };
  nvrtcResult err, err2;

  opts[1] = arch_arg;

  err = nvrtcCreateProgram(&prog, src, NULL, 0, NULL, NULL);
  if (err != NVRTC_SUCCESS) {
    FAIL(NULL, GA_SYS_ERROR);
  }

  /* Only DEBUG builds pass the trailing "-G" / "-lineinfo" options. */
  err = nvrtcCompileProgram(prog,
#ifdef DEBUG
                            4,
#else
                            2,
#endif
                            opts);

  /*
   * Fetch the log before looking at the compile result so the caller gets
   * diagnostics for failed builds too. Log retrieval is best effort: its
   * own status (err2) never changes the outcome of the function.
   */
  if (log != NULL) {
    err2 = nvrtcGetProgramLogSize(prog, &buflen);
    if (err2 != NVRTC_SUCCESS) goto end2;
    buf = malloc(buflen);
    if (buf == NULL) goto end2;
    err2 = nvrtcGetProgramLog(prog, (char *)buf);
    if (err2 != NVRTC_SUCCESS) goto end2;
    if (log_len != NULL) *log_len = buflen;
    *log = (char *)buf;
    buf = NULL; /* ownership moved to *log */
  }
end2:
  /*
   * If the log could not be read, buf still holds the abandoned log buffer.
   * Release it here; otherwise the PTX allocation below would overwrite the
   * pointer and leak it. After a successful hand-off buf is NULL and this is
   * a no-op.
   */
  free(buf);
  buf = NULL;

  if (err != NVRTC_SUCCESS) goto end;

  err = nvrtcGetPTXSize(prog, &buflen);
  if (err != NVRTC_SUCCESS) goto end;

  buf = malloc(buflen);
  if (buf == NULL) {
    nvrtcDestroyProgram(&prog);
    FAIL(NULL, GA_MEMORY_ERROR);
  }

  err = nvrtcGetPTX(prog, (char *)buf);
  if (err != NVRTC_SUCCESS) goto end;

  *bin_len = buflen;

end:
  nvrtcDestroyProgram(&prog);
  if (err != NVRTC_SUCCESS) {
    free(buf);
    FAIL(NULL, GA_SYS_ERROR);
  }
  return buf;
}
Пример #2
0
/*
 * build_kernel_source
 *
 * Loads the kernel source named by `source_file`, compiles it to PTX with
 * NVRTC for the virtual architecture compute_<target_capability>, optionally
 * passes the PTX through link_device_libraries(), and loads the resulting
 * image with cuModuleLoadData().
 *
 * The build log is printed to stdout when it is non-empty. A compilation
 * error (NVRTC_ERROR_COMPILATION) terminates the process with exit(1) after
 * the log has been printed; other NVRTC failures are reported through
 * nvrtc_error(), and a module-load failure through cuda_error().
 *
 * Returns the loaded CUmodule.
 */
static CUmodule
build_kernel_source(const char *source_file, long target_capability)
{
	char		   *source;
	int				link_dev_runtime;
	nvrtcProgram	program;
	nvrtcResult		rc;
	char			arch_buf[128];
	const char	   *options[10];
	int				opt_index = 0;
	int				build_failure = 0;
	char		   *build_log;
	size_t			build_log_len;
	char		   *ptx_image;
	size_t			ptx_image_len;
	void		   *bin_image;
	size_t			bin_image_len;
	CUmodule		cuda_module;
	CUresult		cuda_rc;

	/*
	 * NOTE(review): ownership of 'source' is not visible from here, so it
	 * is not released by this function -- confirm against
	 * load_kernel_source().
	 */
	source = load_kernel_source(source_file, &link_dev_runtime);
	rc = nvrtcCreateProgram(&program,
							source,
							NULL,
							0,
							NULL,
							NULL);
	if (rc != NVRTC_SUCCESS)
		nvrtc_error(rc, "nvrtcCreateProgram");

	/*
	 * Build the command line options the same way cuda_program.c does.
	 * At most six entries are stored, well within options[10].
	 */
	options[opt_index++] = "-I " CUDA_INCLUDE_PATH;
	snprintf(arch_buf, sizeof(arch_buf),
			 "--gpu-architecture=compute_%ld", target_capability);
	options[opt_index++] = arch_buf;
#ifdef PGSTROM_DEBUG
	options[opt_index++] = "--device-debug";
	options[opt_index++] = "--generate-line-info";
#endif
	options[opt_index++] = "--use_fast_math";
	if (link_dev_runtime)
		options[opt_index++] = "--relocatable-device-code=true";

	/*
	 * Kick the runtime compiler. A plain compilation error is remembered
	 * rather than reported immediately, so the build log can be shown first.
	 */
	rc = nvrtcCompileProgram(program, opt_index, options);
	if (rc != NVRTC_SUCCESS)
	{
		if (rc == NVRTC_ERROR_COMPILATION)
			build_failure = 1;
		else
			nvrtc_error(rc, "nvrtcCompileProgram");
	}

	/*
	 * Print the build log
	 */
	rc = nvrtcGetProgramLogSize(program, &build_log_len);
	if (rc != NVRTC_SUCCESS)
		nvrtc_error(rc, "nvrtcGetProgramLogSize");
	build_log = malloc(build_log_len + 1);
	if (!build_log)
	{
		fputs("out of memory", stderr);
		exit(1);
	}
	rc = nvrtcGetProgramLog(program, build_log);
	if (rc != NVRTC_SUCCESS)
		nvrtc_error(rc, "nvrtcGetProgramLog");
	/* the extra byte allocated above guarantees termination for "%s" */
	build_log[build_log_len] = '\0';

	if (build_log_len > 1)
		printf("build log:\n%s\n", build_log);
	/* the log is only needed for the message above */
	free(build_log);
	if (build_failure)
		exit(1);

	/*
	 * Get the PTX image
	 */
	rc = nvrtcGetPTXSize(program, &ptx_image_len);
	if (rc != NVRTC_SUCCESS)
		nvrtc_error(rc, "nvrtcGetPTXSize");
	ptx_image = malloc(ptx_image_len + 1);
	if (!ptx_image)
	{
		fputs("out of memory", stderr);
		exit(1);
	}
	rc = nvrtcGetPTX(program, ptx_image);
	if (rc != NVRTC_SUCCESS)
		nvrtc_error(rc, "nvrtcGetPTX");
	ptx_image[ptx_image_len] = '\0';

	/*
	 * The PTX has been copied out, so the NVRTC program object is no
	 * longer needed; release it instead of leaking it.
	 */
	rc = nvrtcDestroyProgram(&program);
	if (rc != NVRTC_SUCCESS)
		nvrtc_error(rc, "nvrtcDestroyProgram");

	/*
	 * Link device runtime if needed
	 */
	if (link_dev_runtime)
	{
		link_device_libraries(ptx_image, ptx_image_len,
							  &bin_image, &bin_image_len,
							  target_capability);
	}
	else
	{
		bin_image = ptx_image;
		bin_image_len = ptx_image_len;
	}

	/*
	 * NOTE(review): ptx_image / bin_image are not released here because
	 * the ownership rules of link_device_libraries() are not visible from
	 * this function -- confirm whether they can be freed after loading.
	 */
	cuda_rc = cuModuleLoadData(&cuda_module, bin_image);
	if (cuda_rc != CUDA_SUCCESS)
		cuda_error(cuda_rc, "cuModuleLoadData");	/* report the CUDA status, not the stale NVRTC one */
	return cuda_module;
}