bool load_kernels(bool experimental) { /* verify if device was initialized */ if(!device_initialized) { fprintf(stderr, "OpenCL: failed to initialize device.\n"); return false; } /* verify we have right opencl version */ if(!opencl_version_check()) return false; /* md5 hash to detect changes */ string kernel_path = path_get("kernel"); string kernel_md5 = path_files_md5_hash(kernel_path); string device_md5 = device_md5_hash(); /* try to use cache binary */ string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());; clbin = path_user_get(path_join("cache", clbin)); if(path_exists(clbin)) { /* if exists already, try use it */ if(!load_binary(kernel_path, clbin)) return false; } else { /* compile kernel */ if(!compile_kernel(kernel_path, kernel_md5)) return false; /* save binary for reuse */ save_binary(clbin); } /* find kernels */ ckPathTraceKernel = clCreateKernel(cpProgram, "kernel_ocl_path_trace", &ciErr); if(opencl_error(ciErr)) return false; ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr); if(opencl_error(ciErr)) return false; return true; }
string compile_kernel() { /* compute cubin name */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); /* attempt to use kernel provided with blender */ string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); if(path_exists(cubin)) return cubin; /* not found, try to use locally compiled kernel */ string kernel_path = path_get("kernel"); string md5 = path_files_md5_hash(kernel_path); cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); cubin = path_user_get(path_join("cache", cubin)); /* if exists already, use it */ if(path_exists(cubin)) return cubin; #ifdef _WIN32 if(cuHavePrecompiledKernels()) { if(major < 2) cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor)); else cuda_error_message(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor)); return ""; } #endif /* if not, find CUDA compiler */ string nvcc = cuCompilerPath(); if(nvcc == "") { cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location."); return ""; } /* compile */ string kernel = path_join(kernel_path, "kernel.cu"); string include = kernel_path; const int machine = system_cpu_bits(); const int maxreg = 24; double starttime = time_dt(); printf("Compiling CUDA kernel ...\n"); path_create_directories(cubin); string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" " "-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC", nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str()); if(system(command.c_str()) == -1) { cuda_error_message("Failed to execute compilation command, see console for details."); return ""; } /* verify if compilation succeeded */ if(!path_exists(cubin)) { cuda_error_message("CUDA kernel compilation failed, see console for details."); return ""; } printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); return cubin; }