int cuCompilerVersion() { string path = cuCompilerPath(); if(path == "") return 0; /* get --version output */ FILE *pipe = popen((path + " --version").c_str(), "r"); if(!pipe) { fprintf(stderr, "CUDA: failed to run compiler to retrieve version"); return 0; } char buf[128]; string output = ""; while(!feof(pipe)) if(fgets(buf, 128, pipe) != NULL) output += buf; pclose(pipe); /* parse version number */ string marker = "Cuda compilation tools, release "; size_t offset = output.find(marker); if(offset == string::npos) { fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output.c_str()); return 0; } string versionstr = output.substr(offset + marker.size(), string::npos); int major, minor; if(sscanf(versionstr.c_str(), "%d.%d", &major, &minor) < 2) { fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output.c_str()); return 0; } return 10*major + minor; }
string compile_kernel() { /* compute cubin name */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); /* attempt to use kernel provided with blender */ string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); if(path_exists(cubin)) return cubin; /* not found, try to use locally compiled kernel */ string kernel_path = path_get("kernel"); string md5 = path_files_md5_hash(kernel_path); cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); cubin = path_user_get(path_join("cache", cubin)); /* if exists already, use it */ if(path_exists(cubin)) return cubin; #ifdef _WIN32 if(cuHavePrecompiledKernels()) { if(major < 2) cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor)); else cuda_error_message(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor)); return ""; } #endif /* if not, find CUDA compiler */ string nvcc = cuCompilerPath(); if(nvcc == "") { cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location."); return ""; } /* compile */ string kernel = path_join(kernel_path, "kernel.cu"); string include = kernel_path; const int machine = system_cpu_bits(); const int maxreg = 24; double starttime = time_dt(); printf("Compiling CUDA kernel ...\n"); path_create_directories(cubin); string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" " "-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC", nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str()); if(system(command.c_str()) == -1) { cuda_error_message("Failed to execute compilation command, see console for details."); return ""; } /* verify if compilation succeeded */ if(!path_exists(cubin)) { cuda_error_message("CUDA kernel compilation failed, see console for details."); return ""; } printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); return cubin; }
CCL_NAMESPACE_BEGIN /* utility macros */ #define CUDA_LIBRARY_FIND_CHECKED(name) \ name = (t##name*)dynamic_library_find(lib, #name); #define CUDA_LIBRARY_FIND(name) \ name = (t##name*)dynamic_library_find(lib, #name); \ assert(name); #define CUDA_LIBRARY_FIND_V2(name) \ name = (t##name*)dynamic_library_find(lib, #name "_v2"); \ assert(name); /* initialization function */ bool cuLibraryInit() { static bool initialized = false; static bool result = false; if(initialized) return result; initialized = true; /* library paths */ #ifdef _WIN32 /* expected in c:/windows/system or similar, no path needed */ const char *path = "nvcuda.dll"; #elif defined(__APPLE__) /* default installation path */ const char *path = "/usr/local/cuda/lib/libcuda.dylib"; #else const char *path = "libcuda.so"; #endif /* load library */ DynamicLibrary *lib = dynamic_library_open(path); if(lib == NULL) return false; /* detect driver version */ int driver_version = 1000; CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion); if(cuDriverGetVersion) cuDriverGetVersion(&driver_version); /* we require version 4.0 */ if(driver_version < 4000) return false; /* fetch all function pointers */ CUDA_LIBRARY_FIND(cuInit); CUDA_LIBRARY_FIND(cuDeviceGet); CUDA_LIBRARY_FIND(cuDeviceGetCount); CUDA_LIBRARY_FIND(cuDeviceGetName); CUDA_LIBRARY_FIND(cuDeviceComputeCapability); CUDA_LIBRARY_FIND(cuDeviceTotalMem); CUDA_LIBRARY_FIND(cuDeviceGetProperties); CUDA_LIBRARY_FIND(cuDeviceGetAttribute); CUDA_LIBRARY_FIND(cuCtxCreate); CUDA_LIBRARY_FIND(cuCtxDestroy); CUDA_LIBRARY_FIND(cuCtxAttach); CUDA_LIBRARY_FIND(cuCtxDetach); CUDA_LIBRARY_FIND(cuCtxPushCurrent); CUDA_LIBRARY_FIND(cuCtxPopCurrent); CUDA_LIBRARY_FIND(cuCtxGetDevice); CUDA_LIBRARY_FIND(cuCtxSynchronize); CUDA_LIBRARY_FIND(cuModuleLoad); CUDA_LIBRARY_FIND(cuModuleLoadData); CUDA_LIBRARY_FIND(cuModuleUnload); CUDA_LIBRARY_FIND(cuModuleGetFunction); CUDA_LIBRARY_FIND(cuModuleGetGlobal); CUDA_LIBRARY_FIND(cuModuleGetTexRef); CUDA_LIBRARY_FIND(cuMemGetInfo); CUDA_LIBRARY_FIND(cuMemAlloc); CUDA_LIBRARY_FIND(cuMemAllocPitch); CUDA_LIBRARY_FIND(cuMemFree); CUDA_LIBRARY_FIND(cuMemGetAddressRange); CUDA_LIBRARY_FIND(cuMemAllocHost); CUDA_LIBRARY_FIND(cuMemFreeHost); CUDA_LIBRARY_FIND(cuMemHostAlloc); CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer); CUDA_LIBRARY_FIND(cuMemcpyHtoD); CUDA_LIBRARY_FIND(cuMemcpyDtoH); CUDA_LIBRARY_FIND(cuMemcpyDtoD); CUDA_LIBRARY_FIND(cuMemcpyDtoA); CUDA_LIBRARY_FIND(cuMemcpyAtoD); CUDA_LIBRARY_FIND(cuMemcpyHtoA); CUDA_LIBRARY_FIND(cuMemcpyAtoH); CUDA_LIBRARY_FIND(cuMemcpyAtoA); CUDA_LIBRARY_FIND(cuMemcpy2D); CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned); CUDA_LIBRARY_FIND(cuMemcpy3D); CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync); CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync); CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync); CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync); CUDA_LIBRARY_FIND(cuMemcpy2DAsync); CUDA_LIBRARY_FIND(cuMemcpy3DAsync); CUDA_LIBRARY_FIND(cuMemsetD8); CUDA_LIBRARY_FIND(cuMemsetD16); CUDA_LIBRARY_FIND(cuMemsetD32); CUDA_LIBRARY_FIND(cuMemsetD2D8); CUDA_LIBRARY_FIND(cuMemsetD2D16); CUDA_LIBRARY_FIND(cuMemsetD2D32); CUDA_LIBRARY_FIND(cuFuncSetBlockShape); CUDA_LIBRARY_FIND(cuFuncSetSharedSize); CUDA_LIBRARY_FIND(cuFuncGetAttribute); CUDA_LIBRARY_FIND(cuArrayCreate); CUDA_LIBRARY_FIND(cuArrayGetDescriptor); CUDA_LIBRARY_FIND(cuArrayDestroy); CUDA_LIBRARY_FIND(cuArray3DCreate); CUDA_LIBRARY_FIND(cuArray3DGetDescriptor); CUDA_LIBRARY_FIND(cuTexRefCreate); CUDA_LIBRARY_FIND(cuTexRefDestroy); CUDA_LIBRARY_FIND(cuTexRefSetArray); CUDA_LIBRARY_FIND(cuTexRefSetAddress); CUDA_LIBRARY_FIND(cuTexRefSetAddress2D); CUDA_LIBRARY_FIND(cuTexRefSetFormat); CUDA_LIBRARY_FIND(cuTexRefSetAddressMode); CUDA_LIBRARY_FIND(cuTexRefSetFilterMode); CUDA_LIBRARY_FIND(cuTexRefSetFlags); CUDA_LIBRARY_FIND(cuTexRefGetAddress); CUDA_LIBRARY_FIND(cuTexRefGetArray); CUDA_LIBRARY_FIND(cuTexRefGetAddressMode); CUDA_LIBRARY_FIND(cuTexRefGetFilterMode); CUDA_LIBRARY_FIND(cuTexRefGetFormat); CUDA_LIBRARY_FIND(cuTexRefGetFlags); CUDA_LIBRARY_FIND(cuParamSetSize); CUDA_LIBRARY_FIND(cuParamSeti); CUDA_LIBRARY_FIND(cuParamSetf); CUDA_LIBRARY_FIND(cuParamSetv); CUDA_LIBRARY_FIND(cuParamSetTexRef); CUDA_LIBRARY_FIND(cuLaunch); CUDA_LIBRARY_FIND(cuLaunchGrid); CUDA_LIBRARY_FIND(cuLaunchGridAsync); CUDA_LIBRARY_FIND(cuEventCreate); CUDA_LIBRARY_FIND(cuEventRecord); CUDA_LIBRARY_FIND(cuEventQuery); CUDA_LIBRARY_FIND(cuEventSynchronize); CUDA_LIBRARY_FIND(cuEventDestroy); CUDA_LIBRARY_FIND(cuEventElapsedTime); CUDA_LIBRARY_FIND(cuStreamCreate); CUDA_LIBRARY_FIND(cuStreamQuery); CUDA_LIBRARY_FIND(cuStreamSynchronize); CUDA_LIBRARY_FIND(cuStreamDestroy); /* cuda 2.1 */ CUDA_LIBRARY_FIND(cuModuleLoadDataEx); CUDA_LIBRARY_FIND(cuModuleLoadFatBinary); CUDA_LIBRARY_FIND(cuGLCtxCreate); CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); /* cuda 2.3 */ CUDA_LIBRARY_FIND(cuMemHostGetFlags); CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); /* cuda 3.0 */ CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync); CUDA_LIBRARY_FIND(cuFuncSetCacheConfig); CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource); CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray); CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer); CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags); CUDA_LIBRARY_FIND(cuGraphicsMapResources); CUDA_LIBRARY_FIND(cuGraphicsUnmapResources); CUDA_LIBRARY_FIND(cuGetExportTable); /* cuda 3.1 */ CUDA_LIBRARY_FIND(cuModuleGetSurfRef); CUDA_LIBRARY_FIND(cuSurfRefSetArray); CUDA_LIBRARY_FIND(cuSurfRefGetArray); CUDA_LIBRARY_FIND(cuCtxSetLimit); CUDA_LIBRARY_FIND(cuCtxGetLimit); /* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library * has both the old ones for compatibility and new ones with _v2 postfix, * we load the _v2 ones here. */ CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem); CUDA_LIBRARY_FIND_V2(cuCtxCreate); CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal); CUDA_LIBRARY_FIND_V2(cuMemGetInfo); CUDA_LIBRARY_FIND_V2(cuMemAlloc); CUDA_LIBRARY_FIND_V2(cuMemAllocPitch); CUDA_LIBRARY_FIND_V2(cuMemFree); CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange); CUDA_LIBRARY_FIND_V2(cuMemAllocHost); CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer); CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD); CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH); CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD); CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA); CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD); CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA); CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH); CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA); CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync); CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync); CUDA_LIBRARY_FIND_V2(cuMemcpy2D); CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned); CUDA_LIBRARY_FIND_V2(cuMemcpy3D); CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync); CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync); CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync); CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync); CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync); CUDA_LIBRARY_FIND_V2(cuMemsetD8); CUDA_LIBRARY_FIND_V2(cuMemsetD16); CUDA_LIBRARY_FIND_V2(cuMemsetD32); CUDA_LIBRARY_FIND_V2(cuMemsetD2D8); CUDA_LIBRARY_FIND_V2(cuMemsetD2D16); CUDA_LIBRARY_FIND_V2(cuMemsetD2D32); CUDA_LIBRARY_FIND_V2(cuArrayCreate); CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor); CUDA_LIBRARY_FIND_V2(cuArray3DCreate); CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor); CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress); CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D); CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress); CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer); CUDA_LIBRARY_FIND_V2(cuGLCtxCreate); /* cuda 4.0 */ CUDA_LIBRARY_FIND(cuCtxSetCurrent); if(cuHavePrecompiledKernels()) result = true; #ifndef _WIN32 else if(cuCompilerPath() != "") result = true; #endif return result; }