Exemplo n.º 1
0
	/* Locate, or build, the CUDA kernel binary (cubin) for the current device.
	 *
	 * Lookup order, as implemented below:
	 *  1. a cubin under "lib/" resolved through path_get(),
	 *  2. a previously built cubin in the user "cache" directory, whose file
	 *     name embeds a hash computed by path_files_md5_hash() over the kernel
	 *     directory,
	 *  3. a fresh build with nvcc, written to that same cache location.
	 *
	 * Returns the path of the cubin, or an empty string after the problem has
	 * been reported through cuda_error_message(). */
	string compile_kernel()
	{
		/* Query the compute capability used to pick the cubin name. Both values
		 * start at zero so that a failed query can never leave them
		 * indeterminate before they are formatted into file names and compiler
		 * flags below. */
		int major = 0, minor = 0;
		cuDeviceComputeCapability(&major, &minor, cuDevId);

		/* NOTE(review): assumes every valid device reports a major version of at
		 * least 1, so a value of zero or less means the query did not succeed. */
		if(major <= 0) {
			cuda_error_message("Failed to query CUDA device compute capability.");
			return "";
		}

		/* attempt to use kernel provided with blender */
		string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
		if(path_exists(cubin))
			return cubin;

		/* not found, try to use locally compiled kernel; the hash in the file
		 * name makes the cache entry specific to the current kernel files */
		string kernel_path = path_get("kernel");
		string md5 = path_files_md5_hash(kernel_path);

		cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
		cubin = path_user_get(path_join("cache", cubin));

		/* if exists already, use it */
		if(path_exists(cubin))
			return cubin;

#ifdef _WIN32
		/* on Windows, when precompiled kernels are reported as available, no
		 * local compilation is attempted: a missing cubin is reported instead */
		if(cuHavePrecompiledKernels()) {
			if(major < 2)
				cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor));
			else
				cuda_error_message(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor));
			return "";
		}
#endif

		/* if not, find CUDA compiler */
		string nvcc = cuCompilerPath();

		if(nvcc == "") {
			cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location.");
			return "";
		}

		/* compile */
		string kernel = path_join(kernel_path, "kernel.cu");
		string include = kernel_path;
		const int machine = system_cpu_bits();
		const int maxreg = 24;

		double starttime = time_dt();
		printf("Compiling CUDA kernel ...\n");

		/* make sure the output location exists before invoking the compiler */
		path_create_directories(cubin);

		string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
			"-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC",
			nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str());

		/* only a failure to run the command at all is detected here; a compiler
		 * that ran but failed is caught by the existence check that follows */
		if(system(command.c_str()) == -1) {
			cuda_error_message("Failed to execute compilation command, see console for details.");
			return "";
		}

		/* verify if compilation succeeded */
		if(!path_exists(cubin)) {
			cuda_error_message("CUDA kernel compilation failed, see console for details.");
			return "";
		}

		printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);

		return cubin;
	}
Exemplo n.º 2
0
CCL_NAMESPACE_BEGIN

/* utility macros */
/* Each macro expects a variable `lib` in scope and, for a given `name`, a
 * function type `t<name>` and a pointer variable `<name>` to assign to.
 * The bodies are wrapped in do { } while(0) so that every invocation is a
 * single statement (safe under an unbraced if/else) and must be followed by
 * a semicolon. */

/* Look up symbol `name` and store it in the pointer of the same name. The
 * result is not verified here; the caller tests the pointer afterwards. */
#define CUDA_LIBRARY_FIND_CHECKED(name) \
	do { \
		name = (t##name*)dynamic_library_find(lib, #name); \
	} while(0)

/* Look up symbol `name` and assert() that it was found. */
#define CUDA_LIBRARY_FIND(name) \
	do { \
		name = (t##name*)dynamic_library_find(lib, #name); \
		assert(name); \
	} while(0)

/* Look up symbol "<name>_v2", store it in the pointer called `name`, and
 * assert() that it was found. */
#define CUDA_LIBRARY_FIND_V2(name) \
	do { \
		name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
		assert(name); \
	} while(0)

/* initialization function */

/* Load the CUDA driver library and resolve the driver API entry points into
 * the function pointers of the same names.
 *
 * The work is done on the first call only; the outcome is kept in function
 * local statics and returned unchanged by every later call.
 *
 * Returns true when the library was opened, the driver reports version 4000
 * or higher, and either cuHavePrecompiledKernels() is true or (non-Windows
 * builds only) cuCompilerPath() returns a non-empty string. Returns false
 * otherwise. */
bool cuLibraryInit()
{
	static bool initialized = false;
	static bool result = false;

	if(initialized)
		return result;

	/* marked before doing any work, so a failed attempt is not retried */
	initialized = true;

	/* library paths */
#ifdef _WIN32
	/* expected in c:/windows/system or similar, no path needed */
	const char *path = "nvcuda.dll";
#elif defined(__APPLE__)
	/* default installation path */
	const char *path = "/usr/local/cuda/lib/libcuda.dylib";
#else
	const char *path = "libcuda.so";
#endif

	/* load library */
	DynamicLibrary *lib = dynamic_library_open(path);

	if(lib == NULL)
		return false;

	/* detect driver version; the value below is kept when the library does
	 * not export cuDriverGetVersion, which then fails the check that follows */
	int driver_version = 1000;

	CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
	if(cuDriverGetVersion)
		cuDriverGetVersion(&driver_version);

	/* we require version 4.0 */
	/* NOTE(review): lib is not closed on this early return */
	if(driver_version < 4000)
		return false;

	/* fetch all function pointers */
	CUDA_LIBRARY_FIND(cuInit);
	CUDA_LIBRARY_FIND(cuDeviceGet);
	CUDA_LIBRARY_FIND(cuDeviceGetCount);
	CUDA_LIBRARY_FIND(cuDeviceGetName);
	CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
	CUDA_LIBRARY_FIND(cuDeviceTotalMem);
	CUDA_LIBRARY_FIND(cuDeviceGetProperties);
	CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
	CUDA_LIBRARY_FIND(cuCtxCreate);
	CUDA_LIBRARY_FIND(cuCtxDestroy);
	CUDA_LIBRARY_FIND(cuCtxAttach);
	CUDA_LIBRARY_FIND(cuCtxDetach);
	CUDA_LIBRARY_FIND(cuCtxPushCurrent);
	CUDA_LIBRARY_FIND(cuCtxPopCurrent);
	CUDA_LIBRARY_FIND(cuCtxGetDevice);
	CUDA_LIBRARY_FIND(cuCtxSynchronize);
	CUDA_LIBRARY_FIND(cuModuleLoad);
	CUDA_LIBRARY_FIND(cuModuleLoadData);
	CUDA_LIBRARY_FIND(cuModuleUnload);
	CUDA_LIBRARY_FIND(cuModuleGetFunction);
	CUDA_LIBRARY_FIND(cuModuleGetGlobal);
	CUDA_LIBRARY_FIND(cuModuleGetTexRef);
	CUDA_LIBRARY_FIND(cuMemGetInfo);
	CUDA_LIBRARY_FIND(cuMemAlloc);
	CUDA_LIBRARY_FIND(cuMemAllocPitch);
	CUDA_LIBRARY_FIND(cuMemFree);
	CUDA_LIBRARY_FIND(cuMemGetAddressRange);
	CUDA_LIBRARY_FIND(cuMemAllocHost);
	CUDA_LIBRARY_FIND(cuMemFreeHost);
	CUDA_LIBRARY_FIND(cuMemHostAlloc);
	CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
	CUDA_LIBRARY_FIND(cuMemcpyHtoD);
	CUDA_LIBRARY_FIND(cuMemcpyDtoH);
	CUDA_LIBRARY_FIND(cuMemcpyDtoD);
	CUDA_LIBRARY_FIND(cuMemcpyDtoA);
	CUDA_LIBRARY_FIND(cuMemcpyAtoD);
	CUDA_LIBRARY_FIND(cuMemcpyHtoA);
	CUDA_LIBRARY_FIND(cuMemcpyAtoH);
	CUDA_LIBRARY_FIND(cuMemcpyAtoA);
	CUDA_LIBRARY_FIND(cuMemcpy2D);
	CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
	CUDA_LIBRARY_FIND(cuMemcpy3D);
	CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
	CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
	CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
	CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
	CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
	CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
	CUDA_LIBRARY_FIND(cuMemsetD8);
	CUDA_LIBRARY_FIND(cuMemsetD16);
	CUDA_LIBRARY_FIND(cuMemsetD32);
	CUDA_LIBRARY_FIND(cuMemsetD2D8);
	CUDA_LIBRARY_FIND(cuMemsetD2D16);
	CUDA_LIBRARY_FIND(cuMemsetD2D32);
	CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
	CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
	CUDA_LIBRARY_FIND(cuFuncGetAttribute);
	CUDA_LIBRARY_FIND(cuArrayCreate);
	CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
	CUDA_LIBRARY_FIND(cuArrayDestroy);
	CUDA_LIBRARY_FIND(cuArray3DCreate);
	CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
	CUDA_LIBRARY_FIND(cuTexRefCreate);
	CUDA_LIBRARY_FIND(cuTexRefDestroy);
	CUDA_LIBRARY_FIND(cuTexRefSetArray);
	CUDA_LIBRARY_FIND(cuTexRefSetAddress);
	CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
	CUDA_LIBRARY_FIND(cuTexRefSetFormat);
	CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
	CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
	CUDA_LIBRARY_FIND(cuTexRefSetFlags);
	CUDA_LIBRARY_FIND(cuTexRefGetAddress);
	CUDA_LIBRARY_FIND(cuTexRefGetArray);
	CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
	CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
	CUDA_LIBRARY_FIND(cuTexRefGetFormat);
	CUDA_LIBRARY_FIND(cuTexRefGetFlags);
	CUDA_LIBRARY_FIND(cuParamSetSize);
	CUDA_LIBRARY_FIND(cuParamSeti);
	CUDA_LIBRARY_FIND(cuParamSetf);
	CUDA_LIBRARY_FIND(cuParamSetv);
	CUDA_LIBRARY_FIND(cuParamSetTexRef);
	CUDA_LIBRARY_FIND(cuLaunch);
	CUDA_LIBRARY_FIND(cuLaunchGrid);
	CUDA_LIBRARY_FIND(cuLaunchGridAsync);
	CUDA_LIBRARY_FIND(cuEventCreate);
	CUDA_LIBRARY_FIND(cuEventRecord);
	CUDA_LIBRARY_FIND(cuEventQuery);
	CUDA_LIBRARY_FIND(cuEventSynchronize);
	CUDA_LIBRARY_FIND(cuEventDestroy);
	CUDA_LIBRARY_FIND(cuEventElapsedTime);
	CUDA_LIBRARY_FIND(cuStreamCreate);
	CUDA_LIBRARY_FIND(cuStreamQuery);
	CUDA_LIBRARY_FIND(cuStreamSynchronize);
	CUDA_LIBRARY_FIND(cuStreamDestroy);

	/* cuda 2.1 */
	CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
	CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
	CUDA_LIBRARY_FIND(cuGLCtxCreate);
	CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
	CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);

	/* cuda 2.3 (the two cuGraphicsGLRegister* entry points are already
	 * resolved in the group above and are not looked up a second time) */
	CUDA_LIBRARY_FIND(cuMemHostGetFlags);

	/* cuda 3.0 */
	CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
	CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
	CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
	CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
	CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
	CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
	CUDA_LIBRARY_FIND(cuGraphicsMapResources);
	CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
	CUDA_LIBRARY_FIND(cuGetExportTable);

	/* cuda 3.1 */
	CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
	CUDA_LIBRARY_FIND(cuSurfRefSetArray);
	CUDA_LIBRARY_FIND(cuSurfRefGetArray);
	CUDA_LIBRARY_FIND(cuCtxSetLimit);
	CUDA_LIBRARY_FIND(cuCtxGetLimit);

	/* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
	 * has both the old ones for compatibility and new ones with _v2 postfix,
	 * we load the _v2 ones here. Each lookup overwrites the pointer that was
	 * filled from the unsuffixed symbol earlier in this function. */
	CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
	CUDA_LIBRARY_FIND_V2(cuCtxCreate);
	CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
	CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
	CUDA_LIBRARY_FIND_V2(cuMemAlloc);
	CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
	CUDA_LIBRARY_FIND_V2(cuMemFree);
	CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
	CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
	CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
	CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
	CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
	CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
	CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
	CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
	CUDA_LIBRARY_FIND_V2(cuMemsetD8);
	CUDA_LIBRARY_FIND_V2(cuMemsetD16);
	CUDA_LIBRARY_FIND_V2(cuMemsetD32);
	CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
	CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
	CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
	CUDA_LIBRARY_FIND_V2(cuArrayCreate);
	CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
	CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
	CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
	CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
	CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
	CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
	CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
	CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);

	/* cuda 4.0 */
	CUDA_LIBRARY_FIND(cuCtxSetCurrent);

	/* usable when precompiled kernels are available or, outside Windows,
	 * when a compiler path is found */
	if(cuHavePrecompiledKernels())
		result = true;
#ifndef _WIN32
	else if(cuCompilerPath() != "")
		result = true;
#endif

	return result;
}