void AtlasLibrary::Interface::load() { if(_failed) return; if(loaded()) return; #ifdef __APPLE__ const char* libraryName = "libcblas.dylib"; #else const char* libraryName = "libcblas.so.3"; #endif _library = dlopen(libraryName, RTLD_LAZY); util::log("AtlasLibrary") << "Loading library '" << libraryName << "'\n"; if(!loaded()) { util::log("AtlasLibrary") << " Failed to load library '" << libraryName << "'\n"; _failed = true; return; } #define DynLink( function ) util::bit_cast(function, \ dlsym(_library, #function)); checkFunction((void*)function, #function) DynLink(cblas_sgemm); #undef DynLink util::log("AtlasLibrary") << " Loaded library '" << libraryName << "' successfully\n"; }
void CudaDriver::Interface::load() { if( _driver != 0 ) return; #if __GNUC__ report( "Loading " << _libname ); _driver = dlopen( _libname.c_str(), RTLD_LAZY ); if( _driver == 0 ) { report( "Failed to load cuda driver." ); report( " " << dlerror() ); return; } DynLink(cuInit); DynLink(cuDriverGetVersion); DynLink(cuDeviceGet); DynLink(cuDeviceGetCount); DynLink(cuDeviceGetName); DynLink(cuDeviceComputeCapability); DynLinkV(cuDeviceTotalMem); DynLink(cuDeviceGetProperties); DynLink(cuDeviceGetAttribute); DynLink(cuCtxGetLimit); DynLink(cuCtxGetApiVersion); DynLinkV(cuCtxCreate); DynLink(cuCtxDestroy); DynLink(cuCtxAttach); DynLink(cuCtxDetach); DynLink(cuCtxPushCurrent); DynLink(cuCtxPopCurrent); DynLink(cuCtxGetDevice); DynLink(cuCtxSynchronize); DynLink(cuModuleLoad); DynLink(cuModuleLoadData); DynLink(cuModuleLoadDataEx); DynLink(cuModuleLoadFatBinary); DynLink(cuModuleUnload); DynLink(cuModuleGetFunction); DynLinkV(cuModuleGetGlobal); DynLink(cuModuleGetTexRef); DynLinkV(cuMemGetInfo); DynLinkV(cuMemAlloc); DynLinkV(cuMemAllocPitch); DynLinkV(cuMemFree); DynLinkV(cuMemGetAddressRange); DynLinkV(cuMemAllocHost); DynLinkV(cuMemHostRegister); DynLinkV(cuMemHostUnregister); DynLink(cuMemFreeHost); DynLink(cuMemHostAlloc); DynLinkV(cuMemHostGetDevicePointer); DynLink(cuMemHostGetFlags); DynLinkV(cuMemcpyHtoD); DynLinkV(cuMemcpyDtoH); DynLinkV(cuMemcpyDtoD); DynLinkV(cuMemcpyDtoA); DynLinkV(cuMemcpyAtoD); DynLinkV(cuMemcpyHtoA); DynLinkV(cuMemcpyAtoH); DynLinkV(cuMemcpyAtoA); DynLinkV(cuMemcpy2D); DynLinkV(cuMemcpy2DUnaligned); DynLinkV(cuMemcpy3D); DynLinkV(cuMemcpyHtoDAsync); DynLinkV(cuMemcpyDtoHAsync); DynLinkV(cuMemcpyHtoAAsync); DynLinkV(cuMemcpyAtoHAsync); DynLinkV(cuMemcpy2DAsync); DynLinkV(cuMemcpy3DAsync); DynLinkV(cuMemsetD8); DynLinkV(cuMemsetD16); DynLinkV(cuMemsetD32); DynLinkV(cuMemsetD2D8); DynLinkV(cuMemsetD2D16); DynLinkV(cuMemsetD2D32); DynLink(cuFuncSetBlockShape); DynLink(cuFuncSetSharedSize); DynLink(cuFuncGetAttribute); DynLink(cuFuncSetCacheConfig); DynLinkV(cuArrayCreate); DynLinkV(cuArrayGetDescriptor); DynLink(cuArrayDestroy); DynLinkV(cuArray3DCreate); DynLinkV(cuArray3DGetDescriptor); DynLink(cuTexRefCreate); DynLink(cuTexRefDestroy); DynLink(cuTexRefSetArray); DynLinkV(cuTexRefSetAddress); DynLinkV(cuTexRefSetAddress2D); DynLink(cuTexRefSetFormat); DynLink(cuTexRefSetAddressMode); DynLink(cuTexRefSetFilterMode); DynLink(cuTexRefSetFlags); DynLinkV(cuTexRefGetAddress); DynLink(cuTexRefGetArray); DynLink(cuTexRefGetAddressMode); DynLink(cuTexRefGetFilterMode); DynLink(cuTexRefGetFormat); DynLink(cuTexRefGetFlags); DynLink(cuParamSetSize); DynLink(cuParamSeti); DynLink(cuParamSetf); DynLink(cuParamSetv); DynLink(cuParamSetTexRef); DynLink(cuLaunch); DynLink(cuLaunchGrid); DynLink(cuLaunchGridAsync); DynLink(cuEventCreate); DynLink(cuEventRecord); DynLink(cuEventQuery); DynLink(cuEventSynchronize); DynLink(cuEventDestroy); DynLink(cuEventElapsedTime); DynLink(cuStreamCreate); DynLink(cuStreamQuery); DynLink(cuStreamSynchronize); DynLink(cuStreamDestroy); DynLink(cuGraphicsUnregisterResource); DynLink(cuGraphicsSubResourceGetMappedArray); DynLinkV(cuGraphicsResourceGetMappedPointer); DynLink(cuGraphicsResourceSetMapFlags); DynLink(cuGraphicsMapResources); DynLink(cuGraphicsUnmapResources); DynLink(cuGetExportTable); DynLink(cuGLInit); DynLinkV(cuGLCtxCreate); DynLink(cuGraphicsGLRegisterBuffer); DynLink(cuGraphicsGLRegisterImage); DynLink(cuGLRegisterBufferObject); DynLink(cuGLSetBufferObjectMapFlags); CUresult result = (*cuDriverGetVersion)(&_version); if (result == CUDA_SUCCESS) { report(" Driver version is: " << _version << " and was called successfully"); } else { report("cuDriverGetVersion() returned " << result); } #else assertM(false, "CUDA Driver support not compiled into Ocelot."); #endif }
void CudnnLibrary::Interface::load() { if(_failed) return; if(loaded()) return; if(!parallel::isCudaEnabled()) return; #ifdef __APPLE__ const char* libraryName = "libcudnn.dylib"; #else const char* libraryName = "libcudnn.so"; #endif _library = dlopen(libraryName, RTLD_LAZY); util::log("CudnnLibrary") << "Loading library '" << libraryName << "'\n"; if(!loaded()) { util::log("Cudnnlibrary") << " Failed to load library '" << libraryName << "'\n"; _failed = true; return; } try { #define DynLink( function ) \ util::bit_cast(function, dlsym(_library, #function)); \ checkFunction((void*)function, #function) DynLink(cudnnGetErrorString); DynLink(cudnnCreate); DynLink(cudnnDestroy); DynLink(cudnnCreateTensorDescriptor); DynLink(cudnnSetTensor4dDescriptor); DynLink(cudnnSetTensorNdDescriptor); DynLink(cudnnGetTensorNdDescriptor); DynLink(cudnnDestroyTensorDescriptor); DynLink(cudnnTransformTensor); DynLink(cudnnCreateFilterDescriptor); DynLink(cudnnSetFilter4dDescriptor); DynLink(cudnnSetFilterNdDescriptor); DynLink(cudnnGetFilterNdDescriptor); DynLink(cudnnDestroyFilterDescriptor); DynLink(cudnnCreateConvolutionDescriptor); DynLink(cudnnSetConvolution2dDescriptor); DynLink(cudnnDestroyConvolutionDescriptor); DynLink(cudnnCreatePoolingDescriptor); DynLink(cudnnSetPooling2dDescriptor_v4); DynLink(cudnnDestroyPoolingDescriptor); DynLink(cudnnGetPooling2dForwardOutputDim); DynLink(cudnnGetConvolutionForwardAlgorithm); DynLink(cudnnGetConvolutionForwardWorkspaceSize); DynLink(cudnnConvolutionForward); DynLink(cudnnGetConvolutionBackwardDataAlgorithm); DynLink(cudnnGetConvolutionBackwardDataWorkspaceSize); DynLink(cudnnConvolutionBackwardData); DynLink(cudnnGetConvolutionBackwardFilterAlgorithm); DynLink(cudnnGetConvolutionBackwardFilterWorkspaceSize); DynLink(cudnnConvolutionBackwardFilter); DynLink(cudnnPoolingForward); DynLink(cudnnPoolingBackward); DynLink(cudnnCreateDropoutDescriptor); DynLink(cudnnDestroyDropoutDescriptor); DynLink(cudnnSetDropoutDescriptor); DynLink(cudnnCreateRNNDescriptor); DynLink(cudnnDestroyRNNDescriptor); DynLink(cudnnSetRNNDescriptor); DynLink(cudnnGetRNNWorkspaceSize); DynLink(cudnnGetRNNTrainingReserveSize); DynLink(cudnnGetRNNParamsSize); DynLink(cudnnGetRNNLinLayerMatrixParams); DynLink(cudnnGetRNNLinLayerBiasParams); DynLink(cudnnRNNForwardTraining); DynLink(cudnnRNNBackwardData); DynLink(cudnnRNNBackwardWeights); #undef DynLink auto status = (*cudnnCreate)(&_handle); if(status != CUDNN_STATUS_SUCCESS) { throw std::runtime_error("cudnnCreate failed: " + getErrorString(status)); } util::log("Cudnnlibrary") << " Loaded library '" << libraryName << "' successfully\n"; } catch(...) { unload(); throw; } }