int GPUInterface::Initialize() { #ifdef BEAGLE_DEBUG_FLOW fprintf(stderr,"\t\t\tEntering GPUInterface::Initialize\n"); #endif resourceMap = new std::map<int, int>; // Driver init; CUDA manual: "Currently, the Flags parameter must be 0." CUresult error = cuInit(0); if (error == CUDA_ERROR_NO_DEVICE) { return 0; } else if (error != CUDA_SUCCESS) { fprintf(stderr, "CUDA error: \"%s\" from file <%s>, line %i.\n", GetCUDAErrorDescription(error), __FILE__, __LINE__); exit(-1); } int numDevices = 0; SAFE_CUDA(cuDeviceGetCount(&numDevices)); CUdevice tmpCudaDevice; int capabilityMajor; int capabilityMinor; int currentDevice = 0; for (int i=0; i < numDevices; i++) { SAFE_CUDA(cuDeviceGet(&tmpCudaDevice, i)); SAFE_CUDA(cuDeviceComputeCapability(&capabilityMajor, &capabilityMinor, tmpCudaDevice)); if ((capabilityMajor > 1 && capabilityMinor != 9999) || (capabilityMajor == 1 && capabilityMinor > 0)) { resourceMap->insert(std::make_pair(currentDevice++, i)); } } #ifdef BEAGLE_DEBUG_FLOW fprintf(stderr,"\t\t\tLeaving GPUInterface::Initialize\n"); #endif return 1; }
void GPUInterface::SetDevice(int deviceNumber, int paddedStateCount, int categoryCount, int paddedPatternCount, int unpaddedPatternCount, int tipCount, long flags) { #ifdef BEAGLE_DEBUG_FLOW fprintf(stderr,"\t\t\tEntering GPUInterface::SetDevice\n"); #endif SAFE_CUDA(cuDeviceGet(&cudaDevice, (*resourceMap)[deviceNumber])); unsigned int ctxFlags = CU_CTX_SCHED_AUTO; if (flags & BEAGLE_FLAG_SCALING_DYNAMIC) { ctxFlags |= CU_CTX_MAP_HOST; } CUresult error = cuCtxCreate(&cudaContext, ctxFlags, cudaDevice); if(error != CUDA_SUCCESS) { fprintf(stderr, "CUDA error: \"%s\" (%d) from file <%s>, line %i.\n", GetCUDAErrorDescription(error), error, __FILE__, __LINE__); if (error == CUDA_ERROR_INVALID_DEVICE) { fprintf(stderr, "(The requested CUDA device is likely set to compute exclusive mode. This mode prevents multiple processes from running on the device.)"); } exit(-1); } InitializeKernelResource(paddedStateCount, flags & BEAGLE_FLAG_PRECISION_DOUBLE); if (!kernelResource) { fprintf(stderr,"Critical error: unable to find kernel code for %d states.\n",paddedStateCount); exit(-1); } kernelResource->categoryCount = categoryCount; kernelResource->patternCount = paddedPatternCount; kernelResource->unpaddedPatternCount = unpaddedPatternCount; kernelResource->flags = flags; SAFE_CUDA(cuModuleLoadData(&cudaModule, kernelResource->kernelCode)); if ((paddedPatternCount < BEAGLE_MULTI_GRID_MAX || flags & BEAGLE_FLAG_PARALLELOPS_GRID) && !(flags & BEAGLE_FLAG_PARALLELOPS_STREAMS)) { numStreams = 1; cudaStreams = (CUstream*) malloc(sizeof(CUstream) * numStreams); cudaEvents = (CUevent*) malloc(sizeof(CUevent) * (numStreams + 1)); cudaStreams[0] = NULL; CUevent event; for(int i=0; i<2; i++) { SAFE_CUDA(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING)); cudaEvents[i] = event; } } else { numStreams = tipCount/2 + 1; if (numStreams > BEAGLE_STREAM_COUNT) { numStreams = BEAGLE_STREAM_COUNT; } cudaStreams = (CUstream*) malloc(sizeof(CUstream) * numStreams); CUstream stream; cudaEvents = (CUevent*) malloc(sizeof(CUevent) * (numStreams + 1)); CUevent event; for(int i=0; i<numStreams; i++) { SAFE_CUDA(cuStreamCreate(&stream, CU_STREAM_DEFAULT)); cudaStreams[i] = stream; SAFE_CUDA(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING)); cudaEvents[i] = event; } SAFE_CUDA(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING)); cudaEvents[numStreams] = event; } SAFE_CUDA(cuCtxPopCurrent(&cudaContext)); #ifdef BEAGLE_DEBUG_FLOW fprintf(stderr,"\t\t\tLeaving GPUInterface::SetDevice\n"); #endif }