/*
 * Initializes the CUDA-side state held in `state`.
 *
 * Steps, in order:
 *   1. Query the number of CUDA devices and the currently selected device.
 *   2. Allocate state->rngState and hand it to THCRandom_init.
 *   3. Call THCudaBlas_init with the device count and current device.
 *   4. For every ordered pair of distinct devices (i, j), make i current and
 *      enable peer access to j when cudaDeviceCanAccessPeer reports it is
 *      possible.
 *   5. Make the device that was current on entry current again.
 *
 * CUDA return codes are passed to THCudaCheck (defined elsewhere). The one
 * exception is cudaErrorPeerAccessAlreadyEnabled, which is treated as
 * success so that the function can run when peer access was already turned
 * on earlier in the process.
 *
 * NOTE(review): the malloc result is not checked before being passed on.
 */
void THCudaInit(THCudaState* state)
{
  int count = 0;
  THCudaCheck(cudaGetDeviceCount(&count));

  int device = 0;
  THCudaCheck(cudaGetDevice(&device));

  state->rngState = (THCudaRNGState*)malloc(sizeof(THCudaRNGState));
  THCRandom_init(state->rngState, count, device);

  THCudaBlas_init(count, device);

  int i, j;
  for (i = 0; i < count; ++i) {
    /* cudaDeviceEnablePeerAccess acts on the current device, so select i. */
    THCudaCheck(cudaSetDevice(i));
    for (j = 0; j < count; ++j) {
      if (i != j) {
        int can = 0;
        THCudaCheck(cudaDeviceCanAccessPeer(&can, i, j));
        if (can) {
          cudaError_t err = cudaDeviceEnablePeerAccess(j, 0);
          if (err == cudaErrorPeerAccessAlreadyEnabled) {
            /* Not a failure. The status is still recorded as the runtime's
               last error, so read it once to reset it; otherwise a later
               cudaGetLastError() would report it. */
            cudaGetLastError();
          } else {
            THCudaCheck(err);
          }
        }
      }
    }
  }

  /* Restore the device that was current when we were called. */
  THCudaCheck(cudaSetDevice(device));
}
/*
 * Populates a THCState: RNG and BLAS sub-states, per-device properties,
 * device mode, peer-to-peer access and the per-device stream tables.
 *
 * The device that is current on entry is current again on return. CUDA
 * return codes are passed to THCudaCheck (defined elsewhere).
 */
void THCudaInit(THCState* state)
{
  int numDevices = 0;
  int initialDevice = 0;
  int dev;

  THCudaCheck(cudaGetDeviceCount(&numDevices));
  THCudaCheck(cudaGetDevice(&initialDevice));

  /* Random number generator state */
  state->rngState = (THCRNGState*)malloc(sizeof(THCRNGState));
  THCRandom_init(state, numDevices, initialDevice);

  /* BLAS state */
  state->blasState = (THCBlasState*)malloc(sizeof(THCBlasState));
  THCudaBlas_init(state, numDevices, initialDevice);

  /* One cudaDeviceProp record per device; filled in by the loop below */
  state->numDevices = numDevices;
  state->deviceProperties =
      (struct cudaDeviceProp*)malloc(numDevices * sizeof(struct cudaDeviceProp));

  THCState_setDeviceMode(state, THCStateDeviceModeManual);

  /* One stream table per device; no user streams are reserved initially */
  state->numUserStreams = 0;
  state->streamsPerDevice =
      (cudaStream_t**)malloc(numDevices * sizeof(cudaStream_t*));

  /* Turn on P2P access between every pair of devices that supports it */
  THCudaEnablePeerToPeerAccess(state);

  for (dev = 0; dev < numDevices; ++dev) {
    cudaStream_t* streams;

    THCudaCheck(cudaSetDevice(dev));
    THCudaCheck(cudaGetDeviceProperties(state->deviceProperties + dev, dev));

    /* Slot 0 of each table stands for the default stream (NULL) */
    streams = (cudaStream_t*)malloc(sizeof(cudaStream_t));
    streams[0] = NULL;
    state->streamsPerDevice[dev] = streams;
  }

  /* Go back to the device that was selected on entry */
  THCudaCheck(cudaSetDevice(initialDevice));

  /* Begin on the default stream of the current device */
  state->currentPerDeviceStream = 0;
  state->currentStream = NULL;
}
/*
 * Populates the RNG and BLAS sub-states of a THCState and enables peer
 * access between every ordered pair of distinct devices for which
 * cudaDeviceCanAccessPeer reports it possible.
 *
 * The device that is current on entry is current again on return. CUDA
 * return codes are passed to THCudaCheck (defined elsewhere), except that
 * cudaErrorPeerAccessAlreadyEnabled is accepted silently.
 */
void THCudaInit(THCState* state)
{
  int numDevices = 0;
  int startDevice = 0;
  int src;
  int dst;

  THCudaCheck(cudaGetDeviceCount(&numDevices));
  THCudaCheck(cudaGetDevice(&startDevice));

  state->rngState = (THCRNGState*)malloc(sizeof(THCRNGState));
  THCRandom_init(state, numDevices, startDevice);

  state->blasState = (THCBlasState*)malloc(sizeof(THCBlasState));
  THCudaBlas_init(state, numDevices, startDevice);

  for (src = 0; src < numDevices; ++src) {
    THCudaCheck(cudaSetDevice(src));

    for (dst = 0; dst < numDevices; ++dst) {
      int reachable = 0;
      cudaError_t status;

      if (src == dst) {
        continue;
      }

      THCudaCheck(cudaDeviceCanAccessPeer(&reachable, src, dst));
      if (!reachable) {
        continue;
      }

      status = cudaDeviceEnablePeerAccess(dst, 0);
      if (status != cudaErrorPeerAccessAlreadyEnabled) {
        THCudaCheck(status);
      } else {
        /* This status has been handled here, but later calls to
           cudaGetLastError would still report it. Read it once now to
           reset the last-error state. */
        cudaGetLastError();
      }
    }
  }

  THCudaCheck(cudaSetDevice(startDevice));
}