Beispiel #1
0
void THCudaInit(THCudaState* state)
{
  int count = 0;
  THCudaCheck(cudaGetDeviceCount(&count));

  int device = 0;
  THCudaCheck(cudaGetDevice(&device));

  state->rngState = (THCudaRNGState*)malloc(sizeof(THCudaRNGState));
  THCRandom_init(state->rngState, count, device);

  THCudaBlas_init(count, device);

  int i,j;
  for(i=0; i < count; ++i)
  {
    THCudaCheck(cudaSetDevice(i));
    for (j=0; j < count; ++j)
    {
      if(i != j)
      {
        int can = 0;
        THCudaCheck(cudaDeviceCanAccessPeer(&can, i, j));
        if(can)
          THCudaCheck(cudaDeviceEnablePeerAccess(j, 0));
      }
    }
  }
  THCudaCheck(cudaSetDevice(device));
}
Beispiel #2
0
void THCudaInit(THCState* state)
{
  int count = 0;
  THCudaCheck(cudaGetDeviceCount(&count));

  int device = 0;
  THCudaCheck(cudaGetDevice(&device));

  state->rngState = (THCRNGState*)malloc(sizeof(THCRNGState));
  THCRandom_init(state, count, device);

  state->blasState = (THCBlasState*)malloc(sizeof(THCBlasState));
  THCudaBlas_init(state, count, device);

  state->numDevices = count;
  state->deviceProperties =
    (struct cudaDeviceProp*)malloc(count * sizeof(struct cudaDeviceProp));

  THCState_setDeviceMode(state, THCStateDeviceModeManual);

  state->numUserStreams = 0;
  state->streamsPerDevice =
    (cudaStream_t**)malloc(count * sizeof(cudaStream_t*));

  /* Enable P2P access between all pairs, if possible */
  THCudaEnablePeerToPeerAccess(state);

  for (int i = 0; i < count; ++i)
  {
    THCudaCheck(cudaSetDevice(i));
    THCudaCheck(cudaGetDeviceProperties(&state->deviceProperties[i], i));
    /* Stream index 0 will be the default stream for convenience; by
       default no user streams are reserved */
    state->streamsPerDevice[i] =
      (cudaStream_t*)malloc(sizeof(cudaStream_t));
    state->streamsPerDevice[i][0] = NULL;
  }

  /* Restore to previous device */
  THCudaCheck(cudaSetDevice(device));

  /* Start in the default stream on the current device */
  state->currentPerDeviceStream = 0;
  state->currentStream = NULL;
}
Beispiel #3
0
void THCudaInit(THCState* state)
{
  int count = 0;
  THCudaCheck(cudaGetDeviceCount(&count));

  int device = 0;
  THCudaCheck(cudaGetDevice(&device));

  state->rngState = (THCRNGState*)malloc(sizeof(THCRNGState));
  THCRandom_init(state, count, device);

  state->blasState = (THCBlasState*)malloc(sizeof(THCBlasState));
  THCudaBlas_init(state, count, device);

  int i,j;
  for(i=0; i < count; ++i)
  {
    THCudaCheck(cudaSetDevice(i));
    for (j=0; j < count; ++j)
    {
      if(i != j)
      {
        int can = 0;
        THCudaCheck(cudaDeviceCanAccessPeer(&can, i, j));
        if(can) {
          cudaError_t err = cudaDeviceEnablePeerAccess(j, 0);
          if (err == cudaErrorPeerAccessAlreadyEnabled) {
            // Any future call to cudaGetLastError will now return an error,
            // even though we've already dealt with this specific error here.
            // Call cudaGetLastError once to reset the last error state.
            cudaGetLastError();

            continue;
          }
          THCudaCheck(err);
        }
      }
    }
  }
  THCudaCheck(cudaSetDevice(device));
}