コード例 #1
0
ファイル: THCGeneral.c プロジェクト: ibcn-cloudlet/cutorch
void THCudaShutdown(THCState* state)
{
  THCRandom_shutdown(state);

  free(state->rngState);
  free(state->cudaHostAllocator);
  free(state->deviceProperties);

  int deviceCount = 0;
  int prevDev = -1;
  THCudaCheck(cudaGetDevice(&prevDev));
  THCudaCheck(cudaGetDeviceCount(&deviceCount));

  /* cleanup p2p access state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    free(state->p2pAccessEnabled[dev]);
  }
  free(state->p2pAccessEnabled);

  /* cleanup per-device state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    THCudaCheck(cudaSetDevice(dev));
    /* Free Torch-defined streams (0 is the default stream) */
    for (int stream = 1; stream <= state->numUserStreams; ++stream) {
      THCudaCheck(cudaStreamDestroy(
                    THCState_getDeviceStream(state, dev, stream)));
    }
    /* Free Torch-defined handles (0 is NULL for consistency with streams API) */
    for (int handle = 1; handle <= state->numUserBlasHandles; ++handle) {
      THCublasCheck(cublasDestroy(
                      THCState_getDeviceBlasHandle(state, dev, handle)));
    }
    /* Free per-stream scratch space; starts at 0 because there is space for
       the default stream as well*/
    for (int stream = 0; stream <= state->numUserStreams; ++stream) {
      THCudaCheck(THCudaFree(state, THCState_getDeviceScratchSpace(state, dev, stream)));
    }

    free(state->resourcesPerDevice[dev].streams);
    free(state->resourcesPerDevice[dev].blasHandles);
    free(state->resourcesPerDevice[dev].devScratchSpacePerStream);
  }
  free(state->resourcesPerDevice);
  state->cudaDeviceAllocator.shutdown(state->cudaDeviceAllocator.state);
  THCThreadLocal_free(state->currentPerDeviceStream);
  THCThreadLocal_free(state->currentPerDeviceBlasHandle);

  THCudaCheck(cudaSetDevice(prevDev));
}
コード例 #2
0
ファイル: THCGeneral.cpp プロジェクト: HustlehardInc/pytorch
void THCudaShutdown(THCState* state)
{
  THCRandom_shutdown(state);

  free(state->rngState);
  free(state->deviceProperties);

  int deviceCount = 0;
  int prevDev = -1;
  THCudaCheck(cudaGetDevice(&prevDev));
  THCudaCheck(cudaGetDeviceCount(&deviceCount));

  /* cleanup p2p access state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    free(state->p2pAccessEnabled[dev]);
  }
  free(state->p2pAccessEnabled);

  /* cleanup per-device state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    THCudaCheck(cudaSetDevice(dev));
    THCCudaResourcesPerDevice* res = &(state->resourcesPerDevice[dev]);
    /* Free user defined BLAS handles */
    for (int i = 0; i < res->numBlasHandles; ++i) {
      THCublasCheck(cublasDestroy(res->blasHandles[i]));
    }
    /* Free user defined sparse handles */
    for (int i = 0; i < res->numSparseHandles; ++i) {
      THCusparseCheck(cusparseDestroy(res->sparseHandles[i]));
    }

    free(res->blasHandles);
    free(res->sparseHandles);
    THCStream_free((THCStream*)THCThreadLocal_get(state->currentStreams[dev]));
    THCThreadLocal_free(state->currentStreams[dev]);
  }
  free(state->resourcesPerDevice);
  if (state->cudaDeviceAllocator->emptyCache) {
    state->cudaDeviceAllocator->emptyCache(state->cudaDeviceAllocator->state);
  }
  if (state->cudaHostAllocator == &THCCachingHostAllocator) {
    THCCachingHostAllocator_emptyCache();
  }
  free(state->currentStreams);
  THCThreadLocal_free(state->currentPerDeviceBlasHandle);

  THCudaCheck(cudaSetDevice(prevDev));
}
コード例 #3
0
ファイル: THCGeneral.c プロジェクト: ASAPPinc/cutorch
void THCState_setBlasHandle(THCState *state, int device, int handle)
{  /* `device` is a CUDA index */
  if (device >= state->numDevices || device < 0)
  {
    THError("%d is not a device", device + 1 /* back to Torch index */);
  }

  if (handle > state->numUserBlasHandles || handle <= 0)
  {
    THError("%d is not a valid handle, valid range is: (1, %d)",
            handle, state->numUserBlasHandles);
  }
  state->currentBlasHandle =
    THCState_getDeviceBlasHandle(state, device, handle);
  state->currentPerDeviceBlasHandle = handle;
  THCublasCheck(cublasSetStream(state->currentBlasHandle, state->currentStream));
}
コード例 #4
0
ファイル: THCGeneral.c プロジェクト: ASAPPinc/cutorch
void THCState_setStream(THCState *state, int device, int stream)
{
  /* `device` is a CUDA index */
  if (device >= state->numDevices || device < 0)
  {
    THError("%d is not a device", device + 1 /* back to Torch index */);
  }

  if (stream > state->numUserStreams || stream < 0)
  {
    THError("%d is not a stream", stream);
  }
  state->currentStream =
    THCState_getDeviceStream(state, device, stream);
  state->currentPerDeviceStream = stream;
  THCublasCheck(cublasSetStream(state->currentBlasHandle,
                                state->currentStream));
}
コード例 #5
0
ファイル: THCGeneral.c プロジェクト: ASAPPinc/cutorch
void THCState_reserveBlasHandles(THCState* state, int numBlasHandles)
{
  if (numBlasHandles <= state->numUserBlasHandles)
  {
    return;
  }

  int prevDev = -1;
  THCudaCheck(cudaGetDevice(&prevDev));

  /* Otherwise, we have to allocate a new set of blasHandles */
  for (int dev = 0; dev < state->numDevices; ++dev) {
    THCudaCheck(cudaSetDevice(dev));

    /* +1 to be consistent with stream API, blas handle 0 is NULL and unused */
    cublasHandle_t* newBlasHandles =
      (cublasHandle_t*) malloc((numBlasHandles + 1) * sizeof(cublasHandle_t));

    /* Copy over old blasHandles
       (0 is NULL, 1 ... numUserBlasHandles are rest) */
    newBlasHandles[0] = NULL;
    for (int hndl = 1; hndl <= state->numUserBlasHandles; ++hndl) {
      newBlasHandles[hndl] = THCState_getDeviceBlasHandle(state, dev, hndl);
    }

    /* Allocate new handles */
    for (int hndl = state->numUserBlasHandles + 1; hndl <= numBlasHandles; ++hndl) {
      newBlasHandles[hndl] = NULL;
      THCublasCheck(cublasCreate(newBlasHandles + hndl));
    }

    THCCudaResourcesPerDevice* res = THCState_getDeviceResourcePtr(state, dev);
    free(res->blasHandles);
    res->blasHandles = newBlasHandles;
  }

  state->numUserBlasHandles = numBlasHandles;

  THCudaCheck(cudaSetDevice(prevDev));
}
コード例 #6
0
ファイル: THCGeneral.cpp プロジェクト: HustlehardInc/pytorch
void THCState_reserveDeviceBlasHandles(THCState* state, int device, int numBlasHandles)
{
  int prevDev = -1;
  THCCudaResourcesPerDevice* res = THCState_getDeviceResourcePtr(state, device);
  if (numBlasHandles <= res->numBlasHandles) {
    return;
  }

  THCudaCheck(cudaGetDevice(&prevDev));
  THCudaCheck(cudaSetDevice(device));

  size_t size = numBlasHandles * sizeof(cublasHandle_t);
  cublasHandle_t* handles = (cublasHandle_t*) realloc(res->blasHandles, size);
  for (int i = res->numBlasHandles; i < numBlasHandles; ++i) {
    handles[i] = NULL;
    THCublasCheck(cublasCreate(&handles[i]));
  }
  res->blasHandles = handles;
  res->numBlasHandles = numBlasHandles;

  THCudaCheck(cudaSetDevice(prevDev));
}