Пример #1
0
/* Select `stream` on CUDA device `device` as the current stream of `state`,
   record its per-device stream id, and bind the current cuBLAS handle to it.
   Out-of-range arguments are reported through THError. */
void THCState_setStream(THCState *state, int device, int stream)
{
  /* `device` is a zero-based CUDA index; the message reports it shifted by
     one, i.e. converted back to the Torch index. */
  const int deviceInRange = (device >= 0) && (device < state->numDevices);
  if (!deviceInRange) {
    THError("%d is not a device", device + 1);
  }

  /* Accepted stream ids are 0 through numUserStreams, inclusive. */
  const int streamInRange = (stream >= 0) && (stream <= state->numUserStreams);
  if (!streamInRange) {
    THError("%d is not a stream", stream);
  }

  state->currentPerDeviceStream = stream;
  state->currentStream = THCState_getDeviceStream(state, device, stream);

  /* Keep the active cuBLAS handle issuing work on the newly selected stream. */
  THCublasCheck(
    cublasSetStream(state->currentBlasHandle, state->currentStream));
}
Пример #2
0
/* For every stream id stored as a value in the Lua table at stack index
   `arg`, make the corresponding stream on `device` wait on each of the
   `numEvents` events in `event`. The Lua stack is restored before returning. */
void waitSingleDeviceEvents(lua_State *L, THCState *state, int arg,
                           int device, cudaEvent_t * event, int numEvents)
{
  int e;

  /* Copy the table to the top of the stack so it sits at a known index */
  lua_pushvalue(L, arg);

  /* A nil key starts the traversal; each pass pops the value and keeps the
     key for the next lua_next call. A stream may end up waiting on an event
     it recorded itself; that is harmless and not filtered out. */
  for (lua_pushnil(L); lua_next(L, -2) != 0; lua_pop(L, 1)) {
    const int id = (int) lua_tonumber(L, -1);
    cudaStream_t waiter = THCState_getDeviceStream(state, device, id);

    for (e = 0; e < numEvents; ++e) {
      THCudaCheck(cudaStreamWaitEvent(waiter, event[e], 0));
    }
  }

  /* Remove the table copy pushed above */
  lua_pop(L, 1);
}
Пример #3
0
/* Tear down the resources held by `state`: RNG and allocator state, the
   per-device streams, cuBLAS handles and scratch buffers, and the host-side
   bookkeeping arrays. The CUDA device that was current on entry is made
   current again before returning. */
void THCudaShutdown(THCState* state)
{
  int originalDevice = -1;
  int totalDevices = 0;
  int d;

  THCRandom_shutdown(state);
  THCAllocator_shutdown(state);
  free(state->rngState);
  free(state->deviceProperties);

  THCudaCheck(cudaGetDevice(&originalDevice));
  THCudaCheck(cudaGetDeviceCount(&totalDevices));

  for (d = 0; d < totalDevices; d++) {
    int id;

    /* Per-device resources are released with that device selected */
    THCudaCheck(cudaSetDevice(d));

    /* Destroy Torch-created streams; id 0 is the default stream and is
       skipped */
    for (id = 1; id <= state->numUserStreams; id++) {
      THCudaCheck(cudaStreamDestroy(THCState_getDeviceStream(state, d, id)));
    }

    /* Destroy Torch-created cuBLAS handles; id 0 is NULL, mirroring the
       streams API, and is skipped */
    for (id = 1; id <= state->numUserBlasHandles; id++) {
      THCublasCheck(cublasDestroy(THCState_getDeviceBlasHandle(state, d, id)));
    }

    /* Release scratch space for every stream, including the default stream
       (id 0), which has its own buffer */
    for (id = 0; id <= state->numUserStreams; id++) {
      THCudaCheck(THCudaFree(state,
                             THCState_getDeviceScratchSpace(state, d, id)));
    }

    /* Finally free the host arrays that held the handles just released */
    free(state->resourcesPerDevice[d].streams);
    free(state->resourcesPerDevice[d].blasHandles);
    free(state->resourcesPerDevice[d].devScratchSpacePerStream);
  }
  free(state->resourcesPerDevice);

  THCudaCheck(cudaSetDevice(originalDevice));
}
Пример #4
0
/* For each stream id stored as a value in the Lua table at stack index `arg`,
   create a timing-disabled event in the next slot of `event` and record it on
   that stream of `device`. Returns the number of events created.
   `event` must have room for one entry per table element; no bound is checked
   here. The Lua stack is restored before returning. */
int createSingleDeviceEvents(lua_State *L, THCState *state, int arg,
                             int device, cudaEvent_t* event)
{
  int created = 0;

  /* Copy the table to the top of the stack so it sits at a known index */
  lua_pushvalue(L, arg);

  /* Start the traversal with a nil key */
  lua_pushnil(L);
  while (lua_next(L, -2) != 0) {
    cudaEvent_t *slot = event + created;
    const int id = (int) lua_tonumber(L, -1);
    cudaStream_t source = THCState_getDeviceStream(state, device, id);

    THCudaCheck(cudaEventCreateWithFlags(slot, cudaEventDisableTiming));
    THCudaCheck(cudaEventRecord(*slot, source));

    /* Drop the value; the key stays for the next lua_next call */
    lua_pop(L, 1);
    ++created;
  }

  /* Remove the table copy pushed above */
  lua_pop(L, 1);
  return created;
}
Пример #5
0
/* Walk the {gpu={streams...}} Lua table at stack index `arg` and make every
   listed stream wait on each of the first `streams` entries of `events`
   (`streams` is used as the event count). Table keys are converted to CUDA
   device indices by subtracting one.
   Note: the current CUDA device is changed via cudaSetDevice and is not
   restored here. The Lua stack is restored before returning. */
void waitMultiDeviceEvents(lua_State *L, THCState *state, int arg,
                           cudaEvent_t* events, int streams)
{
  int e;

  /* Copy the outer table to the top of the stack */
  lua_pushvalue(L, arg);

  /* A stream may end up waiting on an event it recorded itself; that is
     harmless and not filtered out. */
  lua_pushnil(L);
  while (lua_next(L, -2) != 0) {
    /* Key (index -2) selects the device; value (index -1) is its table of
       stream ids */
    const int device = (int) lua_tonumber(L, -2) - 1;
    THCudaCheck(cudaSetDevice(device));

    /* Duplicate the stream table and traverse the duplicate */
    lua_pushvalue(L, -1);
    for (lua_pushnil(L); lua_next(L, -2) != 0; lua_pop(L, 1)) {
      const int id = (int) lua_tonumber(L, -1);
      cudaStream_t waiter = THCState_getDeviceStream(state, device, id);

      /* This stream waits on every event */
      for (e = 0; e < streams; e++) {
        THCudaCheck(cudaStreamWaitEvent(waiter, events[e], 0));
      }
    }

    /* Drop the duplicate and the original value; the device key stays for
       the next outer lua_next call */
    lua_pop(L, 2);
  }

  /* Remove the outer table copy pushed above */
  lua_pop(L, 1);
}