void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor *src)
{
  THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match");
  THArgCheck(THTensor_(isContiguous)(self), 2, "Target tensor must be contiguous");
  THArgCheck(THCTensor_(isContiguous)(state, src), 3, "Source tensor must be contiguous");

  if (THTensor_(nElement)(self) == 0) return;

  // Perform the copy wrt the current stream on the CudaTensor's device.
  int tensorDevice = THCTensor_(getDevice)(state, src);
  int currentDevice;
  THCudaCheck(cudaGetDevice(&currentDevice));

  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(tensorDevice));
  }

  THCudaCheck(cudaMemcpyAsync(THTensor_(data)(self),
                              THCTensor_(data)(state, src),
                              THCTensor_(nElement)(state, src) * sizeof(real),
                              cudaMemcpyDeviceToHost,
                              THCState_getDeviceStream(state, tensorDevice,
                                                       THCState_getCurrentStreamIndex(state))));

  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(currentDevice));
  }
}
Ejemplo n.º 2
0
/*
   Usage:
   n = cutorch.getStream()
   Returns the current user stream for all devices in use (as previously
   set via cutorch.setStream(n). 0 is the default stream on the device
   and is its initial value.
*/
static int cutorch_getStream(lua_State *L)
{
  THCState *state = cutorch_getstate(L);
  lua_pushnumber(L, THCState_getCurrentStreamIndex(state));

  return 1;
}
Ejemplo n.º 3
0
void* THCState_getCurrentDeviceScratchSpace(THCState* state)
{
  int device = -1;
  THCudaCheck(cudaGetDevice(&device));
  int stream = THCState_getCurrentStreamIndex(state);

  return THCState_getDeviceScratchSpace(state, device, stream);
}
Ejemplo n.º 4
0
static int cutorch_setDevice(lua_State *L)
{
  THCState *state = cutorch_getstate(L);
  int device = (int)luaL_checknumber(L, 1)-1;
  THCudaCheck(cudaSetDevice(device));
  THCRandom_setGenerator(state, device);

  /* The stream is per device, so update the stream as well */
  THCState_setStream(state, device, THCState_getCurrentStreamIndex(state));
  THCState_setBlasHandle(state, device, THCState_getCurrentBlasHandleIndex(state));

  return 0;
}
Ejemplo n.º 5
0
void THCState_setDevice(THCState *state, int device)
{
  int curDev;
  THCudaCheck(cudaGetDevice(&curDev));
  if (device != curDev) {
    THCudaCheck(cudaSetDevice(device));
    THCRandom_setGenerator(state, device);
    THCudaBlas_setHandle(state, device);

    /* The stream is per device, so update the stream as well */
    THCState_setStream(state, device, THCState_getCurrentStreamIndex(state));
  }
}
Ejemplo n.º 6
0
cudaStream_t THCState_getCurrentStream(THCState *state)
{
  /* This is called at the point of kernel execution.
     For some debugging code or improperly instrumented kernels,
     `state` is null */
  if (state) {
    int device;
    THCudaCheck(cudaGetDevice(&device));

    int streamIndex = THCState_getCurrentStreamIndex(state);
    if (streamIndex == 0) {
      return NULL;
    }

    return THCState_getDeviceResourcePtr(state, device)->streams[streamIndex];
  } else {
    /* assume default stream */
    return NULL;
  }
}