Ejemplo n.º 1
/*
 * Front end for THLapack_(syev) operating on a 2-D tensor.
 *
 * a_   : input tensor; must satisfy nDimension == 2 (checked below). A
 *        contiguous version of it (A) is what is handed to syev.
 * w_   : tensor whose data pointer is passed to syev right after lda.
 * jobz : only jobz[0] is forwarded to syev.
 * uplo : only uplo[0] is forwarded to syev.
 *
 * Raises a THError when syev reports a non-zero info value.
 *
 * NOTE(review): the function's braces are absent in this excerpt, and neither
 * A nor work is released in the visible lines -- confirm against the complete
 * source before relying on this listing.
 */
TH_API void THLab_(syev)(THTensor *a_, THTensor *w_, const char *jobz, const char *uplo)
  int n, lda, lwork, info;
  THTensor *A;
  THTensor *work;
  real wkopt;

  THArgCheck(a_->nDimension == 2, 2, "A should be 2 dimensional");
  A = THTensor_(newContiguous)(a_);
  n = A->size[1];
  lda = n;
  /* First call: lwork == -1 asks syev to report a workspace size in wkopt. */
  THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(A), lda,
		  THTensor_(data)(w_), &wkopt, -1, &info);
  lwork = (int)wkopt;
  work = THTensor_(newWithSize1d)(lwork);
  /* Second call: same arguments, now with a workspace of lwork elements. */
  THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(A), lda,
		  THTensor_(data)(w_), THTensor_(data)(work), lwork, &info);

  /* info > 0 and info < 0 are reported with different messages. */
  if (info > 0)
    THError(" Lapack syev : Failed to converge. %d off-diagonal elements of an didn't converge to zero",info);
  else if (info < 0)
    THError("Lapack syev : Argument %d : illegal value", -info);
Ejemplo n.º 2
/*
 * Rank-1 update of a matrix through THBlas_ger.
 *
 * r_    : destination matrix; must be the same tensor object as t.
 * beta  : when different from 1, r_ is first scaled by beta via
 *         THFloatTensor_mul.
 * t     : 2-D tensor whose sizes must equal (vec1->size[0], vec2->size[0]).
 * alpha : scalar forwarded to THBlas_ger.
 * vec1, vec2 : 1-D tensors.
 *
 * Every violated precondition is reported through THError.
 */
void THFloatTensor_addr(THFloatTensor *r_, float beta, THFloatTensor *t, float alpha, THFloatTensor *vec1, THFloatTensor *vec2)
{
  /* Argument validation. */
  if (!((vec1->nDimension == 1) && (vec2->nDimension == 1))) {
    THError("vector and vector expected, got %dD, %dD tensors", vec1->nDimension, vec2->nDimension);
  }

  if (t->nDimension != 2) {
    THError("expected matrix, got %dD tensor for t", t->nDimension);
  }

  if (!((t->size[0] == vec1->size[0]) && (t->size[1] == vec2->size[0]))) {
    THError("size mismatch, t: %ld, vec1: %ld, t: %ld, vec2: %ld", t->size[0], vec1->size[0], t->size[1], vec2->size[0]);
  }

  /* Only the in-place form is supported. */
  if (r_ != t) {
    THError("r_ != t not implemented");
  }

  if (beta != 1) {
    THFloatTensor_mul(r_, r_, beta);
  }

  /*
   * Pick the ger call from whichever dimension of r_ has unit stride; the
   * two vectors trade places between the two layouts.
   */
  if (r_->stride[0] == 1) {
    THBlas_ger(vec1->size[0], vec2->size[0],
               alpha,
               THFloatTensor_data(vec1), vec1->stride[0],
               THFloatTensor_data(vec2), vec2->stride[0],
               THFloatTensor_data(r_), r_->stride[1]);
  } else if (r_->stride[1] == 1) {
    THBlas_ger(vec2->size[0], vec1->size[0],
               alpha,
               THFloatTensor_data(vec2), vec2->stride[0],
               THFloatTensor_data(vec1), vec1->stride[0],
               THFloatTensor_data(r_), r_->stride[0]);
  } else {
    THError("addr for non-contiguous not implemented");
  }
}
Ejemplo n.º 3
/*
 * realloc() wrapper that reports problems through THError and forwards the
 * change in allocation size to THHeapUpdate.
 *
 * Visible behaviour: returns NULL when size == 0, raises a THError when
 * size < 0, otherwise reallocates ptr and passes
 * getAllocSize(newptr) - getAllocSize(ptr) to THHeapUpdate.
 *
 * NOTE(review): this excerpt has lost lines -- the block opened by
 * 'if(!newptr && torchGCFunction) {' is never closed and the "not enough
 * memory" THError has no visible guard. Confirm against the complete source.
 * NOTE(review): "%dGB" is paired with a ptrdiff_t expression; "%td" would
 * match the argument type -- TODO confirm how THError formats its arguments.
 */
void* THRealloc(void *ptr, ptrdiff_t size)

  if(size == 0)
    return NULL;

  if(size < 0)
    THError("$ Torch: invalid memory size -- maybe an overflow?");

  /* Stored negated so it can simply be added to the new size below. */
  ptrdiff_t oldSize = -getAllocSize(ptr);
  void *newptr = realloc(ptr, size);

  /* Second attempt, taken only when a torchGCFunction is set. */
  if(!newptr && torchGCFunction) {
    newptr = realloc(ptr, size);

    THError("$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824);

  // update heapSize only after successfully reallocated
  THHeapUpdate(oldSize + getAllocSize(newptr));

  return newptr;
Ejemplo n.º 4
Archivo: init.c Proyecto: 0wu/cutorch
/*
 * Validates the Lua table at stack index `arg` as a list of stream ids for
 * `device` and returns the number of entries it contains.
 *
 * L      : Lua state; its stack is left as it was found on normal return.
 * state  : THC state used for the per-entry bounds check.
 * arg    : stack index of the table to inspect.
 * device : device whose streams the ids refer to.
 *
 * Raises a THError when `arg` is not a table or when an entry is not numeric;
 * THCState_getDeviceStream is relied upon to reject out-of-range ids.
 */
int checkAndCountListOfStreams(lua_State *L, THCState *state, int arg,
                               int device)
{
  if (!lua_istable(L, arg)) {
    THError("expecting table of device streams");
  }

  /* Push table to top */
  lua_pushvalue(L, arg);

  /* Check that all values in the table are numeric and in bounds */
  int streams = 0;

  /* lua_next pops a key from the top of the stack; nil starts the traversal. */
  lua_pushnil(L);
  while (lua_next(L, -2)) {
    if (!lua_isnumber(L, -1)) {
      THError("streamWaitFor: list of streams must be numeric");
    }
    int streamId = (int) lua_tonumber(L, -1);

    /* This will error out if the stream is not in bounds */
    THCState_getDeviceStream(state, device, streamId);

    ++streams;

    /* Drop the value, keep the key for the next lua_next call. */
    lua_pop(L, 1);
  }

  /* Pop table from top */
  lua_pop(L, 1);
  return streams;
}
Ejemplo n.º 5
/*
 * Forward pass of the class negative-log-likelihood criterion.
 *
 * input        : 1-D or 2-D tensor; its last dimension is read as the number
 *                of classes.
 * target       : index tensor with at most one dimension; each value is
 *                shifted by -1 before being used as a class index.
 * output       : element 0 receives the (optionally averaged) loss.
 * sizeAverage  : when true and the accumulated weight is non-zero, the loss
 *                is divided by that weight.
 * weights      : optional per-class weights; NULL means weight 1 everywhere.
 * total_weight : element 0 receives the sum of the weights that were used.
 *
 * NOTE(review): closing braces are absent and the excerpt stops inside the
 * final 'if (weights) {' block, so any cleanup of the contiguous copies is not
 * visible here.
 */
void THNN_(ClassNLLCriterion_updateOutput)(THNNState *state, THTensor *input,
                                           THIndexTensor *target,
                                           THTensor *output, bool sizeAverage,
                                           THTensor *weights,
                                           THTensor *total_weight)
  int n_dims = THTensor_(nDimension)(input);
  int n_classes = THTensor_(size)(input, n_dims - 1);

  if (THIndexTensor_(nDimension)(target) > 1) {
    THError("multi-target not supported");
  if (THTensor_(nDimension)(input) > 2) {
    THError("input tensor should be 1D or 2D");

  /* From here on the parameters refer to contiguous versions of the tensors. */
  input = THTensor_(newContiguous)(input);
  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  real *input_data = THTensor_(data)(input);
  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *output_data = THTensor_(data)(output);
  real *total_weight_data = THTensor_(data)(total_weight);

  output_data[0] = total_weight_data[0] = 0.0;

  if (THTensor_(nDimension)(input) == 1) {
    /* Single sample: one target, one weight. */
    int cur_target = target_data[0] - 1;
    THAssert(cur_target >= 0 && cur_target < n_classes);
    total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f;
    output_data[0] = -input_data[cur_target] * total_weight_data[0];
  } else if (THTensor_(nDimension)(input) == 2) {
    /* Batch: accumulate weighted losses and weights over the rows. */
    int batch_size = THTensor_(size)(input, 0);
    THAssert(THIndexTensor_(size)(target, 0) == batch_size);

    int n_target = THTensor_(size)(input, 1);

    int i;
    for (i = 0; i < batch_size; i++) {
      int cur_target = target_data[i] - 1;
      THAssert(cur_target >= 0 && cur_target < n_classes);

      real cur_weight = weights ? weights_data[cur_target] : 1.0f;
      total_weight_data[0] += cur_weight;
      output_data[0] -= input_data[i * n_target + cur_target] * cur_weight;

  /* The division is skipped when the accumulated weight is zero. */
  if (sizeAverage && total_weight_data[0]) {
    output_data[0] /= total_weight_data[0];

  if (weights) {
Ejemplo n.º 6
/*
 * Releases a mapping described by a THMapAllocatorContext.
 *
 * ctx_ : pointer to the THMapAllocatorContext of the mapping.
 * data : address of the mapped region.
 *
 * On _WIN32 the view is unmapped with UnmapViewOfFile; elsewhere the code
 * uses close(), munmap() and shm_unlink(). Every failing call is reported
 * through THError.
 *
 * NOTE(review): several lines are missing from this excerpt (closing braces
 * and the conditions around the shm_unlink section), so the exact
 * circumstances under which shm_unlink or the "not available on platform"
 * error are reached cannot be read from here.
 */
static void THMapAllocator_free(void* ctx_, void* data) {
  THMapAllocatorContext *ctx = ctx_;

#ifdef _WIN32
  if(UnmapViewOfFile(data) == 0)
    THError("could not unmap the shared memory file");
#else /* _WIN32 */
  /* The descriptor is closed here only when the KEEPFD flag is set. */
  if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) {
    if (close(ctx->fd) == -1)
      THError("could not close file descriptor %d", ctx->fd);

  if (munmap(data, ctx->size))
    THError("could not unmap the shared memory file");

      if (shm_unlink(ctx->filename) == -1)
        THError("could not unlink the shared memory file %s", ctx->filename);
      THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename);
#endif /* _WIN32 */

Ejemplo n.º 7
/*
 * realloc() wrapper (long-sized variant) that reports problems through
 * THError and notifies THHeapUpdate.
 *
 * Visible behaviour: returns NULL when size == 0, raises a THError when
 * size < 0, otherwise reallocates ptr and calls THHeapUpdate with the
 * allocation size of the new pointer (or of ptr when realloc failed).
 *
 * NOTE(review): lines are missing from this excerpt -- the block opened by
 * 'if(!newptr && torchGCFunction) {' is never closed and the "not enough
 * memory" THError has no visible guard. Confirm against the complete source.
 * NOTE(review): "%dGB" is paired with a long expression; "%ld" would match
 * the argument type -- TODO confirm how THError formats its arguments.
 */
void* THRealloc(void *ptr, long size)
  if(size == 0)
    return NULL;

  if(size < 0)
    THError("$ Torch: invalid memory size -- maybe an overflow?");

  void *newptr = realloc(ptr, size);

  /* Second attempt, taken only when a torchGCFunction is set. */
  if(!newptr && torchGCFunction) {
    newptr = realloc(ptr, size);
  /* Falls back to the old pointer's size when the reallocation failed. */
  THHeapUpdate(getAllocSize(newptr ? newptr : ptr));

    THError("$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824);

  return newptr;
Ejemplo n.º 8
/*
 * Applies THNN_(LookupTable_renormRow) to every row of weight that is
 * referenced by idx.
 *
 * idx      : contiguous 1-D index tensor; values must lie in 1..size(weight,0).
 *            Its data is sorted and de-duplicated IN PLACE by this function.
 * weight   : contiguous tensor; row k starts at data + k * stride(weight, 0).
 * maxNorm  : forwarded unchanged to the per-row helper.
 * normType : must be > 0; forwarded unchanged to the per-row helper.
 *
 * Precondition violations are reported through THError.
 *
 * NOTE(review): braces and the closing preprocessor lines of the
 * '#ifdef _OPENMP' section are missing in this excerpt, so the relationship
 * between the two row loops (parallel vs. serial) is not fully visible here.
 */
void THNN_(LookupTable_renorm)(
          THNNState *state,
          THIndexTensor *idx,
          THTensor *weight,
          real maxNorm,
          real normType)
  if (!THTensor_(isContiguous)(weight))
    THError("weight must be contiguous");
  if (!THIndexTensor_(isContiguous)(idx))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(idx) != 1)
    THError("idx must be a vector");
  if (normType <= 0)
    THError("non-positive-norm not supported");

  long i;
  THIndex_t *row_idx = THIndexTensor_(data)(idx);
  long numel = THIndexTensor_(nElement)(idx);

  long numw = THTensor_(size)(weight, 0);
  long stride = THTensor_(stride)(weight, 0);
  real *gw = THTensor_(data)(weight);
  /* Reject any index outside 1..numw before touching the weights. */
  for (i=0; i<numel; i++)
    if (row_idx[i] < 1 || row_idx[i] > numw)
      THError("input out of range");
  // get unique indices
  qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
  /* Compact the sorted array so each index appears once; ptr counts them. */
  long ptr = 0;
  for (i=0; i<numel; i++)
    if (i == 0 || row_idx[i] != row_idx[i-1])
      row_idx[ptr++] = row_idx[i];
  numel = ptr;

#ifdef _OPENMP
  if (numel > 1000)
    // The strategy is to parallelize over the rows that appear in
    // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
    // This distributes the work evenly to each thread.
    #pragma omp parallel for private(i)
    for (i=0; i<numel; i++)
      long k = row_idx[i] - 1;
      THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
  for (i=0; i<numel; i++)
    long k = row_idx[i] - 1;
    THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
Ejemplo n.º 9
/*
 * Front end for a LAPACK gesvd call on a 2-D tensor.
 *
 * a_   : input tensor; must satisfy nDimension == 2.
 * s_, u_, vt_ : output tensors (only vt_ is referenced in the visible lines).
 * jobu : must be 'A' or 'S'; jobvt is initialised to the same value.
 *
 * Raises a THError when the reported info value is non-zero.
 *
 * NOTE(review): this excerpt is missing the heads of both LAPACK calls (only
 * their trailing argument lines remain) as well as all braces, so the full
 * argument lists and any cleanup cannot be read from here.
 */
TH_API void THLab_(gesvd)(THTensor *a_, THTensor *s_, THTensor *u_, THTensor *vt_, char jobu)
  int k,m, n, lda, ldu, ldvt, lwork, info;
  THTensor *A, *work;
  real wkopt;
  char jobvt = jobu;

  THArgCheck(a_->nDimension == 2, 2, "A should be 2 dimensional");
  THArgCheck(jobu == 'A' || jobu == 'S',4, "jobu can be 'A' or 'S'");
  A = THTensor_(newContiguous)(a_);
  /* m is taken from size[1] and n from size[0]; k is the smaller of the two. */
  m = A->size[1];
  n = A->size[0];
  k = (m < n ? m : n);

  lda = m;
  ldu = m;
  ldvt = n;
  if (jobu == 'A')
		   THTensor_(data)(vt_), ldvt,
		   &wkopt, -1, &info);
  /* wkopt, filled by the call ending above, sizes the workspace tensor. */
  lwork = (int)wkopt;
  work = THTensor_(newWithSize1d)(lwork);
		   THTensor_(data)(vt_), ldvt,
		   THTensor_(data)(work),lwork, &info);
  if (info > 0)
    THError(" Lapack gesvd : %d superdiagonals failed to converge.",info);
  else if (info < 0)
    THError("Lapack gesvd : Argument %d : illegal value", -info);
Ejemplo n.º 10
/*
 * Folds the pending heapDelta into heapSize via THAtomicAddPtrdiff, resets
 * heapDelta to 0 and returns oldHeapSize + heapDelta (the value computed
 * before the reset).
 *
 * The value returned by THAtomicAddPtrdiff is treated as the heap size from
 * before the addition -- presumably fetch-and-add semantics; TODO confirm.
 *
 * NOTE(review): the '#ifdef DEBUG' section has no matching '#endif' and the
 * function's closing brace is absent in this excerpt.
 */
static ptrdiff_t applyHeapDelta() {
  ptrdiff_t oldHeapSize = THAtomicAddPtrdiff(&heapSize, heapDelta);
#ifdef DEBUG
  /* Range checks written so that the comparisons themselves cannot overflow. */
  if (heapDelta > 0 && oldHeapSize > PTRDIFF_MAX - heapDelta)
    THError("applyHeapDelta: heapSize(%td) + increased(%td) > PTRDIFF_MAX, heapSize overflow!", oldHeapSize, heapDelta);
  if (heapDelta < 0 && oldHeapSize < PTRDIFF_MIN - heapDelta)
    THError("applyHeapDelta: heapSize(%td) + decreased(%td) < PTRDIFF_MIN, heapSize underflow!", oldHeapSize, heapDelta);
  ptrdiff_t newHeapSize = oldHeapSize + heapDelta;
  heapDelta = 0;
  return newHeapSize;
Ejemplo n.º 11
/*
 * Returns the recorded peer-to-peer access flag for `dev` accessing
 * `devToAccess`.
 *
 * state       : THC state holding numDevices and the p2pAccessEnabled table.
 * dev         : index of the accessing device; must be in [0, numDevices).
 * devToAccess : index of the accessed device; must be in [0, numDevices).
 *
 * Raises a THError naming the offending index when either one is out of range.
 */
int THCState_getPeerToPeerAccess(THCState* state, int dev, int devToAccess)
{
  if (dev < 0 || dev >= state->numDevices) {
    THError("%d is not a device", dev);
  }

  /* The upper bound must test devToAccess itself, not dev a second time. */
  if (devToAccess < 0 || devToAccess >= state->numDevices) {
    THError("%d is not a device", devToAccess);
  }

  return state->p2pAccessEnabled[dev][devToAccess];
}
Ejemplo n.º 12
/*
 * Returns the recorded peer-to-peer access flag for `dev` accessing
 * `devToAccess`.
 *
 * state       : THC state holding the p2pAccessEnabled table.
 * dev         : index of the accessing device.
 * devToAccess : index of the accessed device.
 *
 * Both indices must lie in [0, device count), where the device count is
 * obtained from the CUDA runtime. Raises a THError naming the offending index
 * otherwise.
 */
int THCState_getPeerToPeerAccess(THCState* state, int dev, int devToAccess)
{
  /* Without this query numDevices stays 0 and every index is rejected. */
  int numDevices = 0;
  THCudaCheck(cudaGetDeviceCount(&numDevices));

  if (dev < 0 || dev >= numDevices) {
    THError("%d is not a device", dev);
  }

  /* The upper bound must test devToAccess itself, not dev a second time. */
  if (devToAccess < 0 || devToAccess >= numDevices) {
    THError("%d is not a device", devToAccess);
  }

  return state->p2pAccessEnabled[dev][devToAccess];
}
Ejemplo n.º 13
/*
 * Returns the current cuBLAS handle stored in state.
 *
 * Raises a THError when state->currentBlasHandle is <= 0, and a different
 * THError when state itself is NULL (the function then returns NULL if
 * THError comes back).
 *
 * NOTE(review): closing braces are absent in this excerpt.
 * NOTE(review): currentBlasHandle is compared with 0, printed with "%d" and
 * also returned as a cublasHandle_t -- confirm the field's type and whether
 * an index-to-handle lookup is missing from this listing.
 */
cublasHandle_t THCState_getCurrentBlasHandle(THCState *state)
  /* This is called at the point of kernel execution.
     For some debugging code or improperly instrumented kernels,
     `state` is null */
  if (state) {
    if (state->currentBlasHandle <= 0) {
      THError("%d is not a valid handle, valid range is: (1, %d)",
              state->currentBlasHandle, state->numUserBlasHandles);
    return state->currentBlasHandle;
  THError("THCState and blasHandles must be set as there is no default blasHandle");
  return NULL;
Ejemplo n.º 14
/*
 * Front end for THLapack_(gesv) with matrix a_ and right-hand side(s) b_.
 *
 * a_ : 2-D tensor with size[0] == size[1] (both checked below).
 * b_ : 1-D or higher-dimensional tensor whose size must match n.
 *
 * Raises a THError when gesv reports a non-zero info value.
 *
 * NOTE(review): the second THArgCheck tests that A is square, while its
 * message says "symmetric".
 * NOTE(review): braces and the 'else' separating the 1-D case from the other
 * case are missing in this excerpt; as listed, nrhs is assigned twice.
 * Release of A, B and ipiv is not visible either. Confirm against the
 * complete source.
 */
TH_API void THLab_(gesv)(THTensor *a_, THTensor *b_)
  int n, nrhs, lda, ldb, info;
  THIntTensor *ipiv;
  THTensor *A, *B;
  THArgCheck(a_->nDimension == 2, 2, "A should be 2 dimensional");
  THArgCheck(a_->size[0] == a_->size[1], 2, "A should be symmetric");

  n = (int)a_->size[1];
  lda = n;
  ldb = n;
  if (b_->nDimension == 1)
    nrhs = 1;
    THArgCheck(n == b_->size[0], 1, "size incompatible A,b");
    nrhs = b_->size[0];
    THArgCheck(n == b_->size[1], 1, "size incompatible A,b");

  /* gesv is run on contiguous versions of both tensors. */
  A = THTensor_(newContiguous)(a_);
  B = THTensor_(newContiguous)(b_);
  ipiv = THIntTensor_newWithSize1d((long)n);
  THLapack_(gesv)(n, nrhs, 
		  THTensor_(data)(A), lda, THIntTensor_data(ipiv),
		  THTensor_(data)(B), ldb, &info);


  if (info < 0)
    THError("Lapack gesv : Argument %d : illegal value", -info);
  else if (info > 0)
    THError("Lapack gesv : U(%d,%d) is zero, singular U.", info,info);

Ejemplo n.º 15
/*
 * Points self at the given storage, offset and geometry.
 *
 * self          : tensor to update; its storage must be non-NULL on entry
 *                 (asserted).
 * storage       : storage to adopt when it differs from self->storage.
 * storageOffset : must be >= 0, otherwise a THError is raised.
 * nDimension, size, stride : forwarded to THCudaTensor_rawResize.
 *
 * NOTE(review): braces and the condition that chooses between adopting
 * `storage` (with a retain) and creating a fresh THCudaStorage are missing
 * in this excerpt; as listed, self->storage is assigned twice in a row.
 */
static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride)
  THAssert(self->storage != NULL);
  /* storage */
  if(self->storage != storage)
      /* The previously held storage is released before the replacement. */
      THCudaStorage_free(state, self->storage);

      self->storage = storage;
      THCudaStorage_retain(state, self->storage);
      self->storage = THCudaStorage_new(state);

  /* storageOffset */
  if(storageOffset < 0)
    THError("Tensor: invalid storage offset");
  self->storageOffset = storageOffset;

  /* size and stride */
  THCudaTensor_rawResize(state, self, nDimension, size, stride);
Ejemplo n.º 16
/*
 * Gradient of max-unpooling for one frame: each gradInput element takes the
 * gradOutput value stored at the index recorded in ind_p.
 *
 * gradInput_p  : destination, nslices planes of iwidth*iheight elements.
 * gradOutput_p : source, nslices planes of owidth*oheight elements.
 * ind_p        : per-input-element positions inside the matching output
 *                plane; each must lie in [0, owidth*oheight).
 * nslices      : number of planes, distributed with at::parallel_for.
 *
 * Raises a THError for an out-of-range index.
 *
 * NOTE(review): loop braces and the end of the lambda / function are absent
 * in this excerpt.
 */
static void THNN_(SpatialMaxUnpooling_updateGradInput_frame)(scalar_t *gradInput_p, scalar_t *gradOutput_p,
                                                         THIndex_t *ind_p,
                                                         int nslices,
                                                         int iwidth, int iheight,
                                                         int owidth, int oheight)
  at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) {
    for (auto k = start; k < end; k++)
      /* Base pointers of plane k in each buffer. */
      scalar_t *gradInput_p_k = gradInput_p + k*iwidth*iheight;
      scalar_t *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
      THIndex_t *ind_p_k = ind_p + k*iwidth*iheight;

      int i, j;
      THIndex_t maxp;
      for(i = 0; i < iheight; i++)
        for(j = 0; j < iwidth; j++)
          maxp = ind_p_k[i*iwidth + j]; /* retrieve position of max */
          if(maxp < 0 || maxp >= owidth * oheight) {
              THError("invalid max index %ld, owidth= %d, oheight= %d", maxp, owidth, oheight);
          gradInput_p_k[i*iwidth + j] = gradOutput_p_k[maxp]; /* update gradient */
/*
 * Forward pass of the SpatialConvolutionMM module.
 *
 * module : supplies kernel size, stride, padding, plane counts and the
 *          finput / weight / bias / output tensors.
 * input  : 3-D or 4-D tensor; a 3-D input is temporarily resized to 4-D with
 *          a leading dimension of 1 and resized back before returning.
 *
 * Returns module->output, resized to the computed output geometry.
 *
 * NOTE(review): closing braces are absent and the THError call after the
 * output-size check has lost its argument lines in this excerpt. Release of
 * the per-frame tensors created with THFloatTensor_newSelect is not visible.
 */
THFloatTensor *nn_SpatialConvolutionMM_updateOutput(struct module *module, THFloatTensor *input)
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;

	THFloatTensor *finput = module->SpatialConvolution.finput;
	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias   = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	/* batch == 0 records that the caller passed an unbatched (3-D) input. */
	int batch = 1;
	if (input->nDimension == 3) {
		batch = 0;
		THFloatTensor_resize4d(input, 1, input->size[0], input->size[1], input->size[2]);

	long batchSize = input->size[0];
	long nInputPlane  = module->SpatialConvolution.nInputPlane;
	long nOutputPlane = module->SpatialConvolution.nOutputPlane;
	long inputWidth   = input->size[3];
	long inputHeight  = input->size[2];
	long outputWidth  = (inputWidth + 2*padW - kW) / dW + 1;
	long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;

	if (outputWidth < 1 || outputHeight < 1)
		THError("Given input size: (%dx%dx%d). Calculated output size: (%dx%dx%d). Output size is too small",

	THFloatTensor_resize3d(finput, batchSize, kW*kH*nInputPlane, outputHeight*outputWidth);
	THFloatTensor_resize4d(output, batchSize, nOutputPlane, outputHeight, outputWidth);

	long t;
#pragma omp parallel for if(batchSize >= 4) private(t)
	for (t = 0; t < batchSize; t++) {
		/* Per-sample selections along dimension 0 of each tensor. */
		THFloatTensor *input_t = THFloatTensor_newSelect(input, 0, t);
		THFloatTensor *output_t = THFloatTensor_newSelect(output, 0, t);
		THFloatTensor *finput_t = THFloatTensor_newSelect(finput, 0, t);

		nn_SpatialConvolutionMM_updateOutput_frame(input_t, output_t, weight, bias, finput_t,
			kW, kH, dW, dH, padW, padH,
			nInputPlane, inputWidth, inputHeight,
			nOutputPlane, outputWidth, outputHeight);


	/* Undo the temporary 4-D reshaping for unbatched input. */
	if (batch == 0) {
		THFloatTensor_resize3d(output, nOutputPlane, outputHeight, outputWidth);
		THFloatTensor_resize3d(input, nInputPlane, inputHeight, inputWidth);

	return output;
Ejemplo n.º 18
/*
 * Argument and shape validation for VolumetricFullConvolution.
 *
 * input      : 4-D or 5-D tensor.
 * gradOutput : optional; when non-NULL its plane count and spatial sizes are
 *              checked against the computed output geometry.
 * weight     : 5-D tensor; size[0] is read as nInputPlane, size[1] as
 *              nOutputPlane and size[2..4] as kT, kH, kW.
 * bias       : optional; when non-NULL must be 1-D with weight->size[1]
 *              elements.
 * dT,dW,dH   : strides, all > 0.
 * pT,pW,pH   : paddings.
 * aT,aW,aH   : output adjustments, each smaller than the matching stride.
 *
 * NOTE(review): closing braces, the body of 'if (ndim == 5) {' and the
 * argument lines of the "Output size is too small" THError are missing in
 * this excerpt.
 */
static inline void THNN_(VolumetricFullConvolution_shapeCheck)(
                         THTensor *input, THTensor *gradOutput,
                         THTensor *weight, THTensor *bias,
                         int dT, int dW, int dH, int pT, int pW, int pH,
                         int aT, int aW, int aH) {
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  // number of input & output planes and kernel size is indirectly defined by the weight tensor
  THNN_ARGCHECK(weight->nDimension == 5, 4, weight,
                "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
                "expected for weight, but got: %s");
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
  THArgCheck(aT < dT && aW < dW && aH < dH, 15,
             "output adjustment must be smaller than stride, but got "
             "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d",
             aT, aH, aW, dT, dH, dW);

  int ndim = input->nDimension;
  const int nInputPlane  = (int)weight->size[0];
  const int nOutputPlane = (int)weight->size[1];
  const int kT           = (int)weight->size[2];
  const int kH           = (int)weight->size[3];
  const int kW           = (int)weight->size[4];

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);

  /* Dimension indices of plane, depth, height and width for 4-D input. */
  int dimf = 0;
  int dimd = 1;
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5) {

  const long inputWidth   = input->size[dimw];
  const long inputHeight  = input->size[dimh];
  const long inputDepth   = input->size[dimd];
  const long outputWidth  = (inputWidth  - 1) * dW - 2*pW + kW + aW;
  const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
  const long outputDepth  = (inputDepth  - 1) * dT - 2*pT + kT + aT;

  if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
Ejemplo n.º 19
/*
 * Gradient of max-unpooling for one frame: each gradInput element takes the
 * gradOutput value stored at the index recorded in ind_p.
 *
 * gradInput_p  : destination, nslices planes of iwidth*iheight elements.
 * gradOutput_p : source, nslices planes of owidth*oheight elements.
 * ind_p        : per-input-element positions inside the matching output
 *                plane, offset by TH_INDEX_BASE.
 * nslices      : number of planes; planes are processed by an OpenMP loop.
 * iwidth, iheight, owidth, oheight : plane geometries.
 *
 * Raises a THError when an index, after removing TH_INDEX_BASE, falls outside
 * [0, owidth*oheight).
 */
static void THNN_(SpatialMaxUnpooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
                                                         THIndex_t *ind_p,
                                                         long nslices,
                                                         long iwidth, long iheight,
                                                         long owidth, long oheight)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    /* Base pointers of plane k in each buffer. */
    real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
    real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
    THIndex_t *ind_p_k = ind_p + k*iwidth*iheight;

    long i, j, maxp;
    for(i = 0; i < iheight; i++)
    {
      for(j = 0; j < iwidth; j++)
      {
        maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
        if(maxp<0 || maxp>=owidth*oheight){
            /* All three arguments are long, hence %ld rather than %d. */
            THError("invalid max index %ld, owidth= %ld, oheight= %ld",maxp,owidth,oheight);
        }
        gradInput_p_k[i*iwidth + j] = gradOutput_p_k[maxp]; /* update gradient */
      }
    }
  }
}
Ejemplo n.º 20
/*
 * Computes a launch geometry for `size` elements with 256 threads per block.
 *
 * nBlockPerColumn_, nBlockPerRow_, nThreadPerBlock_ : outputs.
 * size : number of elements to cover.
 *
 * Up to 65535 blocks are laid out in a single row; larger counts start from
 * a square of side floor(sqrt(nBlockPerGrid)).
 *
 * NOTE(review): several lines are missing in this excerpt -- the statement
 * controlled by 'if(size % nThreadPerBlock)', the braces of both branches,
 * the body of the while loop and whatever guards the final THError. Confirm
 * against the complete source.
 * NOTE(review): the second bound is written 65355L * 65355L while the first
 * is 65535 -- confirm whether the differing digits are intentional.
 */
void THCudaGetGridSize(int *nBlockPerColumn_, int *nBlockPerRow_, int *nThreadPerBlock_, long size)
  const int nThreadPerBlock = 256;
  long nBlockPerGrid = size / nThreadPerBlock;
  long nBlockPerColumn = 0L;
  long nBlockPerRow = 0L;

  if(size % nThreadPerBlock)

  if(nBlockPerGrid <= 65535)
    nBlockPerRow = nBlockPerGrid;
    nBlockPerColumn = 1;
  else if(nBlockPerGrid <= (65355L * 65355L))
    unsigned int uiSqrt = (unsigned int)(sqrt((float)nBlockPerGrid));
    nBlockPerRow = uiSqrt;
    nBlockPerColumn = uiSqrt;
    while((nBlockPerRow * nBlockPerColumn) < nBlockPerGrid)
    THError("too large vector for Cuda, sorry");

  *nBlockPerColumn_ = (int)nBlockPerColumn;
  *nBlockPerRow_ = (int)nBlockPerRow;
  *nThreadPerBlock_ = (int)nThreadPerBlock;
Ejemplo n.º 21
/*
 * Looks up a stream handle in state->streamsPerDevice.
 *
 * state  : THC state holding numDevices, numUserStreams and the stream table.
 * device : zero-based CUDA device index, valid in [0, numDevices).
 * stream : stream index, valid in [0, numUserStreams]; 0 is the default
 *          stream and 1..numUserStreams are Torch streams.
 *
 * Raises a THError for an out-of-range device (reported one-based) or stream.
 */
cudaStream_t THCState_getDeviceStream(THCState *state, int device, int stream)
{
  const int deviceInRange = (device >= 0) && (device < state->numDevices);
  if (!deviceInRange) {
    THError("%d is not a device", device + 1 /* back to Torch index */);
  }

  const int streamInRange = (stream >= 0) && (stream <= state->numUserStreams);
  if (!streamInRange) {
    THError("%d is not a stream", stream);
  }

  return state->streamsPerDevice[device][stream];
}
Ejemplo n.º 22
/*
 * Allocates and initialises a THMapAllocatorContext.
 *
 * filename : optional; when non-NULL a private copy is stored in the context,
 *            otherwise the context points at unknown_filename.
 * flags    : stored verbatim in ctx->flags.
 *
 * Returns the new context with size 0; the visible lines also set handle to
 * INVALID_HANDLE_VALUE and fd to -1.
 *
 * NOTE(review): this excerpt has lost the condition guarding the
 * TH_ALLOCATOR_MAPPED_EXCLUSIVE error, the closing brace of the filename
 * else-branch and the '#else' / '#endif' lines of the _WIN32 section, so
 * which of handle / fd applies to which platform is an inference.
 */
THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags)
  THMapAllocatorContext *ctx = THAlloc(sizeof(THMapAllocatorContext));

    THError("TH_ALLOCATOR_MAPPED_EXCLUSIVE flag requires opening the file "
        "in shared mode");

  if (filename) {
    /* +1 leaves room for the terminating NUL copied by strcpy. */
    ctx->filename = THAlloc(strlen(filename)+1);
    strcpy(ctx->filename, filename);
  } else {
    ctx->filename = unknown_filename;
  ctx->flags = flags;
  ctx->size = 0;
#ifdef _WIN32
  ctx->handle = INVALID_HANDLE_VALUE;
  ctx->fd = -1;

  return ctx;
Ejemplo n.º 23
/*
 * Lua binding that loads an .npz archive: the file name is taken from stack
 * index 1, the archive is read with cnpy::npz_load, and every (name, array)
 * pair is stored with lua_rawset into the table at the index captured in
 * tbl. Returns 1, i.e. one Lua result.
 *
 * NOTE(review): this excerpt is missing the 'try {' that pairs with the
 * catch below, the statement that actually creates the table announced by
 * the "create a new table" comment, the loop's closing brace and the body of
 * the catch handler.
 */
static int loadnpz_l(lua_State *L){
		const char *filename = lua_tostring(L, 1);

		/* fpath is not referenced again in the visible lines. */
		std::string fpath = std::string(filename);

		cnpy::npz_t npzData = cnpy::npz_load(filename);

		// create a new table
		int tbl = lua_gettop(L);

		for (cnpy::npz_t::iterator i=npzData.begin(); i!=npzData.end(); ++i){
			std::string name = i->first;
			cnpy::NpyArray arr = i->second;

			/* Key pushed first, then the value, then both consumed by rawset. */
			lua_pushstring(L, name.c_str());
			load_array_to_lua(L, arr);
			lua_rawset(L, tbl);

	}catch (std::exception& e){

	return 1;
Ejemplo n.º 24
/*
 * Max-unpooling for one frame: each input element is written to the output
 * position recorded for it in ind_p.
 *
 * input_p  : source, nslices planes of iwidth*iheight elements.
 * output_p : destination, nslices planes of owidth*oheight elements.
 * ind_p    : per-input-element one-based positions inside the matching
 *            output plane, stored as `real`.
 * nslices  : number of planes; planes are processed by an OpenMP loop.
 * iwidth, iheight, owidth, oheight : plane geometries.
 *
 * Raises a THError when an index, after subtracting 1, falls outside
 * [0, owidth*oheight).
 */
static void nn_(SpatialMaxUnpooling_updateOutput_frame)(real *input_p, real *output_p,
                                                      real *ind_p,
                                                      long nslices,
                                                      long iwidth, long iheight,
                                                      long owidth, long oheight)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    /* Base pointers of plane k in each buffer. */
    real *output_p_k = output_p + k*owidth*oheight;
    real *input_p_k = input_p + k*iwidth*iheight;
    real *ind_p_k = ind_p + k*iwidth*iheight;

    long i, j, maxp;
    for(i = 0; i < iheight; i++)
    {
      for(j = 0; j < iwidth; j++)
      {
        maxp = ind_p_k[i*iwidth + j] - 1;  /* retrieve position of max */
        if(maxp<0 || maxp>=owidth*oheight){
            /* All three arguments are long, hence %ld rather than %d. */
            THError("invalid max index %ld, owidth= %ld, oheight= %ld",maxp,owidth,oheight);
        }
        output_p_k[maxp] = input_p_k[i*iwidth + j]; /* update output */
      }
    }
  }
}
Ejemplo n.º 25
/*
 * Enables or disables peer-to-peer access from `dev` to `devToAccess` and
 * records the new setting in state->p2pAccessEnabled.
 *
 * Nothing is done when `enable` equals the value already recorded. When
 * enabling, cudaDeviceCanAccessPeer is consulted first and a THError is
 * raised if access is not possible.
 *
 * NOTE(review): closing braces are absent and the excerpt stops inside the
 * final else-branch; the statements that use prevDev (presumably switching
 * the current device and restoring it -- TODO confirm) and the disable call
 * are not visible.
 */
void THCState_setPeerToPeerAccess(THCState* state, int dev, int devToAccess,
                                  int enable)
  /* This will perform device bounds checking for us */
  int prevEnabled = THCState_getPeerToPeerAccess(state, dev, devToAccess);

  if (enable != prevEnabled) {
    /* If we're attempting to enable p2p access but p2p access isn't */
    /* supported, throw an error */
    if (enable) {
      int access = 0;
      THCudaCheck(cudaDeviceCanAccessPeer(&access, dev, devToAccess));

      if (!access) {
        THError("p2p access not supported for %d accessing %d",
                dev, devToAccess);

    state->p2pAccessEnabled[dev][devToAccess] = enable;

    int prevDev = 0;

    /* This should be in sync with the current access state */
    if (enable) {
      THCudaCheck(cudaDeviceEnablePeerAccess(devToAccess, 0));
    } else {

Ejemplo n.º 26
/*
 * Runs the network on one YUYV image.
 *
 * network   : supplies net, mean and std.
 * image     : YUYV pixel data, converted by yuyv2fRGB into a newly created
 *             float storage of width*height*3 elements.
 * width, height : image geometry.
 * results   : receives the data pointer of the output tensor's storage.
 * outwidth, outheight : receive the last two output dimensions when the
 *             output has at least 3 dimensions, otherwise both are set to 1.
 *
 * Returns the number of elements of the output tensor.
 *
 * NOTE(review): the CUDNN section has lost its condition and its
 * '#endif', and braces are missing elsewhere in this excerpt. Release of the
 * temporary tensor t and its storage is not visible.
 */
int THProcessYUYV(THNETWORK *network, unsigned char *image, int width, int height, float **results, int *outwidth, int *outheight)
	THFloatTensor *out;
	THFloatStorage *st;

#ifdef CUDNN
		THError("This function is not supported with CUDNN");
	st = THFloatStorage_new(width * height * 3);
	yuyv2fRGB(image, st->data, width*height, width, width, height, network->mean, network->std);
	/* Wrap the storage as a 3 x height x width tensor with unit stride on width. */
	THFloatTensor *t = THFloatTensor_new();
	t->storage = st;
	t->nDimension = 3;
	t->size[0] = 3;
	t->size[1] = height;
	t->size[2] = width;
	t->stride[0] = width * height;
	t->stride[1] = width;
	t->stride[2] = 1;
	out = forward(network->net, t);
	*results = out->storage->data;
	if(out->nDimension >= 3)
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else *outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
Ejemplo n.º 27
/*
 * malloc() wrapper that reports problems through THError.
 *
 * size : number of bytes requested.
 *
 * Returns NULL when size == 0, otherwise the pointer obtained from malloc.
 * Raises a THError when size is negative or when malloc fails.
 */
void* THAlloc(long size)
{
  void *ptr;

  if(size < 0) {
    THError("$ Torch: invalid memory size -- maybe an overflow?");
  }

  if(size == 0) {
    return NULL;
  }

  ptr = malloc(size);

  /* Only a failed allocation is an error; size is long, hence %ld. */
  if(!ptr) {
    THError("$ Torch: not enough memory: you tried to allocate %ldGB. Buy new RAM!", size/1073741824);
  }

  return ptr;
}
Ejemplo n.º 28
/*
 * Points self at the given storage, offset and geometry.
 *
 * self          : tensor to update.
 * storage       : storage to adopt; may be NULL, in which case
 *                 self->storage becomes NULL.
 * storageOffset : must be >= 0, otherwise a THError is raised.
 * nDimension, size, stride : forwarded to THGPUTensor_rawResize.
 *
 * NOTE(review): braces, the 'else', and the statements that release the old
 * storage and retain the new one (if any) are missing in this excerpt; as
 * listed, 'if (self->storage)' has no body.
 */
static void THGPUTensor_rawSet(THGPUTensor *self, THGPUStorage *storage, long storageOffset, int nDimension, long *size, long *stride)
  /* storage */
  if (self->storage != storage)
    if (self->storage)

    if (storage)
      self->storage = storage;
      self->storage = NULL;

  /* storageOffset */
  if (storageOffset < 0)
    THError("Tensor: invalid storage offset");
  self->storageOffset = storageOffset;

  /* size and stride */
  THGPUTensor_rawResize(self, nDimension, size, stride);
Ejemplo n.º 29
/*
 * Argument and shape validation for Col2Im.
 *
 * state      : unused here.
 * input      : non-empty 2-D or 3-D tensor; a 3-D input carries a leading
 *              batch dimension.
 * gradOutput : unused here.
 * outputHeight, outputWidth : requested output size, both >= 1.
 * kH, kW     : kernel size, both > 0.
 * dilationH, dilationW : dilation, both > 0.
 * padH, padW : padding.
 * dH, dW     : stride, both > 0.
 *
 * The plane dimension of input must be divisible by kH*kW, and the length
 * dimension must equal the number of sliding blocks computed from the other
 * arguments. Violations are reported through THArgCheck / THError.
 *
 * All int64_t values are cast to long long and printed with %lld so that
 * the format strings match their arguments.
 */
static inline void THNN_(Col2Im_shapeCheck)(
                         THNNState *state,
                         THTensor *input,
                         THTensor *gradOutput,
                         int64_t outputHeight, int64_t outputWidth,
                         int64_t kH, int64_t kW, int64_t dilationH, int64_t dilationW,
                         int64_t padH, int64_t padW, int64_t dH, int64_t dW) {

  THArgCheck(kW > 0 && kH > 0, 6,
             "kernel size should be greater than zero, but got kH: %lld kW: %lld",
             (long long) kH, (long long) kW);
  THArgCheck(dW > 0 && dH > 0, 12,
             "stride should be greater than zero, but got dH: %lld dW: %lld",
             (long long) dH, (long long) dW);
  THArgCheck(dilationW > 0 && dilationH > 0, 8,
             "dilation should be greater than zero, but got dilationH: %lld dilationW: %lld",
             (long long) dilationH, (long long) dilationW);

  int64_t ndim = THTensor_(nDimensionLegacyNoScalars)(input);
  THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 2, input,
                "Expected non-empty 2D or 3D input tensor, but got input of shape %s");

  /* With batch_dim == -1 the offsets below address an unbatched input. */
  int64_t batch_dim = (ndim == 3) ? 0 : -1;
  int64_t nInputPlane  = input->size(batch_dim + 1);

  if (nInputPlane % (kW * kH) != 0) {
    THError("Expected size of input's dimension 1 to be divisible by the "
            "product of kernel_size, but got input.size(1)=%lld and "
            "kernel_size=(%lld, %lld).",
            (long long) nInputPlane, (long long) kH, (long long) kW);
  }

  int64_t inputLength  = input->size(batch_dim + 2);
  int64_t nBlocksH = div_rtn<int64_t>(outputHeight + 2 * padH - dilationH * (kH - 1) - 1, dH) + 1;
  int64_t nBlocksW = div_rtn<int64_t>(outputWidth + 2 * padW - dilationW * (kW - 1) - 1, dW) + 1;

  if (inputLength != (nBlocksH * nBlocksW)) {
    THError("Given output_size=(%lld, %lld), kernel_size=(%lld, %lld), "
            "dilation=(%lld, %lld), padding=(%lld, %lld), stride=(%lld, %lld), expected "
            "size of input's dimension 2 to match the calculated number of "
            "sliding blocks %lld * %lld = %lld, but got input.size(2)=%lld.",
            (long long) outputHeight, (long long) outputWidth,
            (long long) kH, (long long) kW,
            (long long) dilationH, (long long) dilationW,
            (long long) padH, (long long) padW,
            (long long) dH, (long long) dW,
            (long long) nBlocksH, (long long) nBlocksW,
            (long long) (nBlocksH * nBlocksW), (long long) inputLength);
  }

  if (outputWidth < 1 || outputHeight < 1) {
    THError("Expected output spatial size to be positive, but got: output_size=(%lld, %lld).",
            (long long) outputHeight, (long long) outputWidth);
  }
}
Ejemplo n.º 30
Archivo: LogSum1d.c Proyecto: noa/jhnn
/*
 * Registers the jhu_THLogSum1d__ function table under the name "jhu" on the
 * "torch.DoubleTensor" metatable.
 *
 * Raises a THError when luaT_pushmetatable returns 0. The metatable pushed
 * at the start is popped again at the end.
 *
 * NOTE(review): the closing braces of the if-block and of the function are
 * absent in this excerpt.
 */
static void jhu_THLogSum1d_init(lua_State *L) {
    int ret = luaT_pushmetatable(L, "torch.DoubleTensor");
    if(ret == 0) {
        THError("problem pushing metatable");
    luaT_registeratname(L, jhu_THLogSum1d__, "jhu");
    lua_pop(L, 1);