void THNN_(SoftMax_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          int64_t dim)
{
  THArgCheck(dim >= 0 && dim < input->nDimension, 4,
             "dim out of range (got %d, but input has %d dims)", dim, input->nDimension);

  uint64_t outer_size = 1;
  uint64_t dim_size = input->size[dim];
  uint64_t inner_size = 1;
  for (int64_t i = 0; i < dim; ++i)
    outer_size *= input->size[i];
  for (int64_t i = dim + 1; i < input->nDimension; ++i)
    inner_size *= input->size[i];

  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  real *input_data_base = THTensor_(data)(input);
  real *output_data_base = THTensor_(data)(output);

  uint64_t dim_stride = inner_size;
  uint64_t outer_stride = dim_size * dim_stride;

  SOFTMAX_SIZE_TYPE i, d;
#pragma omp parallel for private(i, d)
  for (i = 0; i < SOFTMAX_CAST_TYPE (outer_size * inner_size); i++)
  {
    uint64_t outer_idx = i / inner_size;
    uint64_t inner_idx = i % inner_size;
    real *input_data = input_data_base + outer_idx * outer_stride + inner_idx;
    real *output_data = output_data_base + outer_idx * outer_stride + inner_idx;

    /* shift by the running maximum so exp() stays in range */
    real input_max = -THInf;
    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++) {
      if (input_data[d * dim_stride] >= input_max)
        input_max = input_data[d * dim_stride];
    }

    accreal sum = 0;
    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++) {
      real z = exp(input_data[d * dim_stride] - input_max);
      output_data[d * dim_stride] = z;
      sum += z;
    }

    real invsum = 1 / sum;  // NOTE: truncate sum to real once
    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++) {
      output_data[d * dim_stride] *= invsum;
    }
  }

  THTensor_(free)(input);
}
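/* For reference: the same max-shifted softmax over one contiguous 1-D slice,
   written without the TH macros. A minimal standalone sketch in plain C
   (double precision, hypothetical softmax_1d helper), not part of the library. */
#include <math.h>
#include <stddef.h>

static void softmax_1d(const double *x, double *out, size_t n)
{
  /* out[i] = exp(x[i] - max(x)) / sum_j exp(x[j] - max(x)) */
  double max = x[0];
  for (size_t i = 1; i < n; i++)
    if (x[i] > max)
      max = x[i];

  double sum = 0.0;
  for (size_t i = 0; i < n; i++) {
    out[i] = exp(x[i] - max);  /* shifting by the max keeps exp() from overflowing */
    sum += out[i];
  }

  double invsum = 1.0 / sum;   /* one division, then scale, as in the kernel above */
  for (size_t i = 0; i < n; i++)
    out[i] *= invsum;
}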
void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long sliceIndex)
{
  int d;

  if(!src)
    src = self;

  THArgCheck(src->nDimension > 1, 1, "cannot select on a vector");
  THArgCheck((dimension >= 0) && (dimension < src->nDimension), 3, "out of range");
  THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 4, "out of range");

  THCudaTensor_set(state, self, src);
  THCudaTensor_narrow(state, self, NULL, dimension, sliceIndex, 1);
  for(d = dimension; d < self->nDimension-1; d++)
  {
    self->size[d] = self->size[d+1];
    self->stride[d] = self->stride[d+1];
  }
  self->nDimension--;
}
void THGPUTensor_setStorage(THGPUTensor *self, THGPUStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_)
{
  if (size_ && stride_)
    THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes");

  THGPUTensor_rawSet(self, storage_, storageOffset_,
                     (size_ ? size_->size : (stride_ ? stride_->size : 0)),
                     (size_ ? size_->data : NULL),
                     (stride_ ? stride_->data : NULL));
}
void THNN_(SoftMax_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output,
          int64_t dim)
{
  THNN_CHECK_SHAPE(output, gradOutput);
  THArgCheck(dim >= 0 && dim < output->nDimension, 6,
             "dim out of range (got %d, but input has %d dims)", dim, output->nDimension);

  uint64_t outer_size = 1;
  uint64_t dim_size = output->size[dim];
  uint64_t inner_size = 1;
  for (int64_t i = 0; i < dim; ++i)
    outer_size *= output->size[i];
  for (int64_t i = dim + 1; i < output->nDimension; ++i)
    inner_size *= output->size[i];

  gradOutput = THTensor_(newContiguous)(gradOutput);
  output = THTensor_(newContiguous)(output);
  THTensor_(resizeAs)(gradInput, output);

  real *gradInput_data_base = THTensor_(data)(gradInput);
  real *output_data_base = THTensor_(data)(output);
  real *gradOutput_data_base = THTensor_(data)(gradOutput);

  uint64_t dim_stride = inner_size;
  uint64_t outer_stride = dim_size * dim_stride;

  SOFTMAX_SIZE_TYPE i, d;
#pragma omp parallel for private(i, d)
  for (i = 0; i < SOFTMAX_CAST_TYPE (outer_size * inner_size); i++)
  {
    uint64_t outer_idx = i / inner_size;
    uint64_t inner_idx = i % inner_size;
    real *gradInput_data = gradInput_data_base + outer_idx * outer_stride + inner_idx;
    real *output_data = output_data_base + outer_idx * outer_stride + inner_idx;
    real *gradOutput_data = gradOutput_data_base + outer_idx * outer_stride + inner_idx;

    /* gradInput_i = output_i * (gradOutput_i - sum_j gradOutput_j * output_j) */
    accreal sum = 0;
    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++)
      sum += ((accreal)gradOutput_data[d * dim_stride]) * ((accreal)output_data[d * dim_stride]);

    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++)
      gradInput_data[d * dim_stride] = output_data[d * dim_stride] * (gradOutput_data[d * dim_stride] - sum);
  }

  THTensor_(free)(gradOutput);
  THTensor_(free)(output);
}
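/* The backward kernel above applies the softmax Jacobian-vector product
   gradInput_i = y_i * (gradOutput_i - sum_j gradOutput_j * y_j), where y is the
   saved softmax output. A minimal standalone sketch of that formula for one
   contiguous slice (plain C, hypothetical softmax_backward_1d helper). */
#include <stddef.h>

static void softmax_backward_1d(const double *y, const double *grad_out,
                                double *grad_in, size_t n)
{
  double dot = 0.0;                 /* sum_j grad_out[j] * y[j] */
  for (size_t i = 0; i < n; i++)
    dot += grad_out[i] * y[i];

  for (size_t i = 0; i < n; i++)
    grad_in[i] = y[i] * (grad_out[i] - dot);
}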
static inline void THNN_(VolumetricUpSamplingNearest_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int scale_factor)
{
  THArgCheck(input != NULL, 2, "4D or 5D input tensor expected but got NULL");
  THArgCheck(scale_factor > 1, 4,
             "scale_factor must be greater than 1, but got: %d", scale_factor);
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D input tensor expected but got: %s");

  if (input->nDimension == 4)
  {
    int nChannels    = THTensor_(size)(input, 0);
    int inputDepth   = THTensor_(size)(input, 1);
    int inputHeight  = THTensor_(size)(input, 2);
    int inputWidth   = THTensor_(size)(input, 3);
    int outputDepth  = inputDepth * scale_factor;
    int outputHeight = inputHeight * scale_factor;
    int outputWidth  = inputWidth * scale_factor;
    if (gradOutput != NULL)
    {
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, outputDepth);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
    }
  }
  else
  {
    int nBatch       = THTensor_(size)(input, 0);
    int nChannels    = THTensor_(size)(input, 1);
    int inputDepth   = THTensor_(size)(input, 2);
    int inputHeight  = THTensor_(size)(input, 3);
    int inputWidth   = THTensor_(size)(input, 4);
    int outputDepth  = inputDepth * scale_factor;
    int outputHeight = inputHeight * scale_factor;
    int outputWidth  = inputWidth * scale_factor;
    if (gradOutput != NULL)
    {
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth);
    }
  }
}
static void THApkFile_seekEnd(THFile *self)
{
  THApkFile *dfself = (THApkFile*)(self);

  THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");

  if(fseek(dfself->handle, 0L, SEEK_END) < 0)
  {
    dfself->file.hasError = 1;
    if(!dfself->file.isQuiet)
      THError("unable to seek at end of file");
  }
}
static void THDiskFile_seek(THFile *self, ssize_t position)
{
  THDiskFile *dfself = (THDiskFile*)(self);

  THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");

#if defined(_WIN64)
  THArgCheck(position <= INT64_MAX, 2, "position must be smaller than INT64_MAX");
  if(_fseeki64(dfself->handle, (int64_t)position, SEEK_SET) < 0)
#elif defined(_WIN32)
  THArgCheck(position <= LONG_MAX, 2, "position must be smaller than LONG_MAX");
  if(fseek(dfself->handle, (int32_t)position, SEEK_SET) < 0)
#else
  THArgCheck(position <= LLONG_MAX, 2, "position must be smaller than LLONG_MAX");
  if(fseeko(dfself->handle, (off_t)position, SEEK_SET) < 0)
#endif
  {
    dfself->file.hasError = 1;
    if(!dfself->file.isQuiet)
      THError("unable to seek to position %zu", position);
  }
}
void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension1, int dimension2)
{
  long z;

  if(!src)
    src = self;

  THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, "out of range");
  THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, "out of range");

  THCudaTensor_set(state, self, src);

  if(dimension1 == dimension2)
    return;

  z = self->stride[dimension1];
  self->stride[dimension1] = self->stride[dimension2];
  self->stride[dimension2] = z;
  z = self->size[dimension1];
  self->size[dimension1] = self->size[dimension2];
  self->size[dimension2] = z;
}
static size_t THApkFile_position(THFile *self)
{
  THApkFile *dfself = (THApkFile*)(self);

  THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");

  long offset = ftell(dfself->handle);
  if (offset > -1)
    return (size_t)offset;
  else if(!dfself->file.isQuiet)
    THError("unable to obtain disk file offset (maybe a long overflow occurred)");

  return 0;
}
static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
              "Number of output features is not equal to nOutputPlane" );

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
  }
  else
  {
    long T = input->size[0];
    long t;

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  THTensor_(transpose)(weight, weight, 0, 1);

  return 1;
}
void THCudaTensor_copyFloat(THCudaTensor *self, struct THFloatTensor *src)
{
  THArgCheck(THCudaTensor_nElement(self) == THFloatTensor_nElement(src), 2, "sizes do not match");

  {
    THCudaTensor *selfc = THCudaTensor_newContiguous(self);
    src = THFloatTensor_newContiguous(src);

    THCudaCheck(cudaMemcpy(selfc->storage->data + selfc->storageOffset,
                           src->storage->data + src->storageOffset,
                           THFloatTensor_nElement(src) * sizeof(float),
                           cudaMemcpyHostToDevice));

    THFloatTensor_free(src);
    THCudaTensor_freeCopyTo(selfc, self);
  }
}
void THNN_(LogSoftMax_updateOutput)(THNNState *state, THTensor *input, THTensor *output)
{
  real *input_data, *output_data;
  long nframe = 0, dim = 0;
  long t, d;

  if (input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
  }
  else if (input->nDimension == 2)
  {
    nframe = input->size[0];
    dim = input->size[1];
  }
  else
  {
    THArgCheck(0, 2, "vector or matrix expected");
  }

  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  real *input_data0 = THTensor_(data)(input);
  real *output_data0 = THTensor_(data)(output);

  accreal logsum;
  real maxInput;
#pragma omp parallel for private(t, d, maxInput, logsum, input_data, output_data)
  for (t = 0; t < nframe; t++)
  {
    logsum = 0;
    maxInput = -THInf;
    input_data = input_data0 + dim*t;
    output_data = output_data0 + dim*t;

    for (d = 0; d < dim; d++)
      maxInput = THMax(maxInput, input_data[d]);

    /* THExpMinusApprox(x) approximates exp(-x), so this accumulates exp(input - max) */
    for (d = 0; d < dim; d++)
      logsum += THExpMinusApprox(maxInput-input_data[d]);
    logsum = maxInput + log(logsum);

    for (d = 0; d < dim; d++)
      output_data[d] = input_data[d] - logsum;
  }

  THTensor_(free)(input);
}
static int nn_(LogSoftMax_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  real *input_data, *output_data;
  long nframe = 0, dim = 0;
  long t, d;

  if(input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
  }
  else if(input->nDimension == 2)
  {
    nframe = input->size[0];
    dim = input->size[1];
  }
  else
    THArgCheck(0, 2, "vector or matrix expected");

  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  input_data = THTensor_(data)(input);
  output_data = THTensor_(data)(output);
  for(t = 0; t < nframe; t++)
  {
    accreal logsum = 0;
    real maxInput = -THInf;

    for(d = 0; d < dim; d++)
      maxInput = THMax(maxInput, input_data[d]);

    for(d = 0; d < dim; d++)
      logsum += THExpMinusApprox(maxInput-input_data[d]);
    logsum = maxInput + log(logsum);

    for(d = 0; d < dim; d++)
      output_data[d] = input_data[d] - logsum;

    input_data += dim;
    output_data += dim;
  }

  THTensor_(free)(input);

  return 1;
}
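/* Both log-softmax kernels above compute out[i] = x[i] - (max(x) + log(sum_j exp(x[j] - max(x)))).
   A minimal standalone sketch of that identity (plain C, hypothetical log_softmax_1d helper,
   using exp()/log() directly instead of THExpMinusApprox). */
#include <math.h>
#include <stddef.h>

static void log_softmax_1d(const double *x, double *out, size_t n)
{
  double max = x[0];
  for (size_t i = 1; i < n; i++)
    if (x[i] > max)
      max = x[i];

  double sum = 0.0;
  for (size_t i = 0; i < n; i++)
    sum += exp(x[i] - max);     /* only exponentiates non-positive values */

  double logsum = max + log(sum);
  for (size_t i = 0; i < n; i++)
    out[i] = x[i] - logsum;
}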
// directly assign without cloning or retaining (internal method)
THCSTensor* THCSTensor_(_move)(THCState *state, THCSTensor *self, THCIndexTensor *indices, THCTensor *values)
{
  int empty = THCTensor_(_nDimension)(state, values) == 0;
  if (!empty)
  {
    THArgCheck(THCIndexTensor_(_nDimension)(state, indices) == 2, 2,
               "indices must be nDim x nnz");
    THArgCheck(THCIndexTensor_(size)(state, indices, 1) == THCTensor_(size)(state, values, 0), 2,
               "indices and values must have same nnz");
    THArgCheck(THCIndexTensor_(size)(state, indices, 0) == self->nDimensionI, 2,
               "indices has incorrect first dimension, expected %d, got %d",
               self->nDimensionI, THCIndexTensor_(size)(state, indices, 0));
    THArgCheck(THCTensor_(_nDimension)(state, values) == self->nDimensionV + 1, 3,
               "values has incorrect number of dimensions, expected %d, got %d",
               self->nDimensionV + 1, THCTensor_(_nDimension)(state, values));
  }
  else
  {
    THArgCheck(THCIndexTensor_(_nDimension)(state, indices) == 0, 2,
               "if values is empty, indices must be empty too");
  }

  THCIndexTensor_(free)(state, self->indices);
  THCTensor_(free)(state, self->values);
  self->indices = indices;
  self->values = values;
  self->nnz = empty ? 0 : THCTensor_(size)(state, values, 0);
  self->coalesced = 0;

  return self;
}
void THNN_(SpatialClassNLLCriterion_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *gradInput,
          bool sizeAverage,
          THTensor *weights,
          THTensor *total_weight)
{
  INITIAL_CHECK;
  THArgCheck(THTensor_(isContiguous)(gradInput), 4, "gradInput must be contiguous");

  real *total_weight_data = THTensor_(data)(total_weight);
  if (*total_weight_data <= 0)
    return;

  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *gradInput_data = THTensor_(data)(gradInput);

  long batch_size = THTensor_(size)(input, 0);
  long n_classes = THTensor_(size)(input, 1);
  long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
  long sample_size = map_size * n_classes;

  real normalize = sizeAverage ? *total_weight_data : 1.0f;

  int b;
  #pragma omp parallel for
  for (b = 0; b < batch_size; b++)
  {
    int elem;
    for (elem = 0; elem < map_size; elem++)
    {
      int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
      THAssert(cur_target >= 0 && cur_target < n_classes);

      gradInput_data[b * sample_size + cur_target * map_size + elem] =
        -(weights ? weights_data[cur_target] : 1.0f) / normalize;
    }
  }

  THIndexTensor_(free)(target);
  if (weights)
    THTensor_(free)(weights);
}
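/* Per pixel, the loop above writes -weight[target] / normalizer at the target class and
   leaves every other class untouched (the rest of gradInput is assumed to be zeroed by
   the caller). A minimal standalone sketch for a single pixel (plain C, hypothetical
   nll_grad_at_pixel helper, with target already adjusted for TH_INDEX_BASE). */
#include <stddef.h>

static void nll_grad_at_pixel(double *grad, size_t n_classes, size_t target,
                              const double *weights, double normalize)
{
  for (size_t c = 0; c < n_classes; c++)
    grad[c] = 0.0;                 /* the sketch zeroes explicitly to stay self-contained */
  grad[target] = -(weights ? weights[target] : 1.0) / normalize;
}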
/* Storage init */
THGPUTensor *THGPUTensor_newWithStorage(THGPUStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride)
{
  THGPUTensor *self = (THGPUTensor*)THAlloc(sizeof(THGPUTensor));
  if (size && stride)
    THArgCheck(size->size == stride->size, 4, "inconsistent size");

  THGPUTensor_rawInit(self);
  THGPUTensor_rawSet(self, storage, storageOffset,
                     (size ? size->size : (stride ? stride->size : 0)),
                     (size ? size->data : NULL),
                     (stride ? stride->data : NULL));

  return self;
}
static int THNN_(view_weight_local)(THTensor **_weight)
{
  THTensor *weight = *_weight;
  THArgCheck(weight->nDimension == 3 || weight->nDimension == 6, 4,
             "weight tensor should be 3D or 6D - got %dD", weight->nDimension);
  if (weight->nDimension == 6)
  {
    long s1 = weight->size[0] * weight->size[1];
    long s2 = weight->size[2];
    long s3 = weight->size[3] * weight->size[4] * weight->size[5];
    *_weight = THTensor_(newWithStorage3d)(weight->storage,
                                           weight->storageOffset,
                                           s1, -1, s2, -1, s3, -1);
    return 1;
  }
  return 0;
}
static int torch_TensorOperator_(__div__)(lua_State *L)
{
  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
  THTensor *r;

  THArgCheck(lua_isnumber(L,2), 2, "number expected");

  r = THTensor_(new)();
  luaT_pushudata(L, r, torch_Tensor);

  THTensor_(resizeAs)(r, tensor);
  THTensor_(copy)(r, tensor);
  THTensor_(mul)(r, r, 1/lua_tonumber(L, 2));

  return 1;
}
static int torch_Tensor_(size)(lua_State *L)
{
  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
  if(lua_isnumber(L,2))
  {
    int dim = luaL_checkint(L, 2)-1;
    THArgCheck(dim >= 0 && dim < tensor->nDimension, 2,
               "dimension %d out of range of %dD tensor",
               dim+1, THTensor_(nDimension)(tensor));
    lua_pushnumber(L, tensor->size[dim]);
  }
  else
  {
    THLongStorage *size = THTensor_(newSizeOf)(tensor);
    luaT_pushudata(L, size, "torch.LongStorage");
  }
  return 1;
}
void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match");

  {
    THCTensor *selfc = THCTensor_(newContiguous)(state, self);
    src = THTensor_(newContiguous)(src);

    THCudaCheck(cudaMemcpy(THCTensor_(data)(state,selfc),
                           THTensor_(data)(src),
                           THTensor_(nElement)(src) * sizeof(real),
                           cudaMemcpyHostToDevice));

    THTensor_(free)(src);
    THCTensor_(freeCopyTo)(state, selfc, self);
  }
}
hostreal THCStorage_(get)(THCState *state, const THCStorage *self, long index)
{
  THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
#ifndef THC_REAL_IS_HALF
  real value;
  THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(real), cudaMemcpyDeviceToHost));
  return realToHostreal(value);
#else
  float *ret_d;
  float ret;
  THCudaCheck(THCudaMalloc(state, (void**)&ret_d, sizeof(float)));
  THCHalf2Float(state, ret_d, self->data + index, 1);
  THCudaCheck(cudaMemcpy(&ret, ret_d, sizeof(float), cudaMemcpyDeviceToHost));
  THCudaFree(state, ret_d);
  return ret;
#endif
}
static int torch_Tensor_(stride)(lua_State *L)
{
  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
  if(lua_isnumber(L,2))
  {
    int dim = luaL_checkint(L, 2)-1;
    THArgCheck(dim >= 0 && dim < tensor->nDimension, 2,
               "dimension %d out of range of %dD tensor",
               dim+1, THTensor_(nDimension)(tensor));
    lua_pushnumber(L, tensor->stride[dim]);
  }
  else
  {
    THLongStorage *storage = THLongStorage_newWithSize(tensor->nDimension);
    memmove(storage->data, tensor->stride, sizeof(long)*tensor->nDimension);
    luaT_pushudata(L, storage, "torch.LongStorage");
  }
  return 1;
}
THCudaStorage* THCudaStorage_newWithSize(long size)
{
  THArgCheck(size >= 0, 2, "invalid size");

  if(size > 0)
  {
    THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage));
    THCudaCheck(cudaMalloc((void**)&(storage->data), size * sizeof(float)));
    storage->size = size;
    storage->refcount = 1;
    storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
    return storage;
  }
  else
  {
    return THCudaStorage_new();
  }
}
static ssize_t THDiskFile_position(THFile *self)
{
  THDiskFile *dfself = (THDiskFile*)(self);
  THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");

#if defined(_WIN64)
  int64_t offset = _ftelli64(dfself->handle);
#elif defined(_WIN32)
  int32_t offset = ftell(dfself->handle);
#else
  off_t offset = ftello(dfself->handle);
#endif
  if (offset > -1)
    return (ssize_t)offset;
  else if(!dfself->file.isQuiet)
    THError("unable to obtain disk file offset (maybe a long overflow occurred)");

  return 0;
}
static void THDiskFile_seekEnd(THFile *self)
{
  THDiskFile *dfself = (THDiskFile*)(self);

  THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");

#if defined(_WIN64)
  if(_fseeki64(dfself->handle, 0, SEEK_END) < 0)
#elif defined(_WIN32)
  if(fseek(dfself->handle, 0, SEEK_END) < 0)
#else
  if(fseeko(dfself->handle, 0, SEEK_END) < 0)
#endif
  {
    dfself->file.hasError = 1;
    if(!dfself->file.isQuiet)
      THError("unable to seek at end of file");
  }
}
void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a)
{
  int n, nrhs, lda, ldb, info;
  THIntTensor *ipiv;
  THTensor *ra__;
  THTensor *rb__;

  int clonea;
  int cloneb;
  int destroya;
  int destroyb;

  if (a == NULL || ra_ == a) /* possibly destroy the inputs */
  {
    THArgCheck(ra_->nDimension == 2, 1, "A should be 2 dimensional");
    ra__ = THTensor_(new)();
    clonea = THTensor_(lapackClone)(ra__,ra_,0);
    destroya = 1;
  }
void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match");

  {
    THCTensor *selfc = THCTensor_(newContiguous)(state, self);
    src = THTensor_(newContiguous)(src);

    cudaStream_t stream = THCState_getCurrentStream(state);
    THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state,selfc),
                                THTensor_(data)(src),
                                THTensor_(nElement)(src) * sizeof(real),
                                cudaMemcpyHostToDevice,
                                stream));
    THCudaCheck(cudaStreamSynchronize(stream));

    THTensor_(free)(src);
    THCTensor_(freeCopyTo)(state, selfc, self);
  }
}
void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension)
{
  int d;

  if(!src)
    src = self;

  THArgCheck((dimension >= 0) && (dimension < src->nDimension), 3, "dimension out of range");

  THCudaTensor_set(state, self, src);

  if(src->size[dimension] == 1 && src->nDimension > 1)
  {
    for(d = dimension; d < self->nDimension-1; d++)
    {
      self->size[d] = self->size[d+1];
      self->stride[d] = self->stride[d+1];
    }
    self->nDimension--;
  }
}
static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int nBatch, int nChannels,
      int inputHeight, int inputWidth,
      int outputHeight, int outputWidth)
{
  THArgCheck(inputHeight > 0 && inputWidth > 0
             && outputHeight > 0 && outputWidth > 0, 2,
             "input and output sizes should be greater than 0,"
             " but got input (H: %d, W: %d) output (H: %d, W: %d)",
             inputHeight, inputWidth, outputHeight, outputWidth);
  if (input != NULL)
  {
    THNN_ARGCHECK(!input->is_empty() && input->dim() == 4, 2, input,
                  "non-empty 4D input tensor expected but got: %s");
  }

  if (gradOutput != NULL)
  {
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
  }
}
static inline void THNN_(VolumetricUpSamplingTrilinear_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int nBatch, int nChannels,
      int inputDepth, int inputHeight, int inputWidth,
      int outputDepth, int outputHeight, int outputWidth)
{
  THArgCheck(inputDepth > 0 && inputHeight > 0 && inputWidth > 0
             && outputDepth > 0 && outputHeight > 0 && outputWidth > 0, 2,
             "input and output sizes should be greater than 0,"
             " but got input (D: %d, H: %d, W: %d) output (D: %d, H: %d, W: %d)",
             inputDepth, inputHeight, inputWidth,
             outputDepth, outputHeight, outputWidth);
  if (input != NULL)
  {
    THNN_ARGCHECK(input->nDimension == 5, 2, input,
                  "5D input tensor expected but got: %s");
  }

  if (gradOutput != NULL)
  {
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth);
  }
}