/*
 * Lua binding: forward pass of SpatialConvolutionLocal.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields iW, iH, oW, oH, kW, kH, dW, dH,
 *       padW, padH, nInputPlane, nOutputPlane and the tensor fields finput,
 *       weight, bias, output are read from it
 *   2 - input tensor
 *
 * A 3D input is handled as one frame.  Any other input is treated as a batch
 * along dimension 0: each sample is selected and handed to the per-frame
 * routine, inside an OpenMP parallel loop when OpenMP is enabled.  `finput`
 * and `output` are resized here before the frame routine is called.
 *
 * Returns 1 Lua value.  NOTE(review): presumably the `output` field, which is
 * the last field fetched -- confirm that luaT leaves it on top of the stack.
 * The field reads below therefore keep their original order.
 */
static int nn_(SpatialConvolutionLocal_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);

  long iW = luaT_getfieldcheckint(L, 1, "iW");
  long iH = luaT_getfieldcheckint(L, 1, "iH");
  long oW = luaT_getfieldcheckint(L, 1, "oW");
  long oH = luaT_getfieldcheckint(L, 1, "oH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  long nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  /* Shape of the unfolded-input buffer for one frame. */
  long unfoldedRows = kW*kH*nInputPlane;
  long nOutputPixels = oH*oW;

  long nBatch;
  long b;

  if(input->nDimension == 3)
  {
    /* Single frame. */
    THTensor_(resize2d)(finput, unfoldedRows, nOutputPixels);
    THTensor_(resize3d)(output, nOutputPlane, oH, oW);

    nn_(SpatialConvolutionLocal_updateOutput_frame)(input, output, weight, bias, finput,
                                                    kW, kH, dW, dH, padW, padH,
                                                    nInputPlane, iW, iH,
                                                    nOutputPlane, oW, oH);
    return 1;
  }

  /* Batch: dimension 0 indexes the samples. */
  nBatch = input->size[0];
  THTensor_(resize3d)(finput, nBatch, unfoldedRows, nOutputPixels);
  THTensor_(resize4d)(output, nBatch, nOutputPlane, oH, oW);

#pragma omp parallel for private(b)
  for(b = 0; b < nBatch; b++)
  {
    THTensor *inputSample = THTensor_(newSelect)(input, 0, b);
    THTensor *outputSample = THTensor_(newSelect)(output, 0, b);
    THTensor *finputSample = THTensor_(newSelect)(finput, 0, b);

    nn_(SpatialConvolutionLocal_updateOutput_frame)(inputSample, outputSample, weight, bias, finputSample,
                                                    kW, kH, dW, dH, padW, padH,
                                                    nInputPlane, iW, iH,
                                                    nOutputPlane, oW, oH);

    THTensor_(free)(inputSample);
    THTensor_(free)(outputSample);
    THTensor_(free)(finputSample);
  }

  return 1;
}
/*
 * Lua binding: gradient of SpatialConvolutionLocal with respect to its input.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields iW, iH, oW, oH, kW, kH, dW, dH,
 *       padW, padH, nInputPlane, nOutputPlane and the tensor fields finput,
 *       fgradInput, weight, gradInput are read from it
 *   2 - input tensor
 *   3 - gradOutput tensor
 *
 * `gradInput` is resized like `input` and `fgradInput` like `finput`.
 * Dimensions 1 and 2 of `weight` are swapped (source and destination are the
 * same tensor) for the duration of the computation and swapped back before
 * returning.
 *
 * Returns 1 Lua value.  NOTE(review): presumably the `gradInput` field, which
 * is the last field fetched -- confirm that luaT leaves it on the stack.
 */
static int nn_(SpatialConvolutionLocal_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);

  long iW = luaT_getfieldcheckint(L, 1, "iW");
  long iH = luaT_getfieldcheckint(L, 1, "iH");
  long oW = luaT_getfieldcheckint(L, 1, "oW");
  long oH = luaT_getfieldcheckint(L, 1, "oH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  long nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  int isBatch = (input->nDimension != 3);

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  /* Temporarily swap weight dimensions 1 and 2; undone below. */
  THTensor_(transpose)(weight, weight, 1, 2);

  if(!isBatch)
  {
    nn_(SpatialConvolutionLocal_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput,
                                                       kW, kH, dW, dH, padW, padH,
                                                       nInputPlane, iW, iH,
                                                       nOutputPlane, oW, oH);
  }
  else
  {
    long nBatch = input->size[0];
    long b;

#pragma omp parallel for private(b)
    for(b = 0; b < nBatch; b++)
    {
      THTensor *gradInputSample = THTensor_(newSelect)(gradInput, 0, b);
      THTensor *gradOutputSample = THTensor_(newSelect)(gradOutput, 0, b);
      THTensor *fgradInputSample = THTensor_(newSelect)(fgradInput, 0, b);

      nn_(SpatialConvolutionLocal_updateGradInput_frame)(gradInputSample, gradOutputSample, weight, fgradInputSample,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nInputPlane, iW, iH,
                                                         nOutputPlane, oW, oH);

      THTensor_(free)(gradInputSample);
      THTensor_(free)(gradOutputSample);
      THTensor_(free)(fgradInputSample);
    }
  }

  /* Restore the original weight layout. */
  THTensor_(transpose)(weight, weight, 1, 2);

  return 1;
}
static int nn_(SparseLinear_updateOutput)(lua_State *L) { long i; THTensor * input = luaT_checkudata(L, 2, torch_Tensor); THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); long outDim = weight->size[0]; long inDim = weight->size[1]; luaL_argcheck(L, nn_(checkInput)(input), 2, "input size must be nnz x 2"); luaL_argcheck(L, nn_(checkSize1D)(output, outDim), 1, "output size wrong"); luaL_argcheck(L, nn_(checkSize1D)(bias, outDim), 1, "bias size wrong"); lua_getfield(L, 1, "shardBuffer"); if (!lua_isnil(L, -1)) { THTensor *buffer = luaT_getfieldcheckudata(L, 1, "shardBuffer", torch_Tensor); long num_shards = buffer->size[1]; luaL_argcheck(L, buffer->nDimension == 2 && buffer->size[0] == outDim && num_shards > 0, 1, "shardBuffer size wrong"); THTensor_(zero)(buffer); #pragma omp parallel for private(i) schedule(static) num_threads(num_shards) for (i = 0; i < input->size[0]; i++) { #ifdef _OPENMP int shardId = omp_get_thread_num(); #else int shardId = 1; #endif long offset = (long)(THTensor_(get2d)(input, i, 0)) - 1; if (offset >= 0 && offset < inDim) { THBlas_(axpy)(outDim, THTensor_(get2d)(input, i, 1), THTensor_(data)(weight) + offset * weight->stride[1], weight->stride[0], THTensor_(data)(buffer) + shardId * buffer->stride[1], buffer->stride[0]); } else { luaL_error(L, "index out of bound. updateOutput: \ %ld not between 1 and %ld", offset + 1, inDim); } } THTensor_(sum)(output, buffer, 1); THTensor_(cadd)(output, bias, 1.0, output); lua_getfield(L, 1, "output"); return 1; }
/*
 * Lua binding: gradient of SpatialConvolutionMM with respect to its input.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields kW, kH, dW, dH, padW, padH,
 *       nOutputPlane and the tensor fields finput, fgradInput, weight,
 *       gradInput are read from it
 *   2 - input tensor (3D, or 4D for a batch)
 *   3 - gradOutput tensor
 *
 * The per-frame helper in this file takes (kW, kH, dW, dH, padW, padH); the
 * stride and padding values are therefore read from the module and forwarded.
 * NOTE(review): the field names dW/dH/padW/padH follow the other convolution
 * modules in this file -- confirm that the Lua side of SpatialConvolutionMM
 * stores them under these names.
 *
 * `weight` has dimensions 0 and 1 swapped (source and destination are the
 * same tensor) during the computation and swapped back before returning.
 *
 * Returns 1 Lua value.  NOTE(review): presumably the `gradInput` field, which
 * is the last field fetched -- confirm that luaT leaves it on the stack.
 */
static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  /* The plane dimension is 1 for a 4D (batch) input and 0 otherwise. */
  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  /* Temporarily swap weight dimensions 0 and 1; undone below. */
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput,
                                                    kW, kH, dW, dH, padW, padH);
  }
  else
  {
    long T = input->size[0];
    long t;

    /* The REFCOUNTED flag is cleared around the parallel loop and set again
     * afterwards.  NOTE(review): presumably this keeps the newSelect/free
     * calls in the worker threads from updating the shared storage reference
     * counts concurrently -- confirm against the TH storage implementation. */
    THStorage_(clearFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t,
                                                      kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }

    THStorage_(setFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);
  }

  /* Restore the original weight layout. */
  THTensor_(transpose)(weight, weight, 0, 1);

  return 1;
}
/*
 * Lua binding: accumulates the parameter gradients of SpatialConvolutionLocal.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields iW, iH, oW, oH, kW, kH, dW, dH,
 *       padW, padH, nInputPlane, nOutputPlane and the tensor fields finput,
 *       gradWeight, gradBias are read from it
 *   2 - input tensor (only its dimensionality and batch size are used here)
 *   3 - gradOutput tensor
 *   4 - optional scale factor, defaults to 1
 *
 * A 3D input is handled as one frame; otherwise the per-frame routine is
 * called sequentially for every index along dimension 0, each call receiving
 * the same gradWeight and gradBias tensors.
 *
 * Returns no Lua values.
 */
static int nn_(SpatialConvolutionLocal_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);

  long iW = luaT_getfieldcheckint(L, 1, "iW");
  long iH = luaT_getfieldcheckint(L, 1, "iH");
  long oW = luaT_getfieldcheckint(L, 1, "oW");
  long oH = luaT_getfieldcheckint(L, 1, "oH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  long nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  long nBatch;
  long b;

  if(input->nDimension == 3)
  {
    /* Single frame. */
    nn_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nInputPlane, iW, iH,
                                                         nOutputPlane, oW, oH);
    return 0;
  }

  /* Batch: sequential loop over the samples. */
  nBatch = input->size[0];
  for(b = 0; b < nBatch; b++)
  {
    THTensor *gradOutputSample = THTensor_(newSelect)(gradOutput, 0, b);
    THTensor *finputSample = THTensor_(newSelect)(finput, 0, b);

    nn_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutputSample, gradWeight, gradBias, finputSample, scale,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nInputPlane, iW, iH,
                                                         nOutputPlane, oW, oH);

    THTensor_(free)(gradOutputSample);
    THTensor_(free)(finputSample);
  }

  return 0;
}
/*
 * Forward pass of SpatialConvolutionLocal for a single frame.
 *
 * Steps:
 *   1. unfold `input` into `finput` via nn_(unfolded_copy);
 *   2. copy `bias` into `output`;
 *   3. add the batched matrix product of `weight` and the unfolded input to
 *      `output`, through 3D views created on the existing storages.
 *
 * The two views share storage with `output` and `finput`, so the baddbmm
 * result is written directly into `output`.  Both views are freed before
 * returning.
 */
static void nn_(SpatialConvolutionLocal_updateOutput_frame)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor *finput,
                                                            int kW, int kH, int dW, int dH, int padW, int padH,
                                                            long nInputPlane, long inputWidth, long inputHeight,
                                                            long nOutputPlane, long outputWidth, long outputHeight)
{
  THTensor *output3d, *finput3d;

  nn_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
                     nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);

  THTensor_(copy)(output, bias);

  /* View of output as oH*oW x nOutputPlane x 1 */
  output3d = THTensor_(newWithStorage3d)(output->storage, output->storageOffset,
                                         outputHeight*outputWidth, 1,
                                         nOutputPlane, outputHeight*outputWidth,
                                         1, nOutputPlane*outputHeight*outputWidth);

  /* View of finput as oH*oW x nInputPlane*kH*kW x 1 */
  finput3d = THTensor_(newWithStorage3d)(finput->storage, finput->storageOffset,
                                         outputHeight*outputWidth, 1,
                                         kW*kH*nInputPlane, outputHeight*outputWidth,
                                         1, kW*kH*nInputPlane*outputHeight*outputWidth);

  /* weight:   oH*oW x nOutputPlane x nInputPlane*kH*kW
   * finput3d: oH*oW x nInputPlane*kH*kW x 1
   * output3d: oH*oW x nOutputPlane x 1 (accumulated on top of the bias) */
  THTensor_(baddbmm)(output3d, 1.0, output3d, 1.0, weight, finput3d);

  THTensor_(free)(output3d);
  THTensor_(free)(finput3d);
}
/*
 * Input-gradient computation of SpatialConvolutionLocal for a single frame.
 *
 * The batched product of `weight` and `gradOutput` overwrites `fgradInput`
 * (through 3D views on the existing storages); `gradInput` is then zeroed and
 * the unfolded gradient is accumulated into it with nn_(unfolded_acc).
 *
 * The caller is expected to pass `weight` laid out as
 * oH*oW x nInputPlane*kH*kW x nOutputPlane.
 */
static void nn_(SpatialConvolutionLocal_updateGradInput_frame)(THTensor *gradInput, THTensor *gradOutput, THTensor *weight, THTensor *fgradInput,
                                                               int kW, int kH, int dW, int dH, int padW, int padH,
                                                               long nInputPlane, long inputWidth, long inputHeight,
                                                               long nOutputPlane, long outputWidth, long outputHeight)
{
  /* gradOutput seen as oH*oW x nOutputPlane x 1 */
  THTensor *gradOutputView =
    THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
                                outputHeight*outputWidth, 1,
                                nOutputPlane, outputHeight*outputWidth,
                                1, nOutputPlane*outputHeight*outputWidth);

  /* fgradInput seen as oH*oW x nInputPlane*kH*kW x 1 */
  THTensor *fgradInputView =
    THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset,
                                outputHeight*outputWidth, 1,
                                kW*kH*nInputPlane, outputHeight*outputWidth,
                                1, kW*kH*nInputPlane*outputHeight*outputWidth);

  /* beta is 0.0, so the previous contents of fgradInput are not accumulated. */
  THTensor_(baddbmm)(fgradInputView, 0.0, fgradInputView, 1.0, weight, gradOutputView);

  THTensor_(free)(gradOutputView);
  THTensor_(free)(fgradInputView);

  /* Fold the unfolded gradient back into a freshly zeroed gradInput. */
  THTensor_(zero)(gradInput);
  nn_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
                    nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
}
/*
 * Input-gradient computation of SpatialConvolutionMM for a single frame.
 *
 * `gradOutput` is viewed as a 2D matrix (size[0] rows, size[1]*size[2]
 * columns) on its existing storage, multiplied by `weight` into `fgradInput`
 * (overwriting it, beta is 0), and the result is accumulated into a zeroed
 * `gradInput` by nn_(unfolded_acc).
 *
 * The plane count and the input/output extents passed to nn_(unfolded_acc)
 * are taken from the tensor sizes: dimension 0 is the plane count, dimension
 * 1 the height and dimension 2 the width.
 */
static void nn_(SpatialConvolutionMM_updateGradInput_frame)(THTensor *gradInput, THTensor *gradOutput, THTensor *weight, THTensor *fgradInput,
                                                            int kW, int kH, int dW, int dH, int padW, int padH)
{
  long nRows = gradOutput->size[0];
  long nCols = gradOutput->size[1]*gradOutput->size[2];

  THTensor *gradOutputFlat =
    THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
                                nRows, -1,
                                nCols, -1);

  THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutputFlat);
  THTensor_(free)(gradOutputFlat);

  THTensor_(zero)(gradInput);

  nn_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
                    gradInput->size[0],    /* number of input planes */
                    gradInput->size[2],    /* input width */
                    gradInput->size[1],    /* input height */
                    gradOutput->size[2],   /* output width */
                    gradOutput->size[1]);  /* output height */
}
/*
 * Lua binding: accumulates the parameter gradients of SpatialConvolutionMM.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric field nOutputPlane and the tensor fields
 *       finput, gradWeight, gradBias are read from it
 *   2 - input tensor (only its dimensionality and batch size are used here)
 *   3 - gradOutput tensor
 *   4 - optional scale factor, defaults to 1
 *
 * Raises an argument error when the plane dimension of gradOutput (dimension
 * 1 for a 4D input, dimension 0 otherwise) differs from nOutputPlane.
 *
 * Returns no Lua values.
 */
static int nn_(SpatialConvolutionMM_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  long nBatch;
  long b;

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );

  if(input->nDimension == 3)
  {
    /* Single frame. */
    nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
    return 0;
  }

  /* Batch: sequential loop, every sample accumulates into the same
   * gradWeight and gradBias tensors. */
  nBatch = input->size[0];
  for(b = 0; b < nBatch; b++)
  {
    THTensor *gradOutputSample = THTensor_(newSelect)(gradOutput, 0, b);
    THTensor *finputSample = THTensor_(newSelect)(finput, 0, b);

    nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutputSample, gradWeight, gradBias, finputSample, scale);

    THTensor_(free)(gradOutputSample);
    THTensor_(free)(finputSample);
  }

  return 0;
}
/*
 * Forward pass of SpatialConvolutionMM for a single frame.
 *
 * Steps:
 *   1. unfold `input` into `finput` via nn_(unfolded_copy);
 *   2. fill every output plane with its bias value;
 *   3. add weight * finput on top, through a 2D view
 *      (nOutputPlane x outputHeight*outputWidth) created on the storage of
 *      `output`.
 *
 * Step 2 writes outputHeight*outputWidth consecutive elements starting at
 * each plane offset, i.e. it treats every plane of `output` as densely
 * packed.
 */
static void nn_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor *finput,
                                                         int kW, int kH, int dW, int dH, int padW, int padH,
                                                         long nInputPlane, long inputWidth, long inputHeight,
                                                         long nOutputPlane, long outputWidth, long outputHeight)
{
  long plane;
  THTensor *outputFlat;

  nn_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
                     nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);

  outputFlat = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
                                           nOutputPlane, -1,
                                           outputHeight*outputWidth, -1);

  /* Initialise each plane with its bias. */
  for(plane = 0; plane < nOutputPlane; plane++)
  {
    THVector_(fill)(output->storage->data + output->storageOffset + output->stride[0]*plane,
                    THTensor_(get1d)(bias, plane),
                    outputHeight*outputWidth);
  }

  /* output += weight * finput */
  THTensor_(addmm)(outputFlat, 1, outputFlat, 1, weight, finput);

  THTensor_(free)(outputFlat);
}
/*
 * Registers the VolumetricAveragePooling function table.
 *
 * The metatable of torch_Tensor is pushed, the entries of
 * nn_(VolumetricAveragePooling__) are registered at the name "nn", and the
 * pushed value is popped again so the Lua stack is left as it was found.
 */
static void nn_(VolumetricAveragePooling_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(VolumetricAveragePooling__), "nn");
  lua_pop(L, 1);
}
/*
 * Lua binding: gradient of VolumetricAveragePooling with respect to its input.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields dT, dW, dH, kT, kW, kH and the
 *       tensor field gradInput are read from it
 *   2 - input tensor, 4D (planes x time x height x width) or 5D with a
 *       leading batch dimension
 *   3 - gradOutput tensor with the same number of dimensions as input
 *
 * `gradInput` is resized like `input` and zeroed before the per-frame routine
 * accumulates into it.  A contiguous version of `gradOutput` is obtained for
 * the duration of the call and released before returning.
 *
 * Sizes are held in `long` so that the per-sample stride products below are
 * not computed in `int`.  The dimension checks run before the contiguous
 * tensor is obtained, so a failing check does not leave it unreleased.
 *
 * Returns 1 Lua value.  NOTE(review): presumably the `gradInput` field, which
 * is the last field fetched -- confirm that luaT leaves it on the stack.
 */
static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *gradInput_data;
  real *gradOutput_data;

  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  /* Reject shapes whose size[] entries would be read out of range below. */
  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");
  luaL_argcheck(L, gradOutput->nDimension == input->nDimension, 3,
                "gradOutput must have as many dimensions as input");

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* in batch mode every dimension index shifts by one */
  if (input->nDimension == 5) {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  otime = gradOutput->size[dimt];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);

  /* backprop */
  if (input->nDimension == 4) { /* non-batch mode */
    nn_(VolumetricAveragePooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data, nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH);
  } else { /* batch mode */
    long p;
    long nBatch = input->size[0];

    /* number of elements per sample in gradInput / gradOutput */
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateGradInput_frame)(
        gradInput_data + p * istride,
        gradOutput_data + p * ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* cleanup: release the contiguous gradOutput obtained above */
  THTensor_(free)(gradOutput);
  return 1;
}
#pragma omp parallel for private(p) for (p = 0; p < nBatch; p++) { nn_(VolumetricAveragePooling_updateGradInput_frame)( gradInput_data + p * istride, gradOutput_data + p * ostride, nslices, itime, iwidth, iheight, otime, owidth, oheight, kT, kW, kH, dT, dW, dH); } } /* cleanup */ THTensor_(free)(gradOutput); return 1; } static const struct luaL_Reg nn_(VolumetricAveragePooling__) [] = { {"VolumetricAveragePooling_updateOutput", nn_(VolumetricAveragePooling_updateOutput)}, {"VolumetricAveragePooling_updateGradInput", nn_(VolumetricAveragePooling_updateGradInput)}, {NULL, NULL} }; static void nn_(VolumetricAveragePooling_init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_registeratname(L, nn_(VolumetricAveragePooling__), "nn"); lua_pop(L,1); } #endif
/*
 * Lua binding: forward pass of SpatialFullConvolution.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields dW, dH, kW, kH, nInputPlane,
 *       nOutputPlane, padW, padH, adjW, adjH and the tensor fields weight,
 *       bias, finput, fgradInput, output are read from it
 *   2 - input tensor, 3D or 4D (batch mode)
 *
 * `finput` is used as the column buffer and `fgradInput` as a buffer of ones
 * for the bias accumulation.  A 3D input is temporarily resized to a batch of
 * one; `input` and `output` are resized back to 3D before returning.
 *
 * Output extent per axis: (in - 1) * stride - 2 * pad + kernel + adj.
 *
 * Returns 1 Lua value.  NOTE(review): presumably the `output` field, which is
 * the last field fetched -- confirm that luaT leaves it on the stack.
 */
static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L)
{
  /* Input */
  THTensor *input = (THTensor*)luaT_checkudata(L, 2, torch_Tensor);

  /* Module parameters */
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight = (THTensor*)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = (THTensor*)luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *colBuf = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *onesBuf = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *output = (THTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  /* Remember the original rank: the resize below changes nDimension. */
  const int singleSample = (input->nDimension == 3);

  if (singleSample) {
    luaL_argcheck(L, input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match");
    /* Promote to a batch of one. */
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
  } else {
    luaL_argcheck(L, input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match");
  }

  long iW = input->size[3];
  long iH = input->size[2];
  long oW = (iW - 1) * dW - 2*padW + kW + adjW;
  long oH = (iH - 1) * dH - 2*padH + kH + adjH;

  long nBatch = input->size[0];

  /* Result and column scratch buffer. */
  THTensor_(resize4d)(output, nBatch, nOutputPlane, oH, oW);
  THTensor_(resize2d)(colBuf, nOutputPlane*kW*kH, iH*iW);

  /* Buffer of ones for the bias accumulation.  It may be shared with other
   * modules: it is only ever enlarged and always contains ones. */
  if (onesBuf->nDimension != 2 || onesBuf->size[0]*onesBuf->size[1] < oH*oW) {
    THTensor_(resize2d)(onesBuf, oH, oW);
    THTensor_(fill)(onesBuf, 1);
  }

  /* Per-sample views, re-pointed on every iteration. */
  THTensor *inputSample = THTensor_(new)();
  THTensor *outputSample = THTensor_(new)();

  int b;
  for (b = 0; b < nBatch; b++) {
    THTensor_(select)(inputSample, input, 0, b);
    THTensor_(select)(outputSample, output, 0, b);

    /* GEMM dimensions (the BLAS routine assumes column-major matrices). */
    long gemmM = weight->size[1] * weight->size[2] * weight->size[3];
    long gemmN = colBuf->size[1];
    long gemmK = weight->size[0];

    /* Column buffer = product of the input sample and the weights. */
    THBlas_(gemm)(
      'n', 't',
      gemmN, gemmM, gemmK,
      1,
      THTensor_(data)(inputSample), gemmN,
      THTensor_(data)(weight), gemmM,
      0,
      THTensor_(data)(colBuf), gemmN
    );

    /* Scatter the columns into the output sample. */
    nn_(col2im)(
      THTensor_(data)(colBuf),
      nOutputPlane, oH, oW, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(outputSample)
    );

    /* Bias is added afterwards (beta is 1) as a product with the ones
     * buffer. */
    long biasM = nOutputPlane;
    long biasN = oH * oW;
    long biasK = 1;

    THBlas_(gemm)(
      't', 'n',
      biasN, biasM, biasK,
      1,
      THTensor_(data)(onesBuf), biasK,
      THTensor_(data)(bias), biasK,
      1,
      THTensor_(data)(outputSample), biasN
    );
  }

  THTensor_(free)(inputSample);
  THTensor_(free)(outputSample);

  /* Undo the temporary promotion to a batch of one. */
  if (singleSample) {
    THTensor_(resize3d)(output, nOutputPlane, oH, oW);
    THTensor_(resize3d)(input, nInputPlane, iH, iW);
  }

  return 1;
}
/*
 * Lua binding: accumulates the parameter gradients of SpatialFullConvolution.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields dW, dH, kW, kH, nInputPlane,
 *       nOutputPlane, padW, padH, adjW, adjH and the tensor fields
 *       gradWeight, gradBias, finput, fgradInput are read from it
 *   2 - input tensor, 3D or 4D (batch mode)
 *   3 - gradOutput tensor
 *   4 - optional scale factor, defaults to 1
 *
 * `finput` is used as the column buffer and `fgradInput` as a buffer of ones
 * for the bias accumulation.  3D `input` and `gradOutput` are temporarily
 * resized to a batch of one and resized back before returning.
 *
 * The scale factor is stored as `real`, like in the other accGradParameters
 * bindings of this file, so it is not narrowed to single precision when
 * `real` is a wider type.
 *
 * Returns no Lua values.
 */
static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L)
{
  /* Inputs */
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  /* Params */
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");
  real scale = luaL_optnumber(L, 4, 1);

  THTensor *gradWeight = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  int batch = 1;
  if (input->nDimension == 3) {
    /* Force batch: promote both tensors to a batch of one. */
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputWidth = input->size[3];
  long inputHeight = input->size[2];
  long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  /* Batch size */
  long batchSize = input->size[0];

  /* Buffer of ones for the bias accumulation; only refilled when it is not
   * 2D or holds fewer elements than one output plane. */
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  /* Resize temporary columns */
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  /* Per-sample views, re-pointed on every iteration. */
  THTensor *input_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  for (elt = 0; elt < batchSize; elt ++) {
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    /* Extract columns from the output gradient of this sample. */
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(columns)
    );

    /* GEMM dimensions (the BLAS routine assumes column-major matrices). */
    long n = columns->size[0];   /* nOutputPlane * kh * kw */
    long m = input_n->size[0];   /* nInputPlane */
    long k = columns->size[1];   /* inputHeight * inputWidth */

    /* gradWeight += scale * (product of columns and input sample) */
    THBlas_(gemm)(
      't', 'n',
      n, m, k,
      scale,
      THTensor_(data)(columns), k,
      THTensor_(data)(input_n), k,
      1,
      THTensor_(data)(gradWeight), n
    );

    /* Bias gradient: matrix-vector product of the output gradient with the
     * ones buffer, accumulated into gradBias (beta is 1). */
    long m_ = nOutputPlane;
    long k_ = outputHeight * outputWidth;

    THBlas_(gemv)(
      't',
      k_, m_,
      scale,
      THTensor_(data)(gradOutput_n), k_,
      THTensor_(data)(ones), 1,
      1,
      THTensor_(data)(gradBias), 1
    );
  }

  /* Free */
  THTensor_(free)(input_n);
  THTensor_(free)(gradOutput_n);

  /* Undo the temporary promotion to a batch of one. */
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  /* Return nothing */
  return 0;
}
/*
 * Lua binding: gradient of SpatialFullConvolution with respect to its input.
 *
 * Lua stack on entry:
 *   1 - module table; the numeric fields dW, dH, kW, kH, nInputPlane,
 *       nOutputPlane, padW, padH, adjW, adjH and the tensor fields weight,
 *       finput, gradInput are read from it
 *   2 - input tensor, 3D or 4D (batch mode)
 *   3 - gradOutput tensor
 *
 * `finput` is used as the column buffer for the output gradient.  3D `input`
 * and `gradOutput` are temporarily resized to a batch of one; they and
 * `gradInput` are resized back to 3D before returning.
 *
 * Returns 1 Lua value.  NOTE(review): presumably the `gradInput` field, which
 * is the last field fetched -- confirm that luaT leaves it on the stack.
 */
static int nn_(SpatialFullConvolution_updateGradInput)(lua_State *L)
{
  /* Inputs */
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  /* Module parameters */
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight = (THTensor *)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradColBuf = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradInput = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  /* Remember the original rank: the resize below changes nDimension. */
  const int singleSample = (input->nDimension == 3);

  if (singleSample) {
    /* Promote both tensors to a batch of one. */
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long iW = input->size[3];
  long iH = input->size[2];
  long oW = (iW - 1) * dW - 2*padW + kW + adjW;
  long oH = (iH - 1) * dH - 2*padH + kH + adjH;

  long nBatch = input->size[0];

  /* Result and column scratch buffer. */
  THTensor_(resize4d)(gradInput, nBatch, nInputPlane, iH, iW);
  THTensor_(resize2d)(gradColBuf, nOutputPlane*kW*kH, iH*iW);

  /* Per-sample views, re-pointed on every iteration. */
  THTensor *gradInputSample = THTensor_(new)();
  THTensor *gradOutputSample = THTensor_(new)();

  int b;
  for (b = 0; b < nBatch; b++) {
    THTensor_(select)(gradInputSample, gradInput, 0, b);
    THTensor_(select)(gradOutputSample, gradOutput, 0, b);

    /* Extract columns from the output gradient of this sample. */
    nn_(im2col)(
      THTensor_(data)(gradOutputSample),
      nOutputPlane, oH, oW, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(gradColBuf)
    );

    /* GEMM dimensions (the BLAS routine assumes column-major matrices). */
    long gemmM = weight->size[0];
    long gemmN = gradColBuf->size[1];
    long gemmK = weight->size[1] * weight->size[2] * weight->size[3];

    /* The input gradient of this sample is overwritten (beta is 0) with the
     * product of the columns and the weights. */
    THBlas_(gemm)(
      'n', 'n',
      gemmN, gemmM, gemmK,
      1,
      THTensor_(data)(gradColBuf), gemmN,
      THTensor_(data)(weight), gemmK,
      0,
      THTensor_(data)(gradInputSample), gemmN
    );
  }

  THTensor_(free)(gradInputSample);
  THTensor_(free)(gradOutputSample);

  /* Undo the temporary promotion to a batch of one. */
  if (singleSample) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, oH, oW);
    THTensor_(resize3d)(input, nInputPlane, iH, iW);
    THTensor_(resize3d)(gradInput, nInputPlane, iH, iW);
  }

  return 1;
}
/*
 * Registers the SpatialUpSampling function table.
 *
 * The metatable of torch_Tensor is pushed, the entries of
 * nn_(SpatialUpSampling__) are registered at the name "nn", and the pushed
 * value is popped again so the Lua stack is left as it was found.
 *
 * Uses the name-based luaT_pushmetatable(L, torch_Tensor) call, as the other
 * *_init functions and every luaT_checkudata call in this file do, instead of
 * the id-based luaT_pushmetaclass(L, torch_(Tensor_id)).
 */
static void nn_(SpatialUpSampling_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(SpatialUpSampling__), "nn");
  lua_pop(L,1);
}
for(p = 0; p < input->size[0]; p++) { /* BIAS */ real *ptr_gradOutput = gradOutput_data + p*nOutputPlane*noutSlice + k*noutSlice; long l; for(l = 0; l < noutSlice; l++) gradBias_data[k] += scale*ptr_gradOutput[l]; } } /* gradient to kernels */ THTensor_(conv2DRevgerm)(gradWeight, 1.0, scale, input, gradOutput, dH, dW); } return 0; } static const struct luaL_Reg nn_(SpatialConvolution__) [] = { {"SpatialConvolution_updateOutput", nn_(SpatialConvolution_updateOutput)}, {"SpatialConvolution_updateGradInput", nn_(SpatialConvolution_updateGradInput)}, {"SpatialConvolution_accGradParameters", nn_(SpatialConvolution_accGradParameters)}, {NULL, NULL} }; static void nn_(SpatialConvolution_init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_registeratname(L, nn_(SpatialConvolution__), "nn"); lua_pop(L,1); } #endif
/*
 * Registers the SpatialAdaptiveMaxPooling function table.
 *
 * The metatable of torch_Tensor is pushed, the entries of
 * nn_(SpatialAdaptiveMaxPooling__) are registered at the name "nn", and the
 * pushed value is popped again so the Lua stack is left as it was found.
 */
static void nn_(SpatialAdaptiveMaxPooling_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(SpatialAdaptiveMaxPooling__), "nn");
  lua_pop(L, 1);
}
{ nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight, indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight, nslices, iwidth, iheight, owidth, oheight); } } /* cleanup */ THTensor_(free)(gradOutput); return 1; } static const struct luaL_Reg nn_(SpatialAdaptiveMaxPooling__) [] = { {"SpatialAdaptiveMaxPooling_updateOutput", nn_(SpatialAdaptiveMaxPooling_updateOutput)}, {"SpatialAdaptiveMaxPooling_updateGradInput", nn_(SpatialAdaptiveMaxPooling_updateGradInput)}, {NULL, NULL} }; static void nn_(SpatialAdaptiveMaxPooling_init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_registeratname(L, nn_(SpatialAdaptiveMaxPooling__), "nn"); lua_pop(L,1); } #endif
/*
 * Lua binding: forward pass of VolumetricAveragePooling.
 *
 * Lua stack:
 *   1: module table; fields read are kT, kW, kH (kernel extents),
 *      dT, dW, dH (steps) and "output" (tensor, resized here).
 *   2: input tensor, 4D or 5D (the 5D form carries a leading batch dim).
 *
 * Raises a Lua argument error when the input is not 4D/5D or when any of
 * its time/height/width extents is smaller than the matching kernel extent.
 *
 * Returns 1 (the number of Lua results, per the Lua C-function convention).
 */
static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  int batched;
  int planeDim;
  int timeDim;
  int rowDim;
  int colDim;
  long nslices, itime, iheight, iwidth;
  long otime, oheight, owidth;
  real *src;
  real *dst;

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* In batch mode every dimension index shifts right by one. */
  batched = (input->nDimension == 5);
  planeDim = batched ? 1 : 0;
  timeDim = planeDim + 1;
  rowDim = planeDim + 2;
  colDim = planeDim + 3;

  luaL_argcheck(L,
                input->size[colDim] >= kW && input->size[rowDim] >= kH && input->size[timeDim] >= kT,
                2, "input image smaller than kernel size");

  /* input extents */
  nslices = input->size[planeDim];
  itime = input->size[timeDim];
  iheight = input->size[rowDim];
  iwidth = input->size[colDim];

  /* output extents: number of kernel positions along each axis */
  otime = (itime - kT) / dT + 1;
  oheight = (iheight - kH) / dH + 1;
  owidth = (iwidth - kW) / dW + 1;

  /* Work on a contiguous view of the input; released at the end. */
  input = THTensor_(newContiguous)(input);

  if (!batched) {
    /* single sample */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);

    src = THTensor_(data)(input);
    dst = THTensor_(data)(output);

    nn_(VolumetricAveragePooling_updateOutput_frame)(
      src, dst, nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH);
  } else {
    /* batch: one frame call per sample, offset by the per-sample element count */
    long b;
    long nBatch = input->size[0];
    long inStep = nslices * itime * iwidth * iheight;
    long outStep = nslices * otime * owidth * oheight;

    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);

    src = THTensor_(data)(input);
    dst = THTensor_(data)(output);

#pragma omp parallel for private(b)
    for (b = 0; b < nBatch; b++) {
      nn_(VolumetricAveragePooling_updateOutput_frame)(
        src + b * inStep, dst + b * outStep, nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* drop the reference obtained from newContiguous */
  THTensor_(free)(input);

  return 1;
}
/*
 * Lua binding: forward pass of SpatialConvolutionMM.
 *
 * Lua stack:
 *   1: module table; fields read are kW, kH (kernel extents), dW, dH
 *      (steps), padW, padH (padding) and the tensors "finput", "weight",
 *      "bias" and "output" (finput and output are resized here).
 *   2: input tensor, 3D or 4D (the 4D form carries a leading batch dim).
 *
 * Errors:
 *   - Lua argument error if the input is not 3D/4D.
 *   - Lua argument error if dW or dH is not positive (they are used as
 *     integer divisors below).
 *   - THError if the computed output width or height is smaller than 1.
 *
 * Returns 1 (the number of Lua results, per the Lua C-function convention).
 */
static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  int dimf = 0;
  int dimw = 2;
  int dimh = 1;

  long nInputPlane;
  long inputWidth;
  long inputHeight;
  long nOutputPlane;
  long outputWidth;
  long outputHeight;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
  /* dW and dH are divisors in the output-size formula; reject zero/negative
     steps up front instead of dividing by them. */
  luaL_argcheck(L, dW > 0 && dH > 0, 1, "stride should be greater than zero");

  /* In batch mode every dimension index shifts right by one. */
  if (input->nDimension == 4) {
    dimf++;
    dimw++;
    dimh++;
  }

  nInputPlane = input->size[dimf];
  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  nOutputPlane = weight->size[0];
  outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
  outputHeight = (inputHeight + 2*padH - kH) / dH + 1;

  if (outputWidth < 1 || outputHeight < 1) {
    /* All six values are long, so they must be formatted with %ld. */
    THError("Given input size: (%ldx%ldx%ld). Calculated output size: (%ldx%ldx%ld). Output size is too small",
            nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth);
  }

  if (input->nDimension == 3) {
    /* single sample */
    THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);

    nn_(SpatialConvolutionMM_updateOutput_frame)(input, output, weight, bias, finput,
                                                 kW, kH, dW, dH, padW, padH,
                                                 nInputPlane, inputWidth, inputHeight,
                                                 nOutputPlane, outputWidth, outputHeight);
  } else {
    /* batch: run the frame routine on the t-th slice of input/output/finput */
    long T = input->size[0];
    long t;

    THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth);
    THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth);

#pragma omp parallel for private(t)
    for (t = 0; t < T; t++) {
      THTensor *input_t = THTensor_(newSelect)(input, 0, t);
      THTensor *output_t = THTensor_(newSelect)(output, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      nn_(SpatialConvolutionMM_updateOutput_frame)(input_t, output_t, weight, bias, finput_t,
                                                   kW, kH, dW, dH, padW, padH,
                                                   nInputPlane, inputWidth, inputHeight,
                                                   nOutputPlane, outputWidth, outputHeight);

      /* release the per-sample slices created by newSelect */
      THTensor_(free)(input_t);
      THTensor_(free)(output_t);
      THTensor_(free)(finput_t);
    }
  }

  return 1;
}
partial_d = term3 * THTensor_(get3d)(input, k, y+1, x) - term1 * THTensor_(get3d)(input, k, y, x); } else { partial_d = -THTensor_(get3d)(input, k, y, x); } partial_d *= THTensor_(get3d)(gradOutput, 1, y, x); THTensor_(set3d)(gradInput, k, y+1, x, partial_d + THTensor_(get3d)(gradInput, k, y+1, x)); } } } } } return 1; } static const struct luaL_Reg nn_(SpatialGraph__) [] = { {"SpatialGraph_updateOutput", nn_(SpatialGraph_updateOutput)}, {"SpatialGraph_updateGradInput", nn_(SpatialGraph_updateGradInput)}, {NULL, NULL} }; static void nn_(SpatialGraph_init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_registeratname(L, nn_(SpatialGraph__), "nn"); lua_pop(L,1); } #endif
{ accreal sum = 0; for(d = 0; d < dim; d++) sum += gradOutput_data[d]; for(d = 0; d < dim; d++) gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum; gradInput_data += dim; output_data += dim; gradOutput_data += dim; } return 1; } static const struct luaL_Reg nn_(LogSoftMax__) [] = { {"LogSoftMax_updateOutput", nn_(LogSoftMax_updateOutput)}, {"LogSoftMax_updateGradInput", nn_(LogSoftMax_updateGradInput)}, {NULL, NULL} }; void nn_(LogSoftMax_init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_registeratname(L, nn_(LogSoftMax__), "nn"); lua_pop(L,1); } #endif
/*
 * Lua binding: backward pass of SpatialAdaptiveMaxPooling.
 *
 * Lua stack:
 *   1: module table; fields read are the tensors "indices" and "gradInput"
 *      (gradInput is resized to input's shape and zeroed here).
 *   2: input tensor; a 4D input is treated as a batch, anything else takes
 *      the single-sample dimension layout.
 *   3: gradOutput tensor.
 *
 * The "indices" data is addressed as two consecutive blocks of
 * nbatch*nslices*oheight*owidth elements each, and is read through its raw
 * data pointer (NOTE(review): this assumes it is contiguous -- confirm).
 *
 * Size locals are long, matching the tensor size fields they are read from
 * and the sibling updateOutput routine, so the pointer offsets below are
 * computed in long arithmetic rather than int.
 *
 * Returns 1 (the number of Lua results, per the Lua C-function convention).
 */
static int nn_(SpatialAdaptiveMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nslices;
  long iheight;
  long iwidth;
  long oheight;
  long owidth;
  real *gradInput_data;
  real *gradOutput_data;
  real *indices_data;

  /* get contiguous gradOutput (released at the end) */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize and clear the gradient buffer */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* In batch mode every dimension index shifts right by one. */
  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 3) {
    nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                         indices_data+nslices*owidth*oheight, indices_data,
                                                         nslices,
                                                         iwidth, iheight,
                                                         owidth, oheight);
  } else {
    long p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++) {
      nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight,
                                                           gradOutput_data+p*nslices*owidth*oheight,
                                                           indices_data+(p+nbatch)*nslices*owidth*oheight,
                                                           indices_data+p*nslices*owidth*oheight,
                                                           nslices,
                                                           iwidth, iheight,
                                                           owidth, oheight);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);

  return 1;
}
/*
 * Module initialiser for LogSoftMax (non-static, unlike most sibling
 * initialisers in this chunk).
 *
 * Pushes the torch_Tensor metatable onto the Lua stack, registers the
 * nn_(LogSoftMax__) function table against it under the name "nn", then
 * pops one stack slot (presumably the metatable pushed in the first step).
 */
void nn_(LogSoftMax_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(LogSoftMax__), "nn");
  lua_pop(L, 1);
}
/*
 * Lua binding: forward pass of SpatialAdaptiveMaxPooling.
 *
 * Lua stack:
 *   1: module table; fields read are "H" and "W" (output height/width) and
 *      the tensors "indices" and "output" (both resized here).
 *   2: input tensor, 3D or 4D (the 4D form carries a leading batch dim).
 *      It is read through its own strides, so no contiguous copy is made.
 *
 * "indices" is resized with a leading dimension of 2: for each sample the
 * frame routine receives one pointer into the second half of the buffer
 * and one into the first half.
 *
 * Raises a Lua argument error when the input is not 3D/4D.
 * Returns 1 (the number of Lua results, per the Lua C-function convention).
 */
static int nn_(SpatialAdaptiveMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  long oheight = luaT_getfieldcheckint(L, 1, "H");
  long owidth = luaT_getfieldcheckint(L, 1, "W");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  int batched;
  int planeDim;
  int rowDim;
  int colDim;
  long nslices;
  long iheight;
  long iwidth;
  long istride_d;
  long istride_h;
  long istride_w;
  long mapSize;
  real *src;
  real *dst;
  real *idx;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");

  /* In batch mode every dimension index shifts right by one. */
  batched = (input->nDimension == 4);
  rowDim = batched ? 2 : 1;
  colDim = batched ? 3 : 2;
  planeDim = rowDim - 1;

  /* sizes */
  nslices = input->size[planeDim];
  iheight = input->size[rowDim];
  iwidth = input->size[colDim];

  /* strides */
  istride_d = input->stride[planeDim];
  istride_h = input->stride[rowDim];
  istride_w = input->stride[colDim];

  /* number of output elements produced for one sample */
  mapSize = nslices * owidth * oheight;

  if (!batched) {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);

    src = THTensor_(data)(input);
    dst = THTensor_(data)(output);
    idx = THTensor_(data)(indices);

    nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(src, dst,
                                                      idx + mapSize, idx,
                                                      nslices,
                                                      iwidth, iheight,
                                                      owidth, oheight,
                                                      istride_w, istride_h,
                                                      istride_d);
  } else {
    long b;
    long nbatch = input->size[0];
    long istride_b = input->stride[0];

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);

    src = THTensor_(data)(input);
    dst = THTensor_(data)(output);
    idx = THTensor_(data)(indices);

#pragma omp parallel for private(b)
    for (b = 0; b < nbatch; b++) {
      nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(src + b * istride_b,
                                                        dst + b * mapSize,
                                                        idx + (b + nbatch) * mapSize,
                                                        idx + b * mapSize,
                                                        nslices,
                                                        iwidth, iheight,
                                                        owidth, oheight,
                                                        istride_w, istride_h,
                                                        istride_d);
    }
  }

  return 1;
}
int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); if (sizeAverage) grad /= nframe; int f; for (f = 0; f < nframe; ++f) { grad_data[f*ndim+(int)target_data[f]-1] = grad; } } else THArgCheck(0, 2, "vector or matrix expected"); THTensor_(free)(target); THTensor_(free)(input); THTensor_(free)(gradInput); return 1; } static const struct luaL_Reg nn_(ClassNLLCriterion__) [] = { {"ClassNLLCriterion_updateOutput", nn_(ClassNLLCriterion_updateOutput)}, {"ClassNLLCriterion_updateGradInput", nn_(ClassNLLCriterion_updateGradInput)}, {NULL, NULL} }; static void nn_(ClassNLLCriterion_init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_registeratname(L, nn_(ClassNLLCriterion__), "nn"); lua_pop(L,1); } #endif
/*
 * Module initialiser for SpatialConvolution.
 *
 * Pushes the torch_Tensor metatable onto the Lua stack, registers the
 * nn_(SpatialConvolution__) function table against it under the name "nn",
 * then pops one stack slot (presumably the metatable pushed in the first
 * step).
 */
static void nn_(SpatialConvolution_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(SpatialConvolution__), "nn");
  lua_pop(L, 1);
}
/*
 * Module initialiser for ClassNLLCriterion.
 *
 * Pushes the torch_Tensor metatable onto the Lua stack, registers the
 * nn_(ClassNLLCriterion__) function table against it under the name "nn",
 * then pops one stack slot (presumably the metatable pushed in the first
 * step).
 */
static void nn_(ClassNLLCriterion_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(ClassNLLCriterion__), "nn");
  lua_pop(L, 1);
}