/* Backpropagate gradOutput through SpatialConvolution to produce gradInput.
   Lua stack: 1 = module table, 2 = input, 3 = gradOutput.
   Supports 3D (planes x H x W) and 4D (batch) inputs; returns gradInput. */
static int nn_(SpatialConvolution_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
             "Number of output features is not equal to nOutputPlane");

  /* gradient to input: full convolution of gradOutput with the kernel bank
     transposed over its first two (out-plane / in-plane) dimensions */
  THTensor *transposedWeight = THTensor_(newTranspose)(weight, 0, 1);
  if (input->nDimension == 3)
    THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, transposedWeight, dH, dW, "F", "C");
  else
    THTensor_(conv2Dmm)(gradInput, 0.0, 1.0, gradOutput, transposedWeight, dH, dW, "F", "C");
  THTensor_(free)(transposedWeight);

  return 1;
}
/* Backward pass of 2D spatial max pooling over a 3D (planes x H x W) input:
   each gradOutput element is routed back to the input position that produced
   the corresponding max, as recorded in `indices` during the forward pass. */
static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  // get contiguous gradOutput
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // resize
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  // sizes
  int ichannels = input->size[0];
  int iheight = input->size[1];
  int iwidth = input->size[2];
  int ochannels = ichannels;
  int oheight = gradOutput->size[1];
  int owidth = gradOutput->size[2];

  // get raw pointers (indices is read as raw data; assumes it is contiguous
  // with a 2*channels x oheight x owidth layout -- TODO confirm vs forward pass)
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  real *indices_data = THTensor_(data)(indices);

  // backprop
  long k;
  for (k = 0; k < input->size[0]; k++)
  {
    // pointers to slices: y-offsets live in plane k of indices,
    // x-offsets in plane k + ochannels
    real *gradOutput_p = gradOutput_data + k*owidth*oheight;
    real *gradInput_p = gradInput_data + k*iwidth*iheight;
    real *indy_p = indices_data + k*owidth*oheight;
    real *indx_p = indices_data + (k+ochannels)*owidth*oheight;

    // calculate max points
    int i,j;
    for(i = 0; i < oheight; i++)
    {
      for(j = 0; j < owidth; j++)
      {
        // retrieve position of max: stored value is 1-based within the
        // pooling window, so subtract 1 and add the window origin (i*dH, j*dW)
        long maxi = *(indy_p + i*owidth + j) - 1 + i*dH;
        long maxj = *(indx_p + i*owidth + j) - 1 + j*dW;

        // update gradient: accumulate, since windows may overlap when dW<kW/dH<kH
        *(gradInput_p + maxi*iwidth + maxj) += *(gradOutput_p + i*owidth + j);
      }
    }
  }

  // cleanup
  THTensor_(free)(gradOutput);
  return 1;
}
/* OpenMP-enabled variant of SpatialMaxPooling backward, written against the
   old TH tensor API (a NULL second argument to select/unfold means "operate
   in place on the first tensor"). Routes each gradOutput value back through
   an unfolded (windowed) view of gradInput using the stored max indices. */
static int nnOmp_(SpatialMaxPooling_updateGradInputOmp)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  // configure the OpenMP thread count from the module's nThread field
  setompnthread(L,1,"nThread");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
  THTensor *gradOutputPlane, *gradInputPlane, *unfoldedGradInputPlane, *gradLocalInput;
  int k,i,j;

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  gradInputPlane = THTensor_(new)();
  gradOutputPlane = THTensor_(new)();
  gradLocalInput = THTensor_(new)();
  unfoldedGradInputPlane = THTensor_(new)();

  // one plane (channel) at a time
  for (k = 0; k < input->size[0]; k++)
  {
    /* get input and output plane */
    THTensor_(select)(gradOutputPlane, gradOutput, 0, k);
    THTensor_(select)(gradInputPlane, gradInput, 0, k);

    /* Unfold input to get each local window (views share storage with
       gradInput, so writes below land in gradInput itself) */
    THTensor_(unfold)(unfoldedGradInputPlane, gradInputPlane, 0, kH, dH);
    THTensor_(unfold)(unfoldedGradInputPlane, NULL, 1, kW, dW);

    /* Calculate max points */
    for(i = 0; i < gradOutputPlane->size[0]; i++)
    {
      for(j = 0; j < gradOutputPlane->size[1]; j++)
      {
        // narrow down to the kH x kW window feeding output (i,j)
        THTensor_(select)(gradLocalInput, unfoldedGradInputPlane,0,i);
        THTensor_(select)(gradLocalInput, NULL, 0,j);
        // indices holds 1-based (y,x) offsets of the max within the window
        long maxi = THTensor_(get4d)(indices,0,k,i,j)-1;
        long maxj = THTensor_(get4d)(indices,1,k,i,j)-1;
        // read-modify-write so overlapping windows accumulate correctly
        double gi = THTensor_(get2d)(gradLocalInput,maxi,maxj)+THTensor_(get2d)(gradOutputPlane,i,j);
        THTensor_(set2d)(gradLocalInput,maxi,maxj,gi);
      }
    }
  }

  /* Cleanup */
  THTensor_(free)(gradInputPlane);
  THTensor_(free)(gradOutputPlane);
  THTensor_(free)(unfoldedGradInputPlane);
  THTensor_(free)(gradLocalInput);
  return 1;
}
/* Backward (gradInput) pass of SpatialConvolutionMM. For batched (4D) input,
   frames are processed in parallel with OpenMP; the per-frame work is done by
   nn_(SpatialConvolutionMM_updateGradInput_frame). */
static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
             "Number of output features is not equal to nOutputPlane");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);
  // transpose weight in place for the GEMM; restored at the end of the function
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH);
  }
  else
  {
    long T = input->size[0];
    long t;

    // Temporarily disable refcounting on the shared storages: each OpenMP
    // thread calls newSelect/free below, and the non-atomic refcount would
    // otherwise be a data race. Flags are restored after the parallel loop.
    THStorage_(clearFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }

    THStorage_(setFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);
  }

  // undo the in-place transpose so the module's weight layout is unchanged
  THTensor_(transpose)(weight, weight, 0, 1);

  return 1;
}
/* Forward pass of TemporalSubSampling (old TH API): each output frame is the
   sum of kW consecutive input frames (stride dW), scaled per-feature by
   `weight` and shifted by `bias`. Input layout: inputFrameSize x nFrames. */
static int nn_TemporalSubSampling_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");
  luaL_argcheck(L, input->size[0] == inputFrameSize, 2, "invalid input frame size");
  luaL_argcheck(L, input->size[1] >= kW, 2, "input sequence smaller than kernel size");

  int nInputFrame = input->size[1];
  int nOutputFrame = (nInputFrame - kW) / dW + 1;

  THTensor_resize2d(output, inputFrameSize, nOutputFrame);

  THTensor *outSlice = THTensor_new();
  THTensor *windows = THTensor_new();
  THTensor *window = THTensor_new();
  THTensor *column = THTensor_new();

  /* view the frame axis as overlapping windows of width kW, stride dW */
  THTensor_unfold(windows, input, 1, kW, dW);

  int t;
  for (t = 0; t < nOutputFrame; t++)
  {
    THTensor_select(window, windows, 1, t);
    THTensor_select(outSlice, output, 1, t);
    THTensor_zero(outSlice);

    /* sum the kW frames of this window */
    int w;
    for (w = 0; w < kW; w++)
    {
      THTensor_select(column, window, 1, w);
      THTensor_addTensor(outSlice, 1, column);
    }

    /* per-feature scale, then bias */
    THTensor_cmul(outSlice, weight);
    THTensor_addTensor(outSlice, 1, bias);
  }

  THTensor_free(outSlice);
  THTensor_free(windows);
  THTensor_free(window);
  THTensor_free(column);
  return 1;
}
/* Accumulate weight gradients for SpatialConvolutionGPU.
   Lua stack: 1 = module, 2 = input (planes x H x W x batch), 3 = gradOutput,
   4 = optional scale (default 1). When partialSum > 0, partial gradients are
   accumulated into gradWeightPartial, grouped by output location, and must be
   reduced by the caller afterwards.
   Fix: luaL_argcheck calls previously reported the wrong Lua argument index
   and tensor name (input/gradOutput errors were blamed on argument 1 /
   "weight"/"output"), producing misleading error messages. */
static int gpunn_SpatialConvolutionGPU_accGradParameters(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor *)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *gradOutput = (THGPUTensor *)luaT_checkudata(L, 3, "torch.GPUTensor");
  THGPUTensor *gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", "torch.GPUTensor");

  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padding = luaT_getfieldcheckint(L, 1, "padding");
  int partialSum = luaT_getfieldcheckint(L, 1, "partialSum");
  float scale = luaL_optnumber(L, 4, 1);

  /* gradWeight layout: nInputPlane x kH x kW x nOutputPlane */
  long nOutputPlane = gradWeight->size[3];
  long nInputPlane = gradWeight->size[0];
  long kH = gradWeight->size[1];
  long kW = gradWeight->size[2];
  long inputHeight = input->size[1];
  long inputWidth = input->size[2];
  long batchSize = input->size[3];
  long outputHeight = (padding + inputHeight - kH) / dH + 1;
  long outputWidth = (padding + inputWidth - kW) / dW + 1;

  // asserts (input is stack argument 2; kernel/stride fields belong to argument 1)
  luaL_argcheck(L, inputWidth == inputHeight, 2, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  if (partialSum)
  {
    // compute partial gradients for outputHeight*outputWidth/partialSum groups of filters separately
    gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeightPartial", "torch.GPUTensor");
    THGPUTensor_resize4d(gradWeight, outputHeight * outputWidth / partialSum, nInputPlane, kH * kW, nOutputPlane);
    // numModuleY*numModulesX/partialSum, numFilterColors, filterPixels, numFilters
  }

  // all the data must be contiguous:
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradWeight), 1, "gradWeight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradOutput), 3, "gradOutput must be contiguous");

  auto avInput = input->get_array_view();
  auto avGradOutput = gradOutput->get_array_view();
  auto avGradWeight = gradWeight->get_array_view();

  // convolutions
  spatialConv_accGradParameters(avInput, avGradOutput, avGradWeight,
                                nInputPlane, inputHeight, inputWidth, batchSize,
                                nOutputPlane, outputHeight, outputWidth,
                                kH, kW, -floor((double)padding/2), dW, 0, scale, partialSum);
  return 0;
}
static int nn_(Min_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); THLongStorage *dim; long i; luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); dim = THLongStorage_newWithSize(input->nDimension); for(i = 0; i < input->nDimension; i++) dim->data[i] = input->size[i]; dim->data[dimension] = 1; THTensor_(resize)(output, dim, NULL); THTensor_(resize)(indices, dim, NULL); THLongStorage_free(dim); TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, long theIndex = 0; real theMin = input_data[0]; for(i = 1; i < input_size; i++) { if(input_data[i*input_stride] < theMin) { theIndex = i; theMin = input_data[i*input_stride]; } } *indices_data = theIndex+1; *output_data = theMin;) if(output->nDimension > 1)
/* Backward pass of nearest-neighbour SpatialUpSampling: every gradOutput
   pixel (y,x) is accumulated into the input pixel (y/dH, x/dW) it was
   copied from in the forward pass.
   Fix: gradInput was zeroed but never resized to match the input (the
   "resize gradInput" comment had no matching resize call), so a stale or
   empty gradInput led to out-of-bounds writes through the raw pointer. */
static int nn_(SpatialUpSampling_backward)(lua_State *L)
{
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");

  // dims
  int iwidth = input->size[2];
  int iheight = input->size[1];
  int ichannels = input->size[0];
  int owidth = gradOutput->size[2];
  int oheight = gradOutput->size[1];
  int ochannels = gradOutput->size[0];

  // resize gradInput (was missing) and clear before accumulation
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  // get raw pointers
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);

  // compute gradients for each plane
  int k;
  for (k=0; k<ochannels; k++)
  {
    // get planes
    real *gradInput_p = gradInput_data + k*iwidth*iheight;
    real *gradOutput_p = gradOutput_data + k*owidth*oheight;

    // for each plane, resample
    int x,y;
    for (y=0; y<oheight; y++)
    {
      for (x=0; x<owidth; x++)
      {
        // input positions (floored)
        int ix = x/dW;
        int iy = y/dH;

        // accumulate gradient: dW*dH output pixels map to one input pixel
        gradInput_p[iy*iwidth + ix] += gradOutput_p[y*owidth + x];
      }
    }
  }
  return 1;
}
/* Forward pass of SpatialConvolutionGPU.
   Lua stack: 1 = module, 2 = input (planes x H x W x batch, 4D batch mode only).
   Output is resized to nOutputPlane x outH x outW x batch and filled by the
   spatialConv_updateOutput kernel.
   Fix: the "input must be square" check previously reported Lua argument 1
   (the module) instead of argument 2 (the input tensor). */
static int gpunn_SpatialConvolutionGPU_updateOutput(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padding = luaT_getfieldcheckint(L, 1, "padding");
  THGPUTensor *weight = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "weight", "torch.GPUTensor");
  THGPUTensor *output = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "output", "torch.GPUTensor");

  luaL_argcheck(L, input->nDimension == 4, 2, "4D (batch mode) tensor is expected");

  /* weight layout: nInputPlane x kH x kW x nOutputPlane */
  long nOutputPlane = weight->size[3];
  long nInputPlane = weight->size[0];
  long kH = weight->size[1];
  long kW = weight->size[2];
  long inputHeight = input->size[1];
  long inputWidth = input->size[2];
  long batchSize = input->size[3];
  long outputHeight = (padding + inputHeight - kH) / dH + 1;
  long outputWidth = (padding + inputWidth - kW) / dW + 1;

  // resize output
  THGPUTensor_resize4d(output, nOutputPlane, outputHeight, outputWidth, batchSize);

  // asserts (input is stack argument 2; kernel/stride fields belong to argument 1)
  luaL_argcheck(L, inputWidth == inputHeight, 2, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  // all the data must be contiguous:
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(weight), 1, "weight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(output), 1, "output must be contiguous");

  auto avInput = input->get_array_view();
  auto avOutput = output->get_array_view();
  auto avWeight = weight->get_array_view();

  // convolutions
  spatialConv_updateOutput(avInput, avWeight, avOutput,
                           nInputPlane, inputHeight, inputWidth, batchSize,
                           nOutputPlane, outputHeight, outputWidth,
                           kH, kW, -floor((double)padding/2), dW, 0, 1, true);
  return 1;
}
/* Forward pass of nearest-neighbour SpatialUpSampling: each input pixel is
   replicated over a dH x dW block of the output.
   Fix: the output tensor was never resized before its raw data pointer was
   written, so the loop wrote ochannels*oheight*owidth elements into whatever
   size `output` happened to have -- an out-of-bounds write on first use. */
static int nn_(SpatialUpSampling_forward)(lua_State *L)
{
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));

  // dims
  int iwidth = input->size[2];
  int iheight = input->size[1];
  int ochannels = input->size[0];
  int owidth = iwidth * dW;
  int oheight = iheight * dH;

  // resize output to the upsampled geometry (was missing)
  THTensor_(resize3d)(output, ochannels, oheight, owidth);

  // get raw pointers
  real *input_data = THTensor_(data)(input);
  real *output_data = THTensor_(data)(output);

  // resample each plane
  int k;
  for (k=0; k<ochannels; k++)
  {
    // get planes
    real *input_p = input_data + k*iwidth*iheight;
    real *output_p = output_data + k*owidth*oheight;

    // for each plane, resample
    int x,y;
    for (y=0; y<oheight; y++)
    {
      for (x=0; x<owidth; x++)
      {
        // input positions (floored)
        int ix = x/dW;
        int iy = y/dH;

        // set output
        output_p[y*owidth + x] = input_p[iy*iwidth + ix];
      }
    }
  }
  return 1;
}
/* Backward pass of temporal (1D) max pooling over a (nFrames x frameSize)
   input: each gradOutput element is routed back to the frame within its
   pooling window that held the maximum, per feature. */
static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));

  // get contiguous gradOutput
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // resize and zero
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  // sizes
  int noframe = gradOutput->size[0];
  long framesize = gradOutput->size[1];

  // get raw pointers (indices assumed contiguous, same shape as gradOutput)
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  real *indices_data = THTensor_(data)(indices);

  long t, y;
  for(t = 0; t < noframe; t++)
  {
    // gip points at the first input frame of pooling window t (stride dW frames)
    real *gip = gradInput_data + t*framesize*dW;
    real *gop = gradOutput_data + t*framesize;
    real *xp = indices_data + t*framesize;
    // parallel over features: each y touches a distinct gradInput element
#pragma omp parallel for private(y)
    for(y = 0; y < framesize; y++)
    {
      // compute local max:
      // index is the frame offset of the max inside window t
      // (assumes 0-based offsets as written by the forward pass -- TODO confirm)
      long maxindex = (long)xp[y];

      gip[maxindex*framesize+y] += gop[y];
    }
  }

  // cleanup
  THTensor_(free)(gradOutput);
  return 1;
}
/* Accumulate weight and bias gradients for SpatialConvolutionMM.
   Lua stack: 1 = module, 2 = input, 3 = gradOutput, 4 = optional scale.
   Per-frame work is delegated to SpatialConvolutionMM_accGradParameters_frame,
   which uses the unfolded input (`finput`) cached by the forward pass. */
static int nn_(SpatialConvolutionMM_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
             "Number of output features is not equal to nOutputPlane");

  if(input->nDimension != 3)
  {
    /* batch mode: accumulate one frame at a time into the shared gradients */
    long nFrames = input->size[0];
    long f;
    for(f = 0; f < nFrames; f++)
    {
      THTensor *gradOutputFrame = THTensor_(newSelect)(gradOutput, 0, f);
      THTensor *finputFrame = THTensor_(newSelect)(finput, 0, f);

      nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutputFrame, gradWeight, gradBias, finputFrame, scale);

      THTensor_(free)(finputFrame);
      THTensor_(free)(gradOutputFrame);
    }
  }
  else
  {
    /* single image */
    nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }

  return 0;
}
/* Forward pass of SpatialGraph: for every pixel, computes a dissimilarity
   across channels between the pixel and its right neighbour (output plane 0)
   and its bottom neighbour (output plane 1).
   dist == 0 -> Euclidean distance; otherwise cosine-based dissimilarity.
   `normalize` rescales the Euclidean result by 1/sqrt(channels), or maps the
   cosine branch to 1 - cos.
   NOTE(review): output is never resized here -- assumes the Lua wrapper
   pre-sizes it to at least 2 x oheight x owidth; confirm against caller. */
static int nn_(SpatialGraph_updateOutput)(lua_State *L)
{
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int connex = luaT_getfieldcheckint(L, 1, "connex");
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  // dims
  int iwidth = input->size[2];
  int iheight = input->size[1];
  int ichannels = input->size[0];
  int owidth = iwidth;
  int oheight = iheight;
  int ochannels = connex / 2;

  // norm ?
  double normer = (norm == 1) ? 1/sqrt(ichannels) : 1;

  // zero output
  THTensor_(zero)(output);

  // Euclidean distance
  if (dist == 0) {
    // Sum[ (Xi - Xi+1)^2 ] accumulated over channels into planes 0 (right) and 1 (down)
    int x,y,k;
    for (k=0; k<ichannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          if (x < owidth-1) {
            double temp = square(THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y, x+1));
            THTensor_(set3d)(output, 0, y, x, temp + THTensor_(get3d)(output, 0, y, x));
          }
          if (y < oheight-1) {
            double temp = square(THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y+1, x));
            THTensor_(set3d)(output, 1, y, x, temp + THTensor_(get3d)(output, 1, y, x));
          }
        }
      }
    }

    // Sqrt[ Sum[ (Xi - Xi+1)^2 ] ], optionally scaled by 1/sqrt(channels)
    for (k=0; k<ochannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          THTensor_(set3d)(output, k, y, x, sqrt(THTensor_(get3d)(output, k, y, x)) * normer);
        }
      }
    }

  // Cosine dissimilarity
  } else {
    // add epsilon to input (to get rid of 0s), working on a copy
    THTensor *inputb = THTensor_(newWithSize3d)(input->size[0], input->size[1], input->size[2]);
    THTensor_(copy)(inputb, input);
    THTensor_(add)(inputb, inputb, 1e-12);

    // Sum[ (Xi * Xi+1) ] -- dot products with the right/bottom neighbour,
    // plus squared norms of the pixel (A), right neighbour (B), bottom neighbour (C)
    int x,y,k;
    for (y=0; y<oheight; y++) {
      for (x=0; x<owidth; x++) {
        double norm_A = 0;
        double norm_B = 0;
        double norm_C = 0;
        for (k=0; k<ichannels; k++) {
          norm_A += square(THTensor_(get3d)(inputb, k, y, x));
          if (x < owidth-1) {
            double temp = THTensor_(get3d)(inputb, k, y, x) * THTensor_(get3d)(inputb, k, y, x+1);
            THTensor_(set3d)(output, 0, y, x, temp + THTensor_(get3d)(output, 0, y, x));
            norm_B += square(THTensor_(get3d)(inputb, k, y, x+1));
          }
          if (y < oheight-1) {
            double temp = THTensor_(get3d)(inputb, k, y, x) * THTensor_(get3d)(inputb, k, y+1, x);
            THTensor_(set3d)(output, 1, y, x, temp + THTensor_(get3d)(output, 1, y, x));
            norm_C += square(THTensor_(get3d)(inputb, k, y+1, x));
          }
        }
        // normalized: 1 - cosine similarity; otherwise channels - dot product
        if (x < owidth-1) {
          if (norm) {
            THTensor_(set3d)(output, 0, y, x, 1 - THTensor_(get3d)(output, 0, y, x) / (sqrt(norm_A) * sqrt(norm_B)));
          } else {
            THTensor_(set3d)(output, 0, y, x, ichannels - THTensor_(get3d)(output, 0, y, x));
          }
        }
        if (y < oheight-1) {
          if (norm) {
            THTensor_(set3d)(output, 1, y, x, 1 - THTensor_(get3d)(output, 1, y, x) / (sqrt(norm_A) * sqrt(norm_C)));
          } else {
            THTensor_(set3d)(output, 1, y, x, ichannels - THTensor_(get3d)(output, 1, y, x));
          }
        }
      }
    }

    // Cleanup
    THTensor_(free)(inputb);
  }
  return 1;
}
/* Backward pass of SpatialGraph: backpropagates gradOutput (right- and
   bottom-neighbour dissimilarity planes) to gradInput.
   dist == 0 -> Euclidean branch; otherwise cosine branch.
   Fix: the cosine-gradient normalization terms used pow(x, 1/2) and
   pow(x, 3/2), where 1/2 and 3/2 are INTEGER divisions evaluating to 0 and 1
   respectively -- so pow(x, 1/2) == 1 and pow(x, 3/2) == x, yielding wrong
   gradients. The exponents are now the intended 0.5 and 1.5 (the cosine
   derivative requires sqrt and norm^3 factors). */
static int nn_(SpatialGraph_updateGradInput)(lua_State *L)
{
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  //int connex = luaT_getfieldcheckint(L, 1, "connex");
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");

  // dims
  //int iwidth = input->size[2];
  //int iheight = input->size[1];
  int ichannels = input->size[0];
  int owidth = gradOutput->size[2];
  int oheight = gradOutput->size[1];
  //int ochannels = gradOutput->size[0];

  // norm ? (1/channels overall: forward scaled by 1/sqrt(c), chain rule adds another)
  double normer = (norm == 1) ? 1/sqrt(ichannels)/sqrt(ichannels) : 1;

  // resize gradInput
  THTensor_(zero)(gradInput);

  // compute derivatives, and backpropagate output error to input
  if (dist == 0) {
    // Euclidean: d/dXi sqrt(sum (Xi - Xi')^2) = (Xi - Xi') / dist
    int x,y,k;
    for (k=0; k<ichannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          if (x < owidth-1) {
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y, x+1);
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 0, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y, x+1, -partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
          if (y < oheight-1) {
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y+1, x);
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 1, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y+1, x, -partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }

  // Cosine
  } else {
    int x,y,k;
    for (y=0; y<oheight; y++) {
      for (x=0; x<owidth; x++) {
        // squared norms (A = pixel, B = right neighbour, C = bottom neighbour)
        // and dot products, needed only for the normalized cosine gradient
        double sum_A = 0;
        double sum_B = 0;
        double sum_C = 0;
        double sum_AB = 0;
        double sum_AC = 0;
        if (norm) {
          for (k=0; k<ichannels; k++) {
            sum_A += square(THTensor_(get3d)(input, k, y, x));
            if (x < owidth-1) {
              sum_B += square(THTensor_(get3d)(input, k, y, x+1));
              sum_AB += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y, x+1);
            }
            if (y < oheight-1) {
              sum_C += square(THTensor_(get3d)(input, k, y+1, x));
              sum_AC += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y+1, x);
            }
          }
        }

        double term1, term2, term3, partial_d;
        double epsi = 1e-12;  // guards against division by zero

        // right-neighbour pair (output plane 0)
        if (x < owidth-1) {
          if (norm) {
            // d(1 - A.B/(|A||B|)): fixed exponents (were integer 1/2 and 3/2)
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_B, 0.5) + epsi );
            term2 = sum_AB / ( pow(sum_A, 1.5) * pow(sum_B, 0.5) + epsi );
            term3 = sum_AB / ( pow(sum_B, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            // gradient w.r.t. the pixel itself
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x) - term1 * THTensor_(get3d)(input, k, y, x+1);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x+1);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            // gradient w.r.t. the right neighbour
            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y, x+1) - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x+1, partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
        }

        // bottom-neighbour pair (output plane 1)
        if (y < oheight-1) {
          if (norm) {
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_C, 0.5) + epsi );
            term2 = sum_AC / ( pow(sum_A, 1.5) * pow(sum_C, 0.5) + epsi );
            term3 = sum_AC / ( pow(sum_C, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            // gradient w.r.t. the pixel itself
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x) - term1 * THTensor_(get3d)(input, k, y+1, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y+1, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            // gradient w.r.t. the bottom neighbour
            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y+1, x) - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y+1, x, partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }
  }
  return 1;
}
/* Full backward pass of the original (unfold-based) SpatialConvolution,
   old TH API: accumulates gradBias and gradWeight, and computes gradInput.
   Layout is planes-last: input is W x H x nInputPlane, weight is
   kW x kH x nInputPlane x nOutputPlane. A NULL second argument to
   select/unfold/transpose means "operate in place on the first tensor". */
static int nn_SpatialConvolution_backward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);

  THTensor *gradInputPlane, *unfoldedInputPlane, *unfoldedGradInputPlane, *inputPlane;
  THTensor *gradOutputPlane;
  THTensor *weightPlane, *gradWeightPlane;
  int i, k;

  gradInputPlane = THTensor_new();
  unfoldedInputPlane = THTensor_new();
  unfoldedGradInputPlane = THTensor_new();
  inputPlane = THTensor_new();
  gradOutputPlane = THTensor_new();
  weightPlane = THTensor_new();
  gradWeightPlane = THTensor_new();

  /* Not necessary with partial backprop: */
  THTensor_resizeAs(gradInput, input);
  THTensor_zero(gradInput);

  for(k = 0; k < nOutputPlane; k++)
  {
    THTensor_select(gradOutputPlane, gradOutput, 2, k);
    /* gradBias[k] accumulates the sum of the k-th gradOutput plane */
    THTensor_set1d(gradBias, k, THTensor_get1d(gradBias, k) + THTensor_sum(gradOutputPlane));
    for(i = 0; i < nInputPlane; i++)
    {
      /* ------------------------- gradWeight ------------------------------------- */

      /* Get the input image, unfold into kW x kH windows, and transpose so
         the window dims come first for the T4-dot-T2 accumulation */
      THTensor_select(inputPlane, input, 2, i);
      THTensor_unfold(unfoldedInputPlane, inputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedInputPlane, NULL, 1, kH, dH);
      THTensor_transpose(unfoldedInputPlane,NULL,0,2);
      THTensor_transpose(unfoldedInputPlane,NULL,1,3);

      /* Get the good gradWeight for (k,i) (k out, i in) */
      THTensor_select(gradWeightPlane, gradWeight, 3, k);
      THTensor_select(gradWeightPlane, NULL, 2, i);
      /* gradWeight[k][i] += windows . gradOutputPlane */
      THTensor_addT4dotT2(gradWeightPlane, 1, unfoldedInputPlane, gradOutputPlane);

      /* -------------------------- gradInput ------------------------------------- */

      /* Not necessary with partial backprop: */

      /* Get the gradInput image as a windowed view (shares storage with
         gradInput, so the outer-product accumulation below scatters back) */
      THTensor_select(gradInputPlane, gradInput, 2, i);
      THTensor_unfold(unfoldedGradInputPlane, gradInputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedGradInputPlane, NULL , 1, kH, dH);

      /* Get the good weight for (k,i) (k out, i in) */
      THTensor_select(weightPlane, weight, 3, k);
      THTensor_select(weightPlane, NULL, 2, i);
      /* gradInput windows += gradOutputPlane (outer) weight[k][i] */
      THTensor_addT2outT2(unfoldedGradInputPlane, 1, gradOutputPlane, weightPlane);
    }
  }

  THTensor_free(gradInputPlane);
  THTensor_free(unfoldedInputPlane);
  THTensor_free(unfoldedGradInputPlane);
  THTensor_free(inputPlane);
  THTensor_free(gradOutputPlane);
  THTensor_free(weightPlane);
  THTensor_free(gradWeightPlane);

  return 1;
}
/* Forward pass of the original (unfold-based) SpatialConvolution, old TH API.
   Layout is planes-last: input is W x H x nInputPlane, weight is
   kW x kH x nInputPlane x nOutputPlane. Each output plane is initialized to
   its bias and accumulates a windowed correlation with every input plane. */
static int nn_SpatialConvolution_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == nInputPlane, 2, "invalid number of input planes");
  luaL_argcheck(L, input->size[0] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");

  /* valid-mode output geometry, planes last */
  THTensor_resize3d(output,
                    (input->size[0] - kW) / dW + 1,
                    (input->size[1] - kH) / dH + 1,
                    nOutputPlane);

  THTensor *inPlane = THTensor_new();
  THTensor *kernel = THTensor_new();
  THTensor *outPlane = THTensor_new();
  THTensor *inWindows = THTensor_new();

  int out, in;
  for (out = 0; out < nOutputPlane; out++)
  {
    THTensor_select(outPlane, output, 2, out);

    /* start from the bias of this output plane */
    THTensor_fill(outPlane, THTensor_get1d(bias, out));

    for (in = 0; in < nInputPlane; in++)
    {
      THTensor_select(inPlane, input, 2, in);

      /* kernel connecting input plane `in` to output plane `out`
         (NULL first arg = select in place on `kernel`) */
      THTensor_select(kernel, weight, 3, out);
      THTensor_select(kernel, NULL, 2, in);

      /* view the input plane as overlapping kW x kH windows, then correlate */
      THTensor_unfold(inWindows, inPlane, 0, kW, dW);
      THTensor_unfold(inWindows, NULL, 1, kH, dH);
      THTensor_addT4dotT2(outPlane, 1, inWindows, kernel);
    }
  }

  THTensor_free(inPlane);
  THTensor_free(kernel);
  THTensor_free(outPlane);
  THTensor_free(inWindows);
  return 1;
}
/* Forward pass of SpatialFullConvolution ("deconvolution"): per batch element,
   a GEMM (weight^T x input) builds the column buffer, col2im scatters it into
   the output, and a second GEMM adds the bias via a ones vector.
   Accepts 3D input (temporarily promoted to 4D batch of one). */
static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L)
{
  // Input
  THTensor *input = (THTensor*)luaT_checkudata(L, 2, torch_Tensor);

  // Params:
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  // finput/fgradInput are reused as the column buffer and the ones buffer
  THTensor *weight = (THTensor*)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = (THTensor*)luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *output = (THTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  int batch = 1;
  if (input->nDimension == 3) {
    luaL_argcheck(L, input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match");
    // Force batch
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
  } else {
    luaL_argcheck(L, input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match");
  }

  long inputWidth = input->size[3];
  long inputHeight = input->size[2];
  // full-convolution output geometry (inverse of the strided-conv formula)
  long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Resize output
  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

  // Resize temporary columns
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Define a buffer of ones, for bias accumulation
  // Note: this buffer can be shared with other modules, it only ever gets increased,
  // and always contains ones.
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    // Resize plane and fill with ones...
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  // Helpers
  THTensor *input_n = THTensor_(new)();
  THTensor *output_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix mulitply per output:
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(output_n, output, 0, elt);

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    // weight is nInputPlane x nOutputPlane x kH x kW
    long m = weight->size[1] * weight->size[2] * weight->size[3];
    long n = columns->size[1];
    long k = weight->size[0];

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // columns = weight^T * input_n
    THBlas_(gemm)(
        'n', 't',
        n, m, k,
        1,
        THTensor_(data)(input_n), n,
        THTensor_(data)(weight), m,
        0,
        THTensor_(data)(columns), n
    );

    // Unpack columns back into input:
    nn_(col2im)(
      THTensor_(data)(columns),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(output_n)
    );

    // Do Bias after:
    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m_ = nOutputPlane;
    long n_ = outputHeight * outputWidth;
    long k_ = 1;

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // output_n += bias (outer product) ones
    THBlas_(gemm)(
        't', 'n',
        n_, m_, k_,
        1,
        THTensor_(data)(ones), k_,
        THTensor_(data)(bias), k_,
        1,
        THTensor_(data)(output_n), n_
    );
  }

  // Free
  THTensor_(free)(input_n);
  THTensor_(free)(output_n);

  // Resize output back to 3D if the input came in unbatched
  if (batch == 0) {
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  // return output
  return 1;
}
/* Jitter_updateOutput: crop a batch of HWC images, optionally flipping
   them horizontally.
   input  : (bs, height, width, channels)
   output : (bs, height - ycrop, width - xcrop, channels)
   xstart/ystart are 1-based crop origins (Lua convention) - hence the -1
   terms in the index arithmetic. hflip == 1 mirrors the x axis within the
   cropped window.
   BUG FIX: the loop indices y, x and ch were declared at function scope
   but not listed in the OpenMP private clauses, so every thread shared
   them - a data race that could corrupt the output. They are now private
   in both parallel regions. */
static int nxn_(Jitter_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  int xstart = luaT_getfieldcheckint(L, 1, "xstart");
  int ystart = luaT_getfieldcheckint(L, 1, "ystart");
  int xcrop = luaT_getfieldcheckint(L, 1, "xcrop");
  int ycrop = luaT_getfieldcheckint(L, 1, "ycrop");
  int hflip = luaT_getfieldcheckint(L, 1, "randflip");

  int bs = input->size[0];
  int outy = input->size[1] - ycrop;
  int outx = input->size[2] - xcrop;
  int channels = input->size[3];

  THTensor_(resize4d)(output, bs, outy, outx, channels);

  real* idata = THTensor_(data)(input);
  real* odata = THTensor_(data)(output);

  int istr0 = input->stride[0];
  int istr1 = input->stride[1];
  int istr2 = input->stride[2];
  int istr3 = input->stride[3];

  int ostr0 = output->stride[0];
  int ostr1 = output->stride[1];
  int ostr2 = output->stride[2];
  int ostr3 = output->stride[3];

  /* This is jittering + hflip */
  int batchidx, y, x, ch;
  if(hflip==1)
  {
    /* all loop indices must be thread-private */
#pragma omp parallel for private(batchidx, y, x, ch)
    for(batchidx=0; batchidx<bs; batchidx++)
    {
      for (y = 0; y<outy; y++) {
        for(x = 0; x<outx; x++) {
          for (ch = 0; ch < channels; ch++) {
            /* mirrored source column within the crop window */
            odata[batchidx*ostr0 + y*ostr1 + x*ostr2 + ch*ostr3] =
              idata[batchidx*istr0 + (y+ystart-1)*istr1 + (xstart-1+outx-1-x)*istr2 + ch*istr3];
          }
        }
      }
    }
  }
  else /* This is only jittering */
  {
#pragma omp parallel for private(batchidx, y, x, ch)
    for(batchidx=0; batchidx<bs; batchidx++)
    {
      for (y = 0; y<outy; y++) {
        for(x = 0; x<outx; x++) {
          for (ch = 0; ch < channels; ch++) {
            odata[batchidx*ostr0 + y*ostr1 + x*ostr2 + ch*ostr3] =
              idata[batchidx*istr0 + (y+ystart-1)*istr1 + (x+xstart-1)*istr2 + ch*istr3];
          }
        }
      }
    }
  }
  return 1;
}
/* Forward pass of SpatialConvolution using TH's conv2D primitives.
   Lua stack: 1 = module table, 2 = input (3D CHW or 4D NCHW).
   Kernel geometry is read off the weight tensor (nOutputPlane x nInputPlane x kH x kW).
   Strategy: fill each output plane with its bias, then accumulate the
   "valid"-mode convolution on top (beta = 1.0 in conv2Dmv/conv2Dmm). */
static int nn_(SpatialConvolution_updateOutput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  /* dimension indices of width/height; shifted by one in batch mode */
  int dimw = 2;
  int dimh = 1;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");

  if (input->nDimension == 4) {
    dimw++;
    dimh++;
  }

  {
    long nOutputPlane = weight->size[0];
    long kW = weight->size[3];
    long kH = weight->size[2];
    long inputWidth = input->size[dimw];
    long inputHeight = input->size[dimh];
    /* "valid" convolution output size */
    long outputWidth = (inputWidth - kW) / dW + 1;
    long outputHeight = (inputHeight - kH) / dH + 1;

    if (input->nDimension == 3) {
      long i;
      real* bias_data;
      real* output_data;

      THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);

      /* add bias: each output plane i starts out filled with bias[i].
         Assumes output is contiguous after the resize (raw pointer walk). */
      bias_data = THTensor_(data)(bias);
      output_data = THTensor_(data)(output);
#pragma omp parallel for private(i)
      for (i=0; i<bias->size[0]; i++) {
        /*THTensor_(select)(outn,output,0,i);*/
        /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
        real *ptr_output = output_data + i*outputWidth*outputHeight;
        long j;
        for(j = 0; j < outputWidth*outputHeight; j++) ptr_output[j] = bias_data[i];
      }
      /*THTensor_(free)(outn);*/

      /* do convolutions: accumulate (alpha=1) on top of the bias fill */
      THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
    } else {
      real* bias_data;
      real* output_data;
      long p;

      THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);

      bias_data = THTensor_(data)(bias);
      output_data = THTensor_(data)(output);
      /* parallelize over batch; inner indices are declared inside the loop body */
#pragma omp parallel for private(p)
      for (p=0; p<input->size[0]; p++) {
        /* BIAS */
        long i;
        for (i=0; i<bias->size[0]; i++) {
          real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
          long j;
          for(j = 0; j < outputWidth*outputHeight; j++) ptr_output[j] = bias_data[i];
        }
      }

      /* do convolutions */
      THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
    }
  }
  return 1;
}
/* Forward pass of a locally-connected encoder: like a convolution, but the
   weights are NOT shared across all positions - output position (x, y) uses
   weight slice (x % woutX, y % woutY), so weights tile with period woutX/woutY.
   input  : 3D tensor (width, height, 1); the last dim must be 1.
   output : resized to ((W - winX + 1)/xStep, (H - winY + 1)/yStep, 1).
   Each output value is dot(input window, weight slice) + bias. */
static int nn_LcEncoder_forward(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");

  /* reusable view tensors (no data copies; narrow/select share storage) */
  THTensor *inputPlane, *inputNarrowedX, *inputNarrowedYX;
  THTensor *weightSelectedX, *weightSelectedYX;
  inputPlane = THTensor_new();
  inputNarrowedX = THTensor_new();
  inputNarrowedYX = THTensor_new();
  weightSelectedX = THTensor_new();
  weightSelectedYX = THTensor_new();

  // get output size from input
  THTensor_resize3d(output, (input->size[0] - winX+1) / xStep, (input->size[1] - winY+1) / yStep, 1);
  THTensor_select(inputPlane, input, 2, 0);

  int y,x,iy,ix,wy,wx;
  for (y = 0; y<output->size[1]; y++) {
    iy = (int)floor(y*yStep);   /* input row offset of this window */
    wy = y%woutY;               /* weight tile row */
    for (x = 0; x<output->size[0]; x++) {
      ix = (int)floor(x*xStep); /* input column offset of this window */
      wx = x%woutX;             /* weight tile column */
      /* view the winX x winY input window at (ix, iy) */
      THTensor_narrow(inputNarrowedX, inputPlane, 0, ix, winX);
      THTensor_narrow(inputNarrowedYX, inputNarrowedX, 1, iy, winY);
      /* pick the weight slice for this tile position */
      THTensor_select(weightSelectedX, weight, 3, wy);
      THTensor_select(weightSelectedYX, weightSelectedX, 2, wx);
      double dot = THTensor_dot(inputNarrowedYX, weightSelectedYX);
      double biasSelect = THTensor_get2d(bias,wx,wy);
      THTensor_set3d(output,x,y,0,dot+biasSelect);
    }
  }

  THTensor_free(inputPlane);
  THTensor_free(inputNarrowedX);
  THTensor_free(inputNarrowedYX);
  THTensor_free(weightSelectedX);
  THTensor_free(weightSelectedYX);
  return 1;
}
/* Backward pass of the locally-connected encoder: accumulates gradWeight
   and gradBias, and computes gradInput, in a single sweep over all output
   positions. Mirrors the forward pass's tiling: output (x, y) maps to
   weight slice (x % woutX, y % woutY) and input window at
   (floor(x*xStep), floor(y*yStep)).
   NOTE: gradWeight and gradInput are zeroed here, but gradBias is only
   accumulated into - presumably the caller zeroes it; verify against the
   Lua wrapper. */
static int nn_LcEncoder_backward(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  luaL_argcheck(L, input->nDimension == 3, 2, "input 3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");
  luaL_argcheck(L, gradOutput->nDimension == 3, 3, "gradOutput 3D tensor expected");
  luaL_argcheck(L, gradOutput->size[2] == 1, 3, "invalid gradOutput 3rd dim size has to be 1");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);

  /* ----------------------- gradWeight ----------------------- */
  THTensor_fill(gradWeight, 0);
  THTensor *inputPlane, *inputNarrowedX, *inputNarrowedYX;
  inputPlane = THTensor_new();
  inputNarrowedX = THTensor_new();
  inputNarrowedYX = THTensor_new();
  THTensor_select(inputPlane, input, 2, 0);
  THTensor *gradWeightSelectedX, *gradWeightSelectedYX;
  gradWeightSelectedX = THTensor_new();
  gradWeightSelectedYX = THTensor_new();

  /* ----------------------- gradInput ------------------------ */
  THTensor_resizeAs(gradInput, input);
  THTensor_fill(gradInput, 0);
  THTensor *gradInputPlane, *gradInputNarrowedX, *gradInputNarrowedYX;
  gradInputPlane = THTensor_new();
  gradInputNarrowedX = THTensor_new();
  gradInputNarrowedYX = THTensor_new();
  THTensor_select(gradInputPlane, gradInput, 2, 0);
  THTensor *weightSelectedX, *weightSelectedYX;
  weightSelectedX = THTensor_new();
  weightSelectedYX = THTensor_new();

  int y,x,iy,ix,wy,wx;
  for (y = 0; y<gradOutput->size[1]; y++) {
    iy = (int)floor(y*yStep);
    wy = y%woutY;
    for (x = 0; x<gradOutput->size[0]; x++) {
      ix = (int)floor(x*xStep);
      wx = x%woutX;
      double gradOutVal = THTensor_get3d(gradOutput,x,y,0);

      /* ----------------------- gradWeight -----------------------
         gradWeight[wx][wy] += gradOutVal * input window */
      THTensor_narrow(inputNarrowedX, inputPlane, 0, ix, winX);
      THTensor_narrow(inputNarrowedYX, inputNarrowedX, 1, iy, winY);
      THTensor_select(gradWeightSelectedX, gradWeight, 3, wy);
      THTensor_select(gradWeightSelectedYX, gradWeightSelectedX, 2, wx);
      THTensor_addTensor(gradWeightSelectedYX, gradOutVal, inputNarrowedYX);

      /* ----------------------- gradBias ----------------------- */
      THTensor_set2d(gradBias,wx,wy, THTensor_get2d(gradBias,wx,wy) + gradOutVal);

      /* ----------------------- gradInput ------------------------
         gradInput window += gradOutVal * weight slice (scatter-add:
         overlapping windows accumulate into shared input positions) */
      THTensor_narrow(gradInputNarrowedX, gradInputPlane, 0, ix, winX);
      THTensor_narrow(gradInputNarrowedYX, gradInputNarrowedX, 1, iy, winY);
      THTensor_select(weightSelectedX, weight, 3, wy);
      THTensor_select(weightSelectedYX, weightSelectedX, 2, wx);
      THTensor_addTensor(gradInputNarrowedYX, gradOutVal, weightSelectedYX);
    }
  }

  /* free gradWeight */
  THTensor_free(inputPlane);
  THTensor_free(inputNarrowedX);
  THTensor_free(inputNarrowedYX);
  THTensor_free(gradWeightSelectedX);
  THTensor_free(gradWeightSelectedYX);
  /* free gradInput */
  THTensor_free(gradInputPlane);
  THTensor_free(gradInputNarrowedX);
  THTensor_free(gradInputNarrowedYX);
  THTensor_free(weightSelectedX);
  THTensor_free(weightSelectedYX);
  return 1;
}
/* Gradient w.r.t. input for SpatialFullConvolution.
   Because the forward pass is a transposed convolution, its input-gradient
   is an ordinary convolution: im2col(gradOutput) followed by a GEMM with
   the (un-transposed) weight. Scratch buffer "finput" is reused as the
   columns buffer. Returns the module's gradInput field. */
static int nn_(SpatialFullConvolution_updateGradInput)(lua_State *L) {
  // Inputs
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  // Params
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight = (THTensor *)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradColumns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradInput = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  int batch = 1;
  if (input->nDimension == 3) {
    // Force batch: view 3D tensors as batches of one (undone at the end)
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputWidth = input->size[3];
  long inputHeight = input->size[2];
  /* size of the forward pass's output (= gradOutput's spatial size) */
  long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Resize output
  THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);

  // Resize temporary columns
  THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Helpers
  THTensor *gradInput_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix mulitply per sample:
    THTensor_(select)(gradInput_n, gradInput, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    // Extract columns: one column per input location
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(gradColumns)
    );

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m = weight->size[0];
    long n = gradColumns->size[1];
    long k = weight->size[1] * weight->size[2] * weight->size[3];

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // gradInput_n = weight * gradColumns
    THBlas_(gemm)(
        'n', 'n',
        n, m, k,
        1,
        THTensor_(data)(gradColumns), n,
        THTensor_(data)(weight), k,
        0,
        THTensor_(data)(gradInput_n), n
    );
  }

  // Free
  THTensor_(free)(gradInput_n);
  THTensor_(free)(gradOutput_n);

  // Resize output: restore non-batch 3D shapes
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
  }

  // Return gradInput
  return 1;
}
/* Forward pass of a 1-D horizontal convolution: each plane is convolved
   along the width axis with its own length-kL kernel (weight[i][k]).
   NOTE(review): the bias fill iterates nOutputPlane planes while the
   convolution loop indexes both input_t and output_t with the same plane
   index i over nInputPlane - this module appears to assume
   nInputPlane == nOutputPlane (per-plane filtering); confirm with the Lua
   side. Output width shrinks to inputWidth - kL + 1 ("valid" mode). */
static int nnconv1d_(HorizontalConvolution_updateOutput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int kL = luaT_getfieldcheckint(L, 1, "kL");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");

  // change to batch mode (view 3D input as a batch of one; undone at the end)
  int batch = 1;
  if (input->nDimension == 3) {
    batch = 0;
    THTensor_(resize4d)(input, 1, nInputPlane, input->size[1], input->size[2]);
  }

  long batchSize = input->size[0];
  long inputHeight = input->size[2];
  long inputWidth = input->size[3];
  long outputHeight = inputHeight;
  long outputWidth = inputWidth - kL + 1;   /* "valid" 1-D convolution */

  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

  int elt;
#pragma omp parallel for private(elt)
  for (elt = 0; elt < batchSize; elt++) {

    // select each batch
    THTensor *input_t = THTensor_(newSelect)(input, 0, elt);
    THTensor *output_t = THTensor_(newSelect)(output, 0, elt);

    // fill biases: plane i starts out as bias[i] everywhere
    int i, j, k;
    for (i = 0; i < nOutputPlane; i++) {
      THVector_(fill)(output_t->storage->data+output_t->storageOffset+output_t->stride[0]*i, THTensor_(get1d)(bias, i), outputHeight*outputWidth);
    }

    // convolve horizontally: for each kernel tap k, axpy the shifted input
    // row (offset k) scaled by weight[i][k] onto the output row
    for (i = 0; i < nInputPlane; i++) {
      for (j = 0; j < inputHeight; j++) {
        for (k = 0; k < kL; k++) {
          THVector_(add)(output_t->storage->data + output_t->storageOffset +
                         output_t->stride[0]*i + output_t->stride[1]*j,
                         input_t->storage->data + input_t->storageOffset +
                         input_t->stride[0]*i + input_t->stride[1]*j + k,
                         *(THTensor_(data)(weight)+i*kL+k), outputWidth);
        }
      }
    }

    // release temp tensors
    THTensor_(free)(input_t);
    THTensor_(free)(output_t);
  }

  // revert to single batch
  if (batch == 0) {
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
  }

  return 1;
}
/* Accumulates gradWeight and gradBias for the 1-D horizontal convolution.
   gradWeight[i][k] += scale * sum_j dot(gradOutput[i][j][:], input[i][j][k:k+outputWidth])
   gradBias         += scale * gradOutput (summed over all spatial positions,
                       via addmv against a vector of ones).
   BUG FIX: gradOutput2d was built from gradOutput->storage with
   gradOutput->storageOffset inside the batch loop, so every iteration viewed
   batch element 0 and the bias gradient was accumulated from the first
   sample batchSize times. It now views gradOutput_t (the per-elt selection),
   whose storageOffset advances with elt.
   NOTE(review): the dot-product loop indexes gradOutput_t planes with i over
   nInputPlane - assumes nInputPlane == nOutputPlane, consistent with
   updateOutput; confirm with the Lua side. */
static int nnconv1d_(HorizontalConvolution_accGradParameters)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int kL = luaT_getfieldcheckint(L, 1, "kL");

  THTensor *ones = luaT_getfieldcheckudata(L, 1, "ones", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
             "Number of output features is not equal to nOutputPlane" );

  // change to batch mode
  int batch = 1;
  if (input->nDimension == 3) {
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long batchSize = input->size[0];
  long inputHeight = input->size[2];
  long inputWidth = input->size[3];
  long outputHeight = inputHeight;
  long outputWidth = inputWidth - kL + 1;

  /* ones buffer for bias reduction; only ever grows */
  if (ones->nDimension != 1 || ones->size[0] < outputHeight*outputWidth) {
    THTensor_(resize1d)(ones, outputHeight*outputWidth);
    THTensor_(fill)(ones, 1);
  }

  int elt;
  for (elt = 0; elt < batchSize; elt++) {

    // select each batch in 2D
    THTensor *input_t = THTensor_(newSelect)(input, 0, elt);
    THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, elt);
    /* 2D view (nOutputPlane x H*W) of THIS batch element's gradOutput:
       use gradOutput_t's storage/offset so the view advances with elt */
    THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput_t->storage, gradOutput_t->storageOffset,
                                                         nOutputPlane, -1,
                                                         outputWidth*outputHeight, -1);

    // dot products: one strided dot per (plane, row, tap)
    int i, j, k;
    for (i = 0; i < nInputPlane; i++) {
      for (k = 0; k < kL; k++) {
        for (j = 0; j < outputHeight; j++) {
          *(gradWeight->storage->data + gradWeight->storageOffset +
            i*gradWeight->stride[0] + k) +=
            scale*THBlas_(dot)
              (outputWidth,
               gradOutput_t->storage->data + gradOutput_t->storageOffset +
               i*gradOutput_t->stride[0] + j*gradOutput_t->stride[1], gradOutput_t->stride[2],
               input_t->storage->data + input_t->storageOffset +
               i*input_t->stride[0] + j*input_t->stride[1] + k, input_t->stride[2]);
        }
      }
    }

    // fill biases: gradBias += scale * gradOutput2d * ones
    THTensor_(addmv)(gradBias, 1, gradBias, scale, gradOutput2d, ones);

    THTensor_(free)(gradOutput2d);
    THTensor_(free)(input_t);
    THTensor_(free)(gradOutput_t);
  }

  // revert to single batch
  if (batch == 0) {
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
  }

  return 0;
}
/* Forward pass of adaptive max pooling: pools input to a fixed output size
   (module fields W x H) regardless of input size. The per-frame kernel does
   the actual pooling; this wrapper handles 3D/4D dispatch, sizes/strides,
   and resizes "indices" to hold the (i, j) argmax locations - the first
   half of indices stores one coordinate, the second half the other. */
static int nn_(SpatialAdaptiveMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  long oheight = luaT_getfieldcheckint(L, 1, "H");
  long owidth = luaT_getfieldcheckint(L, 1, "W");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nslices;
  long iheight;
  long iwidth;
  long istride_d;
  long istride_h;
  long istride_w;
  long istride_b;    /* batch stride; only set and used in the 4D path */
  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");

  if (input->nDimension == 4)
  {
    istride_b = input->stride[0];
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  /* strides: passed to the frame kernel so it can walk non-contiguous input */
  istride_d = input->stride[dimh-1];
  istride_h = input->stride[dimh];
  istride_w = input->stride[dimw];

  /* resize output */
  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    /* second indices plane starts nslices*owidth*oheight in */
    nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
                                                      indices_data+nslices*owidth*oheight, indices_data,
                                                      nslices,
                                                      iwidth, iheight,
                                                      owidth, oheight,
                                                      istride_w,istride_h,
                                                      istride_d);
  }
  else
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      /* (p+nbatch) offsets into the second half of the indices buffer */
      nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight,
                                                        indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
                                                        nslices,
                                                        iwidth, iheight,
                                                        owidth, oheight,
                                                        istride_w,istride_h,
                                                        istride_d);
    }
  }

  return 1;
}
/* Gradient w.r.t. input for volumetric (3-D: time x height x width) average
   pooling. Zeroes gradInput, then delegates the scatter to the per-frame
   kernel; in 5D (batch) mode the batch loop is parallelized and each frame
   kernel gets pointers offset by contiguous per-sample strides
   (gradOutput is made contiguous first to guarantee that layout). */
static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  int nslices;
  int itime;
  int iheight;
  int iwidth;
  int otime;
  int oheight;
  int owidth;
  real *gradInput_data;
  real *gradOutput_data;

  /* dimension indices; shifted by one in 5D (batch) mode */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  /* get contiguous gradOutput (may allocate a copy; freed below) */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  otime = gradOutput->size[dimt];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);

  /* backprop */
  if (input->nDimension == 4) /* non-batch mode*/
  {
    nn_(VolumetricAveragePooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data, nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH,
      dT, dW, dH);
  }
  else /* batch mode */
  {
    long p;
    long nBatch = input->size[0];

    /* contiguous per-sample element counts */
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++)
    {
      nn_(VolumetricAveragePooling_updateGradInput_frame)(
        gradInput_data + p * istride, gradOutput_data + p * ostride, nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH,
        dT, dW, dH);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
  return 1;
}
/* Accumulates parameter gradients for SpatialConvolution, scaled by the
   optional 4th Lua argument (default 1).
   gradBias[k] += scale * sum of gradOutput plane k (raw-pointer loops,
   parallelized over planes - so each thread owns one gradBias entry);
   gradWeight is accumulated by TH's reverse conv primitives. */
static int nn_(SpatialConvolution_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  int dimw = 2;
  int dimh = 1;

  real *gradBias_data;
  real *gradOutput_data;
  long noutSlice;   /* number of elements in one gradOutput plane */

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
              "Number of output features is not equal to nOutputPlane" );

  if (input->nDimension == 4)
  {
    dimw++;
    dimh++;
  }

  /* gradient to bias */
  gradBias_data = THTensor_(data)(gradBias);
  gradOutput_data = THTensor_(data)(gradOutput);
  noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];
  /*THTensor* gradOutSlice = THTensor_(new)();*/

  if (input->nDimension == 3)
  {
    long k;
#pragma omp parallel for private(k)
    for(k = 0; k < nOutputPlane; k++)
    {
      /*THTensor_(select)(gradOutSlice, gradOutput, 0, k);*/
      real *ptr_gradOutput = gradOutput_data + k*noutSlice;
      long l;
      for(l = 0; l < noutSlice; l++)
        gradBias_data[k] += scale*ptr_gradOutput[l];
    }

    /* gradient to kernels */
    THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
  }
  else
  {
    long k;
#pragma omp parallel for private(k)
    for(k = 0; k < nOutputPlane; k++)
    {
      /* sum plane k over every batch element p */
      long p;
      for(p = 0; p < input->size[0]; p++)
      {
        /* BIAS */
        real *ptr_gradOutput = gradOutput_data + p*nOutputPlane*noutSlice + k*noutSlice;
        long l;
        for(l = 0; l < noutSlice; l++)
          gradBias_data[k] += scale*ptr_gradOutput[l];
      }
    }
    /* gradient to kernels */
    THTensor_(conv2DRevgerm)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
  }
  return 0;
}
/* Forward pass of volumetric (3-D: time x height x width) average pooling.
   Validates that the input is at least kernel-sized, computes "valid"-mode
   output sizes, and delegates the pooling to the per-frame kernel; the 5D
   (batch) path parallelizes over the batch with contiguous per-sample
   offsets (input is made contiguous first to guarantee that layout). */
static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* dimension indices; shifted by one in 5D (batch) mode */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
                "input image smaller than kernel size");

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  /* "valid"-mode pooled sizes */
  otime = (itime - kT) / dT + 1;
  oheight = (iheight - kH) / dH + 1;
  owidth = (iwidth - kW) / dW + 1;

  /* get contiguous input (may allocate a copy; freed below) */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) /* non-batch mode */
  {
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

    nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data,
                                                     nslices,
                                                     itime, iwidth, iheight,
                                                     otime, owidth, oheight,
                                                     kT, kW, kH,
                                                     dT, dW, dH);
  }
  else  /* batch mode */
  {
    long p;
    long nBatch = input->size[0];

    /* contiguous per-sample element counts */
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

#pragma omp parallel for private(p)
    for (p=0; p < nBatch; p++)
    {
      nn_(VolumetricAveragePooling_updateOutput_frame)(
        input_data + p * istride, output_data + p * ostride, nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH,
        dT, dW, dH);
    }
  }

  /* cleanup */
  THTensor_(free)(input);
  return 1;
}
/* Gradient w.r.t. input for the 1-D horizontal convolution: the transpose
   of updateOutput - for each kernel tap k, axpy the gradOutput row scaled
   by weight[i][k] into the gradInput row shifted by k (overlapping windows
   accumulate via the zeroed gradInput).
   NOTE(review): the plane loop runs over nOutputPlane but indexes both
   gradInput_t and gradOutput_t with the same i - like updateOutput, this
   appears to assume nInputPlane == nOutputPlane; confirm with the Lua side
   (the original carried a "needs to change" marker on the axpy call). */
static int nnconv1d_(HorizontalConvolution_updateGradInput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int kL = luaT_getfieldcheckint(L, 1, "kL");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
             "Number of output features is not equal to nOutputPlane" );

  // change to batch mode (view 3D tensors as batches of one; undone at the end)
  int batch = 1;
  if (input->nDimension == 3) {
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
  }

  long batchSize = input->size[0];
  long inputHeight = input->size[2];
  long inputWidth = input->size[3];
  long outputHeight = inputHeight;
  long outputWidth = inputWidth - kL + 1;

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  int elt;
#pragma omp parallel for private(elt)
  for (elt = 0; elt < batchSize; elt++) {

    // select each batch
    THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, elt);
    THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, elt);

    // convolve horizontally (transposed: scatter into shifted gradInput rows)
    int i, j, k;
    for (i = 0; i < nOutputPlane; i++) {
      for (j = 0; j < outputHeight; j++) {
        for (k = 0; k < kL; k++) {
          THVector_(add)(gradInput_t->storage->data + gradInput_t->storageOffset +
                         gradInput_t->stride[0]*i + gradInput_t->stride[1]*j + k,
                         gradOutput_t->storage->data + gradOutput_t->storageOffset +
                         gradOutput_t->stride[0]*i + gradOutput_t->stride[1]*j,
                         *(THTensor_(data)(weight)+i*kL+k), outputWidth);
        }
      }
    }

    // release temp tensors
    THTensor_(free)(gradInput_t);
    THTensor_(free)(gradOutput_t);
  }

  // revert to single batch
  if (batch == 0) {
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
  }

  return 1;
}
/* Accumulates gradWeight and gradBias for SpatialFullConvolution, scaled by
   the optional 4th Lua argument (default 1).
   Per batch element: im2col(gradOutput) -> GEMM against the input gives the
   weight gradient; a GEMV of gradOutput against a ones vector gives the bias
   gradient. Scratch buffers come from the module ("finput" as columns,
   "fgradInput" as the ones plane). Returns nothing. */
static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L) {
  // Inputs
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  // Params
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");
  float scale = luaL_optnumber(L, 4, 1);

  THTensor *gradWeight = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  int batch = 1;
  if (input->nDimension == 3) {
    // Force batch: view 3D tensors as batches of one (undone at the end)
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputWidth = input->size[3];
  long inputHeight = input->size[2];
  /* forward-pass output size (= gradOutput's spatial size) */
  long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Define a buffer of ones, for bias accumulation (only ever grows)
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    // Resize plane and fill with ones...
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  // Resize temporary columns
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Helpers
  THTensor *input_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix mulitply per output:
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    // Extract columns:
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(columns)
    );

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long n = columns->size[0];   // nOutputPlane * kh * kw
    long m = input_n->size[0];   // nInputPlane
    long k = columns->size[1];   // inputHeight * inputWidth

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // gradWeight += scale * input_n * columns^T
    THBlas_(gemm)(
        't', 'n',
        n, m, k,
        scale,
        THTensor_(data)(columns), k,
        THTensor_(data)(input_n), k,
        1,
        THTensor_(data)(gradWeight), n
    );

    // Do Bias:
    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m_ = nOutputPlane;
    long k_ = outputHeight * outputWidth;

    // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
    // gradBias += scale * gradOutput_n * ones
    THBlas_(gemv)(
        't',
        k_, m_,
        scale,
        THTensor_(data)(gradOutput_n), k_,
        THTensor_(data)(ones), 1,
        1,
        THTensor_(data)(gradBias), 1
    );
  }

  // Free
  THTensor_(free)(input_n);
  THTensor_(free)(gradOutput_n);

  // Resize: restore non-batch 3D shapes
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  // Return nothing
  return 0;
}