/*
 * SparseLinear forward pass.
 *
 * Lua arguments: (self, input). `input` is read as a 2 x N tensor: row 0
 * holds 1-based indices, row 1 the matching values. `output` is first set to
 * `bias`; then, for every column, value * (the weight slice selected by the
 * index) is accumulated into it with a BLAS axpy.
 *
 * Raises a Lua error ("index out of bound") as soon as an index falls outside
 * [1, weight->size[0]]. Reports one return value to Lua.
 *
 * NOTE(review): the order of the field lookups is kept on purpose; the value
 * handed back to Lua is presumably whatever the last lookup left on the
 * stack -- confirm against luaT before reordering.
 */
static int nn_(SparseLinear_updateOutput)(lua_State *L)
{
  THTensor *input  = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
  THTensor *bias   = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
  const long nSlices = weight->size[0];
  long col;

  /* start from the bias, then add one scaled weight slice per entry */
  THTensor_(copy)(output, bias);

  for (col = 0; col < input->size[1]; col++)
  {
    const long slice = (long)(THTensor_(get2d)(input, 0, col)) - 1;
    real coeff;

    if (slice < 0 || slice >= nSlices)
      return luaL_error(L, "index out of bound");

    coeff = THTensor_(get2d)(input, 1, col);
    THBlas_(axpy)(output->size[0],
                  coeff,
                  THTensor_(data)(weight) + slice * weight->stride[0],
                  weight->stride[1],
                  THTensor_(data)(output),
                  output->stride[0]);
  }

  return 1;
}
/*
 * Min forward pass (the definition continues past the end of this line).
 *
 * Reads the 1-based "dimension" field, converts it to 0-based and rejects it
 * with a Lua argument error when it is outside the input's dimensions.
 * `output` and `indices` are resized to the input's size with the reduced
 * dimension set to 1. For every slice along that dimension the smallest
 * element is written to `output` and its position, stored 1-based
 * (theIndex+1), to `indices`; on ties the first occurrence wins because the
 * comparison is a strict `<`.
 */
static int nn_(Min_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); THLongStorage *dim; long i; luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); dim = THLongStorage_newWithSize(input->nDimension); for(i = 0; i < input->nDimension; i++) dim->data[i] = input->size[i]; dim->data[dimension] = 1; THTensor_(resize)(output, dim, NULL); THTensor_(resize)(indices, dim, NULL); THLongStorage_free(dim); TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, long theIndex = 0; real theMin = input_data[0]; for(i = 1; i < input_size; i++) { if(input_data[i*input_stride] < theMin) { theIndex = i; theMin = input_data[i*input_stride]; } } *indices_data = theIndex+1; *output_data = theMin;) if(output->nDimension > 1)
/*
 * SparseLinear parameter update.
 *
 * Lua arguments: (self, learningRate). Performs
 *   bias   <- bias - learningRate * gradBias
 * and, for every 1-based index stored in row 0 of `lastInput`,
 *   weight slice <- weight slice - learningRate * gradWeight slice
 * so only the slices referenced by the last input are touched.
 *
 * Raises a Lua error ("index out of bound") when an index is outside
 * [1, weight->size[0]]. Reports no return values to Lua.
 */
int nn_(SparseLinear_updateParameters)(lua_State *L)
{
  real learningRate    = luaL_checknumber(L, 2);
  THTensor *weight     = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias       = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *gradBias   = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *lastInput  = luaT_getfieldcheckudata(L, 1, "lastInput", torch_Tensor);
  const long nSlices = weight->size[0];
  long col = 0;

  THTensor_(cadd)(bias, bias, -learningRate, gradBias);

  while (col < lastInput->size[1])
  {
    const long slice = (long)(THTensor_(get2d)(lastInput, 0, col)) - 1;

    if (slice < 0 || slice >= nSlices)
      return luaL_error(L, "index out of bound");

    THBlas_(axpy)(bias->size[0],
                  -learningRate,
                  THTensor_(data)(gradWeight) + slice * gradWeight->stride[0],
                  gradWeight->stride[1],
                  THTensor_(data)(weight) + slice * weight->stride[0],
                  weight->stride[1]);
    col++;
  }

  return 0;
}
/*
 * SpatialConvolution backward pass with respect to the input.
 *
 * Lua arguments: (self, input, gradOutput). Checks that the feature
 * dimension of gradOutput (dimension 1 when the input is 4D, dimension 0
 * otherwise) equals the module's nOutputPlane, then convolves gradOutput
 * with the weight transposed on its first two dimensions, writing into
 * `gradInput`. A 3D input uses conv2Dmv, anything else conv2Dmm; both are
 * called with the "F","C" flags and strides dH, dW.
 *
 * Reports one return value to Lua.
 */
static int nn_(SpatialConvolution_updateGradInput)(lua_State *L)
{
  THTensor *input      = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW               = luaT_getfieldcheckint(L, 1, "dW");
  int dH               = luaT_getfieldcheckint(L, 1, "dH");
  int nOutputPlane     = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *weight     = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput  = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *weightT;
  int featureDim = 0;

  if (input->nDimension == 4)
    featureDim = 1;

  THArgCheck(nOutputPlane == gradOutput->size[featureDim], 1,
             "Number of output features is not equal to nOutputPlane");

  /* the gradient w.r.t. the input needs the in/out planes swapped */
  weightT = THTensor_(newTranspose)(weight, 0, 1);

  if (input->nDimension != 3)
    THTensor_(conv2Dmm)(gradInput, 0.0, 1.0, gradOutput, weightT, dH, dW, "F", "C");
  else
    THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, weightT, dH, dW, "F", "C");

  THTensor_(free)(weightT);
  return 1;
}
/*
 * SpatialConvolutionLocal forward pass.
 *
 * Lua arguments: (self, input). All geometry (iW/iH, oW/oH, kW/kH, dW/dH,
 * padW/padH, plane counts) is read from fields of the module.
 *
 * - 3D input: `finput` is resized to (kW*kH*nInputPlane) x (oH*oW),
 *   `output` to nOutputPlane x oH x oW, and the per-frame helper is called
 *   once.
 * - otherwise: dimension 0 of the input is treated as a batch of T frames;
 *   `finput` and `output` get a leading dimension of T and the helper is
 *   called once per frame on selected views, inside an OpenMP parallel for.
 *
 * Reports one return value to Lua.
 */
static int nn_(SpatialConvolutionLocal_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);

  long inW  = luaT_getfieldcheckint(L, 1, "iW");
  long inH  = luaT_getfieldcheckint(L, 1, "iH");
  long outW = luaT_getfieldcheckint(L, 1, "oW");
  long outH = luaT_getfieldcheckint(L, 1, "oH");
  int kW    = luaT_getfieldcheckint(L, 1, "kW");
  int kH    = luaT_getfieldcheckint(L, 1, "kH");
  int dW    = luaT_getfieldcheckint(L, 1, "dW");
  int dH    = luaT_getfieldcheckint(L, 1, "dH");
  int padW  = luaT_getfieldcheckint(L, 1, "padW");
  int padH  = luaT_getfieldcheckint(L, 1, "padH");
  long nIn  = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOut = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias   = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  long nFrames;
  long frame;

  if (input->nDimension == 3)
  {
    /* single frame */
    THTensor_(resize2d)(finput, kW*kH*nIn, outH*outW);
    THTensor_(resize3d)(output, nOut, outH, outW);

    nn_(SpatialConvolutionLocal_updateOutput_frame)(input, output, weight, bias, finput,
                                                    kW, kH, dW, dH, padW, padH,
                                                    nIn, inW, inH,
                                                    nOut, outW, outH);
    return 1;
  }

  /* batch of frames along dimension 0 */
  nFrames = input->size[0];
  THTensor_(resize3d)(finput, nFrames, kW*kH*nIn, outH*outW);
  THTensor_(resize4d)(output, nFrames, nOut, outH, outW);

#pragma omp parallel for private(frame)
  for (frame = 0; frame < nFrames; frame++)
  {
    THTensor *inFrame     = THTensor_(newSelect)(input, 0, frame);
    THTensor *outFrame    = THTensor_(newSelect)(output, 0, frame);
    THTensor *finputFrame = THTensor_(newSelect)(finput, 0, frame);

    nn_(SpatialConvolutionLocal_updateOutput_frame)(inFrame, outFrame, weight, bias, finputFrame,
                                                    kW, kH, dW, dH, padW, padH,
                                                    nIn, inW, inH,
                                                    nOut, outW, outH);

    THTensor_(free)(inFrame);
    THTensor_(free)(outFrame);
    THTensor_(free)(finputFrame);
  }

  return 1;
}
/*
 * SpatialConvolutionLocal backward pass with respect to the input.
 *
 * Lua arguments: (self, input, gradOutput). `gradInput` is resized like
 * `input` and `fgradInput` like `finput`. The weight tensor is transposed
 * in place on dimensions 1 and 2 for the duration of the computation and
 * transposed back before returning, so it leaves this function unchanged.
 *
 * A 3D input is processed with a single call to the per-frame helper; any
 * other input is treated as a batch along dimension 0, one helper call per
 * frame inside an OpenMP parallel for.
 *
 * Reports one return value to Lua.
 */
static int nn_(SpatialConvolutionLocal_updateGradInput)(lua_State *L)
{
  THTensor *input      = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);

  long inW  = luaT_getfieldcheckint(L, 1, "iW");
  long inH  = luaT_getfieldcheckint(L, 1, "iH");
  long outW = luaT_getfieldcheckint(L, 1, "oW");
  long outH = luaT_getfieldcheckint(L, 1, "oH");
  int kW    = luaT_getfieldcheckint(L, 1, "kW");
  int kH    = luaT_getfieldcheckint(L, 1, "kH");
  int dW    = luaT_getfieldcheckint(L, 1, "dW");
  int dH    = luaT_getfieldcheckint(L, 1, "dH");
  int padW  = luaT_getfieldcheckint(L, 1, "padW");
  int padH  = luaT_getfieldcheckint(L, 1, "padH");
  long nIn  = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOut = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput     = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight     = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput  = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  /* temporarily swap dimensions 1 and 2 of the weight (undone below) */
  THTensor_(transpose)(weight, weight, 1, 2);

  if (input->nDimension != 3)
  {
    const long nFrames = input->size[0];
    long frame;

#pragma omp parallel for private(frame)
    for (frame = 0; frame < nFrames; frame++)
    {
      THTensor *gradInFrame  = THTensor_(newSelect)(gradInput, 0, frame);
      THTensor *gradOutFrame = THTensor_(newSelect)(gradOutput, 0, frame);
      THTensor *fgradFrame   = THTensor_(newSelect)(fgradInput, 0, frame);

      nn_(SpatialConvolutionLocal_updateGradInput_frame)(gradInFrame, gradOutFrame, weight, fgradFrame,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nIn, inW, inH,
                                                         nOut, outW, outH);

      THTensor_(free)(gradInFrame);
      THTensor_(free)(gradOutFrame);
      THTensor_(free)(fgradFrame);
    }
  }
  else
  {
    nn_(SpatialConvolutionLocal_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput,
                                                       kW, kH, dW, dH, padW, padH,
                                                       nIn, inW, inH,
                                                       nOut, outW, outH);
  }

  /* restore the weight layout */
  THTensor_(transpose)(weight, weight, 1, 2);

  return 1;
}
/*
 * SpatialMaxPooling backward pass.
 *
 * Lua arguments: (self, input, gradOutput). `gradInput` is resized like
 * `input` and zeroed; each gradOutput element is then added to the input
 * position recorded in `indices` for that output cell.
 *
 * `indices` is addressed through its raw data pointer: the first
 * (channels * oheight * owidth) values are used as 1-based row offsets
 * inside the pooling window, the following block of the same size as 1-based
 * column offsets. The window origin (i*dH, j*dW) is added to obtain the
 * absolute position in the input plane.
 *
 * Sizes are read from dimensions 0..2 of the tensors. gradOutput is
 * replaced by a contiguous copy/reference that is released before
 * returning. Reports one return value to Lua.
 */
static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input      = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW               = luaT_getfieldcheckint(L, 1, "dW");
  int dH               = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices    = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput  = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  /* dense gradOutput so that plain pointer arithmetic is valid */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  int nPlanes = input->size[0];
  int inH     = input->size[1];
  int inW     = input->size[2];
  int outH    = gradOutput->size[1];
  int outW    = gradOutput->size[2];

  real *gradInBase  = THTensor_(data)(gradInput);
  real *gradOutBase = THTensor_(data)(gradOutput);
  real *indexBase   = THTensor_(data)(indices);

  long plane;
  for (plane = 0; plane < input->size[0]; plane++)
  {
    real *gradOutPlane = gradOutBase + plane*outW*outH;
    real *gradInPlane  = gradInBase + plane*inW*inH;
    real *rowOffsets   = indexBase + plane*outW*outH;
    real *colOffsets   = indexBase + (plane+nPlanes)*outW*outH;

    int oy, ox;
    for (oy = 0; oy < outH; oy++)
    {
      for (ox = 0; ox < outW; ox++)
      {
        int cell = oy*outW + ox;

        /* absolute position of the max inside the input plane */
        long srcRow = rowOffsets[cell] - 1 + oy*dH;
        long srcCol = colOffsets[cell] - 1 + ox*dW;

        gradInPlane[srcRow*inW + srcCol] += gradOutPlane[cell];
      }
    }
  }

  THTensor_(free)(gradOutput);

  return 1;
}
/*
 * SpatialMaxPooling backward pass (nnOmp variant).
 *
 * Lua arguments: (self, input, gradOutput). `gradInput` is resized like
 * `input` and zeroed. For each plane k, gradInput is unfolded into kH x kW
 * windows with steps dH, dW; for every output cell (i, j) the gradOutput
 * value is added to the window element whose 1-based coordinates are stored
 * in indices[0][k][i][j] (row) and indices[1][k][i][j] (column).
 *
 * setompnthread() is called with the "nThread" field before the work
 * starts. Temporary view tensors are released before returning. Reports one
 * return value to Lua.
 */
static int nnOmp_(SpatialMaxPooling_updateGradInputOmp)(lua_State *L)
{
  THTensor *input      = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");

  setompnthread(L, 1, "nThread");

  THTensor *indices   = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));

  THTensor *gradOutPlane;
  THTensor *gradInPlane;
  THTensor *windows;
  THTensor *window;
  int plane, row, col;

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  gradInPlane  = THTensor_(new)();
  gradOutPlane = THTensor_(new)();
  window       = THTensor_(new)();
  windows      = THTensor_(new)();

  for (plane = 0; plane < input->size[0]; plane++)
  {
    /* views on the current plane */
    THTensor_(select)(gradOutPlane, gradOutput, 0, plane);
    THTensor_(select)(gradInPlane, gradInput, 0, plane);

    /* expose every pooling window of the gradInput plane */
    THTensor_(unfold)(windows, gradInPlane, 0, kH, dH);
    THTensor_(unfold)(windows, NULL, 1, kW, dW);

    for (row = 0; row < gradOutPlane->size[0]; row++)
    {
      for (col = 0; col < gradOutPlane->size[1]; col++)
      {
        THTensor_(select)(window, windows, 0, row);
        THTensor_(select)(window, NULL, 0, col);

        long maxRow = THTensor_(get4d)(indices, 0, plane, row, col) - 1;
        long maxCol = THTensor_(get4d)(indices, 1, plane, row, col) - 1;

        double accumulated = THTensor_(get2d)(window, maxRow, maxCol)
                           + THTensor_(get2d)(gradOutPlane, row, col);
        THTensor_(set2d)(window, maxRow, maxCol, accumulated);
      }
    }
  }

  THTensor_(free)(gradInPlane);
  THTensor_(free)(gradOutPlane);
  THTensor_(free)(windows);
  THTensor_(free)(window);

  return 1;
}
/*
 * SparseLinear forward pass, sharded variant (the definition continues past
 * the end of this line; only the "shardBuffer" branch is visible here).
 *
 * `input` is validated by nn_(checkInput) (error text: "input size must be
 * nnz x 2"); column 0 is read as a 1-based index, column 1 as the value.
 * When the module has a non-nil "shardBuffer" field, that buffer must be 2D
 * with size[0] == outDim and at least one column. It is zeroed, each input
 * row adds value * (the weight slice selected by the index) into the buffer
 * column chosen by shardId, the columns are summed into `output`, `bias` is
 * added, and the "output" field is pushed and returned.
 *
 * NOTE(review): without OpenMP shardId is fixed at 1, i.e. the second buffer
 * column; with a single-column buffer that column does not exist. 0 may have
 * been intended -- confirm.
 * NOTE(review): luaL_error is called from inside the OpenMP loop body --
 * confirm this is safe for the Lua state in use.
 */
static int nn_(SparseLinear_updateOutput)(lua_State *L) { long i; THTensor * input = luaT_checkudata(L, 2, torch_Tensor); THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); long outDim = weight->size[0]; long inDim = weight->size[1]; luaL_argcheck(L, nn_(checkInput)(input), 2, "input size must be nnz x 2"); luaL_argcheck(L, nn_(checkSize1D)(output, outDim), 1, "output size wrong"); luaL_argcheck(L, nn_(checkSize1D)(bias, outDim), 1, "bias size wrong"); lua_getfield(L, 1, "shardBuffer"); if (!lua_isnil(L, -1)) { THTensor *buffer = luaT_getfieldcheckudata(L, 1, "shardBuffer", torch_Tensor); long num_shards = buffer->size[1]; luaL_argcheck(L, buffer->nDimension == 2 && buffer->size[0] == outDim && num_shards > 0, 1, "shardBuffer size wrong"); THTensor_(zero)(buffer); #pragma omp parallel for private(i) schedule(static) num_threads(num_shards) for (i = 0; i < input->size[0]; i++) { #ifdef _OPENMP int shardId = omp_get_thread_num(); #else int shardId = 1; #endif long offset = (long)(THTensor_(get2d)(input, i, 0)) - 1; if (offset >= 0 && offset < inDim) { THBlas_(axpy)(outDim, THTensor_(get2d)(input, i, 1), THTensor_(data)(weight) + offset * weight->stride[1], weight->stride[0], THTensor_(data)(buffer) + shardId * buffer->stride[1], buffer->stride[0]); } else { luaL_error(L, "index out of bound. updateOutput: \ %ld not between 1 and %ld", offset + 1, inDim); } } THTensor_(sum)(output, buffer, 1); THTensor_(cadd)(output, bias, 1.0, output); lua_getfield(L, 1, "output"); return 1; }
/*
 * SpatialConvolutionMM backward pass with respect to the input.
 *
 * Lua arguments: (self, input, gradOutput). Verifies that the feature
 * dimension of gradOutput (dimension 1 for a 4D input, dimension 0
 * otherwise) equals nOutputPlane. `gradInput` is resized like `input`,
 * `fgradInput` like `finput`. The weight is transposed in place on
 * dimensions 0 and 1 while the gradient is computed and transposed back
 * afterwards.
 *
 * A 3D input is handled with one call to the per-frame helper. Otherwise
 * dimension 0 is a batch: the TH_STORAGE_REFCOUNTED flag is cleared on the
 * three storages that get per-frame views, the frames are processed in an
 * OpenMP parallel for, and the flag is set again afterwards.
 *
 * Reports one return value to Lua.
 */
static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
{
  THTensor *input      = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int kW               = luaT_getfieldcheckint(L, 1, "kW");
  int kH               = luaT_getfieldcheckint(L, 1, "kH");
  int nOutputPlane     = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput     = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight     = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput  = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  int featureDim = 0;
  if (input->nDimension == 4)
    featureDim = 1;

  THArgCheck(nOutputPlane == gradOutput->size[featureDim], 1,
             "Number of output features is not equal to nOutputPlane");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  /* temporarily swap dimensions 0 and 1 of the weight (undone below) */
  THTensor_(transpose)(weight, weight, 0, 1);

  if (input->nDimension != 3)
  {
    const long nFrames = input->size[0];
    long frame;

    /* suspend reference counting on the shared storages during the loop */
    THStorage_(clearFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);

#pragma omp parallel for private(frame)
    for (frame = 0; frame < nFrames; frame++)
    {
      THTensor *gradInFrame  = THTensor_(newSelect)(gradInput, 0, frame);
      THTensor *gradOutFrame = THTensor_(newSelect)(gradOutput, 0, frame);
      THTensor *fgradFrame   = THTensor_(newSelect)(fgradInput, 0, frame);

      nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInFrame, gradOutFrame, weight,
                                                      fgradFrame, kW, kH);

      THTensor_(free)(gradInFrame);
      THTensor_(free)(gradOutFrame);
      THTensor_(free)(fgradFrame);
    }

    THStorage_(setFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);
  }
  else
  {
    nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight,
                                                    fgradInput, kW, kH);
  }

  /* restore the weight layout */
  THTensor_(transpose)(weight, weight, 0, 1);

  return 1;
}
/*
 * TemporalSubSampling forward pass.
 *
 * Lua arguments: (self, input). `input` must be a 2D tensor of size
 * inputFrameSize x nInputFrame with nInputFrame >= kW; otherwise a Lua
 * argument error is raised. `output` is resized to
 * inputFrameSize x ((nInputFrame - kW) / dW + 1).
 *
 * Each output frame is the sum of kW consecutive input frames (windows
 * taken every dW frames), multiplied element-wise by `weight`, plus `bias`.
 *
 * Temporary view tensors are released before returning. Reports one return
 * value to Lua.
 */
static int nn_TemporalSubSampling_forward(lua_State *L)
{
  THTensor *input    = luaT_checkudata(L, 2, torch_Tensor_id);
  int kW             = luaT_getfieldcheckint(L, 1, "kW");
  int dW             = luaT_getfieldcheckint(L, 1, "dW");
  int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize");
  THTensor *weight   = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias     = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output   = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  THTensor *windows;      /* input unfolded into kW-wide windows */
  THTensor *window;       /* the window feeding one output frame */
  THTensor *windowFrame;  /* one input frame inside that window */
  THTensor *outFrame;     /* the output frame being produced */
  int nInputFrame, nOutputFrame;
  int frame, tap;

  luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");
  luaL_argcheck(L, input->size[0] == inputFrameSize, 2, "invalid input frame size");
  luaL_argcheck(L, input->size[1] >= kW, 2, "input sequence smaller than kernel size");

  nInputFrame  = input->size[1];
  nOutputFrame = (nInputFrame - kW) / dW + 1;

  THTensor_resize2d(output, inputFrameSize, nOutputFrame);

  outFrame    = THTensor_new();
  windows     = THTensor_new();
  windowFrame = THTensor_new();
  window      = THTensor_new();

  THTensor_unfold(windows, input, 1, kW, dW);

  for (frame = 0; frame < nOutputFrame; frame++)
  {
    THTensor_select(window, windows, 1, frame);
    THTensor_select(outFrame, output, 1, frame);
    THTensor_zero(outFrame);

    /* sum the kW input frames of this window */
    tap = 0;
    while (tap < kW)
    {
      THTensor_select(windowFrame, window, 1, tap);
      THTensor_addTensor(outFrame, 1, windowFrame);
      tap++;
    }

    THTensor_cmul(outFrame, weight);
    THTensor_addTensor(outFrame, 1, bias);
  }

  THTensor_free(outFrame);
  THTensor_free(windows);
  THTensor_free(windowFrame);
  THTensor_free(window);

  return 1;
}
/*
 * SpatialConvolutionGPU parameter-gradient accumulation.
 *
 * Lua arguments: (self, input, gradOutput [, scale = 1]).
 *
 * The input is indexed as (plane?, height, width, batch): sizes 1, 2 and 3
 * are read as height, width and batch size, so a 4D tensor is required and
 * now checked before any size is read (the forward pass performs the same
 * check). gradWeight is indexed as (nInputPlane, kH, kW, nOutputPlane).
 *
 * When the "partialSum" field is non-zero the gradients are accumulated
 * into the "gradWeightPartial" tensor instead, resized to
 * (outputHeight*outputWidth/partialSum, nInputPlane, kH*kW, nOutputPlane).
 *
 * Raises Lua argument errors when the input is not 4D or not square, when
 * kH != kW or dH != dW, or when input / gradWeight / gradOutput are not
 * contiguous. Reports no return values to Lua.
 */
static int gpunn_SpatialConvolutionGPU_accGradParameters(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor *)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *gradOutput = (THGPUTensor *)luaT_checkudata(L, 3, "torch.GPUTensor");
  THGPUTensor *gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", "torch.GPUTensor");

  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padding = luaT_getfieldcheckint(L, 1, "padding");
  int partialSum = luaT_getfieldcheckint(L, 1, "partialSum");
  float scale = luaL_optnumber(L, 4, 1);

  /* input->size[3] is read below: refuse anything that is not 4D first */
  luaL_argcheck(L, input->nDimension == 4, 2, "4D (batch mode) tensor is expected");

  long nOutputPlane = gradWeight->size[3];
  long nInputPlane  = gradWeight->size[0];
  long kH           = gradWeight->size[1];
  long kW           = gradWeight->size[2];
  long inputHeight  = input->size[1];
  long inputWidth   = input->size[2];
  long batchSize    = input->size[3];
  long outputHeight = (padding + inputHeight - kH) / dH + 1;
  long outputWidth  = (padding + inputWidth - kW) / dW + 1;

  /* the kernel only supports square inputs, kernels and strides */
  luaL_argcheck(L, inputWidth == inputHeight, 1, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  if (partialSum)
  {
    /* compute partial gradients for outputHeight*outputWidth/partialSum
       groups of filters separately */
    gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeightPartial", "torch.GPUTensor");
    /* numModulesY*numModulesX/partialSum, numFilterColors, filterPixels, numFilters */
    THGPUTensor_resize4d(gradWeight, outputHeight * outputWidth / partialSum,
                         nInputPlane, kH * kW, nOutputPlane);
  }

  /* all the data must be contiguous; each failure names the tensor that was
     actually tested and points at the argument it came from */
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradWeight), 1, "gradWeight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradOutput), 3, "gradOutput must be contiguous");

  auto avInput = input->get_array_view();
  auto avGradOutput = gradOutput->get_array_view();
  auto avGradWeight = gradWeight->get_array_view();

  /* convolutions */
  spatialConv_accGradParameters(avInput, avGradOutput, avGradWeight,
                                nInputPlane, inputHeight, inputWidth, batchSize,
                                nOutputPlane, outputHeight, outputWidth,
                                kH, kW, -floor((double)padding/2), dW,
                                0, scale, partialSum);

  return 0;
}
/*
 * SpatialConvolutionLocal parameter-gradient accumulation.
 *
 * Lua arguments: (self, input, gradOutput [, scale = 1]). Geometry is read
 * from fields of the module. A 3D input triggers one call to the per-frame
 * helper; any other input is treated as a batch along dimension 0 and the
 * helper is called sequentially for each frame, accumulating into the same
 * gradWeight / gradBias.
 *
 * Reports no return values to Lua.
 */
static int nn_(SpatialConvolutionLocal_accGradParameters)(lua_State *L)
{
  THTensor *input      = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale           = luaL_optnumber(L, 4, 1);

  long inW  = luaT_getfieldcheckint(L, 1, "iW");
  long inH  = luaT_getfieldcheckint(L, 1, "iH");
  long outW = luaT_getfieldcheckint(L, 1, "oW");
  long outH = luaT_getfieldcheckint(L, 1, "oH");
  int kW    = luaT_getfieldcheckint(L, 1, "kW");
  int kH    = luaT_getfieldcheckint(L, 1, "kH");
  int dW    = luaT_getfieldcheckint(L, 1, "dW");
  int dH    = luaT_getfieldcheckint(L, 1, "dH");
  int padW  = luaT_getfieldcheckint(L, 1, "padW");
  int padH  = luaT_getfieldcheckint(L, 1, "padH");
  long nIn  = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOut = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput     = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias   = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  long nFrames;
  long frame;

  if (input->nDimension == 3)
  {
    nn_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nIn, inW, inH,
                                                         nOut, outW, outH);
    return 0;
  }

  /* batch: accumulate frame by frame (sequential on purpose in the original) */
  nFrames = input->size[0];
  for (frame = 0; frame < nFrames; frame++)
  {
    THTensor *gradOutFrame = THTensor_(newSelect)(gradOutput, 0, frame);
    THTensor *finputFrame  = THTensor_(newSelect)(finput, 0, frame);

    nn_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutFrame, gradWeight, gradBias, finputFrame, scale,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nIn, inW, inH,
                                                         nOut, outW, outH);

    THTensor_(free)(gradOutFrame);
    THTensor_(free)(finputFrame);
  }

  return 0;
}
/*
 * LogSigmoid forward pass (the closing brace lies past the end of this
 * line). `output` and `buffer` are resized like `input`; for every element
 * z = exp(-input) is stored in `buffer` and -log(1 + z) in `output`.
 */
static int nn_(LogSigmoid_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); THTensor_(resizeAs)(output, input); THTensor_(resizeAs)(buffer, input); TH_TENSOR_APPLY3(real, output, real, input, real, buffer, \ real z = exp(-*input_data); \ *buffer_data = z; \ *output_data = -log(1. + z);) return 1;
/*
 * Abs backward pass on the GPU.
 *
 * Lua arguments: (self, input, gradOutput). Contiguous versions of input
 * and gradOutput are obtained, `gradInput` is resized like the input, and
 * absupdateGradInput_functor is applied element-wise over
 * (input, gradOutput) -> gradInput through bolt::amp::transform, starting
 * at each tensor's storage offset and covering nElement(input) elements.
 *
 * The contiguous temporaries are released before returning. Reports one
 * return value to Lua.
 */
static int gpunn_Abs_updateGradInput(lua_State *L)
{
  THGPUTensor *input      = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *gradOutput = (THGPUTensor*)luaT_checkudata(L, 3, "torch.GPUTensor");
  THGPUTensor *gradInput  = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "gradInput", "torch.GPUTensor");
  long count = THGPUTensor_nElement(input);

  input      = THGPUTensor_newContiguous(input);
  gradOutput = THGPUTensor_newContiguous(gradOutput);

  THGPUTensor_resizeAs(gradInput, input);

  auto inVec      = input->get_bolt_dev_vec();
  auto gradOutVec = gradOutput->get_bolt_dev_vec();
  auto gradInVec  = gradInput->get_bolt_dev_vec();

  auto inFirst      = inVec.begin() + input->storageOffset;
  auto inLast       = inVec.begin() + input->storageOffset + count;
  auto gradOutFirst = gradOutVec.begin() + gradOutput->storageOffset;
  auto gradInFirst  = gradInVec.begin() + gradInput->storageOffset;

  bolt::amp::transform(inFirst, inLast, gradOutFirst, gradInFirst,
                       absupdateGradInput_functor());

  THGPUTensor_free(gradOutput);
  THGPUTensor_free(input);

  return 1;
}
/*
 * SparseLinear parameter-gradient computation.
 *
 * Lua arguments: (self, input, gradOutput [, scale = 1]). `input` is read
 * as a 2 x N tensor: row 0 holds 1-based indices, row 1 the matching
 * values.
 *
 * For every entry the gradWeight slice selected by the index is first reset
 * to zero and then set to (scale * value) * gradOutput, so slices are
 * overwritten rather than accumulated. gradBias accumulates
 * scale * gradOutput. When the "weightDecay" field is non-zero,
 * weightDecay * weight is added to gradWeight. Finally the input is copied
 * into `lastInput` for use by the parameter update.
 *
 * Fix: the bias gradient used a hard-coded factor of 1 while the weight
 * gradient honoured `scale`; both now use `scale` (identical for the default
 * scale of 1). The unused lookup of the "output" field was dropped.
 *
 * Raises a Lua error ("index out of bound") when an index is outside
 * [1, gradWeight->size[0]]. Reports no return values to Lua.
 */
static int nn_(SparseLinear_accGradParameters)(lua_State *L)
{
  long i;
  THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor * gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  real scale = luaL_optnumber(L, 4, 1);
  THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
  THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
  THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
  THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id));
  real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay");
  long dim = gradWeight->size[0]; /* number of addressable weight slices */

  for(i = 0; i < input->size[1]; i++)
  {
    long offset = (long)(THTensor_(get2d)(input, 0, i))-1;

    if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */
    {
      real val = scale*THTensor_(get2d)(input, 1, i);

      /* zero the slice, then write val * gradOutput into it */
      THBlas_(scal)(gradOutput->size[0],
                    0,
                    THTensor_(data)(gradWeight)+offset*gradWeight->stride[0],
                    gradWeight->stride[1]);

      THBlas_(axpy)(gradOutput->size[0],
                    val,
                    THTensor_(data)(gradOutput),
                    gradOutput->stride[0],
                    THTensor_(data)(gradWeight)+offset*gradWeight->stride[0],
                    gradWeight->stride[1]);
    }
    else
      return luaL_error(L, "index out of bound");
  }

  /* the bias gradient must be scaled like the weight gradient */
  THTensor_(cadd)(gradBias, gradBias, scale, gradOutput);

  if(weightDecay != 0)
    THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);

  /* remember which entries were touched for the parameter update */
  THTensor_(resizeAs)(lastInput, input);
  THTensor_(copy)(lastInput, input);

  return 0;
}
/*
 * Sqrt backward pass (the definition continues past the end of this line;
 * only the generic element-wise branch is visible). `gradInput` is resized
 * like `input`. When `output` is 1D, or any of output / gradOutput /
 * gradInput is not contiguous, each element of gradInput is set to 0 where
 * output is 0 and to 0.5 * gradOutput / output elsewhere.
 */
static int nn_(Sqrt_updateGradInput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); THTensor_(resizeAs)(gradInput, input); if (output->nDimension == 1 || !THTensor_(isContiguous)(output) || !THTensor_(isContiguous)(gradOutput) || !THTensor_(isContiguous)(gradInput)) { TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ *gradInput_data = ((*output_data == 0.0) ? 0.0 : \ (0.5 * (*gradOutput_data / *output_data))););
/*
 * SpatialConvolutionGPU forward pass.
 *
 * Lua arguments: (self, input). The input must be 4D; sizes 1, 2 and 3 are
 * read as height, width and batch size. The weight is indexed as
 * (nInputPlane, kH, kW, nOutputPlane). `output` is resized to
 * nOutputPlane x outputHeight x outputWidth x batchSize, where each output
 * extent is (padding + inputExtent - kernelExtent) / stride + 1.
 *
 * Raises Lua argument errors when the input is not 4D or not square, when
 * kH != kW or dH != dW, or when input / weight / output are not contiguous.
 * Note that `output` has already been resized when the later checks run.
 *
 * Reports one return value to Lua.
 */
static int gpunn_SpatialConvolutionGPU_updateOutput(lua_State *L)
{
  THGPUTensor *input  = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  int dW              = luaT_getfieldcheckint(L, 1, "dW");
  int dH              = luaT_getfieldcheckint(L, 1, "dH");
  int padding         = luaT_getfieldcheckint(L, 1, "padding");
  THGPUTensor *weight = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "weight", "torch.GPUTensor");
  THGPUTensor *output = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "output", "torch.GPUTensor");

  luaL_argcheck(L, input->nDimension == 4, 2, "4D (batch mode) tensor is expected");

  /* filter geometry */
  long nInputPlane  = weight->size[0];
  long kH           = weight->size[1];
  long kW           = weight->size[2];
  long nOutputPlane = weight->size[3];

  /* input geometry */
  long inputHeight = input->size[1];
  long inputWidth  = input->size[2];
  long batchSize   = input->size[3];

  /* resulting output geometry */
  long outputHeight = (padding + inputHeight - kH) / dH + 1;
  long outputWidth  = (padding + inputWidth - kW) / dW + 1;

  THGPUTensor_resize4d(output, nOutputPlane, outputHeight, outputWidth, batchSize);

  /* the kernel only supports square inputs, kernels and strides */
  luaL_argcheck(L, inputWidth == inputHeight, 1, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  /* every buffer handed to the kernel has to be contiguous */
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(weight), 1, "weight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(output), 1, "output must be contiguous");

  auto avInput  = input->get_array_view();
  auto avOutput = output->get_array_view();
  auto avWeight = weight->get_array_view();

  spatialConv_updateOutput(avInput, avWeight, avOutput,
                           nInputPlane, inputHeight, inputWidth, batchSize,
                           nOutputPlane, outputHeight, outputWidth,
                           kH, kW, -floor((double)padding/2), dW,
                           0, 1, true);

  return 1;
}
/*
 * Square forward pass (the definition continues past the end of this line).
 * `output` is resized like `input` and every element is set to
 * input * input.
 */
static int nn_(Square_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); THTensor_(resizeAs)(output, input); TH_TENSOR_APPLY2(real, output, real, input, \ *output_data = *input_data * *input_data;);
/*
 * TemporalMaxPooling backward pass.
 *
 * Lua arguments: (self, input, gradOutput). `gradInput` is resized like
 * `input` and zeroed. gradOutput is read as noframe x framesize (sizes 0
 * and 1 of its contiguous version). For output frame t and feature y the
 * value stored in `indices` is used as a frame offset inside the pooling
 * window that starts at input frame t*dW, and gradOutput[t][y] is added to
 * gradInput at that frame and feature.
 *
 * The loop over features runs in an OpenMP parallel for; every iteration
 * writes to a different feature column. The contiguous gradOutput is
 * released before returning. Reports one return value to Lua.
 */
static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input      = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int dW               = luaT_getfieldcheckint(L, 1, "dW");
  THTensor *indices    = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput  = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));

  /* dense gradOutput so that plain pointer arithmetic is valid */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  int nOutFrames = gradOutput->size[0];
  long frameLen  = gradOutput->size[1];

  real *gradInBase  = THTensor_(data)(gradInput);
  real *gradOutBase = THTensor_(data)(gradOutput);
  real *indexBase   = THTensor_(data)(indices);

  long frame, feat;

  for (frame = 0; frame < nOutFrames; frame++)
  {
    real *windowStart = gradInBase + frame*frameLen*dW;
    real *gradOutRow  = gradOutBase + frame*frameLen;
    real *indexRow    = indexBase + frame*frameLen;

#pragma omp parallel for private(feat)
    for (feat = 0; feat < frameLen; feat++)
    {
      /* frame offset of the max inside the window */
      long winner = (long)indexRow[feat];
      windowStart[winner*frameLen + feat] += gradOutRow[feat];
    }
  }

  THTensor_(free)(gradOutput);

  return 1;
}
/*
 * Sqrt forward pass.
 *
 * Lua arguments: (self, input). `output` is resized like `input` and filled
 * with the element-wise square root of the input.
 *
 * The numeric "eps" field is required (a Lua error is raised by the lookup
 * otherwise) but its value is not used in the computation.
 * NOTE(review): other revisions of this function add eps to the input
 * before taking the root -- confirm that ignoring it here is intended.
 *
 * Reports one return value to Lua.
 */
static int nn_(Sqrt_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  real eps = luaT_getfieldchecknumber(L, 1, "eps");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  (void)eps; /* looked up for validation only */

  THTensor_(resizeAs)(output, input);
  THTensor_(sqrt)(output, input);

  return 1;
}
/*
 * ExampleModule forward pass (the closing brace lies past the end of this
 * line). `output` is resized like `input` and every element is set to
 * fabs(input).
 */
static int nn_(ExampleModule_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); THTensor_(resizeAs)(output, input); TH_TENSOR_APPLY2(real, output, real, input, \ *output_data = fabs(*input_data);) return 1;
/*
 * Sqrt forward pass, Tensor_id variant (the definition continues past the
 * end of this line). `output` is resized like `input` and every element is
 * set to sqrt(input + eps), where eps is the module's numeric "eps" field.
 */
static int nn_(Sqrt_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); real bias = luaT_getfieldchecknumber(L,1,"eps"); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); THTensor_(resizeAs)(output, input); TH_TENSOR_APPLY2(real, output, real, input, \ *output_data = sqrt(*input_data + bias););
/*
 * LogSoftMax backward pass.
 *
 * Lua arguments: (self, input, gradOutput); only gradOutput (argument 3)
 * and the module's `output` are used. `output` must be a vector (one frame)
 * or a matrix (one frame per row); anything else raises a TH error.
 *
 * For every frame:
 *   gradInput[d] = gradOutput[d] - exp(output[d]) * sum_d gradOutput[d]
 *
 * Fix: the frames are walked through raw data pointers, which is only valid
 * for densely packed memory. gradOutput comes from the caller and may be a
 * strided view, so a contiguous version is taken first (as the forward pass
 * does for its input) and released at the end.
 *
 * Reports one return value to Lua.
 */
static int nn_(LogSoftMax_updateGradInput)(lua_State *L)
{
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  real *gradInput_data, *gradOutput_data, *output_data;
  long nframe = 0, dim = 0;
  long t, d;

  if(output->nDimension == 1)
  {
    nframe = 1;
    dim = output->size[0];
  }
  else if(output->nDimension == 2)
  {
    nframe = output->size[0];
    dim = output->size[1];
  }
  else
    THError("vector or matrix expected");

  /* taken after the shape check so nothing is held if the check fails */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  THTensor_(resizeAs)(gradInput, output);
  gradInput_data = THTensor_(data)(gradInput);
  output_data = THTensor_(data)(output);
  gradOutput_data = THTensor_(data)(gradOutput);

  for(t = 0; t < nframe; t++)
  {
    accreal sum = 0;

    for(d = 0; d < dim; d++)
      sum += gradOutput_data[d];

    for(d = 0; d < dim; d++)
      gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum;

    /* advance all three cursors to the next frame */
    gradInput_data += dim;
    output_data += dim;
    gradOutput_data += dim;
  }

  THTensor_(free)(gradOutput);

  return 1;
}
/*
 * ReLU forward pass (the closing brace lies past the end of this line).
 * `output` is resized like `input`; every element is set to the input value
 * when it is greater than 0 and to 0 otherwise.
 */
static int nn_(ReLU_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); THTensor_(resizeAs)(output, input); TH_TENSOR_APPLY2(real, output, real, input, \ *output_data = *input_data > 0 ? *input_data : 0;) return 1;
/*
 * Tanh forward pass, non-generic double variant (the closing brace lies past
 * the end of this line). `output` is resized like `input` and every element
 * is set to tanh(input).
 */
static int nn_Tanh_forward(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id); THTensor_resizeAs(output, input); TH_TENSOR_APPLY2(double, output, double, input, \ *output_p = tanh(*input_p);) return 1;
/*
 * HardShrink forward pass (the definition continues past the end of this
 * line). `output` is resized like `input`; per element:
 *   input > 0.5  -> input - 0.5
 *   input < 0.5  -> input + 0.5
 *   otherwise    -> 0
 *
 * NOTE(review): as written the last branch is only reached when the input
 * equals 0.5 exactly, and every value below 0.5 (including values near 0) is
 * shifted up by 0.5. The second test was probably meant to be `< -0.5` --
 * confirm and fix in a code change.
 */
static int nn_(HardShrink_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); THTensor_(resizeAs)(output, input); TH_TENSOR_APPLY2(real, output, real, input, \ if ((*input_data) > 0.5) *output_data = *input_data - 0.5; \ else if ((*input_data) < 0.5) *output_data = *input_data + 0.5; \ else *output_data = 0;);
/*
 * Tanh forward pass (the definition continues past the end of this line;
 * only the generic element-wise branch is visible). `output` is resized like
 * `input`. When the input is 1D, or input / output is not contiguous, every
 * element of output is set to tanh(input) through TH_TENSOR_APPLY2.
 */
static int nn_(Tanh_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); THTensor_(resizeAs)(output, input); if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output)) { TH_TENSOR_APPLY2(real, output, real, input, \ *output_data = tanh(*input_data););
/*
 * AbsModuleHessian backward Hessian pass (the closing brace lies past the
 * end of this line). `hessianInput` is resized like `input`; every element
 * is set to hessianOutput * squaredDerivate, where squaredDerivate is the
 * square of (input >= 0 ? 1 : -1). That square is always 1, so the result
 * equals hessianOutput element-wise.
 */
static int nn_AbsModuleHessian_backwardHessian(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id); THTensor *hessianOutput = luaT_checkudata(L, 3, torch_Tensor_id); THTensor *hessianInput = luaT_getfieldcheckudata(L, 1, "hessianInput", torch_Tensor_id); THTensor_resizeAs(hessianInput, input); TH_TENSOR_APPLY3(double, hessianInput, double, hessianOutput, double, input, \ double z = *input_p; \ double squaredDerivate = (z >= 0 ? 1 : -1)*(z >= 0 ? 1 : -1); *hessianInput_p = *hessianOutput_p * squaredDerivate;) return 1;
/*
 * LogSoftMax forward pass.
 *
 * Lua arguments: (self, input). `input` must be a vector (one frame) or a
 * matrix (one frame per row); anything else raises a TH argument error.
 * A contiguous version of the input is used and `output` is resized like it.
 *
 * For every frame the maximum is subtracted before exponentiating:
 *   logsum    = max + log(sum_d THExpMinusApprox(max - input[d]))
 *   output[d] = input[d] - logsum
 *
 * The contiguous input is released before returning. Reports one return
 * value to Lua.
 */
static int nn_(LogSoftMax_updateOutput)(lua_State *L)
{
  THTensor *input  = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  real *src, *dst;
  long nframe = 0, dim = 0;
  long frame, k;

  switch (input->nDimension)
  {
    case 1:
      nframe = 1;
      dim = input->size[0];
      break;
    case 2:
      nframe = input->size[0];
      dim = input->size[1];
      break;
    default:
      THArgCheck(0, 2, "vector or matrix expected");
  }

  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  src = THTensor_(data)(input);
  dst = THTensor_(data)(output);

  for (frame = 0; frame < nframe; frame++)
  {
    accreal logsum = 0;
    real peak = -THInf;

    /* largest element of the frame */
    for (k = 0; k < dim; k++)
      peak = THMax(peak, src[k]);

    /* log of the sum of exponentials, shifted by the peak */
    for (k = 0; k < dim; k++)
      logsum += THExpMinusApprox(peak - src[k]);
    logsum = peak + log(logsum);

    for (k = 0; k < dim; k++)
      dst[k] = src[k] - logsum;

    src += dim;
    dst += dim;
  }

  THTensor_(free)(input);

  return 1;
}