/*
 * Lua binding: apply one gradient step to a SparseLinear module.
 *
 * Lua stack: 1 = module table, 2 = learning rate (number).
 * Module fields read: weight, output, bias, gradBias, gradWeight, lastInput
 * (tensors) and weightDecay (number). "output" and "weightDecay" are fetched,
 * and therefore type-checked, but are not otherwise used here.
 *
 * The bias is updated over its whole length first. The weights are then
 * updated only along the dim-0 slices named by row 0 of lastInput, which
 * holds 1-based indices. An index outside [1, weight->size[0]] raises the
 * Lua error "index out of bound" (the bias step has already been applied by
 * then).
 *
 * Returns 0 (no Lua return values).
 */
int nn_(SparseLinear_updateParameters)(lua_State *L)
{
  real learningRate = luaL_checknumber(L, 2);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
  THTensor *lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id));
  real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay");
  const long sliceCount = weight->size[0];
  const real step = -learningRate;
  long col;

  /* Only fetched for the field type check. */
  (void)output;
  (void)weightDecay;

  /* bias <- bias - learningRate * gradBias */
  THTensor_(cadd)(bias, bias, step, gradBias);

  for (col = 0; col < lastInput->size[1]; col++)
  {
    /* Stored indices are 1-based; convert to a 0-based slice number. */
    const long slice = (long)(THTensor_(get2d)(lastInput, 0, col)) - 1;
    real *gradSlice;
    real *weightSlice;

    if (slice < 0 || slice >= sliceCount)
      return luaL_error(L, "index out of bound"); /* luaL_error does not return */

    gradSlice = THTensor_(data)(gradWeight) + slice * gradWeight->stride[0];
    weightSlice = THTensor_(data)(weight) + slice * weight->stride[0];

    /* weight[slice] <- weight[slice] - learningRate * gradWeight[slice] */
    THBlas_(axpy)(bias->size[0],
                  step,
                  gradSlice, gradWeight->stride[1],
                  weightSlice, weight->stride[1]);
  }

  return 0;
}
/*
 * Lua binding: forward pass of the Sqrt module.
 *
 * Lua stack: 1 = module table, 2 = input tensor.
 * Resizes the module's "output" tensor to the shape of the input and fills it
 * with the element-wise square root of the input.
 *
 * The module's "eps" field is read (so it must be a number) but it does not
 * take part in the computation here.
 *
 * Returns 1 to Lua.
 */
static int nn_(Sqrt_updateOutput)(lua_State *L)
{
  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
  real eps = luaT_getfieldchecknumber(L, 1, "eps");
  THTensor *dst = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  (void)eps; /* validated above, intentionally unused */

  THTensor_(resizeAs)(dst, src);
  THTensor_(sqrt)(dst, src);

  return 1;
}
static int nn_(Sqrt_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); real bias = luaT_getfieldchecknumber(L,1,"eps"); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); THTensor_(resizeAs)(output, input); TH_TENSOR_APPLY2(real, output, real, input, \ *output_data = sqrt(*input_data + bias););
/*
 * Lua binding: compute parameter gradients of a SparseLinear module.
 *
 * Lua stack: 1 = module table, 2 = input tensor, 3 = gradOutput tensor,
 *            4 = optional scale factor (defaults to 1).
 * Module fields read: weight, output, gradBias, gradWeight, lastInput
 * (tensors) and weightDecay (number). "output" is only type-checked.
 *
 * The input is a 2D tensor: row 0 holds 1-based indices into dim 0 of
 * gradWeight, row 1 holds the matching values. For every column the selected
 * gradWeight slice is first zeroed and then set to
 * scale * value * gradOutput. An index outside [1, gradWeight->size[0]]
 * raises the Lua error "index out of bound".
 *
 * gradBias accumulates scale * gradOutput. If weightDecay is non-zero,
 * weightDecay * weight is added to the whole of gradWeight. Finally the input
 * is copied into lastInput.
 *
 * Returns 0 (no Lua return values).
 */
static int nn_(SparseLinear_accGradParameters)(lua_State *L)
{
  long i;
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  real scale = luaL_optnumber(L, 4, 1);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
  THTensor *lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id));
  real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay");
  long dim = gradWeight->size[0]; /* number of addressable weight slices */

  (void)output; /* fetched only for the field type check */

  /* size[1] is read below, so the input must really be two-dimensional. */
  luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");

  for (i = 0; i < input->size[1]; i++)
  {
    /* Stored indices are 1-based; convert to a 0-based slice number. */
    long offset = (long)(THTensor_(get2d)(input, 0, i)) - 1;
    real *gradSlice;
    real val;

    if (offset < 0 || offset >= dim)
      return luaL_error(L, "index out of bound"); /* luaL_error does not return */

    val = scale * THTensor_(get2d)(input, 1, i);
    gradSlice = THTensor_(data)(gradWeight) + offset * gradWeight->stride[0];

    /* Reset the slice, then write val * gradOutput into it. */
    THBlas_(scal)(gradOutput->size[0],
                  0,
                  gradSlice, gradWeight->stride[1]);
    THBlas_(axpy)(gradOutput->size[0],
                  val,
                  THTensor_(data)(gradOutput), gradOutput->stride[0],
                  gradSlice, gradWeight->stride[1]);
  }

  /* The scale factor applies to the bias gradient as well as the weights. */
  THTensor_(cadd)(gradBias, gradBias, scale, gradOutput);

  if (weightDecay != 0)
    THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);

  /* Remember the input (indices included) for later use via lastInput. */
  THTensor_(resizeAs)(lastInput, input);
  THTensor_(copy)(lastInput, input);

  return 0;
}
/*
 * Lua binding: forward pass of the LcEncoder module.
 *
 * Lua stack: 1 = module table, 2 = input tensor (3D, third dimension of
 * size 1).
 * Module fields read: winX, winY, woutX, woutY (ints), xStep, yStep
 * (numbers), weight, bias, output (tensors).
 *
 * The output is resized to
 *   ((size0 - winX + 1) / xStep) x ((size1 - winY + 1) / yStep) x 1.
 * Each output element (x, y) is the dot product of a winX x winY window of
 * the input, starting at (floor(x * xStep), floor(y * yStep)), with the
 * kernel obtained by selecting index y % woutY on dim 3 and then x % woutX
 * on dim 2 of weight, plus bias(x % woutX, y % woutY).
 *
 * Returns 1 to Lua.
 */
static int nn_LcEncoder_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");

  /* Scratch tensors used as views into input and weight. */
  THTensor *plane = THTensor_new();
  THTensor *windowCols = THTensor_new();
  THTensor *window = THTensor_new();
  THTensor *kernelCol = THTensor_new();
  THTensor *kernel = THTensor_new();

  /* Derive the output size from the input size, window size and step. */
  THTensor_resize3d(output,
                    (input->size[0] - winX+1) / xStep,
                    (input->size[1] - winY+1) / yStep,
                    1);

  /* Drop the trailing singleton dimension of the input. */
  THTensor_select(plane, input, 2, 0);

  int outY, outX;
  for (outY = 0; outY < output->size[1]; outY++)
  {
    int inY = (int)floor(outY*yStep);
    int kernY = outY%woutY;

    for (outX = 0; outX < output->size[0]; outX++)
    {
      int inX = (int)floor(outX*xStep);
      int kernX = outX%woutX;

      /* Input window anchored at (inX, inY). */
      THTensor_narrow(windowCols, plane, 0, inX, winX);
      THTensor_narrow(window, windowCols, 1, inY, winY);

      /* Kernel associated with this output position. */
      THTensor_select(kernelCol, weight, 3, kernY);
      THTensor_select(kernel, kernelCol, 2, kernX);

      double response = THTensor_dot(window, kernel);
      double offset = THTensor_get2d(bias, kernX, kernY);
      THTensor_set3d(output, outX, outY, 0, response+offset);
    }
  }

  THTensor_free(plane);
  THTensor_free(windowCols);
  THTensor_free(window);
  THTensor_free(kernelCol);
  THTensor_free(kernel);

  return 1;
}
/*
 * Lua binding: backward pass of the LcEncoder module.
 *
 * Lua stack: 1 = module table, 2 = input tensor, 3 = gradOutput tensor; both
 * tensors must be 3D with a third dimension of size 1.
 * Module fields read: winX, winY, woutX, woutY (ints), xStep, yStep
 * (numbers), weight, gradWeight, bias, gradBias, gradInput (tensors).
 * "bias" is only type-checked.
 *
 * gradWeight is zeroed; gradInput is resized like the input and zeroed.
 * gradBias is NOT reset here, it is accumulated into. Then, for every
 * gradOutput element g at (x, y), with the input window anchored at
 * (floor(x * xStep), floor(y * yStep)) and the kernel slot
 * (x % woutX, y % woutY):
 *   gradWeight[slot]  += g * input window
 *   gradBias[slot]    += g
 *   gradInput window  += g * weight[slot]
 *
 * Returns 1 to Lua.
 */
static int nn_LcEncoder_backward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  luaL_argcheck(L, input->nDimension == 3, 2, "input 3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");
  luaL_argcheck(L, gradOutput->nDimension == 3, 3, "gradOutput 3D tensor expected");
  luaL_argcheck(L, gradOutput->size[2] == 1, 3, "invalid gradOutput 3rd dim size has to be 1");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);

  (void)bias; /* fetched only for the field type check */

  /* ---- set-up for the gradWeight computation ---- */
  THTensor_fill(gradWeight, 0);

  THTensor *inPlane = THTensor_new();
  THTensor *inCols = THTensor_new();
  THTensor *inWindow = THTensor_new();
  THTensor_select(inPlane, input, 2, 0);

  THTensor *gradKernelCol = THTensor_new();
  THTensor *gradKernel = THTensor_new();

  /* ---- set-up for the gradInput computation ---- */
  THTensor_resizeAs(gradInput, input);
  THTensor_fill(gradInput, 0);

  THTensor *gradInPlane = THTensor_new();
  THTensor *gradInCols = THTensor_new();
  THTensor *gradInWindow = THTensor_new();
  THTensor_select(gradInPlane, gradInput, 2, 0);

  THTensor *kernelCol = THTensor_new();
  THTensor *kernel = THTensor_new();

  int outY, outX;
  for (outY = 0; outY < gradOutput->size[1]; outY++)
  {
    int inY = (int)floor(outY*yStep);
    int kernY = outY%woutY;

    for (outX = 0; outX < gradOutput->size[0]; outX++)
    {
      int inX = (int)floor(outX*xStep);
      int kernX = outX%woutX;
      double g = THTensor_get3d(gradOutput, outX, outY, 0);

      /* gradWeight[kernX, kernY] += g * input window */
      THTensor_narrow(inCols, inPlane, 0, inX, winX);
      THTensor_narrow(inWindow, inCols, 1, inY, winY);
      THTensor_select(gradKernelCol, gradWeight, 3, kernY);
      THTensor_select(gradKernel, gradKernelCol, 2, kernX);
      THTensor_addTensor(gradKernel, g, inWindow);

      /* gradBias[kernX, kernY] += g */
      THTensor_set2d(gradBias, kernX, kernY,
                     THTensor_get2d(gradBias, kernX, kernY) + g);

      /* gradInput window += g * weight[kernX, kernY] */
      THTensor_narrow(gradInCols, gradInPlane, 0, inX, winX);
      THTensor_narrow(gradInWindow, gradInCols, 1, inY, winY);
      THTensor_select(kernelCol, weight, 3, kernY);
      THTensor_select(kernel, kernelCol, 2, kernX);
      THTensor_addTensor(gradInWindow, g, kernel);
    }
  }

  /* Release the views used for gradWeight. */
  THTensor_free(inPlane);
  THTensor_free(inCols);
  THTensor_free(inWindow);
  THTensor_free(gradKernelCol);
  THTensor_free(gradKernel);

  /* Release the views used for gradInput. */
  THTensor_free(gradInPlane);
  THTensor_free(gradInCols);
  THTensor_free(gradInWindow);
  THTensor_free(kernelCol);
  THTensor_free(kernel);

  return 1;
}
/*
 * Lua binding: backward pass of cross-map (across-channel) normalization.
 *
 * Lua stack: 1 = module table, 2 = input tensor, 3 = gradOutput tensor.
 * Module fields read: gradInput and z (tensors), alpha, beta, k (numbers),
 * n (int). "k" is read but not used in this function.
 *
 * The input is indexed as size[0] x size[1] x size[2] positions, each with
 * size[3] channels. Input positions are addressed with input->stride[2],
 * gradOutput positions with gradOutput->stride[2], while z and gradInput are
 * addressed with a position stride equal to the channel count; channels are
 * read with unit stride throughout.
 *
 * For each position and channel i, with a = input, g = gradOutput and z the
 * value stored in the "z" tensor (expected to come from the forward pass):
 *   gradInput[i] = g[i] * z[i]^(-beta)
 *                  - a[i] * sum_j ( g[j] * 2 * (alpha/n) * beta * a[j]
 *                                   * z[j]^(-beta-1) )
 * where j runs over [i + n/2 + 1 - n, i + n/2 + 1) clipped to the valid
 * channel range.
 *
 * NOTE: the "z" tensor is overwritten with the per-channel terms of that sum
 * and therefore no longer holds the forward values afterwards.
 *
 * Returns 1 to Lua.
 */
static int nxn_(CrossMapNormalization_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *zsave = luaT_getfieldcheckudata(L, 1, "z", torch_Tensor);
  real alpha = luaT_getfieldchecknumber(L, 1, "alpha");
  real beta = luaT_getfieldchecknumber(L, 1, "beta");
  real k = luaT_getfieldchecknumber(L, 1, "k");
  long n = luaT_getfieldcheckint(L, 1, "n");
  real alphan = alpha / n;

  (void)k; /* validated above, not needed for the gradient */

  long batch = input->size[0];
  long dim1 = input->size[1];
  long dim2 = input->size[2];
  long positions = batch*dim1*dim2;
  long planes = input->size[3];
  long inStep = input->stride[2];
  long goStep = gradOutput->stride[2];

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  real *inBase = THTensor_(data)(input);
  real *zBase = THTensor_(data)(zsave);
  real *goBase = THTensor_(data)(gradOutput);
  real *giBase = THTensor_(data)(gradInput);

  long idx;
#pragma omp parallel for private(idx)
  for (idx = 0; idx < positions; idx++)
  {
    long ch;
    long j;
    real *in = inBase + idx*inStep;
    real *zs = zBase + idx*planes;
    real *go = goBase + idx*goStep;
    real *gi = giBase + idx*planes;

    /* Pass 1: direct term into gradInput; reuse the z slot as scratch for
       the per-channel contribution to the windowed sum. */
    for (ch = 0; ch < planes; ch++)
    {
      real z = zs[ch];
      real zb = pow(z, -beta);
      real zb2 = zb/z;
      real gj = go[ch];
      real aj = in[ch];

      gi[ch] = gj*zb;
      zs[ch] = gj * 2 * alphan * beta * aj * zb2;
    }

    /* Pass 2: subtract the windowed sum of the scratch values. */
    for (ch = 0; ch < planes; ch++)
    {
      real ai = in[ch];
      long last = ch + n/2 + 1;
      long first = last - n;

      if (first < 0)
        first = 0;
      if (last > planes)
        last = planes;

      for (j = first; j < last; j++)
      {
        gi[ch] -= ai * zs[j];
      }
    }
  }

  return 1;
}
/*
 * Lua binding: forward pass of cross-map (across-channel) normalization.
 *
 * Lua stack: 1 = module table, 2 = input tensor.
 * Module fields read: output and z (tensors), alpha, beta, k (numbers),
 * n (int).
 *
 * The input must have at least 4 dimensions and is indexed as
 * size[0] x size[1] x size[2] positions with size[3] channels each;
 * input->stride[2] must equal size[3], otherwise a Lua argument error is
 * raised. output and z are resized like the input and addressed with a
 * position stride equal to the channel count.
 *
 * For each position and channel i:
 *   z[i]      = k + (alpha / n) * sum_j input[j]^2
 *   output[i] = input[i] * z[i]^(-beta)
 * where j runs over [i - n/2, i - n/2 + n) clipped to the valid channel
 * range. The z values are stored in the "z" tensor.
 *
 * Returns 1 to Lua.
 */
static int nxn_(CrossMapNormalization_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int dimension = 4-1;
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *zsave = luaT_getfieldcheckudata(L, 1, "z", torch_Tensor);
  real alpha = luaT_getfieldchecknumber(L, 1, "alpha");
  real beta = luaT_getfieldchecknumber(L, 1, "beta");
  real k = luaT_getfieldchecknumber(L, 1, "k");
  long n = luaT_getfieldcheckint(L, 1, "n");
  real alphan = alpha / n;

  /* Validate the rank BEFORE touching size[3] / stride[2]. */
  luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");

  long bs = input->size[0];
  long isize1 = input->size[1];
  long isize2 = input->size[2];
  long npix = bs*isize1*isize2;
  long planes = input->size[3];
  long istr2 = input->stride[2];

  /* The flat indexing below relies on this layout. Report a violation as a
     Lua error so it is still caught when assertions are compiled out. */
  luaL_argcheck(L, istr2 == planes, 2, "input stride[2] must equal size[3]");

  THTensor_(resizeAs)(output, input);
  THTensor_(resizeAs)(zsave, input);

  real *inptr = THTensor_(data)(input);
  real *zptr = THTensor_(data)(zsave);
  real *optr = THTensor_(data)(output);

  long idx;
#pragma omp parallel for private(idx)
  for (idx = 0; idx < npix; idx++)
  {
    long ch;
    long j;
    real *curinptr = inptr + idx*istr2;
    real *curzptr = zptr + idx*planes;
    real *curoptr = optr + idx*planes;

    for (ch = 0; ch < planes; ch++)
    {
      /* Read the centre value directly so it is always initialised, even
         when the window is empty (n < 1). */
      real val = curinptr[ch];
      real z = 0;
      long startf = ch - n/2;
      long endf = startf + n;

      if (startf < 0)
        startf = 0;
      if (endf > planes)
        endf = planes;

      /* Sum of squares over the channel window. */
      for (j = startf; j < endf; j++)
      {
        real x = curinptr[j];
        z += x * x;
      }

      z = k + z*alphan;
      curzptr[ch] = z;
      curoptr[ch] = val*pow(z, -beta);
    }
  }

  return 1;
}
/*
 * Lua binding: backward pass of the Dropmap module.
 *
 * Lua stack: 1 = module table, 3 = gradOutput tensor (4D; the last dimension
 * is the channel index).
 * Module fields read: output, gradInput, mask (tensors) and sameoverbatch
 * (number). "output" is only type-checked.
 *
 * gradInput is resized like gradOutput. Each element is copied from
 * gradOutput unless the mask entry for its channel is 0, in which case it is
 * set to 0. When sameoverbatch == 1 the mask is read as mask[ch] for every
 * batch item; otherwise it is read as mask[batch * channels + ch]. The mask
 * data is accessed through its raw data pointer with those flat offsets.
 *
 * Returns 1 to Lua.
 */
static int nxn_(Dropmap_updateGradInput)(lua_State *L)
{
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *mask = luaT_getfieldcheckudata(L, 1, "mask", torch_Tensor);
  float sameoverbatch = luaT_getfieldchecknumber(L, 1, "sameoverbatch");

  (void)output; /* fetched only for the field type check */

  /* size[0..3] and stride[0..3] are read below. */
  luaL_argcheck(L, gradOutput->nDimension == 4, 3, "4D tensor expected");

  THTensor_(resizeAs)(gradInput, gradOutput);

  /* Use long throughout: TH sizes/strides are long, and the flat offsets
     computed below can exceed the range of int for large tensors. */
  long bs = gradOutput->size[0];
  long ymax = gradOutput->size[1];
  long xmax = gradOutput->size[2];
  long channels = gradOutput->size[3];

  real *gidata = THTensor_(data)(gradInput);
  real *godata = THTensor_(data)(gradOutput);
  real *maskdata = THTensor_(data)(mask);

  long gistr0 = gradInput->stride[0];
  long gistr1 = gradInput->stride[1];
  long gistr2 = gradInput->stride[2];
  long gistr3 = gradInput->stride[3];

  long gostr0 = gradOutput->stride[0];
  long gostr1 = gradOutput->stride[1];
  long gostr2 = gradOutput->stride[2];
  long gostr3 = gradOutput->stride[3];

  /* 0 when one mask is shared by the whole batch, otherwise the number of
     mask entries per batch item. */
  long maskstep = (sameoverbatch == 1) ? 0 : channels;

  long batchidx;
#pragma omp parallel for private(batchidx)
  for (batchidx = 0; batchidx < bs; batchidx++)
  {
    /* Declared inside the parallel loop so every thread gets its own copy;
       sharing these counters between threads would be a data race. */
    long y, x, ch;
    const real *curmask = maskdata + batchidx*maskstep;

    for (y = 0; y < ymax; y++)
    {
      for (x = 0; x < xmax; x++)
      {
        for (ch = 0; ch < channels; ch++)
        {
          long gioff = batchidx*gistr0 + y*gistr1 + x*gistr2 + ch*gistr3;

          if (curmask[ch] == 0)
          {
            gidata[gioff] = 0;
          }
          else
          {
            gidata[gioff] = godata[batchidx*gostr0 + y*gostr1 + x*gostr2 + ch*gostr3];
          }
        }
      }
    }
  }

  return 1;
}