/*
 * Forward pass of the GPU mean-squared-error criterion.
 *
 * Lua stack on entry:
 *   1 - criterion table; boolean field "sizeAverage" is read and the
 *       resulting loss is stored into field "output"
 *   2 - input  (torch.GPUTensor)
 *   3 - target (torch.GPUTensor)
 *
 * Raises a Lua argument error when input and target hold a different
 * number of elements.  Returns one Lua value: the loss.
 */
static int gpunn_MSECriterion_updateOutput(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *target = (THGPUTensor*)luaT_checkudata(L, 3, "torch.GPUTensor");
  int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
  THGPUTensor *inputContig;
  THGPUTensor *targetContig;
  long nElements;
  float loss;

  /* Validate before taking any references so nothing is held if this raises. */
  luaL_argcheck(L, THGPUTensor_nElement(input) == THGPUTensor_nElement(target), 2,
                "input and target need to have the same number of elements");

  nElements = THGPUTensor_nElement(input);

  inputContig = THGPUTensor_newContiguous(input);
  targetContig = THGPUTensor_newContiguous(target);

  /* NOTE(review): boltInnerProduct_plus_mse is defined elsewhere; presumably it
   * reduces the squared differences of the two tensors to a scalar - confirm. */
  loss = boltInnerProduct_plus_mse(inputContig, targetContig);

  if (sizeAverage)
  {
    loss = loss / nElements;
  }

  THGPUTensor_free(inputContig);
  THGPUTensor_free(targetContig);

  /* Store the loss in self.output ... */
  lua_pushnumber(L, loss);
  lua_setfield(L, 1, "output");

  /* ... and also hand it back as the single return value. */
  lua_pushnumber(L, loss);
  return 1;
}
// TODO check bound of target_data by ndim = input->size[0];? static int nn_(ClassNLLCriterion_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); input = THTensor_(newContiguous)(input); real *input_data = THTensor_(data)(input); THTensor *target = luaT_checkudata(L, 3, torch_Tensor); target = THTensor_(newContiguous)(target); real *target_data = THTensor_(data)(target); accreal sum = .0; if(input->nDimension == 1) { sum = -input_data[(int)target_data[0]-1]; } else if(input->nDimension == 2) { int nframe = input->size[0]; int ndim = input->size[1]; int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); int f; for (f = 0; f < nframe; ++f) { sum -= input_data[f*ndim+(int)target_data[f]-1]; } if (sizeAverage) sum /= nframe; } else THArgCheck(0, 2, "vector or matrix expected"); lua_pushnumber(L, sum); lua_setfield(L, 1, "output"); THTensor_(free)(target); THTensor_(free)(input); return 1; }
/*
 * Forward pass of the absolute-error criterion.
 *
 * Reads the input tensor from Lua argument 2, the target tensor from Lua
 * argument 3 and the boolean field "sizeAverage" from the table at argument 1,
 * then accumulates fabs(input - target) into `sum` through TH_TENSOR_APPLY2.
 *
 * NOTE(review): this definition is cut off right after `if(sizeAverage)`; the
 * rest of the body (averaging, storing/returning the result) is not visible
 * here and is therefore not documented.
 */
static int nn_AbsCriterion_forward(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id); THTensor *target = luaT_checkudata(L, 3, torch_Tensor_id); int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); double sum; sum = 0; TH_TENSOR_APPLY2(double, input, double, target, sum += fabs(*input_p - *target_p);) if(sizeAverage)
/*
 * Forward pass of the smooth-L1 criterion.
 *
 * Reads the input tensor from Lua argument 2, the target tensor from Lua
 * argument 3 and the boolean field "sizeAverage" from the table at argument 1.
 * For every element pair visited by TH_TENSOR_APPLY2 it takes
 * z = fabs(input - target) and adds 0.5*z*z when z < 1, otherwise z - 0.5
 * (quadratic near zero, linear further out).
 *
 * NOTE(review): this definition is cut off right after `if(sizeAverage)`; the
 * rest of the body (averaging, storing/returning the result) is not visible
 * here and is therefore not documented.
 */
static int nn_(SmoothL1Criterion_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *target = luaT_checkudata(L, 3, torch_Tensor); int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); real sum; sum = 0; TH_TENSOR_APPLY2(real, input, real, target, real z = fabs(*input_data - *target_data); sum += z < 1 ? 0.5*z*z : z - 0.5;) if(sizeAverage)
/*
 * Backward pass of the GPU mean-squared-error criterion.
 *
 * Lua stack on entry:
 *   1 - criterion table; boolean field "sizeAverage" and torch.GPUTensor
 *       field "gradInput" are read
 *   2 - input  (torch.GPUTensor)
 *   3 - target (torch.GPUTensor)
 *
 * gradInput is resized to the shape of the (contiguous) input and filled by
 * boltTransform_mse using a scale of 2, or 2/nElement when sizeAverage is set.
 *
 * Raises a Lua argument error when input and target hold a different number
 * of elements.
 *
 * NOTE(review): nothing is pushed explicitly before `return 1`, so the value
 * handed back to Lua is whatever sits on top of the stack.  Presumably the
 * luaT_getfieldcheck* helpers leave the fetched field there, which would make
 * it gradInput - the fetch order below is kept as-is for that reason; confirm
 * against luaT.
 */
static int gpunn_MSECriterion_updateGradInput(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *target = (THGPUTensor*)luaT_checkudata(L, 3, "torch.GPUTensor");
  int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
  THGPUTensor *gradInput = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "gradInput", "torch.GPUTensor");
  THGPUTensor *inputContig;
  THGPUTensor *targetContig;
  long nElements;
  float scale;

  /* Validate before taking any references so nothing is held if this raises. */
  luaL_argcheck(L, THGPUTensor_nElement(input) == THGPUTensor_nElement(target), 2,
                "input and target need to have the same number of elements");

  nElements = THGPUTensor_nElement(input);

  if (sizeAverage)
  {
    scale = 2./nElements;
  }
  else
  {
    scale = 2.;
  }

  inputContig = THGPUTensor_newContiguous(input);
  targetContig = THGPUTensor_newContiguous(target);

  THGPUTensor_resizeAs(gradInput, inputContig);

  /* NOTE(review): boltTransform_mse is defined elsewhere; presumably it writes
   * scale * (input - target) into gradInput - confirm. */
  boltTransform_mse(inputContig, targetContig, gradInput, scale);

  THGPUTensor_free(inputContig);
  THGPUTensor_free(targetContig);

  return 1;
}
/*
 * Backward pass of the class negative-log-likelihood criterion.
 *
 * Lua stack on entry:
 *   1 - criterion table; tensor field "gradInput" is read and written,
 *       boolean field "sizeAverage" is read (matrix input only)
 *   2 - input tensor: a vector (one sample) or a matrix (one row per sample)
 *   3 - target tensor holding 1-based class indices, one per sample
 *
 * For every sample the gradInput entry of the target class is set to -1
 * (or -1/nframe for matrix input with sizeAverage).  No other entry of
 * gradInput is touched, so the caller is expected to have sized and cleared
 * gradInput beforehand.
 *
 * Raises a Lua argument error when the input is neither a vector nor a
 * matrix, when the target holds fewer entries than there are samples, when
 * gradInput does not hold as many elements as input, or when a target index
 * falls outside [1, number of classes].  gradInput is left unmodified in all
 * of these cases.
 *
 * Returns one Lua value: the gradInput tensor.
 */
static int nn_(ClassNLLCriterion_updateGradInput)(lua_State *L)
{
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *target = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *gradContig;
  real *grad_data;
  real *target_data;
  accreal grad = -1.0;
  long nframe, ndim, f;
  int target_ok = 1;

  /* All checks that do not need tensor data happen before any tensor is
   * retained/copied, so an argument error cannot leak a reference. */
  THArgCheck(input->nDimension == 1 || input->nDimension == 2, 2, "vector or matrix expected");

  if (input->nDimension == 1)
  {
    nframe = 1;
    ndim = input->size[0];
  }
  else
  {
    nframe = input->size[0];
    ndim = input->size[1];
    if (luaT_getfieldcheckboolean(L, 1, "sizeAverage"))
      grad /= nframe;
  }

  THArgCheck(THTensor_(nElement)(target) >= nframe, 3, "inconsistent target size");
  /* The flat indexing below is only valid when gradInput mirrors input. */
  THArgCheck(THTensor_(nElement)(gradInput) == nframe * ndim, 1,
             "gradInput and input need to have the same number of elements");

  /* Only the sizes of input are needed, so no contiguous copy of it is made. */
  target = THTensor_(newContiguous)(target);
  target_data = THTensor_(data)(target);

  /* Validate every index up front so gradInput is never partially written. */
  for (f = 0; f < nframe; ++f)
  {
    real cls = target_data[f];

    /* Written as a negated conjunction so that NaN is rejected as well. */
    if (!(cls >= 1 && cls <= ndim))
    {
      target_ok = 0;
      break;
    }
  }

  if (!target_ok)
    THTensor_(free)(target);
  THArgCheck(target_ok, 3, "target out of range");

  gradContig = THTensor_(newContiguous)(gradInput);
  grad_data = THTensor_(data)(gradContig);

  for (f = 0; f < nframe; ++f)
    grad_data[f*ndim + (long)target_data[f] - 1] = grad;

  /* newContiguous returns a fresh copy when gradInput is not contiguous;
   * in that case the result has to be written back into the real tensor. */
  if (gradContig != gradInput)
    THTensor_(copy)(gradInput, gradContig);

  THTensor_(free)(gradContig);
  THTensor_(free)(target);

  /* Return gradInput explicitly on both the vector and the matrix path. */
  lua_getfield(L, 1, "gradInput");
  return 1;
}
/*
 * Forward pass of volumetric (time x height x width) max pooling.
 *
 * Lua stack on entry:
 *   1 - module table; integer fields kT/kW/kH (kernel), dT/dW/dH (stride),
 *       padT/padW/padH (padding), boolean field "ceil_mode" and tensor
 *       fields "indices" and "output" are read
 *   2 - input tensor, 4D (slices x time x height x width) or 5D with a
 *       leading batch dimension
 *
 * "output" and "indices" are resized to the pooled shape and filled by
 * nn_(VolumetricMaxPooling_updateOutput_frame), once for a 4D input or once
 * per batch entry for a 5D input.
 *
 * Raises a Lua argument error when the input is not 4D/5D, when it is smaller
 * than the kernel, or when a padding exceeds half of the matching kernel size.
 *
 * NOTE(review): nothing is pushed explicitly before `return 1`, so the value
 * handed back to Lua is whatever sits on top of the stack.  Presumably the
 * luaT_getfieldcheck* helpers leave the fetched field there, which would make
 * it "output" - the fetch order below is kept as-is for that reason; confirm
 * against luaT.
 */
static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padT = luaT_getfieldcheckint(L, 1, "padT");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int ceil_mode = luaT_getfieldcheckboolean(L,1,"ceil_mode");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  /* Rounding applied to the fractional window count: up in ceil mode,
   * down otherwise. */
  double (*roundWindows)(double) = ceil_mode ? ceil : floor;

  int batchShift;
  int dimN, dimt, dimh, dimw;
  long nslices, itime, iheight, iwidth;
  long otime, oheight, owidth;
  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* A leading batch dimension moves every other dimension up by one. */
  batchShift = (input->nDimension == 5) ? 1 : 0;
  dimN = batchShift;
  dimt = batchShift + 1;
  dimh = batchShift + 2;
  dimw = batchShift + 3;

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
                "input image smaller than kernel size");
  luaL_argcheck(L, kT/2 >= padT && kW/2 >= padW && kH/2 >= padH, 2,
                "pad should be smaller than half of kernel size");

  /* input sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  /* output sizes */
  otime = (int)(roundWindows((float)(itime - kT + 2 * padT) / dT) + 1);
  oheight = (int)(roundWindows((float)(iheight - kH + 2 * padH) / dH) + 1);
  owidth = (int)(roundWindows((float)(iwidth - kW + 2 * padW) / dW) + 1);

  if (padT != 0 || padW != 0 || padH != 0)
  {
    /* ensure that the last pooling starts inside the image */
    if ((otime - 1)*dT >= itime + padT)
      otime -= 1;
    if ((oheight - 1)*dH >= iheight + padH)
      oheight -= 1;
    if ((owidth - 1)*dW >= iwidth + padW)
      owidth -= 1;
  }

  /* work on a contiguous view/copy of the input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 5)
  {
    /* batch mode: one frame call per batch entry */
    long p;
    long nBatch = input->size[0];
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */
    THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++)
    {
      nn_(VolumetricMaxPooling_updateOutput_frame)(
        input_data + p * istride,
        output_data + p * ostride,
        indices_data + p * ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH,
        dT, dW, dH,
        padT, padW, padH);
    }
  }
  else
  {
    /* non-batch mode: a single frame */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j uchar locations packed into float/double */
    THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    nn_(VolumetricMaxPooling_updateOutput_frame)(
      input_data, output_data, indices_data,
      nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH,
      dT, dW, dH,
      padT, padW, padH);
  }

  /* drop the reference taken by newContiguous */
  THTensor_(free)(input);
  return 1;
}
/*
 * Backward pass of the multi-class margin criterion.
 *
 * Lua stack on entry:
 *   1 - criterion table; boolean field "sizeAverage" and tensor field
 *       "gradInput" are read, gradInput is resized like input and filled
 *   2 - input tensor: a vector (one sample) or a matrix (one row per sample)
 *   3 - target: a number for vector input, or a 1D tensor with one entry per
 *       row for matrix input; entries are 1-based class indices
 *
 * For every sample and every class d other than the target class, the
 * gradient is g when 1 - input[target] + input[d] > 0 and 0 otherwise; the
 * target class receives minus the sum of the non-zero entries.  g is 1, or
 * 1/dim when sizeAverage is set.
 *
 * Raises a Lua argument error when the input is neither a vector nor a
 * matrix, when the target tensor shape does not match, or when a target
 * index falls outside [1, dim].
 *
 * NOTE(review): nothing is pushed explicitly before `return 1`, so the value
 * handed back to Lua is whatever sits on top of the stack.  Presumably
 * luaT_getfieldcheckudata leaves the fetched field there, which would make it
 * gradInput - confirm against luaT.
 */
static int nn_(MultiMarginCriterion_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  real *input_data;
  real *gradInput_data;
  real *target_data;
  THTensor *target;
  long nframe, dim;
  long t, d;
  real target_;
  real g;
  int target_ok = 1;

  THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");

  if(input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
    target_ = luaL_checknumber(L, 3);
    /* Wrap the scalar target so both paths can share the loops below. */
    target = THTensor_(newWithSize1d)(1);
    THTensor_(fill)(target, target_);
  }
  else
  {
    nframe = input->size[0];
    dim = input->size[1];
    target = luaT_checkudata(L, 3, torch_Tensor);
    THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
    target = THTensor_(newContiguous)(target);
  }

  /* Same range check as the forward pass; without it an invalid target
   * indexes outside both input and gradInput. */
  for(t = 0; t < nframe; t++)
  {
    real idx = THTensor_(get1d)(target, t);
    if(!((idx >= 1) && (idx <= dim)))
    {
      target_ok = 0;
      break;
    }
  }

  /* Release our reference first: raising the error does not return here. */
  if(!target_ok)
    THTensor_(free)(target);
  THArgCheck(target_ok, 3, "target out of range");

  g = (sizeAverage ? 1./((real)dim) : 1.);

  input = THTensor_(newContiguous)(input);
  input_data = THTensor_(data)(input);

  THTensor_(resizeAs)(gradInput, input);
  gradInput_data = THTensor_(data)(gradInput);

  target_data = THTensor_(data)(target);

  for(t = 0; t < nframe; t++)
  {
    long target_idx = (long)(target_data[t])-1;
    real input_target = input_data[target_idx];
    real gradInput_target = 0;

    for(d = 0; d < dim; d++)
    {
      real z = 1 - input_target + input_data[d];

      /* The target class is filled in after the loop. */
      if(d == target_idx)
        continue;

      if(z > 0)
      {
        gradInput_target -= g;
        gradInput_data[d] = g;
      }
      else
        gradInput_data[d] = 0;
    }
    gradInput_data[target_idx] = gradInput_target;

    /* Advance both pointers to the next row. */
    input_data += dim;
    gradInput_data += dim;
  }

  THTensor_(free)(input);
  THTensor_(free)(target);
  return 1;
}
/*
 * Forward pass of the multi-class margin criterion.
 *
 * Lua stack on entry:
 *   1 - criterion table; boolean field "sizeAverage" is read and the loss is
 *       stored into field "output"
 *   2 - input tensor: a vector (one sample) or a matrix (one row per sample)
 *   3 - target: a number for vector input, or a 1D tensor with one entry per
 *       row for matrix input; entries are 1-based class indices
 *
 * The loss is the sum, over all samples and all classes d other than the
 * target class, of max(0, 1 - input[target] + input[d]); it is divided by
 * dim when sizeAverage is set.
 *
 * Raises a Lua argument error when the input is neither a vector nor a
 * matrix, when the target tensor shape does not match, or when a target
 * index falls outside [1, dim].
 *
 * Returns one Lua value: the loss.
 */
static int nn_(MultiMarginCriterion_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
  real *input_data, *target_data;
  long nframe, dim;
  long t, d;
  real target_;
  THTensor *target;
  real sum;
  int target_ok = 1;

  THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");

  if(input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
    target_ = luaL_checknumber(L, 3);
    /* Wrap the scalar target so both paths can share the loops below. */
    target = THTensor_(newWithSize1d)(1);
    THTensor_(fill)(target, target_);
  }
  else
  {
    nframe = input->size[0];
    dim = input->size[1];
    target = luaT_checkudata(L, 3, torch_Tensor);
    THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
    target = THTensor_(newContiguous)(target);
  }

  for(t = 0; t < nframe; t++)
  {
    real idx = THTensor_(get1d)(target, t);
    if(!((idx >= 1) && (idx <= dim)))
    {
      target_ok = 0;
      break;
    }
  }

  /* Release our reference first: raising the error does not return here, so
   * checking inside the loop would leak the target tensor. */
  if(!target_ok)
    THTensor_(free)(target);
  THArgCheck(target_ok, 3, "target out of range");

  input = THTensor_(newContiguous)(input);
  input_data = THTensor_(data)(input);
  target_data = THTensor_(data)(target);

  sum = 0;
  for(t = 0; t < nframe; t++)
  {
    long target_idx = (long)(target_data[t]-1);
    real input_target = input_data[target_idx];

    for(d = 0; d < dim; d++)
    {
      real z = 1 - input_target + input_data[d];

      /* The target class does not contribute to the loss. */
      if(d == target_idx)
        continue;

      if(z > 0)
        sum += z;
    }

    /* Advance to the next row. */
    input_data += dim;
  }

  if(sizeAverage)
    sum /= dim;

  lua_pushnumber(L, sum);
  lua_setfield(L, 1, "output");

  THTensor_(free)(input);
  THTensor_(free)(target);

  lua_pushnumber(L, sum);
  return 1;
}