/*
 * Shape check for temporal (1-D) nearest-neighbor upsampling.
 *
 * input:        non-empty 2D (nChannels, width) or 3D (nBatch, nChannels, width).
 * gradOutput:   optional; when non-NULL it must match the upsampled output shape.
 * scale_factor: integral upsampling factor, must be > 1.
 */
static inline void THNN_(TemporalUpSamplingNearest_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int scale_factor) {
  THArgCheck(input != NULL, 2, "3D input tensor expected but got NULL");
  THArgCheck(scale_factor > 1, 4,
             "scale_factor must be greater than 1, but got: %d", scale_factor);
  THNN_ARGCHECK(!input->is_empty() && (input->dim() == 2 || input->dim() == 3), 2, input,
                "non-empty 2D or 3D input tensor expected but got: %s");
  if (input->dim() == 2) {
    int nChannels   = THTensor_(size)(input, 0);
    int inputWidth  = THTensor_(size)(input, 1);
    int outputWidth = inputWidth * scale_factor;
    if (gradOutput != NULL) {
      /* BUGFIX: a 2D (unbatched) input produces a 2D output, so gradOutput
       * must be 2D as well; the previous code demanded a 3D gradOutput here. */
      THNN_CHECK_DIM_SIZE(gradOutput, 2, 0, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 2, 1, outputWidth);
    }
  } else {
    int nBatch      = THTensor_(size)(input, 0);
    int nChannels   = THTensor_(size)(input, 1);
    int inputWidth  = THTensor_(size)(input, 2);
    int outputWidth = inputWidth * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 0, nBatch);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 1, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 2, outputWidth);
    }
  }
}
/*
 * Shape check for 2-D dilated max pooling (int64_t variant).
 *
 * input:      non-empty 3D (C,H,W) or 4D (N,C,H,W) tensor.
 * gradOutput: optional; must match the computed output shape.
 * indices:    optional; must match the computed output shape.
 * Kernel, stride and dilation must be positive; padding at most half the kernel.
 */
static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)(
  THTensor *input, THTensor *gradOutput, THIndexTensor *indices,
  int kH, int kW, int dH, int dW, int padH, int padW,
  int dilationH, int dilationW, bool ceil_mode) {

  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(dilationH > 0 && dilationW > 0, 12,
             "dilation should be greater than zero, but got dilationH: %d dilationW: %d",
             dilationH, dilationW);

  int ndim = input->dim();
  int dimf = 0;  /* feature/plane dimension */
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    /* batched input: shift every dimension index right by one */
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
                "non-empty 3D or 4D input tensor expected but got: %s");

  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d", padW, padH, kW, kH);

  int64_t nInputPlane = input->size(dimh-1);  /* plane dim sits just before height */
  int64_t inputHeight = input->size(dimh);
  int64_t inputWidth = input->size(dimw);
  int64_t nOutputPlane = nInputPlane;  /* pooling preserves the plane count */
  int64_t outputHeight = pooling_output_shape<int64_t>(inputHeight, kH, padH, dH, dilationH, ceil_mode);
  int64_t outputWidth = pooling_output_shape<int64_t>(inputWidth, kW, padW, dW, dilationW, ceil_mode);

  if (outputWidth < 1 || outputHeight < 1)
    /* BUGFIX: these sizes are int64_t; printing them with %d was a mismatched
     * printf specifier (undefined behavior). Cast to long long + %lld. */
    THError("Given input size: (%lldx%lldx%lld). "
            "Calculated output size: (%lldx%lldx%lld). Output size is too small",
            (long long)nInputPlane, (long long)inputHeight, (long long)inputWidth,
            (long long)nInputPlane, (long long)outputHeight, (long long)outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
  if (indices != NULL) {
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth);
  }
}
/*
 * Sanity-checks the tensors handed to 2-D dilated convolution.
 * weight must be 4D (nOutputPlane, nInputPlane, kH, kW); input 3D or 4D;
 * bias, when given, is 1D of length weight->size[0]; gradOutput, when given,
 * must match the derived output shape.
 */
static inline void THNN_(SpatialDilatedConvolution_shapeCheck)(
  THTensor *input, THTensor *gradOutput,
  THTensor *weight, THTensor *bias,
  int kH, int kW, int dH, int dW, int padH, int padW,
  int dilationH, int dilationW) {

  THNN_ARGCHECK(weight->nDimension == 4, 4, weight,
                "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
                "but got: %s");
  THArgCheck(kW > 0 && kH > 0, 9,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(dilationW > 0 && dilationH > 0, 15,
             "dilation should be greater than zero, but got dilationH: %d, dilationW: %d",
             dilationH, dilationW);

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
  }

  const int nDims = input->nDimension;
  const int batchOffset = (nDims == 4) ? 1 : 0;  /* batched input shifts all dims */
  const int planeDim  = batchOffset;
  const int heightDim = batchOffset + 1;
  const int widthDim  = batchOffset + 2;

  THNN_ARGCHECK(nDims == 3 || nDims == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  long nInputPlane  = weight->size[1];
  long inputHeight  = input->size[heightDim];
  long inputWidth   = input->size[widthDim];
  long nOutputPlane = weight->size[0];
  /* effective (dilated) kernel extents */
  int effKH = dilationH * (kH - 1) + 1;
  int effKW = dilationW * (kW - 1) + 1;
  long outputHeight = (inputHeight + 2*padH - effKH) / dH + 1;
  long outputWidth  = (inputWidth + 2*padW - effKW) / dW + 1;

  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%ld x %ld x %ld). "
            "Calculated output size: (%ld x %ld x %ld). Output size is too small",
            nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);

  THNN_CHECK_DIM_SIZE(input, nDims, planeDim, nInputPlane);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, nDims, planeDim, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, nDims, heightDim, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, nDims, widthDim, outputWidth);
  }
}
/*
 * Shape check for 2-D transposed ("full") convolution.
 * weight: 2D or 4D; size[0] = nInputPlane, size[1] = nOutputPlane.
 * bias:   optional 1D of length weight->size[1].
 * adjH/adjW (output adjustment) must be strictly smaller than the stride.
 */
static inline void THNN_(SpatialFullConvolution_shapeCheck)(
  THTensor *input, THTensor *gradOutput,
  THTensor *weight, THTensor *bias,
  int kH, int kW, int dH, int dW, int padH, int padW,
  int adjH, int adjW) {

  THArgCheck(kW > 0 && kH > 0, 9,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(adjW < dW && adjH < dH, 15,
             "output adjustment must be smaller than stride, but got adjH: %d adjW: %d dH: %d dW: %d",
             adjH, adjW, dH, dW);
  THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 4, 5, weight,
                "2D or 4D weight tensor expected, but got: %s");

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);
  }

  int ndim = input->nDimension;
  int dimf = 0;  /* feature/plane dimension */
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    /* batched input: shift every dimension index right by one */
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  long nInputPlane  = weight->size[0];
  long inputHeight  = input->size[dimh];
  long inputWidth   = input->size[dimw];
  long nOutputPlane = weight->size[1];
  /* transposed-convolution output size formula */
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;

  if (outputWidth < 1 || outputHeight < 1)
    /* BUGFIX: all six size arguments are long; printing them with %d was a
     * mismatched printf specifier (undefined behavior) — use %ld. */
    THError("Given input size: (%ld x %ld x %ld). "
            "Calculated output size: (%ld x %ld x %ld). Output size is too small",
            nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
/*
 * Shape check for TemporalRowConvolution.
 * input:  2D (feats, seq) or 3D (batch, feats, seq).
 * weight: contiguous 3D; weight->size[0] is the input frame size.
 * bias:   optional contiguous 1D of length weight->size[0].
 */
static inline void THNN_(TemporalRowConvolution_shapeCheck)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *weight,
  THTensor *bias,
  int kW,
  int dW,
  int padW) {

  THArgCheck(kW > 0, 5,
             "kernel size should be greater than zero, but got kW: %d", kW);
  THArgCheck(dW > 0, 6,
             "stride should be greater than zero, but got dW: %d", dW);
  THNN_ARGCHECK(weight->nDimension == 3, 3, weight,
                "3D weight tensor expected, but got: %s");
  THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
  THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
  }

  // we're always looking at (possibly batch) x feats x seq
  int ndim = input->nDimension;
  int dimF = 0;  /* feature dimension */
  int dimS = 1;  /* sequence dimension */

  if (ndim == 3) {
    /* batched input: shift both dims right by one */
    ++dimS;
    ++dimF;
  }

  /* BUGFIX: message typo ":%s" -> ": %s" */
  THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input,
                "2D or 3D (batch mode) input tensor expected, but got: %s");

  int64_t inputFrameSize = weight->size[0];
  int64_t nInputFrame = input->size[dimS];
  int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;

  if (nOutputFrame < 1) {
    /* BUGFIX: these sizes are int64_t; printing them with %d was a mismatched
     * printf specifier (undefined behavior). Cast to long long + %lld. */
    THError("Given input size: (%lld x %lld). "
            "Calculated output size: (%lld x %lld). Output size is too small",
            (long long)inputFrameSize, (long long)nInputFrame,
            (long long)inputFrameSize, (long long)nOutputFrame);
  }

  THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame);
  }
}
/*
 * Validates tensor geometry for 2-D nearest-neighbor upsampling.
 * input is (C, H, W) or (N, C, H, W); gradOutput, when supplied, must have
 * each spatial extent multiplied by scale_factor.
 */
static inline void THNN_(SpatialUpSamplingNearest_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int scale_factor) {
  THArgCheck(input != NULL, 2, "4D input tensor expected but got NULL");
  THArgCheck(scale_factor > 1, 4,
             "scale_factor must be greater than 1, but got: %d", scale_factor);
  THNN_ARGCHECK(input->_dim() == 3 || input->_dim() == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");
  if (input->_dim() == 3) {
    /* unbatched: (C, H, W) */
    const int planes = THTensor_(size)(input, 0);
    const int oH = THTensor_(size)(input, 1) * scale_factor;
    const int oW = THTensor_(size)(input, 2) * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 0, planes);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 1, oH);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 2, oW);
    }
  } else {
    /* batched: (N, C, H, W) */
    const int batch  = THTensor_(size)(input, 0);
    const int planes = THTensor_(size)(input, 1);
    const int oH = THTensor_(size)(input, 2) * scale_factor;
    const int oW = THTensor_(size)(input, 3) * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, batch);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, planes);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, oH);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, oW);
    }
  }
}
/*
 * Shape check for 3-D transposed ("full") convolution.
 * input:  4D (C,D,H,W) or 5D (N,C,D,H,W).
 * weight: 5D; per the reads below, size[0] = nInputPlane, size[1] = nOutputPlane,
 *         size[2..4] = kT, kH, kW.
 * bias:   optional 1D of length weight->size[1].
 * aT/aH/aW (output adjustment) must be strictly smaller than the stride.
 */
static inline void THNN_(VolumetricFullConvolution_shapeCheck)(
                         THTensor *input, THTensor *gradOutput,
                         THTensor *weight, THTensor *bias,
                         int dT, int dW, int dH, int pT, int pW, int pH,
                         int aT, int aW, int aH) {
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  // number of input & output planes and kernel size is indirectly defined by the weight tensor
  THNN_ARGCHECK(weight->nDimension == 5, 4, weight,
                "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
                "expected for weight, but got: %s");
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
             dT, dH, dW);
  THArgCheck(aT < dT && aW < dW && aH < dH, 15,
             "output adjustment must be smaller than stride, but got "
             "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d",
             aT, aH, aW, dT, dH, dW);

  int ndim = input->nDimension;
  const int nInputPlane  = (int)weight->size[0];
  const int nOutputPlane = (int)weight->size[1];
  const int kT           = (int)weight->size[2];
  const int kH           = (int)weight->size[3];
  const int kW           = (int)weight->size[4];

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);
  }

  int dimf = 0;  /* feature/plane dimension */
  int dimd = 1;  /* depth dimension */
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5) {
    /* batched input: shift every dimension index right by one */
    dimf++;
    dimd++;
    dimh++;
    dimw++;
  }

  const long inputWidth   = input->size[dimw];
  const long inputHeight  = input->size[dimh];
  const long inputDepth   = input->size[dimd];
  /* transposed-convolution output size formula */
  const long outputWidth  = (inputWidth - 1) * dW - 2*pW + kW + aW;
  const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
  const long outputDepth  = (inputDepth - 1) * dT - 2*pT + kT + aT;

  if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
    /* BUGFIX: the long-typed sizes were printed with %d (mismatched printf
     * specifier, undefined behavior); plane counts are int, so the format
     * mixes %d and %ld accordingly. */
    THError("Given input size: (%dx%ldx%ldx%ld). Calculated output size: (%dx%ldx%ldx%ld). Output size is too small",
            nInputPlane,inputDepth,inputHeight,inputWidth,
            nOutputPlane,outputDepth,outputHeight,outputWidth);

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
/*
 * Shape check for 2-D bilinear upsampling.  All geometry is passed in
 * explicitly; input and gradOutput are validated against it when non-NULL.
 */
static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int nBatch, int nChannels,
      int inputHeight, int inputWidth,
      int outputHeight, int outputWidth) {
  const int sizesPositive = inputHeight > 0 && inputWidth > 0 &&
                            outputHeight > 0 && outputWidth > 0;
  THArgCheck(sizesPositive, 2,
             "input and output sizes should be greater than 0,"
             " but got input (H: %d, W: %d) output (H: %d, W: %d)",
             inputHeight, inputWidth, outputHeight, outputWidth);
  if (input) {
    THNN_ARGCHECK(!input->is_empty() && input->dim() == 4, 2, input,
                  "non-empty 4D input tensor expected but got: %s");
  }
  if (gradOutput) {
    /* gradOutput must be exactly (nBatch, nChannels, outputHeight, outputWidth) */
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
  }
}
/*
 * Shape check for 3-D trilinear upsampling.  Geometry is supplied explicitly;
 * input and gradOutput are validated against it when non-NULL.
 */
static inline void THNN_(VolumetricUpSamplingTrilinear_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int nBatch, int nChannels,
      int inputDepth, int inputHeight, int inputWidth,
      int outputDepth, int outputHeight, int outputWidth) {
  const int sizesPositive =
      inputDepth > 0 && inputHeight > 0 && inputWidth > 0 &&
      outputDepth > 0 && outputHeight > 0 && outputWidth > 0;
  THArgCheck(sizesPositive, 2,
             "input and output sizes should be greater than 0,"
             " but got input (D: %d, H: %d, W: %d) output (D: %d, H: %d, W: %d)",
             inputDepth, inputHeight, inputWidth,
             outputDepth, outputHeight, outputWidth);
  if (input) {
    THNN_ARGCHECK(input->nDimension == 5, 2, input,
                  "5D input tensor expected but got: %s");
  }
  if (gradOutput) {
    /* gradOutput must be exactly (nBatch, nChannels, oD, oH, oW) */
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth);
  }
}
/*
 * MSE loss forward: accumulates (input - target)^2 over all elements.
 * NOTE(review): this definition appears truncated in this chunk — the
 * normalization by element count (sizeAverage) and the write into `output`
 * are not visible here, and the function is not closed; confirm against the
 * full file.
 */
void THNN_(MSECriterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *output,
          bool sizeAverage)
{
  /* input and target must have the same number of elements */
  THNN_CHECK_NELEMENT(input, target);
  /* output is a single-element (scalar) tensor */
  THNN_CHECK_DIM_SIZE(output, 1, 0, 1);

  real sum = 0;

  /* accumulate squared differences element-wise */
  TH_TENSOR_APPLY2(real, input, real, target,
    real z = (*input_data - *target_data);
    sum += z*z;
  );
/*
 * Shape check for 3-D (volumetric) nearest-neighbor upsampling.
 * input is (C, D, H, W) or (N, C, D, H, W); gradOutput, when supplied, must
 * have every spatial extent multiplied by scale_factor.
 */
static inline void THNN_(VolumetricUpSamplingNearest_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int scale_factor) {
  THArgCheck(input != NULL, 2, "5D input tensor expected but got NULL");
  THArgCheck(scale_factor > 1, 4,
             "scale_factor must be greater than 1, but got: %d", scale_factor);
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D input tensor expected but got: %s");
  if (input->nDimension == 4) {
    /* unbatched: (C, D, H, W) */
    const int planes = THTensor_(size)(input, 0);
    const int oD = THTensor_(size)(input, 1) * scale_factor;
    const int oH = THTensor_(size)(input, 2) * scale_factor;
    const int oW = THTensor_(size)(input, 3) * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, planes);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, oD);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, oH);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, oW);
    }
  } else {
    /* batched: (N, C, D, H, W) */
    const int batch  = THTensor_(size)(input, 0);
    const int planes = THTensor_(size)(input, 1);
    const int oD = THTensor_(size)(input, 2) * scale_factor;
    const int oH = THTensor_(size)(input, 3) * scale_factor;
    const int oW = THTensor_(size)(input, 4) * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, batch);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, planes);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, oD);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, oH);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, oW);
    }
  }
}
/*
 * SoftMargin loss forward: accumulates log(1 + exp(-input * target)) over
 * all elements.
 * NOTE(review): this definition appears truncated in this chunk — the
 * trailing `if(sizeAverage)` has no body here and the function is not
 * closed; confirm against the full file.
 */
void THNN_(SoftMarginCriterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *output,
          bool sizeAverage)
{
  /* input and target must have the same number of elements */
  THNN_CHECK_NELEMENT(input, target);
  /* output is a single-element (scalar) tensor */
  THNN_CHECK_DIM_SIZE(output, 1, 0, 1);

  real sum;

  sum = 0;
  /* accumulate the per-element soft-margin terms */
  TH_TENSOR_APPLY2(real, input, real, target,
                   real z = log(1. + exp(-*input_data* *target_data));
                   sum += z;)

  if(sizeAverage)
/*
 * Shape check for SpatialConvolutionLocal (locally-connected layer).
 * Output spatial extents are supplied by the caller rather than derived here.
 * Per the reads below, weight->size[1] is nOutputPlane and weight->size[2]
 * packs nInputPlane * kH * kW; bias is 3D (nOutputPlane, outH, outW).
 */
static inline void THNN_(SpatialConvolutionLocal_shapeCheck)(
  THTensor *input, THTensor *gradOutput,
  THTensor *weight, THTensor *bias, int kH, int kW, int dH,
  int dW, int padH, int padW, long inputHeight, long inputWidth,
  long outputHeight, long outputWidth) {

  THArgCheck(kW > 0 && kH > 0, 9,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);

  const int nDims = input->nDimension;
  const int batchOffset = (nDims == 4) ? 1 : 0;  /* batched input shifts all dims */
  const int planeDim  = batchOffset;
  const int heightDim = batchOffset + 1;
  const int widthDim  = batchOffset + 2;

  THNN_ARGCHECK(nDims == 3 || nDims == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  const long nInputPlane  = weight->size[2] / (kH * kW);
  const long nOutputPlane = weight->size[1];

  if (bias != NULL) {
    /* bias carries one value per output plane and output location */
    THNN_CHECK_DIM_SIZE(bias, 3, 0, nOutputPlane);
    THNN_CHECK_DIM_SIZE(bias, 3, 1, outputHeight);
    THNN_CHECK_DIM_SIZE(bias, 3, 2, outputWidth);
  }

  THNN_CHECK_DIM_SIZE(input, nDims, planeDim, nInputPlane);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, nDims, planeDim, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, nDims, heightDim, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, nDims, widthDim, outputWidth);
  }
}
/*
 * Shape check for 2-D dilated max pooling (legacy long-based variant).
 * input:      3D (C,H,W) or 4D (N,C,H,W).
 * gradOutput: optional; must match the computed output shape.
 * indices:    optional; must match the computed output shape.
 */
static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)(
  THTensor *input, THTensor *gradOutput, THIndexTensor *indices,
  int kH, int kW, int dH, int dW, int padH, int padW,
  int dilationH, int dilationW, bool ceil_mode) {

  /* kernel, stride and dilation must all be strictly positive */
  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(dilationH > 0 && dilationW > 0, 12,
             "dilation should be greater than zero, but got dilationH: %d dilationW: %d",
             dilationH, dilationW);

  int ndim = input->nDimension;
  int dimf = 0;  /* feature/plane dimension */
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    /* batched input: shift every dimension index right by one */
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  /* the padded image must still be at least as large as the kernel */
  THArgCheck(input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2,
             "input image (H: %d, W: %d) smaller than kernel "
             "size - padding( kH: %d padH: %d kW: %d padW: %d",
             input->size[dimh], input->size[dimw], kH, padH, kW, padW);
  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d", padW, padH, kW, kH);

  long nInputPlane = input->size[dimh-1];  /* plane dim sits just before height */
  long inputHeight = input->size[dimh];
  long inputWidth = input->size[dimw];
  long outputHeight, outputWidth;
  long nOutputPlane = nInputPlane;  /* pooling preserves the plane count */

  if (ceil_mode)
  {
    /* ceil mode may add one extra, partially covered output position */
    outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }
  else
  {
    outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }

  if (outputWidth < 1 || outputHeight < 1)
    /* NOTE(review): the size arguments are `long` but the format uses %d —
     * mismatched printf specifiers; confirm and fix separately. */
    THError("Given input size: (%dx%dx%d). "
            "Calculated output size: (%dx%dx%d). Output size is too small",
            nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
  if (indices != NULL) {
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth);
  }
}
static void inline THNN_(VolumetricConvolutionMM_shapeCheck)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *weight, THTensor *bias, int kT, int kW, int kH, int dT, int dW, int dH, int pT, int pW, int pH, int weight_nullable) { THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, "4D or 5D (batch mode) tensor expected for input, but got: %s"); THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW); THArgCheck(dT > 0 && dW > 0 && dH > 0, 11, "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW); if (weight != NULL) { THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 5, 5, weight, "2D or 5D weight tensor expected, but got: %s"); if (bias != NULL) { THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]); } } else if (!weight_nullable) { THError("weight tensor is expected to be non-nullable"); } int ndim = input->nDimension; int dimf = 0; int dimt = 1; int dimh = 2; int dimw = 3; if (ndim == 5) { dimf++; dimt++; dimh++; dimw++; } int64_t inputDepth; int64_t inputHeight; int64_t inputWidth; int64_t exactInputDepth; int64_t exactInputHeight; int64_t exactInputWidth; int64_t outputDepth; int64_t outputHeight; int64_t outputWidth; inputDepth = input->size[dimt]; inputHeight = input->size[dimh]; inputWidth = input->size[dimw]; exactInputDepth = inputDepth + 2*pT; exactInputHeight = inputHeight + 2*pH; exactInputWidth = inputWidth + 2*pW; if (exactInputDepth < kT || exactInputHeight < kH || exactInputWidth < kW) { THError("Calculated padded input size per channel: (%ld x %ld x %ld). " "Kernel size: (%ld x %ld x %ld). 
Kernel size can't greater than actual input size", exactInputDepth, exactInputHeight, exactInputWidth, kT, kH, kW); } outputDepth = (exactInputDepth - kT) / dT + 1; outputHeight = (exactInputHeight - kH) / dH + 1; outputWidth = (exactInputWidth - kW) / dW + 1; if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) { THError("Given input size per channel: (%ld x %ld x %ld). " "Calculated output size per channel: (%ld x %ld x %ld). Output size is too small", inputDepth, inputHeight, inputWidth, outputDepth, outputHeight, outputWidth); } if (weight != NULL) { int64_t nInputPlane = weight->size[1]; if (weight->nDimension == 2) { nInputPlane /= (kT * kH * kW); } THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); } if (gradOutput != NULL) { if (weight != NULL) { int64_t nOutputPlane = weight->size[0]; THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); } else if (bias != NULL) { int64_t nOutputPlane = bias->size[0]; THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); } THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, outputDepth); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); } }
/*
 * Shape check for 3-D average pooling.
 * input: 4D (C,T,H,W) or 5D (N,C,T,H,W); gradOutput optional and, when
 * given, must match the computed output shape.
 */
static inline void THNN_(VolumetricAveragePooling_shapeCheck)(
                         THNNState *state,
                         THTensor *input,
                         THTensor *gradOutput,
                         int kT, int kW, int kH,
                         int dT, int dW, int dH,
                         int padT, int padW, int padH,
                         bool ceil_mode)
{
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  int ndim = input->nDimension;
  int dimN = 0;  /* plane (channel) dimension */
  int dimt = 1;  /* time/depth dimension */
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    /* batched input: shift every dimension index right by one */
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  THArgCheck(kT > 0 && kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
             kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
             dT, dH, dW);
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");

  /* the image must be at least as large as the kernel in every dimension */
  THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH
             && input->size[dimt] >= kT, 2,
             "input image (T: %d H: %d W: %d) smaller than "
             "kernel size (kT: %d kH: %d kW: %d)",
             input->size[dimt], input->size[dimh], input->size[dimw],
             kT, kH, kW);

  // The second argument is argNumber... here is the index of padH.
  THArgCheck(kT/2 >= padT && kW/2 >= padW && kH/2 >= padH, 11,
             "pad should not be greater than half of kernel size, but got "
             "padT = %d, padW = %d, padH = %d, kT = %d, kW = %d, kH = %d",
             padT, padW, padH, kT, kW, kH);

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];

  if (ceil_mode)
  {
    /* ceil mode may produce one extra, partially covered output position */
    otime   = (long)(ceil((float)(itime - kT + 2*padT) / dT)) + 1;
    oheight = (long)(ceil((float)(iheight - kH + 2*padH) / dH)) + 1;
    owidth  = (long)(ceil((float)(iwidth - kW + 2*padW) / dW)) + 1;
  }
  else
  {
    otime   = (long)(floor((float)(itime - kT + 2*padT) / dT)) + 1;
    oheight = (long)(floor((float)(iheight - kH + 2*padH) / dH)) + 1;
    owidth  = (long)(floor((float)(iwidth - kW + 2*padW) / dW)) + 1;
  }

  if (padT || padW || padH)
  {
    // ensure that the last pooling starts inside the image
    // needed to avoid problems in ceil mode
    if ((otime - 1)*dT >= itime + padT)
      --otime;
    if ((oheight - 1)*dH >= iheight + padH)
      --oheight;
    if ((owidth - 1)*dW >= iwidth + padW)
      --owidth;
  }

  if (otime < 1 || owidth < 1 || oheight < 1)
    /* NOTE(review): the size arguments are `long` but printed with %d —
     * mismatched printf specifiers; confirm and fix separately. */
    THError("Given input size: (%dx%dx%dx%d). "
            "Calculated output size: (%dx%dx%dx%d). Output size is too small",
            nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth);

  if (gradOutput != NULL)
  {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth);
  }
}
/*
 * Shape check for 3-D dilated convolution.
 * input:  4D (C,D,H,W) or 5D (N,C,D,H,W).
 * weight: 5D (nOutputPlane x nInputPlane x kT x kH x kW); may be NULL when
 *         weight_nullable is non-zero.
 * bias:   optional 1D of length weight->size[0].
 * gradOutput, when given, must match the computed output shape.
 */
static inline void THNN_(VolumetricDilatedConvolution_shapeCheck)(
                         THTensor *input, THTensor *gradOutput,
                         THTensor *weight, THTensor *bias,
                         int kT, int kH, int kW, int dT, int dH, int dW,
                         int padT, int padH, int padW,
                         int dilationT, int dilationH, int dilationW, int weight_nullable) {
  THNN_ARGCHECK(input->_dim() == 4 || input->_dim() == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  THArgCheck(kT > 0 && kW > 0 && kH > 0, 8,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
             kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
             dT, dH, dW);
  THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 15,
             "dilation should be greater than zero, but got dilationT: %d, dilationH: %d, dilationW: %d",
             dilationT, dilationH, dilationW);
  if (weight != NULL) {
    THNN_ARGCHECK(weight->_dim() == 5, 4, weight,
                  "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
                  "expected for weight, but got: %s");
    if (bias != NULL) {
      THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
    }
  } else if (!weight_nullable) {
    THError("weight tensor is expected to be non-nullable");
  }

  // Params
  int ndim = input->_dim();
  int dimf = 0;  /* feature/plane dimension */
  int dimd = 1;  /* depth dimension */
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5) {
    /* batched input: shift every dimension index right by one */
    dimf++;
    dimd++;
    dimh++;
    dimw++;
  }

  int64_t inputDepth  = input->size[dimd];
  int64_t inputHeight = input->size[dimh];
  int64_t inputWidth  = input->size[dimw];
  /* standard dilated-convolution output size formula */
  int64_t outputDepth  = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
  int64_t outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
  int64_t outputWidth  = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;

  if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) {
    THError("Given input size per channel: (%ld x %ld x %ld). "
            "Calculated output size per channel: (%ld x %ld x %ld). Output size is too small",
            inputDepth, inputHeight, inputWidth, outputDepth, outputHeight, outputWidth);
  }

  if (weight != NULL) {
    int64_t nInputPlane = weight->size[1];
    THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  }

  if (gradOutput != NULL) {
    /* the expected plane count comes from weight when available, else bias */
    if (weight != NULL) {
      int64_t nOutputPlane = weight->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    } else if (bias != NULL) {
      int64_t nOutputPlane = bias->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    }
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
static void inline THNN_(VolumetricConvolutionMM_shapeCheck)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *weight, THTensor *bias, int kT, int kW, int kH, int dT, int dW, int dH, int pT, int pW, int pH) { THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, "4D or 5D (batch mode) tensor expected for input, but got: %s"); THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW); THArgCheck(dT > 0 && dW > 0 && dH > 0, 11, "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW); int ndim = input->nDimension; int dimf = 0; int dimt = 1; int dimh = 2; int dimw = 3; if (ndim == 5) { dimf++; dimt++; dimh++; dimw++; } int64_t nInputPlane; int64_t inputDepth; int64_t inputHeight; int64_t inputWidth; int64_t nOutputPlane; int64_t outputDepth; int64_t outputHeight; int64_t outputWidth; nInputPlane = input->size[dimf]; inputDepth = input->size[dimt]; inputHeight = input->size[dimh]; inputWidth = input->size[dimw]; nOutputPlane = weight->size[0]; outputDepth = (inputDepth + 2*pT - kT) / dT + 1; outputHeight = (inputHeight + 2*pH - kH) / dH + 1; outputWidth = (inputWidth + 2*pW - kW) / dW + 1; if (outputWidth < 1 || outputHeight < 1 || outputDepth < 1) { THError( "Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). 
Output size is too small", nInputPlane, inputDepth, inputHeight, inputWidth, nOutputPlane, outputDepth, outputHeight, outputWidth ); } THArgCheck(weight->nDimension == 2 || weight->nDimension == 5, 4, "weight tensor should be 2D or 5D - got %d", weight->nDimension); if (bias != NULL) { THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]); } THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); if (gradOutput != NULL) { THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, outputDepth); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); } }
/*
 * Shape check for 3-D dilated max pooling.
 * input: 4D (C,T,H,W) or 5D (N,C,T,H,W); gradOutput/indices optional and,
 * when given, must match the computed output shape.
 */
static inline void THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
                         THNNState *state,
                         THTensor *input,
                         THTensor *gradOutput,
                         THIndexTensor *indices,
                         int kT, int kW, int kH,
                         int dT, int dW, int dH,
                         int pT, int pW, int pH,
                         int dilationT, int dilationW, int dilationH,
                         bool ceilMode) {
  int ndim = input->nDimension;
  int dimN = 0;  /* plane (channel) dimension */
  int dimt = 1;  /* time/depth dimension */
  int dimh = 2;
  int dimw = 3;
  int64_t nslices;
  int64_t itime;
  int64_t iheight;
  int64_t iwidth;
  int64_t otime;
  int64_t oheight;
  int64_t owidth;

  THArgCheck(kT > 0 && kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
             kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
             dT, dH, dW);
  THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 14,
             "dilation should be greater than 0, but got dilationT: %d dilationH: %d dilationW: %d",
             dilationT, dilationH, dilationW);

  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");

  if (input->nDimension == 5)
  {
    /* batched input: shift every dimension index right by one */
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
             "pad should be smaller than half of kernel size, but got "
             "kT: %d kW: %d, kH: %d, padT: %d, padW: %d, padH: %d",
             kT, kW, kH, pT, pW, pH);

  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];

  if (ceilMode)
  {
    /* ceil mode may add one extra, partially covered output position */
    otime   = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }
  else
  {
    otime   = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }

  if (pT || pW || pH)
  {
    // ensure that the last pooling starts inside the image
    if ((otime - 1)*dT >= itime + pT)
      --otime;
    if ((oheight - 1)*dH >= iheight + pH)
      --oheight;
    if ((owidth - 1)*dW >= iwidth + pW)
      --owidth;
  }

  if (otime < 1 || owidth < 1 || oheight < 1)
    /* NOTE(review): int64_t sizes printed with %d — mismatched printf
     * specifiers; confirm and fix separately. */
    THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
            nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth);

  if (gradOutput != NULL)
  {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth);
  }
  if (indices != NULL)
  {
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimN, nslices);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimt, otime);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, oheight);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, owidth);
  }
}
static inline void THNN_(SpatialFullDilatedConvolution_shapeCheck)( THTensor *input, THTensor *gradOutput, THTensor *weight, THTensor *bias, int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW, int adjH, int adjW, int weight_nullable) { THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); THArgCheck(dilationW > 0 && dilationH > 0, 15, "dilation should be greater than zero, but got dilationH: %d, dilationW: %d", dilationH, dilationW); THArgCheck((adjW < dW || adjW < dilationW) && (adjH < dH || adjH < dilationH), 15, "output padding must be smaller than either stride or dilation, but got adjH: %d adjW: %d dH: %d dW: %d dilationH: %d dilationW: %d", adjH, adjW, dH, dW, dilationH, dilationW); if (weight != NULL) { THNN_ARGCHECK(!weight->is_empty() && (weight->dim() == 2 || weight->dim() == 4), 5, weight, "non-empty 2D or 4D weight tensor expected, but got: %s"); if (bias != NULL) { THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size(1)); } } else if (!weight_nullable) { THError("weight tensor is expected to be non-nullable"); } int ndim = input->dim(); int dimf = 0; int dimh = 1; int dimw = 2; if (ndim == 4) { dimf++; dimh++; dimw++; } THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, "non-empty 3D or 4D input tensor expected but got: %s"); int64_t inputHeight = input->size(dimh); int64_t inputWidth = input->size(dimw); int64_t outputHeight = (inputHeight - 1) * dH - 2*padH + (dilationH * (kH - 1) + 1) + adjH; int64_t outputWidth = (inputWidth - 1) * dW - 2*padW + (dilationW * (kW - 1) + 1) + adjW; if (outputWidth < 1 || outputHeight < 1) { THError("Given input size per channel: (%ld x %ld). " "Calculated output size per channel: (%ld x %ld). 
Output size is too small", inputHeight, inputWidth, outputHeight, outputWidth); } if (weight != NULL) { int64_t nInputPlane = weight->size(0); THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); } if (gradOutput != NULL) { if (weight != NULL) { int64_t nOutputPlane = weight->size(1); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); } else if (bias != NULL) { int64_t nOutputPlane = THTensor_sizeLegacyNoScalars(bias, 0); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); } THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); } }
static inline void THNN_(SpatialAveragePooling_shapeCheck)(
  THTensor *input, THTensor *gradOutput,
  int kH, int kW, int dH, int dW, int padH, int padW,
  bool ceil_mode) {
  // Validates input/gradOutput shapes for 2D average pooling.  Raises
  // through THArgCheck/THError on any mismatch; returns silently otherwise.
  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);

  int ndim = input->nDimension;
  int dimf = 0;  // plane (feature) dimension
  int dimh = 1;  // height dimension
  int dimw = 2;  // width dimension

  // Batch mode (4D) shifts every dimension index up by one.
  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d",
             padW, padH, kW, kH);

  int64_t nInputPlane = input->size[dimh-1];
  int64_t inputHeight = input->size[dimh];
  int64_t inputWidth = input->size[dimw];
  int64_t outputHeight, outputWidth;
  int64_t nOutputPlane = nInputPlane;

  // ceil_mode decides whether a trailing partial window yields an output.
  if(ceil_mode)
  {
    outputHeight = (int64_t)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
    outputWidth  = (int64_t)(ceil((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
  }
  else
  {
    outputHeight = (int64_t)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
    outputWidth  = (int64_t)(floor((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
  }

  if (padW || padH)
  {
    // ensure that the last pooling starts inside the image
    // needed to avoid problems in ceil mode
    if ((outputHeight - 1)*dH >= inputHeight + padH)
      --outputHeight;
    if ((outputWidth  - 1)*dW >= inputWidth  + padW)
      --outputWidth;
  }

  if (outputWidth < 1 || outputHeight < 1)
    // Sizes are int64_t, so use %ld (the previous %d was a printf
    // format/argument mismatch, which is undefined behavior).
    THError("Given input size: (%ldx%ldx%ld). "
            "Calculated output size: (%ldx%ldx%ld). Output size is too small",
            nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
void THNN_(SpatialAveragePooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          bool ceil_mode,
          bool count_include_pad)
{
  // Backward pass of 2D average pooling: each gradOutput element is spread
  // uniformly over its pooling window in gradInput (scaled by 1/divide_factor).
  int dimw = 2;
  int dimh = 1;
  int dimc = 0;
  int64_t nbatch = 1;
  int64_t ndim = 3;

  int64_t inputWidth;
  int64_t inputHeight;
  int64_t outputWidth;
  int64_t outputHeight;
  int64_t nInputPlane; // number of channels (or colors)

  real *gradOutput_data;
  real *gradInput_data;
  int64_t k;

  THNN_(SpatialAveragePooling_shapeCheck)
    (input, gradOutput, kH, kW, dH, dW, padH, padW, ceil_mode);

  // Batch mode (4D) shifts every dimension index up by one.
  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
    dimc++;
    ndim = 4;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  nInputPlane = input->size[dimc];

  // Recompute the output extents exactly as the forward pass does so the
  // gradOutput dimension check below matches.
  if(ceil_mode)
  {
    outputWidth  = (int64_t)(ceil((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
    outputHeight = (int64_t)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
  }
  else
  {
    outputWidth  = (int64_t)(floor((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
    outputHeight = (int64_t)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
  }
  if (padW || padH)
  {
    // ensure that the last pooling starts inside the image
    // needed to avoid problems in ceil mode
    if ((outputHeight - 1)*dH >= inputHeight + padH)
      --outputHeight;
    if ((outputWidth  - 1)*dW >= inputWidth  + padW)
      --outputWidth;
  }

  THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
  THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);

  THTensor_(resizeAs)(gradInput, input);

  gradOutput = THTensor_(newContiguous)(gradOutput);
  THArgCheck(THTensor_(isContiguous)(gradInput), 4, "gradInput must be contiguous");

  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);

#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++)
  {
    int64_t p;
    for(p = 0; p < nbatch; p++)
    {
      real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
      // One plane pointer suffices; the original computed two identical
      // pointers (ptr_gi and ptr_gradInput) to the same location.
      real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
      int64_t xx, yy, i;

      // Zero the plane before accumulating contributions.
      for(i = 0; i < inputWidth*inputHeight; i++)
        ptr_gradInput[i] = 0.0;

      for(yy = 0; yy < outputHeight; yy++)
      {
        for(xx = 0; xx < outputWidth; xx++)
        {
          // Window bounds clamped with exact integer arithmetic.  The
          // original used fminf/fmaxf, which round int64_t values once they
          // exceed float's 24-bit mantissa and could corrupt the bounds on
          // very large tensors.
          int64_t hstart = yy * dH - padH;
          int64_t wstart = xx * dW - padW;
          int64_t hend = hstart + kH;
          int64_t wend = wstart + kW;
          if (hend > inputHeight + padH) hend = inputHeight + padH;
          if (wend > inputWidth  + padW) wend = inputWidth  + padW;
          int pool_size = (hend - hstart) * (wend - wstart);
          if (hstart < 0) hstart = 0;
          if (wstart < 0) wstart = 0;
          if (hend > inputHeight) hend = inputHeight;
          if (wend > inputWidth)  wend = inputWidth;

          real z = *ptr_gradOutput++;

          // count_include_pad: divide by the full (padded) window size;
          // otherwise divide only by the in-bounds part of the window.
          int divide_factor;
          if(count_include_pad)
            divide_factor = pool_size;
          else
            divide_factor = (hend - hstart) * (wend - wstart);

          int64_t kx, ky;
          for(ky = hstart; ky < hend; ky++)
          {
            for(kx = wstart; kx < wend; kx++)
              ptr_gradInput[ky*inputWidth + kx] += z/divide_factor;
          }
        }
      }
    }
  }

  // Release the contiguous copy made above.
  THTensor_(free)(gradOutput);
}