static inline void THNN_(TemporalUpSamplingNearest_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int scale_factor) {
  THArgCheck(input != NULL, 2, "3D input tensor expected but got NULL");
  THArgCheck(scale_factor > 1, 4,
	     "scale_factor must be greater than 1, but got: %d", scale_factor);
  THNN_ARGCHECK(!input->is_empty() && (input->dim() == 2 || input->dim() == 3), 2, input,
		"non-empty 2D or 3D input tensor expected but got: %s");
  if (input->dim() == 2) {
    int nChannels    = THTensor_(size)(input, 0);
    int inputWidth   = THTensor_(size)(input, 1);
    int outputWidth  = inputWidth  * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 2, 0, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 2, 1, outputWidth);
    }
  } else {
    int nBatch       = THTensor_(size)(input, 0);
    int nChannels    = THTensor_(size)(input, 1);
    int inputWidth   = THTensor_(size)(input, 2);
    int outputWidth  = inputWidth  * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 0, nBatch);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 1, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 2, outputWidth);
    }
  }
}
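/*
 * Illustrative example (hypothetical sizes, not from any particular caller):
 * a 2D input of shape (nChannels = 4, inputWidth = 5) with scale_factor = 3
 * requires a 2D gradOutput of shape (4, 15); the batched 3D case (N, C, W)
 * checks (N, C, W * scale_factor) instead.
 */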
static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)(
        THTensor *input, THTensor *gradOutput, THIndexTensor *indices,
        int kH, int kW, int dH, int dW, int padH, int padW,
        int dilationH, int dilationW, bool ceil_mode) {

  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(dilationH > 0 && dilationW > 0, 12,
             "dilation should be greater than zero, but got dilationH: %d dilationW: %d",
             dilationH, dilationW);

  int ndim = input->dim();
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
                "non-empty 3D or 4D input tensor expected but got: %s");

  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d",
             padW, padH, kW, kH);

  int64_t nInputPlane = input->size(dimh-1);
  int64_t inputHeight = input->size(dimh);
  int64_t inputWidth = input->size(dimw);
  int64_t nOutputPlane = nInputPlane;

  int64_t outputHeight = pooling_output_shape<int64_t>(inputHeight, kH, padH, dH, dilationH, ceil_mode);
  int64_t outputWidth = pooling_output_shape<int64_t>(inputWidth, kW, padW, dW, dilationW, ceil_mode);

  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%dx%dx%d). "
            "Calculated output size: (%dx%dx%d). Output size is too small",
            nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
  if (indices != NULL) {
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth);
  }
}
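/*
 * Worked example (illustrative numbers): with inputHeight = 7, kH = 3,
 * padH = 1, dH = 2, dilationH = 1 and ceil_mode = false,
 * pooling_output_shape gives
 *   floor((7 + 2*1 - (1*(3-1) + 1)) / 2) + 1 = floor(6/2) + 1 = 4,
 * so a 3x7x7 input pairs with a 3x4x4 gradOutput/indices shape.
 */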
static inline void THNN_(SpatialDilatedConvolution_shapeCheck)(
	THTensor *input, THTensor *gradOutput,
	THTensor *weight, THTensor *bias,
	int kH, int kW, int dH, int dW, int padH, int padW,
	int dilationH, int dilationW) {

  THNN_ARGCHECK(weight->nDimension == 4, 4, weight,
                "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
                "but got: %s");
  THArgCheck(kW > 0 && kH > 0, 9,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(dilationW > 0 && dilationH > 0, 15,
             "dilation should be greater than zero, but got dilationH: %d, dilationW: %d",
             dilationH, dilationW);

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
  }

  int ndim = input->nDimension;
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
		"3D or 4D input tensor expected but got: %s");

  long nInputPlane  = weight->size[1];
  long inputHeight  = input->size[dimh];
  long inputWidth   = input->size[dimw];
  long nOutputPlane = weight->size[0];
  long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
  long outputWidth  = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;

  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%ld x %ld x %ld). "
	    "Calculated output size: (%ld x %ld x %ld). Output size is too small",
	    nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
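/*
 * Worked example (illustrative numbers): inputHeight = 32, kH = 3,
 * dilationH = 2, padH = 1, dH = 1 gives an effective kernel extent of
 * dilationH * (kH - 1) + 1 = 5, so
 *   outputHeight = (32 + 2*1 - 5) / 1 + 1 = 30.
 */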
static inline void THNN_(SpatialFullConvolution_shapeCheck)(
	THTensor *input, THTensor *gradOutput,
	THTensor *weight, THTensor *bias,
	int kH, int kW, int dH, int dW, int padH, int padW, int adjH, int adjW) {

  THArgCheck(kW > 0 && kH > 0, 9,
	       "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 11,
	     "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(adjW < dW && adjH < dH, 15,
        "output adjustment must be smaller than stride, but got adjH: %d adjW: %d dH: %d dW: %d",
        adjH, adjW, dH, dW);
  THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 4, 5, weight,
		"2D or 4D weight tensor expected, but got: %s");

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);
  }

  int ndim = input->nDimension;
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
		"3D or 4D input tensor expected but got: %s");

  long nInputPlane  = weight->size[0];
  long inputHeight  = input->size[dimh];
  long inputWidth   = input->size[dimw];
  long nOutputPlane = weight->size[1];
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;

  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%d x %d x %d). "
	    "Calculated output size: (%d x %d x %d). Output size is too small",
	    nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
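/*
 * Worked example (illustrative numbers): inputHeight = 16, dH = 2, padH = 1,
 * kH = 4, adjH = 0 gives
 *   outputHeight = (16 - 1) * 2 - 2*1 + 4 + 0 = 32,
 * i.e. the transposed counterpart of a stride-2 convolution mapping 32 -> 16.
 */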
static inline void THNN_(TemporalRowConvolution_shapeCheck)(
	THNNState *state,
	THTensor *input,
	THTensor *gradOutput,
	THTensor *weight,
	THTensor *bias,
	int kW,
	int dW,
	int padW) {

	THArgCheck(kW > 0, 5,
	           "kernel size should be greater than zero, but got kW: %d", kW);
	THArgCheck(dW > 0, 6,
	           "stride should be greater than zero, but got dW: %d", dW);
	THNN_ARGCHECK(weight->nDimension == 3, 3, weight,
	              "3D weight tensor expected, but got: %s");
    THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
    THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");

	if (bias != NULL) {
		THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
	}

	// we're always looking at (possibly batch) x feats x seq
	int ndim = input->nDimension;
	int dimF = 0;
	int dimS = 1;

	if (ndim == 3) {
		++dimS;
		++dimF;
	}

	THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input,
	              "2D or 3D (batch mode) input tensor expected, but got :%s");

	int64_t inputFrameSize = weight->size[0];
	int64_t nInputFrame = input->size[dimS];
	int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;

	if (nOutputFrame < 1) {
		THError("Given input size: (%d x %d). "
		        "Calculated output size: (%d x %d). Output size is too small",
		        inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame);
	}

	THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize);

	if (gradOutput != NULL) {
		THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize);
		THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame);
	}
}
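/*
 * Worked example (illustrative numbers): nInputFrame = 10, kW = 3, padW = 1,
 * dW = 1 gives
 *   nOutputFrame = (10 + 2*1 - 3) / 1 + 1 = 10,
 * i.e. a "same"-length output for this kernel/padding combination.
 */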
static inline void THNN_(SpatialUpSamplingNearest_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int scale_factor) {
  THArgCheck(input != NULL, 2, "4D input tensor expected but got NULL");
  THArgCheck(scale_factor > 1, 4,
	     "scale_factor must be greater than 1, but got: %d", scale_factor);
  THNN_ARGCHECK(input->_dim() == 3 || input->_dim() == 4, 2, input,
		"3D or 4D input tensor expected but got: %s");
  if (input->_dim() == 3) {
    int nChannels    = THTensor_(size)(input, 0);
    int inputHeight  = THTensor_(size)(input, 1);
    int inputWidth   = THTensor_(size)(input, 2);
    int outputHeight = inputHeight * scale_factor;
    int outputWidth  = inputWidth  * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 0, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 1, outputHeight);
      THNN_CHECK_DIM_SIZE(gradOutput, 3, 2, outputWidth);
    }
  } else {
    int nBatch       = THTensor_(size)(input, 0);
    int nChannels    = THTensor_(size)(input, 1);
    int inputHeight  = THTensor_(size)(input, 2);
    int inputWidth   = THTensor_(size)(input, 3);  
    int outputHeight = inputHeight * scale_factor;
    int outputWidth  = inputWidth  * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
    }
  }
}
static inline void THNN_(VolumetricFullConvolution_shapeCheck)(
                         THTensor *input, THTensor *gradOutput,
                         THTensor *weight, THTensor *bias,
                         int dT, int dW, int dH, int pT, int pW, int pH,
                         int aT, int aW, int aH) {
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  // number of input & output planes and kernel size is indirectly defined by the weight tensor
  THNN_ARGCHECK(weight->nDimension == 5, 4, weight,
                "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
                "expected for weight, but got: %s");
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
  THArgCheck(aT < dT && aW < dW && aH < dH, 15,
             "output adjustment must be smaller than stride, but got "
             "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d",
             aT, aH, aW, dT, dH, dW);

  int ndim = input->nDimension;
  const int nInputPlane  = (int)weight->size[0];
  const int nOutputPlane = (int)weight->size[1];
  const int kT           = (int)weight->size[2];
  const int kH           = (int)weight->size[3];
  const int kW           = (int)weight->size[4];

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);
  }

  int dimf = 0;
  int dimd = 1;
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5) {
    dimf++;
    dimd++;
    dimh++;
    dimw++;
  }

  const long inputWidth   = input->size[dimw];
  const long inputHeight  = input->size[dimh];
  const long inputDepth   = input->size[dimd];
  const long outputWidth  = (inputWidth  - 1) * dW - 2*pW + kW + aW;
  const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
  const long outputDepth  = (inputDepth  - 1) * dT - 2*pT + kT + aT;

  if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
            nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth);

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
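/*
 * Worked example (illustrative numbers), depth dimension only:
 * inputDepth = 8, dT = 2, pT = 1, kT = 4, aT = 0 gives
 *   outputDepth = (8 - 1) * 2 - 2*1 + 4 + 0 = 16;
 * height and width follow the same formula with their own parameters.
 */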
static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int nBatch, int nChannels,
      int inputHeight, int inputWidth,
      int outputHeight, int outputWidth) {
  THArgCheck(inputHeight > 0 && inputWidth > 0
	     && outputHeight > 0 && outputWidth > 0, 2,
	     "input and output sizes should be greater than 0,"
	     " but got input (H: %d, W: %d) output (H: %d, W: %d)",
	     inputHeight, inputWidth, outputHeight, outputWidth);
  if (input != NULL) {
    THNN_ARGCHECK(!input->is_empty() && input->dim() == 4, 2, input,
		  "non-empty 4D input tensor expected but got: %s");
  }

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
  }
}
static inline void THNN_(VolumetricUpSamplingTrilinear_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int nBatch, int nChannels,
      int inputDepth, int inputHeight, int inputWidth,
      int outputDepth, int outputHeight, int outputWidth) {
  THArgCheck(inputDepth > 0 && inputHeight > 0 && inputWidth > 0
	     && outputDepth > 0 && outputHeight > 0 && outputWidth > 0, 2,
	     "input and output sizes should be greater than 0,"
	     " but got input (D: %d, H: %d, W: %d) output (D: %d, H: %d, W: %d)",
	     inputDepth, inputHeight, inputWidth, outputDepth, outputHeight, outputWidth);
  if (input != NULL) {
    THNN_ARGCHECK(input->nDimension == 5, 2, input,
		  "5D input tensor expected but got: %s");
  }

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth);
  }
}
void THNN_(MSECriterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *output,
          bool sizeAverage)
{
  THNN_CHECK_NELEMENT(input, target);
  THNN_CHECK_DIM_SIZE(output, 1, 0, 1);

  real sum = 0;

  TH_TENSOR_APPLY2(real, input, real, target,
    real z = (*input_data - *target_data);
    sum += z*z;
  );

  if(sizeAverage)
    sum /= THTensor_(nElement)(input);

  THTensor_(set1d)(output, 0, sum);
}
static inline void THNN_(VolumetricUpSamplingNearest_shapeCheck)
     (THTensor *input, THTensor *gradOutput,
      int scale_factor) {
  THArgCheck(input != NULL, 2, "5D input tensor expected but got NULL");
  THArgCheck(scale_factor > 1, 4,
	     "scale_factor must be greater than 1, but got: %d", scale_factor);
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
		"4D or 5D input tensor expected but got: %s");
  if (input->nDimension == 4) {
    int nChannels    = THTensor_(size)(input, 0);
    int inputDepth   = THTensor_(size)(input, 1);
    int inputHeight  = THTensor_(size)(input, 2);
    int inputWidth   = THTensor_(size)(input, 3);
    int outputDepth  = inputDepth  * scale_factor;
    int outputHeight = inputHeight * scale_factor;
    int outputWidth  = inputWidth  * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, outputDepth);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
      THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
    }
  } else {
    int nBatch       = THTensor_(size)(input, 0);
    int nChannels    = THTensor_(size)(input, 1);
    int inputDepth   = THTensor_(size)(input, 2);
    int inputHeight  = THTensor_(size)(input, 3);
    int inputWidth   = THTensor_(size)(input, 4);  
    int outputDepth  = inputDepth  * scale_factor;
    int outputHeight = inputHeight * scale_factor;
    int outputWidth  = inputWidth  * scale_factor;
    if (gradOutput != NULL) {
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight);
      THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth);
    }
  }
}
void THNN_(SoftMarginCriterion_updateOutput)(
  THNNState *state,
  THTensor *input,
  THTensor *target,
  THTensor *output,
  bool sizeAverage)
{
  THNN_CHECK_NELEMENT(input, target);
  THNN_CHECK_DIM_SIZE(output, 1, 0, 1);

  real sum;

  sum = 0;
  TH_TENSOR_APPLY2(real, input, real, target,
                   real z = log(1. + exp(-*input_data* *target_data));
                   sum += z;)

  if(sizeAverage)
    sum /= THTensor_(nElement)(input);

  THTensor_(set1d)(output, 0, sum);
}
static inline void THNN_(SpatialConvolutionLocal_shapeCheck)(
	THTensor *input, THTensor *gradOutput,
	THTensor *weight, THTensor *bias, 
	int kH, int kW, int dH, 
	int dW, int padH, int padW,
	long inputHeight, long inputWidth,
	long outputHeight, long outputWidth) {

  THArgCheck(kW > 0 && kH > 0, 9,
	       "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 11,
	     "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);

  int ndim = input->nDimension;
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
		"3D or 4D input tensor expected but got: %s");

  long nInputPlane = weight->size[2] / (kH * kW);
  long nOutputPlane = weight->size[1];

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 3, 0, nOutputPlane);
    THNN_CHECK_DIM_SIZE(bias, 3, 1, outputHeight);
    THNN_CHECK_DIM_SIZE(bias, 3, 2, outputWidth);
  }

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  
  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)(
    THTensor *input, THTensor *gradOutput, THIndexTensor *indices,
    int kH, int kW, int dH, int dW, int padH, int padW,
    int dilationH, int dilationW, bool ceil_mode) {

    THArgCheck(kW > 0 && kH > 0, 5,
               "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
    THArgCheck(dW > 0 && dH > 0, 8,
               "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
    THArgCheck(dilationH > 0 && dilationW > 0, 12,
               "dilation should be greater than zero, but got dilationH: %d dilationW: %d",
               dilationH, dilationW);

    int ndim = input->nDimension;
    int dimf = 0;
    int dimh = 1;
    int dimw = 2;

    if (ndim == 4) {
        dimf++;
        dimh++;
        dimw++;
    }

    THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
                  "3D or 4D input tensor expected but got: %s");

    THArgCheck(input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2,
               "input image (H: %d, W: %d) smaller than kernel "
               "size - padding( kH: %d padH: %d kW: %d padW: %d",
               input->size[dimh], input->size[dimw], kH, padH, kW, padW);
    THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
               "pad should be smaller than half of kernel size, but got "
               "padW = %d, padH = %d, kW = %d, kH = %d",
               padW, padH, kW, kH);

    long nInputPlane = input->size[dimh-1];
    long inputHeight = input->size[dimh];
    long inputWidth = input->size[dimw];
    long outputHeight, outputWidth;
    long nOutputPlane = nInputPlane;

    if (ceil_mode)
    {
        outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
        outputWidth  = (long)(ceil((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
    }
    else
    {
        outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
        outputWidth  = (long)(floor((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
    }

    if (outputWidth < 1 || outputHeight < 1)
        THError("Given input size: (%dx%dx%d). "
                "Calculated output size: (%dx%dx%d). Output size is too small",
                nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);

    if (gradOutput != NULL) {
        THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
        THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
        THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
    }
    if (indices != NULL) {
        THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane);
        THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight);
        THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth);
    }
}
static inline void THNN_(VolumetricConvolutionMM_shapeCheck)(
                         THNNState *state,
                         THTensor *input,
                         THTensor *gradOutput,
                         THTensor *weight,
                         THTensor *bias,
                         int kT,
                         int kW,
                         int kH,
                         int dT,
                         int dW,
                         int dH,
                         int pT,
                         int pW,
                         int pH,
                         int weight_nullable) {
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  THArgCheck(kT > 0 && kW > 0 && kH > 0, 8,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);

  if (weight != NULL) {
    THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 5, 5, weight,
                    "2D or 5D weight tensor expected, but got: %s");
    if (bias != NULL) {
      THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
    }
  } else if (!weight_nullable) {
    THError("weight tensor is expected to be non-nullable");
  }

  int ndim = input->nDimension;
  int dimf = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5)
  {
    dimf++;
    dimt++;
    dimh++;
    dimw++;
  }

  int64_t inputDepth;
  int64_t inputHeight;
  int64_t inputWidth;

  int64_t exactInputDepth;
  int64_t exactInputHeight;
  int64_t exactInputWidth;
  int64_t outputDepth;
  int64_t outputHeight;
  int64_t outputWidth;

  inputDepth = input->size[dimt];
  inputHeight  = input->size[dimh];
  inputWidth   = input->size[dimw];

  exactInputDepth = inputDepth + 2*pT;
  exactInputHeight = inputHeight + 2*pH;
  exactInputWidth = inputWidth + 2*pW;

  if (exactInputDepth < kT || exactInputHeight < kH || exactInputWidth < kW) {
    THError("Calculated padded input size per channel: (%ld x %ld x %ld). "
      "Kernel size: (%ld x %ld x %ld). Kernel size can't greater than actual input size",
      exactInputDepth, exactInputHeight, exactInputWidth, kT, kH, kW);
  }

  outputDepth  = (exactInputDepth - kT) / dT + 1;
  outputHeight = (exactInputHeight - kH) / dH + 1;
  outputWidth  = (exactInputWidth - kW) / dW + 1;


  if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) {
    THError("Given input size per channel: (%ld x %ld x %ld). "
      "Calculated output size per channel: (%ld x %ld x %ld). Output size is too small",
      inputDepth, inputHeight, inputWidth, outputDepth, outputHeight, outputWidth);
  }

  if (weight != NULL) {
    int64_t nInputPlane = weight->size[1];
    if (weight->nDimension == 2) {
      nInputPlane /= (kT * kH * kW);
    }
    THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  }

  if (gradOutput != NULL) {
    if (weight != NULL) {
      int64_t nOutputPlane = weight->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    } else if (bias != NULL) {
      int64_t nOutputPlane = bias->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    }
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
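/*
 * Worked example (illustrative numbers): inputDepth = 3, pT = 0, kT = 5
 * gives exactInputDepth = 3 < kT, so the padded-input check fires; with
 * inputDepth = 7, pT = 1, kT = 5, dT = 2 instead,
 *   outputDepth = (7 + 2*1 - 5) / 2 + 1 = 3.
 */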
static inline void THNN_(VolumetricAveragePooling_shapeCheck)(
                         THNNState *state,
                         THTensor *input,
                         THTensor *gradOutput,
                         int kT,
                         int kW,
                         int kH,
                         int dT,
                         int dW,
                         int dH,
                         int padT,
                         int padW,
                         int padH,
                         bool ceil_mode)
{
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  int ndim = input->nDimension;
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  THArgCheck(kT > 0 && kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
             kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
             dT, dH, dW);
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");

  THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH
             && input->size[dimt] >= kT, 2,
             "input image (T: %d H: %d W: %d) smaller than "
             "kernel size (kT: %d kH: %d kW: %d)",
             input->size[dimt], input->size[dimh], input->size[dimw],
             kT, kH, kW);

  // The second argument is the argument number (argNumber); here it is the index of padH.
  THArgCheck(kT/2 >= padT && kW/2 >= padW && kH/2 >= padH, 11,
            "pad should not be greater than half of kernel size, but got "
            "padT = %d, padW = %d, padH = %d, kT = %d, kW = %d, kH = %d",
            padT, padW, padH, kT, kW, kH);

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];

  if (ceil_mode) {
    otime   = (long)(ceil((float)(itime   - kT + 2*padT) / dT)) + 1;
    oheight = (long)(ceil((float)(iheight - kH + 2*padH) / dH)) + 1;
    owidth  = (long)(ceil((float)(iwidth  - kW + 2*padW) / dW)) + 1;
  }
  else
  {
    otime   = (long)(floor((float)(itime   - kT + 2*padT) / dT)) + 1;
    oheight = (long)(floor((float)(iheight - kH + 2*padH) / dH)) + 1;
    owidth  = (long)(floor((float)(iwidth  - kW + 2*padW) / dW)) + 1;
  }

  if (padT || padW || padH)
  {
    // ensure that the last pooling starts inside the image
    // needed to avoid problems in ceil mode
    if ((otime   - 1)*dT >= itime   + padT)
      --otime;
    if ((oheight - 1)*dH >= iheight + padH)
      --oheight;
    if ((owidth  - 1)*dW >= iwidth  + padW)
      --owidth;
  }

  if (otime < 1 || owidth < 1 || oheight < 1)
    THError("Given input size: (%dx%dx%dx%d). "
	    "Calculated output size: (%dx%dx%dx%d). Output size is too small",
            nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth);
  }
}
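/*
 * Worked example (illustrative numbers) of the ceil_mode correction:
 * iheight = 5, kH = 2, padH = 1, dH = 2 with ceil_mode = true gives
 *   oheight = ceil((5 - 2 + 2*1) / 2.0) + 1 = 4,
 * but the last window would start at (4 - 1) * 2 - 1 = 5, entirely inside the
 * bottom padding, so oheight is decremented to 3.
 */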
static inline void THNN_(VolumetricDilatedConvolution_shapeCheck)(
                         THTensor *input, THTensor *gradOutput,
                         THTensor *weight, THTensor *bias,
                         int kT, int kH, int kW, int dT, int dH, int dW,
                         int padT, int padH, int padW,
                         int dilationT, int dilationH, int dilationW,
                         int weight_nullable) {
  THNN_ARGCHECK(input->_dim() == 4 || input->_dim() == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  THArgCheck(kT > 0 && kW > 0 && kH > 0, 8,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
  THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 15,
             "dilation should be greater than zero, but got dilationT: %d, dilationH: %d, dilationW: %d",
             dilationT, dilationH, dilationW);

  if (weight != NULL) {
    THNN_ARGCHECK(weight->_dim() == 5, 4, weight,
                  "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
                  "expected for weight, but got: %s");
    if (bias != NULL) {
      THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
    }
  } else if (!weight_nullable) {
    THError("weight tensor is expected to be non-nullable");
  }

  // Params
  int ndim = input->_dim();
  int dimf = 0;
  int dimd = 1;
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5) {
    dimf++;
    dimd++;
    dimh++;
    dimw++;
  }

  int64_t inputDepth  = input->size[dimd];
  int64_t inputHeight  = input->size[dimh];
  int64_t inputWidth   = input->size[dimw];
  int64_t outputDepth  = (inputDepth  + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
  int64_t outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
  int64_t outputWidth  = (inputWidth  + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;

  if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) {
    THError("Given input size per channel: (%ld x %ld x %ld). "
      "Calculated output size per channel: (%ld x %ld x %ld). Output size is too small",
      inputDepth, inputHeight, inputWidth, outputDepth, outputHeight, outputWidth);
  }

  if (weight != NULL) {
    int64_t nInputPlane = weight->size[1];
    THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  }

  if (gradOutput != NULL) {
    if (weight != NULL) {
      int64_t nOutputPlane = weight->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    } else if (bias != NULL) {
      int64_t nOutputPlane = bias->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    }
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
static inline void THNN_(VolumetricConvolutionMM_shapeCheck)(
                         THNNState *state,
                         THTensor *input,
                         THTensor *gradOutput,
                         THTensor *weight,
                         THTensor *bias,
                         int kT,
                         int kW,
                         int kH,
                         int dT,
                         int dW,
                         int dH,
                         int pT,
                         int pW,
                         int pH) {
  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  THArgCheck(kT > 0 && kW > 0 && kH > 0, 8,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);

  int ndim = input->nDimension;
  int dimf = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5)
  {
    dimf++;
    dimt++;
    dimh++;
    dimw++;
  }

  int64_t nInputPlane;
  int64_t inputDepth;
  int64_t inputHeight;
  int64_t inputWidth;
  int64_t nOutputPlane;
  int64_t outputDepth;
  int64_t outputHeight;
  int64_t outputWidth;

  nInputPlane = input->size[dimf];
  inputDepth = input->size[dimt];
  inputHeight  = input->size[dimh];
  inputWidth   = input->size[dimw];
  nOutputPlane = weight->size[0];
  outputDepth  = (inputDepth + 2*pT - kT) / dT + 1;
  outputHeight = (inputHeight + 2*pH - kH) / dH + 1;
  outputWidth  = (inputWidth + 2*pW - kW) / dW + 1;

  if (outputWidth < 1 || outputHeight < 1 || outputDepth < 1)
  {
    THError(
      "Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
      nInputPlane, inputDepth, inputHeight, inputWidth,
      nOutputPlane, outputDepth, outputHeight, outputWidth
    );
  }

  THArgCheck(weight->nDimension == 2 || weight->nDimension == 5, 4,
             "weight tensor should be 2D or 5D - got %d", weight->nDimension);

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
  }

  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
static inline void THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
                         THNNState *state,
                         THTensor *input,
                         THTensor *gradOutput,
                         THIndexTensor *indices,
                         int kT, int kW, int kH,
                         int dT, int dW, int dH,
                         int pT, int pW, int pH,
                         int dilationT, int dilationW, int dilationH,
                         bool ceilMode) {
  int ndim = input->nDimension;
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;
  int64_t nslices;
  int64_t itime;
  int64_t iheight;
  int64_t iwidth;
  int64_t otime;
  int64_t oheight;
  int64_t owidth;

  THArgCheck(kT > 0 && kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
             kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
             dT, dH, dW);
  THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 14,
             "dilation should be greater than 0, but got dilationT: %d dilationH: %d dilationW: %d",
             dilationT, dilationH, dilationW);

  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
             "pad should be smaller than half of kernel size, but got "
             "kT: %d kW: %d, kH: %d, padT: %d, padW: %d, padH: %d",
             kT, kW, kH, pT, pW, pH);

  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];
  if (ceilMode)
  {
    otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(ceil((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }
  else
  {
    otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(floor((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }

  if (pT || pW || pH)
  {
    // ensure that the last pooling starts inside the image
    if ((otime - 1)*dT >= itime + pT)
      --otime;
    if ((oheight - 1)*dH >= iheight + pH)
      --oheight;
    if ((owidth  - 1)*dW >= iwidth  + pW)
      --owidth;
  }

  if (otime < 1 || owidth < 1 || oheight < 1)
    THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
            nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth);
  }
  if (indices != NULL) {
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimN, nslices);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimt, otime);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, oheight);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, owidth);
  }
}
static inline void THNN_(SpatialFullDilatedConvolution_shapeCheck)(
        THTensor *input, THTensor *gradOutput,
        THTensor *weight, THTensor *bias,
        int kH, int kW, int dH, int dW, int padH, int padW,
        int dilationH, int dilationW, int adjH, int adjW, int weight_nullable) {

  THArgCheck(kW > 0 && kH > 0, 9,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  THArgCheck(dilationW > 0 && dilationH > 0, 15,
             "dilation should be greater than zero, but got dilationH: %d, dilationW: %d",
             dilationH, dilationW);
  THArgCheck((adjW < dW || adjW < dilationW) && (adjH < dH || adjH < dilationH), 15,
             "output padding must be smaller than either stride or dilation, but got adjH: %d adjW: %d dH: %d dW: %d dilationH: %d dilationW: %d",
             adjH, adjW, dH, dW, dilationH, dilationW);

  if (weight != NULL) {
    THNN_ARGCHECK(!weight->is_empty() && (weight->dim() == 2 || weight->dim() == 4), 5, weight,
                  "non-empty 2D or 4D weight tensor expected, but got: %s");
    if (bias != NULL) {
      THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size(1));
    }
  } else if (!weight_nullable) {
    THError("weight tensor is expected to be non-nullable");
  }

  int ndim = input->dim();
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
                "non-empty 3D or 4D input tensor expected but got: %s");

  int64_t inputHeight  = input->size(dimh);
  int64_t inputWidth   = input->size(dimw);
  int64_t outputHeight = (inputHeight - 1) * dH - 2*padH + (dilationH * (kH - 1) + 1) + adjH;
  int64_t outputWidth  = (inputWidth - 1) * dW - 2*padW + (dilationW * (kW - 1) + 1) + adjW;

  if (outputWidth < 1 || outputHeight < 1) {
    THError("Given input size per channel: (%ld x %ld). "
            "Calculated output size per channel: (%ld x %ld). Output size is too small",
            inputHeight, inputWidth, outputHeight, outputWidth);
  }

  if (weight != NULL) {
    int64_t nInputPlane = weight->size(0);
    THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  }

  if (gradOutput != NULL) {
    if (weight != NULL) {
      int64_t nOutputPlane = weight->size(1);
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    } else if (bias != NULL) {
      int64_t nOutputPlane = THTensor_sizeLegacyNoScalars(bias, 0);
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    }
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
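/*
 * Worked example (illustrative numbers): inputHeight = 16, dH = 2, padH = 2,
 * kH = 3, dilationH = 2, adjH = 1 gives an effective kernel extent of
 * 2*(3 - 1) + 1 = 5, so
 *   outputHeight = (16 - 1) * 2 - 2*2 + 5 + 1 = 32.
 */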
static inline void THNN_(SpatialAveragePooling_shapeCheck)(
	THTensor *input, THTensor *gradOutput,
	int kH, int kW, int dH, int dW, int padH, int padW,
	bool ceil_mode) {

  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);

  int ndim = input->nDimension;
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
		"3D or 4D input tensor expected but got: %s");

  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
	     "pad should be smaller than half of kernel size, but got "
	     "padW = %d, padH = %d, kW = %d, kH = %d",
	     padW, padH, kW, kH);

  int64_t nInputPlane = input->size[dimh-1];
  int64_t inputHeight = input->size[dimh];
  int64_t inputWidth = input->size[dimw];
  int64_t outputHeight, outputWidth;
  int64_t nOutputPlane = nInputPlane;

  if(ceil_mode)
  {
    outputHeight = (int64_t)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
    outputWidth  = (int64_t)(ceil((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
  }
  else
  {
    outputHeight = (int64_t)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
    outputWidth  = (int64_t)(floor((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
  }

  if (padW || padH)
  {
    // ensure that the last pooling starts inside the image
    // needed to avoid problems in ceil mode
    if ((outputHeight - 1)*dH >= inputHeight + padH)
      --outputHeight;
    if ((outputWidth  - 1)*dW >= inputWidth  + padW)
      --outputWidth;
  }

  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%dx%dx%d). "
	    "Calculated output size: (%dx%dx%d). Output size is too small",
            nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
void THNN_(SpatialAveragePooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          bool ceil_mode,
          bool count_include_pad)
{
  int dimw = 2;
  int dimh = 1;
  int dimc = 0;
  int64_t nbatch = 1;
  int64_t ndim = 3;

  int64_t inputWidth;
  int64_t inputHeight;
  int64_t outputWidth;
  int64_t outputHeight;
  int64_t nInputPlane; // number of channels (or colors)

  real *gradOutput_data;
  real *input_data, *gradInput_data;

  int64_t k;

  THNN_(SpatialAveragePooling_shapeCheck)
    (input, gradOutput, kH, kW, dH, dW, padH, padW, ceil_mode);


  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
    dimc++;
    ndim = 4;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  nInputPlane = input->size[dimc];

  if(ceil_mode)
  {
    outputWidth  = (int64_t)(ceil((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
    outputHeight = (int64_t)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
  }
  else
  {
    outputWidth  = (int64_t)(floor((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
    outputHeight = (int64_t)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
  }
  if (padW || padH)
  {
    // ensure that the last pooling starts inside the image
    // needed to avoid problems in ceil mode
    if ((outputHeight - 1)*dH >= inputHeight + padH)
      --outputHeight;
    if ((outputWidth  - 1)*dW >= inputWidth  + padW)
      --outputWidth;
  }

  THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
  THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);

  THTensor_(resizeAs)(gradInput, input);

  gradOutput = THTensor_(newContiguous)(gradOutput);
  THArgCheck(THTensor_(isContiguous)(gradInput), 4, "gradInput must be contiguous");

  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);

#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++)
  {
    int64_t p;
    for(p = 0; p < nbatch; p++)
    {
      real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
      int64_t xx, yy;

      real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;

      int64_t i;
      for(i=0; i<inputWidth*inputHeight; i++)
        ptr_gradInput[i] = 0.0;

      for(yy = 0; yy < outputHeight; yy++)
      {
        for(xx = 0; xx < outputWidth; xx++)
        {
          int64_t hstart = yy * dH - padH;
          int64_t wstart = xx * dW - padW;
          int64_t hend = fminf(hstart + kH, inputHeight + padH);
          int64_t wend = fminf(wstart + kW, inputWidth + padW);
          int pool_size = (hend - hstart) * (wend - wstart);
          hstart = fmaxf(hstart, 0);
          wstart = fmaxf(wstart, 0);
          hend = fminf(hend, inputHeight);
          wend = fminf(wend, inputWidth);

          real z = *ptr_gradOutput++;

          int divide_factor;
          if(count_include_pad)
            divide_factor = pool_size;
          else
            divide_factor = (hend - hstart) * (wend - wstart);

          int64_t kx, ky;
          for(ky = hstart ; ky < hend; ky++)
          {
            for(kx = wstart; kx < wend; kx++)
              ptr_gradInput[ky*inputWidth + kx] += z/divide_factor;
          }
        }
      }
    }
  }

  THTensor_(free)(gradOutput);
}
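/*
 * Note on divide_factor (illustrative numbers, assuming an input of at least
 * 2x2): for a 3x3 kernel with padH = padW = 1 at the top-left output
 * location, pool_size counts the padded window (3*3 = 9) while the clamped
 * in-image window is 2x2 = 4. With count_include_pad the gradient is divided
 * by 9, otherwise by 4, mirroring the averaging done in the forward pass.
 */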