/* Fractional max pooling forward pass (Graham, 2014).
 *
 * Pools each plane with pseudo-random pooling regions of size
 * poolSizeH x poolSizeW; region positions are derived from `randomSamples`
 * (2 samples per plane, per batch element).
 *
 * input:   3D (planes x H x W) or 4D (batch x planes x H x W) tensor.
 * output:  resized to (batch x) planes x outputH x outputW.
 * indices: resized like output; receives the argmax location of each
 *          output element.
 */
void THNN_(SpatialFractionalMaxPooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    int outputW, int outputH,
    int poolSizeW, int poolSizeH,
    THTensor *indices,
    THTensor *randomSamples)
{
  long numBatch = 1;
  int planeDim = 0;
  int heightDim = 1;
  int widthDim = 2;

  long numInputDims = THTensor_(nDimension)(input);
  THNN_ARGCHECK(numInputDims == 3 || numInputDims == 4, 2, input,
                "3D or 4D (batch mode) tensor expected for input, but got: %s");

  if (numInputDims == 4) {
    numBatch = THTensor_(size)(input, 0);
    planeDim++;
    heightDim++;
    widthDim++;
  }

  /* sizes */
  long numPlanes = THTensor_(size)(input, planeDim);
  long inputH = THTensor_(size)(input, heightDim);
  long inputW = THTensor_(size)(input, widthDim);

  /* Every pooling window must fit inside the input.
   * The sizes are `long`, so they are printed with %ld
   * (passing a long for %d is undefined behavior in varargs). */
  THArgCheck(outputH + poolSizeH - 1 < inputH, 7,
             "poolSizeH (%d) too large relative to input height (%ld)",
             poolSizeH, inputH);
  THArgCheck(outputW + poolSizeW - 1 < inputW, 6,
             "poolSizeW (%d) too large relative to input width (%ld)",
             poolSizeW, inputW);

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (numInputDims == 3) {
    /* resize output */
    THTensor_(resize3d)(output, numPlanes, outputH, outputW);
    /* indices will contain the locations for each output point */
    THTensor_(resize3d)(indices, numPlanes, outputH, outputW);

    THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
      THTensor_(data)(input),
      THTensor_(data)(output),
      THTensor_(data)(indices),
      THTensor_(data)(randomSamples),
      numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
  } else {
    THTensor_(resize4d)(output, numBatch, numPlanes, outputH, outputW);
    /* indices will contain the locations for each output point */
    THTensor_(resize4d)(indices, numBatch, numPlanes, outputH, outputW);

    long batch;
#pragma omp parallel for private(batch)
    for (batch = 0; batch < numBatch; ++batch) {
      /* each batch element reads 2 random samples per plane */
      THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
        THTensor_(data)(input) + batch * numPlanes * inputH * inputW,
        THTensor_(data)(output) + batch * numPlanes * outputH * outputW,
        THTensor_(data)(indices) + batch * numPlanes * outputH * outputW,
        THTensor_(data)(randomSamples) + batch * numPlanes * 2,
        numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
    }
  }

  /* cleanup: release the contiguous copy (or extra reference) */
  THTensor_(free)(input);
}
/* Validates arguments and shapes for SpatialDilatedMaxPooling.
 *
 * Checks kernel/stride/dilation positivity, input dimensionality (3D or
 * 4D batch mode), minimum input size and padding bounds, then recomputes
 * the output size and verifies gradOutput/indices (when non-NULL) match.
 * Raises via THError/THArgCheck on any violation.
 */
static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)(
    THTensor *input, THTensor *gradOutput, THIndexTensor *indices,
    int kH, int kW, int dH, int dW, int padH, int padW,
    int dilationH, int dilationW, bool ceil_mode) {

  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d",
             kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d",
             dH, dW);
  THArgCheck(dilationH > 0 && dilationW > 0, 12,
             "dilation should be greater than zero, but got dilationH: %d dilationW: %d",
             dilationH, dilationW);

  int ndim = input->nDimension;
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  /* tensor sizes are `long`: print with %ld (was %d — undefined behavior
   * in varargs) */
  THArgCheck(input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2,
             "input image (H: %ld, W: %ld) smaller than kernel "
             "size - padding( kH: %d padH: %d kW: %d padW: %d",
             input->size[dimh], input->size[dimw], kH, padH, kW, padW);
  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d",
             padW, padH, kW, kH);

  long nInputPlane = input->size[dimh-1];
  long inputHeight = input->size[dimh];
  long inputWidth = input->size[dimw];
  long outputHeight, outputWidth;
  long nOutputPlane = nInputPlane;

  /* effective kernel extent is dilation*(k-1)+1 */
  if (ceil_mode) {
    outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(ceil((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  } else {
    outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(floor((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }

  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%ldx%ldx%ld). "
            "Calculated output size: (%ldx%ldx%ld). Output size is too small",
            nInputPlane, inputHeight, inputWidth,
            nInputPlane, outputHeight, outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }

  if (indices != NULL) {
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth);
  }
}
/* Dilated max pooling forward pass.
 *
 * input:   3D (planes x H x W) or 4D (batch x planes x H x W) tensor.
 * output:  resized to the pooled size; receives the max of each window.
 * indices: resized like output; receives the argmax position of each
 *          output element.
 * ceil_mode selects ceil vs floor when computing the output size.
 */
void THNN_(SpatialDilatedMaxPooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *indices,
    int kW, int kH,
    int dW, int dH,
    int padW, int padH,
    int dilationW, int dilationH,
    bool ceil_mode)
{
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nslices;
  long iheight;
  long iwidth;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;
  real *indices_data;

  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
                "3D or 4D (batch mode) tensor expected for input, but got: %s");

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* tensor sizes are `long`: print with %ld (was %d — undefined behavior
   * in varargs) */
  THArgCheck(input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2,
             "input image (H: %ld, W: %ld) smaller than kernel "
             "size - padding( kH: %d padH: %d kW: %d padW: %d",
             input->size[dimh], input->size[dimw], kH, padH, kW, padW);
  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d",
             padW, padH, kW, kH);

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  /* effective kernel extent is dilation*(k-1)+1 */
  if (ceil_mode) {
    oheight = (long)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    owidth  = (long)(ceil((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  } else {
    oheight = (long)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    owidth  = (long)(floor((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }

  if (owidth < 1 || oheight < 1)
    THError("Given input size: (%ldx%ldx%ld). "
            "Calculated output size: (%ldx%ldx%ld). Output size is too small",
            nslices, iheight, iwidth, nslices, oheight, owidth);

  if (padW || padH) {
    /* ensure that the last pooling starts inside the image */
    if ((oheight - 1)*dH >= iheight + padH)
      --oheight;
    if ((owidth - 1)*dW >= iwidth + padW)
      --owidth;
  }

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  /* resize output */
  if (input->nDimension == 3) {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain the locations for each output point */
    THTensor_(resize3d)(indices, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
      (input_data, output_data, indices_data,
       nslices, iwidth, iheight, owidth, oheight,
       kW, kH, dW, dH, padW, padH, dilationW, dilationH);
  } else {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain the locations for each output point */
    THTensor_(resize4d)(indices, nbatch, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++) {
      THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
        (input_data + p*nslices*iwidth*iheight,
         output_data + p*nslices*owidth*oheight,
         indices_data + p*nslices*owidth*oheight,
         nslices, iwidth, iheight, owidth, oheight,
         kW, kH, dW, dH, padW, padH, dilationW, dilationH);
    }
  }

  /* cleanup: release the contiguous copy (or extra reference) */
  THTensor_(free)(input);
}
/* Volumetric (3D) dilated max pooling forward pass.
 *
 * input:   4D (planes x T x H x W) or 5D (batch x planes x T x H x W).
 * output:  resized to the pooled size; receives the max of each window.
 * indices: resized like output; receives packed ti,i,j argmax locations
 *          stored in real (float/double) elements.
 * ceilMode selects ceil vs floor when computing the output size.
 */
void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *indices,
    int kT, int kW, int kH,
    int dT, int dW, int dH,
    int pT, int pW, int pH,
    int dilationT, int dilationW, int dilationH,
    bool ceilMode)
{
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;
  real *indices_data;

  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");

  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5) {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* tensor sizes are `long`: print with %ld (was %d — undefined behavior
   * in varargs) */
  THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH
             && input->size[dimt] >= kT, 2,
             "input image (T: %ld H: %ld W: %ld) smaller than "
             "kernel size (kT: %d kH: %d kW: %d)",
             input->size[dimt], input->size[dimh], input->size[dimw],
             kT, kH, kW);
  THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
             "pad should be smaller than half of kernel size");

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  /* effective kernel extent is dilation*(k-1)+1; cast to long (was int:
   * needless narrowing of a long-sized result) */
  if (ceilMode) {
    otime   = (long)(ceil((float)(itime   - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (long)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (long)(ceil((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  } else {
    otime   = (long)(floor((float)(itime   - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (long)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (long)(floor((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }

  if (otime < 1 || owidth < 1 || oheight < 1)
    THError("Given input size: (%ldx%ldx%ldx%ld). "
            "Calculated output size: (%ldx%ldx%ldx%ld). Output size is too small",
            nslices, itime, iheight, iwidth,
            nslices, otime, oheight, owidth);

  if (pT || pW || pH) {
    /* ensure that the last pooling starts inside the image */
    if ((otime - 1)*dT >= itime + pT)
      --otime;
    if ((oheight - 1)*dH >= iheight + pH)
      --oheight;
    if ((owidth - 1)*dW >= iwidth + pW)
      --owidth;
  }

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) /* non-batch mode */
  {
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j uchar locations packed into float/double */
    THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
      input_data, output_data, indices_data,
      nslices, itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH,
      pT, pW, pH, dilationT, dilationW, dilationH);
  }
  else /* batch mode */
  {
    long p;
    long nBatch = input->size[0];

    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */
    THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
        input_data + p * istride,
        output_data + p * ostride,
        indices_data + p * ostride,
        nslices, itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH,
        pT, pW, pH, dilationT, dilationW, dilationH);
    }
  }

  /* cleanup: release the contiguous copy (or extra reference) */
  THTensor_(free)(input);
}
/* Adaptive max pooling forward pass: pools each input plane down to a
 * fixed osizeH x osizeW grid, independent of the input's spatial size.
 *
 * input:   3D (planes x H x W) or 4D (batch x planes x H x W) tensor.
 *          Accessed through its explicit strides, so no contiguous copy
 *          is made.
 * output:  resized to (batch x) planes x osizeH x osizeW.
 * indices: resized like output; receives the argmax i,j location of each
 *          output element as THIndex_t.
 */
void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int osizeW,
          int osizeH)
{
  int wDim = 2;
  int hDim = 1;
  int64_t nbatch = 1;
  int64_t batchStride = 0;

  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
                "3D or 4D (batch mode) tensor expected for input, but got: %s");

  if (input->nDimension == 4)
  {
    batchStride = input->stride[0];
    nbatch = input->size[0];
    wDim++;
    hDim++;
  }

  /* input geometry: plane count, spatial extents and element strides */
  int64_t nplane  = input->size[hDim - 1];
  int64_t iheight = input->size[hDim];
  int64_t iwidth  = input->size[wDim];

  int64_t planeStride = input->stride[hDim - 1];
  int64_t rowStride   = input->stride[hDim];
  int64_t colStride   = input->stride[wDim];

  if (input->nDimension == 3)
  {
    /* non-batch mode: single frame */
    THTensor_(resize3d)(output, nplane, osizeH, osizeW);
    /* indices will contain i,j locations for each output point */
    THIndexTensor_(resize3d)(indices, nplane, osizeH, osizeW);

    THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
        THTensor_(data)(input),
        THTensor_(data)(output),
        THIndexTensor_(data)(indices),
        nplane,
        iheight, iwidth,
        osizeH, osizeW,
        planeStride, rowStride, colStride);
    return;
  }

  /* batch mode: one frame per batch element, processed in parallel */
  THTensor_(resize4d)(output, nbatch, nplane, osizeH, osizeW);
  /* indices will contain i,j locations for each output point */
  THIndexTensor_(resize4d)(indices, nbatch, nplane, osizeH, osizeW);

  real *inPtr = THTensor_(data)(input);
  real *outPtr = THTensor_(data)(output);
  THIndex_t *idxPtr = THIndexTensor_(data)(indices);

  int64_t b;
#pragma omp parallel for private(b)
  for (b = 0; b < nbatch; b++)
  {
    /* output/indices are freshly resized, hence contiguous */
    int64_t outOffset = b * nplane * osizeH * osizeW;
    THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
        inPtr + b * batchStride,
        outPtr + outOffset,
        idxPtr + outOffset,
        nplane,
        iheight, iwidth,
        osizeH, osizeW,
        planeStride, rowStride, colStride);
  }
}
/* Validates arguments and shapes for SpatialAveragePooling.
 *
 * Checks kernel/stride positivity, input dimensionality (3D or 4D batch
 * mode) and padding bounds, recomputes the output size (with the
 * ceil-mode last-window adjustment) and verifies gradOutput (when
 * non-NULL) matches. Raises via THError/THArgCheck on any violation.
 */
static inline void THNN_(SpatialAveragePooling_shapeCheck)(
    THTensor *input, THTensor *gradOutput,
    int kH, int kW, int dH, int dW, int padH, int padW,
    bool ceil_mode) {

  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d",
             kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d",
             dH, dW);

  int ndim = input->nDimension;
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d",
             padW, padH, kW, kH);

  long nInputPlane = input->size[dimh-1];
  long inputHeight = input->size[dimh];
  long inputWidth = input->size[dimw];
  long outputHeight, outputWidth;
  long nOutputPlane = nInputPlane;

  if(ceil_mode)
  {
    outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
    outputWidth  = (long)(ceil((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
  }
  else
  {
    outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
    outputWidth  = (long)(floor((float)(inputWidth  - kW + 2*padW) / dW)) + 1;
  }

  if (padW || padH)
  {
    // ensure that the last pooling starts inside the image
    // needed to avoid problems in ceil mode
    if ((outputHeight - 1)*dH >= inputHeight + padH)
      --outputHeight;
    if ((outputWidth  - 1)*dW >= inputWidth  + padW)
      --outputWidth;
  }

  /* sizes are `long`: print with %ld (was %d — undefined behavior
   * in varargs) */
  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%ldx%ldx%ld). "
            "Calculated output size: (%ldx%ldx%ld). Output size is too small",
            nInputPlane, inputHeight, inputWidth,
            nInputPlane, outputHeight, outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}
/* Validates arguments and shapes for VolumetricConvolutionMM.
 *
 * Checks input dimensionality (4D or 5D batch mode), kernel/stride
 * positivity, weight/bias shapes (weight may be NULL when
 * weight_nullable), that the padded input fits the kernel, and that
 * gradOutput (when non-NULL) matches the computed output size.
 * Raises via THError/THArgCheck on any violation.
 */
static void inline THNN_(VolumetricConvolutionMM_shapeCheck)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *weight,
    THTensor *bias,
    int kT, int kW, int kH,
    int dT, int dW, int dH,
    int pT, int pW, int pH,
    int weight_nullable) {

  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");
  THArgCheck(kT > 0 && kW > 0 && kH > 0, 8,
             "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
             kT, kH, kW);
  THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
             "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
             dT, dH, dW);

  if (weight != NULL) {
    /* 2D weight is the unfolded (MM) layout, 5D is the direct layout */
    THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 5, 5, weight,
                  "2D or 5D weight tensor expected, but got: %s");
    if (bias != NULL) {
      THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
    }
  } else if (!weight_nullable) {
    THError("weight tensor is expected to be non-nullable");
  }

  int ndim = input->nDimension;
  int dimf = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (ndim == 5) {
    dimf++;
    dimt++;
    dimh++;
    dimw++;
  }

  int64_t inputDepth;
  int64_t inputHeight;
  int64_t inputWidth;

  int64_t exactInputDepth;
  int64_t exactInputHeight;
  int64_t exactInputWidth;
  int64_t outputDepth;
  int64_t outputHeight;
  int64_t outputWidth;

  inputDepth = input->size[dimt];
  inputHeight = input->size[dimh];
  inputWidth = input->size[dimw];

  /* input size after zero-padding on both sides */
  exactInputDepth = inputDepth + 2*pT;
  exactInputHeight = inputHeight + 2*pH;
  exactInputWidth = inputWidth + 2*pW;

  if (exactInputDepth < kT || exactInputHeight < kH || exactInputWidth < kW) {
    /* kT/kH/kW are int: print with %d (was %ld — undefined behavior
     * in varargs) */
    THError("Calculated padded input size per channel: (%ld x %ld x %ld). "
            "Kernel size: (%d x %d x %d). Kernel size can't be greater than actual input size",
            exactInputDepth, exactInputHeight, exactInputWidth,
            kT, kH, kW);
  }

  outputDepth  = (exactInputDepth - kT) / dT + 1;
  outputHeight = (exactInputHeight - kH) / dH + 1;
  outputWidth  = (exactInputWidth - kW) / dW + 1;

  if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) {
    THError("Given input size per channel: (%ld x %ld x %ld). "
            "Calculated output size per channel: (%ld x %ld x %ld). Output size is too small",
            inputDepth, inputHeight, inputWidth,
            outputDepth, outputHeight, outputWidth);
  }

  if (weight != NULL) {
    int64_t nInputPlane = weight->size[1];
    if (weight->nDimension == 2) {
      /* 2D layout folds the kernel volume into dim 1 */
      nInputPlane /= (kT * kH * kW);
    }
    THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
  }

  if (gradOutput != NULL) {
    if (weight != NULL) {
      int64_t nOutputPlane = weight->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    } else if (bias != NULL) {
      int64_t nOutputPlane = bias->size[0];
      THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    }
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, outputDepth);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
}