/* Forward pass of the class negative log-likelihood loss.
 *
 * input:        1D (n_classes) or 2D (batch, n_classes) log-probabilities.
 * target:       class index per sample; indices here are 1-based (see the
 *               `- 1` below).
 * output:       scalar loss written to output_data[0].
 * sizeAverage:  when true, divide the summed loss by the total weight.
 * weights:      optional per-class rescaling weights (may be NULL).
 * total_weight: scalar receiving the sum of weights of the targets seen.
 *
 * NOTE(review): this function subtracts a hard-coded 1 from target indices
 * while other functions in this file use TH_INDEX_BASE — confirm the two
 * conventions are intentionally different (different file vintages?).
 */
void THNN_(ClassNLLCriterion_updateOutput)(THNNState *state, THTensor *input, THIndexTensor *target, THTensor *output, bool sizeAverage, THTensor *weights, THTensor *total_weight)
{
  int n_dims = THTensor_(nDimension)(input);
  /* class axis is the last dimension in both 1D and 2D layouts */
  int n_classes = THTensor_(size)(input, n_dims - 1);

  if (THIndexTensor_(nDimension)(target) > 1) {
    THError("multi-target not supported");
  }
  if (THTensor_(nDimension)(input) > 2) {
    THError("input tensor should be 1D or 2D");
  }

  /* work on contiguous copies; freed at the end */
  input = THTensor_(newContiguous)(input);
  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  real *input_data = THTensor_(data)(input);
  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *output_data = THTensor_(data)(output);
  real *total_weight_data = THTensor_(data)(total_weight);

  output_data[0] = total_weight_data[0] = 0.0;

  if (THTensor_(nDimension)(input) == 1) {
    /* single sample: loss = -w[t] * input[t] */
    int cur_target = target_data[0] - 1;  /* 1-based -> 0-based */
    THAssert(cur_target >= 0 && cur_target < n_classes);
    total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f;
    output_data[0] = -input_data[cur_target] * total_weight_data[0];
  } else if (THTensor_(nDimension)(input) == 2) {
    /* batch: accumulate weighted -log-likelihood over all rows */
    int batch_size = THTensor_(size)(input, 0);
    THAssert(THIndexTensor_(size)(target, 0) == batch_size);
    int n_target = THTensor_(size)(input, 1);
    int i;
    for (i = 0; i < batch_size; i++) {
      int cur_target = target_data[i] - 1;  /* 1-based -> 0-based */
      THAssert(cur_target >= 0 && cur_target < n_classes);
      real cur_weight = weights ? weights_data[cur_target] : 1.0f;
      total_weight_data[0] += cur_weight;
      output_data[0] -= input_data[i * n_target + cur_target] * cur_weight;
    }
  }

  /* mean reduction; guarded so an all-zero weight sum does not divide by 0 */
  if (sizeAverage && total_weight_data[0]) {
    output_data[0] /= total_weight_data[0];
  }

  if (weights) {
    THTensor_(free)(weights);
  }
  THTensor_(free)(input);
  THIndexTensor_(free)(target);
}
/* Renormalize the rows of `weight` referenced by `idx` so each row's
 * `normType`-norm does not exceed `maxNorm` (per-row work is done by
 * LookupTable_renormRow).
 *
 * idx:      1D tensor of 1-based row indices into `weight`.
 * weight:   embedding matrix; must be contiguous.
 * maxNorm:  norm ceiling applied per row.
 * normType: p of the p-norm; must be positive.
 *
 * NOTE(review): the qsort below sorts `row_idx` IN PLACE, i.e. it mutates the
 * caller's index tensor (and then logically truncates it via `numel = ptr`).
 * Confirm callers do not rely on idx's original contents/order.
 */
void THNN_(LookupTable_renorm)(
          THNNState *state,
          THIndexTensor *idx,
          THTensor *weight,
          real maxNorm,
          real normType)
{
  if (!THTensor_(isContiguous)(weight))
    THError("weight must be contiguous");
  if (!THIndexTensor_(isContiguous)(idx))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(idx) != 1)
    THError("idx must be a vector");
  if (normType <= 0)
    THError("non-positive-norm not supported");

  long i;
  THIndex_t *row_idx = THIndexTensor_(data)(idx);
  long numel = THIndexTensor_(nElement)(idx);

  long numw = THTensor_(size)(weight, 0);
  long stride = THTensor_(stride)(weight, 0);
  real *gw = THTensor_(data)(weight);

  /* validate all indices (1-based, inclusive upper bound) before touching data */
  for (i=0; i<numel; i++)
    if (row_idx[i] < 1 || row_idx[i] > numw)
      THError("input out of range");

  // get unique indices
  /* sort then compact adjacent duplicates so each row is renormalized once */
  qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
  long ptr = 0;
  for (i=0; i<numel; i++)
    if (i == 0 || row_idx[i] != row_idx[i-1])
      row_idx[ptr++] = row_idx[i];
  numel = ptr;

#ifdef _OPENMP
  if (numel > 1000)
  {
    // The strategy is to parallelize over the rows that appear in
    // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
    // This distributes the work evenly to each thread.
    #pragma omp parallel for private(i)
    for (i=0; i<numel; i++)
    {
      long k = row_idx[i] - 1;  /* 1-based -> 0-based row */
      THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
    }
    return;
  }
#endif

  /* sequential fallback (and small-input path) */
  for (i=0; i<numel; i++)
  {
    long k = row_idx[i] - 1;
    THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
  }
}
/* Forward pass of the spatial (2D) negative log-likelihood loss.
 * input is (batch, classes, H, W) log-probabilities; target carries one
 * class index per spatial location.  Writes the summed (or size-averaged)
 * loss into *output and the summed class weights into *total_weight. */
void THNN_(SpatialClassNLLCriterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *output,
          bool sizeAverage,
          THTensor *weights,
          THTensor *total_weight)
{
  INITIAL_CHECK;

  /* operate on contiguous storage; released before returning */
  input = THTensor_(newContiguous)(input);
  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  real *input_data = THTensor_(data)(input);
  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *output_data = THTensor_(data)(output);
  real *total_weight_data = THTensor_(data)(total_weight);

  long batch_size = THTensor_(size)(input, 0);
  long n_classes = THTensor_(size)(input, 1);
  long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
  long sample_size = map_size * n_classes;

  real loss_sum = 0;
  real weight_sum = 0;
  for (int bi = 0; bi < batch_size; bi++) {
    const THIndex_t *target_row = target_data + bi * map_size;
    const real *input_sample = input_data + bi * sample_size;
    for (int px = 0; px < map_size; px++) {
      int cls = target_row[px] - TH_INDEX_BASE;
      THAssert(cls >= 0 && cls < n_classes);
      real w = weights_data ? weights_data[cls] : 1.0f;
      weight_sum += w;
      loss_sum -= input_sample[cls * map_size + px] * w;
    }
  }
  *total_weight_data = weight_sum;
  *output_data = loss_sum;

  /* mean reduction, skipped when the weight sum is zero */
  if (sizeAverage && *total_weight_data)
    *output_data /= *total_weight_data;

  THTensor_(free)(input);
  THIndexTensor_(free)(target);
  if (weights)
    THTensor_(free)(weights);
}
/* Backward pass of the spatial NLL loss: writes -w[class]/norm at every
 * pixel's target-class slot of gradInput.
 * NOTE(review): only target slots are written here — presumably gradInput
 * was zeroed by the caller; confirm against the calling convention. */
void THNN_(SpatialClassNLLCriterion_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *gradInput,
          bool sizeAverage,
          THTensor *weights,
          THTensor *total_weight)
{
  INITIAL_CHECK;
  THArgCheck(THTensor_(isContiguous)(gradInput), 4,
             "gradInput must be contiguous");

  real *total_weight_data = THTensor_(data)(total_weight);
  /* forward saw no (positively-weighted) targets: nothing to propagate */
  if (*total_weight_data <= 0)
    return;

  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *gradInput_data = THTensor_(data)(gradInput);

  long batch_size = THTensor_(size)(input, 0);
  long n_classes = THTensor_(size)(input, 1);
  long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
  long sample_size = map_size * n_classes;

  /* divide by total weight only in size-average mode */
  real normalize = sizeAverage ? *total_weight_data : 1.0f;

  int b;
#pragma omp parallel for
  for (b = 0; b < batch_size; b++) {
    THIndex_t *target_row = target_data + b * map_size;
    real *grad_sample = gradInput_data + b * sample_size;
    int px;
    for (px = 0; px < map_size; px++) {
      int cls = target_row[px] - TH_INDEX_BASE;
      THAssert(cls >= 0 && cls < n_classes);
      grad_sample[cls * map_size + px] =
        -(weights_data ? weights_data[cls] : 1.0f) / normalize;
    }
  }

  THIndexTensor_(free)(target);
  if (weights)
    THTensor_(free)(weights);
}
/* Reset and recompute per-row usage counts for the rows referenced by
 * `input` (1-based indices).  Only the referenced slots are touched, so
 * counts for unreferenced rows are deliberately left as-is. */
static void JHNN_(LookupTable_resetCount)(
          THInteger_t *count_data,
          THIndexTensor *input)
{
  /* BUGFIX: the counter was `int` while numel can exceed INT_MAX for large
   * index tensors, overflowing the loop variable (UB).  Use ptrdiff_t,
   * matching the THNN_ variant of this helper. */
  ptrdiff_t i;
  THIndex_t *input_data = THIndexTensor_(data)(input);
  ptrdiff_t numel = THIndexTensor_(nElement)(input);

  /* first pass: zero the counter of every referenced row */
  for (i = 0; i < numel; i++)
  {
    long k = input_data[i] - 1;  /* 1-based -> 0-based row */
    count_data[k] = 0;
  }
  /* second pass: tally occurrences */
  for (i = 0; i < numel; i++)
  {
    long k = input_data[i] - 1;
    count_data[k]++;
  }
}
/* Recompute occurrence counts for exactly the rows referenced by `input`:
 * first clear those slots, then tally.  Rows not referenced keep whatever
 * count they already had. */
static void THNN_(LookupTable_resetCount)(
          THInteger_t *count_data,
          THIndexTensor *input)
{
  THIndex_t *indices = THIndexTensor_(data)(input);
  ptrdiff_t n = THIndexTensor_(nElement)(input);
  ptrdiff_t j;

  /* clear pass */
  for (j = 0; j < n; j++)
  {
    long row = indices[j] - TH_INDEX_BASE;
    count_data[row] = 0;
  }
  /* count pass */
  for (j = 0; j < n; j++)
  {
    long row = indices[j] - TH_INDEX_BASE;
    count_data[row]++;
  }
}
/* Forward pass of 2D max unpooling: scatter each input value into an
 * (oheight, owidth) output map at the position recorded in `indices`
 * (produced by a preceding max pooling).  Handles 3D (C,H,W) and 4D
 * (N,C,H,W) inputs; per-frame work is done by
 * SpatialMaxUnpooling_updateOutput_frame. */
void THNN_(SpatialMaxUnpooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int owidth, int oheight)
{
  /* height/width axes for the non-batch layout; bumped below in batch mode */
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  real *input_data;
  real *output_data;
  THIndex_t *indices_data;

  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
                "3D or 4D (batch mode) tensor expected for input, but got: %s");
  THNN_CHECK_SHAPE_INDICES(input, indices);

  if (input->nDimension == 4)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  /* get contiguous input and indices */
  input = THTensor_(newContiguous)(input);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize output */
  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* zero first: positions not hit by any index stay 0 */
    THTensor_(zero)(output);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data,
                                                  indices_data,
                                                  nslices,
                                                  iwidth, iheight,
                                                  owidth, oheight);
  }
  else
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    THTensor_(zero)(output);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    /* batch samples are independent -> parallelize over the batch axis */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateOutput_frame)(
          input_data+p*nslices*iwidth*iheight,
          output_data+p*nslices*owidth*oheight,
          indices_data+p*nslices*iwidth*iheight,
          nslices,
          iwidth, iheight,
          owidth, oheight);
    }
  }

  /* cleanup: release the contiguous copies */
  THTensor_(free)(input);
  THIndexTensor_(free)(indices);
}
/* Backward pass of 2D max unpooling: gather each gradOutput value back to
 * the input position recorded in `indices`.  Handles 3D and 4D (batch)
 * layouts; per-frame work done by SpatialMaxUnpooling_updateGradInput_frame. */
void THNN_(SpatialMaxUnpooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices,
          int owidth, int oheight)
{
  /* axes for the non-batch layout; bumped below in batch mode */
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  real *gradInput_data;
  real *gradOutput_data;
  THIndex_t *indices_data;

  THNN_CHECK_SHAPE_INDICES(input, indices);

  /* get contiguous gradOutput and indices */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  if (owidth != gradOutput->size[dimw] || oheight != gradOutput->size[dimh]) {
    /* BUGFIX: tensor sizes are `long`; passing them to a %d varargs slot is
     * undefined behavior on LP64 platforms.  Cast explicitly for the message. */
    THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d",
            oheight, owidth,
            (int)gradOutput->size[dimh], (int)gradOutput->size[dimw]);
  }

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 3)
  {
    THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                     indices_data,
                                                     nslices,
                                                     iwidth, iheight,
                                                     owidth, oheight);
  }
  else
  {
    long p;
    /* batch samples are independent -> parallelize over the batch axis */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateGradInput_frame)(
          gradInput_data+p*nslices*iwidth*iheight,
          gradOutput_data+p*nslices*owidth*oheight,
          indices_data+p*nslices*iwidth*iheight,
          nslices,
          iwidth, iheight,
          owidth, oheight);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
  THIndexTensor_(free)(indices);
}
// TODO: improve error messages void THNN_(MultiLabelMarginCriterion_updateOutput)( THNNState *state, THTensor *input, THIndexTensor *target, THTensor *output, THTensor *isTarget, bool sizeAverage) { real *input_data, *isTarget_data; THIndex_t *target_data; long nframe, dim; long t, d, dt, ddt; real sum; THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); if (input->nDimension == 1) { nframe = 1; dim = input->size[0]; THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size"); } else { nframe = input->size[0]; dim = input->size[1]; THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size"); } THArgCheck(THIndexTensor_(minall)(target) >= 0, 3, "target out of range"); THArgCheck(THIndexTensor_(maxall)(target) <= dim, 3, "target out of range"); target = THIndexTensor_(newContiguous)(target); input = THTensor_(newContiguous)(input); input_data = THTensor_(data)(input); target_data = THIndexTensor_(data)(target); THNN_resizeAs_indices(isTarget, target); THTensor_(zero)(isTarget); isTarget_data = THTensor_(data)(isTarget); sum = 0; for (t = 0; t < nframe; t++) { for (ddt = 0; ddt < dim; ddt++) { THIndex_t target_idx = target_data[ddt] - TH_INDEX_BASE; if (target_idx < 0) break; isTarget_data[target_idx] = 1; } for (dt = 0; dt < dim; dt++) { THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE; real input_target; if (target_idx < 0) break; input_target = input_data[target_idx]; for (d = 0; d < dim; d++) { if (!isTarget_data[d]) { real z = 1 - input_target + input_data[d]; if (z > 0) sum += z; } } } input_data += dim; target_data += dim; isTarget_data += dim; } sum /= dim; if (sizeAverage) sum /= nframe; THTensor_(set1d)(output, 0, sum); THTensor_(free)(input); THIndexTensor_(free)(target); }
/* Backward pass of 2D max unpooling (ATen-era port): gather gradOutput
 * values back to the input positions recorded in `indices`.  Sequential
 * over the batch in this version (no OpenMP). */
void THNN_(SpatialMaxUnpooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices,
          int owidth, int oheight)
{
  /* axes for the non-batch layout; bumped below in batch mode */
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  scalar_t *gradInput_data;
  scalar_t *gradOutput_data;
  THIndex_t *indices_data;

  THNN_CHECK_SHAPE_INDICES(input, indices);

  /* get contiguous gradOutput and indices */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->dim() == 4) {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size(dimh-1);
  iheight = input->size(dimh);
  iwidth = input->size(dimw);

  if (owidth != gradOutput->size(dimw) || oheight != gradOutput->size(dimh)) {
    /* BUGFIX: size() returns int64_t; passing it to a %d varargs slot is
     * undefined behavior.  Cast explicitly for the message. */
    THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d",
            oheight, owidth,
            (int)gradOutput->size(dimh), (int)gradOutput->size(dimw));
  }

  /* get raw pointers */
  gradInput_data = gradInput->data<scalar_t>();
  gradOutput_data = gradOutput->data<scalar_t>();
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->dim() == 3)
  {
    THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                     indices_data,
                                                     nslices,
                                                     iwidth, iheight,
                                                     owidth, oheight);
  }
  else
  {
    int p;
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateGradInput_frame)(
          gradInput_data+p*nslices*iwidth*iheight,
          gradOutput_data+p*nslices*owidth*oheight,
          indices_data+p*nslices*iwidth*iheight,
          nslices,
          iwidth, iheight,
          owidth, oheight);
    }
  }

  /* cleanup: drop the refs taken by newContiguous */
  c10::raw::intrusive_ptr::decref(gradOutput);
  THIndexTensor_(free)(indices);
}
/* Backward pass of 3D adaptive max pooling: route each gradOutput value to
 * the input location recorded in `indices` by the forward pass.  Handles
 * 4D (C,T,H,W) and 5D (N,C,T,H,W) inputs.
 *
 * NOTE(review): `indices` is used as-is (no newContiguous) — presumably the
 * forward pass always produces it contiguous; confirm.
 * NOTE(review): this block mixes the new accessor (input->dim()) with direct
 * field access (input->size[...]) — transitional API usage, verify it still
 * compiles against the current TH headers. */
void THNN_(VolumetricAdaptiveMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices)
{
  /* axes for the non-batch layout; bumped below in batch mode */
  int dimD = 0;
  int dimT = 1;
  int dimH = 2;
  int dimW = 3;
  int64_t sizeB = 1;
  int64_t sizeD;
  int64_t isizeT;
  int64_t isizeH;
  int64_t isizeW;
  int64_t osizeT;
  int64_t osizeH;
  int64_t osizeW;
  real *gradInput_data;
  real *gradOutput_data;
  THIndex_t *indices_data;

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->dim() == 5) {
    sizeB = input->size[0];
    dimD++;
    dimT++;
    dimH++;
    dimW++;
  }

  /* sizes */
  sizeD = input->size[dimD];
  isizeT = input->size[dimT];
  isizeH = input->size[dimH];
  isizeW = input->size[dimW];
  /* output extents come from gradOutput's shape */
  osizeT = gradOutput->size[dimT];
  osizeH = gradOutput->size[dimH];
  osizeW = gradOutput->size[dimW];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->dim() == 4)
  {
    THNN_(VolumetricAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                              indices_data,
                                                              sizeD,
                                                              isizeT, isizeH, isizeW,
                                                              osizeT, osizeH, osizeW);
  }
  else
  {
    int64_t b;
    /* batch samples are independent -> parallelize over the batch axis */
#pragma omp parallel for private(b)
    for (b = 0; b < sizeB; b++)
    {
      THNN_(VolumetricAdaptiveMaxPooling_updateGradInput_frame)(
          gradInput_data+b*sizeD*isizeT*isizeH*isizeW,
          gradOutput_data+b*sizeD*osizeT*osizeH*osizeW,
          indices_data+b*sizeD*osizeT*osizeH*osizeW,
          sizeD,
          isizeT, isizeH, isizeW,
          osizeT, osizeH, osizeW);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}
void THNN_(SpatialDilatedMaxPooling_updateGradInput)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THIndexTensor *indices, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, bool ceil_mode) { int dimw = 2; int dimh = 1; long nbatch = 1; int nInputPlane; int inputHeight; int inputWidth; int outputHeight; int outputWidth; real *gradInput_data; real *gradOutput_data; THIndex_t *indices_data; THNN_(SpatialDilatedMaxPooling_shapeCheck) (input, gradOutput, indices, kH, kW, dH, dW, padH, padW, dilationH, dilationW, ceil_mode); /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); /* resize */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } /* sizes */ nInputPlane = input->size[dimh-1]; inputHeight = input->size[dimh]; inputWidth = input->size[dimw]; outputHeight = gradOutput->size[dimh]; outputWidth = gradOutput->size[dimw]; /* get raw pointers */ gradInput_data = THTensor_(data)(gradInput); gradOutput_data = THTensor_(data)(gradOutput); indices_data = THIndexTensor_(data)(indices); /* backprop */ if (input->nDimension == 3) { THNN_(SpatialDilatedMaxPooling_updateGradInput_frame) (gradInput_data, gradOutput_data, indices_data, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight, dW, dH); } else { long p; #pragma omp parallel for private(p) for (p = 0; p < nbatch; p++) { THNN_(SpatialDilatedMaxPooling_updateGradInput_frame) (gradInput_data+p*nInputPlane*inputWidth*inputHeight, gradOutput_data+p*nInputPlane*outputWidth*outputHeight, indices_data+p*nInputPlane*outputWidth*outputHeight, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight, dW, dH); } } /* cleanup */ THTensor_(free)(gradOutput); }
/* Forward pass of dilated 2D max pooling.  Computes output spatial extents
 * from kernel/stride/padding/dilation (ceil or floor mode), resizes output
 * and indices, then delegates per-frame work to
 * SpatialDilatedMaxPooling_updateOutput_frame.  `indices` records, for each
 * output element, where its max came from (consumed by updateGradInput). */
void THNN_(SpatialDilatedMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH,
          bool ceil_mode)
{
  /* axes of the non-batch layout; bumped below in batch mode */
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nInputPlane;
  long inputHeight;
  long inputWidth;
  long outputHeight;
  long outputWidth;
  real *input_data;
  real *output_data;
  THIndex_t *indices_data;

  THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, NULL, NULL, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

  if (input->nDimension == 4)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nInputPlane = input->size[dimh-1];
  inputHeight = input->size[dimh];
  inputWidth = input->size[dimw];

  /* effective kernel extent is dilation*(k-1)+1; ceil vs floor decides
     whether a partial window at the border produces an output element */
  if (ceil_mode)
  {
    outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }
  else
  {
    outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }

  if (padW || padH)
  {
    // ensure that the last pooling starts inside the image
    if ((outputHeight - 1)*dH >= inputHeight + padH)
      --outputHeight;
    if ((outputWidth  - 1)*dW >= inputWidth  + padW)
      --outputWidth;
  }

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  /* resize output */
  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
    /* indices will contain the locations for each output point */
    THIndexTensor_(resize3d)(indices, nInputPlane, outputHeight, outputWidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
      (input_data, output_data,
       indices_data,
       nInputPlane,
       inputWidth, inputHeight,
       outputWidth, outputHeight,
       kW, kH, dW, dH,
       padW, padH,
       dilationW, dilationH
       );
  }
  else
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth);
    /* indices will contain the locations for each output point */
    THIndexTensor_(resize4d)(indices, nbatch, nInputPlane, outputHeight, outputWidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    /* batch samples are independent -> parallelize over the batch axis */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
        (input_data+p*nInputPlane*inputWidth*inputHeight,
         output_data+p*nInputPlane*outputWidth*outputHeight,
         indices_data+p*nInputPlane*outputWidth*outputHeight,
         nInputPlane,
         inputWidth, inputHeight,
         outputWidth, outputHeight,
         kW, kH, dW, dH,
         padW, padH,
         dilationW, dilationH
         );
    }
  }

  /* cleanup: release the contiguous copy */
  THTensor_(free)(input);
}
/* Backward pass of dilated 2D max pooling (ATen-era port).  Routes each
 * gradOutput value to the input location recorded in `indices`; batch mode
 * is parallelized with at::parallel_for instead of OpenMP. */
void THNN_(SpatialDilatedMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH,
          bool ceil_mode)
{
  /* axes of the non-batch layout; bumped below in batch mode */
  int dimw = 2;
  int dimh = 1;
  int64_t nbatch = 1;
  int nInputPlane;
  int inputHeight;
  int inputWidth;
  int outputHeight;
  int outputWidth;
  scalar_t *gradInput_data;
  scalar_t *gradOutput_data;
  THIndex_t *indices_data;

  THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, gradOutput, indices, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize; gradInput starts zeroed, only max locations get written */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->dim() == 4) {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nInputPlane = input->size(dimh-1);
  inputHeight = input->size(dimh);
  inputWidth = input->size(dimw);
  outputHeight = gradOutput->size(dimh);
  outputWidth = gradOutput->size(dimw);

  /* get raw pointers */
  gradInput_data = gradInput->data<scalar_t>();
  gradOutput_data = gradOutput->data<scalar_t>();
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->dim() == 3)
  {
    THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
      (gradInput_data, gradOutput_data,
       indices_data,
       nInputPlane,
       inputWidth, inputHeight,
       outputWidth, outputHeight,
       dW, dH);
  }
  else
  {
    /* batch samples are independent -> parallelize over the batch axis */
    at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
      for (auto p = start; p < end; p++)
      {
        THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
          (gradInput_data+p*nInputPlane*inputWidth*inputHeight,
           gradOutput_data+p*nInputPlane*outputWidth*outputHeight,
           indices_data+p*nInputPlane*outputWidth*outputHeight,
           nInputPlane,
           inputWidth, inputHeight,
           outputWidth, outputHeight,
           dW, dH);
      }
    });
  }

  /* cleanup: drop the ref taken by newContiguous */
  c10::raw::intrusive_ptr::decref(gradOutput);
}
/* Forward pass of dilated 2D max pooling (ATen-era port).  Output extents
 * come from the shared pooling_output_shape helper (replaces the hand-rolled
 * ceil/floor arithmetic of the older version); batch mode is parallelized
 * with at::parallel_for.  `indices` records where each output max came from. */
void THNN_(SpatialDilatedMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH,
          bool ceil_mode)
{
  /* axes of the non-batch layout; bumped below in batch mode */
  int dimw = 2;
  int dimh = 1;
  int64_t nbatch = 1;
  int64_t nInputPlane;
  int64_t inputHeight;
  int64_t inputWidth;
  int64_t outputHeight;
  int64_t outputWidth;
  scalar_t *input_data;
  scalar_t *output_data;
  THIndex_t *indices_data;

  THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, NULL, NULL, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

  if (input->dim() == 4) {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nInputPlane = input->size(dimh-1);
  inputHeight = input->size(dimh);
  inputWidth = input->size(dimw);
  outputHeight = pooling_output_shape<int64_t>(inputHeight, kH, padH, dH, dilationH, ceil_mode);
  outputWidth = pooling_output_shape<int64_t>(inputWidth, kW, padW, dW, dilationW, ceil_mode);

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  /* resize output */
  if (input->dim() == 3)
  {
    THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
    /* indices will contain the locations for each output point */
    THIndexTensor_(resize3d)(indices, nInputPlane, outputHeight, outputWidth);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
      (input_data, output_data,
       indices_data,
       nInputPlane,
       inputWidth, inputHeight,
       outputWidth, outputHeight,
       kW, kH, dW, dH,
       padW, padH,
       dilationW, dilationH
       );
  }
  else
  {
    THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth);
    /* indices will contain the locations for each output point */
    THIndexTensor_(resize4d)(indices, nbatch, nInputPlane, outputHeight, outputWidth);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    /* batch samples are independent -> parallelize over the batch axis */
    at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
      for (auto p = start; p < end; p++)
      {
        THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
          (input_data+p*nInputPlane*inputWidth*inputHeight,
           output_data+p*nInputPlane*outputWidth*outputHeight,
           indices_data+p*nInputPlane*outputWidth*outputHeight,
           nInputPlane,
           inputWidth, inputHeight,
           outputWidth, outputHeight,
           kW, kH, dW, dH,
           padW, padH,
           dilationW, dilationH
           );
      }
    });
  }

  /* cleanup: drop the ref taken by newContiguous */
  c10::raw::intrusive_ptr::decref(input);
}
/* Backward pass of the class negative log-likelihood loss: writes
 * -w[target]/total_weight (size-average mode) or -w[target] at each sample's
 * target slot of gradInput.
 *
 * NOTE(review): only the target slots are written — presumably gradInput was
 * zeroed by the caller; confirm against the calling convention.
 * NOTE(review): target indices use a hard-coded 1-based offset (`- 1`),
 * matching updateOutput above but not the TH_INDEX_BASE convention used
 * elsewhere in this file. */
void THNN_(ClassNLLCriterion_updateGradInput)(THNNState *state, THTensor *input, THIndexTensor *target, THTensor *gradInput, bool sizeAverage, THTensor *weights, THTensor *total_weight)
{
  int n_dims = THTensor_(nDimension)(input);
  int n_classes = THTensor_(size)(input, n_dims - 1);

  if (!THTensor_(isContiguous)(gradInput)) {
    THError("gradInput must be contiguous");
  }

  real *total_weight_data = THTensor_(data)(total_weight);

  /* forward saw no valid (positively-weighted) targets: nothing to do */
  if (!(*total_weight_data > 0)) {
    return;
  }

  if (THIndexTensor_(nDimension)(target) > 1) {
    THError("multi-target not supported");
  }

  if (THTensor_(nDimension)(input) > 2) {
    THError("input tensor should be 1D or 2D");
  }

  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *gradInput_data = THTensor_(data)(gradInput);

  if (THTensor_(nDimension)(input) == 1) {
    int cur_target = target_data[0] - 1;  /* 1-based -> 0-based */
    THAssert(cur_target >= 0 && cur_target < n_classes);

    /* single sample: when sizeAverage && weights, the forward's total_weight
       equals w[target], so -w/total collapses to -1; the only case needing
       the raw weight is (!sizeAverage && weights). */
    gradInput_data[cur_target] =
      (!sizeAverage && weights) ? -weights_data[cur_target] : -1;
  } else if (THTensor_(nDimension)(input) == 2) {
    int batch_size = THTensor_(size)(input, 0);
    int n_target = THTensor_(size)(input, 1);

    int i;
    for (i = 0; i < batch_size; i++) {
      int cur_target = target_data[i] - 1;  /* 1-based -> 0-based */

      THAssert(cur_target >= 0 && cur_target < n_classes);

      gradInput_data[i * n_target + cur_target] =
        -(weights ? weights_data[cur_target] : 1.0f);

      /* size-average mode divides by the total weight accumulated forward */
      if (sizeAverage && *total_weight_data) {
        gradInput_data[i * n_target + cur_target] /= *total_weight_data;
      }
    }
  }

  THIndexTensor_(free)(target);
  if (weights) {
    THTensor_(free)(weights);
  }
}
/* Forward pass of 3D adaptive max pooling to a fixed (osizeT, osizeH, osizeW)
 * output.  Handles 4D (C,T,H,W) and 5D (N,C,T,H,W) inputs.
 *
 * Note: the input is NOT made contiguous; instead its strides (istride*) are
 * captured and passed to the frame kernel, which indexes through them.
 * `indices` receives the argmax input location of each output element.
 * NOTE(review): the parameter order is (osizeT, osizeW, osizeH) but calls
 * below pass (osizeT, osizeH, osizeW) — the W/H swap is apparently the
 * caller's convention; confirm against the header declaration. */
void THNN_(VolumetricAdaptiveMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int osizeT,
          int osizeW,
          int osizeH)
{
  /* axes of the non-batch layout; bumped below in batch mode */
  int dimD = 0;
  int dimT = 1;
  int dimH = 2;
  int dimW = 3;
  int64_t sizeB = 1;
  int64_t sizeD = 0;
  int64_t isizeT = 0;
  int64_t isizeH = 0;
  int64_t isizeW = 0;

  int64_t istrideB = 0;
  int64_t istrideD = 0;
  int64_t istrideT = 0;
  int64_t istrideH = 0;
  int64_t istrideW = 0;

  real *input_data = nullptr;
  real *output_data = nullptr;
  THIndex_t *indices_data = nullptr;

  THNN_ARGCHECK(!input->is_empty() && (input->dim() == 4 || input->dim() == 5), 2, input,
                "non-empty 4D or 5D (batch mode) tensor expected for input, but got: %s");

  if (input->dim() == 5)
  {
    istrideB = input->stride[0];
    sizeB = input->size[0];
    dimD++;
    dimT++;
    dimH++;
    dimW++;
  }

  /* sizes */
  sizeD = input->size[dimD];
  isizeT = input->size[dimT];
  isizeH = input->size[dimH];
  isizeW = input->size[dimW];
  /* strides: the frame kernel walks the (possibly non-contiguous) input */
  istrideD = input->stride[dimD];
  istrideT = input->stride[dimT];
  istrideH = input->stride[dimH];
  istrideW = input->stride[dimW];

  /* resize output */
  if (input->dim() == 4)
  {
    THTensor_(resize4d)(output, sizeD, osizeT, osizeH, osizeW);
    /* indices will contain max input locations for each output point */
    THIndexTensor_(resize4d)(indices, sizeD, osizeT, osizeH, osizeW);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    THNN_(VolumetricAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
                                                           indices_data,
                                                           sizeD,
                                                           isizeT, isizeH, isizeW,
                                                           osizeT, osizeH, osizeW,
                                                           istrideD, istrideT,
                                                           istrideH, istrideW);
  }
  else
  {
    int64_t b;

    THTensor_(resize5d)(output, sizeB, sizeD, osizeT, osizeH, osizeW);
    /* indices will contain max input locations for each output point */
    THIndexTensor_(resize5d)(indices, sizeB, sizeD, osizeT, osizeH, osizeW);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    /* batch samples are independent -> parallelize over the batch axis;
       note input advances by istrideB, output/indices are dense */
#pragma omp parallel for private(b)
    for (b = 0; b < sizeB; b++)
    {
      THNN_(VolumetricAdaptiveMaxPooling_updateOutput_frame)(input_data+b*istrideB,
                                                             output_data+b*sizeD*osizeT*osizeH*osizeW,
                                                             indices_data+b*sizeD*osizeT*osizeH*osizeW,
                                                             sizeD,
                                                             isizeT, isizeH, isizeW,
                                                             osizeT, osizeH, osizeW,
                                                             istrideD, istrideT,
                                                             istrideH, istrideW);
    }
  }
}
/* Forward pass of dilated 3D (volumetric) max pooling.  Computes output
 * extents in T/H/W from kernel/stride/padding/dilation (ceil or floor mode),
 * resizes output and indices, and delegates per-frame work to
 * VolumetricDilatedMaxPooling_updateOutput_frame.  Handles 4D (C,T,H,W) and
 * 5D (N,C,T,H,W) inputs. */
void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int kT,
          int kW,
          int kH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH,
          int dilationT,
          int dilationW,
          int dilationH,
          bool ceilMode)
{
  int64_t nslices;
  int64_t itime;
  int64_t iheight;
  int64_t iwidth;
  int64_t otime;
  int64_t oheight;
  int64_t owidth;
  real *input_data;
  real *output_data;
  THIndex_t *indices_data;

  /* axes of the non-batch layout; bumped below in batch mode */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
        state, input, NULL, NULL,
        kT, kW, kH, dT, dW, dH,
        pT, pW, pH, dilationT, dilationW, dilationH,
        ceilMode);

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];

  /* effective kernel extent is dilation*(k-1)+1; ceil vs floor decides
     whether a partial window at the border produces an output element */
  if (ceilMode)
  {
    otime   = (int)(ceil((float)(itime   - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(ceil((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }
  else
  {
    otime   = (int)(floor((float)(itime   - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(floor((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }

  if (pT || pW || pH)
  {
    // ensure that the last pooling starts inside the image
    if ((otime - 1)*dT >= itime + pT)
      --otime;
    if ((oheight - 1)*dH >= iheight + pH)
      --oheight;
    if ((owidth - 1)*dW >= iwidth + pW)
      --owidth;
  }

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) /* non-batch mode */
  {
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j uchar locations packed into float/double */
    THIndexTensor_(resize4d)(indices, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
      input_data, output_data,
      indices_data,
      nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH,
      pT, pW, pH, dilationT, dilationW, dilationH
    );
  }
  else /* batch mode */
  {
    int64_t p;
    int64_t nBatch = input->size[0];

    /* per-sample element strides for the dense, contiguous layout */
    int64_t istride = nslices * itime * iwidth * iheight;
    int64_t ostride = nslices * otime * owidth * oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */
    THIndexTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    /* batch samples are independent -> parallelize over the batch axis */
#pragma omp parallel for private(p)
    for (p=0; p < nBatch; p++)
    {
      THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
        input_data   + p * istride,
        output_data  + p * ostride,
        indices_data + p * ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH,
        pT, pW, pH, dilationT, dilationW, dilationH
      );
    }
  }

  /* cleanup: release the contiguous copy */
  THTensor_(free)(input);
}
/* Accumulate embedding gradients: for every index in `input`, axpy the
 * corresponding gradOutput row (times `scale`) into that row of gradWeight.
 *
 * input:           1D or 2D tensor of 1-based row indices; must be contiguous.
 * gradOutput:      one gradient row per index (made contiguous here).
 * gradWeight:      accumulated in place; must be contiguous.
 * count:           scratch counts used when scaleGradByFreq divides each
 *                  update by the index's frequency.
 * sorted, indices: unused in this CPU path — presumably kept for signature
 *                  parity with the GPU kernel (NOTE(review): confirm).
 * paddingValue:    raw index value whose rows receive no gradient.
 * scale:           global multiplier for the accumulated gradients. */
void THNN_(LookupTable_accGradParameters)(
          THNNState *state,
          THIndexTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THIntegerTensor *count,
          THTensor *sorted,
          THTensor *indices,
          bool scaleGradByFreq,
          int paddingValue,
          real scale)
{
  long i;
  THInteger_t *count_data = NULL;

  if (scaleGradByFreq)
  {
    THIntegerTensor_(resize1d)(count, gradWeight->size[0]);
    count_data = THIntegerTensor_(data)(count);
  }

  if (!THTensor_(isContiguous)(gradWeight))
    THError("gradWeight must be contiguous");
  if (!THIndexTensor_(isContiguous)(input))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(input) != 1 && THIndexTensor_(nDimension)(input) != 2)
    THError("input must be a vector or matrix");

  THIndex_t *input_data = THIndexTensor_(data)(input);
  long numel = THIndexTensor_(nElement)(input);
  long numw = THTensor_(size)(gradWeight, 0);

  // check that inputs are all within range
  for (i=0; i<numel; i++)
    if (input_data[i] < 1 || input_data[i] > numw)
      THError("input out of range");

  gradOutput = THTensor_(newContiguous)(gradOutput);

  real *gw = THTensor_(data)(gradWeight);
  real *go = THTensor_(data)(gradOutput);
  long stride = THTensor_(stride)(gradWeight, 0);

  /* recompute per-row frequencies for the rows referenced this call */
  if (count_data)
    THNN_(LookupTable_resetCount)(count_data, input);

#ifdef _OPENMP
  if (numel > 1000)
  {
    // The strategy is to parallelize over sections of the vocabulary, so that
    // thread 1 handles updates to gradWeight[0..nVocab/nThreads]. Every thread
    // has to traverse the entire input, but the dominating factor is the axpy
    // BLAS call.
#pragma omp parallel private(i)
    {
      int tid = omp_get_thread_num();
      int nthreads = omp_get_num_threads();
      /* each thread owns a disjoint [start, end) slice of the vocabulary,
         so no two threads ever write the same gradWeight row */
      long start = tid * (numw/nthreads + 1);
      long end = start + (numw/nthreads + 1);
      for (i=0; i<numel; i++)
      {
        if (input_data[i] != paddingValue)
        {
          long k = input_data[i] - 1;  /* 1-based -> 0-based row */
          if (k >= start && k < end)
          {
            real scale_ = scale;
            if (count_data) scale_ /= count_data[k];
            THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
          }
        }
      }
    }

    THTensor_(free)(gradOutput);
    return;
  }
#endif

  /* sequential fallback (and small-input path) */
  for (i=0; i<numel; i++)
  {
    if (input_data[i] != paddingValue)
    {
      long k = input_data[i] - 1;  /* 1-based -> 0-based row */
      real scale_ = scale;
      if (count_data) scale_ /= count_data[k];
      THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
    }
  }

  THTensor_(free)(gradOutput);
}
/*
 * Backward pass for volumetric (3D) dilated max pooling.
 * Routes each gradOutput value back to the input location stored in
 * `indices` (per-frame work done by ..._updateGradInput_frame).
 *
 * input      4D (C x T x H x W) or 5D (N x C x T x H x W) tensor.
 * gradOutput gradient w.r.t. the pooled output; made contiguous locally.
 * gradInput  resized to match `input` and zero-filled before scattering.
 * indices    argmax locations produced by the forward pass.
 * kT..dilationH  kernel / stride / padding / dilation per dimension.
 * ceilMode   forwarded to the shape check only.
 */
void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices,
          int kT, int kW, int kH,
          int dT, int dW, int dH,
          int pT, int pW, int pH,
          int dilationT, int dilationW, int dilationH,
          bool ceilMode)
{
  int nslices;
  int itime;
  int iheight;
  int iwidth;
  int otime;
  int oheight;
  int owidth;
  real *gradInput_data;
  real *gradOutput_data;
  THIndex_t *indices_data;

  /* dimension positions for the non-batched 4D layout; bumped below for 5D */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
        state, input, gradOutput, indices,
        kT, kW, kH, dT, dW, dH,
        pT, pW, pH, dilationT, dilationW, dilationH, ceilMode);

  // TODO: gradOutput shape check
  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 5)
  {
    /* batched input: shift every dimension index right by one */
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  otime = gradOutput->size[dimt];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 4) /* non-batch mode*/
  {
    THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data, indices_data,
      nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      dT, dW, dH,
      pT, pW, pH,
      dilationT, dilationW, dilationH);
  }
  else /* batch mode */
  {
    int64_t p;
    int64_t nBatch = input->size[0];

    /* per-sample element strides; valid because gradOutput is contiguous
       and gradInput was freshly resized (hence contiguous) above */
    int64_t istride = nslices * itime * iwidth * iheight;
    int64_t ostride = nslices * otime * owidth * oheight;

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++)
    {
      THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
        gradInput_data + p * istride,
        gradOutput_data + p * ostride,
        indices_data + p * ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        dT, dW, dH,
        pT, pW, pH,
        dilationT, dilationW, dilationH);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}
void THNN_(SpatialMaxUnpooling_updateOutput)( THNNState *state, THTensor *input, THTensor *output, THIndexTensor *indices, int owidth, int oheight) { int dimw = 2; int dimh = 1; int nbatch = 1; int nslices; int iheight; int iwidth; scalar_t *input_data; scalar_t *output_data; THIndex_t *indices_data; AT_CHECK(!input->is_empty() && (input->dim() == 3 || input->dim() == 4), "non-empty 3D or 4D (batch mode) tensor expected for input, but got sizes: ", input->sizes()); THNN_CHECK_SHAPE_INDICES(input, indices); if (input->dim() == 4) { nbatch = input->size(0); dimw++; dimh++; } /* sizes */ nslices = input->size(dimh-1); iheight = input->size(dimh); iwidth = input->size(dimw); /* get contiguous input and indices */ input = THTensor_(newContiguous)(input); indices = THIndexTensor_(newContiguous)(indices); /* resize output */ if (input->dim() == 3) { THTensor_(resize3d)(output, nslices, oheight, owidth); THTensor_(zero)(output); input_data = input->data<scalar_t>(); output_data = output->data<scalar_t>(); indices_data = THIndexTensor_(data)(indices); THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data, indices_data, nslices, iwidth, iheight, owidth, oheight); } else { int p; THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth); THTensor_(zero)(output); input_data = input->data<scalar_t>(); output_data = output->data<scalar_t>(); indices_data = THIndexTensor_(data)(indices); for (p = 0; p < nbatch; p++) { THNN_(SpatialMaxUnpooling_updateOutput_frame)( input_data+p*nslices*iwidth*iheight, output_data+p*nslices*owidth*oheight, indices_data+p*nslices*iwidth*iheight, nslices, iwidth, iheight, owidth, oheight); } } /* cleanup */ c10::raw::intrusive_ptr::decref(input); THIndexTensor_(free)(indices); }
void THNN_(LookupTable_renorm)( THNNState *state, THIndexTensor *idx, THTensor *weight, accreal maxNorm_, accreal normType_) { real maxNorm = TH_CONVERT_ACCREAL_TO_REAL(maxNorm_); real normType = TH_CONVERT_ACCREAL_TO_REAL(normType_); if (!THTensor_(isContiguous)(weight)) THError("weight must be contiguous"); if (!THIndexTensor_(isContiguous)(idx)) THError("input must be contiguous"); if (THIndexTensor_(nDimension)(idx) != 1) THError("idx must be a vector"); if (normType <= 0) THError("non-positive-norm not supported"); ptrdiff_t i; THIndex_t *row_idx = THIndexTensor_(data)(idx); ptrdiff_t numel = THIndexTensor_(nElement)(idx); long numw = THTensor_(size)(weight, 0); long stride = THTensor_(stride)(weight, 0); real *gw = THTensor_(data)(weight); for (i=0; i<numel; i++) { if (row_idx[i] < TH_INDEX_BASE || row_idx[i] >= numw + TH_INDEX_BASE) { THError("input need to be in the range %ld <= input < %ld, " "but got input of value: %ld", TH_INDEX_BASE, (numw + TH_INDEX_BASE), row_idx[i]); } } // get unique indices qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex)); ptrdiff_t ptr = 0; for (i=0; i<numel; i++) if (i == 0 || row_idx[i] != row_idx[i-1]) row_idx[ptr++] = row_idx[i]; numel = ptr; #ifdef _OPENMP if (numel > 1000) { // The strategy is to parallelize over the rows that appear in // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads]. // This distributes the work evenly to each thread. #pragma omp parallel for private(i) for (i=0; i<numel; i++) { long k = row_idx[i] - TH_INDEX_BASE; THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType); } return; } #endif for (i=0; i<numel; i++) { long k = row_idx[i] - TH_INDEX_BASE; THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType); } }
/*
 * Backward pass for the multi-label margin criterion.
 * For each frame and each (target, non-target) pair whose hinge
 * 1 - input[target] + input[d] is positive, subtracts g from the target
 * entry of gradInput and adds g to the non-target entry.
 *
 * target   per-frame class indices (TH_INDEX_BASE-based); a value below
 *          TH_INDEX_BASE ends that frame's target list (see the break below).
 * isTarget 0/1 mask marking target entries — presumably the buffer filled
 *          by updateOutput; verify against the caller.
 * NOTE(review): the gradient is not scaled by an upstream gradOutput;
 * callers appear to apply any outer scaling themselves.
 */
void THNN_(MultiLabelMarginCriterion_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *gradInput,
          THTensor *isTarget,
          bool sizeAverage)
{
  real *input_data;
  real *gradInput_data;
  THIndex_t *target_data;
  real *isTarget_data;
  long nframe, dim;
  long t, d, dt;
  real g;

  THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2,
             "vector or matrix expected");

  if (input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
    THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3,
               "inconsistent target size");
    THArgCheck((isTarget->nDimension == 1) && (isTarget->size[0] == dim), 3,
               "inconsistent isTarget size");
  }
  else
  {
    nframe = input->size[0];
    dim = input->size[1];
    THArgCheck((target->nDimension == 2) && (target->size[0] == nframe)
               && (target->size[1] == dim), 3, "inconsistent target size");
    THArgCheck((isTarget->nDimension == 2) && (isTarget->size[0] == nframe)
               && (isTarget->size[1] == dim), 3, "inconsistent isTarget size");
  }

  /* >= 0 (not >= TH_INDEX_BASE) so sub-base terminator values pass the check */
  THArgCheck(THIndexTensor_(minall)(target) >= 0, 3, "target out of range");
  THArgCheck(THIndexTensor_(maxall)(target) <= dim, 3, "target out of range");

  THArgCheck(THTensor_(minall)(isTarget) >= 0, 3, "isTarget out of range");
  THArgCheck(THTensor_(maxall)(isTarget) <= 1, 3, "isTarget out of range");

  target = THIndexTensor_(newContiguous)(target);
  input = THTensor_(newContiguous)(input);
  isTarget = THTensor_(newContiguous)(isTarget);

  input_data = THTensor_(data)(input);
  target_data = THIndexTensor_(data)(target);
  isTarget_data = THTensor_(data)(isTarget);

  /* per-pair gradient magnitude: loss is averaged over dim, and
     additionally over frames when sizeAverage is set */
  g = sizeAverage ? ( 1./((real)(nframe*dim)) ) : ( 1./((real)dim) );

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);
  gradInput_data = THTensor_(data)(gradInput);

  for (t = 0; t < nframe; t++)
  {
    for (dt = 0; dt < dim; dt++)
    {
      THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE;
      real input_target;
      if (target_idx < 0)
        break; /* end of this frame's target list */

      input_target = input_data[target_idx];
      for (d = 0; d < dim; d++)
      {
        if (!isTarget_data[d])
        {
          real z = 1 - input_target + input_data[d];
          if (z > 0) /* hinge active: margin violated for this pair */
          {
            gradInput_data[target_idx] -= g;
            gradInput_data[d] += g;
          }
        }
      }
    }
    /* advance all per-frame pointers to the next row (row-major layout) */
    input_data += dim;
    target_data += dim;
    isTarget_data += dim;
    gradInput_data += dim;
  }

  THTensor_(free)(input);
  THIndexTensor_(free)(target);
  THTensor_(free)(isTarget);
}
/*
 * Spatial adaptive max-pooling forward pass: pools a 3D (C x H x W) or
 * 4D (N x C x H x W) input down to a fixed (osizeH, osizeW) spatial size.
 * Works directly on the caller's strides, so the input need not be
 * contiguous; per-plane work is done by ..._updateOutput_frame.
 */
void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int osizeW,
          int osizeH)
{
  int wDim = 2;
  int hDim = 1;
  int64_t nBatch = 1;
  int64_t nPlanes = 0;
  int64_t inH = 0;
  int64_t inW = 0;
  int64_t strideD = 0;
  int64_t strideH = 0;
  int64_t strideW = 0;
  int64_t strideB = 0;
  real *in_ptr = nullptr;
  real *out_ptr = nullptr;
  THIndex_t *ind_ptr = nullptr;

  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
                "3D or 4D (batch mode) tensor expected for input, but got: %s");

  if (input->nDimension == 4)
  {
    /* batched: remember the batch stride and shift spatial dims right */
    strideB = input->stride[0];
    nBatch = input->size[0];
    wDim++;
    hDim++;
  }

  /* plane count and spatial extent of the input */
  nPlanes = input->size[hDim - 1];
  inH = input->size[hDim];
  inW = input->size[wDim];

  /* caller strides — input is deliberately NOT made contiguous */
  strideD = input->stride[hDim - 1];
  strideH = input->stride[hDim];
  strideW = input->stride[wDim];

  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nPlanes, osizeH, osizeW);
    /* indices will hold the i,j argmax location of each output point */
    THIndexTensor_(resize3d)(indices, nPlanes, osizeH, osizeW);

    in_ptr = THTensor_(data)(input);
    out_ptr = THTensor_(data)(output);
    ind_ptr = THIndexTensor_(data)(indices);

    THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(in_ptr, out_ptr, ind_ptr,
                                                        nPlanes,
                                                        inH, inW,
                                                        osizeH, osizeW,
                                                        strideD, strideH, strideW);
  }
  else
  {
    int64_t batch;

    THTensor_(resize4d)(output, nBatch, nPlanes, osizeH, osizeW);
    /* indices will hold the i,j argmax location of each output point */
    THIndexTensor_(resize4d)(indices, nBatch, nPlanes, osizeH, osizeW);

    in_ptr = THTensor_(data)(input);
    out_ptr = THTensor_(data)(output);
    ind_ptr = THIndexTensor_(data)(indices);

#pragma omp parallel for private(batch)
    for (batch = 0; batch < nBatch; batch++)
    {
      /* output/indices are freshly resized, hence contiguous: offset by
         whole-sample element counts; input offsets use its real stride */
      THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
          in_ptr + batch * strideB,
          out_ptr + batch * nPlanes * osizeH * osizeW,
          ind_ptr + batch * nPlanes * osizeH * osizeW,
          nPlanes,
          inH, inW,
          osizeH, osizeW,
          strideD, strideH, strideW);
    }
  }
}