void THNN_(VolumetricConvolutionMM_accGradParameters)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, THTensor *fgradInput, int kT, int kW, int kH, int dT, int dW, int dH, int pT, int pW, int pH, accreal scale_) { real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); THNN_(VolumetricConvolutionMM_shapeCheck)( state, input, gradOutput, gradWeight, gradBias, kT, kW, kH, dT, dW, dH, pT, pW, pH, 1); input = THTensor_(newContiguous)(input); gradOutput = THTensor_(newContiguous)(gradOutput); if (gradWeight) { gradWeight = THNN_(newViewWeight)(gradWeight); } if (input->nDimension == 4) // non-batch mode { THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale); } else // batch mode { int64_t T = input->size[0]; int64_t t; #ifdef _OPENMP #pragma omp parallel for if(T > CONV3D_OMP_THRESHOLD) private(t) #endif for (t = 0; t < T; t++) { THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); THTensor *finput_t = NULL; if (gradWeight) { finput_t = THTensor_(newSelect)(finput, 0, t); } THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale); THTensor_(free)(gradOutput_t); if (gradWeight) { THTensor_(free)(finput_t); } } } THTensor_(free)(input); THTensor_(free)(gradOutput); if (gradWeight) { THTensor_(free)(gradWeight); } }
/*
 * Accumulate weight/bias gradients for TemporalRowConvolution.
 * When featFirst is false, input/gradOutput arrive feature-last and are
 * transposed to feature-first before contiguous copies are taken.
 */
void THNN_(TemporalRowConvolution_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *finput,
    THTensor *fgradInput,
    int kW, int dW, int padW,
    bool featFirst,
    accreal scale_)
{
  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
  int ndim = input->nDimension;

  /* tinput/tgradOutput are only created (and later freed) when a
   * transpose is needed; input/gradOutput are rebound to new contiguous
   * tensors in both branches and freed at the end. */
  THTensor *tinput, *tgradOutput;

  if (!featFirst) {
    tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
    tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
    input = THTensor_(newContiguous)(tinput);
    gradOutput = THTensor_(newContiguous)(tgradOutput);
  } else {
    input = THTensor_(newContiguous)(input);
    gradOutput = THTensor_(newContiguous)(gradOutput);
  }

  THNN_(TemporalRowConvolution_shapeCheck)
  (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);

  if (ndim == 2) {
    /* single sample */
    THNN_(TemporalRowConvolution_accGradParameters_frame)(
      gradOutput, gradWeight, gradBias, finput, scale);
  } else {
    /* batch mode: fold in one sample at a time (serial because
     * gradWeight/gradBias are shared accumulators) */
    int64_t T = input->size[0];
    int64_t t;

    for (t = 0; t < T; t++) {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(TemporalRowConvolution_accGradParameters_frame)(
        gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }

  if (!featFirst) {
    THTensor_(free)(tinput);
    THTensor_(free)(tgradOutput);
  }
  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
}
// col2im forward: folds a "columns" tensor (the layout produced by im2col)
// back into an image batch of shape (N, nOutputPlane, outputHeight,
// outputWidth), summing overlapping patch contributions.
void THNN_(Col2Im_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    int64_t outputHeight, int64_t outputWidth,
    int64_t kH, int64_t kW,
    int64_t dilationH, int64_t dilationW,
    int64_t padH, int64_t padW,
    int64_t dH, int64_t dW)
{
  THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth,
                           kH, kW, dilationH, dilationW, padH, padW, dH, dW);

  // Add a batch dimension for a 2D input so the loop below is uniform;
  // undone at the end.  NOTE: this resizes the caller's tensor in place.
  bool batched_input = true;
  if (input->dim() == 2) {
    // Force batch
    batched_input = false;
    THTensor_(resize3d)(input, 1, input->size(0), input->size(1));
  }

  long batchSize = input->size(0);
  long nInputPlane = input->size(1);
  // each output plane occupies kW*kH rows of the column representation
  long nOutputPlane = nInputPlane / (kW * kH);

  input = THTensor_(newContiguous)(input);

  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
  THTensor_(zero)(output);

  THTensor *input_n = THTensor_(new)();
  THTensor *output_n = THTensor_(new)();

  // spatial extent of the unfolded (column) representation
  int64_t height_col = (outputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
  int64_t width_col = (outputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;

  for (int64_t elt = 0; elt < batchSize; elt++) {
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(output_n, output, 0, elt);

    THNN_(col2im)(
      input_n->data<scalar_t>(),
      nOutputPlane,
      outputHeight, outputWidth,
      height_col, width_col,
      kH, kW,
      padH, padW,
      dH, dW,
      dilationH, dilationW,
      output_n->data<scalar_t>());
  }

  c10::raw::intrusive_ptr::decref(input_n);
  c10::raw::intrusive_ptr::decref(output_n);

  // restore the non-batched shape if we forced one above
  if (!batched_input) {
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
  }
  c10::raw::intrusive_ptr::decref(input);
}
/*
 * Renormalize the rows of `weight` selected by `idx` so that each row's
 * normType-norm does not exceed maxNorm.
 * Indices are 1-based.  NOTE: `idx` is sorted and deduplicated IN PLACE,
 * so the caller's index tensor is modified.
 */
void THNN_(LookupTable_renorm)(
    THNNState *state,
    THIndexTensor *idx,
    THTensor *weight,
    real maxNorm,
    real normType)
{
  if (!THTensor_(isContiguous)(weight))
    THError("weight must be contiguous");
  if (!THIndexTensor_(isContiguous)(idx))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(idx) != 1)
    THError("idx must be a vector");
  if (normType <= 0)
    THError("non-positive-norm not supported");

  long i;
  THIndex_t *row_idx = THIndexTensor_(data)(idx);
  long numel = THIndexTensor_(nElement)(idx);

  long numw = THTensor_(size)(weight, 0);
  long stride = THTensor_(stride)(weight, 0);
  real *gw = THTensor_(data)(weight);

  /* validate every (1-based) index before touching any row */
  for (i=0; i<numel; i++)
    if (row_idx[i] < 1 || row_idx[i] > numw)
      THError("input out of range");

  // get unique indices
  qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
  long ptr = 0;
  for (i=0; i<numel; i++)
    if (i == 0 || row_idx[i] != row_idx[i-1])
      row_idx[ptr++] = row_idx[i];
  numel = ptr;  /* numel now counts unique indices only */

#ifdef _OPENMP
  if (numel > 1000)
  {
    // The strategy is to parallelize over the rows that appear in
    // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
    // This distributes the work evenly to each thread.
    // Safe: after dedup, each row is renormalized by exactly one thread.
    #pragma omp parallel for private(i)
    for (i=0; i<numel; i++)
    {
      long k = row_idx[i] - 1;  /* convert to 0-based row index */
      THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
    }
    return;
  }
#endif
  /* serial fallback (no OpenMP, or too few rows to amortize the overhead) */
  for (i=0; i<numel; i++)
  {
    long k = row_idx[i] - 1;
    THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
  }
}
/*
 * Backward (w.r.t. input) for SpatialConvolutionMM.
 * fgradInput is a scratch buffer shaped like finput; gradInput is resized
 * to match input and overwritten.
 */
void THNN_(SpatialConvolutionMM_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    THTensor *finput,
    THTensor *fgradInput,
    int kW, int kH,
    int dW, int dH,
    int padW, int padH)
{
  long nOutputPlane = weight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3,
              "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);
  // depending on the BLAS library, fgradInput (result tensor) might
  // be left uninitialized on zero alpha, which might lead to weird behavior
  // hence, to be safe, zero it
  THTensor_(zero)(fgradInput);

  /* weight is transposed IN PLACE for the backward matmul and restored
   * after the loop; the frame helper is assumed to only read it —
   * TODO confirm against the frame helper's definition. */
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    /* single sample */
    THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
  }
  else
  {
    /* batch mode: each iteration writes a disjoint slice of
     * gradInput/fgradInput, so the loop can run in parallel */
    long T = input->size[0];
    long t;

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  /* restore the original weight layout */
  THTensor_(transpose)(weight, weight, 0, 1);
}
/*
 * Legacy sparse linear forward: output = input * weight^T + bias.
 * `input` encodes each sample as nnz (index, value) pairs
 * (batchSize x nnz x 2) with 1-based column indices; zero values are
 * treated as padding and skipped.
 */
void THNN_(SparseLinear_legacyUpdateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias)
{
  int64_t h, i;
  int64_t outDim = THTensor_(size)(weight, 0);
  int64_t inDim = THTensor_(size)(weight, 1);

  THArgCheck(THNN_(checkLegacyInput)(input), 2, "input size must be batchsize x nnz x 2");
  THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
  THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");

  weight = THTensor_(newContiguous)(weight);

  int64_t batchSize = THTensor_(size)(input, 0);
  int64_t nnz = THTensor_(size)(input, 1);
  THTensor_(resize2d)(output, batchSize, outDim);

  // output = weight * input + bias
  THTensor_(zero)(output);
  /* parallel over samples: each h writes only its own output row */
#pragma omp parallel for private(h, i) schedule(static) if (   \
  batchSize > 1 && batchSize * nnz * outDim > 10000)
  for (h = 0; h < batchSize; h++) {
    for (i = 0; i < nnz; i++) {
      real val = THNN_(get3d)(input, h, i, 1);
      if (val == 0) {
        continue;  /* zero entries are padding */
      }

      /* convert 1-based sparse index to 0-based column offset */
      int64_t offset = (int64_t)(THNN_(get3d)(input, h, i, 0)) - 1;
      if (offset >= 0 && offset < inDim) {
        /* output[h] += val * weight[:, offset] */
        THBlas_(axpy)(outDim,
                      val,
                      COL_PTR2(weight, offset), weight->stride[0],
                      ROW_PTR2(output, h), output->stride[1]);
      } else {
        THError("index out of bound. updateOutput: %d not between 1 and %d",
                offset + 1, inDim);
      }
    }
  }

  /* add bias row-wise (serial; cheap compared to the loop above) */
  THTensor* output_row = THTensor_(new)();
  for (h = 0; h < batchSize; h++) {
    THTensor_(select)(output_row, output, 0, h);
    THTensor_(cadd)(output_row, bias, 1.0, output_row);
  }
  THTensor_(free)(output_row);
  THTensor_(free)(weight);
}
/*
 * Accumulate weight/bias gradients for SpatialConvolutionMM.
 * Accepts a 2D or 4D gradWeight; a 4D tensor is temporarily viewed as 2D
 * (nOutputPlane x rest) for the GEMM-based frame helper.
 */
void THNN_(SpatialConvolutionMM_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *finput,
    THTensor *fgradInput,
    int kW, int kH,
    int dW, int dH,
    int padW, int padH,
    real scale)
{
  int freeWeight = 0;  /* set when we allocate the 2D view below */
  long nOutputPlane = gradWeight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3,
              "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
  THArgCheck(gradWeight->nDimension == 2 || gradWeight->nDimension == 4, 4, "gradWeight tensor should be 2D or 4D");

  if (gradWeight->nDimension == 4) {
    /* flatten (out, in, kH, kW) -> (out, in*kH*kW) over the same storage */
    long s1 = gradWeight->size[0];
    long s2 = gradWeight->size[1] * gradWeight->size[2] * gradWeight->size[3];
    gradWeight = THTensor_(newWithStorage2d)(gradWeight->storage, gradWeight->storageOffset, s1, -1, s2, -1);
    freeWeight = 1;
  }

  if(input->nDimension == 3)
  {
    /* single sample */
    THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else
  {
    /* batch mode: serial, since gradWeight/gradBias are shared accumulators */
    long T = input->size[0];
    long t;

    for(t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }

  if (freeWeight)
    THTensor_(free)(gradWeight);
}
/*
 * Backward (w.r.t. input) for SpatialConvolutionMM.
 * fgradInput is a scratch buffer shaped like finput; gradInput is resized
 * to match input and overwritten.
 */
void THNN_(SpatialConvolutionMM_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    THTensor *finput,
    THTensor *fgradInput,
    int kW, int kH,
    int dW, int dH,
    int padW, int padH)
{
  long nOutputPlane = weight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3,
              "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);
  // BUGFIX: depending on the BLAS library, fgradInput (the GEMM result
  // tensor) might be left uninitialized on zero alpha, which would unfold
  // garbage into gradInput; zero it to be safe.  This matches the sibling
  // variant of this function elsewhere in the file.
  THTensor_(zero)(fgradInput);

  /* weight is transposed IN PLACE for the backward matmul and restored
   * after the loop */
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    /* single sample */
    THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
  }
  else
  {
    /* batch mode: each iteration writes a disjoint slice of
     * gradInput/fgradInput, so the loop can run in parallel */
    long T = input->size[0];
    long t;

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  /* restore the original weight layout */
  THTensor_(transpose)(weight, weight, 0, 1);
}
void THNN_(VolumetricConvolutionMM_accGradParameters)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, int kT, int kW, int kH, int dT, int dW, int dH, int pT, int pW, int pH, real scale) { int freeWeight; int nOutputPlane = (int)gradWeight->size[0]; THNN_(VolumetricConvolutionMM_shapeCheck)( state, input, gradOutput, gradWeight, gradBias, kT, kW, kH, dT, dW, dH, pT, pW, pH); input = THTensor_(newContiguous)(input); gradOutput = THTensor_(newContiguous)(gradOutput); freeWeight = THNN_(view_weight)(&gradWeight); if (input->nDimension == 4) // non-batch mode { THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale); } else // batch mode { long T = input->size[0]; long t; for (t = 0; t < T; t++) { THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale); THTensor_(free)(gradOutput_t); THTensor_(free)(finput_t); } } THTensor_(free)(input); THTensor_(free)(gradOutput); if (freeWeight) THTensor_(free)(gradWeight); }
/*
 * Legacy sparse linear: zero only the parameter gradients actually touched
 * by the last forward pass.  gradBias is fully zeroed; gradWeight is zeroed
 * column-by-column for the columns referenced by lastInput's (1-based)
 * sparse indices.
 */
void THNN_(SparseLinear_legacyZeroGradParameters)(
    THNNState *state,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *lastInput)
{
  int64_t h, i, j;
  int64_t outDim = gradWeight->size[0];
  int64_t inDim = gradWeight->size[1];

  THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
  THArgCheck(THNN_(checkLegacyInput)(lastInput), 4, "input size must be batchsize x nnz x 2");

  THTensor_(zero)(gradBias);

  int64_t batchSize = THTensor_(size)(lastInput, 0);
  int64_t nnz = THTensor_(size)(lastInput, 1);

  /* zeroing the same column twice is idempotent, so concurrent writes to
   * a shared column are benign here */
#pragma omp parallel for private(h, i, j) schedule(static) if (   \
  batchSize > 1 && batchSize * nnz * outDim > 10000)
  for (h = 0; h < batchSize; h++) {
    for (i = 0; i < nnz; i++) {
      if (THNN_(get3d)(lastInput, h, i, 1) == 0 ) {
        continue;  /* zero values are padding */
      }

      /* convert 1-based sparse index to 0-based column offset */
      int64_t offset = (int64_t)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
      if (offset >= 0 && offset < inDim) {
        real* pGradWeight = COL_PTR2(gradWeight, offset);
        if (gradWeight->stride[0] == 1) {
          /* contiguous column: bulk fill */
          THVector_(fill)(pGradWeight, 0, outDim);
        } else {
          /* strided column: zero element by element */
          int64_t stride = gradWeight->stride[0];
          for (j = 0; j < outDim; ++j) {
            pGradWeight[j * stride] = 0;
          }
        }
      } else {
        THError(
          "index out of bound. zeroGradParameters: %d not between 1 and %d",
          offset + 1, inDim);
      }
    }
  }
}
/*
 * Accumulate gradients for Linear:
 *   gradWeight += scale * gradOutput (outer/matrix product with) input
 *   gradBias   += scale * gradOutput sums (when bias is present)
 * gradInput and weight are unused here (kept for interface symmetry).
 */
void THNN_(Linear_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    THTensor *bias,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *addBuffer,
    real scale)
{
  long dim = THTensor_(nDimension)(input);
  if (dim == 1) {
    /* single sample: rank-1 (outer product) accumulation */
    THTensor_(addr)(gradWeight,1,gradWeight,scale,gradOutput,input);
    if (bias) {
      THTensor_(cadd)(gradBias,gradBias,scale,gradOutput);
    }
  }
  else if (dim == 2) {
    /* batch: gradWeight += scale * gradOutput^T * input.
     * gradOutput is transposed IN PLACE and restored at the end. */
    THTensor_(transpose)(gradOutput,gradOutput,0,1);
    THTensor_(addmm)(gradWeight,1,gradWeight,scale,gradOutput,input);
    if (bias) {
      /* addBuffer is refreshed by Linear_updateAddBuffer — presumably a
       * ones vector sized to the batch, so gradOutput^T * addBuffer sums
       * gradOutput over the batch; verify against that helper. */
      THNN_(Linear_updateAddBuffer)(state,input,addBuffer);
      THTensor_(addmv)(gradBias,1,gradBias,scale,gradOutput,addBuffer);
    }
    THTensor_(transpose)(gradOutput,gradOutput,0,1);
  }
}
/*
 * Single-sample backward (w.r.t. input) for TemporalRowConvolution:
 * computes the unfolded gradient via a batched matmul, then folds it
 * back into gradInput.
 */
static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
    THTensor *gradInput,
    THTensor *gradOutput,
    THTensor *weight,
    THTensor *fgradInput,
    int kW, int dW, int padW,
    int64_t inputFrameSize,
    int64_t nInputFrame,
    int64_t nOutputFrame)
{
  /* view gradOutput as inputFrameSize x 1 x nOutputFrame over its storage */
  THTensor *gO3d = THTensor_(newWithStorage3d)(
      gradOutput->storage, gradOutput->storageOffset,
      inputFrameSize, -1,
      1, -1,
      nOutputFrame, -1);

  // weight: inputFrameSize x kW x 1
  // gO3d:   inputFrameSize x 1 x nOutputFrame
  /* fgradInput = weight * gO3d (beta = 0 discards old contents) */
  THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gO3d);
  // fgradInput: inputFrameSize x kW x nOutputFrame

  THTensor_(free)(gO3d);

  /* fold the unfolded gradient back into the (zeroed) input gradient */
  THTensor_(zero)(gradInput);
  THNN_(unfolded_acc_row)(fgradInput, gradInput, kW, dW, padW,
                          inputFrameSize, nInputFrame, nOutputFrame);
}
/*
 * Single-sample backward (w.r.t. input) for SpatialConvolutionLocal.
 * Builds explicit-stride 3D views over gradOutput/fgradInput storage
 * (one "batch" entry per output pixel, since local convolution has a
 * distinct filter per location), runs a batched matmul, then folds the
 * unfolded gradient back into gradInput.
 */
static void THNN_(SpatialConvolutionLocal_updateGradInput_frame)
     (THTensor *gradInput, THTensor *gradOutput,
      THTensor *weight, THTensor *fgradInput,
      int kW, int kH, int dW, int dH, int padW, int padH,
      long nInputPlane, long inputWidth, long inputHeight,
      long nOutputPlane, long outputWidth, long outputHeight)
{
  THTensor *gradOutput3d, *fgradInput3d;
  /* oH*oW x nOutputPlane x 1 view (sizes/strides given explicitly) */
  gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
                                             outputHeight*outputWidth, 1,
                                             nOutputPlane, outputHeight*outputWidth,
                                             1, nOutputPlane*outputHeight*outputWidth);
  /* oH*oW x nInputPlane*kH*kW x 1 view */
  fgradInput3d = THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset,
                                             outputHeight*outputWidth, 1,
                                             kW*kH*nInputPlane, outputHeight*outputWidth,
                                             1, kW*kH*nInputPlane*outputHeight*outputWidth);
  // weight:        oH*oW x nInputPlane*kH*kW x nOutputPlane
  // gradOutput3d:  oH*oW x nOutputPlane x 1
  /* beta = 0 discards fgradInput's previous contents */
  THTensor_(baddbmm)(fgradInput3d, 0.0, fgradInput3d, 1.0, weight, gradOutput3d);
  // fgradInput3d:  oH*oW x nInputPlane*kH*kW x 1

  THTensor_(free)(gradOutput3d);
  THTensor_(free)(fgradInput3d);

  /* fold unfolded columns back into the (zeroed) input gradient */
  THTensor_(zero)(gradInput);

  THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
                      nInputPlane, inputWidth, inputHeight,
                      outputWidth, outputHeight);
}
/*
 * Linear forward: output = input * weight^T (+ bias).
 * Handles a single sample (1D input) via addmv and a batch (2D input)
 * via addmm with a bias rank-1 update through addBuffer.
 */
void THNN_(Linear_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias,
          THTensor *addBuffer)
{
  int64_t dim = THTensor_(_nDimension)(input);
  if (dim == 1) {
    /* single sample: output = bias (or 0) + weight * input */
    THTensor_(resize1d)(output,THTensor_(size)(weight,0));
    if (bias) {
      THTensor_(copy)(output,bias);
    } else {
      THTensor_(zero)(output);
    }
    THTensor_(addmv)(output,1,output,1,weight,input);
  }
  else if (dim == 2) {
    int64_t nframe = THTensor_(size)(input,0);
    int64_t nElement = THTensor_(nElement)(output);
    THTensor_(resize2d)(output,nframe,THTensor_(size)(weight,0));
    if (THTensor_(nElement)(output) != nElement) {
      /* freshly (re)allocated memory may be garbage; clear before use */
      THTensor_(zero)(output);
    }
    THNN_(Linear_updateAddBuffer)(state,input,addBuffer);
    /* output = input * weight^T  (via a transposed view of weight) */
    THTensor *tweight = THTensor_(new)();
    THTensor_(transpose)(tweight,weight,0,1);
    THTensor_(addmm)(output,0,output,1,input,tweight);
    THTensor_(free)(tweight);
    if (bias) {
      /* rank-1 update broadcasts bias across the batch
       * (addBuffer presumably holds ones — see Linear_updateAddBuffer) */
      THTensor_(addr)(output,1,output,1,addBuffer,bias);
    }
  }
}  /* BUGFIX: function's closing brace was missing (unbalanced braces). */
/*
 * Single-sample backward (w.r.t. input) for VolumetricConvolutionMM:
 * views gradOutput as a 2D matrix, computes the unfolded gradient via
 * GEMM, then folds it back into gradInput.
 */
static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
          THTensor *gradInput,
          THTensor *gradOutput,
          THTensor *weight,
          THTensor *fgradInput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,
          int pT, int pW, int pH)
{
  /* 2D view: nOutputPlane x (oT*oH*oW), shared storage */
  THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
    gradOutput->storage, gradOutput->storageOffset,
    gradOutput->size[0], -1,
    gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
  );

  /* fgradInput = weight * gradOutput2d (beta = 0 discards old contents) */
  THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
  THTensor_(free)(gradOutput2d);

  THTensor_(zero)(gradInput);

  /* NOTE(review): the size arguments are passed width-before-height
   * (size[3] then size[2]) — this is assumed to match unfolded_acc_vol's
   * parameter order; verify against its declaration. */
  THNN_(unfolded_acc_vol)(
    fgradInput, gradInput,
    kT, kW, kH,
    dT, dW, dH,
    pT, pW, pH,
    gradInput->size[0], gradInput->size[1],
    gradInput->size[3], gradInput->size[2],
    gradOutput->size[1], gradOutput->size[3], gradOutput->size[2]
  );
}
/*
 * Single-sample backward (w.r.t. input) for SpatialConvolutionMM:
 * GEMM into the unfolded buffer, then fold back into gradInput.
 */
static void THNN_(SpatialConvolutionMM_updateGradInput_frame)(
    THTensor *gradInput,
    THTensor *gradOutput,
    THTensor *weight,
    THTensor *fgradInput,
    int kW, int kH,
    int dW, int dH,
    int padW, int padH)
{
  /* 2D view of gradOutput: nOutputPlane x (oH*oW), shared storage */
  THTensor *gradOut2d = THTensor_(newWithStorage2d)(
      gradOutput->storage, gradOutput->storageOffset,
      gradOutput->size[0], -1,
      gradOutput->size[1] * gradOutput->size[2], -1);

  /* fgradInput = weight * gradOut2d (beta = 0 discards old contents) */
  THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOut2d);
  THTensor_(free)(gradOut2d);

  /* scatter-add the unfolded columns into the (zeroed) input gradient */
  THTensor_(zero)(gradInput);
  THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
                      gradInput->size[0], gradInput->size[2], gradInput->size[1],
                      gradOutput->size[2], gradOutput->size[1]);
}
/*
 * Sparse linear forward with COO input (nnz x 3 rows of
 * (sample, column, value), indices 1-based): output = input * weight^T + bias.
 * Note: output must already be sized batchSize x outDim — its size(0) is
 * read before any resize.
 */
void THNN_(SparseLinear_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias)
{
  long h, i;
  long outDim = THTensor_(size)(weight, 0);
  long inDim = THTensor_(size)(weight, 1);
  long batchSize = THTensor_(size)(output, 0);

  THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3");
  THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
  THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");

  long nnz = THTensor_(size)(input, 0);

  // output = weight * input + bias
  THTensor_(zero)(output);
  /* NOTE(review): this parallelizes over nnz entries, and two entries with
   * the same sample index h axpy into the same output row concurrently —
   * looks like a potential data race unless rows are unique per entry;
   * verify the input contract upstream. */
#pragma omp parallel for private(i) schedule(static) if (nnz * outDim > 10000)
  for (i = 0; i < nnz; i++) {
    real val = THNN_(get2d)(input, i, 2);
    if (val == 0) {
      continue;  /* zero values carry no contribution */
    }

    /* convert 1-based (sample, column) to 0-based */
    long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
    long h = (long)(THNN_(get2d)(input, i, 0)) - 1;  /* shadows outer h */
    if (offset >= 0 && offset < inDim) {
      /* output[h] += val * weight[:, offset] */
      THBlas_(axpy)(outDim,
                    val,
                    COL_PTR2(weight, offset), weight->stride[0],
                    ROW_PTR2(output, h), output->stride[1]);
    } else {
      THError("index out of bound. updateOutput: %d not between 1 and %d",
              offset + 1, inDim);
    }
  }

  /* add bias row-wise */
  THTensor* output_row = THTensor_(new)();
  for (h = 0; h < batchSize; h++) {
    THTensor_(select)(output_row, output, 0, h);
    THTensor_(cadd)(output_row, bias, 1.0, output_row);
  }
  THTensor_(free)(output_row);
}
/*
 * Single-sample forward for VolumetricConvolutionMM:
 * vol2col into finput, seed output with bias (or zero), then one GEMM.
 */
static void THNN_(VolumetricConvolutionMM_updateOutput_frame)(
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias,
          THTensor *finput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,
          int pT, int pW, int pH,
          int64_t nInputPlane,
          int64_t inputDepth,
          int64_t inputWidth,
          int64_t inputHeight,
          int64_t nOutputPlane,
          int64_t outputDepth,
          int64_t outputWidth,
          int64_t outputHeight)
{
  int64_t i;
  THTensor *output2d;

  /* unfold input volumes into finput columns */
  THNN_(unfolded_copy_vol)(
    finput, input,
    kT, kW, kH,
    dT, dW, dH,
    pT, pW, pH,
    nInputPlane,
    inputDepth, inputWidth, inputHeight,
    outputDepth, outputWidth, outputHeight
  );

  /* 2D view of output: nOutputPlane x (oT*oH*oW), shared storage */
  output2d = THTensor_(newWithStorage2d)(
    output->storage, output->storageOffset, nOutputPlane, -1,
    outputDepth*outputHeight*outputWidth, -1
  );

  if (bias) {
    /* broadcast each bias value across its whole output plane */
    for (i = 0; i < nOutputPlane; i++)
    {
      THVector_(fill)(
        output->storage->data+output->storageOffset+output->stride[0]*i,
        THTensor_(get1d)(bias, i),
        outputDepth*outputHeight*outputWidth
      );
    }
  } else {
    THTensor_(zero)(output);
  }

  /* output += weight * finput (beta = 1 keeps the bias seed) */
  THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);

  THTensor_(free)(output2d);
}
void THNN_(VolumetricConvolutionMM_accGradParameters)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, real scale) { THArgCheck(gradWeight->nDimension == 2, 4, "2D gradWeight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))" ); int nOutputPlane = (int)gradWeight->size[0]; THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5, "gradBias tensor has wrong size" ); THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" ); if (input->nDimension == 4) // non-batch mode { THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale); } else // batch mode { long T = input->size[0]; long t; for (t = 0; t < T; t++) { THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale); THTensor_(free)(gradOutput_t); THTensor_(free)(finput_t); } } }
void THNN_(SpatialConvolutionMM_accGradParameters)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, THTensor *fgradInput, int kW, int kH, int dW, int dH, int padW, int padH, real scale) { long nOutputPlane = gradWeight->size[0]; THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" ); THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero"); THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero"); if(input->nDimension == 3) { THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale); } else { long T = input->size[0]; long t; for(t = 0; t < T; t++) { THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale); THTensor_(free)(gradOutput_t); THTensor_(free)(finput_t); } } }
/*
 * Backward of col2im is exactly im2col with the same geometry parameters,
 * so delegate directly.
 */
void THNN_(Col2Im_updateGradInput)(
    THNNState *state,
    THTensor *gradOutput,
    THTensor *gradInput,
    int64_t kH, int64_t kW,
    int64_t dilationH, int64_t dilationW,
    int64_t padH, int64_t padW,
    int64_t dH, int64_t dW)
{
  THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput,
                             kH, kW,
                             dilationH, dilationW,
                             padH, padW,
                             dH, dW);
}
/*
 * Accumulate gradients for SparseLinear with COO input (nnz x 3 rows of
 * (sample, column, value), 1-based indices):
 *   gradWeight[:, col] += scale * val * gradOutput[sample]
 *   gradBias += scale * column-sums of gradOutput
 * plus optional L2 weight decay folded into gradWeight.
 */
void THNN_(SparseLinear_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *weight,
    THTensor *bias,
    real weightDecay,
    real scale)
{
  long h, i;
  long outDim = THTensor_(size)(weight, 0);
  long inDim = THTensor_(size)(weight, 1);

  THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3");
  THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4, "gradWeight size wrong");
  THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong");
  THArgCheck(THTensor_(isContiguous)(gradOutput), 1, "gradOutput must be contiguous");

  long nnz = THTensor_(size)(input, 0);
  // THTensor_(resize2d)(gradOutput, batchSize, outDim);

  // gradWeight += gradOutput * input
  /* NOTE(review): two nnz entries sharing the same column offset would
   * axpy into the same gradWeight column concurrently under this pragma —
   * looks racy unless (sample, column) pairs are unique; verify the input
   * contract upstream. */
#pragma omp parallel for private(h, i) schedule(static) if (\
  nnz * outDim > 10000)
  for (i = 0; i < nnz; i++) {
    real val = scale * THNN_(get2d)(input, i, 2);

    /* convert 1-based (sample, column) to 0-based */
    long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
    long h = (long)(THNN_(get2d)(input, i, 0)) - 1;  /* shadows outer h */
    if (offset >= 0 && offset < inDim) {
      /* gradWeight[:, offset] += val * gradOutput[h] */
      THBlas_(axpy)(outDim,
                    val,
                    ROW_PTR2(gradOutput, h), gradOutput->stride[1],
                    COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
    } else {
      THError(
        "index out of bound. accGradParameters: %d not between 1 and %d",
        offset + 1, inDim);
    }
  }

  // gradBias += gradOutput
  THTensor* buf = THTensor_(new)();
  THTensor_(sum)(buf, gradOutput, 0);
  THTensor_(cadd)(gradBias, gradBias, scale, buf);
  THTensor_(free)(buf);

  if (weightDecay != 0) {
    /* L2 regularization term */
    THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
  }
}
/*
 * VolumetricFullConvolution backward (w.r.t. input) is the dilated variant
 * with all dilations fixed to 1.
 */
void THNN_(VolumetricFullConvolution_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    THTensor *finput,
    THTensor *fgradInput,     // only used by cuda impl
    int dT, int dW, int dH,   // stride
    int pT, int pW, int pH,   // padding
    int aT, int aW, int aH)   // extra output adjustment
{
  THNN_(VolumetricFullDilatedConvolution_updateGradInput)(
      state, input, gradOutput, gradInput, weight, finput, fgradInput,
      dT, dW, dH,
      pT, pW, pH,
      1, 1, 1,               /* dilation = 1 in every dimension */
      aT, aW, aH);
}
/*
 * VolumetricFullConvolution forward is the dilated variant with all
 * dilations fixed to 1.
 */
void THNN_(VolumetricFullConvolution_updateOutput)(
    THNNState *state,
    THTensor *input,          // 4D or 5D (batch) tensor
    THTensor *output,
    THTensor *weight,         // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
    THTensor *bias,
    THTensor *finput,         // internal columns buffer
    THTensor *fgradInput,     // internal ones buffer
    int dT, int dW, int dH,   // stride of the convolution
    int pT, int pW, int pH,   // padding
    int aT, int aW, int aH)   // extra output adjustment
{
  THNN_(VolumetricFullDilatedConvolution_updateOutput)(
      state, input, output, weight, bias, finput, fgradInput,
      dT, dW, dH,
      pT, pW, pH,
      1, 1, 1,               /* dilation = 1 in every dimension */
      aT, aW, aH);
}
/*
 * VolumetricMaxPooling backward is the dilated variant with all
 * dilations fixed to 1.
 */
void THNN_(VolumetricMaxPooling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *indices,
    int dT, int dW, int dH,
    int pT, int pW, int pH)
{
  THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
      state, input, gradOutput, gradInput, indices,
      dT, dW, dH,
      pT, pW, pH,
      1, 1, 1);              /* dilation = 1 in every dimension */
}
void THNN_(VolumetricFullConvolution_accGradParameters)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, THTensor *fgradInput, int dT, int dW, int dH, // stride int pT, int pW, int pH, // padding int aT, int aW, int aH, // extra output adjustment accreal scale_) { THNN_(VolumetricFullDilatedConvolution_accGradParameters)( state, input, gradOutput, gradWeight, gradBias, finput, fgradInput, dT, dW, dH, pT, pW, pH, 1, 1, 1, aT, aW, aH, scale_); }
/*
 * Single-sample forward for SpatialConvolutionMM:
 * im2col into finput, seed output with bias (or zero), then one GEMM.
 */
static void THNN_(SpatialConvolutionMM_updateOutput_frame)(
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias,
    THTensor *finput,
    int kW, int kH,
    int dW, int dH,
    int padW, int padH,
    long nInputPlane,
    long inputWidth, long inputHeight,
    long nOutputPlane,
    long outputWidth, long outputHeight)
{
  /* unfold input patches into finput columns */
  THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
                       nInputPlane, inputWidth, inputHeight,
                       outputWidth, outputHeight);

  /* 2D view of output: nOutputPlane x (oH*oW), shared storage */
  THTensor *out2d = THTensor_(newWithStorage2d)(
      output->storage, output->storageOffset,
      nOutputPlane, -1,
      outputHeight*outputWidth, -1);

  if (bias) {
    /* broadcast each bias value across its whole output plane */
    long plane;
    for (plane = 0; plane < nOutputPlane; plane++) {
      THVector_(fill)(
          output->storage->data + output->storageOffset + output->stride[0] * plane,
          THTensor_(get1d)(bias, plane),
          outputHeight*outputWidth);
    }
  } else {
    THTensor_(zero)(output);
  }

  /* output += weight * finput (beta = 1 keeps the bias seed) */
  THTensor_(addmm)(out2d, 1, out2d, 1, weight, finput);
  THTensor_(free)(out2d);
}
/*
 * VolumetricMaxPooling forward is the dilated variant with all
 * dilations fixed to 1.
 */
void THNN_(VolumetricMaxPooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *indices,
    int kT, int kW, int kH,
    int dT, int dW, int dH,
    int pT, int pW, int pH,
    bool ceilMode)
{
  THNN_(VolumetricDilatedMaxPooling_updateOutput)(
      state, input, output, indices,
      kT, kW, kH,
      dT, dW, dH,
      pT, pW, pH,
      1, 1, 1,               /* dilation = 1 in every dimension */
      ceilMode);
}
/*
 * Initialize the MKL-DNN conversion primitive/buffer used by the
 * BatchNormalization backward pass, and stash them in the `primitives`
 * long-tensor slots (pointers stored as long long).
 */
static void THNN_(BatchNormalization_MKLDNN_init_backward)(
          THLongTensor *primitives,
          int N, int outC, int outH, int outW,
          double eps)
{
  dnnError_t err;
  dnnPrimitive_t bn_backward = (dnnPrimitive_t)primitives->storage->data[BN_BACKWARD];

  size_t outputSize[dimension] = {outW, outH, outC, N};
  size_t outputStrides[dimension] = {1, outW, outH * outW, outC * outH * outW};

  dnnLayout_t lt_user_output, lt_bn_backward_output = NULL;

  if (primitives->storage->data[BN_LAYOUT_OUTPUT] == 0) {
    /* No saved user layout: create one from the plain NCHW strides.
     * BUGFIX: was "dnnLayoutCreate_F32(<_user_output, ...)" — the
     * address-of operator had been garbled into '<'; pass &lt_user_output. */
    CHECK_ERR( dnnLayoutCreate_F32(&lt_user_output, dimension, outputSize, outputStrides), err );
#if CONVERSION_LOG
    fprintf(stderr ,"MKLDNN BN get output layout FAIL......\n");
#endif
  }
  else {
    lt_user_output = (dnnLayout_t)primitives->storage->data[BN_LAYOUT_OUTPUT];
#if CONVERSION_LOG
    fprintf(stderr ,"MKLDNN BN get output layout OK\n");
#endif
  }

  /* BUGFIX: same garbled address-of here; pass &lt_bn_backward_output. */
  dnnLayoutCreateFromPrimitive_F32(&lt_bn_backward_output, bn_backward, dnnResourceDiffDst);

  dnnPrimitive_t cv_backward_output = NULL;
  real * buffer_backward_output = NULL;

  //backward conversion init
  CHECK_ERR( THNN_(init_conversion)(&cv_backward_output, &buffer_backward_output,
                                    lt_bn_backward_output, lt_user_output), err );

  //save the dnnPrimitive to THTensor(long int array)
  primitives->storage->data[CV_BN_BACKWARD_OUTPUT] = (long long)cv_backward_output;
  primitives->storage->data[BUFFER_BN_BACKWARD_OUTPUT] = (long long)buffer_backward_output;
}
/*
 * Single-sample forward for TemporalRowConvolution:
 * unfold input rows, seed output with bias (or zero), then one
 * batched matmul.
 */
static void THNN_(TemporalRowConvolution_updateOutput_frame)(
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias,
    THTensor *finput,
    int kW, int dW, int padW,
    int64_t inputFrameSize,
    int64_t nInputFrame,
    int64_t nOutputFrame)
{
  /* view output as inputFrameSize x 1 x nOutputFrame over its storage */
  THTensor *out3d = THTensor_(newWithStorage3d)(
      output->storage, output->storageOffset,
      inputFrameSize, -1,
      1, -1,
      nOutputFrame, -1);

  /* unfold input rows into finput */
  THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
                           inputFrameSize, nInputFrame, nOutputFrame);

  THTensor_(zero)(output);

  if (bias != NULL) {
    /* seed each output row with its bias value */
    int64_t row;
    for (row = 0; row < inputFrameSize; row++) {
      THVector_(fill)(
          THStorage_(data)(output->storage) + output->storageOffset
              + output->stride[0] * row,
          THTensor_(get1d)(bias, row),
          nOutputFrame);
    }
  }

  /* out3d += weight * finput (beta = 1 keeps the bias seed) */
  THTensor_(baddbmm)(out3d, 1, out3d, 1, weight, finput);
  THTensor_(free)(out3d);
}