// Accumulates gradWeight/gradBias for VolumetricConvolutionMM, scaled by
// scale_. gradWeight may be NULL (bias-only accumulation); fgradInput is
// unused here and present only for signature symmetry with updateGradInput.
void THNN_(VolumetricConvolutionMM_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *finput,
          THTensor *fgradInput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,
          int pT, int pW, int pH,
          accreal scale_)
{
  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);

  THNN_(VolumetricConvolutionMM_shapeCheck)(
        state, input, gradOutput, gradWeight, gradBias,
        kT, kW, kH, dT, dW, dH, pT, pW, pH, 1);
  input = THTensor_(newContiguous)(input);
  gradOutput = THTensor_(newContiguous)(gradOutput);

  if (gradWeight) {
    // View the 5D weight gradient as a 2D matrix for the GEMM-based kernel.
    gradWeight = THNN_(newViewWeight)(gradWeight);
  }

  if (input->nDimension == 4)   // non-batch mode
  {
    THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else  // batch mode
  {
    int64_t T = input->size[0];
    int64_t t;

    // NOTE: this loop must stay serial. Every iteration accumulates into the
    // SAME gradWeight/gradBias tensors, so an OpenMP `parallel for` over t
    // (as this code previously had) is a data race that can silently corrupt
    // the gradients. The other accGradParameters variants are serial too.
    for (t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = NULL;
      if (gradWeight) {
        // finput is only consumed when a weight gradient is requested.
        finput_t = THTensor_(newSelect)(finput, 0, t);
      }

      THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      if (gradWeight) {
        THTensor_(free)(finput_t);
      }
    }
  }

  // Release the contiguous copies and the temporary weight view.
  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
  if (gradWeight) {
    THTensor_(free)(gradWeight);
  }
}
Esempio n. 2
0
// Accumulates gradWeight and gradBias for TemporalRowConvolution, scaled by
// scale_. If featFirst is false the tensors arrive frame-major and are
// transposed to feature-first layout before the per-frame kernel runs.
// fgradInput is unused here (kept for signature symmetry).
void THNN_(TemporalRowConvolution_accGradParameters)(
	THNNState *state,
	THTensor *input,
	THTensor *gradOutput,
	THTensor *gradWeight,
	THTensor *gradBias,
	THTensor *finput,
	THTensor *fgradInput,
	int kW,
	int dW,
	int padW,
	bool featFirst,
	accreal scale_) {

    real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
	int ndim = input->nDimension;

	THTensor *tinput, *tgradOutput;

	if (!featFirst) {
		// Bring the feature dimension first, then take contiguous copies.
		tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
		tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);

		input = THTensor_(newContiguous)(tinput);
		gradOutput = THTensor_(newContiguous)(tgradOutput);
	} else {
		input = THTensor_(newContiguous)(input);
		gradOutput = THTensor_(newContiguous)(gradOutput);
	}

	THNN_(TemporalRowConvolution_shapeCheck)
	        (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);

	if (ndim == 2) {
		// Single sample: accumulate directly.
		THNN_(TemporalRowConvolution_accGradParameters_frame)(
			gradOutput, gradWeight, gradBias, finput, scale);
	} else {
		// Batch mode: accumulate one sample at a time (serial, since each
		// iteration adds into the shared gradWeight/gradBias).
		int64_t T = input->size[0];
		int64_t t;

		for (t = 0; t < T; t++) {
			THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
			THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

			THNN_(TemporalRowConvolution_accGradParameters_frame)(
				gradOutput_t, gradWeight, gradBias, finput_t, scale);

			THTensor_(free)(gradOutput_t);
			THTensor_(free)(finput_t);
		}
	}

	// Release the transpose views and the contiguous copies made above.
	if (!featFirst) {
		THTensor_(free)(tinput);
		THTensor_(free)(tgradOutput);
	}

	THTensor_(free)(input);
	THTensor_(free)(gradOutput);
}
Esempio n. 3
0
// Col2Im forward: folds a columns tensor (as produced by im2col) back into an
// image batch of shape (N, nOutputPlane, outputHeight, outputWidth), summing
// the contributions of overlapping patches.
void THNN_(Col2Im_updateOutput)(
           THNNState *state,
           THTensor *input,
           THTensor *output,
           int64_t outputHeight, int64_t outputWidth,
           int64_t kH, int64_t kW,
           int64_t dilationH, int64_t dilationW,
           int64_t padH, int64_t padW,
           int64_t dH, int64_t dW) {

  THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth,
                           kH, kW, dilationH, dilationW, padH, padW, dH, dW);

  bool batched_input = true;
  if (input->dim() == 2) {
      // Force batch
      // NOTE(review): this resizes the caller's input tensor in place and it
      // is never resized back (only output is restored below) — confirm this
      // is the intended contract.
      batched_input = false;
      THTensor_(resize3d)(input, 1, input->size(0), input->size(1));
  }

  long batchSize = input->size(0);
  long nInputPlane = input->size(1);
  // Each output plane occupies kW*kH rows of the columns tensor.
  long nOutputPlane = nInputPlane / (kW * kH);

  input = THTensor_(newContiguous)(input);

  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
  THTensor_(zero)(output);

  THTensor *input_n = THTensor_(new)();
  THTensor *output_n = THTensor_(new)();

  // Number of sliding-window positions along each spatial dimension.
  int64_t height_col = (outputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
  int64_t width_col = (outputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;

  // Fold one batch element at a time.
  for (int64_t elt = 0; elt < batchSize; elt++) {
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(output_n, output, 0, elt);

    THNN_(col2im)(
      input_n->data<scalar_t>(),
      nOutputPlane,
      outputHeight, outputWidth,
      height_col, width_col,
      kH, kW,
      padH, padW,
      dH, dW,
      dilationH, dilationW, output_n->data<scalar_t>());
  }

  c10::raw::intrusive_ptr::decref(input_n);
  c10::raw::intrusive_ptr::decref(output_n);

  // Drop the forced batch dimension from the output for 2-D inputs.
  if (!batched_input) {
      THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
  }
  c10::raw::intrusive_ptr::decref(input);
}
Esempio n. 4
0
// Renormalizes the rows of weight selected by idx so that each selected
// row's normType-norm does not exceed maxNorm. Indices are 1-based.
// NOTE(review): idx is sorted and deduplicated IN PLACE, so the caller's
// index tensor is mutated — confirm callers expect this.
void THNN_(LookupTable_renorm)(
          THNNState *state,
          THIndexTensor *idx,
          THTensor *weight,
          real maxNorm,
          real normType)
{
  if (!THTensor_(isContiguous)(weight))
    THError("weight must be contiguous");
  if (!THIndexTensor_(isContiguous)(idx))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(idx) != 1)
    THError("idx must be a vector");
  if (normType <= 0)
    THError("non-positive-norm not supported");

  long i;
  THIndex_t *row_idx = THIndexTensor_(data)(idx);
  long numel = THIndexTensor_(nElement)(idx);

  long numw = THTensor_(size)(weight, 0);
  long stride = THTensor_(stride)(weight, 0);
  real *gw = THTensor_(data)(weight);
  // Validate every (1-based) index before touching any row.
  for (i=0; i<numel; i++)
    if (row_idx[i] < 1 || row_idx[i] > numw)
      THError("input out of range");
  // get unique indices
  qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
  long ptr = 0;
  for (i=0; i<numel; i++)
    if (i == 0 || row_idx[i] != row_idx[i-1])
      row_idx[ptr++] = row_idx[i];
  numel = ptr;  // only the unique prefix is used from here on

#ifdef _OPENMP
  if (numel > 1000)
  {
    // The strategy is to parallelize over the rows that appear in
    // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
    // This distributes the work evenly to each thread.
    // After deduplication the rows are distinct, so iterations touch
    // disjoint memory and the parallel loop is race-free.
    #pragma omp parallel for private(i)
    for (i=0; i<numel; i++)
    {
      long k = row_idx[i] - 1;  // convert to 0-based row
      THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
    }
    return;
  }
#endif
  // Serial fallback (or small workloads).
  for (i=0; i<numel; i++)
  {
    long k = row_idx[i] - 1;
    THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
  }
}
Esempio n. 5
0
// Computes gradInput for SpatialConvolutionMM via the im2col/col2im (MM)
// formulation. fgradInput is scratch space for the unfolded gradient
// columns. weight is transposed in place for the GEMM and restored before
// returning.
void THNN_(SpatialConvolutionMM_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *weight,
          THTensor *finput,
          THTensor *fgradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH)
{
  long nOutputPlane = weight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);
  // depending on the BLAS library, fgradInput (result tensor) might
  // be left uninitialized on zero alpha, which might lead to weird behavior
  // hence, to be safe, zero it
  THTensor_(zero)(fgradInput);
  // Transposed view of weight for the backward GEMM; restored at the end.
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    // Single sample.
    THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
  }
  else
  {
    // Batch mode: each iteration writes a disjoint slice of gradInput and
    // fgradInput and only reads weight, so the parallel loop is race-free.
    long T = input->size[0];
    long t;

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  // Undo the in-place transpose of weight.
  THTensor_(transpose)(weight, weight, 0, 1);
}
Esempio n. 6
0
// Forward pass for SparseLinear with the legacy sparse input format:
// input is batchsize x nnz x 2 where [h][i][0] is a 1-based feature index
// and [h][i][1] is its value. Computes output = weight * input + bias.
void THNN_(SparseLinear_legacyUpdateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias)
{
  int64_t h, i;
  int64_t outDim = THTensor_(size)(weight, 0);
  int64_t inDim = THTensor_(size)(weight, 1);

  THArgCheck(THNN_(checkLegacyInput)(input), 2, "input size must be batchsize x nnz x 2");
  THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
  THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");

  weight = THTensor_(newContiguous)(weight);

  int64_t batchSize = THTensor_(size)(input, 0);
  int64_t nnz = THTensor_(size)(input, 1);
  THTensor_(resize2d)(output, batchSize, outDim);

  // output = weight * input + bias
  THTensor_(zero)(output);
  // Parallel over batch rows: each h writes only its own output row, so
  // iterations are independent.
#pragma omp parallel for private(h, i) schedule(static) if (   \
  batchSize > 1 && batchSize * nnz * outDim > 10000)
  for (h = 0; h < batchSize; h++) {
    for (i = 0; i < nnz; i++) {
      real val = THNN_(get3d)(input, h, i, 1);
      if (val == 0) {
        // Zero entries contribute nothing.
        continue;
      }

      // Convert the 1-based feature index to 0-based.
      int64_t offset = (int64_t)(THNN_(get3d)(input, h, i, 0)) - 1;
      if (offset >= 0 && offset < inDim) {
        // output[h] += val * weight[:, offset]
        THBlas_(axpy)(outDim,
                      val,
                      COL_PTR2(weight, offset), weight->stride[0],
                      ROW_PTR2(output, h), output->stride[1]);
      } else {
        THError("index out of bound. updateOutput: %d not between 1 and %d",
                offset + 1, inDim);
      }
    }
  }

  // Add the bias to every output row (serial; reuses a single row view).
  THTensor* output_row = THTensor_(new)();
  for (h = 0; h < batchSize; h++) {
    THTensor_(select)(output_row, output, 0, h);
    THTensor_(cadd)(output_row, bias, 1.0, output_row);
  }
  THTensor_(free)(output_row);
  THTensor_(free)(weight);
}
Esempio n. 7
0
// Accumulates gradWeight and gradBias for SpatialConvolutionMM, scaled by
// scale. A 4D gradWeight is temporarily viewed as a 2D matrix so the
// GEMM-based frame kernel can be used; fgradInput is unused here.
void THNN_(SpatialConvolutionMM_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *finput,
          THTensor *fgradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          real scale)
{
  int freeWeight = 0;  // set when gradWeight is replaced by a temporary 2D view
  long nOutputPlane = gradWeight->size[0];
  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
  THArgCheck(gradWeight->nDimension == 2 || gradWeight->nDimension == 4, 4, "gradWeight tensor should be 2D or 4D");

  if (gradWeight->nDimension == 4) {
    // Flatten (nOutputPlane, nInputPlane, kH, kW) to a 2D matrix sharing
    // the same storage; only the view needs freeing afterwards.
    long s1 = gradWeight->size[0];
    long s2 = gradWeight->size[1] * gradWeight->size[2] * gradWeight->size[3];
    gradWeight = THTensor_(newWithStorage2d)(gradWeight->storage, gradWeight->storageOffset, s1, -1, s2, -1);
    freeWeight = 1;
  }

  if(input->nDimension == 3)
  {
    // Single sample.
    THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else
  {
    // Batch mode: serial loop because each iteration accumulates into the
    // shared gradWeight/gradBias.
    long T = input->size[0];
    long t;

    for(t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }
  if (freeWeight)
    THTensor_(free)(gradWeight);
}
Esempio n. 8
0
// Computes gradInput for SpatialConvolutionMM via the im2col/col2im (MM)
// formulation. fgradInput is scratch space for the unfolded gradient
// columns. weight is transposed in place for the GEMM and restored before
// returning.
void THNN_(SpatialConvolutionMM_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *weight,
          THTensor *finput,
          THTensor *fgradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH)
{
  long nOutputPlane = weight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);
  // Depending on the BLAS library, fgradInput (the GEMM result tensor) might
  // be left uninitialized when beta is zero, which can propagate garbage or
  // NaNs into the result — so, to be safe, zero it explicitly.
  THTensor_(zero)(fgradInput);
  // Transposed view of weight for the backward GEMM; restored at the end.
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    // Single sample.
    THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
  }
  else
  {
    // Batch mode: each iteration writes a disjoint slice of gradInput and
    // fgradInput and only reads weight, so the parallel loop is race-free.
    long T = input->size[0];
    long t;

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  // Undo the in-place transpose of weight.
  THTensor_(transpose)(weight, weight, 0, 1);
}
Esempio n. 9
0
// Accumulates gradWeight/gradBias for VolumetricConvolutionMM (real-scale
// variant). view_weight may replace gradWeight with a temporary 2D view of
// the same storage; freeWeight records whether that view must be freed.
void THNN_(VolumetricConvolutionMM_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *finput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,
          int pT, int pW, int pH,
          real scale)
{
  int freeWeight;
  int nOutputPlane = (int)gradWeight->size[0];

  THNN_(VolumetricConvolutionMM_shapeCheck)(
        state, input, gradOutput, gradWeight, gradBias,
        kT, kW, kH, dT, dW, dH, pT, pW, pH);
  input = THTensor_(newContiguous)(input);
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // May swap gradWeight for a 2D view; returns whether it must be freed.
  freeWeight = THNN_(view_weight)(&gradWeight);

  if (input->nDimension == 4)   // non-batch mode
  {
    THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else  // batch mode
  {
    // Serial loop: each iteration accumulates into the shared gradients.
    long T = input->size[0];
    long t;

    for (t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }

  // Release the contiguous copies and, if created, the weight view.
  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
  if (freeWeight)
    THTensor_(free)(gradWeight);
}
Esempio n. 10
0
// Zeroes only the parts of gradWeight/gradBias that the last forward pass
// touched: gradBias entirely, and for gradWeight just the columns indexed
// by the nonzero entries of lastInput (legacy batchsize x nnz x 2 format,
// 1-based feature indices).
void THNN_(SparseLinear_legacyZeroGradParameters)(
          THNNState *state,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *lastInput)
{
  int64_t h, i, j;

  int64_t outDim = gradWeight->size[0];
  int64_t inDim = gradWeight->size[1];

  THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
  THArgCheck(THNN_(checkLegacyInput)(lastInput), 4,
             "input size must be batchsize x nnz x 2");

  THTensor_(zero)(gradBias);

  int64_t batchSize = THTensor_(size)(lastInput, 0);
  int64_t nnz = THTensor_(size)(lastInput, 1);

  // NOTE(review): different (h, i) pairs may name the same column; concurrent
  // zeroing of the same memory is idempotent but still technically a race —
  // confirm this is acceptable for the targeted platforms.
#pragma omp parallel for private(h, i, j) schedule(static) if (   \
  batchSize > 1 && batchSize * nnz * outDim > 10000)
  for (h = 0; h < batchSize; h++) {
    for (i = 0; i < nnz; i++) {
      if (THNN_(get3d)(lastInput, h, i, 1) == 0 ) {
        // Zero-valued entries never contributed a gradient.
        continue;
      }

      // Convert the 1-based feature index to 0-based.
      int64_t offset = (int64_t)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
      if (offset >= 0 && offset < inDim) {
        real* pGradWeight = COL_PTR2(gradWeight, offset);
        if (gradWeight->stride[0] == 1) {
          // Contiguous column: vectorized fill.
          THVector_(fill)(pGradWeight, 0, outDim);
        } else {
          // Strided column: zero element by element.
          int64_t stride = gradWeight->stride[0];
          for (j = 0; j < outDim; ++j) {
            pGradWeight[j * stride] = 0;
          }
        }
      } else {
        THError(
          "index out of bound. zeroGradParameters: %d not between 1 and %d",
          offset + 1,
          inDim);
      }
    }
  }
}
Esempio n. 11
0
File: Linear.c Progetto: juesato/nn
// Accumulates gradWeight (+= scale * outer(gradOutput, input)) and, when a
// bias is present, gradBias (+= scale * gradOutput) for the Linear module.
// gradInput and weight are unused here; they are part of the common THNN
// accGradParameters signature.
void THNN_(Linear_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *weight,
          THTensor *bias,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *addBuffer,
          real scale)
{
  long dim = THTensor_(nDimension)(input);
  if (dim == 1) {
    // Single sample: rank-1 update of gradWeight.
    THTensor_(addr)(gradWeight,1,gradWeight,scale,gradOutput,input);
    if (bias) {
      THTensor_(cadd)(gradBias,gradBias,scale,gradOutput);
    }
  }
  else if (dim == 2) {
    // Batch: gradWeight += scale * gradOutput^T * input. gradOutput is
    // transposed in place for the GEMM and restored below.
    THTensor_(transpose)(gradOutput,gradOutput,0,1);
    THTensor_(addmm)(gradWeight,1,gradWeight,scale,gradOutput,input);
    if (bias) {
      // addBuffer is a ones vector; addmv sums gradOutput over the batch.
      THNN_(Linear_updateAddBuffer)(state,input,addBuffer);
      THTensor_(addmv)(gradBias,1,gradBias,scale,gradOutput,addBuffer);
    }
    // Undo the in-place transpose.
    THTensor_(transpose)(gradOutput,gradOutput,0,1);
  }
}
Esempio n. 12
0
// Back-propagates one sample's gradOutput through TemporalRowConvolution:
// multiplies the per-row weights into a 3D view of gradOutput, then folds
// the resulting columns (fgradInput) back into gradInput.
static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
	THTensor *gradInput,
	THTensor *gradOutput,
	THTensor *weight,
	THTensor *fgradInput,
	int kW,
	int dW,
	int padW,
	int64_t inputFrameSize,
	int64_t nInputFrame,
	int64_t nOutputFrame) {

	// 3D view of gradOutput sharing the same storage (no copy).
	THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
		gradOutput->storage, gradOutput->storageOffset,
		inputFrameSize, -1,
		1, -1,
		nOutputFrame, -1);

	// weight:			inputFrameSize x kW x 1
	// gradOutput3d:	inputFrameSize x 1 x nOutputFrame
	// beta = 0 overwrites fgradInput with the batched product.
	THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d);
	// fgradInput:		inputFrameSize x kW x nOutputFrame
	THTensor_(free)(gradOutput3d);

	THTensor_(zero)(gradInput);

	// Fold the unfolded columns back, accumulating overlaps into gradInput.
	THNN_(unfolded_acc_row)(fgradInput, gradInput,
	                        kW, dW, padW,
	                        inputFrameSize, nInputFrame, nOutputFrame);
}
Esempio n. 13
0
// Back-propagates one sample's gradOutput through a locally-connected
// (untied-weights) spatial convolution: a batched matmul per output
// location fills fgradInput, which is then folded back into gradInput.
static void THNN_(SpatialConvolutionLocal_updateGradInput_frame)
     (THTensor *gradInput, THTensor *gradOutput,
      THTensor *weight, THTensor *fgradInput,
      int kW, int kH, int dW, int dH, int padW, int padH,
      long nInputPlane, long inputWidth, long inputHeight,
      long nOutputPlane, long outputWidth, long outputHeight)
{
  THTensor *gradOutput3d, *fgradInput3d;
  // 3D views over the existing storage — no copies are made.
  gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
                                             outputHeight*outputWidth, 1,
                                             nOutputPlane, outputHeight*outputWidth,
                                             1, nOutputPlane*outputHeight*outputWidth);
  fgradInput3d = THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset,
                                             outputHeight*outputWidth, 1,
                                             kW*kH*nInputPlane, outputHeight*outputWidth,
                                             1, kW*kH*nInputPlane*outputHeight*outputWidth);
  // weight:        oH*oW x nInputPlane*kH*kW x nOutputPlane
  // gradOutput3d:  oH*oW x nOutputPlane x 1
  // beta = 0 overwrites fgradInput3d with the batched product.
  THTensor_(baddbmm)(fgradInput3d, 0.0, fgradInput3d, 1.0, weight, gradOutput3d);
  // fgradInput3d:  oH*oW x nInputPlane*kH*kW x 1

  THTensor_(free)(gradOutput3d);
  THTensor_(free)(fgradInput3d);

  THTensor_(zero)(gradInput);

  // Fold the unfolded columns back, accumulating overlaps into gradInput.
  THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
		      nInputPlane, inputWidth, inputHeight,
		      outputWidth, outputHeight);

}
Esempio n. 14
0
// Linear forward: output = input * weight^T + bias.
// 1-D input: a single sample, computed with addmv.
// 2-D input: a batch of nframe samples, computed with addmm; addBuffer is
// used (via Linear_updateAddBuffer) to broadcast bias across the batch.
void THNN_(Linear_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias,
          THTensor *addBuffer)
{
  int64_t dim = THTensor_(_nDimension)(input);
  if (dim == 1) {
    THTensor_(resize1d)(output,THTensor_(size)(weight,0));
    // Seed output with the bias (or zeros) so addmv accumulates onto it.
    if (bias) {
      THTensor_(copy)(output,bias);
    }
    else {
      THTensor_(zero)(output);
    }
    THTensor_(addmv)(output,1,output,1,weight,input);
  }
  else if (dim == 2) {
    int64_t nframe = THTensor_(size)(input,0);
    int64_t nElement = THTensor_(nElement)(output);
    THTensor_(resize2d)(output,nframe,THTensor_(size)(weight,0));
    // Zero only when the resize actually changed the element count.
    if (THTensor_(nElement)(output) != nElement) {
      THTensor_(zero)(output);
    }
    THNN_(Linear_updateAddBuffer)(state,input,addBuffer);
    // output = input * weight^T (beta = 0 overwrites output).
    THTensor *tweight = THTensor_(new)();
    THTensor_(transpose)(tweight,weight,0,1);
    THTensor_(addmm)(output,0,output,1,input,tweight);
    THTensor_(free)(tweight);
    if (bias) {
      // Broadcast the bias over the batch: output += addBuffer ⊗ bias.
      THTensor_(addr)(output,1,output,1,addBuffer,bias);
    }
  }
}
Esempio n. 15
0
// Back-propagates one sample's gradOutput through VolumetricConvolutionMM:
// a GEMM against the (already transposed) weight fills the unfolded
// gradient columns (fgradInput), which are then folded back into gradInput.
static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
          THTensor *gradInput,
          THTensor *gradOutput,
          THTensor *weight,
          THTensor *fgradInput,
          int kT,
          int kW,
          int kH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH)
{
  // 2D view of gradOutput (nOutputPlane x T*H*W) over the same storage.
  THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
    gradOutput->storage, gradOutput->storageOffset,
    gradOutput->size[0], -1,
    gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
  );

  // beta = 0 overwrites fgradInput with weight * gradOutput2d.
  THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
  THTensor_(free)(gradOutput2d);

  THTensor_(zero)(gradInput);

  // Fold the columns back, accumulating overlapping windows into gradInput.
  THNN_(unfolded_acc_vol)(
    fgradInput, gradInput,
    kT, kW, kH,
    dT, dW, dH,
    pT, pW, pH,
    gradInput->size[0], gradInput->size[1], gradInput->size[3], gradInput->size[2],
    gradOutput->size[1], gradOutput->size[3], gradOutput->size[2]
  );
}
Esempio n. 16
0
// Back-propagates one sample's gradOutput through SpatialConvolutionMM:
// a GEMM against the (already transposed) weight fills the unfolded
// gradient columns (fgradInput), which are then folded back into gradInput.
static void THNN_(SpatialConvolutionMM_updateGradInput_frame)(
          THTensor *gradInput,
          THTensor *gradOutput,
          THTensor *weight,
          THTensor *fgradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH)
{
  // 2D view of gradOutput (nOutputPlane x H*W) over the same storage.
  THTensor *gradOutput2d = THTensor_(newWithStorage2d)
    (gradOutput->storage, gradOutput->storageOffset,
     gradOutput->size[0], -1,
     gradOutput->size[1]*gradOutput->size[2], -1);
  // beta = 0 overwrites fgradInput with weight * gradOutput2d.
  THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
  THTensor_(free)(gradOutput2d);

  THTensor_(zero)(gradInput);

  // Fold the columns back, accumulating overlapping windows into gradInput.
  THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH,
		      padW, padH,
		      gradInput->size[0], gradInput->size[2], gradInput->size[1],
		      gradOutput->size[2], gradOutput->size[1]);
}
Esempio n. 17
0
// Forward pass for SparseLinear with COO input (nnz x 3): per entry i,
// input[i][0] is the 1-based batch row, input[i][1] the 1-based feature
// index, input[i][2] the value. Computes output = weight * input + bias.
void THNN_(SparseLinear_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias)
{
  long h, i;
  long outDim = THTensor_(size)(weight, 0);
  long inDim = THTensor_(size)(weight, 1);
  long batchSize = THTensor_(size)(output, 0);

  THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3");
  THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
  THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");

  long nnz = THTensor_(size)(input, 0);

  // output = weight * input + bias
  THTensor_(zero)(output);
  // NOTE(review): this parallelizes over nnz entries; two entries sharing the
  // same batch row h would axpy into the same output row concurrently — a
  // data race unless inputs guarantee unique (h, offset) pairs. Confirm.
#pragma omp parallel for private(i) schedule(static) if (nnz * outDim > 10000)
  for (i = 0; i < nnz; i++) {
    real val = THNN_(get2d)(input, i, 2);
    if (val == 0) {
      // Zero entries contribute nothing.
      continue;
    }

    // Convert 1-based indices to 0-based (this h shadows the outer h).
    long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
    long h = (long)(THNN_(get2d)(input, i, 0)) - 1;
    if (offset >= 0 && offset < inDim) {
      // output[h] += val * weight[:, offset]
      THBlas_(axpy)(outDim,
                    val,
                    COL_PTR2(weight, offset), weight->stride[0],
                    ROW_PTR2(output, h), output->stride[1]);
    } else {
      THError("index out of bound. updateOutput: %d not between 1 and %d",
              offset + 1, inDim);
    }
  }

  // Add the bias to every output row (serial; reuses a single row view).
  THTensor* output_row = THTensor_(new)();
  for (h = 0; h < batchSize; h++) {
    THTensor_(select)(output_row, output, 0, h);
    THTensor_(cadd)(output_row, bias, 1.0, output_row);
  }
  THTensor_(free)(output_row);
}
Esempio n. 18
0
// Forward pass for one sample of VolumetricConvolutionMM: unfolds the input
// into columns (finput), seeds the output with the bias (or zeros), then
// computes output += weight * finput as a single GEMM.
static void THNN_(VolumetricConvolutionMM_updateOutput_frame)(
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias,
          THTensor *finput,
          int kT,
          int kW,
          int kH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH,
          int64_t nInputPlane,
          int64_t inputDepth,
          int64_t inputWidth,
          int64_t inputHeight,
          int64_t nOutputPlane,
          int64_t outputDepth,
          int64_t outputWidth,
          int64_t outputHeight)
{
  int64_t i;
  THTensor *output2d;

  // im2col for volumes: copy input patches into the columns buffer.
  THNN_(unfolded_copy_vol)(
    finput, input,
    kT, kW, kH,
    dT, dW, dH,
    pT, pW, pH,
    nInputPlane,
    inputDepth, inputWidth, inputHeight,
    outputDepth, outputWidth, outputHeight
  );

  // 2D view of output (nOutputPlane x T*H*W) over the same storage.
  output2d = THTensor_(newWithStorage2d)(
    output->storage, output->storageOffset, nOutputPlane, -1,
    outputDepth*outputHeight*outputWidth, -1
  );

  if (bias) {
      // Broadcast each bias value across its entire output plane.
      for (i = 0; i < nOutputPlane; i++)
      {
        THVector_(fill)(
          output->storage->data+output->storageOffset+output->stride[0]*i,
          THTensor_(get1d)(bias, i),
          outputDepth*outputHeight*outputWidth
        );
      }
  } else {
    THTensor_(zero)(output);
  }

  // output2d += weight * finput (accumulates onto the bias seeded above).
  THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);

  THTensor_(free)(output2d);
}
Esempio n. 19
0
// Accumulates gradWeight and gradBias for VolumetricConvolutionMM, scaled by
// scale. This variant requires gradWeight to already be the flattened 2D
// matrix (nOutputPlane x nInputPlane*kT*kH*kW).
void THNN_(VolumetricConvolutionMM_accGradParameters)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *gradWeight,
  THTensor *gradBias,
  THTensor *finput,
  real scale)
{
  THArgCheck(gradWeight->nDimension == 2, 4,
    "2D gradWeight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))"
  );

  int nOutputPlane = (int)gradWeight->size[0];

  THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
    "gradBias tensor has wrong size"
  );

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 3,
    "Number of output features is not equal to nOutputPlane"
  );

  if (input->nDimension == 4)   // non-batch mode
  {
    THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else  // batch mode
  {
    // Serial loop: each iteration accumulates into the shared gradients.
    long T = input->size[0];
    long t;

    for (t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }
}
Esempio n. 20
0
// Accumulates gradWeight and gradBias for SpatialConvolutionMM, scaled by
// scale. This variant expects gradWeight already in its 2D (flattened)
// layout; fgradInput is unused and kept for signature symmetry.
void THNN_(SpatialConvolutionMM_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *finput,
          THTensor *fgradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          real scale)
{
  long nOutputPlane = gradWeight->size[0];
  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");

  if(input->nDimension == 3)
  {
    // Single sample.
    THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else
  {
    // Batch mode: serial loop because each iteration accumulates into the
    // shared gradWeight/gradBias.
    long T = input->size[0];
    long t;

    for(t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }
}
Esempio n. 21
0
// Backward of col2im is exactly im2col applied to gradOutput with the same
// geometry parameters, so this simply delegates.
void THNN_(Col2Im_updateGradInput)(
           THNNState *state,
           THTensor *gradOutput,
           THTensor *gradInput,
           int64_t kH, int64_t kW,
           int64_t dilationH, int64_t dilationW,
           int64_t padH, int64_t padW,
           int64_t dH, int64_t dW) {
  // d(col2im)/d(input) == im2col.
  THNN_(Im2Col_updateOutput)(
      state, gradOutput, gradInput,
      kH, kW, dilationH, dilationW, padH, padW, dH, dW);
}
Esempio n. 22
0
// Accumulates gradWeight and gradBias for SparseLinear with COO input
// (nnz x 3): per entry i, input[i][0] is the 1-based batch row, input[i][1]
// the 1-based feature index, input[i][2] the value.
// gradWeight += scale * gradOutput^T * input (sparse), gradBias +=
// scale * sum(gradOutput), plus optional L2 weight decay.
void THNN_(SparseLinear_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *weight,
          THTensor *bias,
          real weightDecay,
          real scale)
{
  long i;  // (removed an unused outer `h` that shadowed the loop-local one)
  long outDim = THTensor_(size)(weight, 0);
  long inDim = THTensor_(size)(weight, 1);

  THArgCheck(THNN_(checkInput)(input), 2,
             "input must be in coo format, nnz x 3");
  THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
             "gradWeight size wrong");
  THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5,
             "gradBias size wrong");
  THArgCheck(THTensor_(isContiguous)(gradOutput), 1,
             "gradOutput must be contiguous");

  long nnz = THTensor_(size)(input, 0);
  // THTensor_(resize2d)(gradOutput, batchSize, outDim);

  // gradWeight += gradOutput * input
  // NOTE(review): entries sharing the same feature column would axpy into
  // the same gradWeight column concurrently — confirm inputs guarantee
  // unique (row, column) pairs, or accept the race.
#pragma omp parallel for private(i) schedule(static) if (\
  nnz * outDim > 10000)
  for (i = 0; i < nnz; i++) {
    real val = scale * THNN_(get2d)(input, i, 2);

    // Convert 1-based indices to 0-based.
    long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
    long h = (long)(THNN_(get2d)(input, i, 0)) - 1;
    if (offset >= 0 && offset < inDim) {
      // gradWeight[:, offset] += val * gradOutput[h]
      THBlas_(axpy)(outDim,
          val,
          ROW_PTR2(gradOutput, h), gradOutput->stride[1],
          COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
    } else {
      THError(
          "index out of bound. accGradParameters: %d not between 1 and %d",
          offset + 1,
          inDim);
    }
  }

  // gradBias += gradOutput
  THTensor* buf = THTensor_(new)();
  THTensor_(sum)(buf, gradOutput, 0);
  THTensor_(cadd)(gradBias, gradBias, scale, buf);
  THTensor_(free)(buf);

  // Optional L2 regularization term.
  if (weightDecay != 0) {
    THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
  }
}
/* Non-dilated volumetric full (transposed) convolution, backward w.r.t. the
 * input. Implemented by forwarding to the dilated variant with all three
 * dilation factors fixed at 1. */
void THNN_(VolumetricFullConvolution_updateGradInput)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *gradInput,
  THTensor *weight,
  THTensor *finput,
  THTensor *fgradInput,     // only used by cuda impl
  int dT, int dW, int dH,   // stride
  int pT, int pW, int pH,   // padding
  int aT, int aW, int aH)   // extra output adjustment
{
  const int dilT = 1, dilW = 1, dilH = 1;  // no dilation
  THNN_(VolumetricFullDilatedConvolution_updateGradInput)(
      state, input, gradOutput, gradInput, weight, finput, fgradInput,
      dT, dW, dH, pT, pW, pH, dilT, dilW, dilH, aT, aW, aH);
}
/* Non-dilated volumetric full (transposed) convolution, forward pass.
 * Implemented by forwarding to the dilated variant with all three dilation
 * factors fixed at 1. */
void THNN_(VolumetricFullConvolution_updateOutput)(
  THNNState *state,
  THTensor *input,          // 4D or 5D (batch) tensor
  THTensor *output,
  THTensor *weight,         // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
  THTensor *bias,
  THTensor *finput,         // internal columns buffer
  THTensor *fgradInput,     // internal ones buffer
  int dT, int dW, int dH,   // stride of the convolution
  int pT, int pW, int pH,   // padding
  int aT, int aW, int aH)   // extra output adjustment
{
  const int dilT = 1, dilW = 1, dilH = 1;  // no dilation
  THNN_(VolumetricFullDilatedConvolution_updateOutput)(
      state, input, output, weight, bias, finput, fgradInput,
      dT, dW, dH, pT, pW, pH, dilT, dilW, dilH, aT, aW, aH);
}
Esempio n. 25
0
/*
 * Backward pass of plain (non-dilated) volumetric max pooling.
 * Delegates to the dilated variant with all dilation factors set to 1,
 * which makes it behave exactly like ordinary max pooling.
 */
void THNN_(VolumetricMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *indices,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH)
{
  /* Dilation of 1 in every dimension == no dilation. */
  const int dilT = 1, dilW = 1, dilH = 1;

  THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
          state, input, gradOutput, gradInput, indices,
          dT, dW, dH,
          pT, pW, pH,
          dilT, dilW, dilH);
}
/*
 * Gradient accumulation (weight + bias) for the non-dilated full
 * (transposed) 3D convolution. Pure delegation to the dilated
 * implementation with unit dilation in every dimension.
 */
void THNN_(VolumetricFullConvolution_accGradParameters)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *gradWeight,
  THTensor *gradBias,
  THTensor *finput,
  THTensor *fgradInput,
  int dT, int dW, int dH,   // stride
  int pT, int pW, int pH,   // padding
  int aT, int aW, int aH,   // extra output adjustment
  accreal scale_)
{
  /* Unit dilation: the dilated kernel degenerates to the plain one. */
  const int dilT = 1, dilW = 1, dilH = 1;

  THNN_(VolumetricFullDilatedConvolution_accGradParameters)(
      state, input, gradOutput,
      gradWeight, gradBias, finput, fgradInput,
      dT, dW, dH,
      pT, pW, pH,
      dilT, dilW, dilH,
      aT, aW, aH,
      scale_);
}
/* Esempio n. 27 */
/* 0 */
/*
 * Forward pass of SpatialConvolutionMM for a single sample:
 *   1. im2col: unfold `input` patches into the columns buffer `finput`;
 *   2. seed `output` with the bias (one constant per output plane),
 *      or with zeros when no bias tensor is given;
 *   3. accumulate weight * finput into a 2D view of `output` via GEMM.
 *
 * `output` is assumed to be preallocated as nOutputPlane x outputHeight x
 * outputWidth over contiguous storage (the 2D view below relies on that).
 */
static void THNN_(SpatialConvolutionMM_updateOutput_frame)(
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias,
          THTensor *finput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          long nInputPlane,
          long inputWidth,
          long inputHeight,
          long nOutputPlane,
          long outputWidth,
          long outputHeight)
{
  const long planeSize = outputHeight * outputWidth;
  long plane;
  THTensor *out2d;

  /* Gather every kernel patch of `input` into a column of `finput`. */
  THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
                       nInputPlane, inputWidth, inputHeight,
                       outputWidth, outputHeight);

  /* Initialize the accumulator: broadcast bias per plane, or zero out. */
  if (bias) {
    for (plane = 0; plane < nOutputPlane; plane++) {
      THVector_(fill)(
          output->storage->data + output->storageOffset
              + output->stride[0] * plane,
          THTensor_(get1d)(bias, plane),
          planeSize);
    }
  } else {
    THTensor_(zero)(output);
  }

  /* View output as (nOutputPlane x planeSize) and do the matrix product:
     out2d += weight * finput (beta = alpha = 1 keeps the bias seed). */
  out2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
                                      nOutputPlane, -1,
                                      planeSize, -1);
  THTensor_(addmm)(out2d, 1, out2d, 1, weight, finput);

  THTensor_(free)(out2d);
}
/* Esempio n. 28 */
/* 0 */
void THNN_(VolumetricMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THTensor *indices,
          int kT,
          int kW,
          int kH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH,
          bool ceilMode)
{
  THNN_(VolumetricDilatedMaxPooling_updateOutput)(
          state, input, output, indices,
          kT, kW, kH, dT, dW, dH,
          pT, pW, pH, 1, 1, 1, ceilMode);
}
/* Esempio n. 29 */
/* 0 */
/*
 * One-time setup of the MKL-DNN conversion primitive used by the
 * BatchNormalization backward pass.
 *
 * `primitives` is a THLongTensor used as an opaque slot array: raw
 * dnnPrimitive_t / dnnLayout_t / buffer pointers are stashed in its
 * storage as integers (see the BN_* / CV_* / BUFFER_* index macros).
 * This function reads the previously-created backward primitive
 * (BN_BACKWARD slot), builds a layout conversion between the user's
 * plain NCHW output layout and the layout the backward primitive
 * expects for dnnResourceDiffDst, and stores the resulting conversion
 * primitive and its scratch buffer back into the slot array.
 *
 * NOTE(review): `eps` is accepted but never used here — presumably kept
 * for signature symmetry with the forward init; confirm before removing.
 */
static void THNN_(BatchNormalization_MKLDNN_init_backward)(
          THLongTensor *primitives,
          int N,
          int outC,
          int outH,
          int outW,
	  double eps)
{
	dnnError_t err;

	/* Backward primitive created earlier and stashed as a raw pointer. */
	dnnPrimitive_t bn_backward = (dnnPrimitive_t)primitives->storage->data[BN_BACKWARD];
	/* MKL-DNN wants sizes/strides fastest-varying first: W, H, C, N. */
	size_t outputSize[dimension] = 	{outW,outH,outC,N};
	size_t outputStrides[dimension] = { 1, outW, outH * outW, outC * outH * outW };

	dnnLayout_t lt_user_output,lt_bn_backward_output=NULL;

	/* Reuse the user-side output layout if one was recorded; otherwise
	   create a fresh plain (NCHW) layout from the sizes/strides above. */
	if(primitives->storage->data[BN_LAYOUT_OUTPUT] == 0)
	{
		CHECK_ERR( dnnLayoutCreate_F32(&lt_user_output, dimension, outputSize, outputStrides) , err );
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get output layout FAIL......\n");
#endif
	}
	else{
		lt_user_output = (dnnLayout_t)primitives->storage->data[BN_LAYOUT_OUTPUT];
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get output layout OK\n");
#endif
	}

	/* Layout the backward primitive expects for its diff-dst input.
	   NOTE(review): return value is not CHECK_ERR-checked, and neither
	   layout appears to be released here — possible leak; verify whether
	   ownership is transferred elsewhere. */
	dnnLayoutCreateFromPrimitive_F32(&lt_bn_backward_output, bn_backward, dnnResourceDiffDst);
	dnnPrimitive_t cv_backward_output = NULL;real * buffer_backward_output = NULL;
	//backward conversion init
	CHECK_ERR( THNN_(init_conversion)(&cv_backward_output, &buffer_backward_output, lt_bn_backward_output, lt_user_output), err );

	//save the dnnPrimitive to THTensor(long int array)
	primitives->storage->data[CV_BN_BACKWARD_OUTPUT] = (long long)cv_backward_output;
	primitives->storage->data[BUFFER_BN_BACKWARD_OUTPUT] = (long long)buffer_backward_output;
}
/* Esempio n. 30 */
/* 0 */
/*
 * Forward pass of TemporalRowConvolution for a single sample:
 *   1. view `output` as an (inputFrameSize x 1 x nOutputFrame) batch so
 *      each feature row gets its own tiny GEMM;
 *   2. unfold `input` rows into the columns buffer `finput`;
 *   3. seed `output` with zeros, then (if present) with the per-row bias;
 *   4. accumulate the batched product weight * finput on top.
 */
static void THNN_(TemporalRowConvolution_updateOutput_frame)(
	THTensor *input,
	THTensor *output,
	THTensor *weight,
	THTensor *bias,
	THTensor *finput,
	int kW,
	int dW,
	int padW,
	int64_t inputFrameSize,
	int64_t nInputFrame,
	int64_t nOutputFrame) {

	int64_t row;

	/* Batched 3D view over the output storage: one 1 x nOutputFrame
	   matrix per input feature row. */
	THTensor *out3d = THTensor_(newWithStorage3d)(
		output->storage, output->storageOffset,
		inputFrameSize, -1,
		1, -1,
		nOutputFrame, -1);

	/* Unfold the temporal input into columns for the batched GEMM. */
	THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
	                         inputFrameSize, nInputFrame, nOutputFrame);

	THTensor_(zero)(output);

	/* Broadcast each row's bias across its nOutputFrame outputs. */
	if (bias != NULL) {
		for (row = 0; row < inputFrameSize; row++) {
			THVector_(fill)(
				THStorage_(data)(output->storage) + output->storageOffset
					+ output->stride[0] * row,
				THTensor_(get1d)(bias, row),
				nOutputFrame);
		}
	}

	/* out3d += weight * finput (beta = alpha = 1 keeps the bias seed). */
	THTensor_(baddbmm)(out3d, 1, out3d, 1, weight, finput);

	THTensor_(free)(out3d);
}