Example #1
static void THNN_(BatchNormalization_MKLDNN_init_backward)(
          THLongTensor *primitives,
          int N,
          int outC,
          int outH,
          int outW,
          double eps)
{
	dnnError_t err;

	dnnPrimitive_t bn_backward = (dnnPrimitive_t)primitives->storage->data[BN_BACKWARD];
	size_t outputSize[dimension] = {outW, outH, outC, N};
	size_t outputStrides[dimension] = { 1, outW, outH * outW, outC * outH * outW };

	dnnLayout_t lt_user_output = NULL, lt_bn_backward_output = NULL;

	if(primitives->storage->data[BN_LAYOUT_OUTPUT] == 0)
	{
		CHECK_ERR( dnnLayoutCreate_F32(&lt_user_output, dimension, outputSize, outputStrides) , err );
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get output layout FAIL......\n");
#endif
	}
	else{
		lt_user_output = (dnnLayout_t)primitives->storage->data[BN_LAYOUT_OUTPUT];
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get output layout OK\n");
#endif
	}

	dnnLayoutCreateFromPrimitive_F32(&lt_bn_backward_output, bn_backward, dnnResourceDiffDst);
	dnnPrimitive_t cv_backward_output = NULL;
	real *buffer_backward_output = NULL;
	//backward conversion init
	CHECK_ERR( THNN_(init_conversion)(&cv_backward_output, &buffer_backward_output, lt_bn_backward_output, lt_user_output), err );

	//save the conversion primitive and buffer into the THLongTensor (long int array)
	primitives->storage->data[CV_BN_BACKWARD_OUTPUT] = (long long)cv_backward_output;
	primitives->storage->data[BUFFER_BN_BACKWARD_OUTPUT] = (long long)buffer_backward_output;
}
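For context, a minimal sketch of how the saved conversion might later be applied in the backward pass. The gradOutput_data pointer and the surrounding call site are assumptions, not the actual THNN code; only the index names and dnnConversionExecute_F32 come from the snippet above and the MKL-DNN API.

	//hypothetical call site: convert the user-layout gradOutput into the MKL
	//layout expected by bn_backward, reusing the primitive/buffer saved above
	//(assumes dnnError_t err and gradOutput_data are in scope)
	dnnPrimitive_t cv = (dnnPrimitive_t)primitives->storage->data[CV_BN_BACKWARD_OUTPUT];
	real *buf = (real *)primitives->storage->data[BUFFER_BN_BACKWARD_OUTPUT];
	if (cv != NULL) {
		CHECK_ERR( dnnConversionExecute_F32(cv, gradOutput_data, buf), err );
	}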
Example #2
//useMaxPooling: max pooling when 1, average pooling otherwise
//useCaffe: use Caffe-style ceil mode when computing the pooling output size
static void Init_f(
    long long * input,
    long long * output,
    long long * primitives,
    int N, int inC, int inH, int inW,
    int kH, int kW, int dH, int dW,
    int padH, int padW,
    int outC, int outH, int outW,
    int useMaxPooling,
    int useCaffe)
{

    dnnError_t err;

    //dimensions
    size_t inputSize[DIM4]     = { inW,  inH,  inC, N};
    size_t outputSize[DIM4]    = {outW, outH, outC, N};
    //NCHW strides (declared for reference; the CHWN strides below are used)
    size_t inputStrides1[DIM4]  = {1,  inW,   inW*inH,    inW*inH*inC};
    size_t outputStrides1[DIM4] = {1, outW, outW*outH, outW*outH*outC};

    //CHWN
    size_t inputStrides[DIM4]  = {N,  N*inW,   N*inW*inH,    1};
    size_t outputStrides[DIM4] = {N, N*outW, N*outW*outH, 1};

    size_t kernelSize[2]       = {   kW,   kH};
    size_t kernelStride[2]     = {   dW,   dH};

    //compute the extra bottom/right padding implied by the requested output
    //size; if it equals the declared pad, the padding is symmetric
    int padH2 = (outH-1)*dH + kH - inH - padH;
    int padW2 = (outW-1)*dW + kW - inW - padW;
    int symm = 0;
    if (padH2==padH && padW2==padW) symm = 1;
    if (padH2<0)    padH2 = 0;
    if (padW2<0)    padW2 = 0;

    int pad_dim4[DIM4]      = {-padW, -padH, -padW2,-padH2};
    int pad_dim2[DIM2]      = {-padW, -padH};
    int inputOffset[DIM2]   = {    0,    0};

    //create user layout
    dnnLayout_t lt_out = NULL, lt_in = NULL;
    CHECK_ERR( dnnLayoutCreate_F32(&lt_in,  DIM4,  inputSize,  inputStrides) , err );
    CHECK_ERR( dnnLayoutCreate_F32(&lt_out, DIM4, outputSize, outputStrides) , err );
    primitives[POOL_L_I] = (long long)lt_in;
    primitives[POOL_L_O] = (long long)lt_out;

    //create MKL input layout
    dnnLayout_t lt_in_f = (dnnLayout_t)input[MKLLayout];
    if(lt_in_f==NULL)
    {
        lt_in_f = lt_in;
    }
    primitives[POOL_L_F_I] = (long long)lt_in_f;

    //create operation
    dnnPrimitive_t pool_f = NULL, pool_b = NULL;
    dnnPrimitiveAttributes_t attributes = NULL;
    CHECK_ERR( dnnPrimitiveAttributesCreate_F32(&attributes), err );

    //pick the algorithm, pad descriptor and border mode once: symmetric
    //padding uses the 2-element pad with dnnBorderZeros, asymmetric padding
    //uses the 4-element pad with dnnBorderZerosAsymm
    dnnAlgorithm_t algo = (useMaxPooling==1) ? dnnAlgorithmPoolingMax : dnnAlgorithmPoolingAvg;
    const int *pad      = (useCaffe || symm) ? pad_dim2 : pad_dim4;
    dnnBorder_t border  = (useCaffe || symm) ? dnnBorderZeros : dnnBorderZerosAsymm;
    CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes, algo, lt_in_f, kernelSize, kernelStride, pad, border), err );
    CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes, algo, lt_in_f, kernelSize, kernelStride, pad, border), err );
    primitives[POOLING_FORWARD]  = (long long)pool_f;
    primitives[POOLING_BACKWARD] = (long long)pool_b;

    //create MKL layouts for the output and the backward pass (queried from the forward primitive)
    dnnLayout_t lt_out_f = NULL, lt_out_b = NULL, lt_in_b = NULL;
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_f, pool_f, dnnResourceDst),   err );
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_in_b,  pool_f, dnnResourceSrc),   err );
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_b, pool_f, dnnResourceDst),   err );
    primitives[POOL_L_F_O] = (long long)lt_out_f;
    primitives[POOL_L_B_I] = (long long)lt_in_b;
    primitives[POOL_L_B_O] = (long long)lt_out_b;

    //create the workspace buffer (records max locations for max-pooling backward)
    dnnLayout_t lt_space = NULL;
    float* buf_space = NULL;
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_space, pool_f, dnnResourceWorkspace), err );
    CHECK_ERR( dnnAllocateBuffer_F32((void**)&buf_space, lt_space) , err );
    primitives[BUFFER_POOLING_FORWARD_WORKSPACE] = (long long)buf_space;

    //output layout
    output[CPULayout] = (long long)lt_out;
    float* buf_out_f = NULL;
    CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_out_f), lt_out_f), err );
    primitives[BUFFER_POOLING_FORWARD_OUTPUT] = (long long)buf_out_f;

ERR_RETURN:
    return;
}
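For reference, a minimal sketch of how the primitives saved by Init_f might later be executed. The in_data pointer and the surrounding scope (dnnError_t err, the CHECK_ERR macro) are assumptions; the resource indices are the standard MKL-DNN dnnResourceType_t values used by pooling.

    //hypothetical forward call: bind resources and run the saved primitive
    void *resources[dnnResourceNumber] = {0};
    resources[dnnResourceSrc]       = in_data;  //assumed input buffer pointer
    resources[dnnResourceDst]       = (void*)primitives[BUFFER_POOLING_FORWARD_OUTPUT];
    resources[dnnResourceWorkspace] = (void*)primitives[BUFFER_POOLING_FORWARD_WORKSPACE];
    CHECK_ERR( dnnExecute_F32((dnnPrimitive_t)primitives[POOLING_FORWARD], resources), err );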
Example #3
static void THNN_(BatchNormalization_MKLDNN_init_forward)(
          THLongTensor *primitives,
          int N,
          int inC,
          int inH,
          int inW,
          double eps)
{
	dnnError_t err;
	dnnPrimitive_t bn_forward = NULL;
	dnnPrimitive_t bn_backward = NULL;
	dnnPrimitive_t bn_bwd_scaleshift = NULL;

	size_t inputSize[dimension] = {inW, inH, inC, N};
	size_t inputStrides[dimension] = { 1, inW, inH * inW, inC * inH * inW };

	dnnLayout_t lt_user_input = NULL;

	if(primitives->storage->data[BN_LAYOUT_INPUT] == 0)
	{
		CHECK_ERR( dnnLayoutCreate_F32(&lt_user_input, dimension, inputSize, inputStrides) , err );
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get input layout FAIL......\n");
#endif
	}
	else{
		lt_user_input = (dnnLayout_t)primitives->storage->data[BN_LAYOUT_INPUT];
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get input layout OK\n");
#endif
	}

	CHECK_ERR( dnnBatchNormalizationCreateForward_F32(&bn_forward, NULL, lt_user_input, eps), err );
	CHECK_ERR( dnnBatchNormalizationCreateBackwardData_F32(&bn_backward, NULL, lt_user_input, eps), err );
	CHECK_ERR( dnnBatchNormalizationCreateBackwardScaleShift_F32(&bn_bwd_scaleshift, NULL, lt_user_input, eps), err );
	

	dnnLayout_t lt_bn_forward_workspace, lt_bn_forward_scaleshift, lt_bn_forward_output, lt_bn_backward_input;
	real * buffer_forward_workspace = NULL; real * buffer_forward_scaleshift = NULL;
	real * buffer_forward_output = NULL; real * buffer_backward_input = NULL;
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_workspace, bn_forward, dnnResourceWorkspace), err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_output, bn_forward, dnnResourceDst), err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_scaleshift, bn_forward, dnnResourceScaleShift), err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_backward_input, bn_backward, dnnResourceDiffSrc), err );

	CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_workspace), lt_bn_forward_workspace), err );
	CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_scaleshift), lt_bn_forward_scaleshift), err );
	//CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_output), lt_bn_forward_output), err );
	//CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_backward_input), lt_bn_backward_input), err );

	//the forward output needs its own MKL buffer only if its layout size
	//differs from the plain contiguous size (inW*inH*inC*N floats)
	int size1 = dnnLayoutGetMemorySize_F32(lt_bn_forward_output);
	int size2 = inW*inH*inC*N*sizeof(float);
	if(size1 == size2)
	{
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN forward ouput layout match OK\n");
#endif
	}
	else
	{
		CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_output), lt_bn_forward_output), err );
		fprintf(stderr ,"MKLDNN BN forward ouput layout match FAIL, size1 = %d, size2 = %d \n", size1, size2);
	}

	size1 = dnnLayoutGetMemorySize_F32(lt_bn_backward_input);
	if(size1 == size2)
	{
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN MaxPooling bwddata input layout match OK\n");
#endif
	}
	else
	{
		CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_backward_input), lt_bn_backward_input), err );
		fprintf(stderr ,"MKLDNN MaxPooling bwddata input layout match FAIL, size1 = %d, size2 = %d \n", size1, size2);
	}

	//save the primitives, layouts and buffers into the THLongTensor (long int array)
	primitives->storage->data[BN_LAYOUT_FORWARD_OUTPUT] = (long long)lt_bn_forward_output;
	primitives->storage->data[BN_LAYOUT_BACKWARD_INPUT] = (long long)lt_bn_backward_input;

	primitives->storage->data[BN_FORWARD] = (long long)bn_forward;
	primitives->storage->data[BN_BACKWARD] = (long long)bn_backward;
	primitives->storage->data[BN_SCALESHIFT] = (long long)bn_bwd_scaleshift;
	primitives->storage->data[BUFFER_BN_FORWARD_WORKSPACE] = (long long)buffer_forward_workspace;
	primitives->storage->data[BUFFER_BN_FORWARD_SCALESHIFT] = (long long)buffer_forward_scaleshift;
	primitives->storage->data[BUFFER_BN_FORWARD_OUTPUT] = (long long)buffer_forward_output;
	primitives->storage->data[BUFFER_BN_BACKWARD_INPUT] = (long long)buffer_backward_input;
	primitives->storage->data[BUFFER_BN_BACKWARD_WORKSPACE] = (long long)buffer_forward_workspace;
}
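A hedged sketch of how the forward primitive saved here might be run. The weight, bias, input_data and output_data pointers are assumptions, not shown in this listing; the 2*inC scale/shift packing (scales first, then shifts) is how the MKL-DNN batch-normalization scale/shift buffer is laid out.

	//hypothetical: fill the scale/shift buffer and execute the saved primitive
	real *ss = (real *)primitives->storage->data[BUFFER_BN_FORWARD_SCALESHIFT];
	for (int c = 0; c < inC; c++)
	{
		ss[c]       = weight ? weight[c] : 1.0f;  //scale (gamma)
		ss[inC + c] = bias   ? bias[c]   : 0.0f;  //shift (beta)
	}
	void *res[dnnResourceNumber] = {0};
	res[dnnResourceSrc]        = input_data;
	res[dnnResourceDst]        = output_data;
	res[dnnResourceScaleShift] = ss;
	res[dnnResourceWorkspace]  = (void *)primitives->storage->data[BUFFER_BN_FORWARD_WORKSPACE];
	CHECK_ERR( dnnExecute_F32((dnnPrimitive_t)primitives->storage->data[BN_FORWARD], res), err );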
Example #4
static int Conv_f_init(
  long long * input,
  long long * output,
  long long * weight,
  long long * primitives,
  int N, int inC, int inH, int inW,
  int kH, int kW,
  int dH, int dW,
  int padH, int padW,
  int outC, int outH, int outW,
  int hasBias)
{
    dnnError_t err;

    //init dimensions
    size_t inputSize[DIM4]  =   { inW,  inH,  inC, N};
    size_t outputSize[DIM4] =   {outW, outH, outC, N};
    size_t filterSize[DIM4] =   {  kW,   kH,  inC, outC};
    size_t stride[DIM2]     =   {  dW,   dH};
    int    pad[DIM2]        =   {-padW, -padH};
    size_t biasSize[1]      =   {outC};
    size_t biasStrides[1]   =   { 1 };

    //NCHW strides
    size_t filterStridesNCHW[DIM4] = {1,  kW,      kW*kH,     kW*kH*inC};
    size_t inputStridesNCHW[DIM4]  = {1,  inW,   inW*inH,    inW*inH*inC};
    size_t outputStridesNCHW[DIM4] = {1, outW, outW*outH, outW*outH*outC};

    //CHWN
    size_t filterStridesCHWN[DIM4] = {outC,  outC*kW,      outC*kW*kH,     1};
    size_t inputStridesCHWN[DIM4]  = {N,  N*inW,   N*inW*inH,    1};
    size_t outputStridesCHWN[DIM4] = {N,  N*outW,  N*outW*outH,  1};

    //create the execution primitives and save them into primitives
    dnnPrimitiveAttributes_t attributes = NULL;
    CHECK_ERR( dnnPrimitiveAttributesCreate_F32(&attributes), err );
    dnnPrimitive_t conv_f       = NULL;    //forward operation
    dnnPrimitive_t conv_bdata   = NULL;    //backward gradient w.r.t. input
    dnnPrimitive_t conv_bfilter = NULL;    //backward gradient w.r.t. filter (weight)
    dnnPrimitive_t conv_b_bias  = NULL;    //backward gradient w.r.t. bias

    //create layouts and save them
    //lt_in_NCHW: layout of the input in NCHW form
    //lt_filter_f: MKL layout required by the forward pass for the weight
    //lt_out_bfilter: layout required by the backward filter update for the output
    dnnLayout_t lt_in_NCHW,    lt_filter,         lt_out_NCHW,    lt_in_CHWN,  lt_out_CHWN,  lt_bias_CHWN = NULL;
    dnnLayout_t lt_in_f,       lt_filter_f,       lt_out_f,       lt_bias_f = NULL;
    dnnLayout_t lt_in_bdata,   lt_filter_bdata,   lt_out_bdata;
    dnnLayout_t lt_in_bfilter, lt_filter_bfilter, lt_out_bfilter, lt_bias_bias = NULL, lt_out_bias = NULL;

    if (hasBias)
    {
        CHECK_ERR( dnnConvolutionCreateForwardBias_F32(&conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros), err );
        CHECK_ERR( dnnLayoutCreate_F32(&lt_bias_CHWN, 1, biasSize, biasStrides), err );
        CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bias_f, conv_f, dnnResourceBias), err );
        CHECK_ERR( dnnConvolutionCreateBackwardBias_F32(&conv_b_bias, attributes, dnnAlgorithmConvolutionDirect, DIM4, outputSize), err );
        CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bias_bias, conv_b_bias, dnnResourceDiffBias), err );
        CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_bias,  conv_b_bias, dnnResourceDiffDst ), err );
    }
    else
    {
        CHECK_ERR( dnnConvolutionCreateForward_F32(&conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros), err );
    }
    CHECK_ERR( dnnConvolutionCreateBackwardData_F32(  &conv_bdata,   attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros), err );
    CHECK_ERR( dnnConvolutionCreateBackwardFilter_F32(&conv_bfilter, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros), err );

    primitives[FORWARD_INDEX]    = (long long)conv_f;
    primitives[BWD_DATA_INDEX]   = (long long)conv_bdata;
    primitives[BWD_FILTER_INDEX] = (long long)conv_bfilter;
    primitives[BDW_BIAS_INDEX]   = (long long)conv_b_bias;

	CHECK_ERR( dnnLayoutCreate_F32(&lt_in_NCHW,     DIM4, inputSize,  inputStridesNCHW),  err );
	CHECK_ERR( dnnLayoutCreate_F32(&lt_in_CHWN,     DIM4, inputSize,  inputStridesCHWN),  err );
	CHECK_ERR( dnnLayoutCreate_F32(&lt_filter,      DIM4, filterSize, filterStridesCHWN), err );
    CHECK_ERR( dnnLayoutCreate_F32(&lt_out_NCHW,    DIM4, outputSize, outputStridesNCHW), err );
    CHECK_ERR( dnnLayoutCreate_F32(&lt_out_CHWN,    DIM4, outputSize, outputStridesCHWN), err );

    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_in_f,     conv_f, dnnResourceSrc   ) , err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_filter_f, conv_f, dnnResourceFilter), err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_f,    conv_f, dnnResourceDst   ) , err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_in_bdata,     conv_bdata, dnnResourceDiffSrc) , err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_filter_bdata, conv_bdata, dnnResourceFilter) , err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_bdata,    conv_bdata, dnnResourceDiffDst) , err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_in_bfilter,     conv_bfilter, dnnResourceSrc) , err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_filter_bfilter, conv_bfilter, dnnResourceDiffFilter) , err );
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_bfilter,    conv_bfilter, dnnResourceDiffDst) , err );

    //NCHW is assumed here (CHWN input is transposed first)
    primitives[L_I]    = (long long)lt_in_NCHW;
    primitives[L_O]    = (long long)lt_out_NCHW;
    primitives[L_W]    = (long long)lt_filter;
    primitives[L_B]    = (long long)lt_bias_CHWN;
    primitives[L_F_I]  = (long long)lt_in_f;
    primitives[L_F_O]  = (long long)lt_out_f;
    primitives[L_F_W]  = (long long)lt_filter_f;
    primitives[L_F_B]  = (long long)lt_bias_f;
    primitives[L_BD_I] = (long long)lt_in_bdata;
    primitives[L_BD_O] = (long long)lt_out_bdata;
    primitives[L_BD_W] = (long long)lt_filter_bdata;
    primitives[L_BF_I] = (long long)lt_in_bfilter;
    primitives[L_BF_O] = (long long)lt_out_bfilter;
    primitives[L_BF_W] = (long long)lt_filter_bfilter;
    primitives[L_I_CHWN] = (long long)lt_in_CHWN;
    primitives[L_O_CHWN] = (long long)lt_out_CHWN;
    primitives[L_B_B]    = (long long)lt_bias_bias;
    primitives[L_B_O]    = (long long)lt_out_bias;
    //the input either carries a user layout (raw image data, contiguous NCHW)
    //or an MKL layout (the output of a previous MKL-based layer)
    dnnLayout_t lt_in_real = (dnnLayout_t)input[MKLLayout];
    if(lt_in_real==NULL) lt_in_real = lt_in_NCHW;
    //create conversion and buff if necessary
    dnnPrimitive_t cv_in_f = NULL;
    float * buf_in_f = NULL;
    CHECK_ERR( try_convert(&cv_in_f, &buf_in_f, lt_in_real, lt_in_f) , err );

    //create transpose if necessary
    float* newPtr = NULL;
    if (input[MKLLayout] == 0)
    {
        newPtr = (float*)malloc(inC*inH*inW*N*sizeof(float));
    }
    primitives[BUFFER_TRANS_INPUT] = (long long)newPtr;

    //save conversion and buff
    primitives[BUFFER_FORWARD_INPUT]  = (long long)buf_in_f;
    primitives[CONVERT_FORWARD_INPUT] = (long long)cv_in_f;

    //filter layout
    dnnPrimitive_t cv_filter_f = NULL; float * buf_filter_f = NULL;
    CHECK_ERR( try_convert(&cv_filter_f, &buf_filter_f, lt_filter, lt_filter_f), err );
    primitives[CONVERT_FORWARD_FILTER] = (long long)cv_filter_f;
    primitives[BUFFER_FORWARD_FILTER]  = (long long)buf_filter_f;

    //save the user layout for the output and create the MKL buffer;
    //the output always has an MKL buffer, recorded in the layer's primitives
    output[CPULayout] = (long long)lt_out_CHWN;
    float* buf_out_f = NULL;
    CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_out_f), lt_out_f), err );
    primitives[BUFFER_FORWARD_OUTPUT] = (long long)buf_out_f;

    //for bias
    dnnPrimitive_t cv_bias_f = NULL; float * buf_bias_f = NULL;
    dnnPrimitive_t cv_bias_b = NULL; float * buf_bias_b = NULL;
    if (hasBias)
    {
        CHECK_ERR( try_convert(&cv_bias_f, &buf_bias_f, lt_bias_CHWN, lt_bias_f), err );
    }
    primitives[CONVERT_FORWARD_BIAS] = (long long)cv_bias_f;
    primitives[BUFFER_FORWARD_BIAS]  = (long long)buf_bias_f;

    return 0;

ERR_RETURN:

    return 1;
}
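The try_convert helper used above is not shown in these examples. Below is a plausible sketch reconstructed from how it is called: its exact signature, the behavior of CHECK_ERR (assumed to jump to ERR_RETURN on failure), and the decision to allocate the buffer in the destination layout are all assumptions; the three MKL-DNN calls themselves are real API.

//plausible sketch: create a conversion primitive and an intermediate buffer
//only when the two layouts actually differ
static dnnError_t try_convert(dnnPrimitive_t *cv, float **buf,
                              dnnLayout_t from, dnnLayout_t to)
{
    dnnError_t err = E_SUCCESS;
    *cv = NULL;
    *buf = NULL;
    if (!dnnLayoutCompare_F32(from, to))  //returns 1 when layouts match
    {
        CHECK_ERR( dnnConversionCreate_F32(cv, from, to), err );
        CHECK_ERR( dnnAllocateBuffer_F32((void **)buf, to), err );
    }
    return E_SUCCESS;

ERR_RETURN:
    return err;
}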