//Initialize MKL-DNN pooling primitives, layouts and buffers for one pooling
//layer and record every handle in the `primitives` array.
//  useMaxPooling: 1 = max pooling, otherwise average pooling
//  useCaffe:      use ceil mode for pooling output dim (Caffe-style); also
//                 selects the symmetric-padding primitive variant below
//input[MKLLayout] may carry an MKL layout from a previous MKL-based layer
//(NULL = plain user layout).  output[CPULayout] receives the user layout.
//NOTE(review): returns void, so a failed CHECK_ERR leaves the primitives
//array partially filled with no signal to the caller — confirm callers can
//tolerate that.
static void Init_f(
    long long * input, long long * output, long long * primitives,
    int N, int inC, int inH, int inW,
    int kH, int kW, int dH, int dW,
    int padH, int padW,
    int outC, int outH, int outW,
    int useMaxPooling, int useCaffe)
{
    dnnError_t err;
    //dimension arrays in MKL order {W, H, C, N}
    size_t inputSize[DIM4] = { inW, inH, inC, N};
    size_t outputSize[DIM4] = {outW, outH, outC, N};
    //NCHW strides — NOTE(review): inputStrides1/outputStrides1 are unused in
    //this function; candidates for removal
    size_t inputStrides1[DIM4] = {1, inW, inW*inH, inW*inH*inC};
    size_t outputStrides1[DIM4] = {1, outW, outW*outH, outW*outH*outC};
    //CHWN strides (batch dimension fastest-varying)
    size_t inputStrides[DIM4] = {N, N*inW, N*inW*inH, 1};
    size_t outputStrides[DIM4] = {N, N*outW, N*outW*outH, 1};
    size_t kernelSize[2] = { kW, kH};
    size_t kernelStride[2] = { dW, dH};
    //calculate the trailing (right/bottom) pad implied by the given output
    //size; if it equals the leading pad the padding is symmetric
    int padH2 = (outH-1)*dH + kH - inH - padH;
    int padW2 = (outW-1)*dW + kW - inW - padW;
    int symm = 0;
    if (padH2==padH && padW2==padW) symm = 1;
    if (padH2<0) padH2 = 0;
    if (padW2<0) padW2 = 0;
    //MKL expects negated pads; dim4 form carries distinct leading/trailing pads
    int pad_dim4[DIM4] = {-padW, -padH, -padW2,-padH2};
    int pad_dim2[DIM2] = {-padW, -padH};
    int inputOffset[DIM2] = { 0, 0};  //NOTE(review): unused
    //create user (CHWN) layouts for input and output
    dnnLayout_t lt_out = NULL, lt_in = NULL;
    CHECK_ERR( dnnLayoutCreate_F32(&lt_in, DIM4, inputSize, inputStrides) , err );
    CHECK_ERR( dnnLayoutCreate_F32(&lt_out, DIM4, outputSize, outputStrides) , err );
    primitives[POOL_L_I] = (long long)lt_in;
    primitives[POOL_L_O] = (long long)lt_out;
    //pick the MKL input layout: reuse the upstream layer's MKL layout when
    //present, otherwise fall back to the user layout
    dnnLayout_t lt_in_f = (dnnLayout_t)input[MKLLayout];
    if(lt_in_f==NULL)
    {
        lt_in_f = lt_in;
    }
    primitives[POOL_L_F_I] = (long long)lt_in_f;
    //create forward/backward pooling primitives; symmetric padding (or Caffe
    //mode) uses the 2-element pad + dnnBorderZeros, otherwise the 4-element
    //asymmetric pad + dnnBorderZerosAsymm
    dnnPrimitive_t pool_f = NULL, pool_b = NULL;
    dnnPrimitiveAttributes_t attributes = NULL;
    CHECK_ERR( dnnPrimitiveAttributesCreate_F32(&attributes), err );
    if (useMaxPooling==1)
    {
        if(useCaffe || symm)
        {
            CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes,
                dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err );
            CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes,
                dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err );
        }
        else
        {
            CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes,
                dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err );
            CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes,
                dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err );
        }
    }
    else
    {
        if(useCaffe || symm)
        {
            CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes,
                dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err );
            CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes,
                dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err );
        }
        else
        {
            CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes,
                dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err );
            CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes,
                dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err );
        }
    }
    primitives[POOLING_FORWARD] = (long long)pool_f;
    primitives[POOLING_BACKWARD] = (long long)pool_b;
    //query the layouts the MKL primitives expect for each resource.
    //NOTE(review): lt_in_b and lt_out_b are both queried from the FORWARD
    //primitive (dnnResourceSrc/Dst); backward layouts are usually queried
    //from pool_b with dnnResourceDiffSrc/DiffDst — confirm this is intended.
    dnnLayout_t lt_out_f = NULL, lt_out_b = NULL, lt_in_b = NULL;
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_f, pool_f, dnnResourceDst), err );
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_in_b, pool_f, dnnResourceSrc), err );
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_out_b, pool_f, dnnResourceDst), err );
    primitives[POOL_L_F_O] = (long long)lt_out_f;
    primitives[POOL_L_B_I] = (long long)lt_in_b;
    primitives[POOL_L_B_O] = (long long)lt_out_b;
    //create work space , to record max location?
    //(workspace buffer required by the pooling primitive; presumably stores
    //max indices for the backward pass — TODO confirm against MKL docs)
    dnnLayout_t lt_space = NULL;
    float* buf_space = NULL;
    CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_space, pool_f, dnnResourceWorkspace), err );
    CHECK_ERR( dnnAllocateBuffer_F32((void**)&buf_space, lt_space) , err );
    primitives[BUFFER_POOLING_FORWARD_WORKSPACE] = (long long)buf_space;
    //publish the user layout of the output and allocate the MKL-layout
    //output buffer the forward primitive will write into
    output[CPULayout] = (long long)lt_out;
    float* buf_out_f = NULL;
    CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_out_f), lt_out_f), err );
    primitives[BUFFER_POOLING_FORWARD_OUTPUT] = (long long)buf_out_f;
ERR_RETURN:
    return;
}
static int Conv_f_init( long long * input, long long * output, long long * weight, long long * primitives, int N, int inC, int inH, int inW, int kH, int kW, int dH, int dW, int padH, int padW, int outC, int outH, int outW, int hasBias) { dnnError_t err; //init dimensions size_t inputSize[DIM4] = { inW, inH, inC, N}; size_t outputSize[DIM4] = {outW, outH, outC, N}; size_t filterSize[DIM4] = { kW, kH, inC, outC}; size_t stride[DIM2] = { dW, dH}; int pad[DIM2] = {-padW, -padH}; size_t biasSize[1] = {outC}; size_t biasStrides[1] = { 1 }; //using NCHW layout size_t filterStridesNCHW[DIM4] = {1, kW, kW*kH, kW*kH*inC}; size_t inputStridesNCHW[DIM4] = {1, inW, inW*inH, inW*inH*inC}; size_t outputStridesNCHW[DIM4] = {1, outW, outW*outH, outW*outH*outC}; //CHWN size_t filterStridesCHWN[DIM4] = {outC, outC*kW, outC*kW*kH, 1}; size_t inputStridesCHWN[DIM4] = {N, N*inW, N*inW*inH, 1}; size_t outputStridesCHWN[DIM4] = {N, N*outW, N*outW*outH, 1}; //create execute and save into primitives dnnPrimitiveAttributes_t attributes = NULL; CHECK_ERR( dnnPrimitiveAttributesCreate_F32(&attributes), err ); dnnPrimitive_t conv_f = NULL; //forward operation dnnPrimitive_t conv_bdata = NULL; //backward calculate gradient input dnnPrimitive_t conv_bfilter = NULL; //backward calculate gradient filter(weight) dnnPrimitive_t conv_b_bias = NULL; //backward bias //create layout and save //lt_in, layout of input in NCHW form //lt_filter_f, required layout (MKL layout) for forward for weight //lt_out_bfilter, required layout for backward weight update for output dnnLayout_t lt_in_NCHW, lt_filter, lt_out_NCHW, lt_in_CHWN, lt_out_CHWN, lt_bias_CHWN=NULL; dnnLayout_t lt_in_f, lt_filter_f, lt_out_f, lt_bias_f; dnnLayout_t lt_in_bdata, lt_filter_bdata, lt_out_bdata, lt_bias_bdata; dnnLayout_t lt_in_bfilter, lt_filter_bfilter, lt_out_bfilter,lt_bias_bias, lt_out_bias; if (hasBias) { CHECK_ERR(dnnConvolutionCreateForwardBias_F32( &conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, 
outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR(dnnConvolutionCreateForwardBias_F32( &conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR( dnnLayoutCreate_F32(<_bias_CHWN, 1, biasSize, biasStrides), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_bias_f, conv_f, dnnResourceBias ) , err ); CHECK_ERR(dnnConvolutionCreateBackwardBias_F32( &conv_b_bias, attributes, dnnAlgorithmConvolutionDirect, DIM4, outputSize),err); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_bias_bias, conv_b_bias, dnnResourceDiffBias) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_bias, conv_b_bias, dnnResourceDiffDst) , err ); } else CHECK_ERR(dnnConvolutionCreateForward_F32( &conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR(dnnConvolutionCreateBackwardData_F32( &conv_bdata, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR(dnnConvolutionCreateBackwardFilter_F32(&conv_bfilter, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); primitives[FORWARD_INDEX] = (long long)conv_f; primitives[BWD_DATA_INDEX] = (long long)conv_bdata; primitives[BWD_FILTER_INDEX] = (long long)conv_bfilter; primitives[BDW_BIAS_INDEX] = (long long)conv_b_bias; CHECK_ERR( dnnLayoutCreate_F32(<_in_NCHW, DIM4, inputSize, inputStridesNCHW), err ); CHECK_ERR( dnnLayoutCreate_F32(<_in_CHWN, DIM4, inputSize, inputStridesCHWN), err ); CHECK_ERR( dnnLayoutCreate_F32(<_filter, DIM4, filterSize, filterStridesCHWN), err ); CHECK_ERR( dnnLayoutCreate_F32(<_out_NCHW, DIM4, outputSize, outputStridesNCHW), err ); CHECK_ERR( dnnLayoutCreate_F32(<_out_CHWN, DIM4, outputSize, outputStridesCHWN), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_in_f, conv_f, dnnResourceSrc ) , err ); 
CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_filter_f, conv_f, dnnResourceFilter), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_f, conv_f, dnnResourceDst ) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_in_bdata, conv_bdata, dnnResourceDiffSrc) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_filter_bdata, conv_bdata, dnnResourceFilter) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_bdata, conv_bdata, dnnResourceDiffDst) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_in_bfilter, conv_bfilter, dnnResourceSrc) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_filter_bfilter, conv_bfilter, dnnResourceDiffFilter) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_bfilter, conv_bfilter, dnnResourceDiffDst) , err ); //here assume NCHW (CHWN will be transposed) primitives[L_I] = (long long)lt_in_NCHW; primitives[L_O] = (long long)lt_out_NCHW; primitives[L_W] = (long long)lt_filter; primitives[L_B] = (long long)lt_bias_CHWN; primitives[L_F_I] = (long long)lt_in_f; primitives[L_F_O] = (long long)lt_out_f; primitives[L_F_W] = (long long)lt_filter_f; primitives[L_F_B] = (long long)lt_bias_f; primitives[L_BD_I] = (long long)lt_in_bdata; primitives[L_BD_O] = (long long)lt_out_bdata; primitives[L_BD_W] = (long long)lt_filter_bdata; primitives[L_BF_I] = (long long)lt_in_bfilter; primitives[L_BF_O] = (long long)lt_out_bfilter; primitives[L_BF_W] = (long long)lt_filter_bfilter; primitives[L_I_CHWN] = (long long)lt_in_CHWN; primitives[L_O_CHWN] = (long long)lt_out_CHWN; primitives[L_B_B] = (long long)lt_bias_bias; primitives[L_B_O] = (long long)lt_out_bias; //input may have user layout (from raw image data,continuous NCHW ) // or maybe mkl layout (is previous mkl-based layer's output) dnnLayout_t lt_in_real = (dnnLayout_t)input[MKLLayout]; if(lt_in_real==NULL) lt_in_real = lt_in_NCHW; //create conversion and buff if necessary dnnPrimitive_t cv_in_f = NULL; float * buf_in_f = NULL; CHECK_ERR( try_convert(&cv_in_f, &buf_in_f, 
lt_in_real, lt_in_f) , err ); //create transpose if necessary float* newPtr = NULL; if (input[MKLLayout] == 0) { newPtr = (float*)malloc(inC*inH*inW*N*sizeof(float)); } primitives[BUFFER_TRANS_INPUT] = (long long)newPtr; //save conversion and buff primitives[BUFFER_FORWARD_INPUT] = (long long)buf_in_f; primitives[CONVERT_FORWARD_INPUT] = (long long)cv_in_f; //filter layout dnnPrimitive_t cv_filter_f = NULL; float * buf_filter_f = NULL; CHECK_ERR( try_convert(&cv_filter_f, &buf_filter_f, lt_filter, lt_filter_f), err ); primitives[CONVERT_FORWARD_FILTER] = (long long)cv_filter_f; primitives[BUFFER_FORWARD_FILTER] = (long long)buf_filter_f; //save user layout for output, and create mkl buffer //output always has mkl buffer and recorded in layer's primitive output[CPULayout] = (long long)lt_out_CHWN; float* buf_out_f = NULL; CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_out_f), lt_out_f), err ); primitives[BUFFER_FORWARD_OUTPUT] = (long long)buf_out_f; //for bias dnnPrimitive_t cv_bias_f = NULL; float * buf_bias_f = NULL; dnnPrimitive_t cv_bias_b = NULL; float * buf_bias_b = NULL; if (hasBias) { CHECK_ERR( try_convert(&cv_bias_f, &buf_bias_f, lt_bias_CHWN, lt_bias_f), err ); } primitives[CONVERT_FORWARD_BIAS] = (long long)cv_bias_f; primitives[BUFFER_FORWARD_BIAS] = (long long)buf_bias_f; return 0; ERR_RETURN: return 1; }