static void Init_b(long long * gradIn, long long * gradOut, long long * primitives) { dnnError_t err; //gradOut, layout is user or mkl dnnLayout_t lt_out_b = (dnnLayout_t)primitives[POOL_L_B_O]; dnnLayout_t lt_out = (dnnLayout_t)gradOut[MKLLayout]; if (lt_out==NULL) lt_out = (dnnLayout_t)primitives[POOL_L_O]; //create conversion and buff if necessary dnnPrimitive_t cv_out_b = NULL; float * buf_out_b = NULL; CHECK_ERR( try_convert(&cv_out_b, &buf_out_b, lt_out, lt_out_b) , err ); //save primitives[CV_POOLING_BACKWARD_OUTPUT] = (long long)cv_out_b; primitives[BUFFER_POOLING_BACKWARD_OUTPUT] = (long long)buf_out_b; //gradIn, layout gradIn[CPULayout] = primitives[POOL_L_I]; dnnLayout_t lt_in_b = (dnnLayout_t)primitives[POOL_L_B_I]; float* buf_in_b = NULL; CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_in_b), lt_in_b), err ); primitives[BUFFER_POOLING_BACKWARD_INPUT] = (long long)buf_in_b; ERR_RETURN: return; }
//gradOut: output gradient of CONV layer, known parameters //gradIn: input gradient, to be calculated static void Conv_bdata_init( long long * gradIn, long long * gradOut, int N, int oC, int oH, int oW, long long * weight, long long * primitives) { dnnError_t err; //get gradOut layout, create conversion if necessary dnnLayout_t lt_out = (dnnLayout_t)gradOut[MKLLayout]; if(lt_out==NULL) { lt_out = (dnnLayout_t)primitives[L_O]; } dnnPrimitive_t cv_out_bdata = NULL; float * buf_out_bdata = NULL; CHECK_ERR( try_convert(&cv_out_bdata, &buf_out_bdata, lt_out, (dnnLayout_t)primitives[L_BD_O]) , err ); primitives[CONVERT_BWDDATA_OUTPUT] = (long long)cv_out_bdata; primitives[BUFFER_BWDDATA_OUTPUT] = (long long)buf_out_bdata; //for filter dnnLayout_t lt_filter = (dnnLayout_t)primitives[L_W]; dnnLayout_t lt_filter_bdata = (dnnLayout_t)primitives[L_BD_W]; dnnPrimitive_t cv_filter_bdata = NULL; float * buf_filter_bdata = NULL; CHECK_ERR( try_convert(&cv_filter_bdata, &buf_filter_bdata, lt_filter, lt_filter_bdata), err ); primitives[BUFFER_BWDDATA_FILTER] = (long long)buf_filter_bdata; primitives[CONVERT_BWDDATA_FILTER] = (long long)cv_filter_bdata; //create gradInput layout and memory dnnLayout_t lt_in_bdata = (dnnLayout_t)primitives[L_BD_I]; float * buf_in_bdata = (float*)(gradIn[CPUPtr]); CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_in_bdata), lt_in_bdata), err ); primitives[BUFFER_BWDDATA_INPUT] = (long long)buf_in_bdata; gradIn[CPULayout] = (long long)(dnnLayout_t)primitives[L_I_CHWN]; float* gradOutTransPtr = NULL; if (gradOut[MKLLayout] == 0) { gradOutTransPtr = (float*)malloc(N*oC*oH*oW*sizeof(float)); } primitives[BUFFER_TRANS_OUTPUT] = (long long)gradOutTransPtr; ERR_RETURN: return; }
void MaxPooling_bprop( unsigned long long gradOutput, //input, N*outC*outH*outW unsigned long long gradInput, //output result unsigned long long dnnprimitives, int initOK, const float beta) { dnnError_t err; long long* primitives = (long long*)dnnprimitives; if (initOK == 0) { Init_b((long long *)gradInput, (long long *)gradOutput, primitives); } //get resource float* resPool[dnnResourceNumber] = {0}; float* OutPtr= GetPtr(gradOutput); resPool[dnnResourceDiffSrc] = (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT]; resPool[dnnResourceDiffDst] = OutPtr; resPool[dnnResourceWorkspace] = (float*)primitives[BUFFER_POOLING_FORWARD_WORKSPACE]; //make conversion for gradeOut if necessary dnnPrimitive_t cv_out_b = (dnnPrimitive_t)(primitives[CV_POOLING_BACKWARD_OUTPUT]); if (cv_out_b) { float* buf_out_b = (float*)primitives[BUFFER_POOLING_BACKWARD_OUTPUT]; CHECK_ERR( dnnConversionExecute_F32(cv_out_b, OutPtr, buf_out_b), err ); resPool[dnnResourceDiffDst] = buf_out_b; } long long grad_in_len = (long long)dnnLayoutGetMemorySize_F32((dnnLayout_t)primitives[POOL_L_B_I]) ; float * tempPtr = (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT]; #pragma omp parallel for for (long long i = 0; i < grad_in_len/4; ++i) { tempPtr[i] = 0; } CHECK_ERR( dnnExecute_F32((dnnPrimitive_t)primitives[POOLING_BACKWARD], (void**)resPool), err ); if(beta != 0.0) { //require to add previous delta long long* ptr_gradInput = (long long*)gradInput; float* pFirstBuf = GetPtr(gradInput); dnnLayout_t layout_pre_delta = (dnnLayout_t)ptr_gradInput[MKLLayout]; if(layout_pre_delta == NULL) layout_pre_delta = (dnnLayout_t)primitives[POOL_L_I]; dnnLayout_t layout_add_delta = (dnnLayout_t)primitives[POOL_L_B_I]; float* temp_memory = NULL; if (!dnnLayoutCompare_F32(layout_add_delta, layout_pre_delta)) { CHECK_ERR( dnnAllocateBuffer_F32((void**)&temp_memory, layout_add_delta) , err ); dnnPrimitive_t cv = NULL; CHECK_ERR( dnnConversionCreate_F32(&cv, layout_pre_delta, layout_add_delta), err ); CHECK_ERR( 
dnnConversionExecute_F32(cv, pFirstBuf, temp_memory), err ); pFirstBuf = temp_memory; } long len = (long long)dnnLayoutGetMemorySize_F32(layout_add_delta) / 4 ; cblas_saxpy(len, 1.0, pFirstBuf, 1, (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT], 1); if (temp_memory != NULL) dnnReleaseBuffer_F32(temp_memory); } ((long long *)gradInput)[MKLLayout] = primitives[POOL_L_B_I]; ((long long *)gradInput)[MKLPtr] = primitives[BUFFER_POOLING_BACKWARD_INPUT]; ERR_RETURN: return; }
//useMaxPooling, or averagePooling //useCaffe use ceil mode for pooling output dim static void Init_f( long long * input, long long * output, long long * primitives, int N, int inC, int inH, int inW, int kH, int kW, int dH, int dW, int padH,int padW, int outC, int outH,int outW, int useMaxPooling, int useCaffe) { dnnError_t err; //dimension size_t inputSize[DIM4] = { inW, inH, inC, N}; size_t outputSize[DIM4] = {outW, outH, outC, N}; size_t inputStrides1[DIM4] = {1, inW, inW*inH, inW*inH*inC}; size_t outputStrides1[DIM4] = {1, outW, outW*outH, outW*outH*outC}; //CHWN size_t inputStrides[DIM4] = {N, N*inW, N*inW*inH, 1}; size_t outputStrides[DIM4] = {N, N*outW, N*outW*outH, 1}; size_t kernelSize[2] = { kW, kH}; size_t kernelStride[2] = { dW, dH}; //calculate pad int padH2 = (outH-1)*dH + kH - inH - padH; int padW2 = (outW-1)*dW + kW - inW - padW; int symm = 0; if (padH2==padH && padW2==padW) symm = 1; if (padH2<0) padH2 = 0; if (padW2<0) padW2 = 0; int pad_dim4[DIM4] = {-padW, -padH, -padW2,-padH2}; int pad_dim2[DIM2] = {-padW, -padH}; int inputOffset[DIM2] = { 0, 0}; //create user layout dnnLayout_t lt_out = NULL, lt_in = NULL; CHECK_ERR( dnnLayoutCreate_F32(<_in, DIM4, inputSize, inputStrides) , err ); CHECK_ERR( dnnLayoutCreate_F32(<_out, DIM4, outputSize, outputStrides) , err ); primitives[POOL_L_I] = (long long)lt_in; primitives[POOL_L_O] = (long long)lt_out; //create MKL input layout dnnLayout_t lt_in_f = (dnnLayout_t)input[MKLLayout]; if(lt_in_f==NULL) { lt_in_f = lt_in; } primitives[POOL_L_F_I] = (long long)lt_in_f; //create operation dnnPrimitive_t pool_f = NULL, pool_b = NULL; dnnPrimitiveAttributes_t attributes = NULL; CHECK_ERR( dnnPrimitiveAttributesCreate_F32(&attributes), err ); if (useMaxPooling==1) { if(useCaffe || symm) { CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes, dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err ); CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes, 
dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err ); } else { CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes, dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err ); CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes, dnnAlgorithmPoolingMax,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err ); } } else { if(useCaffe || symm) { CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes, dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err ); CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes, dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim2, dnnBorderZeros), err ); } else { CHECK_ERR( dnnPoolingCreateForward_F32 (&pool_f, attributes, dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err ); CHECK_ERR( dnnPoolingCreateBackward_F32(&pool_b, attributes, dnnAlgorithmPoolingAvg,lt_in_f, kernelSize, kernelStride, pad_dim4, dnnBorderZerosAsymm), err ); } } primitives[POOLING_FORWARD] = (long long)pool_f; primitives[POOLING_BACKWARD] = (long long)pool_b; //create mkl layout for output dnnLayout_t lt_out_f = NULL, lt_out_b = NULL, lt_in_b = NULL; CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_f, pool_f, dnnResourceDst), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_in_b, pool_f, dnnResourceSrc), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_b, pool_f, dnnResourceDst), err ); primitives[POOL_L_F_O] = (long long)lt_out_f; primitives[POOL_L_B_I] = (long long)lt_in_b; primitives[POOL_L_B_O] = (long long)lt_out_b; //create work space , to record max location? 
dnnLayout_t lt_space = NULL; float* buf_space = NULL; CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_space, pool_f, dnnResourceWorkspace), err ); CHECK_ERR( dnnAllocateBuffer_F32((void**)&buf_space, lt_space) , err ); primitives[BUFFER_POOLING_FORWARD_WORKSPACE] = (long long)buf_space; //output layout output[CPULayout] = (long long)lt_out; float* buf_out_f = NULL; CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_out_f), lt_out_f), err ); primitives[BUFFER_POOLING_FORWARD_OUTPUT] = (long long)buf_out_f; ERR_RETURN: return; }
//Create the MKL-DNN batch-norm forward/backward primitives plus their
//workspace/scale-shift buffers, and cache everything in the `primitives`
//long tensor. Output and backward-input buffers are only allocated when the
//primitive's layout size differs from the plain contiguous NCHW size.
//NOTE(review): other functions in this file end with an ERR_RETURN label for
//CHECK_ERR; this one has none — confirm this file's CHECK_ERR variant does
//not `goto ERR_RETURN`.
static void THNN_(BatchNormalization_MKLDNN_init_forward)(
          THLongTensor *primitives,
          int N,
          int inC,
          int inH,
          int inW,
          double eps)
{
	dnnError_t err;
	dnnPrimitive_t bn_forward = NULL;
	dnnPrimitive_t bn_backward = NULL;
	dnnPrimitive_t bn_bwd_scaleshift = NULL;

	//contiguous NCHW dimensions/strides for the fallback user layout
	size_t inputSize[dimension] = {inW,inH,inC,N};
	size_t inputStrides[dimension] = { 1, inW, inH * inW, inC * inH * inW };

	//input layout: reuse the MKL layout published by the previous layer,
	//or fall back to a freshly created NCHW user layout
	dnnLayout_t lt_user_input = NULL;
	if(primitives->storage->data[BN_LAYOUT_INPUT] == 0)
	{
		CHECK_ERR( dnnLayoutCreate_F32(&lt_user_input, dimension, inputSize, inputStrides) , err );
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get input layout FAIL......\n");
#endif
	}
	else{
		lt_user_input = (dnnLayout_t)primitives->storage->data[BN_LAYOUT_INPUT];
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get input layout OK\n");
#endif
	}

	CHECK_ERR( dnnBatchNormalizationCreateForward_F32(&bn_forward,NULL,lt_user_input,eps), err );
	CHECK_ERR( dnnBatchNormalizationCreateBackwardData_F32(&bn_backward,NULL,lt_user_input,eps), err );
	CHECK_ERR( dnnBatchNormalizationCreateBackwardScaleShift_F32(&bn_bwd_scaleshift,NULL,lt_user_input,eps), err );

	dnnLayout_t lt_bn_forward_workspace,lt_bn_forward_scaleshift,lt_bn_forward_output,lt_bn_backward_input;
	real * buffer_forward_workspace = NULL;
	real * buffer_forward_scaleshift = NULL;
	real * buffer_forward_output = NULL;
	real * buffer_backward_input = NULL;
	//NOTE(review): the four layout queries below are not wrapped in CHECK_ERR,
	//so a failure here is silently ignored — confirm whether that is intended
	dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_workspace, bn_forward, dnnResourceWorkspace);
	dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_output, bn_forward, dnnResourceDst);
	dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_scaleshift, bn_forward, dnnResourceScaleShift);
	dnnLayoutCreateFromPrimitive_F32(&lt_bn_backward_input, bn_backward, dnnResourceDiffSrc);

	CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_workspace), lt_bn_forward_workspace), err );
	CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_scaleshift), lt_bn_forward_scaleshift), err );
	//eager allocation is disabled; the buffers are allocated lazily below only
	//when the MKL layout size differs from the contiguous NCHW size
	//CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_output), lt_bn_forward_output), err );
	//CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_backward_input), lt_bn_backward_input), err );

	//size2 = element count * 4 bytes (float32)
	int size1 = dnnLayoutGetMemorySize_F32(lt_bn_forward_output);
	int size2 = inW*inH*inC*N*4;
	if(size1 == size2)
	{
		//layouts match: no separate MKL output buffer is created here.
		//NOTE(review): buffer_forward_output then stays NULL and NULL is
		//stored in BUFFER_BN_FORWARD_OUTPUT — verify the forward pass handles
		//that (presumably it writes straight into the user tensor).
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN forward ouput layout match OK\n");
#endif
	}
	else
	{
		CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_output), lt_bn_forward_output), err );
		fprintf(stderr ,"MKLDNN BN forward ouput layout match FAIL, size1 = %d, size2 = %d \n", size1, size2);
	}
	size1 = dnnLayoutGetMemorySize_F32(lt_bn_backward_input);
	if(size1 == size2)
	{
		//log text says "MaxPooling" but this is the BN backward-input path
		//(message kept byte-identical; it is runtime output)
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN MaxPooling bwddata input layout match OK\n");
#endif
	}
	else
	{
		CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_backward_input), lt_bn_backward_input), err );
		fprintf(stderr ,"MKLDNN MaxPooling bwddata input layout match FAIL, size1 = %d, size2 = %d \n", size1, size2);
	}

	//save the dnnPrimitive to THTensor(long int array)
	primitives->storage->data[BN_LAYOUT_FORWARD_OUTPUT] = (long long)lt_bn_forward_output;
	primitives->storage->data[BN_LAYOUT_BACKWARD_INPUT] = (long long)lt_bn_backward_input;
	primitives->storage->data[BN_FORWARD] = (long long)bn_forward;
	primitives->storage->data[BN_BACKWARD] = (long long)bn_backward;
	primitives->storage->data[BN_SCALESHIFT] = (long long)bn_bwd_scaleshift;
	primitives->storage->data[BUFFER_BN_FORWARD_WORKSPACE] = (long long)buffer_forward_workspace;
	primitives->storage->data[BUFFER_BN_FORWARD_SCALESHIFT] = (long long)buffer_forward_scaleshift;
	primitives->storage->data[BUFFER_BN_FORWARD_OUTPUT] = (long long)buffer_forward_output;
	primitives->storage->data[BUFFER_BN_BACKWARD_INPUT] = (long long)buffer_backward_input;
	//forward and backward deliberately share the same workspace buffer
	primitives->storage->data[BUFFER_BN_BACKWARD_WORKSPACE] = (long long)buffer_forward_workspace;
}
//Prepare all conversions/buffers needed by the backward-filter (weight
//gradient) convolution, plus the backward-bias path when a bias primitive
//exists, caching everything in `primitives`.
static void Conv_bfilter_init(
              long long * input,
              long long * gradOutput,
              long long * gradWeight,
              long long * primitives,
              int N, int oC, int oH, int oW)
{
    dnnError_t err;

    //for gradOut: resolve its current layout (producer's MKL layout or the
    //user layout saved at forward init) and convert to the bwd-filter layout
    dnnLayout_t lt_out = (dnnLayout_t)(gradOutput[MKLLayout]);
    if(lt_out==NULL)
        lt_out = (dnnLayout_t)primitives[L_O];
    dnnPrimitive_t cv_out_bfilter = NULL;
    float* buf_out_bfilter = NULL;
    CHECK_ERR( try_convert(&cv_out_bfilter, &buf_out_bfilter, lt_out, (dnnLayout_t)primitives[L_BF_O]) , err );
    primitives[CONVERT_BWDFILTER_OUTPUT] = (long long)cv_out_bfilter;
    primitives[BUFFER_BWDFILTER_OUTPUT] = (long long)buf_out_bfilter;

    //for the first layer without delta, input gradOut should first be transposed;
    //the scratch buffer is shared with the bwd-data path (only malloc'd once)
    float* gradOutTransPtr = NULL;
    if ( gradOutput[MKLLayout] == 0 && primitives[BUFFER_TRANS_OUTPUT] == 0)
    {
        gradOutTransPtr = (float*)malloc(N*oC*oH*oW*sizeof(float));
        primitives[BUFFER_TRANS_OUTPUT] = (long long)gradOutTransPtr;
    }

    //for filter: conversion FROM the bwd-filter layout back TO the user
    //weight layout (the primitive writes the gradient in its own layout)
    dnnLayout_t lt_filter = (dnnLayout_t)primitives[L_W];
    dnnLayout_t lt_filter_bfilter = (dnnLayout_t)primitives[L_BF_W];
    dnnPrimitive_t cv_filter_bfilter = NULL;
    float * buf_filter_bfilter = NULL;
    if(!dnnLayoutCompare_F32(lt_filter_bfilter, lt_filter))
    {
        CHECK_ERR( dnnConversionCreate_F32(&cv_filter_bfilter, lt_filter_bfilter, lt_filter), err);
        CHECK_ERR( dnnAllocateBuffer_F32((void**)&buf_filter_bfilter, lt_filter_bfilter), err);
    }
    primitives[BUFFER_BWDFILTER_FILTER] = (long long)buf_filter_bfilter;
    primitives[CONVERT_BWDFILTER_FILTER] = (long long)cv_filter_bfilter;

    //for input: convert the forward input into the bwd-filter src layout;
    //seeded with the raw CPU pointer, overwritten by try_convert if a
    //conversion buffer is needed
    dnnLayout_t lt_in_real = (dnnLayout_t)input[MKLLayout];
    if(lt_in_real==NULL)
    {
        lt_in_real = (dnnLayout_t)primitives[L_I];
    }
    dnnLayout_t lt_in_bfilter = (dnnLayout_t)primitives[L_BF_I];
    dnnPrimitive_t cv_in_bfilter = NULL;
    float* buf_in_bfilter = (float*)(input[CPUPtr]);
    CHECK_ERR( try_convert(&cv_in_bfilter, &buf_in_bfilter, lt_in_real, lt_in_bfilter), err );
    primitives[BUFFER_BWDFILTER_INPUT] = (long long)buf_in_bfilter;
    primitives[CONVERT_BWDFILTER_INPUT] = (long long)cv_in_bfilter;

    //if has bias (BDW_BIAS_INDEX holds the backward-bias primitive, 0 if none)
    if (primitives[BDW_BIAS_INDEX] != 0)
    {
        //convert for grad_bias if necessary (bias layout -> user bias layout)
        dnnLayout_t lt_bias_bias = (dnnLayout_t)primitives[L_B_B];
        dnnLayout_t lt_bias = (dnnLayout_t)primitives[L_B];
        dnnPrimitive_t cv_bias_bias = NULL;
        float * buf_bias_bias = NULL;
        CHECK_ERR( dnnConversionCreate_F32(&cv_bias_bias, lt_bias_bias, lt_bias), err);
        CHECK_ERR( dnnAllocateBuffer_F32((void**)&buf_bias_bias, lt_bias_bias), err);
        primitives[BUFFER_BIAS_BIAS] = (long long)buf_bias_bias;
        primitives[CV_BIAS_BIAS] = (long long)cv_bias_bias;

        //convert for grad_out if necessary
        //NOTE(review): buf_out_bias is seeded from input[CPUPtr], not from
        //gradOutput — looks like a copy-paste from the input path above; the
        //value only matters if try_convert leaves it untouched. Confirm.
        dnnLayout_t lt_bias_out = (dnnLayout_t)primitives[L_B_O];
        dnnPrimitive_t cv_out_bias = NULL;
        float* buf_out_bias = (float*)(input[CPUPtr]);
        CHECK_ERR( try_convert(&cv_out_bias, &buf_out_bias, lt_out, lt_bias_out), err );
        primitives[BUFFER_BIAS_OUT] = (long long)buf_out_bias;
        primitives[CV_BIAS_OUT] = (long long)cv_out_bias;
    }

ERR_RETURN:
    return;
}
static int Conv_f_init( long long * input, long long * output, long long * weight, long long * primitives, int N, int inC, int inH, int inW, int kH, int kW, int dH, int dW, int padH, int padW, int outC, int outH, int outW, int hasBias) { dnnError_t err; //init dimensions size_t inputSize[DIM4] = { inW, inH, inC, N}; size_t outputSize[DIM4] = {outW, outH, outC, N}; size_t filterSize[DIM4] = { kW, kH, inC, outC}; size_t stride[DIM2] = { dW, dH}; int pad[DIM2] = {-padW, -padH}; size_t biasSize[1] = {outC}; size_t biasStrides[1] = { 1 }; //using NCHW layout size_t filterStridesNCHW[DIM4] = {1, kW, kW*kH, kW*kH*inC}; size_t inputStridesNCHW[DIM4] = {1, inW, inW*inH, inW*inH*inC}; size_t outputStridesNCHW[DIM4] = {1, outW, outW*outH, outW*outH*outC}; //CHWN size_t filterStridesCHWN[DIM4] = {outC, outC*kW, outC*kW*kH, 1}; size_t inputStridesCHWN[DIM4] = {N, N*inW, N*inW*inH, 1}; size_t outputStridesCHWN[DIM4] = {N, N*outW, N*outW*outH, 1}; //create execute and save into primitives dnnPrimitiveAttributes_t attributes = NULL; CHECK_ERR( dnnPrimitiveAttributesCreate_F32(&attributes), err ); dnnPrimitive_t conv_f = NULL; //forward operation dnnPrimitive_t conv_bdata = NULL; //backward calculate gradient input dnnPrimitive_t conv_bfilter = NULL; //backward calculate gradient filter(weight) dnnPrimitive_t conv_b_bias = NULL; //backward bias //create layout and save //lt_in, layout of input in NCHW form //lt_filter_f, required layout (MKL layout) for forward for weight //lt_out_bfilter, required layout for backward weight update for output dnnLayout_t lt_in_NCHW, lt_filter, lt_out_NCHW, lt_in_CHWN, lt_out_CHWN, lt_bias_CHWN=NULL; dnnLayout_t lt_in_f, lt_filter_f, lt_out_f, lt_bias_f; dnnLayout_t lt_in_bdata, lt_filter_bdata, lt_out_bdata, lt_bias_bdata; dnnLayout_t lt_in_bfilter, lt_filter_bfilter, lt_out_bfilter,lt_bias_bias, lt_out_bias; if (hasBias) { CHECK_ERR(dnnConvolutionCreateForwardBias_F32( &conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, 
outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR(dnnConvolutionCreateForwardBias_F32( &conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR( dnnLayoutCreate_F32(<_bias_CHWN, 1, biasSize, biasStrides), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_bias_f, conv_f, dnnResourceBias ) , err ); CHECK_ERR(dnnConvolutionCreateBackwardBias_F32( &conv_b_bias, attributes, dnnAlgorithmConvolutionDirect, DIM4, outputSize),err); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_bias_bias, conv_b_bias, dnnResourceDiffBias) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_bias, conv_b_bias, dnnResourceDiffDst) , err ); } else CHECK_ERR(dnnConvolutionCreateForward_F32( &conv_f, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR(dnnConvolutionCreateBackwardData_F32( &conv_bdata, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); CHECK_ERR(dnnConvolutionCreateBackwardFilter_F32(&conv_bfilter, attributes, dnnAlgorithmConvolutionDirect, DIM4, inputSize, outputSize, filterSize, stride, pad, dnnBorderZeros),err); primitives[FORWARD_INDEX] = (long long)conv_f; primitives[BWD_DATA_INDEX] = (long long)conv_bdata; primitives[BWD_FILTER_INDEX] = (long long)conv_bfilter; primitives[BDW_BIAS_INDEX] = (long long)conv_b_bias; CHECK_ERR( dnnLayoutCreate_F32(<_in_NCHW, DIM4, inputSize, inputStridesNCHW), err ); CHECK_ERR( dnnLayoutCreate_F32(<_in_CHWN, DIM4, inputSize, inputStridesCHWN), err ); CHECK_ERR( dnnLayoutCreate_F32(<_filter, DIM4, filterSize, filterStridesCHWN), err ); CHECK_ERR( dnnLayoutCreate_F32(<_out_NCHW, DIM4, outputSize, outputStridesNCHW), err ); CHECK_ERR( dnnLayoutCreate_F32(<_out_CHWN, DIM4, outputSize, outputStridesCHWN), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_in_f, conv_f, dnnResourceSrc ) , err ); 
CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_filter_f, conv_f, dnnResourceFilter), err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_f, conv_f, dnnResourceDst ) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_in_bdata, conv_bdata, dnnResourceDiffSrc) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_filter_bdata, conv_bdata, dnnResourceFilter) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_bdata, conv_bdata, dnnResourceDiffDst) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_in_bfilter, conv_bfilter, dnnResourceSrc) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_filter_bfilter, conv_bfilter, dnnResourceDiffFilter) , err ); CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(<_out_bfilter, conv_bfilter, dnnResourceDiffDst) , err ); //here assume NCHW (CHWN will be transposed) primitives[L_I] = (long long)lt_in_NCHW; primitives[L_O] = (long long)lt_out_NCHW; primitives[L_W] = (long long)lt_filter; primitives[L_B] = (long long)lt_bias_CHWN; primitives[L_F_I] = (long long)lt_in_f; primitives[L_F_O] = (long long)lt_out_f; primitives[L_F_W] = (long long)lt_filter_f; primitives[L_F_B] = (long long)lt_bias_f; primitives[L_BD_I] = (long long)lt_in_bdata; primitives[L_BD_O] = (long long)lt_out_bdata; primitives[L_BD_W] = (long long)lt_filter_bdata; primitives[L_BF_I] = (long long)lt_in_bfilter; primitives[L_BF_O] = (long long)lt_out_bfilter; primitives[L_BF_W] = (long long)lt_filter_bfilter; primitives[L_I_CHWN] = (long long)lt_in_CHWN; primitives[L_O_CHWN] = (long long)lt_out_CHWN; primitives[L_B_B] = (long long)lt_bias_bias; primitives[L_B_O] = (long long)lt_out_bias; //input may have user layout (from raw image data,continuous NCHW ) // or maybe mkl layout (is previous mkl-based layer's output) dnnLayout_t lt_in_real = (dnnLayout_t)input[MKLLayout]; if(lt_in_real==NULL) lt_in_real = lt_in_NCHW; //create conversion and buff if necessary dnnPrimitive_t cv_in_f = NULL; float * buf_in_f = NULL; CHECK_ERR( try_convert(&cv_in_f, &buf_in_f, 
lt_in_real, lt_in_f) , err ); //create transpose if necessary float* newPtr = NULL; if (input[MKLLayout] == 0) { newPtr = (float*)malloc(inC*inH*inW*N*sizeof(float)); } primitives[BUFFER_TRANS_INPUT] = (long long)newPtr; //save conversion and buff primitives[BUFFER_FORWARD_INPUT] = (long long)buf_in_f; primitives[CONVERT_FORWARD_INPUT] = (long long)cv_in_f; //filter layout dnnPrimitive_t cv_filter_f = NULL; float * buf_filter_f = NULL; CHECK_ERR( try_convert(&cv_filter_f, &buf_filter_f, lt_filter, lt_filter_f), err ); primitives[CONVERT_FORWARD_FILTER] = (long long)cv_filter_f; primitives[BUFFER_FORWARD_FILTER] = (long long)buf_filter_f; //save user layout for output, and create mkl buffer //output always has mkl buffer and recorded in layer's primitive output[CPULayout] = (long long)lt_out_CHWN; float* buf_out_f = NULL; CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buf_out_f), lt_out_f), err ); primitives[BUFFER_FORWARD_OUTPUT] = (long long)buf_out_f; //for bias dnnPrimitive_t cv_bias_f = NULL; float * buf_bias_f = NULL; dnnPrimitive_t cv_bias_b = NULL; float * buf_bias_b = NULL; if (hasBias) { CHECK_ERR( try_convert(&cv_bias_f, &buf_bias_f, lt_bias_CHWN, lt_bias_f), err ); } primitives[CONVERT_FORWARD_BIAS] = (long long)cv_bias_f; primitives[BUFFER_FORWARD_BIAS] = (long long)buf_bias_f; return 0; ERR_RETURN: return 1; }