Beispiel #1
0
//Copy a tensor's MKL-side data back into its CPU-side buffer.
//
//tensor_ is the address of a long long slot table indexed by CPUPtr, MKLPtr,
//CPULayout and MKLLayout (constants defined elsewhere in the project).
//N, C, H, W are the tensor dimensions.
//
//  - CPUPtr slot is 0          : report an error and return.
//  - MKLLayout slot is 0       : nothing to convert, return.
//  - MKL layout == CPU layout  : element-wise copy from MKLPtr to CPUPtr.
//  - layouts differ            : bring the data into a plain layout with
//                                sizes {W,H,C,N} and strides {1,W,W*H,W*H*C}
//                                (via a temporary buffer if the MKL layout is
//                                not already that), then write the transpose
//                                of the N x (C*H*W) matrix into the CPU buffer.
//
//CHECK_ERR is defined elsewhere; presumably it stores the status in err and
//jumps to ERR_RETURN on failure -- confirm against the macro definition.
//Everything created locally (layout, conversion primitive, temporary buffer)
//is released at ERR_RETURN on both the success and the failure path.
void ConvertBack(unsigned long long tensor_, int N, int C, int H, int W)
{
    long long * tensor = (long long *)tensor_;
    if (tensor[CPUPtr] == 0 )
    {
        printf("error to converback tensor!\n");
        return;
    }

    if (tensor[MKLLayout] == 0)    return;//do not need convert

    dnnError_t err;
    dnnLayout_t lt_NCHW = NULL;     //plain layout created here, owned by this function
    dnnPrimitive_t cv = NULL;       //MKL layout -> plain layout conversion, owned by this function
    float* newPtr = NULL;           //temporary buffer holding the converted data

    if (!dnnLayoutCompare_F32((dnnLayout_t)tensor[MKLLayout], (dnnLayout_t)tensor[CPULayout]))
    {
        //do all size arithmetic in size_t so large tensors cannot overflow int
        const size_t w = (size_t)W;
        const size_t h = (size_t)H;
        const size_t c = (size_t)C;
        const size_t n = (size_t)N;
        const size_t chw = c * h * w;

        size_t inSize[DIM4]   = { w, h, c, n };
        size_t inStride[DIM4] = { 1, w, w * h, w * h * c };

        float* cpuPtr = (float *)tensor[CPUPtr];
        float* mklPtr = (float *)tensor[MKLPtr];

        CHECK_ERR( dnnLayoutCreate_F32(&lt_NCHW, DIM4, inSize, inStride),  err );

        if (!dnnLayoutCompare_F32((dnnLayout_t)tensor[MKLLayout], lt_NCHW))
        {
            CHECK_ERR( dnnConversionCreate_F32(&cv, (dnnLayout_t)tensor[MKLLayout], lt_NCHW), err );
            newPtr = (float*)malloc(n * chw * sizeof(float));
            if (newPtr == NULL)
            {
                printf("ConvertBack: failed to allocate conversion buffer!\n");
                goto ERR_RETURN;
            }
            CHECK_ERR( dnnConversionExecute_F32(cv, mklPtr, newPtr), err );
            mklPtr = newPtr;
        }

        //row-major transpose: N rows x (C*H*W) cols in, (C*H*W) rows x N cols out
        mkl_somatcopy('r', 't', n, chw, 1.0, mklPtr, chw, cpuPtr, n);
    }
    else
    {
        //identical layouts: plain copy of the whole buffer, one float at a time
        long long grad_in_len = (long long)dnnLayoutGetMemorySize_F32((dnnLayout_t)tensor[MKLLayout]);
        long long count = grad_in_len / (long long)sizeof(float);
        float * destPtr = (float*)tensor[CPUPtr];
        float * srcPtr = (float*)tensor[MKLPtr];
        #pragma omp parallel for
        for (long long i = 0; i < count; ++i)
        {
            destPtr[i] = srcPtr[i];
        }
    }

ERR_RETURN:
    if (cv != NULL)
    {
        dnnDelete_F32(cv);
    }
    if (lt_NCHW != NULL)
    {
        dnnLayoutDelete_F32(lt_NCHW);
    }
    free(newPtr);   //free(NULL) is a no-op
}
Beispiel #2
0
//Copy a tensor's CPU-side data into its MKL-side buffer.
//
//tensor_ is the address of a long long slot table indexed by CPUPtr, MKLPtr,
//CPULayout and MKLLayout (constants defined elsewhere in the project).
//
//  - CPUPtr slot is 0          : report an error and return.
//  - MKLLayout slot is 0       : nothing to convert, return.
//  - MKL layout != CPU layout  : run a CPU-layout -> MKL-layout conversion
//                                from the CPU buffer into the MKL buffer.
//  - layouts equal             : memcpy the whole buffer.
//
//CHECK_ERR is defined elsewhere; presumably it stores the status in err and
//jumps to ERR_RETURN on failure -- confirm against the macro definition.
//The conversion primitive is created per call and released at ERR_RETURN.
void ConvertToMKL(unsigned long long tensor_)
{
    long long * tensor = (long long *)tensor_;
    dnnError_t err;
    dnnPrimitive_t cv = NULL;   //owned by this function, deleted before returning

    if (tensor[CPUPtr] == 0 )
    {
        printf("error to conver to MKL tensor!\n");
        return;
    }

    if (tensor[MKLLayout] == 0)    return;//do not need convert

    if (!dnnLayoutCompare_F32((dnnLayout_t)tensor[MKLLayout],
            (dnnLayout_t)tensor[CPULayout]))
    {
        CHECK_ERR( dnnConversionCreate_F32(&cv, (dnnLayout_t)tensor[CPULayout],
                    (dnnLayout_t)tensor[MKLLayout]), err );
        CHECK_ERR( dnnConversionExecute_F32(cv, (float *)tensor[CPUPtr], (float *)tensor[MKLPtr]), err );
    }
    else
    {
        memcpy((void*)tensor[MKLPtr], (void*)tensor[CPUPtr], dnnLayoutGetMemorySize_F32((dnnLayout_t)tensor[MKLLayout]));
    }

ERR_RETURN:
    if (cv != NULL)
    {
        dnnDelete_F32(cv);
    }
    return;
}
Beispiel #3
0
void MaxPooling_bprop(
    unsigned long long gradOutput,  //input, N*outC*outH*outW
    unsigned long long gradInput,   //output result
    unsigned long long dnnprimitives,
    int initOK, const float beta)
{
    dnnError_t err;
    long long* primitives = (long long*)dnnprimitives;
    if (initOK == 0)
    {
        Init_b((long long *)gradInput, (long long *)gradOutput, primitives);
    }

    //get resource
    float* resPool[dnnResourceNumber] = {0};
    float* OutPtr= GetPtr(gradOutput);

    resPool[dnnResourceDiffSrc]   = (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT];
    resPool[dnnResourceDiffDst]   = OutPtr;
    resPool[dnnResourceWorkspace] = (float*)primitives[BUFFER_POOLING_FORWARD_WORKSPACE];

    //make conversion for gradeOut if necessary
    dnnPrimitive_t cv_out_b = (dnnPrimitive_t)(primitives[CV_POOLING_BACKWARD_OUTPUT]);
    if (cv_out_b)
    {
        float* buf_out_b = (float*)primitives[BUFFER_POOLING_BACKWARD_OUTPUT];
        CHECK_ERR( dnnConversionExecute_F32(cv_out_b, OutPtr, buf_out_b), err );
        resPool[dnnResourceDiffDst] = buf_out_b;
    }

    long long grad_in_len = (long long)dnnLayoutGetMemorySize_F32((dnnLayout_t)primitives[POOL_L_B_I]) ;
    float * tempPtr = (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT];
    #pragma omp parallel for
    for (long long i = 0; i < grad_in_len/4; ++i)
    {
        tempPtr[i] = 0;
    }

    CHECK_ERR( dnnExecute_F32((dnnPrimitive_t)primitives[POOLING_BACKWARD], (void**)resPool), err );

    if(beta != 0.0)
    {
        //require to add previous delta
        long long* ptr_gradInput = (long long*)gradInput;
        float* pFirstBuf = GetPtr(gradInput);
        dnnLayout_t layout_pre_delta = (dnnLayout_t)ptr_gradInput[MKLLayout];
        if(layout_pre_delta == NULL) layout_pre_delta = (dnnLayout_t)primitives[POOL_L_I];
        dnnLayout_t layout_add_delta = (dnnLayout_t)primitives[POOL_L_B_I];
        float* temp_memory = NULL;
        if (!dnnLayoutCompare_F32(layout_add_delta, layout_pre_delta))
        {
            CHECK_ERR( dnnAllocateBuffer_F32((void**)&temp_memory, layout_add_delta) , err );
            dnnPrimitive_t cv = NULL;
            CHECK_ERR( dnnConversionCreate_F32(&cv, layout_pre_delta, layout_add_delta), err );
            CHECK_ERR( dnnConversionExecute_F32(cv, pFirstBuf, temp_memory), err );
            pFirstBuf = temp_memory;
        }
        long len = (long long)dnnLayoutGetMemorySize_F32(layout_add_delta) / 4 ;
        cblas_saxpy(len, 1.0, pFirstBuf, 1, (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT], 1);
        if (temp_memory != NULL)
            dnnReleaseBuffer_F32(temp_memory);
    }

    ((long long *)gradInput)[MKLLayout] = primitives[POOL_L_B_I];
    ((long long *)gradInput)[MKLPtr]    = primitives[BUFFER_POOLING_BACKWARD_INPUT];

ERR_RETURN:
    return;
}
Beispiel #4
0
static void Conv_bfilter_init(
  long long * input,
  long long * gradOutput,
  long long * gradWeight,
  long long * primitives,
  int N, int oC, int oH, int oW)
{
    dnnError_t err;

    //for gradOut
    dnnLayout_t lt_out = (dnnLayout_t)(gradOutput[MKLLayout]);
    if(lt_out==NULL) lt_out = (dnnLayout_t)primitives[L_O];
    dnnPrimitive_t cv_out_bfilter = NULL; float* buf_out_bfilter = NULL;
    CHECK_ERR( try_convert(&cv_out_bfilter, &buf_out_bfilter, lt_out, (dnnLayout_t)primitives[L_BF_O]) , err );
	primitives[CONVERT_BWDFILTER_OUTPUT] = (long long)cv_out_bfilter;
	primitives[BUFFER_BWDFILTER_OUTPUT]  = (long long)buf_out_bfilter;

	//for the first layer without delta, input gradOut should first be transposed
    float* gradOutTransPtr = NULL;
    if ( gradOutput[MKLLayout] == 0 && primitives[BUFFER_TRANS_OUTPUT] == 0)
	{
		gradOutTransPtr = (float*)malloc(N*oC*oH*oW*sizeof(float));
		primitives[BUFFER_TRANS_OUTPUT] = (long long)gradOutTransPtr;
	}

    //for filter
	dnnLayout_t lt_filter         = (dnnLayout_t)primitives[L_W];
	dnnLayout_t lt_filter_bfilter = (dnnLayout_t)primitives[L_BF_W];
    dnnPrimitive_t cv_filter_bfilter = NULL; float * buf_filter_bfilter = NULL;
    if(!dnnLayoutCompare_F32(lt_filter_bfilter, lt_filter))
    {
        CHECK_ERR( dnnConversionCreate_F32(&cv_filter_bfilter, lt_filter_bfilter, lt_filter), err);
		CHECK_ERR( dnnAllocateBuffer_F32((void**)&buf_filter_bfilter, lt_filter_bfilter), err);
    }
    primitives[BUFFER_BWDFILTER_FILTER]  = (long long)buf_filter_bfilter;
	primitives[CONVERT_BWDFILTER_FILTER] = (long long)cv_filter_bfilter;

    //for input
    dnnLayout_t lt_in_real = (dnnLayout_t)input[MKLLayout];
    if(lt_in_real==NULL)
    {
        lt_in_real = (dnnLayout_t)primitives[L_I];
    }
    dnnLayout_t lt_in_bfilter = (dnnLayout_t)primitives[L_BF_I];
    dnnPrimitive_t cv_in_bfilter = NULL; float* buf_in_bfilter = (float*)(input[CPUPtr]);
    CHECK_ERR( try_convert(&cv_in_bfilter, &buf_in_bfilter, lt_in_real, lt_in_bfilter), err );
    primitives[BUFFER_BWDFILTER_INPUT]  = (long long)buf_in_bfilter;
    primitives[CONVERT_BWDFILTER_INPUT] = (long long)cv_in_bfilter;

    //if has bias
    if (primitives[BDW_BIAS_INDEX] != 0)
    {
        //convert for grad_bias if necessary
        dnnLayout_t lt_bias_bias  = (dnnLayout_t)primitives[L_B_B];
        dnnLayout_t lt_bias       = (dnnLayout_t)primitives[L_B];
        dnnPrimitive_t cv_bias_bias = NULL; float * buf_bias_bias = NULL;
        CHECK_ERR( dnnConversionCreate_F32(&cv_bias_bias, lt_bias_bias, lt_bias), err);
		CHECK_ERR( dnnAllocateBuffer_F32((void**)&buf_bias_bias, lt_bias_bias), err);
        primitives[BUFFER_BIAS_BIAS]  = (long long)buf_bias_bias;
        primitives[CV_BIAS_BIAS] =      (long long)cv_bias_bias;

        //convert for grad_out if necessary
        dnnLayout_t lt_bias_out = (dnnLayout_t)primitives[L_B_O];
        dnnPrimitive_t cv_out_bias = NULL; float* buf_out_bias = (float*)(input[CPUPtr]);
        CHECK_ERR( try_convert(&cv_out_bias, &buf_out_bias, lt_out, lt_bias_out), err );
        primitives[BUFFER_BIAS_OUT]  = (long long)buf_out_bias;
        primitives[CV_BIAS_OUT] = (long long)cv_out_bias;
    }


ERR_RETURN:
    return;
}