Example #1
void ConvertToMKL(unsigned long long tensor_)
{
    long long * tensor = (long long *)tensor_;
    if (tensor[CPUPtr] == 0)
    {
        printf("error converting to MKL tensor!\n");
        return;
    }

    if (tensor[MKLLayout] == 0) return;    //no MKL layout attached, nothing to convert

    if (!dnnLayoutCompare_F32((dnnLayout_t)tensor[MKLLayout],
            (dnnLayout_t)tensor[CPULayout]))
    {
        dnnError_t err;
        dnnPrimitive_t cv = NULL;
        CHECK_ERR( dnnConversionCreate_F32(&cv, (dnnLayout_t)tensor[CPULayout],
                    (dnnLayout_t)tensor[MKLLayout]), err );
        CHECK_ERR( dnnConversionExecute_F32(cv, (float *)tensor[CPUPtr], (float *)tensor[MKLPtr]), err );
        dnnDelete_F32(cv);    //release the one-shot conversion primitive
    }
    else
    {
        //layouts match: the MKL buffer can take a plain byte copy
        memcpy((void*)tensor[MKLPtr], (void*)tensor[CPUPtr],
               dnnLayoutGetMemorySize_F32((dnnLayout_t)tensor[MKLLayout]));
    }
ERR_RETURN:
    return;
}
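These snippets rely on scaffolding that is not shown on this page. The sketch below is one plausible reconstruction, not the project's actual definitions: the slot indices (CPUPtr, MKLPtr, CPULayout, MKLLayout), DIM4, and the CHECK_ERR macro are all assumptions inferred from how the examples use them.

/* Hypothetical scaffolding assumed by these examples; the real
 * definitions live elsewhere in the original project. */
#include <stdio.h>
#include <string.h>
#include "mkl_dnn.h"

/* Assumed slot indices into the long long tensor descriptor array. */
enum { CPUPtr = 0, MKLPtr = 1, CPULayout = 2, MKLLayout = 3 };

#define DIM4 4

/* Assumed error-check macro: store the status and jump to the
 * ERR_RETURN cleanup label on failure, as the labels in the
 * examples suggest. */
#define CHECK_ERR(f, err) do {                                   \
    (err) = (f);                                                 \
    if ((err) != E_SUCCESS) {                                    \
        printf("[%s:%d] error %d\n", __FILE__, __LINE__, (err)); \
        goto ERR_RETURN;                                         \
    }                                                            \
} while (0)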
Example #2
//convert a tensor in MKL layout back to the NumPy NCHW layout;
//if the layouts differ, run a conversion, otherwise copy the memory directly
void ConvertBack(unsigned long long tensor_, int N, int C, int H, int W)
{
    long long * tensor = (long long *)tensor_;
    if (tensor[CPUPtr] == 0)
    {
        printf("error converting tensor back!\n");
        return;
    }

    if (tensor[MKLLayout] == 0) return;    //no MKL layout attached, nothing to convert

    dnnError_t err;
    size_t inSize[DIM4]   = { W, H, C, N };        //dimensions, fastest-varying first
    size_t inStride[DIM4] = { 1, W, W*H, W*H*C };  //strides of a contiguous NCHW buffer
    dnnLayout_t lt_NCHW = NULL;
    float* newPtr = NULL;
    CHECK_ERR( dnnLayoutCreate_F32(&lt_NCHW, DIM4, inSize, inStride), err );
    if (!dnnLayoutCompare_F32((dnnLayout_t)tensor[MKLLayout], (dnnLayout_t)tensor[CPULayout]))
    {
        float* cpuPtr = (float *)tensor[CPUPtr];
        float* mklPtr = (float *)tensor[MKLPtr];

        if (!dnnLayoutCompare_F32((dnnLayout_t)tensor[MKLLayout], lt_NCHW))
        {
            dnnPrimitive_t cv = NULL;
            CHECK_ERR( dnnConversionCreate_F32(&cv, (dnnLayout_t)tensor[MKLLayout], lt_NCHW), err );
            newPtr = (float*)malloc(N*C*H*W*sizeof(float));
            CHECK_ERR( dnnConversionExecute_F32(cv, mklPtr, newPtr), err );
            dnnDelete_F32(cv);    //release the one-shot conversion primitive
            mklPtr = newPtr;
        }
        //transpose the N x (C*H*W) row-major matrix into (C*H*W) x N at cpuPtr
        mkl_somatcopy('r', 't', N, C*H*W, 1.0, mklPtr, C*H*W, cpuPtr, N);
    }
    else
    {
        long long grad_in_len = (long long)dnnLayoutGetMemorySize_F32((dnnLayout_t)tensor[MKLLayout]);
        long long numElems = grad_in_len / (long long)sizeof(float);
        float * destPtr = (float*)tensor[CPUPtr];
        float * srcPtr  = (float*)tensor[MKLPtr];
        #pragma omp parallel for
        for (long long i = 0; i < numElems; ++i)
        {
            destPtr[i] = srcPtr[i];
        }
    }
ERR_RETURN:
    if (lt_NCHW != NULL)
    {
        dnnLayoutDelete_F32(lt_NCHW);    //release the temporary layout descriptor
    }
    if (newPtr != NULL)
    {
        free(newPtr);
    }
}
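A note on the layout arithmetic above: the (deprecated) MKL DNN layout API lists sizes and strides with the fastest-varying dimension first, so { W, H, C, N } with strides { 1, W, W*H, W*H*C } describes a contiguous NCHW buffer, while the final mkl_somatcopy transpose writes a batch-innermost (CHWN) arrangement. A minimal self-contained sketch of the convention, with illustrative names and shapes:

#include "mkl_dnn.h"

/* Describe one N*C*H*W float buffer in two memory orders and compare.
 * dnnLayoutCompare_F32 returns 1 when the layouts match in memory. */
static int layout_convention_demo(void)
{
    size_t N = 2, C = 3, H = 4, W = 5;
    size_t sizes[4]       = { W, H, C, N };        /* logical dims, fastest first */
    size_t stridesNCHW[4] = { 1, W, W*H, W*H*C };  /* batch outermost in memory  */
    size_t stridesCHWN[4] = { N, N*W, N*W*H, 1 };  /* batch innermost in memory  */

    dnnLayout_t lt_nchw = NULL, lt_chwn = NULL;
    dnnLayoutCreate_F32(&lt_nchw, 4, sizes, stridesNCHW);
    dnnLayoutCreate_F32(&lt_chwn, 4, sizes, stridesCHWN);
    int same = dnnLayoutCompare_F32(lt_nchw, lt_chwn);  /* 0: different orders */
    dnnLayoutDelete_F32(lt_nchw);
    dnnLayoutDelete_F32(lt_chwn);
    return same;
}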
Example #3
void MaxPooling_bprop(
    unsigned long long gradOutput,  //input, N*outC*outH*outW
    unsigned long long gradInput,   //output result
    unsigned long long dnnprimitives,
    int initOK, const float beta)
{
    dnnError_t err;
    long long* primitives = (long long*)dnnprimitives;
    if (initOK == 0)
    {
        Init_b((long long *)gradInput, (long long *)gradOutput, primitives);
    }

    //gather resources for the pooling backward primitive
    float* resPool[dnnResourceNumber] = {0};
    float* OutPtr = GetPtr(gradOutput);

    resPool[dnnResourceDiffSrc]   = (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT];
    resPool[dnnResourceDiffDst]   = OutPtr;
    resPool[dnnResourceWorkspace] = (float*)primitives[BUFFER_POOLING_FORWARD_WORKSPACE];

    //convert gradOutput to the layout expected by the backward primitive, if necessary
    dnnPrimitive_t cv_out_b = (dnnPrimitive_t)(primitives[CV_POOLING_BACKWARD_OUTPUT]);
    if (cv_out_b)
    {
        float* buf_out_b = (float*)primitives[BUFFER_POOLING_BACKWARD_OUTPUT];
        CHECK_ERR( dnnConversionExecute_F32(cv_out_b, OutPtr, buf_out_b), err );
        resPool[dnnResourceDiffDst] = buf_out_b;
    }

    //zero the gradInput buffer so the pooling backward result starts from a clean state
    long long grad_in_len = (long long)dnnLayoutGetMemorySize_F32((dnnLayout_t)primitives[POOL_L_B_I]);
    long long numElems = grad_in_len / (long long)sizeof(float);
    float * tempPtr = (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT];
    #pragma omp parallel for
    for (long long i = 0; i < numElems; ++i)
    {
        tempPtr[i] = 0;
    }

    CHECK_ERR( dnnExecute_F32((dnnPrimitive_t)primitives[POOLING_BACKWARD], (void**)resPool), err );

    if (beta != 0.0)
    {
        //beta != 0 requires accumulating the previous delta into the result
        long long* ptr_gradInput = (long long*)gradInput;
        float* pFirstBuf = GetPtr(gradInput);
        dnnLayout_t layout_pre_delta = (dnnLayout_t)ptr_gradInput[MKLLayout];
        if(layout_pre_delta == NULL) layout_pre_delta = (dnnLayout_t)primitives[POOL_L_I];
        dnnLayout_t layout_add_delta = (dnnLayout_t)primitives[POOL_L_B_I];
        float* temp_memory = NULL;
        if (!dnnLayoutCompare_F32(layout_add_delta, layout_pre_delta))
        {
            CHECK_ERR( dnnAllocateBuffer_F32((void**)&temp_memory, layout_add_delta) , err );
            dnnPrimitive_t cv = NULL;
            CHECK_ERR( dnnConversionCreate_F32(&cv, layout_pre_delta, layout_add_delta), err );
            CHECK_ERR( dnnConversionExecute_F32(cv, pFirstBuf, temp_memory), err );
            dnnDelete_F32(cv);    //release the one-shot conversion primitive
            pFirstBuf = temp_memory;
        }
        long long len = (long long)dnnLayoutGetMemorySize_F32(layout_add_delta) / (long long)sizeof(float);
        cblas_saxpy(len, 1.0, pFirstBuf, 1, (float*)primitives[BUFFER_POOLING_BACKWARD_INPUT], 1);
        if (temp_memory != NULL)
            dnnReleaseBuffer_F32(temp_memory);
    }

    //publish the result as gradInput's MKL layout and buffer
    ((long long *)gradInput)[MKLLayout] = primitives[POOL_L_B_I];
    ((long long *)gradInput)[MKLPtr]    = primitives[BUFFER_POOLING_BACKWARD_INPUT];

ERR_RETURN:
    return;
}
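MaxPooling_bprop leans on a GetPtr helper that is not shown here. A plausible sketch, under the same slot-index assumptions as above: return the MKL-side buffer when the tensor carries an MKL layout, and the plain CPU buffer otherwise, which matches how the examples treat the two pointers.

/* Hypothetical helper, consistent with how the examples use it. */
static float* GetPtr(unsigned long long tensor_)
{
    long long * tensor = (long long *)tensor_;
    if (tensor[MKLLayout] != 0)
        return (float*)tensor[MKLPtr];  /* tensor lives in an MKL layout */
    return (float*)tensor[CPUPtr];      /* plain CPU buffer */
}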
Example #4
static void THNN_(BatchNormalization_MKLDNN_init_forward)(
          THLongTensor *primitives,
          int N,
          int inC,
          int inH,
          int inW,
          double eps)
{
	dnnError_t err;
	dnnPrimitive_t bn_forward = NULL;
	dnnPrimitive_t bn_backward = NULL;
	dnnPrimitive_t bn_bwd_scaleshift = NULL;

	size_t inputSize[dimension]    = { inW, inH, inC, N };
	size_t inputStrides[dimension] = { 1, inW, inH * inW, inC * inH * inW };

	dnnLayout_t lt_user_input = NULL;

	if(primitives->storage->data[BN_LAYOUT_INPUT] == 0)
	{
		CHECK_ERR( dnnLayoutCreate_F32(&lt_user_input, dimension, inputSize, inputStrides) , err );
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get input layout FAIL......\n");
#endif
	}
	else{
		lt_user_input = (dnnLayout_t)primitives->storage->data[BN_LAYOUT_INPUT];
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN get input layout OK\n");
#endif
	}

	CHECK_ERR( dnnBatchNormalizationCreateForward_F32(&bn_forward, NULL, lt_user_input, eps), err );
	CHECK_ERR( dnnBatchNormalizationCreateBackwardData_F32(&bn_backward, NULL, lt_user_input, eps), err );
	CHECK_ERR( dnnBatchNormalizationCreateBackwardScaleShift_F32(&bn_bwd_scaleshift, NULL, lt_user_input, eps), err );

	dnnLayout_t lt_bn_forward_workspace, lt_bn_forward_scaleshift, lt_bn_forward_output, lt_bn_backward_input;
	real * buffer_forward_workspace = NULL;
	real * buffer_forward_scaleshift = NULL;
	real * buffer_forward_output = NULL;
	real * buffer_backward_input = NULL;
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_workspace, bn_forward, dnnResourceWorkspace), err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_output, bn_forward, dnnResourceDst), err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_forward_scaleshift, bn_forward, dnnResourceScaleShift), err );
	CHECK_ERR( dnnLayoutCreateFromPrimitive_F32(&lt_bn_backward_input, bn_backward, dnnResourceDiffSrc), err );

	CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_workspace), lt_bn_forward_workspace), err );
	CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_scaleshift), lt_bn_forward_scaleshift), err );
	//buffer_forward_output and buffer_backward_input are allocated below only
	//when the MKL layout's size differs from the plain NCHW size

	size_t size1 = dnnLayoutGetMemorySize_F32(lt_bn_forward_output);
	size_t size2 = (size_t)inW*inH*inC*N*sizeof(float);    //bytes in a plain NCHW float buffer
	if(size1 == size2)
	{
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN forward output layout match OK\n");
#endif
	}
	else
	{
		CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_forward_output), lt_bn_forward_output), err );
		fprintf(stderr ,"MKLDNN BN forward output layout match FAIL, size1 = %zu, size2 = %zu \n", size1, size2);
	}

	size1 = dnnLayoutGetMemorySize_F32(lt_bn_backward_input);
	if(size1 == size2)
	{
#if CONVERSION_LOG
		fprintf(stderr ,"MKLDNN BN bwddata input layout match OK\n");
#endif
	}
	else
	{
		CHECK_ERR( dnnAllocateBuffer_F32((void**)(&buffer_backward_input), lt_bn_backward_input), err );
		fprintf(stderr ,"MKLDNN BN bwddata input layout match FAIL, size1 = %zu, size2 = %zu \n", size1, size2);
	}

	//save the primitives, layouts, and buffers into the THLongTensor (as long long slots)
	primitives->storage->data[BN_LAYOUT_FORWARD_OUTPUT] = (long long)lt_bn_forward_output;
	primitives->storage->data[BN_LAYOUT_BACKWARD_INPUT] = (long long)lt_bn_backward_input;

	primitives->storage->data[BN_FORWARD] = (long long)bn_forward;
	primitives->storage->data[BN_BACKWARD] = (long long)bn_backward;
	primitives->storage->data[BN_SCALESHIFT] = (long long)bn_bwd_scaleshift;
	primitives->storage->data[BUFFER_BN_FORWARD_WORKSPACE] = (long long)buffer_forward_workspace;
	primitives->storage->data[BUFFER_BN_FORWARD_SCALESHIFT] = (long long)buffer_forward_scaleshift;
	primitives->storage->data[BUFFER_BN_FORWARD_OUTPUT] = (long long)buffer_forward_output;
	primitives->storage->data[BUFFER_BN_BACKWARD_INPUT] = (long long)buffer_backward_input;
	primitives->storage->data[BUFFER_BN_BACKWARD_WORKSPACE] = (long long)buffer_forward_workspace;    //the workspace is shared between forward and backward
}
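The init routine only creates and caches the primitives, layouts, and buffers; a forward call would read them back out of the THLongTensor and execute. A minimal sketch of that consumption, assuming the MKL DNN scale-and-shift convention of inC scale values followed by inC shift values; the function name and the input, output, weight, and bias parameters are illustrative, and error handling and layout conversions are elided:

/* Sketch: run the cached BN forward primitive saved by the init routine. */
static void bn_forward_demo(THLongTensor *primitives, int inC,
                            const float *weight, const float *bias,
                            float *input, float *output)
{
    long long *prim = primitives->storage->data;
    real *scaleshift = (real*)prim[BUFFER_BN_FORWARD_SCALESHIFT];
    for (int c = 0; c < inC; ++c) {
        scaleshift[c]       = weight ? weight[c] : 1.0f;  /* scale (gamma) */
        scaleshift[inC + c] = bias   ? bias[c]   : 0.0f;  /* shift (beta)  */
    }

    void *resources[dnnResourceNumber] = {0};
    resources[dnnResourceSrc]        = input;
    resources[dnnResourceDst]        = output;
    resources[dnnResourceScaleShift] = scaleshift;
    resources[dnnResourceWorkspace]  = (void*)prim[BUFFER_BN_FORWARD_WORKSPACE];
    dnnExecute_F32((dnnPrimitive_t)prim[BN_FORWARD], resources);
}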