Beispiel #1
0
Datei: test.c Projekt: 8l/mxp
int main(void)
{

	vbx_timestamp_t time_start, time_stop;
	double scalar_time, vbx_time, vbx_time_masked;
	int i, j, k, l, m, n;
	int errors = 0;

	vbx_test_init();
	vbx_mxp_print_params();
    pixel *input, *scalar_input, *vbx_input, *vbx_input_masked;
    uint16_t *scalar_short;

	input         = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_input  = (pixel *)vbx_remap_cached(input, IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_short  = (uint16_t *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(uint16_t));
	vbx_input    = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	vbx_input_masked  = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));

#if UNIT
    unsigned char *vbx_img8;
    unsigned short *img, *vbx_img;
    unsigned int *iImg, *vbx_iImg;
    unsigned int *iiImg, *vbx_iiImg;
    img = (unsigned short*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short));
    vbx_img = (unsigned short*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short));
    vbx_img8 = (unsigned char*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned char));

    iImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
    vbx_iImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));

    iiImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
    vbx_iiImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
#endif//UNIT

	printf("Resolution = %dx%d\n", IMAGE_WIDTH, IMAGE_HEIGHT);
    printf("Initializing data\n");
	vbx_timestamp_start();
    for(l = 0; l < 1; l++){
        char *src;
        char *sdst;
        char *vdst;
        char *mdst;
        if(l == 0){
            load_lenna(input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_lenna(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_lenna(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
            printf("\nLenna\n");
            src = "lenna";
            sdst = "s_lenna";
            vdst = "v_lenna";
            mdst = "m_lenna";
        }else if(l == 1){
            load_ms(input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_ms(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_ms(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
            printf("\nMicrosoft\n");
            src = "ms";
            sdst = "s_ms";
            vdst = "v_ms";
            mdst = "m_ms";
        }else if(l == 2){
            load_blank(input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_blank(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_blank(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
            printf("\nblank\n");
            src = "blank";
            sdst = "s_blank";
            vdst = "v_blank";
            mdst = "m_blank";
        }
#if UNIT
    int window = 20;
    int log=0;
    while(((window/3)>>log) >= 2) log++;


    errors += compare_scalar_rgb2luma_to_vbw_rgb2luma16(img, vbx_img, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, MAX_PRINT_ERRORS);
    vbw_rgb2luma8(vbx_img8, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH);


    int s;
#if LUT_CI
#if DOUBLE_LUT
    printf("Testing double lut\n");

    printf("Assign lbp double lut\n");
    assign_lbp_lut_ci2();
    int prev = errors;
    printf("Cascade check\n");
    /* errors += cascade_check_2w(face_lbp, face_lbp_max_stage, 256); */
    /* errors += cascade_check_2h(face_lbp, face_lbp_max_stage, 256); */
    errors += cascade_check_2b(face_lbp, face_lbp_max_stage, 256);
    if (errors) {
        printf("errors %d\n", errors-prev);
    }
#else
    assign_lbp_lut_ci();

    printf("Testing cascade\n");

    int prev = errors;

    printf("lut check\n");

#if 0
#if 0
    errors += lut_check(256, 0, 0, 0);
    if (errors) {
        printf("errors %d\n", errors-prev);
    }
#elif 1

    int print_errors = 0;

	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	int vci_lanes = this_mxp->vcustom0_lanes;
    int num_features = cascade_max_feature();
    int input_length = 10;
    int lut_length = num_features*vci_lanes;
    int lut_iterations = 15;
#if 1
    lut_length = input_length = 128;
    lut_iterations = 13;
    print_errors = 0;
    errors += lut_check2(input_length, lut_length, lut_iterations, print_errors);
    if (errors) {
        printf("errors %d\n", errors-prev);
    }
#elif 1
    input_length = 64;
    lut_length = input_length;
    lut_iterations = 13;
    print_errors = 1;
    errors += lut_check2(input_length, lut_length, lut_iterations, print_errors);
    if (errors) {
        printf("errors %d\n", errors-prev);
    }
#else
    for(s = 2; s < 100; s=s+10){
        errors += lut_check2(s, lut_length, lut_iterations, print_errors);
        if (errors - prev > 0) {
            printf("%d\terrors %d\n", s, errors-prev);
        } else {
            printf("%d\n", s);
        }
        prev = errors;
    }
#endif
#else
    for(s = 0; s < 2000; s=s+100){
        errors += lut_check(s, 0, 0, 0);
        if (errors - prev > 0) {
            printf("%d\terrors %d\n", s, errors-prev);
        } else {
            printf("%d\n", s);
        }
        prev = errors;
    }
#endif

#elif 1

#else
    printf("check cascade\n");
    prev = errors;
    errors += cascade_check(face_lbp, face_lbp_max_stage, 256);
    if (errors) {
        printf("errors %d\n", errors-prev);
    }

    printf("Testing LBP LUT CI\n");
    prev = errors;
    for(s = 0; s < face_lbp_max_stage; s++){
        errors += compare_vbx_lut_to_vbx_lut_ci(s, MAX_PRINT_ERRORS);
    }
    if (errors) {
        printf("errors %d\n", errors-prev);
        prev = errors;
    }
#endif
#endif
#endif

#if 0
    printf("Printing grey scale img\n");
    printf("grey = [");
    for (j = 0; j < IMAGE_HEIGHT; j++) {
        printf("[");
        for (i = 0; i < IMAGE_WIDTH; i++) {
            printf("%d, ", vbx_img8[j*IMAGE_WIDTH+i]);
        }
        printf("],\n");
    }
    printf("]\n");
#endif
#if LBP_CI
    printf("Testing LBP Pattern CI\n");
    errors += compare_LBPRestrictedCI_to_test_scalar_patterns(vbx_img, vbx_img8, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
#endif

#if BLIP
    printf("Testing BLIP\n");
    for(s = 1; s < 10; s++){
        errors += compare_scalar_BLIP2_to_vector_BLIP(img, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, s);
    }
#endif
#if 0
    errors += compare_LBPRestrictedSums_to_test_scalar_sums_byte(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
    errors += compare_LBPRestrictedSums2_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
    errors += compare_ScalarLBPRestrictedSums_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
    errors += compare_ScalarLBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
    errors += compare_LBPRestrictedPatterns2_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
    errors += compare_LBPRestricted_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
    /* overflow issues -- using bytes changes lbp pattern */
    errors += compare_LBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

    /* requires SKIP_INTEGRALS 0 */
    errors += compare_gen_integrals_to_vector_get_img(img, iImg, iiImg, vbx_img, vbx_iImg, vbx_iiImg, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);


    /* redundant test, compare to test_scalar_patterns instead */
    errors += compare_ScalarLBPRestrictedPatterns_to_SATBinaryPattern(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

    errors += compare_SATBinaryPattern_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

    errors += compare_LBPPassStage_to_restricted(vbx_img, log, face_lbp[0], window, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
#endif
#else // UNIT

#if PRINT
        print_python_pixel(scalar_input, src, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

        time_start = vbx_timestamp();
        scalar_rgb2luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH);
        scalar_face_detect_luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, sdst);
        time_stop = vbx_timestamp();
        scalar_time = vbx_print_scalar_time(time_start, time_stop);
#if PRINT
        print_python_pixel(scalar_input, sdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif
        printf("\nVector");
        time_start = vbx_timestamp();
        vector_face_detect((pixel *)vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 0, vdst);
        time_stop = vbx_timestamp();
        vbx_time = vbx_print_vector_time(time_start, time_stop, scalar_time);
#if PRINT
        print_python_pixel(vbx_input, vdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

        printf("\nVector Masked");
        time_start = vbx_timestamp();
        vector_face_detect((pixel *)vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 1, mdst);
        time_stop = vbx_timestamp();
        vbx_time_masked = vbx_print_vector_time(time_start, time_stop, scalar_time);
#if PRINT
        print_python_pixel(vbx_input_masked, mdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif
        /* errors += match_array_pixel(input, vbx_input, "vector", IMAGE_WIDTH, IMAGE_HEIGHT, 0, MAX_PRINT_ERRORS, 0); */
        /* errors += match_array_pixel(input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, 0, MAX_PRINT_ERRORS, 0); */
        errors += match_array_pixel(vbx_input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, 0, MAX_PRINT_ERRORS, 0);
#endif // UNIT
    }
	VBX_TEST_END(errors);
	return errors;
}
Beispiel #2
0
Datei: test.c Projekt: 8l/mxp
int VBX_T(vbw_vec_reverse_test_mm)()
{
	unsigned int aN[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17, 20, 25, 31, 32, 33, 35, 40, 48, 60, 61, 62, 63, 64, 64, 65,
	                      66, 67, 68, 70, 80, 90, 99, 100, 101, 110, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 224, 224,
	                      256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 400, 450, 512, 550, 600, 650, 700, 768, 768, 900,
	                      900, 1023, 1024, 1200, 1400, 1600, 1800, 2048, 2048, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800,
	                      2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4096, 4096, 4100, 4200, 4300,
	                      4400, 4500, 4600, 4700, 4800, 4900, 5000, 6000, 7000, 8000, 8192, 8192, 9000, 10000, 11000, 12000,
	                      13000, 14000, 15000, 16000, 16384, 16384, 20000, 25000, 30000, 32767, 32768, 32768, 35000, 40000,
	                      45000, 50000, 55000, 60000, 65000, 65535, 65536, 65536, 65537, 100000, 128000, 256000, 333333, 528374,
	                      528374 };

	int retval;
	unsigned int N;
	unsigned int NBYTES;
	unsigned int NREPS = 100;
	unsigned int NREPSFORLARGE = 10;
	unsigned int i,j,k;
	vbx_timestamp_t start=0,finish=0;

	for( i=0; i<sizeof(aN)/4; i++ ) {
		N = aN[i];
		//printf( "testing with vector size %d\n", N );

		if(N > 10000) NREPS = NREPSFORLARGE;

		NBYTES = N*sizeof(vbx_mm_t);

		vbx_mm_t *src = (vbx_mm_t *) vbx_shared_malloc( NBYTES );
		vbx_mm_t *dst = (vbx_mm_t *) vbx_shared_malloc( NBYTES );
		//printf("bytes alloc: %d\n", NBYTES );

		if( !src ) VBX_EXIT(-1);
		if( !dst ) VBX_EXIT(-1);

		for ( j=0; j<N; j++ ) {
			dst[j] = -1;                 // Fill the destination with -1
			src[j] = j;                  // Fill the source with enumerated values
		}

//			VBX_T(vbw_vec_reverse_ext)( dst, src, N );

		/** measure performance of function call **/
		start = vbx_timestamp();
		for(k=0; __builtin_expect(k<NREPS,1); k++ ) {
			retval = VBX_T(vbw_vec_reverse_ext)( dst, src, N );
		}
		finish = vbx_timestamp();
		printf( "length %d (%s):\tvbware mm f():\t%llu", N, VBX_EXPAND_AND_QUOTE(BYTEHALFWORD), (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) );

		#if VERIFY_VBWARE_ALGORITHM
			VBX_T(verify_vector)( src, dst, N );
		#else
			printf(" [VERIFY OFF]");
		#endif

		printf("\treturn value: %X", retval);

		/** measure performance of scalar **/
		vbx_mm_t *A = vbx_remap_cached( src, N*sizeof(vbx_mm_t) );   // Use cached pointers for better performance
		vbx_mm_t *B = vbx_remap_cached( dst, N*sizeof(vbx_mm_t) );
		start = vbx_timestamp();
		for(k=0; k<NREPS; k++ ) {
			unsigned int m;
			for(m=0; m<N; m++) {
				B[N-1-m]=A[m];
			}
		vbx_dcache_flush( A, N*sizeof(vbx_mm_t) );               // Make sure to read from main memory
		vbx_dcache_flush( B, N*sizeof(vbx_mm_t) );               // Make sure writes are committed to memory
		}
		finish = vbx_timestamp();

		printf( "\tscalar (cache friendly):\t%llu", (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) );

		#if VERIFY_SIMPLE_ALGORITHM
			VBX_T(verify_vector)( src, dst, N );
		#else
			printf(" [VERIFY OFF]");
		#endif
			printf("\tcycles\n");

			vbx_shared_free(src);
			vbx_shared_free(dst);
	}

	printf("All tests passed successfully.\n");

	return 0;
}
Beispiel #3
0
int main(void)
{

	vbx_timestamp_t time_start, time_stop;
	double scalar_time, vbx_time, vbx_time_masked;
	int i, j, k, l, m, n;
	int errors = 0;

	vbx_test_init();
	vbx_mxp_print_params();
    pixel *input, *scalar_input, *vbx_input, *vbx_input_masked;
    uint16_t *scalar_short;

	input         = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_input  = (pixel *)vbx_remap_cached(input, IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_short  = (uint16_t *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(uint16_t));
	vbx_input    = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	vbx_input_masked  = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));

#if UNIT
    unsigned short *img, *vbx_img;
    unsigned int *iImg, *vbx_iImg;
    unsigned int *iiImg, *vbx_iiImg;
    img = (unsigned short*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short));
    vbx_img = (unsigned short*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short));

    iImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
    vbx_iImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));

    iiImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
    vbx_iiImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
#endif//UNIT

	printf("Resolution = %dx%d\n", IMAGE_WIDTH, IMAGE_HEIGHT);
    printf("Initializing data\n");
	vbx_timestamp_start();
    for(l = 0; l < 1; l++){
        char *src;
        char *sdst;
        char *vdst;
        char *mdst;
        if(l == 0){
            load_lenna(input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_lenna(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_lenna(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
            printf("\nLenna\n");
            src = "lenna";
            sdst = "s_lenna";
            vdst = "v_lenna";
            mdst = "m_lenna";
        }else if(l == 1){
            load_ms(input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_ms(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_ms(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
            printf("\nMicrosoft\n");
            src = "ms";
            sdst = "s_ms";
            vdst = "v_ms";
            mdst = "m_ms";
        }else if(l == 2){
            load_blank(input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_blank(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
            load_blank(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
            printf("\nblank\n");
            src = "blank";
            sdst = "s_blank";
            vdst = "v_blank";
            mdst = "m_blank";
        }
#if UNIT
    int window = 20;
    int log=0;
    while(((window/3)>>log) >= 2) log++;

#if LUT_CI
    /* errors += compare_vbx_lut_to_vbx_lut_ci(1024, MAX_PRINT_ERRORS); */
#endif
#if LBP_CI
    errors += compare_vbx_lbp_ci_to_scalar_patterns(vbx_img, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
#endif
    errors += compare_scalar_rgb2luma_to_vbw_rgb2luma16(img, vbx_img, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, MAX_PRINT_ERRORS);
    /* errors += compare_LBPRestrictedSums_to_test_scalar_sums_byte(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */
    /* errors += compare_LBPRestrictedSums2_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */
    /* errors += compare_ScalarLBPRestrictedSums_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */
    /* errors += compare_ScalarLBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */
    /* errors += compare_LBPRestrictedPatterns2_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */
    errors += compare_LBPRestricted_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
#if 0
    /* overflow issues -- using bytes changes lbp pattern */
    errors += compare_LBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

    /* requires SKIP_INTEGRALS 0 */
    errors += compare_gen_integrals_to_vector_get_img(img, iImg, iiImg, vbx_img, vbx_iImg, vbx_iiImg, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

    /* currently last values have errors if the scaled images size is not an integer, width * f/ (f+1) */
    errors += compare_scalar_BLIP2_to_vector_BLIP(img, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

    /* redundant test, compare to test_scalar_patterns instead */
    errors += compare_ScalarLBPRestrictedPatterns_to_SATBinaryPattern(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

    errors += compare_SATBinaryPattern_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);

#endif
    errors += compare_LBPPassStage_to_restricted(vbx_img, log, face_lbp[0], window, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
#else // UNIT

#if PRINT
        print_python_pixel(scalar_input, src, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

        time_start = vbx_timestamp();
        scalar_rgb2luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH);
        scalar_face_detect_luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, sdst);
        time_stop = vbx_timestamp();
        scalar_time = vbx_print_scalar_time(time_start, time_stop);
#if PRINT
        print_python_pixel(scalar_input, sdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif
        printf("\nVector");
        time_start = vbx_timestamp();
        vector_face_detect((pixel *)vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 0, vdst);
        time_stop = vbx_timestamp();
        vbx_time = vbx_print_vector_time(time_start, time_stop, scalar_time);
#if PRINT
        print_python_pixel(vbx_input, vdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

        printf("\nVector Masked");
        time_start = vbx_timestamp();
        vector_face_detect((pixel *)vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 1, mdst);
        time_stop = vbx_timestamp();
        vbx_time_masked = vbx_print_vector_time(time_start, time_stop, scalar_time);
#if PRINT
        print_python_pixel(vbx_input_masked, mdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif
        /* errors += match_array_pixel(input, vbx_input, "vector", IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, 0); */
        /* errors += match_array_pixel(input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, 0); */
        errors += match_array_pixel(vbx_input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, 0);
#endif // UNIT
    }
	VBX_TEST_END(errors);
	return errors;
}
Beispiel #4
0
int vbw_vec_reverse_ext( vbx_mm_t *dst, vbx_mm_t *src, const unsigned int N )
{

	typedef vbx_mm_t vbx_sp_t;
	const int VBW_ROT16= sizeof(vbx_sp_t) <=sizeof(vbx_half_t);
	const int VBW_ROT8= sizeof(vbx_sp_t)== sizeof(vbx_byte_t);
	const int VBW_RSHIFT_T_TO_W= (sizeof(vbx_sp_t)==sizeof(vbx_word_t)? 0:
	                              sizeof(vbx_sp_t)==sizeof(vbx_half_t)? 1:/*byte_sized*/2);
	const int VBW_LSHIFT_W_TO_T= VBW_RSHIFT_T_TO_W;
	// Catch when N is very small
	if( N<4 ) {
		unsigned int i = 0;
		while(i<N) {
			dst[N-i-1]=src[i];
			i++;
		}
		return VBW_SUCCESS;
	}

	vbx_mxp_t *this_mxp          = VBX_GET_THIS_MXP();
	unsigned int SP_WIDTH_B      = this_mxp->scratchpad_alignment_bytes;
	unsigned int FREE_BYTES      = vbx_sp_getfree();


	// Catch when N is small enough that cached scalar does a better job
	if( N <= MM_CACHED_SCALAR_THRESHOLD || FREE_BYTES < SP_WIDTH_B*5 ){
		unsigned int i;
		vbx_mm_t *A = (vbx_mm_t*)vbx_remap_cached(src,N*sizeof(vbx_mm_t));
		vbx_mm_t *B = (vbx_mm_t*)vbx_remap_cached(dst,N*sizeof(vbx_mm_t));
		for( i=0; i<N; i++ ) {
			B[N-i-1]=A[i];
		}
		vbx_dcache_flush(B,N*sizeof(vbx_mm_t));
		return VBW_SUCCESS;
	}

	unsigned int NUM_LANES   = this_mxp->vector_lanes;
	unsigned int tile_size_b = VBX_PAD_DN(((FREE_BYTES-SP_WIDTH_B)/2),SP_WIDTH_B);
	unsigned int tile_size_w = tile_size_b/4;
	unsigned int tile_size_t = tile_size_w << VBW_LSHIFT_W_TO_T;


	unsigned int num_tiles = N / tile_size_t;
	unsigned int rows_per_tile = tile_size_b / SP_WIDTH_B;

	unsigned int tile_part_t = N - num_tiles * tile_size_t;
	unsigned int threshold_w = NUM_LANES >= 32 ? VL1_THRESHOLD_V32_UP :
		NUM_LANES == 16 ? VL1_THRESHOLD_V16    :
		NUM_LANES == 8  ? VL1_THRESHOLD_V8     : UINT_MAX;


	if(tile_part_t){
		vbx_sp_push();
		vbx_sp_t *v_0 = (vbx_sp_t *)vbx_sp_malloc(tile_part_t*sizeof(vbx_sp_t));
		vbx_sp_t *v_1 = (vbx_sp_t *)vbx_sp_malloc(tile_part_t*sizeof(vbx_sp_t));

#if !VBX_SKIP_ALL_CHECKS
		if( !v_0 || !v_1) {
			VBX_PRINTF("vbx_sp_malloc failed when it was predetermined to have enough space.");
			VBX_EXIT(-1);
		}
#endif

		vbx_dma_to_vector(v_0, src+N-tile_part_t, tile_part_t*sizeof(vbx_mm_t));
		vbw_vec_reverse(v_1, v_0, tile_part_t);
		vbx_dma_to_host(dst, v_1, tile_part_t*sizeof(vbx_sp_t));
		dst += tile_part_t;
		vbx_sp_pop();
	}

	if(!num_tiles) {
		return VBW_SUCCESS;
	}

	vbx_sp_push();
	vbx_word_t *v_mask = (vbx_word_t *)vbx_sp_malloc(SP_WIDTH_B);
	vbx_word_t *v_scratch[2] = { (vbx_word_t *)vbx_sp_malloc(tile_size_b), (vbx_word_t *)vbx_sp_malloc(tile_size_b) };
	vbx_word_t *result;

#if !VBX_SKIP_ALL_CHECKS
	if( !v_scratch[0] || !v_scratch[1] || !v_mask ) {
		VBX_PRINTF("vbx_sp_malloc failed when it was predetermined to have enough space.");
		VBX_EXIT(-1);
	}
#endif

	src += (num_tiles - 1) * tile_size_t;

	if( tile_size_w <= threshold_w) {
		while( num_tiles ) {
			vbx_dma_to_vector( v_scratch[0], src, tile_size_b );
			if(VBW_ROT16){
				vec_rev_rot16_w(v_scratch[1], v_scratch[0], tile_size_w);
			}else{
				vec_rev_w(v_scratch[1], v_scratch[0], tile_size_w);
			}
			if( VBW_ROT8){
				vec_rot8_h( v_scratch[1], v_scratch[1], tile_size_w*2 );
			}
			vbx_dma_to_host( dst, v_scratch[1], tile_size_b );
			dst += tile_size_t;
			src -= tile_size_t;
			num_tiles--;
		}
	} else {
		while( num_tiles ) {
			vbx_dma_to_vector( v_scratch[0], src, tile_size_b );
			result = vec_rev_merge_w( v_scratch[1], v_scratch[0], tile_size_w, v_scratch[0], v_mask, SP_WIDTH_B,
			                          rows_per_tile, VBW_ROT16 );
			if(VBW_ROT8){
				vec_rot8_h( result, result, tile_size_w*2 );
			}
			vbx_dma_to_host( dst, result, tile_size_b );
			dst += tile_size_t;
			src -= tile_size_t;
			num_tiles--;
		}
	}

	vbx_sp_pop();
	return VBW_SUCCESS;
}
Beispiel #5
0
int main(void)
{
	pixel *input;
	pixel *scalar_input;

#if USE_LUMA
	unsigned char  *vbx_luma;
#endif
	unsigned short *scalar_luma;

	pixel *vbx_output;
	pixel *scalar_output;

	vbx_timestamp_t time_start, time_stop;
	double scalar_time, vbx_time;
	int x, y;
	int errors = 0;

	vbx_test_init();

	vbx_mxp_print_params();

	input         = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_input  = (pixel *)vbx_remap_cached(input, IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
#if USE_LUMA
	vbx_luma      = (unsigned char *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned char));
#endif
	scalar_luma   = (unsigned short *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short));
	vbx_output    = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_output = (pixel *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));

	printf("\nInitializing data\n");
	printf("Resolution = %dx%d\n", IMAGE_WIDTH, IMAGE_HEIGHT);
	init_matrix(input, IMAGE_WIDTH, IMAGE_HEIGHT);

	printf("Starting Sobel 3x3 edge-detection test\n");

#if USE_LUMA
	scalar_rgb2luma(scalar_luma, scalar_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH);
#endif
	vbx_timestamp_start();
	time_start = vbx_timestamp();
#if !USE_LUMA
	scalar_rgb2luma(scalar_luma, scalar_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH);
#endif
	scalar_sobel_argb32_3x3(scalar_output, scalar_luma, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH, RENORM_AMOUNT);
	time_stop = vbx_timestamp();
	scalar_time = vbx_print_scalar_time(time_start, time_stop);

#if USE_LUMA
	vbw_rgb2luma8(vbx_luma, (unsigned *)input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH);
#endif
	vbx_timestamp_start();
	time_start = vbx_timestamp();
#if USE_LUMA
	vbw_sobel_luma8_3x3((unsigned *)vbx_output, vbx_luma, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH, RENORM_AMOUNT);
#else
	vbw_sobel_argb32_3x3((unsigned *)vbx_output, (unsigned *)input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH, RENORM_AMOUNT);
#endif
	time_stop = vbx_timestamp();
	vbx_time = vbx_print_vector_time(time_start, time_stop, scalar_time);

	for (y = 0; y < IMAGE_HEIGHT; y++) {
		for (x = 0; x < IMAGE_WIDTH; x++) {
#if USE_LUMA
			if (scalar_luma[y*IMAGE_WIDTH+x] != vbx_luma[y*IMAGE_WIDTH+x]) {
				if (errors < MAX_PRINT_ERRORS) {
					printf("Y Error at %d, %d: Expected = %02X, got = %02X\n",
							y, x, scalar_luma[y*IMAGE_WIDTH+x], vbx_luma[y*IMAGE_WIDTH+x]);
				}
				errors++;
			}
#endif
			if (scalar_output[y*IMAGE_WIDTH+x].r != vbx_output[y*IMAGE_WIDTH+x].r) {
				if (errors < MAX_PRINT_ERRORS) {
					printf("R Error at %d, %d: Expected = %02X, got = %02X\n",
							y, x, scalar_output[y*IMAGE_WIDTH+x].r, vbx_output[y*IMAGE_WIDTH+x].r);
				}
				errors++;
			}
			if (scalar_output[y*IMAGE_WIDTH+x].g != vbx_output[y*IMAGE_WIDTH+x].g) {
				if (errors < MAX_PRINT_ERRORS) {
					printf("G Error at %d, %d: Expected = %02X, got = %02X\n",
							y, x, scalar_output[y*IMAGE_WIDTH+x].g, vbx_output[y*IMAGE_WIDTH+x].g);
				}
				errors++;
			}
			if (scalar_output[y*IMAGE_WIDTH+x].b != vbx_output[y*IMAGE_WIDTH+x].b) {
				if (errors < MAX_PRINT_ERRORS) {
					printf("B Error at %d, %d: Expected = %02X, got = %02X\n",
							y, x, scalar_output[y*IMAGE_WIDTH+x].b, vbx_output[y*IMAGE_WIDTH+x].b);
				}
				errors++;
			}
		}
	}

	VBX_TEST_END(errors);
	return errors;
}