int main(void) { vbx_test_init(); vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; const int required_vectors = 4; int N = VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t) / required_vectors; int PRINT_LENGTH = min( N, MAX_PRINT_LENGTH ); double scalar_time, vector_time; int errors=0; vbx_mxp_print_params(); printf( "\nAdd test...\n" ); printf( "Vector length: %d\n", N ); vbx_mm_t *scalar_in1 = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_in2 = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_out = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in1 = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in2 = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); // vbx_mm_t *vector_out = vector_in2 - 5; vbx_sp_t *v_in1 = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); vbx_sp_t *v_in2 = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); vbx_sp_t *v_out = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); // vbx_sp_t *v_out = v_in2-5; VBX_T(test_zero_array)( scalar_out, N ); VBX_T(test_zero_array)( vector_out, N ); VBX_T(test_init_array)( scalar_in1, N, 1 ); VBX_T(test_copy_array)( vector_in1, scalar_in1, N ); VBX_T(test_init_array)( scalar_in2, N, 1 ); VBX_T(test_copy_array)( vector_in2, scalar_in2, N ); VBX_T(test_print_array)( scalar_in1, PRINT_LENGTH ); VBX_T(test_print_array)( scalar_in2, PRINT_LENGTH ); scalar_time = test_scalar( scalar_out, scalar_in1, scalar_in2, N ); VBX_T(test_print_array)( scalar_out, PRINT_LENGTH); vbx_dma_to_vector( v_in1, (void *)vector_in1, N*sizeof(vbx_sp_t) ); vbx_dma_to_vector( v_in2, (void *)vector_in1, N*sizeof(vbx_sp_t) ); vector_time = test_vector( v_out, v_in1, v_in2, N, scalar_time ); vbx_dma_to_host( (void *)vector_out, v_out, N*sizeof(vbx_sp_t) ); vbx_sync(); VBX_T(test_print_array)( vector_out, PRINT_LENGTH ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, N ); VBX_TEST_END(errors); return 0; }
int main(void) { double scalar_time, vector_time; int errors=0; vbx_test_init(); vbx_mxp_print_params(); printf("\nVector FIR test...\n"); vbx_mm_t *scalar_sample = malloc( (SAMP_SIZE+NTAPS)*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_coeffs = malloc( NTAPS*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_out = malloc( SAMP_SIZE*sizeof(vbx_mm_t) ); vbx_mm_t *sample = vbx_shared_malloc( (SAMP_SIZE+NTAPS)*sizeof(vbx_mm_t) ); vbx_mm_t *coeffs = vbx_shared_malloc( NTAPS*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( SAMP_SIZE*sizeof(vbx_mm_t) ); VBX_T(test_zero_array)( scalar_out, SAMP_SIZE ); VBX_T(test_zero_array)( vector_out, SAMP_SIZE ); VBX_T(test_init_array)( scalar_sample, SAMP_SIZE, 0xff ); VBX_T(test_copy_array)( sample, scalar_sample, SAMP_SIZE ); VBX_T(test_init_array)( scalar_coeffs, NTAPS, 1 ); VBX_T(test_copy_array)( coeffs, scalar_coeffs, NTAPS ); VBX_T(test_zero_array)( scalar_sample+SAMP_SIZE, NTAPS ); VBX_T(test_zero_array)( sample+SAMP_SIZE, NTAPS ); printf("\nSamples:\n"); VBX_T(test_print_array)( scalar_sample, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); printf("\nCoefficients:\n"); VBX_T(test_print_array)( scalar_coeffs, min(NTAPS,MAX_PRINT_LENGTH) ); scalar_time = test_scalar( scalar_out, scalar_sample, scalar_coeffs); VBX_T(test_print_array)( scalar_out, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); #ifdef USE_TRANSPOSE vector_time = test_vector_transpose( vector_out, sample, coeffs, scalar_time ); VBX_T(test_print_array)( vector_out, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS ); #endif //USE_TRANSPOSE #ifdef USE_1D vector_time = test_vector_1d( vector_out, sample, coeffs, scalar_time ); VBX_T(test_print_array)( vector_out, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS ); #endif //USE_1D #ifdef USE_2D vector_time = test_vector_2d( vector_out, sample, coeffs, scalar_time ); VBX_T(test_print_array)( vector_out, 
min(SAMP_SIZE,MAX_PRINT_LENGTH) ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS ); #endif //USE_2D VBX_TEST_END(errors); return 0; }
int compare_vbx_lut_to_vbx_lut_ci(int sz, int max_print_errors) { int f, n, errors; vbx_byte_t* v_pass = (vbx_byte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_pattern = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_lutc = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_group = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_sel = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_lut = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_word_t)); vbx_ubyte_t* v_idx = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_word_t)); unsigned char* lut = (unsigned char*)vbx_shared_malloc(sz*sizeof(unsigned char)); unsigned char* lut_c = (unsigned char*)vbx_shared_malloc(sz*sizeof(unsigned char)); for (n = 0; n < sz; n++) { v_pattern[n] = n & 0xff; } int s, stage = 11; for (f = 0; f < face_lbp[stage].count; f++) { lbp_feat_t feat = face_lbp[stage].feats[f]; vbx_set_vl(sz); int total = f; s = 0; while(s < stage){ total += face_lbp[s].count; s++; } vbx(SVBU, VCUSTOM0, v_lutc, total, v_pattern); vbx(SVB, VMOV, v_pass, feat.fail, 0); /* check if pattern is in lut */ vbx(SVBU, VSHR, v_group, 5, v_pattern); for (n = 0; n < 8; n++) { vbx(SVB, VADD, v_sel, -n, v_group); vbx(SVBW, VCMV_Z, v_lut, feat.lut[n], v_sel); } vbx(SVBWU, VAND, v_idx, 0x1f, v_pattern); vbx(VVWB, VSHR, v_lut, v_idx, v_lut); vbx(SVB, VAND, v_lut, 1, v_lut); vbx(SVB, VCMV_LEZ, v_pass, feat.pass, v_lut); vbx_dma_to_host(lut_c, v_lutc, sz*sizeof(unsigned char)); vbx_dma_to_host(lut, v_pass, sz*sizeof(unsigned char)); vbx_sync(); errors = match_array_byte(lut_c, lut, "custom_lut", sz, 1, max_print_errors, 0, 0); } vbx_sp_free(); vbx_shared_free(lut); vbx_shared_free(lut_c); return errors; }
int main(void) { vbx_test_init(); vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; int N = VBX_SCRATCHPAD_SIZE/sizeof(vbx_word_t)/12; N=1024; int PRINT_LENGTH = min(N, MAX_PRINT_LENGTH); double scalar_time, vector_time; int errors=0; vbx_mxp_print_params(); printf("\nVector power test...\n"); printf("Vector length: %d\n", N); vbx_word_t *scalar_in1 = malloc( N*sizeof(vbx_word_t) ); vbx_word_t *scalar_in2 = malloc( N*sizeof(vbx_word_t) ); vbx_word_t *scalar_out = malloc( N*sizeof(vbx_word_t) ); vbx_word_t *vector_in1 = vbx_shared_malloc( N*sizeof(vbx_word_t) ); vbx_word_t *vector_in2 = vbx_shared_malloc( N*sizeof(vbx_word_t) ); vbx_word_t *vector_out = vbx_shared_malloc( N*sizeof(vbx_word_t) ); if(vector_out==NULL){ printf("malloc_failed\n"); return 1; } test_zero_array_word( scalar_out, N ); test_zero_array_word( vector_out, N ); test_init_array_word( scalar_in1, N, 5 ); test_copy_array_word( vector_in1, scalar_in1, N ); test_init_array_word( scalar_in2, N, 112 ); test_copy_array_word( vector_in2, scalar_in2, N ); test_print_array_word( scalar_in1, PRINT_LENGTH ); test_print_array_word( scalar_in2, PRINT_LENGTH ); scalar_time = test_scalar_power( scalar_out, scalar_in1, scalar_in2, N); test_print_array_word( scalar_out, PRINT_LENGTH ); vector_time = test_vector_power( vector_out, vector_in1, vector_in2, N, scalar_time ); test_print_array_word( vector_out, PRINT_LENGTH ); errors += test_verify_array_word( scalar_out, vector_out, N ); VBX_TEST_END(errors); return 0; }
int compare_vbx_lbp_ci_to_scalar_patterns(unsigned short* img, int width, int height, int max_print_errors) { int j, errors = 0; unsigned char** scalar_patterns = test_scalar_patterns(img, 0, width, height); vbx_ubyte_t* v_in = (vbx_ubyte_t*)vbx_sp_malloc(3*width*sizeof(vbx_word_t)); vbx_ubyte_t* v_top = (vbx_byte_t*)vbx_sp_malloc(width*sizeof(vbx_byte_t)); vbx_ubyte_t* v_bot = (vbx_byte_t*)vbx_sp_malloc(width*sizeof(vbx_byte_t)); vbx_ubyte_t* v_lbp = v_bot; unsigned char* lbp = (unsigned char*)vbx_shared_malloc(width*sizeof(unsigned char)); vbx_set_vl(width); for(j=0; j < height - 2; j++){ vbx_dma_to_vector(v_in, img+j*width, 3*width*sizeof(unsigned char)); vbx(VVHU, VCUSTOM1, v_top, v_in, v_in+width); vbx(VVHU, VCUSTOM1, v_bot, v_in+width, v_in+2*width); vbx(SVHBU, VAND, v_top, 0xf0, v_top); vbx(SVHBU, VAND, v_bot, 0x0f, v_bot); vbx(VVBU, VADD, v_lbp, v_bot, v_top); vbx_dma_to_host(lbp, v_lbp, width*sizeof(unsigned char)); vbx_sync(); errors = match_array_byte(lbp, scalar_patterns[0]+j*width, "custom_lbp", width-2, 1, max_print_errors, 1, j); } vbx_sp_free(); vbx_shared_free(lbp); return errors; }
int dma_bandwidth_test() { const int num_iter = 64; vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); int scratchpad_size = this_mxp->scratchpad_size; uint8_t *buf = vbx_shared_malloc(scratchpad_size); vbx_ubyte_t *v_buf = vbx_sp_malloc(scratchpad_size); vbx_timestamp_t time_start, time_stop; int i; int len; int to_host; int errors = 0; vbx_mxp_print_params(); // dma_alignment_bytes gives DMA master data bus width in bytes. double bytes_per_sec = \ (((double) this_mxp->core_freq) * this_mxp->dma_alignment_bytes); double max_megabytes_per_sec = bytes_per_sec/(1024*1024); printf("\nMax available bandwidth = %s Megabytes/s\n", vbx_eng(max_megabytes_per_sec, 4)); printf("\n"); for (to_host = 0; to_host < 2; to_host++) { for (len = 32; len <= scratchpad_size ; len *= 2) { printf("DMA %s, %d bytes\n", to_host ? "write" : "read", len); vbx_timestamp_start(); if (to_host) { time_start = vbx_timestamp(); for (i = 0; i < num_iter; i++) { vbx_dma_to_host(buf, v_buf, len); } vbx_sync(); time_stop = vbx_timestamp(); } else { time_start = vbx_timestamp(); for (i = 0; i < num_iter; i++) { vbx_dma_to_vector(v_buf, buf, len); } vbx_sync(); time_stop = vbx_timestamp(); } print_dma_bandwidth(time_start, time_stop, len, num_iter, max_megabytes_per_sec); printf("\n"); } printf("\n"); } vbx_shared_free(buf); vbx_sp_free(); return errors; }
int deep_vector_copy_ext_test() { vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); int retval; int num_test; int total_errors = 0; const int NUM_TESTS = TEST_DEEP_MM_NUM_TESTS; int NB = this_mxp->scratchpad_size * 10; int NT = NB / sizeof(vbx_mm_t); vbx_mm_t *v = vbx_shared_malloc( NB ); srand( 0x1a84c92a ); int i; for( num_test=0; num_test < NUM_TESTS ; num_test++ ) { // initialize the whole working space for( i=0; i<NT; i++ ) { v[i] = i & MSK; } // choose random src/dest/length: // -- randomly pick the dest // -- set a window size of 2*K around the dest // -- randomly pick the src within the window // -- randomly pick the length, subject to end-of-scratchpad // -- this 'window' rule increases probability of overlaps // -- rough distribution: 30% short (pipeline) overlaps, 20% long overlaps, 50% no overlap int K, N1, N2, NN; N1 = rand() % NT; K = 1 + rand() % ((N1 > 0)? min(min(N1, NT-N1), 1024): min(NT, 1024)); N2 = N1 - K + rand() % (2*K); NN = rand() % (NT - max(N1,N2)); vbx_mm_t *dst = v + N1; vbx_mm_t *src = v + N2; printf("test:%d src:0x%08x dst:0x%08x len:%08d", num_test, N1, N2, NN ); // do the copy retval = VBX_T(vbw_vec_copy_ext)( dst, src, NN ); vbx_sync(); printf(" retval:0x%04x\n",retval); // ensure the copy was done properly int errors = verify_copy(v, 0, N1, 0, "head") + verify_copy(v, N1, NN+N1, (N2-N1), "copy") + verify_copy(v, NN+N1, NT, 0, "tail"); total_errors += errors; if( errors ) { //break; } } return total_errors; }
int compare_ScalarLBPRestrictedPatterns_to_SATBinaryPattern(unsigned short *vbx_img, int log, int width, int height, int max_print_errors) { int l, i, j, cell, errors = 0; /* generate patterns */ unsigned short **sums = ScalarLBPRestrictedSums(vbx_img, width, height, log); unsigned char **patterns = ScalarLBPRestrictedPatterns(sums, width, height, log); unsigned char **sat_patterns = (unsigned char**)vbx_shared_malloc((log+1)*sizeof(unsigned char*)); for (l=0; l<log+1; l++) { sat_patterns[l] = (unsigned char*)vbx_shared_malloc(width*height*sizeof(unsigned char)); } unsigned int *iImg, *iiImg; iImg = (unsigned int *)vbx_shared_malloc(width*height*sizeof(unsigned int)); iiImg = (unsigned int *)vbx_shared_malloc(width*height*sizeof(unsigned int)); gen_integrals(vbx_img, iImg, iiImg, width, height); image_t lbp_img = {iImg, {width, height}}; for (l=0; l<log+1; l++) { cell = 1 << l; lbp_feat_t lbp_feat = {{{0, 0}, {cell, cell}}, 0, 0, {0, 0, 0, 0, 0, 0, 0, 0}}; for (j = 0; j < height - (3*cell-1); j++) { for (i = 0; i < width - (3*cell-1); i++) { pair_t lbp_p = {i, j}; sat_patterns[l][j*width+i] = SATBinaryPattern(lbp_img, &lbp_feat, lbp_p); } } } /* test patterns vs sat binary patterns */ for (l=0; l<log+1; l++) { cell = 1 << l; for (j = 0; j < height - (3*cell-1); j++) { errors += match_array_byte(patterns[l] + j*width, sat_patterns[l] + j*width, "patterns", width - (3*cell-1), 1, 0, max_print_errors, 1, j); if (errors > max_print_errors){ max_print_errors = 0; } } } return errors; }
/*
 * Allocate and initialise the state used by the vector forward DCT.
 *
 * Fills the global coefficient tables c2 (double) and cs (scaled by
 * SHIFT_DOUBLE and truncated to dt), pushes the scratchpad allocation state,
 * allocates the state structure and its scratchpad buffers, starts the DMA
 * of coeff_v into the scratchpad and fetches the first BLOCK_SIZE rows of
 * image tiles into the current image buffer.
 *
 * coeff_v  coefficient data copied to the scratchpad (co_bytes bytes)
 * image    source image passed to getBigTileImageY()
 *
 * Returns the new state.  On allocation failure an error is printed and
 * VBX_EXIT(-1) is invoked.
 */
vbx_mtx_fdct_t *
vbx_mtx_fdct_init( dt *coeff_v, dt *image )
{
    const int BIG_TILE_SIZE = NUM_TILE_X * NUM_TILE_Y * DCT_SIZE;
    const int num_bytes = BIG_TILE_SIZE * sizeof(dt);
    const int co_bytes = NUM_TILE_X* DCT_SIZE *sizeof(dt);

    //compute coeffs matrix in double and truncated to dt
    int i, j;
    double s;
    for (i = 0; i < BLOCK_SIZE; i++) {
        s = (i == 0) ? sqrt(0.125) : 0.5;
        for (j = 0; j < BLOCK_SIZE; j++) {
            /* DCT-II basis: cos( pi/8 * i * (j + 1/2) ); the 0.5 belongs to j, not to the product. */
            c2[i][j] = s * cos( (PI / 8.0) * i * (j + 0.5) );
            cs[i][j] = (dt) (c2[i][j] * SHIFT_DOUBLE + 0.499999);
        }
    }

    vbx_sp_push();

    /* sizeof *v keeps the allocation size tied to the pointer's own type. */
    vbx_mtx_fdct_t *v = vbx_shared_malloc( sizeof *v );
    if( !v ) {
        VBX_PRINTF( "ERROR: out of memory.\n" );
        VBX_EXIT(-1);
    }

    /* Freshly allocated memory: select image buffer 0 before v->db is read below. */
    v->db = 0;

    v->vcoeff = (vbx_half_t *)vbx_sp_malloc( co_bytes );
    v->vprods = (vbx_half_t *)vbx_sp_malloc( num_bytes );
#if USE_ACCUM_FLAGS
    v->vaccum = (vbx_half_t *)vbx_sp_malloc( num_bytes );
    v->vflags = (vbx_half_t *)vbx_sp_malloc( num_bytes );
#endif

    // interleave ordering to ensure no false hazards
    v->vblock[2] = (vbx_half_t *)vbx_sp_malloc( num_bytes );
    v->vimage[0] = (vbx_half_t *)vbx_sp_malloc( num_bytes );
    v->vblock[0] = (vbx_half_t *)vbx_sp_malloc( num_bytes );
    v->vimage[1] = (vbx_half_t *)vbx_sp_malloc( num_bytes );
    v->vblock[1] = (vbx_half_t *)vbx_sp_malloc( num_bytes );

    /* Only the last scratchpad allocation is tested. */
    if( !v->vblock[1] ) {
        VBX_PRINTF( "ERROR: out of memory.\n" );
        VBX_EXIT(-1);
    }

    vbx_dma_to_vector( v->vcoeff, coeff_v, co_bytes );

    int row;
    for( row=0; row < BLOCK_SIZE; row++ ) {
        getBigTileImageY(v->vimage[v->db],image,row);
    }

#if USE_ACCUM_FLAGS
    // create a flag vector first element 0, next 'BLOCK_SIZE-1' element non-zero, etc
    vbx_set_vl( NUM_TILE_X * BLOCK_SIZE * NUM_TILE_Y * BLOCK_SIZE - (BLOCK_SIZE-1) );
    vbx( SEH, VAND, v->vflags, BLOCK_SIZE-1, 0 );
#endif

    return v;
}
int compare_vbx_lbp_ci_to_scalar_patterns(unsigned short* img, int log, int width, int height, int max_print_errors) { int j, l, cell, max_cell, errors = 0; unsigned char** scalar_patterns = test_scalar_patterns(img, log, width, height); max_cell = 1<<log; vbx_uhalf_t* v_in = (vbx_uhalf_t*)vbx_sp_malloc((1+2*max_cell)*width*sizeof(vbx_half_t)); vbx_uhalf_t* v_top = (vbx_half_t*)vbx_sp_malloc(width*sizeof(vbx_half_t)); vbx_uhalf_t* v_bot = (vbx_half_t*)vbx_sp_malloc(width*sizeof(vbx_half_t)); vbx_ubyte_t* v_lbp = (vbx_ubyte_t*)v_bot; unsigned char* lbp = (unsigned char*)vbx_shared_malloc(width*sizeof(unsigned char)); vbx_set_vl(width); for(l = 0; l < 1; l++){ cell = 1<<l; for(j=0; j < height - 2*cell; j++){ vbx_dma_to_vector(v_in, img+j*width, (1+2*cell)*width*sizeof(unsigned short)); vbx(VVHU, VCUSTOM1, v_top, v_in, v_in+(1*cell)*width); vbx(VVHU, VCUSTOM1, v_bot, v_in+(1*cell)*width, v_in+(2*cell)*width); vbx(SVHBU, VAND, (vbx_ubyte_t*)v_top, 0xf0, v_top); vbx(SVHBU, VAND, (vbx_ubyte_t*)v_bot, 0x0f, v_bot); vbx(VVBU, VADD, v_lbp, v_bot, v_top); vbx_dma_to_host(lbp, v_lbp, width*sizeof(unsigned char)); vbx_sync(); errors += match_array_byte(lbp, scalar_patterns[l]+j*width, "custom_lbp", width-2*cell, 1, 0, max_print_errors, 1, j); if (errors > max_print_errors){ max_print_errors = 0; } } } vbx_sp_free(); vbx_shared_free(lbp); return errors; }
/*
 * Compare the patterns produced by LBPRestricted_CI_column_8() on the 8-bit
 * image with the scalar patterns from test_scalar_patterns() on the 16-bit
 * image, for cell sizes 1, 2, ..., 1<<log.
 *
 * vbx_img          16-bit image given to the scalar reference
 * vbx_img8         8-bit image given to the implementation under test
 * log              highest cell-size exponent tested
 * width, height    image dimensions
 * max_print_errors print budget forwarded to match_array_byte; set to 0 once
 *                  the running error count exceeds it
 *
 * Prints a per-cell error total and returns the overall mismatch count.
 */
int compare_LBPRestrictedCI_to_test_scalar_patterns(unsigned short* vbx_img, unsigned char* vbx_img8, int log, int width, int height, int max_print_errors)
{
    int j, l, cell, errors = 0, cell_errors, row_errors;

    /* generate patterns */
    unsigned char **patterns = (unsigned char**)malloc((log+1)*sizeof(unsigned char*));
    for (l=0; l<log+1; l++) {
        patterns[l] = (unsigned char*)vbx_shared_malloc(height*width*sizeof(unsigned char));
    }

    /* LBPRestrictedCI28(patterns, vbx_img8, width, height, log); */
    LBPRestricted_CI_column_8(patterns, vbx_img8, width, height, log);
    /* LBPRestricted_CI_column_8_scratch(patterns, vbx_img8, width, height, log); */

    /* NOTE(review): ownership of scalar_patterns is not visible here, so it is not freed. */
    unsigned char **scalar_patterns = test_scalar_patterns(vbx_img, log, width, height);

    /* test patterns vs scalar patterns */
    for (l=0; l<log+1; l++) {
        cell = 1 << l;
        cell_errors = 0;
        printf("Testing cell %d\n", cell);
        /* Only the region where a 3*cell x 3*cell neighbourhood fits is compared. */
        for (j = 0; j < height - (3*cell-1); j++) {
            row_errors = match_array_byte(scalar_patterns[l]+j*width, patterns[l]+j*width, "restricted patterns", width - (3*cell-1), 1, 0, max_print_errors, 1, j);
            if (row_errors) {
                printf("errors in row %d\n", j);
            }
            cell_errors += row_errors;
            errors += row_errors;
            if(errors > max_print_errors) {
                max_print_errors = 0;
            }
        }
        printf("Total errors: %d\n\n", cell_errors);
    }

    /* Release the pattern planes and the table that holds them. */
    for (l=0; l<log+1; l++) {
        vbx_shared_free(patterns[l]);
    }
    free(patterns);

    return errors;
}
int test_lbp_ci(unsigned short* img, int width, int height) { vbx_uhalf_t* v_a1 = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_b1 = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_1h = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_a2 = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_b2 = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_2h = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_a4 = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_b4 = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_uhalf_t* v_4h = (vbx_uhalf_t*)vbx_sp_malloc(width*sizeof(vbx_uhalf_t)); vbx_ubyte_t* v_1b = (vbx_ubyte_t*)vbx_sp_malloc(width*sizeof(vbx_ubyte_t)); vbx_ubyte_t* v_2b = (vbx_ubyte_t*)vbx_sp_malloc(width*sizeof(vbx_ubyte_t)); vbx_ubyte_t* v_4b = (vbx_ubyte_t*)vbx_sp_malloc(width*sizeof(vbx_ubyte_t)); unsigned short* lbp1h = (unsigned short*)vbx_shared_malloc(width*sizeof(unsigned short)); unsigned short* lbp2h = (unsigned short*)vbx_shared_malloc(width*sizeof(unsigned short)); unsigned short* lbp4h = (unsigned short*)vbx_shared_malloc(width*sizeof(unsigned short)); unsigned char* lbp1b = (unsigned char*)vbx_shared_malloc(width*sizeof(unsigned char)); unsigned char* lbp2b = (unsigned char*)vbx_shared_malloc(width*sizeof(unsigned char)); unsigned char* lbp4b = (unsigned char*)vbx_shared_malloc(width*sizeof(unsigned char)); img = img + width; vbx_dma_to_vector(v_a1, img, width*sizeof(unsigned short)); vbx_dma_to_vector(v_b1, img + width, width*sizeof(unsigned short)); vbx_dma_to_vector(v_a2, img, width*sizeof(unsigned short)); vbx_dma_to_vector(v_b2, img + width, width*sizeof(unsigned short)); vbx_dma_to_vector(v_a4, img, width*sizeof(unsigned short)); vbx_dma_to_vector(v_b4, img + width, width*sizeof(unsigned short)); vbx_sync(); int i; int m = 48; for(i=0; i<m; i++){ v_a1[i] = 0; v_b1[i] = 0; v_a2[i] = 0; 
v_b2[i] = 0; v_a4[i] = 0; v_b4[i] = 0; } int n = 12; int src_a1[] = {0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int src_b1[] = {0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int src_a2[] = {0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int src_b2[] = {0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int src_a4[] = {0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0}; int src_b4[] = {0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0}; for(i=0; i<16; i++){ v_a1[i] = src_a1[i]; v_b1[i] = src_b1[i]; v_a2[i] = src_a2[i]; v_b2[i] = src_b2[i]; v_a4[i] = src_a4[i]; v_b4[i] = src_b4[i]; } vbx_set_vl(width); vbx(VVHU, VCUSTOM1, v_1h, v_a1, v_b1); vbx(VVHU, VCUSTOM2, v_2h, v_a2, v_b2); vbx(VVHU, VCUSTOM3, v_4h, v_a4, v_b4); vbx(VVHB, VADD, v_1b, v_1h, ((vbx_byte_t*)v_1h) + 1); vbx(VVHB, VADD, v_2b, v_2h, ((vbx_byte_t*)v_2h) + 1); vbx(VVHB, VADD, v_4b, v_4h, ((vbx_byte_t*)v_4h) + 1); vbx_dma_to_host(lbp1h, v_1h, width*sizeof(unsigned short)); vbx_dma_to_host(lbp2h, v_2h, width*sizeof(unsigned short)); vbx_dma_to_host(lbp4h, v_4h, width*sizeof(unsigned short)); vbx_dma_to_host(lbp1b, v_1b, width*sizeof(unsigned char)); vbx_dma_to_host(lbp2b, v_2b, width*sizeof(unsigned char)); vbx_dma_to_host(lbp4b, v_4b, width*sizeof(unsigned char)); vbx_sync(); test_print_array_half(v_a1, n); test_print_array_half(v_b1, n); test_print_hex_array_half(lbp1h, n); test_print_hex_array_byte(lbp1b, n); test_print_array_half(v_a2, n); test_print_array_half(v_b2, n); test_print_hex_array_half(lbp2h, n); test_print_hex_array_byte(lbp2b, n); test_print_array_half(v_a4, n); test_print_array_half(v_b4, n); test_print_hex_array_half(lbp4h, n); test_print_hex_array_byte(lbp4b, n); vbx_sp_free(); vbx_shared_free(lbp1h); vbx_shared_free(lbp2h); vbx_shared_free(lbp4h); vbx_shared_free(lbp1b); vbx_shared_free(lbp2b); vbx_shared_free(lbp4b); return 0; }
//FIXME stride for match not implemented int compare_LBPPassStage_to_restricted(unsigned short *vbx_img, int log, lbp_stage_t lbp_stage, int window, int width, int height, int max_print_errors) { int l, i, j, cell, errors = 0; unsigned char** scalar_patterns = test_scalar_patterns(vbx_img, log, width, height); unsigned char *pass, *vbx_pass; pass = (unsigned char*)vbx_shared_malloc(width*height*sizeof(unsigned char)); vbx_pass = (unsigned char*)vbx_shared_malloc(width*height*sizeof(unsigned char)); vbx_byte_t** v_lbp =(vbx_byte_t**)vbx_shared_malloc((log+1)*sizeof(vbx_byte_t*)); for (l=0; l<log+1; l++) { v_lbp[l] = (vbx_byte_t*)vbx_sp_malloc((window+1)*width*sizeof(vbx_byte_t)); } vbx_byte_t* v_lut = (vbx_byte_t*)vbx_sp_malloc(width*sizeof(vbx_byte_t)); vbx_byte_t* v_stage = (vbx_byte_t*)vbx_sp_malloc(width*sizeof(vbx_byte_t)); vbx_byte_t* v_pattern; lbp_feat_t feat; int dx, dy, dw, f; for (l=0; l<log+1; l++) { vbx_dma_to_vector(v_lbp[l]+width, scalar_patterns[l], (window)*width*sizeof(unsigned char)); } vbx_sync(); for(j=0; j < height-(window+1); j++) { for (l=0; l<log+1; l++) { vbx_set_vl(width * window); vbx(VVB, VMOV, v_lbp[l], v_lbp[l]+width, NULL); vbx_dma_to_vector(v_lbp[l] + window*width, scalar_patterns[l]+(j+window)*width, width*sizeof(unsigned char)); } vbx_set_vl(width-(window+1)); vbx(SVB, VMOV, v_stage, 0, NULL); for (f = 0; f < lbp_stage.count; f++) { feat = lbp_stage.feats[f]; dx = feat.pos.src.x; dy = feat.pos.src.y; dw = feat.pos.size.x; v_pattern = v_lbp[dw>>1]+(dy*width+dx); vbx(SVBU, VLBPLUT, v_lut, f, v_pattern); vbx(VVB, VADD, v_stage, v_stage, v_lut); } vbx(SVB, VMOV, v_lut, 0, NULL); vbx(SVB, VCMV_GEZ, v_lut, 1, v_stage); vbx_dma_to_host(vbx_pass + j*width, v_lut, (width-(window+1))*sizeof(unsigned char)); vbx_sync(); } unsigned int *iImg, *iiImg; iImg = (unsigned int *)vbx_shared_malloc(width*height*sizeof(unsigned int)); iiImg = (unsigned int *)vbx_shared_malloc(width*height*sizeof(unsigned int)); gen_integrals(vbx_img, iImg, iiImg, 
width, height); image_t lbp_img = {iImg, {width, height}}; for (j = 0; j < height - (window + 1); j++) { for (i = 0; i < width - (window + 1); i++) { pair_t lbp_p = {i, j}; pass[j*width+i] = LBPPassStage(lbp_img, lbp_stage, lbp_p); } } /* test pass vs vbx pass */ for (j = 0; j < height - (window + 1); j++) { errors += match_array_byte(vbx_pass + j*width, pass + j*width, "pass stage", width - (window + 1), 1, 0, max_print_errors, 1, j); if (errors > max_print_errors){ max_print_errors = 0; } } return errors; }
/*
 * Compare scalar and vector image down-scaling.
 *
 * The scalar reference scales img with scalar_BLIP(); the vector side
 * converts vbx_input to greyscale (16-bit and 8-bit), scales the 16-bit
 * image with vector_BLIP() and the 8-bit image with vector_BLIP8F3() (timed
 * and printed).  Three comparisons are made: unscaled greyscale, scaled
 * 16-bit, and scaled 8-bit against the scalar scaled image.
 *
 * img              scalar greyscale input
 * vbx_input        pixel input for the vector path
 * width, height    input dimensions
 * max_print_errors print budget forwarded to the match_array_* helpers; set
 *                  to 0 once the running error count exceeds it
 * scale_factor     v in the scale ratio v/(v+1)
 *
 * Returns the accumulated mismatch count.  All buffers allocated here are
 * freed before returning.
 */
int compare_scalar_BLIP2_to_vector_BLIP(unsigned short* img, pixel* vbx_input, int width, int height, int max_print_errors, int scale_factor)
{
    int j, errors = 0;
    int scaled_width, scaled_height;

    /* scale factor v/(v+1), v is between 1-10 */
    scaled_width = width*scale_factor/(scale_factor+1);
    scaled_height= height*scale_factor/(scale_factor+1);

    unsigned short *scaled_img, *vbx_img, *vbx_scaled_img;
    unsigned char *vbx_img8, *vbx_scaled_img8;
    unsigned int *iImg, *iiImg, *vbx_iImg, *vbx_iiImg;

    scaled_img = (unsigned short*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned short));
    iImg = (unsigned int*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned int));
    iiImg = (unsigned int*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned int));
    vbx_scaled_img = (unsigned short*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned short));

    vbx_img = (unsigned short*)vbx_shared_malloc(width*height*sizeof(unsigned short));
    vbx_img8 = (unsigned char*)vbx_shared_malloc(width*height*sizeof(unsigned char));
    vbx_scaled_img8 = (unsigned char*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned char));
    vbx_iImg = (unsigned int*)vbx_shared_malloc(width*height*sizeof(unsigned int));
    vbx_iiImg = (unsigned int*)vbx_shared_malloc(width*height*sizeof(unsigned int));

    /* Scalar reference. */
#if 0
    scalar_BLIP2(img, height, width, scaled_img, scaled_height, scaled_width, scale_factor);
#else
    float percent = 1.0 * (scale_factor+1) / scale_factor;
    scalar_BLIP(img, height, width, scaled_img, scaled_height, scaled_width, &percent);
#endif
    gen_integrals(scaled_img, iImg, iiImg, scaled_width, scaled_height);

    /* Vector path, 16-bit. */
    vector_get_img(vbx_img, vbx_iImg, vbx_iiImg, vbx_input, 1, width, height, width, 1);
    vector_BLIP(vbx_img, height, width, vbx_scaled_img, vbx_iImg, vbx_iiImg, scaled_height, scaled_width, scale_factor, 1);

    /* Vector path, 8-bit (timed). */
    vector_get_img8(vbx_img8, vbx_input, 1, width, height, width);
    /* vector_BLIP8(vbx_img8, height, width, vbx_scaled_img8, scaled_height, scaled_width, scale_factor); */

    vbx_timestamp_start();
    vbx_timestamp_t time_start, time_stop;
    time_start = vbx_timestamp();
#if 1
    vector_BLIP8F3(vbx_img8, height, width, vbx_scaled_img8, scaled_height, scaled_width, scale_factor);
#else
    vector_BLIP8F2(vbx_img8, height, width, vbx_scaled_img8, scaled_height, scaled_width, scale_factor);
#endif
    time_stop = vbx_timestamp();
    /* Called for its printed output; the returned time is not needed. */
    vbx_print_vector_time(time_start, time_stop, 0.0);

    /* test greyscale image */
    for (j = 0; j < height; j++) {
        errors += match_array_half(img+j*width, vbx_img+j*width, "greyscale", width, 1, 0, max_print_errors, j);
        if(errors > max_print_errors) {
            max_print_errors = 0;
        }
    }

    /* test scaled image */
    for (j = 0; j < scaled_height; j++) {
        errors += match_array_half(scaled_img+j*scaled_width, vbx_scaled_img+j*scaled_width, "scaled greyscale", scaled_width, 1, 1, max_print_errors, j);
        if(errors > max_print_errors) {
            max_print_errors = 0;
        }
    }

    /* test scaled 8-bit image against the scalar scaled image */
    for (j = 0; j < scaled_height; j++) {
        errors += match_array_half_byte(scaled_img+j*scaled_width, vbx_scaled_img8+j*scaled_width, "scaled greyscale8", scaled_width, 1, 1, max_print_errors, j);
        if(errors > max_print_errors) {
            max_print_errors = 0;
        }
    }

#if 0
    /* test scaled_integral image */
    for (j = 0; j < scaled_height; j++) {
        errors += match_array_word(iImg+j*scaled_width, vbx_iImg+j*scaled_width, "scaled integral", scaled_width, 1, 0, max_print_errors, j);
        if(errors > max_print_errors) {
            max_print_errors = 0;
        }
    }

    /* test scaled squared integral image */
    for (j = 0; j < scaled_height; j++) {
        errors += match_array_word(iiImg+j*scaled_width, vbx_iiImg+j*scaled_width, "scaled squared", scaled_width, 1, 0, max_print_errors, j);
        if(errors > max_print_errors) {
            max_print_errors = 0;
        }
    }
#endif

    /* Release every buffer allocated above. */
    vbx_shared_free(vbx_iiImg);
    vbx_shared_free(vbx_iImg);
    vbx_shared_free(vbx_scaled_img8);
    vbx_shared_free(vbx_img8);
    vbx_shared_free(vbx_img);
    vbx_shared_free(vbx_scaled_img);
    vbx_shared_free(iiImg);
    vbx_shared_free(iImg);
    vbx_shared_free(scaled_img);

    return errors;
}
int main(void) { vbx_timestamp_t time_start, time_stop; double scalar_time, vector_time; input_pointer img1; input_pointer img2; input_pointer sc_img1; input_pointer sc_img2; output_pointer scalar_out; output_pointer vector_out; int i,j; int total_errors = 0; vbx_test_init(); vbx_mxp_print_params(); img1 = vbx_shared_malloc( NUM_OF_ROWS*NUM_OF_COLUMNS*sizeof(input_type) ); img2 = vbx_shared_malloc( NUM_OF_ROWS*NUM_OF_COLUMNS*sizeof(input_type) ); vector_out = vbx_shared_malloc( NUM_OF_ROWS*NUM_OF_COLUMNS*sizeof(output_type) ); sc_img1 = malloc( NUM_OF_ROWS*NUM_OF_COLUMNS*sizeof(input_type) ); sc_img2 = malloc( NUM_OF_ROWS*NUM_OF_COLUMNS*sizeof(input_type) ); scalar_out = malloc( NUM_OF_ROWS*NUM_OF_COLUMNS*sizeof(output_type) ); init_img( img1, img2 ); init_img( sc_img1, sc_img2 ); vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_VECTOR_BYTE_LANES = this_mxp->vector_lanes * sizeof(int); printf("\n"); printf("Num of byte lanes: %d\n", VBX_VECTOR_BYTE_LANES); printf("Initialized data\n\n"); printf("Executing Scalar Image Blend...\n"); vbx_timestamp_start(); time_start = vbx_timestamp(); scalar_blend( scalar_out, sc_img1, sc_img2, NUM_OF_ROWS, NUM_OF_COLUMNS, CONST_BLEND ); time_stop = vbx_timestamp(); printf("Finished Scalar Image Blend\n"); scalar_time = vbx_print_scalar_time(time_start, time_stop); printf("\nExecuting Vector Image Blend...\n"); vbx_timestamp_start(); time_start = vbx_timestamp(); vector_blend( vector_out, img1, img2, NUM_OF_ROWS, NUM_OF_COLUMNS, CONST_BLEND); time_stop = vbx_timestamp(); printf("Finished Vector Image Blend\n"); vector_time = vbx_print_vector_time(time_start, time_stop, scalar_time); int errors = 0; for( j=0; j<NUM_OF_ROWS; j++ ) { for( i = 0; i < NUM_OF_COLUMNS; i++ ) { if( vector_out[j*NUM_OF_COLUMNS+i] != scalar_out[j*NUM_OF_COLUMNS+i] ) { if(errors < 5) printf( "\nFail at sample [%3d,%3d]. 
Scalar: %3d Vector: %3d Img1: %3d Img2: %3d", j, i, scalar_out[j*NUM_OF_COLUMNS+i], vector_out[j*NUM_OF_COLUMNS+i], img1[j*NUM_OF_COLUMNS+i], img2[j*NUM_OF_COLUMNS+i] ); errors++; } } } printf("\n%d errors\n", errors); total_errors += errors; VBX_TEST_END(total_errors); return 0; }
int main(void) { vbx_test_init(); typedef vbx_word_t vbx_mm_t; vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; int N = VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t ); N = 20; int M = 20; int PRINT_LENGTH = N<MAX_PRINT_LENGTH ? N : MAX_PRINT_LENGTH ; // int PRINT_ROWS = PRINT_LENGTH; int PRINT_ROWS = M<MAX_PRINT_LENGTH ? N : MAX_PRINT_LENGTH; int PRINT_COLS = PRINT_LENGTH; double scalar_time, vector_time,vector2_time; int errors=0; vbx_mxp_print_params(); printf( "\nMatrix multiply test...\n" ); printf( "Matrix dimensions: %d,%d\n", N, M ); vbx_mm_t *scalar_in1 = (vbx_mm_t*)malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *scalar_in2 = (vbx_mm_t*)malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *scalar_out = (vbx_mm_t*)malloc( N*N*sizeof(vbx_mm_t ) ); vbx_mm_t *vector_in1 = (vbx_mm_t*)vbx_shared_malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *vector_in2 = (vbx_mm_t*)vbx_shared_malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *vector_out = (vbx_mm_t*)vbx_shared_malloc( N*N*sizeof(vbx_mm_t ) ); if ( scalar_in1 == NULL || scalar_in2 == NULL || scalar_out == NULL || vector_in1 == NULL || vector_in2 == NULL || vector_out == NULL ){ printf("Malloc failed\n"); VBX_TEST_END(1); return 0; } test_zero_array_word(scalar_out, N*N ); test_zero_array_word(vector_out, N*N ); test_init_array_word( scalar_in1, M*N, 1 ); test_copy_array_word( vector_in1, scalar_in1, M*N ); test_init_array_word( scalar_in2, M*N, 999 ); //scalar_mtx_xp_MN_word( vector_in2, scalar_in2, N, N ); test_copy_array_word( vector_in2, scalar_in2, M*N ); test_print_matrix_word( scalar_in1, PRINT_COLS, PRINT_ROWS, M ); test_print_matrix_word( scalar_in2, PRINT_ROWS, PRINT_COLS, N ); //change print sizes for outputs PRINT_ROWS=PRINT_COLS=N<PRINT_LENGTH?N:PRINT_LENGTH; scalar_time = test_scalar( scalar_out, scalar_in1, N, M, scalar_in2, M, N); test_print_matrix_word( scalar_out, PRINT_COLS, PRINT_ROWS, N ); vector_time = test_vector( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time ); 
test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N ); errors += test_verify_array_word( scalar_out, vector_out, N*N); vector2_time = test_vector_trans( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time ); test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N ); errors += test_verify_array_word( scalar_out, vector_out, N*N); vector2_time = test_vector_sp( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time ); test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N ); errors += test_verify_array_word( scalar_out, vector_out, N*N); vbx_shared_free(vector_out); vbx_shared_free(vector_in2); vbx_shared_free(vector_in1); free(scalar_out); free(scalar_in2); free(scalar_in1); //errors += orig_test(); VBX_TEST_END(errors); return 0; }
int VBX_T(vbw_vec_reverse_test_mm)() { unsigned int aN[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17, 20, 25, 31, 32, 33, 35, 40, 48, 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 70, 80, 90, 99, 100, 101, 110, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 400, 450, 512, 550, 600, 650, 700, 768, 768, 900, 900, 1023, 1024, 1200, 1400, 1600, 1800, 2048, 2048, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4096, 4096, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900, 5000, 6000, 7000, 8000, 8192, 8192, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 16384, 16384, 20000, 25000, 30000, 32767, 32768, 32768, 35000, 40000, 45000, 50000, 55000, 60000, 65000, 65535, 65536, 65536, 65537, 100000, 128000, 256000, 333333, 528374, 528374 }; int retval; unsigned int N; unsigned int NBYTES; unsigned int NREPS = 100; unsigned int NREPSFORLARGE = 10; unsigned int i,j,k; vbx_timestamp_t start=0,finish=0; for( i=0; i<sizeof(aN)/4; i++ ) { N = aN[i]; //printf( "testing with vector size %d\n", N ); if(N > 10000) NREPS = NREPSFORLARGE; NBYTES = N*sizeof(vbx_mm_t); vbx_mm_t *src = (vbx_mm_t *) vbx_shared_malloc( NBYTES ); vbx_mm_t *dst = (vbx_mm_t *) vbx_shared_malloc( NBYTES ); //printf("bytes alloc: %d\n", NBYTES ); if( !src ) VBX_EXIT(-1); if( !dst ) VBX_EXIT(-1); for ( j=0; j<N; j++ ) { dst[j] = -1; // Fill the destination with -1 src[j] = j; // Fill the source with enumerated values } // VBX_T(vbw_vec_reverse_ext)( dst, src, N ); /** measure performance of function call **/ start = vbx_timestamp(); for(k=0; __builtin_expect(k<NREPS,1); k++ ) { retval = VBX_T(vbw_vec_reverse_ext)( dst, src, N ); } finish = vbx_timestamp(); printf( "length %d (%s):\tvbware mm f():\t%llu", N, VBX_EXPAND_AND_QUOTE(BYTEHALFWORD), (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) ); #if VERIFY_VBWARE_ALGORITHM VBX_T(verify_vector)( src, dst, N ); #else 
printf(" [VERIFY OFF]"); #endif printf("\treturn value: %X", retval); /** measure performance of scalar **/ vbx_mm_t *A = vbx_remap_cached( src, N*sizeof(vbx_mm_t) ); // Use cached pointers for better performance vbx_mm_t *B = vbx_remap_cached( dst, N*sizeof(vbx_mm_t) ); start = vbx_timestamp(); for(k=0; k<NREPS; k++ ) { unsigned int m; for(m=0; m<N; m++) { B[N-1-m]=A[m]; } vbx_dcache_flush( A, N*sizeof(vbx_mm_t) ); // Make sure to read from main memory vbx_dcache_flush( B, N*sizeof(vbx_mm_t) ); // Make sure writes are committed to memory } finish = vbx_timestamp(); printf( "\tscalar (cache friendly):\t%llu", (unsigned long long) vbx_mxp_cycles((finish-start)/NREPS) ); #if VERIFY_SIMPLE_ALGORITHM VBX_T(verify_vector)( src, dst, N ); #else printf(" [VERIFY OFF]"); #endif printf("\tcycles\n"); vbx_shared_free(src); vbx_shared_free(dst); } printf("All tests passed successfully.\n"); return 0; }
int main(void) { vbx_test_init(); #if 0 vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; int N = VBX_SCRATCHPAD_SIZE/sizeof(vbx_mm_t)/8; #endif int TEST_LENGTH = TEST_ROWS*TEST_COLS; int NTAP_LENGTH = NTAP_ROWS*NTAP_COLS; int PRINT_COLS = min( TEST_COLS, MAX_PRINT_LENGTH ); int PRINT_ROWS = min( TEST_ROWS, MAX_PRINT_LENGTH ); double scalar_time, vector_time; int errors=0; vbx_mxp_print_params(); printf( "\nMatrix FIR test...\n" ); printf( "Matrix dimensions: %d,%d\n", TEST_ROWS, TEST_COLS ); vbx_mm_t *scalar_in = malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in = vbx_shared_malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); int32_t *scalar_filt = malloc( NTAP_LENGTH*sizeof(int32_t) ); int32_t *vector_filt = vbx_shared_malloc( NTAP_LENGTH*sizeof(int32_t) ); vbx_mm_t *scalar_out = malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); VBX_T(test_zero_array)( scalar_out, TEST_LENGTH ); VBX_T(test_zero_array)( vector_out, TEST_LENGTH ); VBX_T(test_init_array)( scalar_in, TEST_LENGTH, 1 ); VBX_T(test_copy_array)( vector_in, scalar_in, TEST_LENGTH ); test_init_array_word( scalar_filt, NTAP_LENGTH, 1 ); test_copy_array_word( vector_filt, scalar_filt, NTAP_LENGTH ); VBX_T(test_print_matrix)( scalar_in, PRINT_ROWS, PRINT_COLS, TEST_COLS ); test_print_matrix_word( scalar_filt, NTAP_ROWS, NTAP_COLS, NTAP_COLS ); scalar_time = test_scalar( scalar_out, scalar_in, scalar_filt, TEST_ROWS, TEST_COLS, NTAP_ROWS, NTAP_COLS); VBX_T(test_print_matrix)( scalar_out, PRINT_COLS, PRINT_ROWS, TEST_COLS ); vector_time = test_vector( vector_out, vector_in, vector_filt, TEST_ROWS, TEST_COLS, NTAP_ROWS, NTAP_COLS, scalar_time ); VBX_T(test_print_matrix)( vector_out, PRINT_COLS, PRINT_ROWS, TEST_COLS ); int i; for(i=0; i<TEST_ROWS-NTAP_ROWS; i++){ errors += VBX_T(test_verify_array)( scalar_out+i*TEST_COLS, vector_out+i*TEST_COLS, TEST_COLS-NTAP_COLS ); } VBX_TEST_END(errors); return 0; 
}
int main(void) { vbx_test_init(); vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; const int required_vectors = 4; int N = VBX_PAD_DN(VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t) / required_vectors, this_mxp->scratchpad_alignment_bytes); int PRINT_LENGTH = min( N, MAX_PRINT_LENGTH ); double scalar_time, vector_time; int errors=0; vbx_mxp_print_params(); printf( "\nVector copy test...\n" ); printf( "Vector length: %d\n", N ); vbx_mm_t *scalar_in = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_out = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_sp_t *v_out = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); vbx_sp_t *v_in = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); VBX_T(test_zero_array)( scalar_in, N ); VBX_T(test_zero_array)( vector_in, N ); VBX_T(test_init_array)( scalar_in, N, 1 ); VBX_T(test_copy_array)( vector_in, scalar_in, N ); scalar_time = test_scalar( scalar_out, scalar_in, N ); VBX_T(test_print_array)( scalar_out, PRINT_LENGTH ); vbx_dma_to_vector( v_in, vector_in, N*sizeof(vbx_sp_t) ); vector_time = test_vector( v_out, v_in, N, scalar_time ); vbx_dma_to_host(vector_out, v_out, N*sizeof(vbx_sp_t) ); vbx_sync(); VBX_T(test_print_array)( vector_out, PRINT_LENGTH ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, N ); vbx_sp_free(); #if TEST_DEEP_SP errors += deep_vector_copy_test(); #endif #if DEBUG_MAKE_SP_FULL vbx_sp_malloc(vbx_sp_getfree()); #endif #if TEST_DEEP_MM errors += deep_vector_copy_ext_test(); #endif VBX_TEST_END(errors); return 0; }
int compare_vbx_lut_to_vbx_lut_ci(int stage, int max_print_errors) { vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); int vci_lanes = this_mxp->vcustom0_lanes; int sz = this_mxp->scratchpad_size/(16*sizeof(vbx_ubyte_t)); vbx_byte_t* v_pass = (vbx_byte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_pattern = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_lutc = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_group = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_sel = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_byte_t)); vbx_ubyte_t* v_lut = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_word_t)); vbx_ubyte_t* v_idx = (vbx_ubyte_t*)vbx_sp_malloc(sz*sizeof(vbx_word_t)); if(v_idx == NULL) { printf("failed to allocate in compare_vbx_lut_to_vbx_lut_ci\n"); } unsigned char* lut = (unsigned char*)vbx_shared_malloc(sz*sizeof(unsigned char)); unsigned char* lut_c = (unsigned char*)vbx_shared_malloc(sz*sizeof(unsigned char)); int f, n, s, errors = 0; for (n = 0; n < sz; n++) { v_pattern[n] = (n & 0xff); } for (f = 0; f < face_lbp[stage].count; f++) { lbp_feat_t feat = face_lbp[stage].feats[f]; vbx_set_vl(sz); int total = f; s = 0; while(s < stage){ total += face_lbp[s].count; s++; } if(total < 256) { vbx(SVBU, VLBPLUT, v_lutc, total, v_pattern); } else { vbx(SVBS, VLBPLUT, v_lutc, total-256, v_pattern); } vbx(SVB, VMOV, v_pass, feat.fail, 0); /* check if pattern is in lut */ vbx(SVBU, VSHR, v_group, 5, v_pattern); for (n = 0; n < 8; n++) { vbx(SVB, VADD, v_sel, -n, v_group); vbx(SVBW, VCMV_Z, v_lut, feat.lut[n], v_sel); } vbx(SVBWU, VAND, v_idx, 0x1f, v_pattern); vbx(VVWB, VSHR, v_lut, v_idx, v_lut); vbx(SVB, VAND, v_lut, 1, v_lut); vbx(SVB, VCMV_LEZ, v_pass, feat.pass, v_lut); vbx_dma_to_host(lut_c, v_lutc, sz*sizeof(unsigned char)); vbx_dma_to_host(lut, v_pass, sz*sizeof(unsigned char)); vbx_sync(); errors += match_array_byte(lut, lut_c, "custom_lut", sz, 1, 0, max_print_errors, 0, 0); } vbx_sp_free(); 
vbx_shared_free(lut); vbx_shared_free(lut_c); return errors; }
int main(void) { pixel *input; pixel *scalar_input; #if USE_LUMA unsigned char *vbx_luma; #endif unsigned short *scalar_luma; pixel *vbx_output; pixel *scalar_output; vbx_timestamp_t time_start, time_stop; double scalar_time, vbx_time; int x, y; int errors = 0; vbx_test_init(); vbx_mxp_print_params(); input = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); scalar_input = (pixel *)vbx_remap_cached(input, IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); #if USE_LUMA vbx_luma = (unsigned char *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned char)); #endif scalar_luma = (unsigned short *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short)); vbx_output = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); scalar_output = (pixel *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); printf("\nInitializing data\n"); printf("Resolution = %dx%d\n", IMAGE_WIDTH, IMAGE_HEIGHT); init_matrix(input, IMAGE_WIDTH, IMAGE_HEIGHT); printf("Starting Sobel 3x3 edge-detection test\n"); #if USE_LUMA scalar_rgb2luma(scalar_luma, scalar_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH); #endif vbx_timestamp_start(); time_start = vbx_timestamp(); #if !USE_LUMA scalar_rgb2luma(scalar_luma, scalar_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH); #endif scalar_sobel_argb32_3x3(scalar_output, scalar_luma, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH, RENORM_AMOUNT); time_stop = vbx_timestamp(); scalar_time = vbx_print_scalar_time(time_start, time_stop); #if USE_LUMA vbw_rgb2luma8(vbx_luma, (unsigned *)input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH); #endif vbx_timestamp_start(); time_start = vbx_timestamp(); #if USE_LUMA vbw_sobel_luma8_3x3((unsigned *)vbx_output, vbx_luma, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH, RENORM_AMOUNT); #else vbw_sobel_argb32_3x3((unsigned *)vbx_output, (unsigned *)input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_PITCH, RENORM_AMOUNT); #endif time_stop = vbx_timestamp(); vbx_time = vbx_print_vector_time(time_start, time_stop, scalar_time); for (y = 0; 
y < IMAGE_HEIGHT; y++) { for (x = 0; x < IMAGE_WIDTH; x++) { #if USE_LUMA if (scalar_luma[y*IMAGE_WIDTH+x] != vbx_luma[y*IMAGE_WIDTH+x]) { if (errors < MAX_PRINT_ERRORS) { printf("Y Error at %d, %d: Expected = %02X, got = %02X\n", y, x, scalar_luma[y*IMAGE_WIDTH+x], vbx_luma[y*IMAGE_WIDTH+x]); } errors++; } #endif if (scalar_output[y*IMAGE_WIDTH+x].r != vbx_output[y*IMAGE_WIDTH+x].r) { if (errors < MAX_PRINT_ERRORS) { printf("R Error at %d, %d: Expected = %02X, got = %02X\n", y, x, scalar_output[y*IMAGE_WIDTH+x].r, vbx_output[y*IMAGE_WIDTH+x].r); } errors++; } if (scalar_output[y*IMAGE_WIDTH+x].g != vbx_output[y*IMAGE_WIDTH+x].g) { if (errors < MAX_PRINT_ERRORS) { printf("G Error at %d, %d: Expected = %02X, got = %02X\n", y, x, scalar_output[y*IMAGE_WIDTH+x].g, vbx_output[y*IMAGE_WIDTH+x].g); } errors++; } if (scalar_output[y*IMAGE_WIDTH+x].b != vbx_output[y*IMAGE_WIDTH+x].b) { if (errors < MAX_PRINT_ERRORS) { printf("B Error at %d, %d: Expected = %02X, got = %02X\n", y, x, scalar_output[y*IMAGE_WIDTH+x].b, vbx_output[y*IMAGE_WIDTH+x].b); } errors++; } } } VBX_TEST_END(errors); return errors; }
/*
 * Compare the scalar image down-scaler (scalar_BLIP2 + gen_integrals) with
 * the vector one (vector_get_img + vector_BLIP).
 *
 * img              - greyscale reference image, width*height halfwords
 * vbx_input        - pixel image handed to vector_get_img
 * width, height    - dimensions of the unscaled image
 * max_print_errors - forwarded to the match_array_* helpers; set to 0 once
 *                    the running error count exceeds it
 *
 * Four row-by-row comparisons are made: greyscale image, scaled image,
 * scaled integral image and scaled squared-integral image.
 *
 * Returns the accumulated value returned by the match_array_* helpers, or 1
 * when a working buffer could not be allocated.  All working buffers are
 * released before returning (the original leaked all seven on every call).
 */
int compare_scalar_BLIP2_to_vector_BLIP(unsigned short* img, pixel* vbx_input, int width, int height, int max_print_errors)
{
	int j, errors = 0;
	int value, scaled_width, scaled_height;

	unsigned short *scaled_img = NULL, *vbx_img = NULL, *vbx_scaled_img = NULL;
	unsigned int *iImg = NULL, *iiImg = NULL, *vbx_iImg = NULL, *vbx_iiImg = NULL;

	/* scale factor v/(v+1), v is between 1-10 */
	value = 3; /* original author's note: "BAD 2,5,6,8" */
	scaled_width  = width*value/(value+1);
	scaled_height = height*value/(value+1);

	/* Scalar-side results use the scaled size; the vector-side image and
	 * integral buffers are allocated at the full, unscaled size. */
	scaled_img     = (unsigned short*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned short));
	iImg           = (unsigned int*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned int));
	iiImg          = (unsigned int*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned int));
	vbx_scaled_img = (unsigned short*)vbx_shared_malloc(scaled_width*scaled_height*sizeof(unsigned short));

	vbx_img   = (unsigned short*)vbx_shared_malloc(width*height*sizeof(unsigned short));
	vbx_iImg  = (unsigned int*)vbx_shared_malloc(width*height*sizeof(unsigned int));
	vbx_iiImg = (unsigned int*)vbx_shared_malloc(width*height*sizeof(unsigned int));

	if (scaled_img == NULL || iImg == NULL || iiImg == NULL || vbx_scaled_img == NULL ||
	    vbx_img == NULL || vbx_iImg == NULL || vbx_iiImg == NULL) {
		printf("failed to allocate in compare_scalar_BLIP2_to_vector_BLIP\n");
		errors = 1;
		goto cleanup;
	}

	scalar_BLIP2(img, height, width, scaled_img, scaled_height, scaled_width, value);
	gen_integrals(scaled_img, iImg, iiImg, scaled_width, scaled_height);

	vector_get_img(vbx_img, vbx_iImg, vbx_iiImg, vbx_input, 1, width, height, width, 1);
	vector_BLIP(vbx_img, height, width, vbx_scaled_img, vbx_iImg, vbx_iiImg, scaled_height, scaled_width, value, 1);

	/* test greyscale image */
	for (j = 0; j < height; j++) {
		errors += match_array_half(img+j*width, vbx_img+j*width, "greyscale", width, 1, max_print_errors, j);
		if (errors > max_print_errors) {
			max_print_errors = 0;
		}
	}

	/* test scaled image */
	for (j = 0; j < scaled_height; j++) {
		errors += match_array_half(scaled_img+j*scaled_width, vbx_scaled_img+j*scaled_width, "scaled greyscale", scaled_width, 1, max_print_errors, j);
		if (errors > max_print_errors) {
			max_print_errors = 0;
		}
	}

	/* test scaled_integral image */
	for (j = 0; j < scaled_height; j++) {
		errors += match_array_word(iImg+j*scaled_width, vbx_iImg+j*scaled_width, "scaled integral", scaled_width, 1, max_print_errors, j);
		if (errors > max_print_errors) {
			max_print_errors = 0;
		}
	}

	/* test scaled squared integral image */
	for (j = 0; j < scaled_height; j++) {
		errors += match_array_word(iiImg+j*scaled_width, vbx_iiImg+j*scaled_width, "scaled squared", scaled_width, 1, max_print_errors, j);
		if (errors > max_print_errors) {
			max_print_errors = 0;
		}
	}

cleanup:
	/* Guarded because NULL-tolerance of vbx_shared_free is not established
	 * here and this path is also reached after a failed allocation. */
	if (vbx_iiImg != NULL)      vbx_shared_free(vbx_iiImg);
	if (vbx_iImg != NULL)       vbx_shared_free(vbx_iImg);
	if (vbx_img != NULL)        vbx_shared_free(vbx_img);
	if (vbx_scaled_img != NULL) vbx_shared_free(vbx_scaled_img);
	if (iiImg != NULL)          vbx_shared_free(iiImg);
	if (iImg != NULL)           vbx_shared_free(iImg);
	if (scaled_img != NULL)     vbx_shared_free(scaled_img);

	return errors;
}
int main_tile() { int i, j, k, l, base, block_num; int x, y; int time_start, time_stop; unsigned int cycles; double vbx_time, scalar_time; int wrong; int total_errors = 0; //all of the initialization can be hard coded without any computation vbx_mtx_fdct_t *v = vbx_mtx_fdct_init( coeff_v, image ); vbx_timestamp_start(); printf("\nGenerating initial data...\n"); dt *image = (dt *) malloc( IMAGE_WIDTH * IMAGE_HEIGHT * sizeof(dt) ); GenerateRandomImage( image, IMAGE_WIDTH, IMAGE_HEIGHT, 0/*seed*/ ); // Allocate memory to store results. // Results are computed BIGTILE_SIZE halfwords at a time. const int BIGTILE_SIZE = NUM_TILE_X * NUM_TILE_Y * DCT_SIZE; dt *block_s = malloc( BIGTILE_SIZE * sizeof(dt) ); dt *block_v = (dt *) vbx_shared_malloc( BIGTILE_SIZE * sizeof(dt) ); dt *coeff_v = (dt *) vbx_shared_malloc( BIGTILE_SIZE * sizeof(dt) ); //Make an uncached 1D version of the coeff matrix for (i = 0; i < NUM_TILE_Y; i++) { // row for (j = 0; j < BLOCK_SIZE; j++) { // row for (k = 0; k < NUM_TILE_X; k++) { // col for (l = 0; l < BLOCK_SIZE; l++) { // col coeff_v[i*NUM_TILE_X*DCT_SIZE + j*DCT_SIZE + k*BLOCK_SIZE + l] = cs[j][l]; } } } } #ifdef DEBUG printf("input matrix is:\n"); for (i = 0; i < BLOCK_SIZE; i++) { base = i * BLOCK_SIZE; for (j = 0; j < BLOCK_SIZE; j++) { printf("%d ", (int) block_s[base + j]); } printf("\n"); } #endif printf("\nRunning DCT...\n"); time_start = vbx_timestamp(); for( y = 0; y < IMG_DOWN; y++ ) { for( x = 0; x < IMG_ACROSS; x++ ) { vbx_mtx_fdct_scalar( block_s, (dt*)cs, image, x/*start_x*/, y/*start_y*/, NUM_TILE_X, NUM_TILE_Y ); } } time_stop = vbx_timestamp(); cycles = time_stop - time_start; scalar_time = (double) cycles; scalar_time /= (double) vbx_timestamp_freq(); scalar_time *= 1000.0; //ms vbx_timestamp_t mxp_cycles = vbx_mxp_cycles(cycles); printf("%dx%d Block Size\n", BLOCK_SIZE, BLOCK_SIZE); printf("Finished, scalar CPU took %0.3f ms \n", scalar_time); printf(" CPU Cycles: %d\n", (int) mxp_cycles); printf(" CPU Cycles per block: 
%f\n", mxp_cycles / ((double) (NUM_BLOCKS))); vbx_sync(); // wait for image to be prefetched time_start = vbx_timestamp(); for( y = 0; y < IMG_DOWN; y++ ) { for( x = 0; x < IMG_ACROSS; x++ ) { vbx_mtx_fdct( v, block_v, image, x/*start_x*/, y/*start_y*/, IMG_ACROSS-1,IMG_DOWN-1,NUM_TILE_X, NUM_TILE_Y ); } } time_stop = vbx_timestamp(); cycles = time_stop - time_start; vbx_time = (double) cycles; vbx_time /= (double) vbx_timestamp_freq(); vbx_time *= 1000.0; //ms mxp_cycles = vbx_mxp_cycles(cycles); printf("Finished, MXP took %0.3f ms \n", vbx_time); printf(" CPU Cycles: %d\n", (int) mxp_cycles); printf(" CPU Cycles per block: %f\n", mxp_cycles / ((double) (NUM_BLOCKS))); printf(" Speedup: %f\n", scalar_time / vbx_time); vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); double vbx_mbps = (double) (NUM_BLOCKS) * 1000 / vbx_time; // blocks per second printf("V%d@%dMHz: %dx%d tile, %dx%d blocks, %f blocks/s, %f megapixel/s\n", this_mxp->vector_lanes, this_mxp->core_freq / 1000000, NUM_TILE_Y, NUM_TILE_X, BLOCK_SIZE, BLOCK_SIZE, vbx_mbps, (vbx_mbps * DCT_SIZE) / 1000000); printf("\nChecking results...\n"); wrong = 0; for (block_num = 0; block_num < NUM_BLOCKS; block_num++) { for (i = 0; i < BLOCK_SIZE; i++) { base = i * BLOCK_SIZE; for (j = 0; j < BLOCK_SIZE; j++) { if (block_s[block_num * DCT_SIZE + base + j] != block_v[block_num * DCT_SIZE + base + j]) { if (wrong < 5) { printf("\nError at %d [%d,%d], result is %d, should be %d\n", block_num, i, j, (int) block_v[block_num * DCT_SIZE + base + j], (int) block_s[block_num * DCT_SIZE + base + j]); } wrong++; } } } } printf("wrong is %d\n\n", wrong); total_errors += wrong; free(block_s); vbx_shared_free(block_v); vbx_shared_free(coeff_v); vbx_mtx_fdct_free( v ); VBX_TEST_END(total_errors); return (0); }
/*
 * Face-detection test driver.
 *
 * Two mutually exclusive modes are selected at compile time by UNIT:
 *  - UNIT set:   runs a configurable set of unit comparisons (selected by
 *                LUT_CI, DOUBLE_LUT, LBP_CI, BLIP) and accumulates their
 *                return values in errors.
 *  - UNIT clear: times scalar_face_detect_luma against two runs of
 *                vector_face_detect (fifth argument 0, then 1) and compares
 *                the two vector result images with match_array_pixel.
 *
 * Returns the accumulated error count, which is also passed to VBX_TEST_END.
 *
 * NOTE(review): none of the allocations below is checked for NULL and none
 * is freed before returning.
 */
int main(void)
{
	vbx_timestamp_t time_start, time_stop;
	double scalar_time, vbx_time, vbx_time_masked;
	int i, j, k, l, m, n;
	int errors = 0;

	vbx_test_init();
	vbx_mxp_print_params();

	pixel *input, *scalar_input, *vbx_input, *vbx_input_masked;
	uint16_t *scalar_short;

	/* Three full-size pixel images: one for the scalar path (plus a
	 * vbx_remap_cached view of it) and one per vector run. */
	input = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_input = (pixel *)vbx_remap_cached(input, IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	scalar_short = (uint16_t *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(uint16_t));
	vbx_input = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));
	vbx_input_masked = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel));

#if UNIT
	/* Extra working images used only by the unit comparisons. */
	unsigned char *vbx_img8;
	unsigned short *img, *vbx_img;
	unsigned int *iImg, *vbx_iImg;
	unsigned int *iiImg, *vbx_iiImg;

	img = (unsigned short*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short));
	vbx_img = (unsigned short*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short));
	vbx_img8 = (unsigned char*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned char));

	iImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
	vbx_iImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));

	iiImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
	vbx_iiImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int));
#endif//UNIT

	printf("Resolution = %dx%d\n", IMAGE_WIDTH, IMAGE_HEIGHT);
	printf("Initializing data\n");
	vbx_timestamp_start();

	/* The loop bound is 1, so only the l == 0 ("lenna") branch executes;
	 * the l == 1 and l == 2 branches are currently unreachable. */
	for(l = 0; l < 1; l++){
		/* Labels passed to the print/detect helpers; assigned in exactly one
		 * of the branches below. */
		char *src;
		char *sdst;
		char *vdst;
		char *mdst;

		/* Load the same test image into all three pixel buffers. */
		if(l == 0){
			load_lenna(input, IMAGE_WIDTH, IMAGE_HEIGHT);
			load_lenna(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
			load_lenna(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
			printf("\nLenna\n");
			src = "lenna";
			sdst = "s_lenna";
			vdst = "v_lenna";
			mdst = "m_lenna";
		}else if(l == 1){
			load_ms(input, IMAGE_WIDTH, IMAGE_HEIGHT);
			load_ms(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
			load_ms(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
			printf("\nMicrosoft\n");
			src = "ms";
			sdst = "s_ms";
			vdst = "v_ms";
			mdst = "m_ms";
		}else if(l == 2){
			load_blank(input, IMAGE_WIDTH, IMAGE_HEIGHT);
			load_blank(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT);
			load_blank(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT);
			printf("\nblank\n");
			src = "blank";
			sdst = "s_blank";
			vdst = "v_blank";
			mdst = "m_blank";
		}

#if UNIT
		int window = 20;
		/* Smallest log with ((window/3) >> log) < 2; for window == 20 this
		 * ends with log == 2. */
		int log=0;
		while(((window/3)>>log) >= 2) log++;

		/* Greyscale conversions feeding the later comparisons. */
		errors += compare_scalar_rgb2luma_to_vbw_rgb2luma16(img, vbx_img, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, MAX_PRINT_ERRORS);
		vbw_rgb2luma8(vbx_img8, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH);

		int s;
#if LUT_CI
#if DOUBLE_LUT
		printf("Testing double lut\n");
		printf("Assign lbp double lut\n");
		assign_lbp_lut_ci2();
		/* prev holds the error count before the check so that only the
		 * newly added errors are printed. */
		int prev = errors;
		printf("Cascade check\n");
		/* errors += cascade_check_2w(face_lbp, face_lbp_max_stage, 256); */
		/* errors += cascade_check_2h(face_lbp, face_lbp_max_stage, 256); */
		errors += cascade_check_2b(face_lbp, face_lbp_max_stage, 256);
		if (errors) {
			printf("errors %d\n", errors-prev);
		}
#else
		assign_lbp_lut_ci();
		printf("Testing cascade\n");
		int prev = errors;
		printf("lut check\n");
		/* As written, the "#if 0" below is false and its "#elif 1" branch is
		 * empty, so nothing further is compiled in this #else section. */
#if 0
#if 0
		errors += lut_check(256, 0, 0, 0);
		if (errors) {
			printf("errors %d\n", errors-prev);
		}
#elif 1
		int print_errors = 0;
		vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
		int vci_lanes = this_mxp->vcustom0_lanes;
		int num_features = cascade_max_feature();
		int input_length = 10;
		int lut_length = num_features*vci_lanes;
		int lut_iterations = 15;
#if 1
		lut_length = input_length = 128;
		lut_iterations = 13;
		print_errors = 0;
		errors += lut_check2(input_length, lut_length, lut_iterations, print_errors);
		if (errors) {
			printf("errors %d\n", errors-prev);
		}
#elif 1
		input_length = 64;
		lut_length = input_length;
		lut_iterations = 13;
		print_errors = 1;
		errors += lut_check2(input_length, lut_length, lut_iterations, print_errors);
		if (errors) {
			printf("errors %d\n", errors-prev);
		}
#else
		for(s = 2; s < 100; s=s+10){
			errors += lut_check2(s, lut_length, lut_iterations, print_errors);
			if (errors - prev > 0) {
				printf("%d\terrors %d\n", s, errors-prev);
			} else {
				printf("%d\n", s);
			}
			prev = errors;
		}
#endif
#else
		for(s = 0; s < 2000; s=s+100){
			errors += lut_check(s, 0, 0, 0);
			if (errors - prev > 0) {
				printf("%d\terrors %d\n", s, errors-prev);
			} else {
				printf("%d\n", s);
			}
			prev = errors;
		}
#endif
#elif 1
#else
		printf("check cascade\n");
		prev = errors;
		errors += cascade_check(face_lbp, face_lbp_max_stage, 256);
		if (errors) {
			printf("errors %d\n", errors-prev);
		}

		printf("Testing LBP LUT CI\n");
		prev = errors;
		for(s = 0; s < face_lbp_max_stage; s++){
			errors += compare_vbx_lut_to_vbx_lut_ci(s, MAX_PRINT_ERRORS);
		}
		if (errors) {
			printf("errors %d\n", errors-prev);
			prev = errors;
		}
#endif
#endif
#endif

		/* Disabled debug dump of the 8-bit greyscale image. */
#if 0
		printf("Printing grey scale img\n");
		printf("grey = [");
		for (j = 0; j < IMAGE_HEIGHT; j++) {
			printf("[");
			for (i = 0; i < IMAGE_WIDTH; i++) {
				printf("%d, ", vbx_img8[j*IMAGE_WIDTH+i]);
			}
			printf("],\n");
		}
		printf("]\n");
#endif

#if LBP_CI
		printf("Testing LBP Pattern CI\n");
		errors += compare_LBPRestrictedCI_to_test_scalar_patterns(vbx_img, vbx_img8, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
#endif

#if BLIP
		printf("Testing BLIP\n");
		/* NOTE(review): this call passes six arguments (the last one being
		 * s); confirm it matches the prototype of
		 * compare_scalar_BLIP2_to_vector_BLIP that is in scope. */
		for(s = 1; s < 10; s++){
			errors += compare_scalar_BLIP2_to_vector_BLIP(img, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, s);
		}
#endif

		/* Disabled comparisons, kept for reference. */
#if 0
		errors += compare_LBPRestrictedSums_to_test_scalar_sums_byte(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		errors += compare_LBPRestrictedSums2_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		errors += compare_ScalarLBPRestrictedSums_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		errors += compare_ScalarLBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		errors += compare_LBPRestrictedPatterns2_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		errors += compare_LBPRestricted_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		/* overflow issues -- using bytes changes lbp pattern */
		errors += compare_LBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		/* requires SKIP_INTEGRALS 0 */
		errors += compare_gen_integrals_to_vector_get_img(img, iImg, iiImg, vbx_img, vbx_iImg, vbx_iiImg, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		/* redundant test, compare to test_scalar_patterns instead */
		errors += compare_ScalarLBPRestrictedPatterns_to_SATBinaryPattern(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		errors += compare_SATBinaryPattern_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
		errors += compare_LBPPassStage_to_restricted(vbx_img, log, face_lbp[0], window, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS);
#endif

#else // UNIT

#if PRINT
		print_python_pixel(scalar_input, src, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

		/* Scalar reference: luma conversion plus detection, timed together. */
		time_start = vbx_timestamp();
		scalar_rgb2luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH);
		scalar_face_detect_luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, sdst);
		time_stop = vbx_timestamp();
		scalar_time = vbx_print_scalar_time(time_start, time_stop);
#if PRINT
		print_python_pixel(scalar_input, sdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

		/* First vector run: fifth argument 0. */
		printf("\nVector");
		time_start = vbx_timestamp();
		vector_face_detect((pixel *)vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 0, vdst);
		time_stop = vbx_timestamp();
		vbx_time = vbx_print_vector_time(time_start, time_stop, scalar_time);
#if PRINT
		print_python_pixel(vbx_input, vdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

		/* Second vector run on its own image copy: fifth argument 1. */
		printf("\nVector Masked");
		time_start = vbx_timestamp();
		vector_face_detect((pixel *)vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 1, mdst);
		time_stop = vbx_timestamp();
		vbx_time_masked = vbx_print_vector_time(time_start, time_stop, scalar_time);
#if PRINT
		print_python_pixel(vbx_input_masked, mdst, IMAGE_WIDTH, IMAGE_HEIGHT);
#endif

		/* Only the two vector results are compared with each other; the
		 * comparisons against the scalar image are commented out. */
		/* errors += match_array_pixel(input, vbx_input, "vector", IMAGE_WIDTH, IMAGE_HEIGHT, 0, MAX_PRINT_ERRORS, 0); */
		/* errors += match_array_pixel(input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, 0, MAX_PRINT_ERRORS, 0); */
		errors += match_array_pixel(vbx_input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, 0, MAX_PRINT_ERRORS, 0);
#endif // UNIT
	}

	VBX_TEST_END(errors);
	return errors;
}
int main(void) { vbx_timestamp_t time_start, time_stop; double scalar_time, vbx_time, vbx_time_masked; int i, j, k, l, m, n; int errors = 0; vbx_test_init(); vbx_mxp_print_params(); pixel *input, *scalar_input, *vbx_input, *vbx_input_masked; uint16_t *scalar_short; input = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); scalar_input = (pixel *)vbx_remap_cached(input, IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); scalar_short = (uint16_t *)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(uint16_t)); vbx_input = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); vbx_input_masked = (pixel *)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(pixel)); #if UNIT unsigned short *img, *vbx_img; unsigned int *iImg, *vbx_iImg; unsigned int *iiImg, *vbx_iiImg; img = (unsigned short*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short)); vbx_img = (unsigned short*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned short)); iImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int)); vbx_iImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int)); iiImg = (unsigned int*)malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int)); vbx_iiImg = (unsigned int*)vbx_shared_malloc(IMAGE_WIDTH*IMAGE_HEIGHT*sizeof(unsigned int)); #endif//UNIT printf("Resolution = %dx%d\n", IMAGE_WIDTH, IMAGE_HEIGHT); printf("Initializing data\n"); vbx_timestamp_start(); for(l = 0; l < 1; l++){ char *src; char *sdst; char *vdst; char *mdst; if(l == 0){ load_lenna(input, IMAGE_WIDTH, IMAGE_HEIGHT); load_lenna(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT); load_lenna(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT); printf("\nLenna\n"); src = "lenna"; sdst = "s_lenna"; vdst = "v_lenna"; mdst = "m_lenna"; }else if(l == 1){ load_ms(input, IMAGE_WIDTH, IMAGE_HEIGHT); load_ms(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT); load_ms(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT); printf("\nMicrosoft\n"); src = "ms"; sdst = "s_ms"; vdst = "v_ms"; mdst = "m_ms"; }else 
if(l == 2){ load_blank(input, IMAGE_WIDTH, IMAGE_HEIGHT); load_blank(vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT); load_blank(vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT); printf("\nblank\n"); src = "blank"; sdst = "s_blank"; vdst = "v_blank"; mdst = "m_blank"; } #if UNIT int window = 20; int log=0; while(((window/3)>>log) >= 2) log++; #if LUT_CI /* errors += compare_vbx_lut_to_vbx_lut_ci(1024, MAX_PRINT_ERRORS); */ #endif #if LBP_CI errors += compare_vbx_lbp_ci_to_scalar_patterns(vbx_img, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); #endif errors += compare_scalar_rgb2luma_to_vbw_rgb2luma16(img, vbx_img, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, MAX_PRINT_ERRORS); /* errors += compare_LBPRestrictedSums_to_test_scalar_sums_byte(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */ /* errors += compare_LBPRestrictedSums2_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */ /* errors += compare_ScalarLBPRestrictedSums_to_test_scalar_sums_half(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */ /* errors += compare_ScalarLBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */ /* errors += compare_LBPRestrictedPatterns2_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); */ errors += compare_LBPRestricted_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); #if 0 /* overflow issues -- using bytes changes lbp pattern */ errors += compare_LBPRestrictedPatterns_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); /* requires SKIP_INTEGRALS 0 */ errors += compare_gen_integrals_to_vector_get_img(img, iImg, iiImg, vbx_img, vbx_iImg, vbx_iiImg, vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); /* currently last values have errors if the scaled images size is not an integer, width * f/ (f+1) */ errors += compare_scalar_BLIP2_to_vector_BLIP(img, vbx_input, IMAGE_WIDTH, 
IMAGE_HEIGHT, MAX_PRINT_ERRORS); /* redundant test, compare to test_scalar_patterns instead */ errors += compare_ScalarLBPRestrictedPatterns_to_SATBinaryPattern(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); errors += compare_SATBinaryPattern_to_test_scalar_patterns(vbx_img, log, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); #endif errors += compare_LBPPassStage_to_restricted(vbx_img, log, face_lbp[0], window, IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS); #else // UNIT #if PRINT print_python_pixel(scalar_input, src, IMAGE_WIDTH, IMAGE_HEIGHT); #endif time_start = vbx_timestamp(); scalar_rgb2luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH); scalar_face_detect_luma(scalar_short, input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, sdst); time_stop = vbx_timestamp(); scalar_time = vbx_print_scalar_time(time_start, time_stop); #if PRINT print_python_pixel(scalar_input, sdst, IMAGE_WIDTH, IMAGE_HEIGHT); #endif printf("\nVector"); time_start = vbx_timestamp(); vector_face_detect((pixel *)vbx_input, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 0, vdst); time_stop = vbx_timestamp(); vbx_time = vbx_print_vector_time(time_start, time_stop, scalar_time); #if PRINT print_python_pixel(vbx_input, vdst, IMAGE_WIDTH, IMAGE_HEIGHT); #endif printf("\nVector Masked"); time_start = vbx_timestamp(); vector_face_detect((pixel *)vbx_input_masked, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_WIDTH, 1, mdst); time_stop = vbx_timestamp(); vbx_time_masked = vbx_print_vector_time(time_start, time_stop, scalar_time); #if PRINT print_python_pixel(vbx_input_masked, mdst, IMAGE_WIDTH, IMAGE_HEIGHT); #endif /* errors += match_array_pixel(input, vbx_input, "vector", IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, 0); */ /* errors += match_array_pixel(input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, 0); */ errors += match_array_pixel(vbx_input, vbx_input_masked, "masked", IMAGE_WIDTH, IMAGE_HEIGHT, MAX_PRINT_ERRORS, 0); #endif // UNIT } VBX_TEST_END(errors); return 
errors; }