int main(void) { float a0 = 1.01f; float a1 = 1.02f; float a2 = 1.03f; float a3 = 1.04f; float b0 = 13.33f; float b1 = 13.34f; float b2 = 13.35f; float b3 = 13.36f; float res0 = test_scalar(a0, b0); float res1 = test_scalar(a1, b1); float res2 = test_scalar(a2, b2); float res3 = test_scalar(a3, b3); __m128 av = _mm_set_ps(a0, a1, a2, a3); __m128 bv = _mm_set_ps(b0, b1, b2, b3); // fake use to prevent deletion of target function __m128 resv = test_to_be_generated(av, bv); printf("res (scalar): %f %f %f %f\n", res0, res1, res2, res3); printf("res (packetized): %f %f %f %f\n", ((float*)&resv)[0], ((float*)&resv)[1], ((float*)&resv)[2], ((float*)&resv)[3]); return 0; }
int main(void) { vbx_test_init(); vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; const int required_vectors = 4; int N = VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t) / required_vectors; int PRINT_LENGTH = min( N, MAX_PRINT_LENGTH ); double scalar_time, vector_time; int errors=0; vbx_mxp_print_params(); printf( "\nAdd test...\n" ); printf( "Vector length: %d\n", N ); vbx_mm_t *scalar_in1 = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_in2 = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_out = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in1 = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in2 = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); // vbx_mm_t *vector_out = vector_in2 - 5; vbx_sp_t *v_in1 = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); vbx_sp_t *v_in2 = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); vbx_sp_t *v_out = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); // vbx_sp_t *v_out = v_in2-5; VBX_T(test_zero_array)( scalar_out, N ); VBX_T(test_zero_array)( vector_out, N ); VBX_T(test_init_array)( scalar_in1, N, 1 ); VBX_T(test_copy_array)( vector_in1, scalar_in1, N ); VBX_T(test_init_array)( scalar_in2, N, 1 ); VBX_T(test_copy_array)( vector_in2, scalar_in2, N ); VBX_T(test_print_array)( scalar_in1, PRINT_LENGTH ); VBX_T(test_print_array)( scalar_in2, PRINT_LENGTH ); scalar_time = test_scalar( scalar_out, scalar_in1, scalar_in2, N ); VBX_T(test_print_array)( scalar_out, PRINT_LENGTH); vbx_dma_to_vector( v_in1, (void *)vector_in1, N*sizeof(vbx_sp_t) ); vbx_dma_to_vector( v_in2, (void *)vector_in1, N*sizeof(vbx_sp_t) ); vector_time = test_vector( v_out, v_in1, v_in2, N, scalar_time ); vbx_dma_to_host( (void *)vector_out, v_out, N*sizeof(vbx_sp_t) ); vbx_sync(); VBX_T(test_print_array)( vector_out, PRINT_LENGTH ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, N ); VBX_TEST_END(errors); return 0; }
int main(void) { double scalar_time, vector_time; int errors=0; vbx_test_init(); vbx_mxp_print_params(); printf("\nVector FIR test...\n"); vbx_mm_t *scalar_sample = malloc( (SAMP_SIZE+NTAPS)*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_coeffs = malloc( NTAPS*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_out = malloc( SAMP_SIZE*sizeof(vbx_mm_t) ); vbx_mm_t *sample = vbx_shared_malloc( (SAMP_SIZE+NTAPS)*sizeof(vbx_mm_t) ); vbx_mm_t *coeffs = vbx_shared_malloc( NTAPS*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( SAMP_SIZE*sizeof(vbx_mm_t) ); VBX_T(test_zero_array)( scalar_out, SAMP_SIZE ); VBX_T(test_zero_array)( vector_out, SAMP_SIZE ); VBX_T(test_init_array)( scalar_sample, SAMP_SIZE, 0xff ); VBX_T(test_copy_array)( sample, scalar_sample, SAMP_SIZE ); VBX_T(test_init_array)( scalar_coeffs, NTAPS, 1 ); VBX_T(test_copy_array)( coeffs, scalar_coeffs, NTAPS ); VBX_T(test_zero_array)( scalar_sample+SAMP_SIZE, NTAPS ); VBX_T(test_zero_array)( sample+SAMP_SIZE, NTAPS ); printf("\nSamples:\n"); VBX_T(test_print_array)( scalar_sample, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); printf("\nCoefficients:\n"); VBX_T(test_print_array)( scalar_coeffs, min(NTAPS,MAX_PRINT_LENGTH) ); scalar_time = test_scalar( scalar_out, scalar_sample, scalar_coeffs); VBX_T(test_print_array)( scalar_out, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); #ifdef USE_TRANSPOSE vector_time = test_vector_transpose( vector_out, sample, coeffs, scalar_time ); VBX_T(test_print_array)( vector_out, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS ); #endif //USE_TRANSPOSE #ifdef USE_1D vector_time = test_vector_1d( vector_out, sample, coeffs, scalar_time ); VBX_T(test_print_array)( vector_out, min(SAMP_SIZE,MAX_PRINT_LENGTH) ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS ); #endif //USE_1D #ifdef USE_2D vector_time = test_vector_2d( vector_out, sample, coeffs, scalar_time ); VBX_T(test_print_array)( vector_out, 
min(SAMP_SIZE,MAX_PRINT_LENGTH) ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS ); #endif //USE_2D VBX_TEST_END(errors); return 0; }
// Passes a single true _Bool to test_scalar, then passes test_array an array
// of NUM_ELEMS _Bool values in which even indices are true and odd indices
// are false. argc and argv are not used.
int main(int argc, char **argv)
{
    _Bool flag = true;
    _Bool flags[NUM_ELEMS];
    int idx;

    test_scalar(&flag);

    // Alternate true/false starting with true at index 0.
    for (idx = 0; idx < NUM_ELEMS; ++idx) {
        if (idx % 2 == 0) {
            flags[idx] = true;
        } else {
            flags[idx] = false;
        }
    }

    test_array(flags, NUM_ELEMS);

    return 0;
}
void test( const std::string & label , const size_t elem_count , const size_t iter_count ) { KokkosArray::Impl::Timer timer ; double seconds_scalar ; double seconds_multi ; double seconds_array1 ; double seconds_array4 ; double seconds_array16 ; { // Loop 16 times: Explicit::TestHexGrad<double,float,Device> test_scalar( elem_count ); timer.reset(); for ( size_t i = 0 ; i < iter_count * 16 ; ++i ) { test_scalar.apply(); } Device::fence(); seconds_scalar = timer.seconds() / ( 16 * iter_count * elem_count ); } { // 16 x elements Explicit::TestHexGrad<double,float,Device> test_multiple( elem_count * 16 ); timer.reset(); for ( size_t i = 0 ; i < iter_count ; ++i ) { test_multiple.apply(); } Device::fence(); seconds_multi = timer.seconds() / ( 16 * iter_count * elem_count ); } { // 16 x elements with Array<1> typedef KokkosArray::Array<double,1> coord_scalar_type ; typedef KokkosArray::Array<float,1> grad_scalar_type ; Explicit::TestHexGrad<coord_scalar_type,grad_scalar_type,Device> test_array( elem_count * 16 ); timer.reset(); for ( size_t i = 0 ; i < iter_count ; ++i ) { test_array.apply(); } Device::fence(); seconds_array1 = timer.seconds() / ( 16 * iter_count * elem_count ); } { // 4 x elements with Array<4> typedef KokkosArray::Array<double,4> coord_scalar_type ; typedef KokkosArray::Array<float,4> grad_scalar_type ; Explicit::TestHexGrad<coord_scalar_type,grad_scalar_type,Device> test_array( elem_count * 4 ); timer.reset(); for ( size_t i = 0 ; i < iter_count ; ++i ) { test_array.apply(); } Device::fence(); seconds_array4 = timer.seconds() / ( 16 * iter_count * elem_count ); } { // 1 x elements with Array<16> typedef KokkosArray::Array<double,16> coord_scalar_type ; typedef KokkosArray::Array<float,16> grad_scalar_type ; Explicit::TestHexGrad<coord_scalar_type,grad_scalar_type,Device> test_array( elem_count ); timer.reset(); for ( size_t i = 0 ; i < iter_count ; ++i ) { test_array.apply(); } Device::fence(); seconds_array16 = timer.seconds() / ( 16 * iter_count * 
elem_count ); } std::cout << label << " scalar( " << seconds_scalar << " ) multi( " << seconds_multi << " )" << " ) array1( " << seconds_array1 << " )" << " ) array4( " << seconds_array4 << " )" << " ) array16( " << seconds_array16 << " )" << std::endl ; }
int main(void) { vbx_test_init(); #if 0 vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; int N = VBX_SCRATCHPAD_SIZE/sizeof(vbx_mm_t)/8; #endif int TEST_LENGTH = TEST_ROWS*TEST_COLS; int NTAP_LENGTH = NTAP_ROWS*NTAP_COLS; int PRINT_COLS = min( TEST_COLS, MAX_PRINT_LENGTH ); int PRINT_ROWS = min( TEST_ROWS, MAX_PRINT_LENGTH ); double scalar_time, vector_time; int errors=0; vbx_mxp_print_params(); printf( "\nMatrix FIR test...\n" ); printf( "Matrix dimensions: %d,%d\n", TEST_ROWS, TEST_COLS ); vbx_mm_t *scalar_in = malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in = vbx_shared_malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); int32_t *scalar_filt = malloc( NTAP_LENGTH*sizeof(int32_t) ); int32_t *vector_filt = vbx_shared_malloc( NTAP_LENGTH*sizeof(int32_t) ); vbx_mm_t *scalar_out = malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( TEST_LENGTH*sizeof(vbx_mm_t) ); VBX_T(test_zero_array)( scalar_out, TEST_LENGTH ); VBX_T(test_zero_array)( vector_out, TEST_LENGTH ); VBX_T(test_init_array)( scalar_in, TEST_LENGTH, 1 ); VBX_T(test_copy_array)( vector_in, scalar_in, TEST_LENGTH ); test_init_array_word( scalar_filt, NTAP_LENGTH, 1 ); test_copy_array_word( vector_filt, scalar_filt, NTAP_LENGTH ); VBX_T(test_print_matrix)( scalar_in, PRINT_ROWS, PRINT_COLS, TEST_COLS ); test_print_matrix_word( scalar_filt, NTAP_ROWS, NTAP_COLS, NTAP_COLS ); scalar_time = test_scalar( scalar_out, scalar_in, scalar_filt, TEST_ROWS, TEST_COLS, NTAP_ROWS, NTAP_COLS); VBX_T(test_print_matrix)( scalar_out, PRINT_COLS, PRINT_ROWS, TEST_COLS ); vector_time = test_vector( vector_out, vector_in, vector_filt, TEST_ROWS, TEST_COLS, NTAP_ROWS, NTAP_COLS, scalar_time ); VBX_T(test_print_matrix)( vector_out, PRINT_COLS, PRINT_ROWS, TEST_COLS ); int i; for(i=0; i<TEST_ROWS-NTAP_ROWS; i++){ errors += VBX_T(test_verify_array)( scalar_out+i*TEST_COLS, vector_out+i*TEST_COLS, TEST_COLS-NTAP_COLS ); } VBX_TEST_END(errors); return 0; 
}
int main(void) { vbx_test_init(); typedef vbx_word_t vbx_mm_t; vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; int N = VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t ); N = 20; int M = 20; int PRINT_LENGTH = N<MAX_PRINT_LENGTH ? N : MAX_PRINT_LENGTH ; // int PRINT_ROWS = PRINT_LENGTH; int PRINT_ROWS = M<MAX_PRINT_LENGTH ? N : MAX_PRINT_LENGTH; int PRINT_COLS = PRINT_LENGTH; double scalar_time, vector_time,vector2_time; int errors=0; vbx_mxp_print_params(); printf( "\nMatrix multiply test...\n" ); printf( "Matrix dimensions: %d,%d\n", N, M ); vbx_mm_t *scalar_in1 = (vbx_mm_t*)malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *scalar_in2 = (vbx_mm_t*)malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *scalar_out = (vbx_mm_t*)malloc( N*N*sizeof(vbx_mm_t ) ); vbx_mm_t *vector_in1 = (vbx_mm_t*)vbx_shared_malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *vector_in2 = (vbx_mm_t*)vbx_shared_malloc( M*N*sizeof(vbx_mm_t ) ); vbx_mm_t *vector_out = (vbx_mm_t*)vbx_shared_malloc( N*N*sizeof(vbx_mm_t ) ); if ( scalar_in1 == NULL || scalar_in2 == NULL || scalar_out == NULL || vector_in1 == NULL || vector_in2 == NULL || vector_out == NULL ){ printf("Malloc failed\n"); VBX_TEST_END(1); return 0; } test_zero_array_word(scalar_out, N*N ); test_zero_array_word(vector_out, N*N ); test_init_array_word( scalar_in1, M*N, 1 ); test_copy_array_word( vector_in1, scalar_in1, M*N ); test_init_array_word( scalar_in2, M*N, 999 ); //scalar_mtx_xp_MN_word( vector_in2, scalar_in2, N, N ); test_copy_array_word( vector_in2, scalar_in2, M*N ); test_print_matrix_word( scalar_in1, PRINT_COLS, PRINT_ROWS, M ); test_print_matrix_word( scalar_in2, PRINT_ROWS, PRINT_COLS, N ); //change print sizes for outputs PRINT_ROWS=PRINT_COLS=N<PRINT_LENGTH?N:PRINT_LENGTH; scalar_time = test_scalar( scalar_out, scalar_in1, N, M, scalar_in2, M, N); test_print_matrix_word( scalar_out, PRINT_COLS, PRINT_ROWS, N ); vector_time = test_vector( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time ); 
test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N ); errors += test_verify_array_word( scalar_out, vector_out, N*N); vector2_time = test_vector_trans( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time ); test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N ); errors += test_verify_array_word( scalar_out, vector_out, N*N); vector2_time = test_vector_sp( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time ); test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N ); errors += test_verify_array_word( scalar_out, vector_out, N*N); vbx_shared_free(vector_out); vbx_shared_free(vector_in2); vbx_shared_free(vector_in1); free(scalar_out); free(scalar_in2); free(scalar_in1); //errors += orig_test(); VBX_TEST_END(errors); return 0; }
int main(void) { vbx_test_init(); vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size; const int required_vectors = 4; int N = VBX_PAD_DN(VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t) / required_vectors, this_mxp->scratchpad_alignment_bytes); int PRINT_LENGTH = min( N, MAX_PRINT_LENGTH ); double scalar_time, vector_time; int errors=0; vbx_mxp_print_params(); printf( "\nVector copy test...\n" ); printf( "Vector length: %d\n", N ); vbx_mm_t *scalar_in = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *scalar_out = malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_in = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_mm_t *vector_out = vbx_shared_malloc( N*sizeof(vbx_mm_t) ); vbx_sp_t *v_out = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); vbx_sp_t *v_in = vbx_sp_malloc( N*sizeof(vbx_sp_t) ); VBX_T(test_zero_array)( scalar_in, N ); VBX_T(test_zero_array)( vector_in, N ); VBX_T(test_init_array)( scalar_in, N, 1 ); VBX_T(test_copy_array)( vector_in, scalar_in, N ); scalar_time = test_scalar( scalar_out, scalar_in, N ); VBX_T(test_print_array)( scalar_out, PRINT_LENGTH ); vbx_dma_to_vector( v_in, vector_in, N*sizeof(vbx_sp_t) ); vector_time = test_vector( v_out, v_in, N, scalar_time ); vbx_dma_to_host(vector_out, v_out, N*sizeof(vbx_sp_t) ); vbx_sync(); VBX_T(test_print_array)( vector_out, PRINT_LENGTH ); errors += VBX_T(test_verify_array)( scalar_out, vector_out, N ); vbx_sp_free(); #if TEST_DEEP_SP errors += deep_vector_copy_test(); #endif #if DEBUG_MAKE_SP_FULL vbx_sp_malloc(vbx_sp_getfree()); #endif #if TEST_DEEP_MM errors += deep_vector_copy_ext_test(); #endif VBX_TEST_END(errors); return 0; }