Ejemplo n.º 1
0
/*
 * Runs the scalar kernel (test_scalar) on four (a, b) input pairs, runs the
 * packetized kernel (test_to_be_generated) on the same four pairs packed into
 * two SSE vectors, and prints both result sets for comparison.
 *
 * Returns 0.
 */
int main(void) {

	float a0 = 1.01f;
	float a1 = 1.02f;
	float a2 = 1.03f;
	float a3 = 1.04f;
	float b0 = 13.33f;
	float b1 = 13.34f;
	float b2 = 13.35f;
	float b3 = 13.36f;

	float res0 = test_scalar(a0, b0);
	float res1 = test_scalar(a1, b1);
	float res2 = test_scalar(a2, b2);
	float res3 = test_scalar(a3, b3);

	// _mm_set_ps takes its arguments from the highest lane down to lane 0,
	// so the values are passed in reverse: lane i then holds pair i and the
	// packetized line below prints in the same order as the scalar line.
	__m128 av = _mm_set_ps(a3, a2, a1, a0);
	__m128 bv = _mm_set_ps(b3, b2, b1, b0);

	// fake use to prevent deletion of target function
	__m128 resv = test_to_be_generated(av, bv);

	printf("res (scalar): %f %f %f %f\n", res0, res1, res2, res3);
	printf("res (packetized): %f %f %f %f\n", ((float*)&resv)[0], ((float*)&resv)[1], ((float*)&resv)[2], ((float*)&resv)[3]);

	return 0;
}
Ejemplo n.º 2
0
Archivo: test.c Proyecto: cirqueit/mxp
/*
 * Vector add test driver.
 *
 * Fills two input arrays, runs the scalar reference (test_scalar) and the
 * vector implementation (test_vector) on copies of the same data, prints the
 * leading elements of each buffer, and reports the number of mismatching
 * elements through VBX_TEST_END.
 *
 * Returns 0.
 */
int main(void)
{
	vbx_test_init();

	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size;
	const int required_vectors = 4;

	// Vector length: the reported scratchpad size, in elements, divided
	// by required_vectors.
	int N = VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t) / required_vectors;

	int PRINT_LENGTH = min( N, MAX_PRINT_LENGTH );

	double scalar_time, vector_time;
	int errors=0;

	vbx_mxp_print_params();
	printf( "\nAdd test...\n" );
	printf( "Vector length: %d\n", N );

	// Buffers used by the scalar reference.
	vbx_mm_t *scalar_in1 = malloc( N*sizeof(vbx_mm_t) );
	vbx_mm_t *scalar_in2 = malloc( N*sizeof(vbx_mm_t) );
	vbx_mm_t *scalar_out = malloc( N*sizeof(vbx_mm_t) );

	// Buffers that are the source/destination of the DMA transfers below.
	vbx_mm_t *vector_in1 = vbx_shared_malloc( N*sizeof(vbx_mm_t) );
	vbx_mm_t *vector_in2 = vbx_shared_malloc( N*sizeof(vbx_mm_t) );
	vbx_mm_t *vector_out = vbx_shared_malloc( N*sizeof(vbx_mm_t) );

	// Operands handed to test_vector.
	vbx_sp_t *v_in1 = vbx_sp_malloc( N*sizeof(vbx_sp_t) );
	vbx_sp_t *v_in2 = vbx_sp_malloc( N*sizeof(vbx_sp_t) );
	vbx_sp_t *v_out = vbx_sp_malloc( N*sizeof(vbx_sp_t) );

	VBX_T(test_zero_array)( scalar_out, N );
	VBX_T(test_zero_array)( vector_out, N );

	// Each vector input is a copy of the matching scalar input.
	VBX_T(test_init_array)( scalar_in1, N, 1 );
	VBX_T(test_copy_array)( vector_in1, scalar_in1, N );
	VBX_T(test_init_array)( scalar_in2, N, 1 );
	VBX_T(test_copy_array)( vector_in2, scalar_in2, N );

	VBX_T(test_print_array)( scalar_in1, PRINT_LENGTH );
	VBX_T(test_print_array)( scalar_in2, PRINT_LENGTH );

	scalar_time = test_scalar( scalar_out, scalar_in1, scalar_in2, N );
	VBX_T(test_print_array)( scalar_out, PRINT_LENGTH);

	vbx_dma_to_vector( v_in1, (void *)vector_in1, N*sizeof(vbx_sp_t) );
	// The second operand must come from vector_in2; loading vector_in1
	// twice would leave vector_in2 unused and test in1 + in1.
	vbx_dma_to_vector( v_in2, (void *)vector_in2, N*sizeof(vbx_sp_t) );
	vector_time = test_vector( v_out, v_in1, v_in2, N, scalar_time );
	vbx_dma_to_host( (void *)vector_out, v_out, N*sizeof(vbx_sp_t) );
	vbx_sync();
	VBX_T(test_print_array)( vector_out, PRINT_LENGTH );

	errors += VBX_T(test_verify_array)( scalar_out, vector_out, N );

	VBX_TEST_END(errors);
	return 0;
}
Ejemplo n.º 3
0
int main(void)
{
	double scalar_time, vector_time;
	int errors=0;

	vbx_test_init();

	vbx_mxp_print_params();
	printf("\nVector FIR test...\n");

	vbx_mm_t *scalar_sample = malloc( (SAMP_SIZE+NTAPS)*sizeof(vbx_mm_t) );
	vbx_mm_t *scalar_coeffs = malloc(             NTAPS*sizeof(vbx_mm_t) );
	vbx_mm_t *scalar_out    = malloc(         SAMP_SIZE*sizeof(vbx_mm_t) );

	vbx_mm_t *sample     = vbx_shared_malloc( (SAMP_SIZE+NTAPS)*sizeof(vbx_mm_t) );
	vbx_mm_t *coeffs     = vbx_shared_malloc(             NTAPS*sizeof(vbx_mm_t) );
	vbx_mm_t *vector_out = vbx_shared_malloc(         SAMP_SIZE*sizeof(vbx_mm_t) );

	VBX_T(test_zero_array)( scalar_out, SAMP_SIZE );
	VBX_T(test_zero_array)( vector_out, SAMP_SIZE );

	VBX_T(test_init_array)( scalar_sample, SAMP_SIZE, 0xff );
	VBX_T(test_copy_array)( sample, scalar_sample, SAMP_SIZE );
	VBX_T(test_init_array)( scalar_coeffs, NTAPS, 1 );
	VBX_T(test_copy_array)( coeffs, scalar_coeffs, NTAPS );

	VBX_T(test_zero_array)( scalar_sample+SAMP_SIZE, NTAPS );
	VBX_T(test_zero_array)( sample+SAMP_SIZE, NTAPS );

	printf("\nSamples:\n");
	VBX_T(test_print_array)( scalar_sample, min(SAMP_SIZE,MAX_PRINT_LENGTH) );
	printf("\nCoefficients:\n");
	VBX_T(test_print_array)( scalar_coeffs, min(NTAPS,MAX_PRINT_LENGTH) );

	scalar_time = test_scalar( scalar_out, scalar_sample, scalar_coeffs);
	VBX_T(test_print_array)( scalar_out,  min(SAMP_SIZE,MAX_PRINT_LENGTH) );

	#ifdef USE_TRANSPOSE
	vector_time = test_vector_transpose( vector_out, sample, coeffs, scalar_time );
	VBX_T(test_print_array)( vector_out,  min(SAMP_SIZE,MAX_PRINT_LENGTH) );
	errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS );
	#endif //USE_TRANSPOSE

	#ifdef USE_1D
	vector_time = test_vector_1d( vector_out, sample, coeffs, scalar_time );
	VBX_T(test_print_array)( vector_out,  min(SAMP_SIZE,MAX_PRINT_LENGTH) );
	errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS );
	#endif //USE_1D

	#ifdef USE_2D
	vector_time = test_vector_2d( vector_out, sample, coeffs, scalar_time );
	VBX_T(test_print_array)( vector_out,  min(SAMP_SIZE,MAX_PRINT_LENGTH) );
	errors += VBX_T(test_verify_array)( scalar_out, vector_out, SAMP_SIZE-NTAPS );
	#endif //USE_2D

	VBX_TEST_END(errors);
	return 0;
}
/*
 * Passes a single _Bool set to true to test_scalar, then passes test_array
 * an array of NUM_ELEMS _Bool values that alternate true/false, starting
 * with true at index 0.
 *
 * argc and argv are not used. Returns 0.
 */
int main(int argc, char **argv)
{
  _Bool flag = true;
  _Bool flags[NUM_ELEMS];
  int idx;

  test_scalar(&flag);

  /* Even indices hold true, odd indices hold false. */
  for(idx = 0; idx < NUM_ELEMS; idx++) {
    flags[idx] = (idx % 2 == 0);
  }

  test_array(flags, NUM_ELEMS);

  return 0;
}
Ejemplo n.º 5
0
/// Times Explicit::TestHexGrad::apply() in five configurations and prints
/// one line with the result of each.
///
/// Every timing is divided by ( 16 * iter_count * elem_count ) so that the
/// five printed values share the same normalization.
///
/// \param label       text printed at the start of the output line
/// \param elem_count  base element count; each configuration scales it
/// \param iter_count  base iteration count; the scalar case runs 16x as many
void test( const std::string & label ,
           const size_t elem_count ,
           const size_t iter_count )
{
  KokkosArray::Impl::Timer timer ;

  double seconds_scalar ;
  double seconds_multi ;
  double seconds_array1 ;
  double seconds_array4 ;
  double seconds_array16 ;

  { // Loop 16 times:
    Explicit::TestHexGrad<double,float,Device> test_scalar( elem_count );

    timer.reset();

    for ( size_t i = 0 ; i < iter_count * 16 ; ++i ) {
      test_scalar.apply();
    }

    // Fence before reading the timer.
    Device::fence();

    seconds_scalar = timer.seconds() / ( 16 * iter_count * elem_count );
  }

  { // 16 x elements
    Explicit::TestHexGrad<double,float,Device> test_multiple( elem_count * 16 );

    timer.reset();

    for ( size_t i = 0 ; i < iter_count ; ++i ) {
      test_multiple.apply();
    }

    Device::fence();

    seconds_multi = timer.seconds() / ( 16 * iter_count * elem_count );
  }

  { // 16 x elements with Array<1>
    typedef KokkosArray::Array<double,1> coord_scalar_type ;
    typedef KokkosArray::Array<float,1>  grad_scalar_type ;

    Explicit::TestHexGrad<coord_scalar_type,grad_scalar_type,Device>
      test_array( elem_count * 16 );

    timer.reset();

    for ( size_t i = 0 ; i < iter_count ; ++i ) {
      test_array.apply();
    }

    Device::fence();

    seconds_array1 = timer.seconds() / ( 16 * iter_count * elem_count );
  }

  { // 4 x elements with Array<4>
    typedef KokkosArray::Array<double,4> coord_scalar_type ;
    typedef KokkosArray::Array<float,4>  grad_scalar_type ;

    Explicit::TestHexGrad<coord_scalar_type,grad_scalar_type,Device>
      test_array( elem_count * 4 );

    timer.reset();

    for ( size_t i = 0 ; i < iter_count ; ++i ) {
      test_array.apply();
    }

    Device::fence();

    seconds_array4 = timer.seconds() / ( 16 * iter_count * elem_count );
  }

  { // 1 x elements with Array<16>
    typedef KokkosArray::Array<double,16> coord_scalar_type ;
    typedef KokkosArray::Array<float,16>  grad_scalar_type ;

    Explicit::TestHexGrad<coord_scalar_type,grad_scalar_type,Device> test_array( elem_count );

    timer.reset();

    for ( size_t i = 0 ; i < iter_count ; ++i ) {
      test_array.apply();
    }

    Device::fence();

    seconds_array16 = timer.seconds() / ( 16 * iter_count * elem_count );
  }

  // Each field is written as "name( value )" with exactly one closing
  // parenthesis, so the parentheses in the printed line are balanced.
  std::cout << label
            << " scalar( " << seconds_scalar << " )"
            << " multi( " << seconds_multi << " )"
            << " array1( " << seconds_array1 << " )"
            << " array4( " << seconds_array4 << " )"
            << " array16( " << seconds_array16 << " )"
            << std::endl ;
}
Ejemplo n.º 6
0
Archivo: test.c Proyecto: 8l/mxp
/*
 * Matrix FIR test driver.
 *
 * Builds a TEST_ROWS x TEST_COLS input matrix and an NTAP_ROWS x NTAP_COLS
 * filter, runs the scalar reference (test_scalar) and the vector
 * implementation (test_vector) on copies of the same data, prints the
 * matrices, and reports the number of mismatching elements through
 * VBX_TEST_END.
 *
 * Returns 0.
 */
int main(void)
{

	vbx_test_init();

	int TEST_LENGTH = TEST_ROWS*TEST_COLS;
	int NTAP_LENGTH = NTAP_ROWS*NTAP_COLS;

	int PRINT_COLS = min( TEST_COLS, MAX_PRINT_LENGTH );
	int PRINT_ROWS = min( TEST_ROWS, MAX_PRINT_LENGTH );

	double scalar_time, vector_time;
	int errors=0;

	vbx_mxp_print_params();
	printf( "\nMatrix FIR test...\n" );
	printf( "Matrix dimensions: %d,%d\n", TEST_ROWS, TEST_COLS );

	vbx_mm_t  *scalar_in   = malloc( TEST_LENGTH*sizeof(vbx_mm_t) );
	vbx_mm_t  *vector_in   = vbx_shared_malloc( TEST_LENGTH*sizeof(vbx_mm_t) );

	int32_t *scalar_filt = malloc( NTAP_LENGTH*sizeof(int32_t) );
	int32_t *vector_filt = vbx_shared_malloc( NTAP_LENGTH*sizeof(int32_t) );

	vbx_mm_t  *scalar_out  = malloc( TEST_LENGTH*sizeof(vbx_mm_t) );
	vbx_mm_t  *vector_out  = vbx_shared_malloc( TEST_LENGTH*sizeof(vbx_mm_t) );

	VBX_T(test_zero_array)( scalar_out, TEST_LENGTH );
	VBX_T(test_zero_array)( vector_out, TEST_LENGTH );

	// The vector-side input and filter are copies of the scalar-side ones.
	VBX_T(test_init_array)( scalar_in, TEST_LENGTH, 1 );
	VBX_T(test_copy_array)( vector_in, scalar_in, TEST_LENGTH );

	test_init_array_word( scalar_filt, NTAP_LENGTH, 1 );
	test_copy_array_word( vector_filt, scalar_filt, NTAP_LENGTH );

	VBX_T(test_print_matrix)( scalar_in, PRINT_ROWS, PRINT_COLS, TEST_COLS );
	test_print_matrix_word( scalar_filt, NTAP_ROWS, NTAP_COLS, NTAP_COLS );

	// The output prints use the same (rows, cols, stride) argument order as
	// the input and filter prints above; with rows and cols swapped, a
	// non-square matrix would be printed with the wrong shape.
	// NOTE(review): argument order is taken from the two calls above -
	// confirm against the test_print_matrix prototype.
	scalar_time = test_scalar( scalar_out, scalar_in, scalar_filt,
			TEST_ROWS, TEST_COLS, NTAP_ROWS, NTAP_COLS);
	VBX_T(test_print_matrix)( scalar_out, PRINT_ROWS, PRINT_COLS, TEST_COLS );

	vector_time = test_vector( vector_out, vector_in, vector_filt,
			TEST_ROWS, TEST_COLS, NTAP_ROWS, NTAP_COLS, scalar_time );
	VBX_T(test_print_matrix)( vector_out, PRINT_ROWS, PRINT_COLS, TEST_COLS );

	// Compare row by row; only the first TEST_ROWS-NTAP_ROWS rows and the
	// first TEST_COLS-NTAP_COLS columns of each row are checked.
	int i;
	for(i=0; i<TEST_ROWS-NTAP_ROWS; i++){
		errors += VBX_T(test_verify_array)( scalar_out+i*TEST_COLS, vector_out+i*TEST_COLS, TEST_COLS-NTAP_COLS );
	}

	VBX_TEST_END(errors);
	return 0;
}
Ejemplo n.º 7
0
/*
 * Matrix multiply test driver.
 *
 * Multiplies an N x M matrix by an M x N matrix with the scalar reference
 * (test_scalar) and with three vector variants (test_vector,
 * test_vector_trans, test_vector_sp). Each variant's N x N result is
 * compared against the scalar result and the accumulated error count is
 * reported through VBX_TEST_END.
 *
 * Returns 0, including when an allocation fails (the failure is reported
 * through VBX_TEST_END(1)).
 */
int main(void)
{
	vbx_test_init();
	typedef vbx_word_t vbx_mm_t;
	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size;
	int N = VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t );
	// The scratchpad-derived size is overridden with fixed dimensions.
	N = 20;
	int M = 20;

	int PRINT_LENGTH =  N<MAX_PRINT_LENGTH ? N : MAX_PRINT_LENGTH ;
	// PRINT_ROWS is the M-based print limit: clamp M itself. (Selecting N
	// here would only be right when N == M.)
	int PRINT_ROWS = M<MAX_PRINT_LENGTH ? M : MAX_PRINT_LENGTH;
	int PRINT_COLS = PRINT_LENGTH;

	double scalar_time, vector_time,vector2_time;
	int errors=0;

	vbx_mxp_print_params();
	printf( "\nMatrix multiply test...\n" );
	printf( "Matrix dimensions: %d,%d\n", N, M );


	vbx_mm_t  *scalar_in1 = (vbx_mm_t*)malloc( M*N*sizeof(vbx_mm_t ) );
	vbx_mm_t  *scalar_in2 = (vbx_mm_t*)malloc( M*N*sizeof(vbx_mm_t ) );
	vbx_mm_t  *scalar_out = (vbx_mm_t*)malloc( N*N*sizeof(vbx_mm_t ) );
	vbx_mm_t  *vector_in1 = (vbx_mm_t*)vbx_shared_malloc( M*N*sizeof(vbx_mm_t ) );
	vbx_mm_t  *vector_in2 = (vbx_mm_t*)vbx_shared_malloc( M*N*sizeof(vbx_mm_t ) );
	vbx_mm_t  *vector_out = (vbx_mm_t*)vbx_shared_malloc( N*N*sizeof(vbx_mm_t ) );
	if ( scalar_in1 == NULL ||
	     scalar_in2 == NULL ||
	     scalar_out == NULL ||
	     vector_in1 == NULL ||
	     vector_in2 == NULL ||
	     vector_out == NULL ){
		printf("Malloc failed\n");
		VBX_TEST_END(1);
		return 0;
	}



	test_zero_array_word(scalar_out, N*N );
	test_zero_array_word(vector_out, N*N );

	// The vector-side inputs are copies of the scalar-side inputs.
	test_init_array_word( scalar_in1, M*N, 1 );
	test_copy_array_word( vector_in1, scalar_in1, M*N );
	test_init_array_word( scalar_in2, M*N, 999 );
	test_copy_array_word( vector_in2, scalar_in2, M*N );

	// in1 is printed with stride M and in2 with stride N, so the two
	// print limits swap roles between these calls.
	test_print_matrix_word( scalar_in1, PRINT_COLS, PRINT_ROWS, M );
	test_print_matrix_word( scalar_in2, PRINT_ROWS, PRINT_COLS, N );

	//change print sizes for outputs
	PRINT_ROWS=PRINT_COLS=N<PRINT_LENGTH?N:PRINT_LENGTH;

	scalar_time = test_scalar( scalar_out, scalar_in1, N, M, scalar_in2, M, N);
	test_print_matrix_word( scalar_out, PRINT_COLS, PRINT_ROWS, N );


	vector_time = test_vector( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time );
	test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N );
	errors += test_verify_array_word( scalar_out, vector_out, N*N);

	vector2_time = test_vector_trans( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time );
	test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N );
	errors += test_verify_array_word( scalar_out, vector_out, N*N);

	vector2_time = test_vector_sp( vector_out, vector_in1, N, M, vector_in2, M, N, scalar_time );
	test_print_matrix_word( vector_out, PRINT_COLS, PRINT_ROWS, N );
	errors += test_verify_array_word( scalar_out, vector_out, N*N);

	// Release the buffers in reverse order of allocation.
	vbx_shared_free(vector_out);
	vbx_shared_free(vector_in2);
	vbx_shared_free(vector_in1);
	free(scalar_out);
	free(scalar_in2);
	free(scalar_in1);

	VBX_TEST_END(errors);
	return 0;
}
Ejemplo n.º 8
0
Archivo: test.c Proyecto: 8l/mxp
/*
 * Vector copy test driver.
 *
 * Fills an input array, runs the scalar reference (test_scalar) and the
 * vector implementation (test_vector) on copies of the same data, and
 * compares the two outputs. Optional extra tests are compiled in through
 * TEST_DEEP_SP and TEST_DEEP_MM. The accumulated error count is reported
 * through VBX_TEST_END.
 *
 * Returns 0.
 */
int main(void)
{
	vbx_test_init();

	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size;
	const int required_vectors = 4;

	// Vector length: scratchpad size in elements divided by
	// required_vectors, then passed through VBX_PAD_DN with the
	// scratchpad alignment.
	int N = VBX_PAD_DN(VBX_SCRATCHPAD_SIZE / sizeof(vbx_mm_t) / required_vectors, this_mxp->scratchpad_alignment_bytes);

	int PRINT_LENGTH = min( N, MAX_PRINT_LENGTH );

	double scalar_time, vector_time;
	int errors=0;

	vbx_mxp_print_params();
	printf( "\nVector copy test...\n" );
	printf( "Vector length: %d\n", N );

	vbx_mm_t *scalar_in  = malloc( N*sizeof(vbx_mm_t) );
	vbx_mm_t *scalar_out = malloc( N*sizeof(vbx_mm_t) );

	vbx_mm_t *vector_in  = vbx_shared_malloc( N*sizeof(vbx_mm_t) );
	vbx_mm_t *vector_out = vbx_shared_malloc( N*sizeof(vbx_mm_t) );

	vbx_sp_t *v_out = vbx_sp_malloc( N*sizeof(vbx_sp_t) );
	vbx_sp_t *v_in = vbx_sp_malloc( N*sizeof(vbx_sp_t) );

	// Clear the output buffers so stale contents cannot match by chance.
	// The inputs need no clearing: they are fully written just below.
	VBX_T(test_zero_array)( scalar_out, N );
	VBX_T(test_zero_array)( vector_out, N );

	VBX_T(test_init_array)( scalar_in, N, 1 );
	VBX_T(test_copy_array)( vector_in, scalar_in, N );

	scalar_time = test_scalar( scalar_out, scalar_in, N );
	VBX_T(test_print_array)( scalar_out, PRINT_LENGTH );

	vbx_dma_to_vector( v_in, vector_in, N*sizeof(vbx_sp_t) );
	vector_time = test_vector( v_out, v_in, N, scalar_time );
	vbx_dma_to_host(vector_out, v_out, N*sizeof(vbx_sp_t) );
	vbx_sync();
	VBX_T(test_print_array)( vector_out, PRINT_LENGTH );

	errors += VBX_T(test_verify_array)( scalar_out, vector_out, N );

	vbx_sp_free();

#if TEST_DEEP_SP
	errors += deep_vector_copy_test();
#endif

#if DEBUG_MAKE_SP_FULL
	// Allocate everything vbx_sp_getfree() reports as still available.
	vbx_sp_malloc(vbx_sp_getfree());
#endif

#if TEST_DEEP_MM
	errors += deep_vector_copy_ext_test();
#endif

	VBX_TEST_END(errors);

	return 0;
}