Exemplo n.º 1
0
                        // to tag points as "in the set"
// Monte Carlo driver: repeats an estimate NUM_RUNS times and reports each
// estimate plus the sample standard deviation across the runs.
//
// Each run draws MAX_TRIALS points, counts how many make mandel_test() return
// NTRIALS, and scales the hit fraction by 2.5^3 to obtain the "volume".
// NOTE(review): mandel_test(), r() and NTRIALS are defined outside this block;
// what r() samples is not visible here — confirm against its definition.
//
// Returns 0.
int main(){
  double x=0, volume=0, variance=0, standardDeviation=0, sumSquared=0, volumeSum=0;
  int  i=0, j=0, n=0;
  const long MAX_TRIALS = 10000;  // points sampled per run
  const int  NUM_RUNS   = 10;     // independent runs used for the statistics
  srand((unsigned)time(NULL));  //initialize random number generator
  for(j=0; j<NUM_RUNS; j++){
    n=0;  // hit counter is per run
    for(i=0; i<MAX_TRIALS; i++){
      // Draw x uniformly in [-2.0, 0.5] BEFORE testing it, so that every
      // tested point is random (previously the first test used the fixed x=0
      // and the final draw was never tested).
      x = 2.5*rand()/RAND_MAX-2.0;
      if(mandel_test (x, r()) == NTRIALS) n++;
    }
    volume = ((double)n/(double)MAX_TRIALS) *pow(2.5, 3);
    printf("The volume is: %lf\n", volume);
    sumSquared += volume*volume;
    volumeSum += volume;
  }
  // Unbiased sample variance: (sum(v^2) - (sum v)^2 / N) / (N - 1).
  variance=1.0/(NUM_RUNS-1.0)*(sumSquared - 1.0/NUM_RUNS*volumeSum*volumeSum);
  standardDeviation=sqrt(variance);
  printf("The standard deviation is: %lf\n", standardDeviation);


  return 0;
}
Exemplo n.º 2
0
int main()
{
	std::random_device rd;
	std::mt19937 gen(rd());
	std::uniform_real_distribution<float> dis(0, 255);

	size_t max_iter = 20;
	size_t array_size = 800;
	size_t vector_size = array_size*4;

	vfloat32 *vX1, *vX2, *vY , *vY1 , *vY2;
	std::vector<float> vec1(vector_size) , vec2(vector_size) , vecy(vector_size , 0.) , vecy1(vector_size,0.)
	, vecy2(vector_size, 0.);

	// SIMD vectors must be 16 bits aligned
	vX1 =(vfloat32*) _mm_malloc ((size_t) (array_size*sizeof(vfloat32)), 16);
	vX2 =(vfloat32*) _mm_malloc ((size_t) (array_size*sizeof(vfloat32)), 16);
	vY  =(vfloat32*) _mm_malloc ((size_t) (array_size*sizeof(vfloat32)), 16);
	vY1 =(vfloat32*) _mm_malloc ((size_t) (array_size*sizeof(vfloat32)), 16);
	vY2 =(vfloat32*) _mm_malloc ((size_t) (array_size*sizeof(vfloat32)), 16);

	vfloat32 vy = _mm_set_ps(0,0,0,0);

	int j = 0;

	// Initialize vectors and simd arrays
	for(size_t i = 0 ; i < array_size ; ++i)
	{
		float r1 = dis(gen) , r2 = dis(gen) , r3 = dis(gen) , r4 = dis(gen);
		float r5 = dis(gen) , r6 = dis(gen) , r7 = dis(gen) , r8 = dis(gen);

		vec1[j] = r1; vec1[j+1] = r2 ; vec1[j+2] = r3 ; vec1[j+3] = r4;
		vec2[j] = r5; vec2[j+1] = r6 ; vec2[j+2] = r7 ; vec2[j+3] = r8;

		vfloat32 vx1 = _mm_set_ps(r4 , r3 , r2 , r1  );
		vfloat32 vx2 = _mm_set_ps(r8 , r7 , r6 , r5  );

		_mm_store_ps((float*) &vX1[i], vx1);
		_mm_store_ps((float*) &vX2[i], vx2);
		_mm_store_ps((float*) &vY[i], vy);
		_mm_store_ps((float*) &vY1[i], vy);
		_mm_store_ps((float*) &vY2[i], vy);

		j +=4;
	}

	// test pour l'addition de vectors
	{
		auto start = std::chrono::steady_clock::now();
		vectoradd_simd(vX1,vX2,vY,array_size);
		auto end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff = end-start;
		// std::cout << "vector addition time with simd: " << diff.count() << " s" << std::endl;

		start = std::chrono::steady_clock::now();
		std::transform( vec1.begin() , vec1.end() , vec2.begin() , vecy.begin() , std::plus<float>());

		end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff1 = end-start;
		// std::cout << "vector addition time without simd: " << diff1.count() << " s" << std::endl;

		j = 0;
		bool is_valid = true;
		for(size_t i = 0 ; i < array_size ; ++i)
		{
			float out[4] ;
			_mm_store_ps(out , vY[i]);

			if ( out[0] == vecy[j] && out[1] == vecy[j+1] && out[2] == vecy[j+2] && out[3] == vecy[j+3])
				{ j += 4;}
			else
			{
				is_valid = false;
				break;
			}
		}

		if(is_valid)
		{
			std::cout << "l'addition de vecteurs en simd est correcte" << std::endl;
			std::cout << "speedup obtained for vector addition with simd : " << diff1.count() / diff.count() << std::endl;
		}
		else
		{
			std::cout << " l'addition de vecteurs end simd est incorrecte" << std::endl;
		}

		std::cout << "\n";
	}

	// test pour le dot product
	{
		auto start = std::chrono::steady_clock::now();
		vfloat32 sres = vectordot_simd(vX1 , vX2 , array_size);
		auto end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff = end-start;
		// std::cout << "dot product time with simd: " << diff.count() << " s" << std::endl;

		start = std::chrono::steady_clock::now();
		float res = std::inner_product( vec1.begin() , vec1.end() , vec2.begin() , 0. );
		end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff1 = end-start;
		// std::cout << "dot product time without simd: " << diff1.count() << " s" << std::endl;

		float out[4] ;
		_mm_store_ps( out , sres);

		if(  std::abs(out[0] - res ) < 0.01f )
		{
			std::cout << "le produit de vecteurs en simd est correct" << std::endl;
			std::cout << "speedup obtained for dot product with simd : " << diff1.count() / diff.count() << std::endl;
		}
		else {std::cout << "le produit de vecteurs en simd est incorrect : " << out[0] << "  " << res << std::endl;}

		std::cout << "\n";
	}

	// test for 1D filtre with rotation without border check
	{
		auto start = std::chrono::steady_clock::now();
		float divide = 1./3. ;
		for(std::size_t i = 1 ; i < vector_size-1 ; ++i)
		{
			vecy1[i] = divide * ( vec1[i-1] + vec1[i] + vec1[i+1] );
		}
		auto end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff1 = end-start;;

		start = std::chrono::steady_clock::now();
		vectoravg3_simd(vX1 , vY1 , array_size);
		end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff = end-start;

		j = 4;
		bool is_valid = true;

		for(size_t i = 1 ; i < array_size-1 ; ++i)
		{
			float out[4] ;
			_mm_store_ps(out , vY1[i]);

			if ( is_valid == true && out[0] == vecy1[j] && out[1] == vecy1[j+1] && out[2] == vecy1[j+2] && out[3] == vecy1[j+3])
				{ j += 4;}
			else
			{
				is_valid = false;
				break;
			}
		}

		if(is_valid)
		{
			std::cout << "la filtre moyenneur en simd est correct" << std::endl;
			std::cout << "speedup obtained for average filter with simd : " << diff1.count() / diff.count() << std::endl;
		}
		else
		{
			std::cout << "la filtre moyenneur en simd est incorrect" << std::endl;
		}

		std::cout << "\n";
	}

	bool valid_mandel = false;
	// test for mandelbrot
	{
		std::vector<float> mandel_test(4,0);
		std::vector<float> mandel_test1(4,0);
		std::vector<size_t> indx(4,0);
		vfloat32 mdt = _mm_set1_ps(0);
		vfloat32 mdt1 = _mm_set1_ps(0);

		mandel_test[0] = -0.70;
		mandel_test[1] = -0.80;
		mandel_test[2] = -0.90;
		mandel_test[3] = -1.00;

		mandel_test1[0] = +0.10;
		mandel_test1[1] = +0.30;
		mandel_test1[2] = +0.30;
		mandel_test1[3] = +0.40;

		mdt  = _mm_setr_ps(-1.00, -0.90, -0.80, -0.70);
		mdt1 = _mm_setr_ps(+0.40, +0.30, +0.30, +0.10);

		auto start = std::chrono::steady_clock::now();
		for(std::size_t i = 0 ; i < 4 ; ++i )
		{

			indx[i] = mandelbrot_scalar(mandel_test[i] , mandel_test1[i] , max_iter );
		}
		auto end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff1 = end-start;;

		start = std::chrono::steady_clock::now();
		vuint32 res_mandel = mandelbrot_simd(mdt, mdt1 , max_iter);
		end = std::chrono::steady_clock::now();
		std::chrono::duration<double> diff = end-start;

		unsigned int out[4] __attribute__((aligned(16))) ;

		__m128i* po = (__m128i*) &out[0] ;

		_mm_store_si128(po, res_mandel);

		bool v1 = false , v2 = false;

		if( indx[0] == 20 && indx[1] == 8 && indx[2] == 10 && indx[3] == 6 )
		{
			v1 = true;
			std::cout << "la fonction mandelbrot en scalaire est correcte" << std::endl;
		}
		else
		{
			std::cout << "la fonction mandelbrot en scalaire est incorrecte" << std::endl;
			std::cout << "le bon résultat est : 20 8 10 6 \n" << "vous avez obtenu : ";
			vec_display(indx,0);
		}


		if( out[3] == 20 && out[2] == 8 && out[1] == 10 && out[0] == 6 )
		{
			v2 = true;
			std::cout << "la fonction mandelbrot en SIMD est correcte" << std::endl;
		}
		else
		{
			std::cout << "la fonction mandelbrot en SIMD est incorrecte" << std::endl;
			std::cout << "le bon résultat est 20 8 10 6 \n" << "vous avez obtenu :  ";
			simd_display_i32(res_mandel);
		}


		if ( v1 && v2 )
		{
			std::cout << "speedup obtained for mandelbrot : " << diff1.count() / diff.count() << std::endl;
			valid_mandel = true;
		}
	}

	// test for mandelbrot function

	{
		if(valid_mandel)
		{

			std::cout << "\n-----------------------------" << std::endl;
			std::cout << "------ benchmandelbrot ------" << std::endl;
			std::cout << "-----------------------------\n" << std::endl;

			size_t h = SIZE , w = SIZE ;
			std::vector<size_t> indx(h*w,0);
			vfloat32 mdt = _mm_set1_ps(0);
			vfloat32 mdt1 = _mm_set1_ps(0);

			float a0 = -1.5 , a1 = +0.5;
			float b0 = -1.0 , b1 = +1.0;

			float avg_cycles_vec = 0;
			float avg_time_vec  = 0;

			size_t num_iter = 200;

			for(size_t i =0 ; i < num_iter ; ++i)
			{
				auto start = std::chrono::steady_clock::now();
				auto cycles_s = rdtsc();
				calc_mandelbrot_scalar( indx , h , w , a0 , a1 , b0 , b1  , max_iter );
				auto cycles_e = rdtsc();
				auto end = std::chrono::steady_clock::now();
				std::chrono::duration<double> diff1 = end-start;

				avg_time_vec += diff1.count() ;

				avg_cycles_vec += cycles_e - cycles_s;
			}

			avg_time_vec /= num_iter ;
			avg_cycles_vec /= num_iter ;

			std::cout << " mandelbrot vector time : " << avg_time_vec << std::endl;
			std::cout << " mandelbrot vector cycles time : " << avg_cycles_vec << std::endl;

			vuint32 **Simd_indx = (vuint32**)_mm_malloc ((size_t)( h*sizeof(vuint32*)), 16);
			if (Simd_indx)
			{
				for (size_t i = 0; i < w; i++)
				{
					Simd_indx[i] = (vuint32*) _mm_malloc ((size_t) (w*sizeof(vuint32)), 16);
				}
			}

			float avg_cycles_simd = 0;
			float avg_time_simd  = 0;

			for(size_t i = 0 ; i < num_iter ; ++i)
			{
				auto start = std::chrono::steady_clock::now();
				auto cycles_s = rdtsc();
				calc_mandelbrot_simd( Simd_indx , h , w , a0 , a1 , b0 , b1  , max_iter );
				auto cycles_e = rdtsc();
				auto end = std::chrono::steady_clock::now();
				std::chrono::duration<double> diff = end-start;

				avg_time_simd += diff.count() ;
				avg_cycles_simd += cycles_e - cycles_s;
			}

			avg_time_simd /= num_iter ;
			avg_cycles_simd /= num_iter ;

			std::cout << " mandelbrot SIMD time : " << avg_time_simd << std::endl;
			std::cout << " mandelbrot SIMD cycles time : " << avg_cycles_simd << std::endl;

			std::cout << "speedup obtained for mandelbrot : " << avg_time_vec / avg_time_simd << std::endl;
			std::cout << "speedup in cycles obtained for mandelbrot : " <<  avg_cycles_vec / avg_cycles_simd << std::endl;
		}

	}


	_mm_free(vX1);
	_mm_free(vX2);
	_mm_free(vY);
	_mm_free(vY1);
	_mm_free(vY2);


}