示例#1
0
/**
 * Print an m x n matrix to standard output, one bracketed row per line,
 * followed by one extra blank line.
 *
 * Entries are right-aligned in columns whose width is derived from the bit
 * size of p (roughly the number of decimal digits of a value of that size).
 *
 * NOTE(review): T is not declared in the visible part of the file; it is
 * presumably the type parameter of a template header preceding this function.
 *
 * @param p    integer whose bit size determines the column width
 * @param m    number of rows to print
 * @param n    number of columns to print (the first entry of each row is
 *             printed unconditionally, even when n is 0)
 * @param C    pointer to the first entry of the matrix, stored row by row
 * @param ldc  distance, in elements, between the starts of consecutive rows
 */
void write_matrix(Givaro::Integer p, size_t m, size_t n, T* C, size_t ldc){
	// bits * log(2) / log(10), truncated towards zero by the conversion.
	const size_t fieldWidth = static_cast<size_t>((p.bitsize()*log(2.))/log(10.));

	for (size_t row = 0; row < m; ++row) {
		T* rowStart = C + row*ldc;

		// The leading entry gets one extra column of width; the following
		// entries get that extra column from their explicit " " separator.
		cout << "[ ";
		cout.width(fieldWidth + 1);
		cout << std::right << rowStart[0];

		for (size_t col = 1; col < n; ++col) {
			cout << " ";
			cout.width(fieldWidth);
			cout << std::right << rowStart[col];
		}

		cout << "]" << endl;
	}

	cout << endl;
}
/**
 * Benchmark multiprecision matrix multiplication C = alpha*A*B + beta*C over
 * Givaro::Modular<Ints> and print timing figures to standard output.
 *
 * For each of the `iters` iterations the function:
 *   1. picks a modulus p (Givaro::Integer::random_exact_2exp(p, b) followed
 *      by IntPrimeDom::nextprimein(p) -- presumably a random b-bit prime),
 *   2. fills A (m x k) and B (k x n) with random field elements, zeroes C (m x n),
 *   3. optionally (BENCH_FLINT) times the same product with FLINT,
 *   4. times FFLAS::fgemm with the sequential helper,
 *   5. checks the result with FFLAS::freivalds and dumps p, A, B, C on failure.
 *
 * NOTE(review): Ints, seed, iters, b, m, n, k and `as` are not declared in the
 * visible part of the file; Ints is presumably a template type parameter and
 * the others file-level benchmark parameters.
 *
 * @return always 0, including when a Freivalds check fails (the failure is
 *         only reported on standard output).
 */
int tmain(){
	srand( (int)seed);
	srand48(seed);
	Givaro::Integer::seeding(seed);

	typedef Givaro::Modular<Ints> Field;
	Givaro::Integer p;
	FFLAS::Timer chrono, TimFreivalds;
	// Accumulated fgemm time and accumulated Freivalds verification time.
	double time=0.,timev=0.;
#ifdef BENCH_FLINT
	double timeFlint=0.;
#endif
	for (size_t loop=0;loop<iters;loop++){
		// Choose the modulus for this iteration.
		Givaro::Integer::random_exact_2exp(p, b);
		Givaro::IntPrimeDom IPD;
		IPD.nextprimein(p);
		Ints ip; Givaro::Caster<Ints,Givaro::Integer>(ip,p);
		Givaro::Caster<Givaro::Integer,Ints>(p,ip); // to check consistency

		Field F(ip);
		// Leading dimensions: the matrices are stored without row padding.
		size_t lda,ldb,ldc;
		lda=k;
		ldb=n;
		ldc=n;

		typename Field::RandIter Rand(F,seed);
		typename Field::Element_ptr A,B,C;
		A= FFLAS::fflas_new(F,m,lda);
		B= FFLAS::fflas_new(F,k,ldb);
		C= FFLAS::fflas_new(F,m,ldc);

// 		for (size_t i=0;i<m;++i)
// 			for (size_t j=0;j<k;++j)
// 				Rand.random(A[i*lda+j]);
// 		for (size_t i=0;i<k;++i)
// 			for (size_t j=0;j<n;++j)
// 				Rand.random(B[i*ldb+j]);
// 		for (size_t i=0;i<m;++i)
// 			for (size_t j=0;j<n;++j)
// 				Rand.random(C[i*ldc+j]);

		// Random A and B, zero C.
		PAR_BLOCK { FFLAS::pfrand(F,Rand, m,k,A,m/size_t(MAX_THREADS)); }
		PAR_BLOCK { FFLAS::pfrand(F,Rand, k,n,B,k/MAX_THREADS); }
		PAR_BLOCK { FFLAS::pfzero(F, m,n,C,m/MAX_THREADS); }

		Ints alpha,beta;
		alpha=F.one;
		beta=F.zero;

#ifdef	BENCH_FLINT
		// FLINT MUL //
		fmpz_t modp;
		fmpz_init(modp);
		fmpz_set_mpz(modp, *(reinterpret_cast<const mpz_t*>(&p)));
		fmpz_mat_t AA,BB,CC,DD;
		fmpz_mat_init (AA, m, k);
		fmpz_mat_init (BB, k, n);
		fmpz_mat_init (CC, m, n);
		fmpz_mat_init (DD, m, n);
		// An fmpz_t must be initialised before it is assigned to.
		fmpz_t aalpha, bbeta;
		fmpz_init(aalpha);
		fmpz_init(bbeta);
		fmpz_set_mpz(aalpha,*(reinterpret_cast<const mpz_t*>(&alpha)));
		fmpz_set_mpz(bbeta,*(reinterpret_cast<const mpz_t*>(&beta)));

		// Copy the FFLAS operands into the FLINT matrices (not timed).
		for (size_t i=0;i<m;++i)
			for (size_t j=0;j<k;++j)
				fmpz_set_mpz(fmpz_mat_entry(AA,i,j),*(reinterpret_cast<const mpz_t*>(A+i*lda+j)));
		for (size_t i=0;i<k;++i)
			for (size_t j=0;j<n;++j)
				fmpz_set_mpz(fmpz_mat_entry(BB,i,j),*(reinterpret_cast<const mpz_t*>(B+i*ldb+j)));
		for (size_t i=0;i<m;++i)
			for (size_t j=0;j<n;++j)
				fmpz_set_mpz(fmpz_mat_entry(CC,i,j),*(reinterpret_cast<const mpz_t*>(C+i*ldc+j)));
		chrono.clear();chrono.start();
		// DD= A.B
		fmpz_mat_mul(DD,AA,BB);
		// CC = beta.C
		fmpz_mat_scalar_mul_fmpz(CC,CC,bbeta);
		// CC = CC + DD.alpha
		fmpz_mat_scalar_addmul_fmpz(CC,DD,aalpha);
		// CC = CC mod p
		for (size_t i=0;i<m;++i)
			for (size_t j=0;j<n;++j)
				fmpz_mod(fmpz_mat_entry(CC,i,j),fmpz_mat_entry(CC,i,j),modp);

		chrono.stop();
		timeFlint+=chrono.usertime();
		// Release every FLINT object created in this iteration.
		fmpz_mat_clear(AA);
		fmpz_mat_clear(BB);
		fmpz_mat_clear(CC);
		fmpz_mat_clear(DD);
		fmpz_clear(aalpha);
		fmpz_clear(bbeta);
		fmpz_clear(modp);
#endif
		//END FLINT CODE //
		using  FFLAS::CuttingStrategy::Recursive;
		using  FFLAS::StrategyParameter::TwoDAdaptive;
		// RNS MUL_LA
		chrono.clear();chrono.start();
// 		PAR_BLOCK{
//             FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc, SPLITTER(NUM_THREADS,Recursive,TwoDAdaptive) );
// 		}
		{
			FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,FFLAS::ParSeqHelper::Sequential());
		}

		chrono.stop();
		// Wall-clock time here; the FLINT and Freivalds figures use user time.
		time+=chrono.realtime();

		// Verify the product.
		TimFreivalds.start();
		bool pass = FFLAS::freivalds(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,k, alpha, A, lda, B, ldb, C, ldc);
		TimFreivalds.stop();
		timev+=TimFreivalds.usertime();
		if (!pass) {
			// Dump the failing instance. NOTE(review): these calls need a
			// write_matrix overload taking an output stream as first argument.
			std::cout<<"FAILED"<<std::endl;
			std::cout << "p:=" << p << ';'<<std::endl;
			write_matrix(std::cout<<"A:=",p,m,k,A,lda)<<';'<<std::endl;
			write_matrix(std::cout<<"B:=",p,k,n,B,ldb)<<';'<<std::endl;
			write_matrix(std::cout<<"C:=",p,m,n,C,ldc)<<';'<<std::endl;
		}

		FFLAS::fflas_delete(A);
		FFLAS::fflas_delete(B);
		FFLAS::fflas_delete(C);

	}

	// 2*m*n*k operations per product, scaled to 10^9, per second of average time.
	double Gflops=(2.*double(m)/1000.*double(n)/1000.*double(k)/1000.0) / time * double(iters);
// 	Gflops*=p.bitsize()/16.;
	cout<<typeid(Ints).name()
		<< " | Time: "<< (time/double(iters)) << " (total:" << time <<")  Gflops: "<<Gflops<<"  | perword: "<< (Gflops*double(p.bitsize()))/64. ;
	FFLAS::writeCommandString(std::cout << '|' << p << " (" << p.bitsize()<<")|", as) << "  | Freivalds: "<< timev/double(iters) << std::endl;

#ifdef BENCH_FLINT
	cout<<"Time FLINT: "<<timeFlint<<endl;
#endif
	return 0;
}