/**
 * Print an m-by-n matrix to cout, one bracketed row per line, followed by
 * an empty line.
 *
 * Entry (i,j) is read from C[i*ldc + j]. Column widths are derived from the
 * bit length of p: www = floor(bitsize(p) * log10(2)), which is one less
 * than the largest decimal digit count a value of that bit length can have.
 * The first column is right-aligned in www+1 characters; every following
 * column is preceded by one space and right-aligned in www characters.
 *
 * NOTE(review): T is not declared in this block; it is presumably supplied
 * by a template header outside the visible source -- confirm.
 *
 * @param p    integer whose bit length determines the column width
 * @param m    number of rows to print
 * @param n    number of columns to print (rows print as "[ ]" when n == 0)
 * @param C    pointer to the first entry of the matrix
 * @param ldc  distance, in elements, between the starts of consecutive rows
 */
void write_matrix(Givaro::Integer p, size_t m, size_t n, T* C, size_t ldc){
    const size_t www = (p.bitsize()*log(2.))/log(10.);

    for (size_t i = 0; i < m; ++i) {
        T* row = C + i*ldc;

        cout << "[ ";
        // Only touch the row when it has at least one column: with n == 0
        // there is no row[0] to read.
        if (n > 0) {
            cout.width(www+1);
            cout << std::right << row[0];
            for (size_t j = 1; j < n; ++j) {
                cout << " ";
                cout.width(www);
                cout << std::right << row[j];
            }
        }
        // Plain newline per row; the stream is flushed once by the final endl.
        cout << "]" << '\n';
    }
    cout << endl;
}
int tmain(){ srand( (int)seed); srand48(seed); Givaro::Integer::seeding(seed); typedef Givaro::Modular<Ints> Field; Givaro::Integer p; FFLAS::Timer chrono, TimFreivalds; double time=0.,timev=0.; #ifdef BENCH_FLINT double timeFlint=0.; #endif for (size_t loop=0;loop<iters;loop++){ Givaro::Integer::random_exact_2exp(p, b); Givaro::IntPrimeDom IPD; IPD.nextprimein(p); Ints ip; Givaro::Caster<Ints,Givaro::Integer>(ip,p); Givaro::Caster<Givaro::Integer,Ints>(p,ip); // to check consistency Field F(ip); size_t lda,ldb,ldc; lda=k; ldb=n; ldc=n; typename Field::RandIter Rand(F,seed); typename Field::Element_ptr A,B,C; A= FFLAS::fflas_new(F,m,lda); B= FFLAS::fflas_new(F,k,ldb); C= FFLAS::fflas_new(F,m,ldc); // for (size_t i=0;i<m;++i) // for (size_t j=0;j<k;++j) // Rand.random(A[i*lda+j]); // for (size_t i=0;i<k;++i) // for (size_t j=0;j<n;++j) // Rand.random(B[i*ldb+j]); // for (size_t i=0;i<m;++i) // for (size_t j=0;j<n;++j) // Rand.random(C[i*ldc+j]); PAR_BLOCK { FFLAS::pfrand(F,Rand, m,k,A,m/size_t(MAX_THREADS)); } PAR_BLOCK { FFLAS::pfrand(F,Rand, k,n,B,k/MAX_THREADS); } PAR_BLOCK { FFLAS::pfzero(F, m,n,C,m/MAX_THREADS); } Ints alpha,beta; alpha=F.one; beta=F.zero; #ifdef BENCH_FLINT // FLINT MUL // fmpz_t modp,tmp; fmpz_init(modp); fmpz_init(tmp); fmpz_set_mpz(modp, *(reinterpret_cast<const mpz_t*>(&p))); fmpz_mat_t AA,BB,CC,DD; fmpz_mat_init (AA, m, k); fmpz_mat_init (BB, k, n); fmpz_mat_init (CC, m, n); fmpz_mat_init (DD, m, n); fmpz_t aalpha, bbeta; fmpz_set_mpz(aalpha,*(reinterpret_cast<const mpz_t*>(&alpha))); fmpz_set_mpz(bbeta,*(reinterpret_cast<const mpz_t*>(&beta))); for (size_t i=0;i<m;++i) for (size_t j=0;j<k;++j) fmpz_set_mpz(fmpz_mat_entry(AA,i,j),*(reinterpret_cast<const mpz_t*>(A+i*lda+j))); for (size_t i=0;i<k;++i) for (size_t j=0;j<n;++j) fmpz_set_mpz(fmpz_mat_entry(BB,i,j),*(reinterpret_cast<const mpz_t*>(B+i*ldb+j))); for (size_t i=0;i<m;++i) for (size_t j=0;j<n;++j) fmpz_set_mpz(fmpz_mat_entry(CC,i,j),*(reinterpret_cast<const mpz_t*>(C+i*ldc+j))); 
chrono.clear();chrono.start(); // DD= A.B fmpz_mat_mul(DD,AA,BB); // CC = beta.C fmpz_mat_scalar_mul_fmpz(CC,CC,bbeta); // CC = CC + DD.alpha fmpz_mat_scalar_addmul_fmpz(CC,DD,aalpha); // CC = CC mod p for (size_t i=0;i<m;++i) for (size_t j=0;j<n;++j) fmpz_mod(fmpz_mat_entry(CC,i,j),fmpz_mat_entry(CC,i,j),modp); chrono.stop(); timeFlint+=chrono.usertime(); fmpz_mat_clear(AA); fmpz_mat_clear(BB); #endif //END FLINT CODE // using FFLAS::CuttingStrategy::Recursive; using FFLAS::StrategyParameter::TwoDAdaptive; // RNS MUL_LA chrono.clear();chrono.start(); // PAR_BLOCK{ // FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc, SPLITTER(NUM_THREADS,Recursive,TwoDAdaptive) ); // } { FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,FFLAS::ParSeqHelper::Sequential()); } chrono.stop(); time+=chrono.realtime(); TimFreivalds.start(); bool pass = FFLAS::freivalds(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,k, alpha, A, k, B, n, C,n); TimFreivalds.stop(); timev+=TimFreivalds.usertime(); if (!pass) { std::cout<<"FAILED"<<std::endl; std::cout << "p:=" << p << ';'<<std::endl; write_matrix(std::cout<<"A:=",p,m,k,A,lda)<<';'<<std::endl; write_matrix(std::cout<<"B:=",p,k,n,B,ldb)<<';'<<std::endl; write_matrix(std::cout<<"C:=",p,m,n,C,ldc)<<';'<<std::endl; } FFLAS::fflas_delete(A); FFLAS::fflas_delete(B); FFLAS::fflas_delete(C); } double Gflops=(2.*double(m)/1000.*double(n)/1000.*double(k)/1000.0) / time * double(iters); // Gflops*=p.bitsize()/16.; cout<<typeid(Ints).name() << " | Time: "<< (time/double(iters)) << " (total:" << time <<") Gflops: "<<Gflops<<" | perword: "<< (Gflops*double(p.bitsize()))/64. ; FFLAS::writeCommandString(std::cout << '|' << p << " (" << p.bitsize()<<")|", as) << " | Freivalds: "<< timev/double(iters) << std::endl; #ifdef BENCH_FLINT cout<<"Time FLINT: "<<timeFlint<<endl; #endif return 0; }