int main(int argc, char** argv) { // Initialize runtime madness::World& world = madness::initialize(argc, argv); // Get command line arguments if(argc < 2) { std::cout << "Usage: fock_build matrix_size block_size df_size df_block_size [repetitions]\n"; return 0; } const long matrix_size = atol(argv[1]); const long block_size = atol(argv[2]); const long df_size = atol(argv[3]); const long df_block_size = atol(argv[4]); if (matrix_size <= 0) { std::cerr << "Error: matrix size must greater than zero.\n"; return 1; } if (df_size <= 0) { std::cerr << "Error: third rank size must greater than zero.\n"; return 1; } if (block_size <= 0 || df_block_size <= 0) { std::cerr << "Error: block size must greater than zero.\n"; return 1; } if(matrix_size % block_size != 0ul && df_size % df_block_size != 0ul) { std::cerr << "Error: tensor size must be evenly divisible by block size.\n"; return 1; } const long repeat = (argc >= 6 ? atol(argv[5]) : 5); if (repeat <= 0) { std::cerr << "Error: number of repititions must greater than zero.\n"; return 1; } const std::size_t num_blocks = matrix_size / block_size; const std::size_t df_num_blocks = df_size / df_block_size; const std::size_t block_count = num_blocks * num_blocks; const std::size_t df_block_count = df_num_blocks * num_blocks * num_blocks; if(world.rank() == 0) std::cout << "TiledArray: Fock Build Test ...\n" << "Number of nodes = " << world.size() << "\nMatrix size = " << matrix_size << "x" << matrix_size << "\nTensor size = " << matrix_size << "x" << matrix_size << "x" << df_size << "\nBlock size = " << block_size << "x" << block_size << "x" << df_block_size << "\nMemory per matrix = " << double(matrix_size * matrix_size * sizeof(double)) / 1.0e9 << " GB\nMemory per tensor = " << double(matrix_size * matrix_size * df_size * sizeof(double)) / 1.0e9 << " GB\nNumber of matrix blocks = " << block_count << "\nNumber of tensor blocks = " << df_block_count << "\nAverage blocks/node matrix = " << double(block_count) / double(world.size()) << "\nAverage blocks/node tensor = " << double(df_block_count) / double(world.size()) << "\n"; // Construct TiledRange std::vector<unsigned int> blocking; blocking.reserve(num_blocks + 1); for(std::size_t i = 0; i <= matrix_size; i += block_size) blocking.push_back(i); std::vector<unsigned int> df_blocking; blocking.reserve(df_num_blocks + 1); for(std::size_t i = 0; i <= df_size; i += df_block_size) df_blocking.push_back(i); std::vector<TiledArray::TiledRange1> blocking2(2, TiledArray::TiledRange1(blocking.begin(), blocking.end())); std::vector<TiledArray::TiledRange1> blocking3 = { TiledArray::TiledRange1(blocking.begin(), blocking.end()), TiledArray::TiledRange1(blocking.begin(), blocking.end()), TiledArray::TiledRange1(df_blocking.begin(), df_blocking.end()) }; TiledArray::TiledRange trange(blocking2.begin(), blocking2.end()); TiledArray::TiledRange df_trange(blocking3.begin(), blocking3.end()); // Construct and initialize arrays TiledArray::Array<double, 2> D(world, trange); TiledArray::Array<double, 2> DL(world, trange); TiledArray::Array<double, 2> F(world, trange); TiledArray::Array<double, 2> G(world, trange); TiledArray::Array<double, 2> H(world, trange); TiledArray::Array<double, 3> TCInts(world, df_trange); TiledArray::Array<double, 3> ExchTemp(world, df_trange); D.set_all_local(1.0); DL.set_all_local(1.0); H.set_all_local(2.0); TCInts.set_all_local(3.0); // Start clock world.gop.fence(); const double wall_time_start = madness::wall_time(); // Do fock build for(int i = 0; i < repeat; ++i) { // Assume we have the cholesky decompositon of the density matrix ExchTemp("s,j,P") = DL("s,n") * TCInts("n,j,P"); // Compute coulomb and exchange G("i,j") = 2.0 * TCInts("i,j,P") * ( D("n,m") * TCInts("n,m,P") ) - ExchTemp("s,i,P") * ExchTemp("s,j,P"); F("i,j") = G("i,j") + H("i,j"); world.gop.fence(); if(world.rank() == 0) std::cout << "Iteration " << i + 1 << "\n"; } // Stop clock const double wall_time_stop = madness::wall_time(); if(world.rank() == 0){ std::cout << "Average wall time = " << (wall_time_stop - wall_time_start) / double(repeat) << " sec\nAverage GFLOPS = " << double(repeat) * (double(4.0 * matrix_size * matrix_size * df_size) + // Coulomb flops double(4.0 * matrix_size * matrix_size * matrix_size * df_size)) // Exchange flops / (wall_time_stop - wall_time_start) / 1.0e9 << "\n"; } madness::finalize(); return 0; }
int main(int argc, char** argv) { int rc = 0; try { // Initialize runtime TiledArray::World& world = TiledArray::initialize(argc, argv); // Get command line arguments if(argc < 2) { std::cout << "Usage: " << argv[0] << " matrix_size block_size [repetitions]\n"; return 0; } const long matrix_size = atol(argv[1]); const long block_size = atol(argv[2]); if(matrix_size <= 0) { std::cerr << "Error: matrix size must be greater than zero.\n"; return 1; } if(block_size <= 0) { std::cerr << "Error: block size must be greater than zero.\n"; return 1; } if((matrix_size % block_size) != 0ul) { std::cerr << "Error: matrix size must be evenly divisible by block size.\n"; return 1; } const long repeat = (argc >= 4 ? atol(argv[3]) : 4); if(repeat <= 0) { std::cerr << "Error: number of repetitions must be greater than zero.\n"; return 1; } // Print information about the test const std::size_t num_blocks = matrix_size / block_size; const double app_flop = 2.0 * matrix_size * matrix_size * matrix_size; std::vector<std::vector<double> > gflops; std::vector<std::vector<double> > times; std::vector<std::vector<double> > app_gflops; if(world.rank() == 0) std::cout << "TiledArray: block-sparse matrix multiply test..." << "\nGit HASH: " << TILEDARRAY_REVISION << "\nNumber of nodes = " << world.size() << "\nMatrix size = " << matrix_size << "x" << matrix_size << "\nBlock size = " << block_size << "x" << block_size; // Construct TiledRange std::vector<unsigned int> blocking; blocking.reserve(num_blocks + 1); for(long i = 0l; i <= matrix_size; i += block_size) blocking.push_back(i); std::vector<TiledArray::TiledRange1> blocking2(2, TiledArray::TiledRange1(blocking.begin(), blocking.end())); TiledArray::TiledRange trange(blocking2.begin(), blocking2.end()); TiledArray::SparseShape<float> forced_shape; for(unsigned int left_sparsity = 10; left_sparsity <= 100; left_sparsity += 10){ std::vector<double> inner_gflops, inner_times, inner_app_gflops; for(unsigned int right_sparsity = 10; right_sparsity <= left_sparsity; right_sparsity += 10){ const long l_block_count = (double(left_sparsity) / 100.0) * double(num_blocks * num_blocks); const long r_block_count = (double(right_sparsity) / 100.0) * double(num_blocks * num_blocks); if(world.rank() == 0) std::cout << "\nMemory per left matrix = " << double(l_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB" << "\nMemory per right matrix = " << double(r_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB" << "\nNumber of left blocks = " << l_block_count << " " << 100.0 * double(l_block_count) / double(num_blocks * num_blocks) << "%" << "\nNumber of right blocks = " << r_block_count << " " << 100.0 * double(r_block_count) / double(num_blocks * num_blocks) << "%" << "\nAverage left blocks/node = " << double(l_block_count) / double(world.size()) << "\nAverage right blocks/node = " << double(r_block_count) / double(world.size()) << "\n"; // Construct shape TiledArray::Tensor<float> a_tile_norms(trange.tiles_range(), 0.0), b_tile_norms(trange.tiles_range(), 0.0); if(world.rank() == 0) { world.srand(time(NULL)); for(long count = 0l; count < l_block_count; ++count) { std::size_t index = world.rand() % trange.tiles_range().volume(); // Avoid setting the same tile to non-zero. while(a_tile_norms[index] > TiledArray::SparseShape<float>::threshold()) index = world.rand() % trange.tiles_range().volume(); a_tile_norms[index] = std::sqrt(float(block_size * block_size)); } for(long count = 0l; count < r_block_count; ++count) { std::size_t index = world.rand() % trange.tiles_range().volume(); // Avoid setting the same tile to non-zero. while(b_tile_norms[index] > TiledArray::SparseShape<float>::threshold()) index = world.rand() % trange.tiles_range().volume(); b_tile_norms[index] = std::sqrt(float(block_size * block_size)); } } TiledArray::SparseShape<float> a_shape(world, a_tile_norms, trange), b_shape(world, b_tile_norms, trange); if(left_sparsity == 10){ forced_shape = a_shape; } // Construct and initialize arrays TiledArray::TSpArrayD a(world, trange, a_shape); TiledArray::TSpArrayD b(world, trange, b_shape); TiledArray::TSpArrayD c; a.fill(1.0); b.fill(1.0); // Start clock TiledArray::TSpArrayD::wait_for_lazy_cleanup(world); world.gop.fence(); if(world.rank() == 0) std::cout << "Starting iterations:\n"; double total_time = 0.0, flop = 0.0; // Do matrix multiplication try { for(int i = 0; i < repeat; ++i) { const double start = madness::wall_time(); c("m,n") = (a("m,k") * b("k,n")).set_shape(forced_shape); const double time = madness::wall_time() - start; total_time += time; if(flop < 1.0) flop = 2.0 * c("m,n").sum(); if(world.rank() == 0) std::cout << "Iteration " << i + 1 << " time=" << time << " GFLOPS=" << flop / time / 1.0e9 << " apparent GFLOPS=" << app_flop / time / 1.0e9 << "\n"; std::cout << "C sparsity = " << c.shape().sparsity() << "\n"; } } catch(...) { if(world.rank() == 0) { std::stringstream ss; ss << "left shape = " << a.shape().data() << "\n" << "right shape = " << b.shape().data() << "\n"; std::cout << ss.str(); } throw; } // Stop clock inner_gflops.push_back(double(repeat) * flop / total_time / 1.0e9); inner_times.push_back(total_time / repeat); inner_app_gflops.push_back(double(repeat) * app_flop / total_time / 1.0e9); // Print results if(world.rank() == 0) { std::cout << "Average wall time = " << total_time / double(repeat) << "\nAverage GFLOPS = " << double(repeat) * double(flop) / total_time / 1.0e9 << "\nAverage apparent GFLOPS = " << double(repeat) * double(app_flop) / total_time / 1.0e9 << "\n"; } } gflops.push_back(inner_gflops); times.push_back(inner_times); app_gflops.push_back(inner_app_gflops); } if(world.rank() == 0) { std::cout << "\n--------------------------------------------------------------------------------------------------------\nGFLOPS\n"; print_results(world, gflops); std::cout << "\n--------------------------------------------------------------------------------------------------------\nAverage wall times\n"; print_results(world, times); std::cout << "\n--------------------------------------------------------------------------------------------------------\nApparent GFLOPS\n"; print_results(world, app_gflops); } TiledArray::finalize(); } catch(TiledArray::Exception& e) { std::cerr << "!! TiledArray exception: " << e.what() << "\n"; rc = 1; } catch(madness::MadnessException& e) { std::cerr << "!! MADNESS exception: " << e.what() << "\n"; rc = 1; } catch(SafeMPI::Exception& e) { std::cerr << "!! SafeMPI exception: " << e.what() << "\n"; rc = 1; } catch(std::exception& e) { std::cerr << "!! std exception: " << e.what() << "\n"; rc = 1; } catch(...) { std::cerr << "!! exception: unknown exception\n"; rc = 1; } return rc; }
int main(int argc, char** argv) { int rc = 0; try { // Initialize runtime madness::World& world = madness::initialize(argc, argv); // Get command line arguments if(argc < 2) { std::cout << "Usage: ta_sparse matrix_size block_size [repetitions]\n"; return 0; } const long matrix_size = atol(argv[1]); const long block_size = atol(argv[2]); if(matrix_size <= 0) { std::cerr << "Error: matrix size must greater than zero.\n"; return 1; } if(block_size <= 0) { std::cerr << "Error: block size must greater than zero.\n"; return 1; } if((matrix_size % block_size) != 0ul) { std::cerr << "Error: matrix size must be evenly divisible by block size.\n"; return 1; } const long repeat = (argc >= 4 ? atol(argv[3]) : 4); if(repeat <= 0) { std::cerr << "Error: number of repetitions must greater than zero.\n"; return 1; } // Print information about the test const std::size_t num_blocks = matrix_size / block_size; std::vector<std::vector<double> > gflops; std::vector<std::vector<double> > times; std::cout << "TiledArray: block-sparse matrix multiply test...\n" << "Number of nodes = " << world.size() << "\nMatrix size = " << matrix_size << "x" << matrix_size << "\nBlock size = " << block_size << "x" << block_size; for(unsigned int left_sparsity = 10; left_sparsity <= 100; left_sparsity += 10){ std::vector<double> inner_gflops; std::vector<double> inner_times; for(unsigned int right_sparsity = 10; right_sparsity <= left_sparsity; right_sparsity += 10){ const long l_block_count = (double(left_sparsity) / 100.0) * double(num_blocks * num_blocks); const long r_block_count = (double(right_sparsity) / 100.0) * double(num_blocks * num_blocks); if(world.rank() == 0) std::cout << "\nMemory per left matrix = " << double(l_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB" << "\nMemory per right matrix = " << double(r_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB" << "\nNumber of left blocks = " << l_block_count << " " << left_sparsity << " percent" << "\nNumber of right blocks = " << r_block_count << " " << right_sparsity << " percent" << "\nAverage left blocks/node = " << double(l_block_count) / double(world.size()) << "\nAverage right blocks/node = " << double(r_block_count) / double(world.size()) << "\n"; // Construct TiledRange std::vector<unsigned int> blocking; blocking.reserve(num_blocks + 1); for(long i = 0l; i <= matrix_size; i += block_size) blocking.push_back(i); std::vector<TiledArray::TiledRange1> blocking2(2, TiledArray::TiledRange1(blocking.begin(), blocking.end())); TiledArray::TiledRange trange(blocking2.begin(), blocking2.end()); // Construct shape TiledArray::Tensor<float> a_shape_tensor(trange.tiles(), 0.0), b_shape_tensor(trange.tiles(), 0.0), c_shape_tensor(trange.tiles(), 0.0); if(world.rank() == 0) { world.srand(time(NULL)); const long l_process_block_count = l_block_count / world.size() + (world.rank() < (l_block_count / world.size()) ? 1 : 0); const long r_process_block_count = r_block_count / world.size() + (world.rank() < (r_block_count / world.size()) ? 1 : 0); for(long i = 0; i < l_process_block_count; ++i) a_shape_tensor.data()[world.rand() % trange.tiles().volume()] = 1.0; for(long i = 0; i < r_process_block_count; ++i) b_shape_tensor.data()[world.rand() % trange.tiles().volume()] = 1.0; } TiledArray::SparseShape<float> a_shape(world, a_shape_tensor, trange), b_shape(world, b_shape_tensor, trange), c_shape(world, c_shape_tensor, trange); typedef TiledArray::Array<double, 2, TiledArray::Tensor<double>, TiledArray::SparsePolicy > SpTArray2; // Construct and initialize arrays SpTArray2 a(world, trange, a_shape); SpTArray2 b(world, trange, b_shape); SpTArray2 c(world, trange, c_shape); a.set_all_local(1.0); b.set_all_local(1.0); // Start clock world.gop.fence(); const double wall_time_start = madness::wall_time(); // Do matrix multiplication for(int i = 0; i < repeat; ++i) { c("m,n") = a("m,k") * b("k,n"); world.gop.fence(); if(world.rank() == 0) std::cout << "Iteration " << i + 1 << "\n"; } // Stop clock const double wall_time_stop = madness::wall_time(); const long flop = 2.0 * c("m,n").sum(); inner_gflops.push_back(double(repeat) * double(flop) / (wall_time_stop - wall_time_start) / 1.0e9); inner_times.push_back((wall_time_stop - wall_time_start)/double(repeat)); // Print results if(world.rank() == 0) { std::cout << "Average wall time = " << (wall_time_stop - wall_time_start) / double(repeat) << "\nAverage GFLOPS = " << double(repeat) * double(flop) / (wall_time_stop - wall_time_start) / 1.0e9 << "\n"; } } gflops.push_back(inner_gflops); times.push_back(inner_times); world.gop.fence(); } if(world.rank() == 0){ for(unsigned int i = 0; i < gflops.size(); ++i){ if(i == 0){ std::cout << std::defaultfloat; std::cout << " "; for(unsigned int j = 10; j <= 100; j+=10){ std::cout << " " << j; } std::cout << std::endl; } for(unsigned int j = 0; j < gflops[i].size(); ++j){ if(j == 0){ std::cout << std::defaultfloat; int num = (i+1) * 10; if(num < 100){ std::cout << num << " |"; } else { std::cout << num << "|"; } } std::cout << std::setprecision(3) << std::scientific; std::cout << double(gflops[i][j]) << " "; } std::cout << std::endl; } } if(world.rank() == 0){ for(unsigned int i = 0; i < times.size(); ++i){ if(i == 0){ std::cout << std::defaultfloat; std::cout << " "; for(unsigned int j = 10; j <= 100; j+=10){ std::cout << " " << j; } std::cout << std::endl; } for(unsigned int j = 0; j < times[i].size(); ++j){ if(j == 0){ std::cout << std::defaultfloat; int num = (i+1) * 10; if(num < 100){ std::cout << num << " |"; } else { std::cout << num << "|"; } } std::cout << std::setprecision(3) << std::scientific; std::cout << double(times[i][j]) << " "; } std::cout << std::endl; } } madness::finalize(); } catch(TiledArray::Exception& e) { std::cerr << "!!ERROR TiledArray: " << e.what() << "\n"; rc = 1; } catch(madness::MadnessException& e) { std::cerr << "!!ERROR MADNESS: " << e.what() << "\n"; rc = 1; } catch(SafeMPI::Exception& e) { std::cerr << "!!ERROR SafeMPI: " << e.what() << "\n"; rc = 1; } catch(std::exception& e) { std::cerr << "!!ERROR std: " << e.what() << "\n"; rc = 1; } catch(...) { std::cerr << "!!ERROR: unknown exception\n"; rc = 1; } return rc; }
int main(int argc, char** argv) { // Initialize runtime madness::World& world = madness::initialize(argc, argv); elem::Grid grid(elem::DefaultGrid().Comm()); // Get command line arguments if(argc < 2) { std::cout << "Usage: ta_dense matrix_size block_size [repetitions]\n"; return 0; } const long matrix_size = atol(argv[1]); const long block_size = atol(argv[2]); if (matrix_size <= 0) { std::cerr << "Error: matrix size must greater than zero.\n"; return 1; } if (block_size <= 0) { std::cerr << "Error: block size must greater than zero.\n"; return 1; } if((matrix_size % block_size) != 0ul) { std::cerr << "Error: matrix size must be evenly divisible by block size.\n"; return 1; } const long repeat = (argc >= 4 ? atol(argv[3]) : 5); if (repeat <= 0) { std::cerr << "Error: number of repetitions must greater than zero.\n"; return 1; } const std::size_t num_blocks = matrix_size / block_size; const std::size_t block_count = num_blocks * num_blocks; if(world.rank() == 0) std::cout << "TiledArray: dense matrix multiply test...\n" << "Number of nodes = " << world.size() << "\nMatrix size = " << matrix_size << "x" << matrix_size << "\nBlock size = " << block_size << "x" << block_size << "\nMemory per matrix = " << double(matrix_size * matrix_size * sizeof(double)) / 1.0e9 << " GB\nNumber of blocks = " << block_count << "\nAverage blocks/node = " << double(block_count) / double(world.size()) << "\n"; // Construct TiledRange std::vector<unsigned int> blocking; blocking.reserve(num_blocks + 1); for(std::size_t i = 0; i <= matrix_size; i += block_size) blocking.push_back(i); std::vector<TiledArray::TiledRange1> blocking2(2, TiledArray::TiledRange1(blocking.begin(), blocking.end())); TiledArray::TiledRange trange(blocking2.begin(), blocking2.end()); // Construct and initialize arrays TiledArray::Array<double, 2> a = make_random_array(world, trange); TiledArray::Array<double, 2> b = make_random_array(world, trange); TiledArray::Array<double, 2> c(world, trange); if(world.rank() == 0 && matrix_size < 11){ std::cout << "a = \n" << a << std::endl; std::cout << "b = \n" << b << std::endl; } // Start clock world.gop.fence(); const double wall_time_start = madness::wall_time(); // Do matrix multiplication for(int i = 0; i < repeat; ++i) { c("m,n") = a("m,k") * b("k,n"); world.gop.fence(); if(world.rank() == 0) std::cout << "Iteration " << i + 1 << "\n"; } // Stop clock const double wall_time_stop = madness::wall_time(); if(world.rank() == 0){ std::cout << "Average wall time = " << (wall_time_stop - wall_time_start) / double(repeat) << " sec\nAverage GFLOPS = " << double(repeat) * 2.0 * double(matrix_size * matrix_size * matrix_size) / (wall_time_stop - wall_time_start) / 1.0e9 << "\n" << std::endl; } // Copying matrices to elemental elem::DistMatrix<double> a_elem = array_to_elem(a,grid); elem::DistMatrix<double> b_elem = array_to_elem(b,grid); elem::mpi::Barrier(grid.Comm()); if(matrix_size < 11){ Print(a_elem, "a from elem"); Print(b_elem, "b from elem"); } // Timed copy const double wall_time_copy0 = madness::wall_time(); int j = 0; while(j++ < repeat){ a_elem = array_to_elem(a,grid); b_elem = array_to_elem(b,grid); elem::mpi::Barrier(grid.Comm()); } const double wall_time_copy1 = madness::wall_time(); // How long the copy took if(world.rank() == 0){ std::cout << "Spent " << (wall_time_copy1 - wall_time_copy0)/(2.0 * double(repeat)) << " s for an array copy to elemental on average.\n" << std::endl; } // Make the data output array elem::DistMatrix<double> c_elem(matrix_size, matrix_size, grid); elem::Zero(c_elem); elem::mpi::Barrier(grid.Comm()); // Do the multiply const double wt_elem_start = madness::wall_time(); for(std::size_t i = 0; i < repeat; ++i){ elem::Gemm(elem::NORMAL, elem::NORMAL, 1., a_elem, b_elem, 0., c_elem); elem::mpi::Barrier(grid.Comm()); if(grid.Rank() == 0){ std::cout << "Elem Iteration " << i + 1 << "\n"; } } const double wt_elem_end = madness::wall_time(); // Time elemental if(world.rank() == 0){ std::cout << "Average Elemental wall time = " << (wt_elem_end - wt_elem_start) / double(repeat) << " sec\nAverage GFLOPS = " << double(repeat) * 2.0 * double(matrix_size * matrix_size * matrix_size) / (wt_elem_end - wt_elem_start) / 1.0e9 << "\n"; } // copy back to ta int i = 0; const double e_to_t_start = madness::wall_time(); while(i++ < repeat){ TiledArray::elem_to_array(c, c_elem); elem::mpi::Barrier(grid.Comm()); } const double e_to_t_end = madness::wall_time(); if(world.rank() == 0){ std::cout << "Copying to TA from Elemental took " << (e_to_t_end - e_to_t_start)/(double(repeat)) << " s on average." << std::endl; } madness::finalize(); return 0; }