int main(int argc, char *argv[]) { LOGOG_INITIALIZE(); TCLAP::CmdLine cmd("Simple matrix vector multiplication test", ' ', "0.1"); // Define a value argument and add it to the command line. // A value arg defines a flag and a type of value that it expects, // such as "-m matrix". TCLAP::ValueArg<std::string> matrix_arg("m", "matrix", "input matrix file", true, "", "string"); // Add the argument mesh_arg to the CmdLine object. The CmdLine object // uses this Arg to parse the command line. cmd.add( matrix_arg ); TCLAP::ValueArg<unsigned> n_cores_arg("p", "number-cores", "number of cores to use", false, 1, "number"); cmd.add( n_cores_arg ); TCLAP::ValueArg<unsigned> n_mults_arg("n", "number-of-multiplications", "number of multiplications to perform", true, 10, "number"); cmd.add( n_mults_arg ); TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); TCLAP::ValueArg<unsigned> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); // read the number of multiplication to execute unsigned n_mults (n_mults_arg.getValue()); std::string fname_mat (matrix_arg.getValue()); FormatterCustom *custom_format (new FormatterCustom); logog::Cout *logogCout(new logog::Cout); logogCout->SetFormatter(*custom_format); logog::LogFile *logog_file(NULL); if (! output_arg.getValue().empty()) { logog_file = new logog::LogFile(output_arg.getValue().c_str()); logog_file->SetFormatter( *custom_format ); } // read number of threads unsigned n_threads (n_cores_arg.getValue()); INFO("%s was build with compiler %s", argv[0], BaseLib::BuildInfo::cmake_cxx_compiler.c_str()); #ifdef NDEBUG INFO("CXX_FLAGS: %s %s", BaseLib::BuildInfo::cmake_cxx_flags.c_str(), BaseLib::BuildInfo::cmake_cxx_flags_release.c_str()); #else INFO("CXX_FLAGS: %s %s", BaseLib::BuildInfo::cmake_cxx_flags.c_str(), BaseLib::BuildInfo::cmake_cxx_flags_debug.c_str()); #endif #ifdef UNIX const int max_host_name_len (255); char *hostname(new char[max_host_name_len]); if (gethostname(hostname, max_host_name_len) == 0) INFO("hostname: %s", hostname); delete [] host_name_len; #endif // *** reading matrix in crs format from file std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); double *A(NULL); unsigned *iA(NULL), *jA(NULL), n; if (in) { INFO("reading matrix from %s ...", fname_mat.c_str()); BaseLib::RunTime timer; timer.start(); CS_read(in, n, iA, jA, A); INFO("\t- took %e s", timer.elapsed()); } else { INFO("error reading matrix from %s", fname_mat.c_str()); return -1; } unsigned nnz(iA[n]); INFO("\tParameters read: n=%d, nnz=%d", n, nnz); #ifdef _OPENMP omp_set_num_threads(n_threads); unsigned *mat_entries_per_core(new unsigned[n_threads]); for (unsigned k(0); k<n_threads; k++) { mat_entries_per_core[k] = 0; } OPENMP_LOOP_TYPE i; { #pragma omp parallel for for (i = 0; i < n; i++) { mat_entries_per_core[omp_get_thread_num()] += iA[i + 1] - iA[i]; } } INFO("*** work per core ***"); for (unsigned k(0); k<n_threads; k++) { INFO("\t%d\t%d", k, mat_entries_per_core[k]); } #endif #ifdef _OPENMP omp_set_num_threads(n_threads); MathLib::CRSMatrixOpenMP<double, unsigned> mat (n, iA, jA, A); #else MathLib::CRSMatrix<double, unsigned> mat (n, iA, jA, A); #endif double *x(new double[n]); double *y(new double[n]); for (unsigned k(0); k<n; ++k) x[k] = 1.0; INFO("*** %d matrix vector multiplications (MVM) with Toms amuxCRS (%d threads) ...", n_mults, n_threads); BaseLib::RunTime run_timer; BaseLib::CPUTime cpu_timer; run_timer.start(); cpu_timer.start(); for (std::size_t k(0); k<n_mults; k++) { mat.amux (1.0, x, y); } INFO("\t[MVM] - took %e sec cpu time, %e sec run time", cpu_timer.elapsed(), run_timer.elapsed()); delete [] x; delete [] y; delete custom_format; delete logogCout; delete logog_file; LOGOG_SHUTDOWN(); return 0; }
int main(int argc, char *argv[]) { if (argc < 4) { std::cout << "Usage: " << argv[0] << " num_of_threads matrix number_of_multiplications resultfile" << std::endl; exit (1); } // read number of threads unsigned n_threads (1); n_threads = atoi (argv[1]); // read the number of multiplication to execute unsigned n_mults (0); n_mults = atoi (argv[3]); std::string fname_mat (argv[2]); bool verbose (true); // *** reading matrix in crs format from file std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); double *A(NULL); unsigned *iA(NULL), *jA(NULL), n; if (in) { if (verbose) { std::cout << "reading matrix from " << fname_mat << " ... " << std::flush; } RunTimeTimer timer; timer.start(); CS_read(in, n, iA, jA, A); timer.stop(); if (verbose) { std::cout << "ok, " << timer.elapsed() << " s)" << std::endl; } } else { std::cout << "error reading matrix from " << fname_mat << std::endl; } unsigned nnz(iA[n]); if (verbose) { std::cout << "Parameters read: n=" << n << ", nnz=" << nnz << std::endl; } #ifdef _OPENMP omp_set_num_threads(n_threads); MathLib::CRSMatrixOpenMP<double, unsigned> mat (n, iA, jA, A); #else MathLib::CRSMatrix<double, unsigned> mat (n, iA, jA, A); #endif // CRSMatrixPThreads<double> mat (n, iA, jA, A, n_threads); std::cout << mat.getNRows() << " x " << mat.getNCols() << std::endl; double *x(new double[n]); double *y(new double[n]); for (unsigned k(0); k<n; ++k) x[k] = 1.0; if (verbose) { std::cout << "matrix vector multiplication with Toms amuxCRS (" << n_threads << " threads) ... " << std::flush; } RunTimeTimer run_timer; CPUTimeTimer cpu_timer; run_timer.start(); cpu_timer.start(); for (size_t k(0); k<n_mults; k++) { mat.amux (1.0, x, y); } cpu_timer.stop(); run_timer.stop(); if (verbose) { std::cout << "done [" << cpu_timer.elapsed() << " sec cpu time], [" << run_timer.elapsed() << " sec run time]" << std::endl; } else { if (argc == 5) { std::ofstream result_os (argv[4], std::ios::app); if (result_os) { result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl; } result_os.close(); } else { std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl; } } delete [] x; delete [] y; return 0; }