int main(int argc, char *argv[]) { if (argc < 3) { std::cout << "Usage: " << argv[0] << " input-matrix output-matrix" << std::endl; return 1; } std::string fname_mat (argv[1]); bool verbose (true); // *** reading matrix in crs format from file std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); double *A(NULL); unsigned *iA(NULL), *jA(NULL), n; if (in) { if (verbose) { std::cout << "reading matrix from " << fname_mat << " ... " << std::flush; } BaseLib::RunTime timer; timer.start(); CS_read(in, n, iA, jA, A); in.close(); timer.stop(); if (verbose) { std::cout << "ok, " << timer.elapsed() << " s" << std::endl; } } else { std::cout << "error reading matrix from " << fname_mat << std::endl; } unsigned nnz(iA[n]); if (verbose) { std::cout << "Parameters read: n=" << n << ", nnz=" << nnz << std::endl; } MathLib::CRSMatrix<double, unsigned> *mat (new MathLib::CRSMatrix<double, unsigned>(n, iA, jA, A)); const unsigned n_rows_cols_to_erase(300); unsigned *rows_cols_to_erase(new unsigned[n_rows_cols_to_erase]); for (unsigned k(0); k<n_rows_cols_to_erase; k++) { rows_cols_to_erase[k] = (k+1)*2; } BaseLib::RunTime timer; std::cout << "erasing " << n_rows_cols_to_erase << " rows and columns ... " << std::flush; timer.start(); mat->eraseEntries(n_rows_cols_to_erase, rows_cols_to_erase); timer.stop(); std::cout << "ok, " << timer.elapsed() << " s" << std::endl; delete[] rows_cols_to_erase; fname_mat = argv[2]; std::ofstream out (fname_mat.c_str(), std::ios::binary); CS_write (out, mat->getNRows(), mat->getRowPtrArray(), mat->getColIdxArray(), mat->getEntryArray()); out.close(); std::cout << "wrote " << fname_mat << " with " << mat->getNRows() << " rows and " << mat->getRowPtrArray()[mat->getNRows()] << " entries" << std::endl; delete mat; }
int main(int argc, char *argv[]) { LOGOG_INITIALIZE(); TCLAP::CmdLine cmd("The purpose of this program is the speed test of sparse matrix vector multiplication (MVM), where the matrix is stored in CRS format. Before executing the MVM a nested dissection reordering is performed.", ' ', "0.1"); // Define a value argument and add it to the command line. // A value arg defines a flag and a type of value that it expects, // such as "-m matrix". TCLAP::ValueArg<std::string> matrix_arg("m","matrix","input matrix file in CRS format",true,"","file name of the matrix in CRS format"); // Add the argument matrix_arg to the CmdLine object. The CmdLine object // uses this Arg to parse the command line. cmd.add( matrix_arg ); // TCLAP::ValueArg<unsigned> n_cores_arg("n", "number-cores", "number of cores to use", true, "1", "number"); // cmd.add( n_cores_arg ); TCLAP::ValueArg<unsigned> n_mults_arg("n", "number-of-multiplications", "number of multiplications to perform", true, 10, "number of multiplications"); cmd.add( n_mults_arg ); TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); TCLAP::ValueArg<bool> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); // read the number of multiplication to execute unsigned n_mults (n_mults_arg.getValue()); std::string fname_mat (matrix_arg.getValue()); bool verbose (verbosity_arg.getValue()); BaseLib::LogogSimpleFormatter *custom_format (new BaseLib::LogogSimpleFormatter); logog::Cout *logogCout(new logog::Cout); logogCout->SetFormatter(*custom_format); INFO("%s was build with compiler %s", argv[0], BaseLib::BuildInfo::cmake_cxx_compiler.c_str()); #ifdef NDEBUG INFO("CXX_FLAGS: %s %s", BaseLib::BuildInfo::cmake_cxx_flags.c_str(), BaseLib::BuildInfo::cmake_cxx_flags_release.c_str()); #else INFO("CXX_FLAGS: %s %s", BaseLib::BuildInfo::cmake_cxx_flags.c_str(), BaseLib::BuildInfo::cmake_cxx_flags_debug.c_str()); #endif #ifdef UNIX const std::size_t length(256); char *hostname(new char[length]); gethostname (hostname, length); INFO("hostname: %s", hostname); delete [] hostname; #endif // *** reading matrix in crs format from file std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); double *A(NULL); unsigned *iA(NULL), *jA(NULL), n; if (in) { if (verbose) { INFO("reading matrix from %s ...", fname_mat.c_str()); } BaseLib::RunTime timer; timer.start(); CS_read(in, n, iA, jA, A); if (verbose) { INFO("\t- took %e s", timer.elapsed()); } } else { ERR("error reading matrix from %s", fname_mat.c_str()); return -1; } unsigned nnz(iA[n]); if (verbose) { INFO("\tParameters read: n=%d, nnz=%d", n, nnz); } MathLib::CRSMatrixReordered mat(n, iA, jA, A); double *x(new double[n]); double *y(new double[n]); for (unsigned k(0); k<n; ++k) x[k] = 1.0; // create time measurement objects BaseLib::RunTime run_timer; BaseLib::CPUTime cpu_timer; // calculate the nested dissection reordering if (verbose) { INFO("*** calculating nested dissection (ND) permutation of matrix ..."); } run_timer.start(); cpu_timer.start(); MathLib::Cluster cluster_tree(n, iA, jA); unsigned *op_perm(new unsigned[n]); unsigned *po_perm(new unsigned[n]); for (unsigned k(0); k<n; k++) op_perm[k] = po_perm[k] = k; cluster_tree.createClusterTree(op_perm, po_perm, 1000); if (verbose) { INFO("\t[ND] - took %e sec \t%e sec", cpu_timer.elapsed(), run_timer.elapsed()); } // applying the nested dissection reordering if (verbose) { INFO("\t[ND] applying nested dissection permutation to FEM matrix ... "); } run_timer.start(); cpu_timer.start(); mat.reorderMatrix(op_perm, po_perm); if (verbose) { INFO("\t[ND]: - took %e sec\t%e sec", cpu_timer.elapsed(), run_timer.elapsed()); } #ifndef NDEBUG // std::string fname_mat_out(fname_mat.substr(0,fname_mat.length()-4)+"-reordered.bin"); // std::ofstream os (fname_mat_out.c_str(), std::ios::binary); // if (os) { // std::cout << "writing matrix to " << fname_mat_out << " ... " << std::flush; // CS_write(os, n, mat.getRowPtrArray(), mat.getColIdxArray(), mat.getEntryArray()); // std::cout << "done" << std::endl; // } #endif if (verbose) { INFO("*** %d matrix vector multiplications (MVM) with Toms amuxCRS ... ", n_mults); } run_timer.start(); cpu_timer.start(); for (std::size_t k(0); k<n_mults; k++) { mat.amux (1.0, x, y); } if (verbose) { INFO("\t[MVM] - took %e sec\t %e sec", cpu_timer.elapsed(), run_timer.elapsed()); } delete [] x; delete [] y; delete custom_format; delete logogCout; LOGOG_SHUTDOWN(); return 0; }
int main(int argc, char *argv[]) { LOGOG_INITIALIZE(); TCLAP::CmdLine cmd("Simple matrix vector multiplication test", ' ', "0.1"); // Define a value argument and add it to the command line. // A value arg defines a flag and a type of value that it expects, // such as "-m matrix". TCLAP::ValueArg<std::string> matrix_arg("m", "matrix", "input matrix file", true, "", "string"); // Add the argument mesh_arg to the CmdLine object. The CmdLine object // uses this Arg to parse the command line. cmd.add( matrix_arg ); TCLAP::ValueArg<unsigned> n_cores_arg("p", "number-cores", "number of cores to use", false, 1, "number"); cmd.add( n_cores_arg ); TCLAP::ValueArg<unsigned> n_mults_arg("n", "number-of-multiplications", "number of multiplications to perform", true, 10, "number"); cmd.add( n_mults_arg ); TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); TCLAP::ValueArg<unsigned> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); // read the number of multiplication to execute unsigned n_mults (n_mults_arg.getValue()); std::string fname_mat (matrix_arg.getValue()); FormatterCustom *custom_format (new FormatterCustom); logog::Cout *logogCout(new logog::Cout); logogCout->SetFormatter(*custom_format); logog::LogFile *logog_file(NULL); if (! output_arg.getValue().empty()) { logog_file = new logog::LogFile(output_arg.getValue().c_str()); logog_file->SetFormatter( *custom_format ); } // read number of threads unsigned n_threads (n_cores_arg.getValue()); INFO("%s was build with compiler %s", argv[0], BaseLib::BuildInfo::cmake_cxx_compiler.c_str()); #ifdef NDEBUG INFO("CXX_FLAGS: %s %s", BaseLib::BuildInfo::cmake_cxx_flags.c_str(), BaseLib::BuildInfo::cmake_cxx_flags_release.c_str()); #else INFO("CXX_FLAGS: %s %s", BaseLib::BuildInfo::cmake_cxx_flags.c_str(), BaseLib::BuildInfo::cmake_cxx_flags_debug.c_str()); #endif #ifdef UNIX const int max_host_name_len (255); char *hostname(new char[max_host_name_len]); if (gethostname(hostname, max_host_name_len) == 0) INFO("hostname: %s", hostname); delete [] host_name_len; #endif // *** reading matrix in crs format from file std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); double *A(NULL); unsigned *iA(NULL), *jA(NULL), n; if (in) { INFO("reading matrix from %s ...", fname_mat.c_str()); BaseLib::RunTime timer; timer.start(); CS_read(in, n, iA, jA, A); INFO("\t- took %e s", timer.elapsed()); } else { INFO("error reading matrix from %s", fname_mat.c_str()); return -1; } unsigned nnz(iA[n]); INFO("\tParameters read: n=%d, nnz=%d", n, nnz); #ifdef _OPENMP omp_set_num_threads(n_threads); unsigned *mat_entries_per_core(new unsigned[n_threads]); for (unsigned k(0); k<n_threads; k++) { mat_entries_per_core[k] = 0; } OPENMP_LOOP_TYPE i; { #pragma omp parallel for for (i = 0; i < n; i++) { mat_entries_per_core[omp_get_thread_num()] += iA[i + 1] - iA[i]; } } INFO("*** work per core ***"); for (unsigned k(0); k<n_threads; k++) { INFO("\t%d\t%d", k, mat_entries_per_core[k]); } #endif #ifdef _OPENMP omp_set_num_threads(n_threads); MathLib::CRSMatrixOpenMP<double, unsigned> mat (n, iA, jA, A); #else MathLib::CRSMatrix<double, unsigned> mat (n, iA, jA, A); #endif double *x(new double[n]); double *y(new double[n]); for (unsigned k(0); k<n; ++k) x[k] = 1.0; INFO("*** %d matrix vector multiplications (MVM) with Toms amuxCRS (%d threads) ...", n_mults, n_threads); BaseLib::RunTime run_timer; BaseLib::CPUTime cpu_timer; run_timer.start(); cpu_timer.start(); for (std::size_t k(0); k<n_mults; k++) { mat.amux (1.0, x, y); } INFO("\t[MVM] - took %e sec cpu time, %e sec run time", cpu_timer.elapsed(), run_timer.elapsed()); delete [] x; delete [] y; delete custom_format; delete logogCout; delete logog_file; LOGOG_SHUTDOWN(); return 0; }
int main(int argc, char *argv[]) { LOGOG_INITIALIZE(); TCLAP::CmdLine cmd("The purpose of this program is the speed test of sparse matrix vector multiplication (MVM) employing OpenMP technique, where the matrix is stored in CRS format. Before executing the MVM a nested dissection reordering is performed.", ' ', "0.1"); // Define a value argument and add it to the command line. // A value arg defines a flag and a type of value that it expects, // such as "-m matrix". TCLAP::ValueArg<std::string> matrix_arg("m","matrix","input matrix file in CRS format",true,"","file name of the matrix in CRS format"); // Add the argument matrix_arg to the CmdLine object. The CmdLine object // uses this Arg to parse the command line. cmd.add( matrix_arg ); TCLAP::ValueArg<unsigned> n_cores_arg("p", "number-cores", "number of cores to use", true, 1, "number of cores"); cmd.add( n_cores_arg ); TCLAP::ValueArg<unsigned> n_mults_arg("n", "number-of-multiplications", "number of multiplications to perform", true, 10, "number of multiplications"); cmd.add( n_mults_arg ); TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); TCLAP::ValueArg<bool> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); // read the number of multiplication to execute unsigned n_mults (n_mults_arg.getValue()); std::string fname_mat (matrix_arg.getValue()); bool verbose (verbosity_arg.getValue()); FormatterCustom *custom_format (new FormatterCustom); logog::Cout *logogCout(new logog::Cout); logogCout->SetFormatter(*custom_format); // read number of threads unsigned n_threads (n_cores_arg.getValue()); #ifdef OGS_BUILD_INFO INFO("%s was build with compiler %s", argv[0], CMAKE_CXX_COMPILER); if (std::string(CMAKE_BUILD_TYPE).compare("Release") == 0) { INFO("CXX_FLAGS: %s %s", CMAKE_CXX_FLAGS, CMAKE_CXX_FLAGS_RELEASE); } else { INFO("CXX_FLAGS: %s %s", CMAKE_CXX_FLAGS, CMAKE_CXX_FLAGS_DEBUG); } #endif #ifdef UNIX const size_t length(256); char *hostname(new char[length]); gethostname (hostname, length); INFO("hostname: %s", hostname); delete [] hostname; #endif // *** reading matrix in crs format from file std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); double *A(NULL); unsigned *iA(NULL), *jA(NULL), n; if (in) { if (verbose) { INFO("reading matrix from %s ...", fname_mat.c_str()); } BaseLib::RunTime timer; timer.start(); CS_read(in, n, iA, jA, A); timer.stop(); if (verbose) { INFO("\t- took %e s", timer.elapsed()); } } else { ERR("error reading matrix from %s", fname_mat.c_str()); return -1; } unsigned nnz(iA[n]); if (verbose) { INFO("\tParameters read: n=%d, nnz=%d", n, nnz); } #ifdef _OPENMP omp_set_num_threads(n_threads); MathLib::CRSMatrixReorderedOpenMP mat(n, iA, jA, A); #else delete [] iA; delete [] jA; delete [] A; ERROR("program is not using OpenMP"); return -1; #endif double *x(new double[n]); double *y(new double[n]); for (unsigned k(0); k<n; ++k) x[k] = 1.0; // create time measurement objects BaseLib::RunTime run_timer; BaseLib::CPUTime cpu_timer; // calculate the nested dissection reordering if (verbose) { INFO("*** calculating nested dissection (ND) permutation of matrix ..."); } run_timer.start(); cpu_timer.start(); MathLib::Cluster cluster_tree(n, iA, jA); unsigned *op_perm(new unsigned[n]); unsigned *po_perm(new unsigned[n]); for (unsigned k(0); k<n; k++) op_perm[k] = po_perm[k] = k; cluster_tree.createClusterTree(op_perm, po_perm, 1000); cpu_timer.stop(); run_timer.stop(); if (verbose) { INFO("\t[ND] - took %e sec \t%e sec", cpu_timer.elapsed(), run_timer.elapsed()); } // applying the nested dissection reordering if (verbose) { INFO("\t[ND] applying nested dissection permutation to FEM matrix ... "); } run_timer.start(); cpu_timer.start(); mat.reorderMatrix(op_perm, po_perm); cpu_timer.stop(); run_timer.stop(); if (verbose) { INFO("\t[ND]: - took %e sec\t%e sec", cpu_timer.elapsed(), run_timer.elapsed()); } if (verbose) { INFO("*** %d matrix vector multiplications (MVM) with Toms amuxCRS (%d threads)... ", n_mults, n_threads); } run_timer.start(); cpu_timer.start(); for (size_t k(0); k<n_mults; k++) { mat.amux (1.0, x, y); } cpu_timer.stop(); run_timer.stop(); if (verbose) { INFO("\t[MVM] - took %e sec cpu time, %e sec run time", cpu_timer.elapsed(), run_timer.elapsed()); } delete [] x; delete [] y; delete custom_format; delete logogCout; LOGOG_SHUTDOWN(); return 0; }
int main(int argc, char *argv[]) { LOGOG_INITIALIZE(); TCLAP::CmdLine cmd("Simple matrix vector multiplication test employing pthreads", ' ', "0.1"); // Define a value argument and add it to the command line. // A value arg defines a flag and a type of value that it expects, // such as "-m matrix". TCLAP::ValueArg<std::string> matrix_arg("m", "matrix", "input matrix file", true, "", "string"); // Add the argument mesh_arg to the CmdLine object. The CmdLine object // uses this Arg to parse the command line. cmd.add( matrix_arg ); TCLAP::ValueArg<unsigned> n_cores_arg("p", "number-cores", "number of cores to use", false, 1, "number"); cmd.add( n_cores_arg ); TCLAP::ValueArg<unsigned> n_mults_arg("n", "number-of-multiplications", "number of multiplications to perform", true, 10, "number"); cmd.add( n_mults_arg ); TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); TCLAP::ValueArg<bool> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); std::string fname_mat (matrix_arg.getValue()); FormatterCustom *custom_format (new FormatterCustom); logog::Cout *logogCout(new logog::Cout); logogCout->SetFormatter(*custom_format); logog::LogFile *logog_file(NULL); if (! output_arg.getValue().empty()) { logog_file = new logog::LogFile(output_arg.getValue().c_str()); logog_file->SetFormatter( *custom_format ); } #ifdef OGS_BUILD_INFO INFO("%s was build with compiler %s", argv[0], CMAKE_CXX_COMPILER); #ifdef CMAKE_BUILD_TYPE if (std::string(CMAKE_BUILD_TYPE).compare("Release") == 0) { INFO("CXX_FLAGS: %s %s", CMAKE_CXX_FLAGS, CMAKE_CXX_FLAGS_RELEASE); } else { INFO("CXX_FLAGS: %s %s", CMAKE_CXX_FLAGS, CMAKE_CXX_FLAGS_DEBUG); } #else INFO("CXX_FLAGS: %s", CMAKE_CXX_FLAGS); #endif #endif #ifdef UNIX const int max_host_name_len (255); char *hostname(new char[max_host_name_len]); if (gethostname(hostname, max_host_name_len) == 0) INFO("hostname: %s", hostname); delete [] host_name_len; #endif // *** reading matrix in crs format from file std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); double *A(NULL); unsigned *iA(NULL), *jA(NULL), n; if (in) { INFO("reading matrix from %s ...", fname_mat.c_str()); BaseLib::RunTime timer; timer.start(); CS_read(in, n, iA, jA, A); timer.stop(); INFO("\t- took %e s", timer.elapsed()); } else { ERR("error reading matrix from %s", fname_mat.c_str()); return -1; } unsigned nnz(iA[n]); INFO("\tParameters read: n=%d, nnz=%d", n, nnz); #ifdef HAVE_PTHREADS unsigned n_threads(n_cores_arg.getValue()); MathLib::CRSMatrixPThreads<double> mat (n, iA, jA, A, n_threads); double *x(new double[n]); double *y(new double[n]); for (unsigned k(0); k<n; ++k) x[k] = 1.0; // read the number of multiplication to execute unsigned n_mults (n_mults_arg.getValue()); INFO("*** %d matrix vector multiplications (MVM) with Toms amuxCRS (%d threads) ...", n_mults, n_threads); BaseLib::RunTime run_timer; BaseLib::CPUTime cpu_timer; run_timer.start(); cpu_timer.start(); for (size_t k(0); k<n_mults; k++) { mat.amux (1.0, x, y); } cpu_timer.stop(); run_timer.stop(); INFO("\t[MVM] - took %e sec cpu time, %e sec run time", cpu_timer.elapsed(), run_timer.elapsed()); delete [] x; delete [] y; #endif delete custom_format; delete logogCout; delete logog_file; LOGOG_SHUTDOWN(); return 0; }
bool PETScLinearSolver::solve(PETScMatrix& A, PETScVector& b, PETScVector& x) { BaseLib::RunTime wtimer; wtimer.start(); // define TEST_MEM_PETSC #ifdef TEST_MEM_PETSC PetscLogDouble mem1, mem2; PetscMemoryGetCurrentUsage(&mem1); #endif #if (PETSC_VERSION_NUMBER > 3040) KSPSetOperators(_solver, A.getRawMatrix(), A.getRawMatrix()); #else KSPSetOperators(_solver, A.getRawMatrix(), A.getRawMatrix(), DIFFERENT_NONZERO_PATTERN); #endif KSPSolve(_solver, b.getRawVector(), x.getRawVector()); KSPConvergedReason reason; KSPGetConvergedReason(_solver, &reason); bool converged = true; if (reason > 0) { const char* ksp_type; const char* pc_type; KSPGetType(_solver, &ksp_type); PCGetType(_pc, &pc_type); PetscPrintf(PETSC_COMM_WORLD, "\n================================================"); PetscPrintf(PETSC_COMM_WORLD, "\nLinear solver %s with %s preconditioner", ksp_type, pc_type); PetscInt its; KSPGetIterationNumber(_solver, &its); PetscPrintf(PETSC_COMM_WORLD, "\nconverged in %d iterations", its); switch (reason) { case KSP_CONVERGED_RTOL: PetscPrintf(PETSC_COMM_WORLD, " (relative convergence criterion fulfilled)."); break; case KSP_CONVERGED_ATOL: PetscPrintf(PETSC_COMM_WORLD, " (absolute convergence criterion fulfilled)."); break; default: PetscPrintf(PETSC_COMM_WORLD, "."); } PetscPrintf(PETSC_COMM_WORLD, "\n================================================\n"); } else if (reason == KSP_DIVERGED_ITS) { const char* ksp_type; const char* pc_type; KSPGetType(_solver, &ksp_type); PCGetType(_pc, &pc_type); PetscPrintf(PETSC_COMM_WORLD, "\nLinear solver %s with %s preconditioner", ksp_type, pc_type); PetscPrintf(PETSC_COMM_WORLD, "\nWarning: maximum number of iterations reached.\n"); } else { converged = false; if (reason == KSP_DIVERGED_INDEFINITE_PC) { PetscPrintf(PETSC_COMM_WORLD, "\nDivergence because of indefinite preconditioner,"); PetscPrintf(PETSC_COMM_WORLD, "\nTry to run again with " "-pc_factor_shift_positive_definite option.\n"); } else if (reason == KSP_DIVERGED_BREAKDOWN_BICG) { PetscPrintf(PETSC_COMM_WORLD, "\nKSPBICG method was detected so the method could not " "continue to enlarge the Krylov space."); PetscPrintf(PETSC_COMM_WORLD, "\nTry to run again with another solver.\n"); } else if (reason == KSP_DIVERGED_NONSYMMETRIC) { PetscPrintf(PETSC_COMM_WORLD, "\nMatrix or preconditioner is unsymmetric but KSP " "requires symmetric.\n"); } else { PetscPrintf(PETSC_COMM_WORLD, "\nDivergence detected, use command option " "-ksp_monitor or -log_summary to check the details.\n"); } } #ifdef TEST_MEM_PETSC PetscMemoryGetCurrentUsage(&mem2); PetscPrintf( PETSC_COMM_WORLD, "###Memory usage by solver. Before: %f After: %f Increase: %d\n", mem1, mem2, (int)(mem2 - mem1)); #endif _elapsed_ctime += wtimer.elapsed(); return converged; }