int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); std::string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 0; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Level to prune tree from bottom"); int fill_level = -1; clp.setOption("fill-level", &fill_level, "Fill level"); int rows_per_team = 4096; clp.setOption("rows-per-team", &rows_per_team, "Workset size"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); #if (defined(HAVE_SHYLUTACHO_SCOTCH) && (defined(HAVE_SHYLUTACHO_CHOLMOD) \ || defined(HAVE_SHYLUTACHO_AMESOS))) r_val = exampleIncompleteSymbolicFactorization<exec_space> (file_input, treecut, prunecut, fill_level, rows_per_team, verbose); #else r_val = -1; std::cout << "Scotch or Cholmod is NOT configured in Trilinos" << std::endl; #endif exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program demonstrates symbolic factorization algorithm on Kokkos::Serial execution space.\n"); int fill_level = 0; clp.setOption("fill-level", &fill_level, "Fill level for incomplete factorization"); int league_size = 1; clp.setOption("league-size", &league_size, "League size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 0; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int minblksize = 0; clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering"); int seed = 0; clp.setOption("seed", &seed, "Seed for random number generator in graph partition"); bool scotch = true; clp.setOption("enable-scotch", "disable-scotch", &scotch, "Flag for Scotch"); bool camd = true; clp.setOption("enable-camd", "disable-camd", &camd, "Flag for CAMD"); bool symbolic = true; clp.setOption("enable-symbolic", "disable-symbolic", &symbolic, "Flag for sybolic factorization"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { Kokkos::initialize(); r_val = exampleSymbolicFactor <value_type,ordinal_type,size_type,exec_space,void> (file_input, treecut, minblksize, seed, fill_level, league_size, scotch, camd, symbolic, verbose); Kokkos::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of IChol algorithms on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); bool team_interface = false; clp.setOption("enable-team-interface", "disable-team-interface", &team_interface, "Flag for team interface"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int nrhs = 1; clp.setOption("nrhs", &nrhs, "Number of right hand side"); int nb = nrhs; clp.setOption("nb", &nb, "Blocksize of right hand side"); int niter = 100; clp.setOption("niter", &niter, "Number of iterations for testing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); r_val = exampleTriSolvePerformance <value_type,ordinal_type,size_type,exec_space,void> (file_input, nrhs, nb, niter, nthreads, max_task_dependence, team_size, team_interface, (nthreads != 1), verbose); exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); int mmin = 1000; clp.setOption("mmin", &mmin, "C(mmin,mmin)"); int mmax = 8000; clp.setOption("mmax", &mmax, "C(mmax,mmax)"); int minc = 1000; clp.setOption("minc", &minc, "Increment of m"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(); host_space::initialize(nthreads, numa, core_per_numa); r_val = exampleDenseMatrixBase<exec_space> (mmin, mmax, minc, verbose); exec_space::finalize(); host_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example interface of solver Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int nrhs = 1; clp.setOption("nrhs", &nrhs, "Numer of right hand side"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); exec_space::print_configuration(cout, true); r_val = exampleCholDirectSolver <value_type,ordinal_type,size_type,exec_space,void> (file_input, nrhs, nthreads, verbose); exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of task data parallelism (barrier) on Kokkos::Threads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); int league_size = 1; clp.setOption("league-size", &league_size, "League size"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); int ntasks = 100; clp.setOption("ntasks", &ntasks, "Number of tasks to be spawned"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); exec_space::print_configuration(cout, true); r_val = exampleKokkosDataData<exec_space,value_type>((ntasks > MAXTASKS ? MAXTASKS : ntasks), league_size, team_size, verbose); exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program demonstrates TriSolveUnblocked algorithm on Kokkos::Serial execution space.\n"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int nrhs = 1; clp.setOption("nrhs", &nrhs, "Number of right hand side"); int nb = nrhs; clp.setOption("nb", &nb, "Blocksize of right hand side"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { Kokkos::initialize(); r_val = exampleTriSolveByBlocks <value_type,ordinal_type,size_type,exec_space,void> (file_input, nrhs, nb, 1, max_task_dependence, team_size, verbose); Kokkos::finalize(); } return r_val; }
int main(int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Intrepid2::DynRankView_PerfTest01.\n"); int nworkset = 8; clp.setOption("nworkset", &nworkset, "# of worksets"); int C = 4096; clp.setOption("C", &C, "# of Cells in a workset"); int order = 2; clp.setOption("order", &order, "cubature order"); bool verbose = true; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; Kokkos::initialize(); if (verbose) std::cout << "Testing datatype double\n"; const int r_val_double = Intrepid2::Test::ComputeBasis_HGRAD <double,Kokkos::Cuda>(nworkset, C, order, verbose); return r_val_double; }
int main(int argc, char *argv[]) { int r_val = 0; Teuchos::CommandLineProcessor clp; int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { cout << "Testing Kokkos::Qthread:: Failed in parsing command line input" << endl; return -1; } if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) { return 0; } unsigned threads_count = 0; if (Kokkos::hwloc::available()) { const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); const unsigned one = 1u; threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core); cout << " = Kokkos::hwloc = " << endl << "NUMA count = " << numa_count << endl << "Cores per NUMA = " << cores_per_numa << endl << "Threads per core = " << threads_per_core << endl << "Threads count = " << threads_count << endl; } else { threads_count = thread::hardware_concurrency(); cout << " = std::thread::hardware_concurrency = " << endl << "Threads count = " << threads_count << endl; } if (static_cast<unsigned int>(nthreads) > threads_count) { ++r_val; cout << "Testing Kokkos::Threads:: Failed that the given nthreads is greater than the number of threads counted" << endl; } else { Kokkos::Threads::initialize( nthreads ); Kokkos::Threads::print_configuration( cout , true /* detailed */ ); //__TestSuiteDoUnitTests__(float,int,unsigned int,Kokkos::Serial,void); //__TestSuiteDoUnitTests__(float,long,unsigned long,Kokkos::Serial,void); __TestSuiteDoUnitTests__(double,int,unsigned int,Kokkos::Threads,void); // __TestSuiteDoUnitTests__(double,long,unsigned long,Kokkos::Serial,void); // 
__TestSuiteDoUnitTests__(complex<float>,int,unsigned int,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<float>,long,unsigned long,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<double>,int,unsigned int,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<double>,long,unsigned long,Kokkos::Serial,void); Kokkos::Threads::finalize(); } string eval; __EVAL_STRING__(r_val, eval); cout << "Testing Kokkos::Threads::" << eval << endl; return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of dense Herk on Kokkos::Threads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); int max_concurrency = 250000; clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks"); int memory_pool_grain_size = 16; clp.setOption("memory-pool-grain-size", &memory_pool_grain_size, "Memorypool chunk size (12 - 16)"); int mkl_nthreads = 1; clp.setOption("mkl-nthreads", &mkl_nthreads, "MKL threads for nested parallelism"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); int mmin = 1000; clp.setOption("mmin", &mmin, "C(mmin,mmin)"); int mmax = 8000; clp.setOption("mmax", &mmax, "C(mmax,mmax)"); int minc = 1000; clp.setOption("minc", &minc, "Increment of m"); int k = 1024; clp.setOption("k", &k, "A(mmax,k) or A(k,mmax) according to transpose flags"); int mb = 256; clp.setOption("mb", &mb, "Blocksize"); bool check = true; clp.setOption("enable-check", "disable-check", &check, "Flag for check solution"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); std::cout << std::endl << "DenseHerkByBlocks:: Upper, ConjTranspose, Variant::One (external)" << std::endl; r_val = exampleDenseHerkByBlocks <Uplo::Upper,Trans::ConjTranspose,Variant::One,exec_space> (mmin, mmax, minc, k, mb, max_concurrency, memory_pool_grain_size, mkl_nthreads, 
check, verbose); exec_space::finalize(); } return r_val; }
// Driver for the NSSI "xfer" data-transfer service test. A single binary that
// can run as the server, the client, or both (splitting MPI_COMM_WORLD so
// each side sees its own communicator). Returns 0 on TEST PASSED, 1 otherwise.
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;     // rank/size inside the post-split communicator
    int rc = 0;                   // NSSI return code, folded into the final verdict
    nssi_service xfer_svc;        // handle to the remote service (client side)

    // Which server this client talks to, and this client's rank within it.
    int server_index=0;
    int rank_in_server=0;

    int transport_index=-1;       // -1 means "use NSSI_DEFAULT_TRANSPORT"

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    // Only rank 0 writes user-visible output; other ranks write to a black hole.
    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct xfer_args args;

    // Tables backing the enumerated --io-method command-line option.
    const int num_io_methods = 8;
    const int io_method_vals[] = {
            XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC,
            XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC,
            XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC,
            XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC};
    const char * io_method_names[] = {
            "write-encode-sync", "write-encode-async",
            "write-rdma-sync", "write-rdma-async",
            "read-encode-sync", "read-encode-async",
            "read-rdma-sync", "read-rdma-async"};

    // Tables backing the enumerated --transport option. Several aliases map
    // to the same transport constant (e.g. "portals" and "ptl").
    const int nssi_transport_list[] = {
            NSSI_RPC_PTL, NSSI_RPC_PTL,
            NSSI_RPC_IB, NSSI_RPC_IB,
            NSSI_RPC_GEMINI, NSSI_RPC_GEMINI,
            NSSI_RPC_BGPDCMF, NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGQPAMI, NSSI_RPC_BGQPAMI,
            NSSI_RPC_MPI};
    const int num_nssi_transports = 11;
    const int nssi_transport_vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    const char * nssi_transport_names[] = {
            "portals", "ptl",
            "infiniband", "ib",
            "gemini", "gni",
            "bgpdcmf", "dcmf",
            "bgqpami", "pami",
            "mpi" };

    // Initialize arguments (defaults; overridden by the command line below).
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.len = 1;
    args.delay = 1;
    args.io_method = XFER_WRITE_RDMA_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.num_servers = 1;
    args.num_threads = 0;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.kill_server_flag = true;
    args.block_distribution = true;

    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;

        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set an option, it must be given a name and default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a users comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");
        parser.setOption("num-servers", &args.num_servers, "Number of server processes");
        parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process");
        parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment");
        parser.setOption("block-distribution", "rr-distribution", &args.block_distribution, "Use a block distribution scheme to assign clients to servers");

        // Set an enumeration command line option for the io_method
        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n"
                "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n"
                "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n"
                "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n"
                "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n"
                "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n"
                "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n"
                "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous");

        // Set an enumeration command line option for the NNTI transport
        parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals|ptl : Cray or Schutt\n"
                "\t\t\tinfiniband|ib : libibverbs\n"
                "\t\t\tgemini|gni : Cray libugni (Gemini or Aries)\n"
                "\t\t\tbgpdcmf|dcmf : IBM BG/P DCMF\n"
                "\t\t\tbgqpami|pami : IBM BG/Q PAMI\n"
                "\t\t\tmpi : isend/irecv implementation\n"
                );

        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw an
           std::exception.  If you want to prevent a command line processor from
           throwing an std::exception (which is important in this program since
           we don't have a try/catch around this) when it encounters an
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!
        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(args.debug_level, "transport_index=%d", transport_index);
    if (transport_index > -1) {
        // Map the alias chosen on the command line to the transport constant.
        args.transport     =nssi_transport_list[transport_index];
        args.transport_name=std::string(nssi_transport_names[transport_index]);
    }
    args.io_method_name=std::string(io_method_names[args.io_method]);

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);

    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Pick a per-rank log file name that reflects the role of this process.
    // NOTE(review): sprintf into fixed fn[1024] could overflow for very long
    // --logfile paths; snprintf would be safer — confirm before changing.
    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting xfer-service test", rank);

#ifdef TRIOS_ENABLE_COMMSPLITTER
    if (args.transport == NSSI_RPC_MPI) {
        MPI_Pcontrol(0);
    }
#endif

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks its
     * running by itself.
     */
    int color = 0;  // color=0-->server, color=1-->client
    if (args.client_flag && args.server_flag) {
        if (np < 2) {
            log_error(debug_level, "Must use at least 2 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Put all the servers as the first ranks.
        if (rank < args.num_servers) {
            color = 0;
            log_debug(debug_level, "rank=%d is a server", rank);
        } else {
            color = 1;  // all others are clients
            log_debug(debug_level, "rank=%d is a client", rank);
        }
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    } else {
        if (args.client_flag) {
            color=1;
            log_debug(debug_level, "rank=%d is a client", rank);
        } else if (args.server_flag) {
            color=0;
            log_debug(debug_level, "rank=%d is a server", rank);
        } else {
            log_error(debug_level, "Must be either a client or a server");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }

    MPI_Comm_rank(comm, &splitrank);
    MPI_Comm_size(comm, &splitsize);

    log_debug(debug_level, "%d: Finished splitting communicators", rank);

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server, or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // If running as both client and server, gather and distribute
    // the server URLs to all the clients.
    if (args.server_flag && args.client_flag) {

        std::string all_urls;

        // This needs to be a vector of chars, not a string
        all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0');

        // Have servers gather their URLs
        if (color == 0) {
            assert(args.num_servers == splitsize);  // these should be equal

            log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str());

            // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD)
            MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR,
                    &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm);
        }

        // broadcast the full set of server urls to all processes
        MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD);

        log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size());

        if (color == 1) {

            // For block distribution scheme use the utility function (in xfer_util.cpp)
            if (args.block_distribution) {
                // Use this utility function to calculate the server_index
                xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
            }

            // Use a simple round robin distribution scheme
            else {
                server_index   = splitrank % args.num_servers;
                rank_in_server = splitrank / args.num_servers;
            }

            // Copy the server url out of the list of urls
            int offset = server_index * NSSI_URL_LEN;

            args.server_url = all_urls.substr(offset, NSSI_URL_LEN);

            log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
        }

        log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str());
    }

    // If running as a client only, have to get the list of servers from the urlfile.
    else if (!args.server_flag && args.client_flag){

        sleep(args.delay);  // give server time to get started

        std::vector< std::string > urlbuf;
        xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm);
        args.num_servers = urlbuf.size();

        // For block distribution scheme use the utility function (in xfer_util.cpp)
        if (args.block_distribution) {
            // Use this utility function to calculate the server_index
            xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
        }

        // Use a simple round robin distribution scheme
        else {
            server_index   = splitrank % args.num_servers;
            rank_in_server = splitrank / args.num_servers;
        }

        args.server_url = urlbuf[server_index];
        log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
    }

    // Server-only mode: publish this server's URL for clients via the url file.
    else if (args.server_flag && !args.client_flag) {
        args.server_url = my_url;

        if (args.url_file.empty()) {
            log_error(debug_level, "Must set --url-file");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm);
    }

    // Set the debug level for the xfer service.
    xfer_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str());

    print_args(out, args, "%");

    log_debug(debug_level, "server_url=%s", args.server_url.c_str());

    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on node 0.
     *  In this example, the server is a single process.
     */
    if (color == 0) {
        rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
    /** The parallel client will execute this branch.  The root node, node 0, of the client
     *  connects with the server, using the \ref nssi_get_service function.  Then the root
     *  broadcasts the service description to the other clients before starting the main
     *  loop of the client code by calling \ref xfer_client_main.
     */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {

            // connect to remote server; retry on timeout, give up on any other error
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str());
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(xfer_debug_level, "could not get svc description: %s", nssi_err_str(rc));
                    break;
                }
            }
        }

        // wait for all the clients to connect
        MPI_Barrier(comm);

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(xfer_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            xfer_client_main(args, xfer_svc, comm);

            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if ((args.kill_server_flag) && (rank_in_server == 0)) {
                log_debug(debug_level, "%d: Halting xfer service", rank);
                rc = nssi_kill(&xfer_svc, 0, 5000);
            }
            rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc);
            if (rc != NSSI_OK) {
                log_error(xfer_debug_level, "could not free svc description: %s", nssi_err_str(rc));
            }
        }

        else {
            if (client_rank == 0) log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    // Final verdict combines the try/catch success flag with the last NSSI rc.
    if(success && (rc == NSSI_OK))
        out << "\nEnd Result: TEST PASSED" << std::endl;
    else
        out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of IChol algorithms on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); bool team_interface = false; clp.setOption("enable-team-interface", "disable-team-interface", &team_interface, "Flag for team interface"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int niter = 10; clp.setOption("niter", &niter, "Number of iterations for testing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { const bool overwrite = true; const int nshepherds = (team_interface ? 
nthreads/team_size : nthreads); const int nworker_per_shepherd = nthreads/nshepherds; setenv("QT_HWPAR", to_string(nthreads).c_str(), overwrite); setenv("QT_NUM_SHEPHERDS", to_string(nshepherds).c_str(), overwrite); setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite); exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); // r_val = exampleICholPerformance // <value_type,ordinal_type,size_type,exec_space,void> // (file_input, niter, nthreads, max_task_dependence, team_size, team_interface, (nthreads != 1), verbose); exec_space::finalize(); unsetenv("QT_HWPAR"); unsetenv("QT_NUM_SHEPHERDS"); unsetenv("QT_NUM_WORKERS_PER_SHEPHERD"); } return r_val; }
int main(int argc, char *argv[]) { int np=1, rank=0; int splitrank, splitsize; int rc = 0; nssi_service multicast_svc[2]; int transport_index=-1; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Barrier(MPI_COMM_WORLD); Teuchos::oblackholestream blackhole; std::ostream &out = ( rank == 0 ? std::cout : blackhole ); struct multicast_args args; const int num_io_methods = 6; const int io_method_vals[] = { MULTICAST_EMPTY_REQUEST_SYNC, MULTICAST_EMPTY_REQUEST_ASYNC, MULTICAST_GET_SYNC, MULTICAST_GET_ASYNC, MULTICAST_PUT_SYNC, MULTICAST_PUT_ASYNC}; const char * io_method_names[] = { "empty-request-sync", "empty-request-async", "get-sync", "get-async", "put-sync", "put-async"}; const int nssi_transport_list[] = { NSSI_RPC_PTL, NSSI_RPC_PTL, NSSI_RPC_IB, NSSI_RPC_IB, NSSI_RPC_GEMINI, NSSI_RPC_GEMINI, NSSI_RPC_BGPDCMF, NSSI_RPC_BGPDCMF, NSSI_RPC_BGQPAMI, NSSI_RPC_BGQPAMI, NSSI_RPC_MPI}; const int num_nssi_transports = 11; const int nssi_transport_vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; const char * nssi_transport_names[] = { "portals", "ptl", "infiniband", "ib", "gemini", "gni", "bgpdcmf", "dcmf", "bgqpami", "pami", "mpi" }; // Initialize arguments args.transport=NSSI_DEFAULT_TRANSPORT; args.delay = 1; args.io_method = MULTICAST_EMPTY_REQUEST_SYNC; args.debug_level = LOG_WARN; args.num_trials = 1; args.num_reqs = 1; args.len = 1; args.result_file_mode = "a"; args.result_file = ""; args.url_file[0] = ""; args.url_file[1] = ""; args.logfile = ""; args.client_flag = true; args.server_flag = true; args.timeout = 500; args.num_retries = 5; args.validate_flag = true; args.server_url[0] = ""; args.server_url[1] = ""; bool success = true; /** * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line * options to control the behavior of the test code. To evaluate performance, * the "num-trials", "num-reqs", and "len" options control the amount of data transferred * between client and server. 
The "io-method" selects the type of data transfer. The * server-url specifies the URL of the server. If running as a server, the server-url * provides a recommended URL when initializing the network transport. */ try { //out << Teuchos::Teuchos_Version() << std::endl << std::endl; // Creating an empty command line processor looks like: Teuchos::CommandLineProcessor parser; parser.setDocString( "This example program demonstrates a simple data-transfer service " "built using the NEtwork Scalable Service Interface (Nessie)." ); /* To set and option, it must be given a name and default value. Additionally, each option can be given a help std::string. Although it is not necessary, a help std::string aids a users comprehension of the acceptable command line arguments. Some examples of setting command line options are: */ parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" ); parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" ); parser.setOption("server", "no-server", &args.server_flag, "Run the server" ); parser.setOption("client", "no-client", &args.client_flag, "Run the client"); parser.setOption("len", &args.len, "The number of structures in an input buffer"); parser.setOption("debug",(int*)(&args.debug_level), "Debug level"); parser.setOption("logfile", &args.logfile, "log file"); parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)"); parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial"); parser.setOption("result-file", &args.result_file, "Where to store results"); parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result"); parser.setOption("server-url-1", &args.server_url[0], "URL client uses to find the server 1"); parser.setOption("server-url-2", &args.server_url[1], "URL client uses to find the server 2"); parser.setOption("server-url-file-1", &args.url_file[0], "File that has URL client uses to find server 1"); 
parser.setOption("server-url-file-2", &args.url_file[1], "File that has URL client uses to find server 2"); parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data"); // Set an enumeration command line option for the io_method parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names, "I/O Methods for the example: \n" "\t\t\tempty-request-sync : Send an empty request - synchronous\n" "\t\t\tempty-request-async: Send an empty request - asynchronous\n" "\t\t\tget-sync : Servers pull data from client - synchronous\n" "\t\t\tget-async: Servers pull data from client - asynchronous\n" "\t\t\tput-sync : Servers push data from client - synchronous\n" "\t\t\tput-async: Servers push data from client - asynchronous" ); // Set an enumeration command line option for the NNTI transport parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names, "NSSI transports (not all are available on every platform): \n" "\t\t\tportals|ptl : Cray or Schutt\n" "\t\t\tinfiniband|ib : libibverbs\n" "\t\t\tgemini|gni : Cray libugni (Gemini or Aries)\n" "\t\t\tbgpdcmf|dcmf : IBM BG/P DCMF\n" "\t\t\tbgqpami|pami : IBM BG/Q PAMI\n" "\t\t\tmpi : isend/irecv implementation\n" ); /* There are also two methods that control the behavior of the command line processor. First, for the command line processor to allow an unrecognized a command line option to be ignored (and only have a warning printed), use: */ parser.recogniseAllOptions(true); /* Second, by default, if the parser finds a command line option it doesn't recognize or finds the --help option, it will throw an std::exception. 
If you want prevent a command line processor from throwing an std::exception (which is important in this program since we don't have an try/catch around this) when it encounters a unrecognized option or help is printed, use: */ parser.throwExceptions(false); /* We now parse the command line where argc and argv are passed to the parse method. Note that since we have turned off std::exception throwing above we had better grab the return argument so that we can see what happened and act accordingly. */ Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } // Here is where you would use these command line arguments but for this example program // we will just print the help message with the new values of the command-line arguments. //if (rank == 0) // out << "\nPrinting help message with new values of command-line arguments ...\n\n"; //parser.printHelpMessage(argv[0],out); } TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success); log_debug(LOG_ALL, "transport_index=%d", transport_index); if (transport_index > -1) { args.transport =nssi_transport_list[transport_index]; args.transport_name=std::string(nssi_transport_names[transport_index]); } args.io_method_name=io_method_names[args.io_method]; log_debug(args.debug_level, "%d: Finished processing arguments", rank); if (!success) { MPI_Abort(MPI_COMM_WORLD, 1); } if (!args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && !args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, 
"%s.server.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } log_level debug_level = args.debug_level; // Communicator used for both client and server (may split if using client and server) MPI_Comm comm; log_debug(debug_level, "%d: Starting multicast-service test", rank); /** * Since this test can be run as a server, client, or both, we need to play some fancy * MPI games to get the communicators working correctly. If we're executing as both * a client and a server, we split the communicator so that the client thinks its * running by itself. */ if (args.client_flag && args.server_flag) { if (np < 3) { log_error(debug_level, "Must use at least 3 MPI processes for client and server mode"); MPI_Abort(MPI_COMM_WORLD, -1); } // Split the communicators. Processors with color=0 are servers. int color = ((rank == 0)||(rank == 1)) ? 0 : 1; // two server MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); MPI_Comm_rank(comm, &splitrank); MPI_Comm_size(comm, &splitsize); // std::cout << "rank=" << rank << "/" << np << ", color=" << color << // ", new_rank=" << newrank << "/" << newsize << std::endl << std::endl; // // std::cout << "my_url=" << my_url << ", server_url=" << args.server_url << std::endl; } else { MPI_Comm_dup(MPI_COMM_WORLD, &comm); } /** * Initialize the Nessie interface by specifying a transport, encoding scheme, and a * recommended URL. \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it * is often the case that only one type of transport exists on a particular platform. * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and * \ref NSSI_RPC_IB. 
We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE * should always be used for the second argument. The URL can be specified (as we did for * the server, or NULL (as we did for the client). This is a recommended value. Use the * \ref nssi_get_url function to find the actual value. */ if (args.server_flag && !args.server_url[rank].empty()) { // use the server URL as suggested URL nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, args.server_url[rank].c_str()); } else { nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL); } // Get the Server URL std::string my_url(NSSI_URL_LEN, '\0'); nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN); // Broadcast the server URL to all the clients args.server_url[0].resize(NSSI_URL_LEN, '\0'); args.server_url[1].resize(NSSI_URL_LEN, '\0'); if (args.server_flag && args.client_flag) { args.server_url[0] = my_url; MPI_Bcast(&args.server_url[0][0], args.server_url[0].size(), MPI_CHAR, 0, MPI_COMM_WORLD); args.server_url[1] = my_url; MPI_Bcast(&args.server_url[1][0], args.server_url[1].size(), MPI_CHAR, 1, MPI_COMM_WORLD); } else if (!args.server_flag && args.client_flag){ if (args.server_url[0].empty()) { // check to see if we're supposed to get the URL from a file if (!args.url_file[0].empty()) { // Fetch the server URL from a file sleep(1); log_debug(debug_level, "Reading from file %s", args.url_file[0].c_str()); std::ifstream urlfile (args.url_file[0].c_str()); if (urlfile.is_open()) { if (urlfile.good()) getline(urlfile, args.server_url[0]); } else { log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[0].c_str()); exit(1); } urlfile.close(); log_debug(debug_level, "URL = %s", args.server_url[0].c_str()); } else { log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]"); } } if (args.server_url[1].empty()) { // check to see if we're supposed to get the URL from a file if 
(!args.url_file[1].empty()) { // Fetch the server URL from a file sleep(1); log_debug(debug_level, "Reading from file %s", args.url_file[1].c_str()); std::ifstream urlfile (args.url_file[1].c_str()); if (urlfile.is_open()) { if (urlfile.good()) getline(urlfile, args.server_url[1]); } else { log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[1].c_str()); exit(1); } urlfile.close(); log_debug(debug_level, "URL = %s", args.server_url[1].c_str()); } else { log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]"); } } } else if (args.server_flag && !args.client_flag) { args.server_url[0] = my_url; // If the url_file value is set, write the url to a file if (!args.url_file[0].empty()) { std::ofstream urlfile (args.url_file[0].c_str()); if (urlfile.is_open()) { urlfile << args.server_url[0].c_str() << std::endl; } urlfile.close(); log_debug(debug_level, "Wrote url to file %s", args.url_file[0].c_str()); } args.server_url[1] = my_url; // If the url_file value is set, write the url to a file if (!args.url_file[1].empty()) { std::ofstream urlfile (args.url_file[1].c_str()); if (urlfile.is_open()) { urlfile << args.server_url[1].c_str() << std::endl; } urlfile.close(); log_debug(debug_level, "Wrote url to file %s", args.url_file[1].c_str()); } } // Set the debug level for the multicast service. multicast_debug_level = args.debug_level; // Print the arguments after they've all been set. print_args(out, args, "%"); //------------------------------------------------------------------------------ /** If we're running this job with a server, the server always executes on nodes 0 and 1. * In this example, the server is two process. */ if (args.server_flag && ((rank == 0)|(rank == 1))) { rc = multicast_server_main(args, comm); log_debug(debug_level, "Server is finished"); } // ------------------------------------------------------------------------------ /** The parallel client will execute this branch. 
The root node, nodes 0 and 1, of the client connects * connects with the server, using the \ref nssi_get_service function. Then the root * broadcasts the service description to the other clients before starting the main * loop of the client code by calling \ref multicast_client_main. */ else { int i; int client_rank; // get rank within the client communicator MPI_Comm_rank(comm, &client_rank); nssi_init((nssi_rpc_transport)args.transport); // Only one process needs to connect to the service // TODO: Make get_service a collective call (some transports do not need a connection) //if (client_rank == 0) { { sleep(args.delay); // give server time to get started // connect to remote server for (i=0; i < args.num_retries; i++) { log_debug(debug_level, "Try to connect to server: attempt #%d", i); rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[0].c_str(), args.timeout, &multicast_svc[0]); if (rc == NSSI_OK) break; else if (rc != NSSI_ETIMEDOUT) { log_error(multicast_debug_level, "could not get svc description: %s", nssi_err_str(rc)); break; } } // connect to remote server for (i=0; i < args.num_retries; i++) { log_debug(debug_level, "Try to connect to server: attempt #%d", i); rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[1].c_str(), args.timeout, &multicast_svc[1]); if (rc == NSSI_OK) break; else if (rc != NSSI_ETIMEDOUT) { log_error(multicast_debug_level, "could not get svc description: %s", nssi_err_str(rc)); break; } } } //MPI_Bcast(&rc, 1, MPI_INT, 0, comm); if (rc == NSSI_OK) { if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i); // Broadcast the service description to the other clients //log_debug(multicast_debug_level, "Bcasting svc to other clients"); //MPI_Bcast(&multicast_svc, sizeof(nssi_service), MPI_BYTE, 0, comm); log_debug(debug_level, "Starting client main"); // Start the client code multicast_client_main(args, &multicast_svc[0], comm); MPI_Barrier(comm); // Tell one of 
the clients to kill the server if (client_rank == 0) { log_debug(debug_level, "%d: Halting multicast service", rank); rc = nssi_kill(&multicast_svc[0], 0, 5000); rc = nssi_kill(&multicast_svc[1], 0, 5000); } } else { if (client_rank == 0) log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i); success = false; //MPI_Abort(MPI_COMM_WORLD, -1); } nssi_fini((nssi_rpc_transport)args.transport); } log_debug(debug_level, "%d: clean up nssi", rank); MPI_Barrier(MPI_COMM_WORLD); // Clean up nssi_rpc rc = nssi_rpc_fini((nssi_rpc_transport)args.transport); if (rc != NSSI_OK) log_error(debug_level, "Error in nssi_rpc_fini"); log_debug(debug_level, "%d: MPI_Finalize()", rank); MPI_Finalize(); logger_fini(); if(success && (rc == NSSI_OK)) out << "\nEnd Result: TEST PASSED" << std::endl; else out << "\nEnd Result: TEST FAILED" << std::endl; return ((success && (rc==NSSI_OK)) ? 0 : 1 ); }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program show blockwise information on Kokkos::Serial execution space.\n"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int fill_level = 0; clp.setOption("fill-level", &fill_level, "Fill level"); int league_size = 1; clp.setOption("league-size", &league_size, "League size"); int treecut = 15; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int minblksize = 0; clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Level to prune the tree from bottom"); int seed = 0; clp.setOption("seed", &seed, "Seed for random number generator in graph partition"); int histogram_size = 0; clp.setOption("histogram-size", &histogram_size, "Histogram size"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { Kokkos::initialize(); r_val = exampleStatByBlocks <value_type,ordinal_type,size_type,exec_space,void> (file_input, treecut, minblksize, prunecut, seed, fill_level, league_size, histogram_size, verbose); Kokkos::finalize(); } return r_val; }
int main(int argc, char *argv[]) { int r_val = 0; Teuchos::CommandLineProcessor clp; int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { cout << "Testing Kokkos::Qthread:: Failed in parsing command line input" << endl; return -1; } if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) { return 0; } unsigned threads_count = 0; if (Kokkos::hwloc::available()) { const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); const unsigned one = 1u; threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core); cout << " = Kokkos::hwloc = " << endl << "NUMA count = " << numa_count << endl << "Cores per NUMA = " << cores_per_numa << endl << "Threads per core = " << threads_per_core << endl << "Threads count = " << threads_count << endl; } else { threads_count = thread::hardware_concurrency(); cout << " = std::thread::hardware_concurrency = " << endl << "Threads count = " << threads_count << endl; } if (static_cast<unsigned int>(nthreads) > threads_count) { ++r_val; cout << "Testing Kokkos::Threads:: Failed that the given nthreads is greater than the number of threads counted" << endl; } else { Kokkos::Threads::initialize( nthreads, numa, core_per_numa ); 
Kokkos::Threads::print_configuration( cout , true /* detailed */ ); const int blk_cnt = 6, blks[blk_cnt] = { 1, 2, 4, 8, 12, 16 }; const int nrhs_cnt = 6, nrhs[nrhs_cnt] = { 1, 2, 4, 8, 12, 16 }; r_val += testTriSolveByBlocksDebug<double,int,int,Kokkos::Threads,void> ("mm_crs_input.mtx", team_size, max_task_dependence, blks[0], nrhs[2]); Kokkos::Threads::finalize(); } string eval; __EVAL_STRING__(r_val, eval); cout << "Testing Kokkos::Threads::" << eval << endl; return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of Chol algorithms on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); int fill_level = 0; clp.setOption("fill-level", &fill_level, "Fill level"); bool team_interface = true; clp.setOption("enable-team-interface", "disable-team-interface", &team_interface, "Flag for team interface"); bool mkl_interface = false; clp.setOption("enable-mkl-interface", "disable-mkl-interface", &mkl_interface, "Flag for MKL interface"); int stack_size = 8192; clp.setOption("stack-size", &stack_size, "Stack size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 15; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int minblksize = 0; clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Leve to prune tree from bottom"); int seed = 0; clp.setOption("seed", &seed, "Seed for random number generator in graph partition"); int niter = 10; clp.setOption("niter", &niter, "Number of iterations for testing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { const bool overwrite = true; const int nshepherds = (team_interface ? 
nthreads/team_size : nthreads); const int nworker_per_shepherd = nthreads/nshepherds; setenv("QT_HWPAR", to_string(nthreads).c_str(), overwrite); setenv("QT_NUM_SHEPHERDS", to_string(nshepherds).c_str(), overwrite); setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite); setenv("QT_STACK_SIZE", to_string(stack_size).c_str(), overwrite); exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); r_val = exampleCholPerformance <value_type,ordinal_type,size_type,exec_space,void> (file_input, treecut, minblksize, prunecut, seed, niter, nthreads, max_task_dependence, team_size, fill_level, nshepherds, team_interface, (nthreads != 1), mkl_interface, verbose); exec_space::finalize(); unsetenv("QT_HWPAR"); unsetenv("QT_NUM_SHEPHERDS"); unsetenv("QT_NUM_WORKERS_PER_SHEPHERD"); unsetenv("QT_STACK_SIZE"); } return r_val; }
int main (int argc, char *argv[]) { // command-line arguments log_level debug_level = LOG_ERROR; string logfile(""); int npes, me, i; int num_servers=1; int num_clients=1; int servers_per_node=1; int clients_per_node=1; int client_weight=10; int server_weight=10; int client_server_weight=5; string server_node_file("SNF.txt"); string client_node_file("CNF.txt"); const int num_graphs = 4; const int graph_vals[] = { GRAPH_COMPLETE, GRAPH_CLIENT_COMPLETE, GRAPH_SERVER_COMPLETE, GRAPH_CLIENT_SERVER_ONLY }; const char * graph_names[] = { "complete", "client-complete", "server-complete", "client-server-only" }; enum graph_connection_t graph_connection=GRAPH_COMPLETE; MPI_Init(&argc, &argv); try { Teuchos::CommandLineProcessor parser; // init parser parser.setDocString("Find node placement of server and client ranks"); parser.setOption("strategy", &strategy, "LibTopoMap strategy (greedy, greedy_route, recursive, rcm, scotch, ascending)"); parser.setOption("num-servers", (int *)(&num_servers), "Number of servers to place"); parser.setOption("num-clients", (int *)(&num_clients), "Number of clients to place"); parser.setOption("servers-per-node", (int *)(&servers_per_node), "Number of server ranks per compute node"); parser.setOption("clients-per-node", (int *)(&clients_per_node), "Number of client ranks per compute node"); parser.setOption("server-weight", (int *)(&server_weight), "Edge weight of server-to-server communication"); parser.setOption("client-weight", (int *)(&client_weight), "Edge weight of client-to-client communication"); parser.setOption("client-server-weight", (int *)(&client_server_weight), "Edge weight of client-to-server communication"); parser.setOption("server-node-file", &server_node_file, "Where to write the server placement results"); parser.setOption("client-node-file", &client_node_file, "Where to write the client placement results"); parser.setOption("verbose", (int *)(&debug_level), "Debug level"); parser.setOption("logfile", &logfile, "Path to 
file for debug statements"); // Set an enumeration command line option for the connection graph parser.setOption("graph-connection", (int*)&graph_connection, num_graphs, graph_vals, graph_names, "Graph Connections for the example: \n" "\t\t\tcomplete : client-client graph is complete, server-server graph is complete\n" "\t\t\tclient-complete: client-client graph is complete, server-server graph is empty\n" "\t\t\tserver-complete : client-client graph is empty, server-server graph is complete\n" "\t\t\tclient-server-only: client-client graph is empty, server-server graph is empty\n" "\t\t\tIn all cases, each client has an edge to one of the servers\n" ); parser.recogniseAllOptions(); parser.throwExceptions(); Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } } catch (...) 
{ exit(-1); } /* initialize the logger */ logger_init(debug_level, logfile.c_str()); MPI_Comm_size(MPI_COMM_WORLD, &npes); MPI_Comm_rank(MPI_COMM_WORLD, &me); if (me==0) { cout << " ---------------- ARGUMENTS --------------- " << std::endl; cout << " \tstrategy = " << strategy << std::endl; cout << " \tgraph-connection = " << graph_names[graph_connection] << std::endl; cout << " \tnum-servers = " << num_servers << std::endl; cout << " \tnum-clients = " << num_clients << std::endl; cout << " \tservers-per-node = " << servers_per_node << std::endl; cout << " \tclients-per-node = " << clients_per_node << std::endl; cout << " \tserver-weight = " << server_weight << std::endl; cout << " \tclient-weight = " << client_weight << std::endl; cout << " \tclient-server-weight = " << client_server_weight << std::endl; cout << " \tserver-node-file = " << server_node_file << std::endl; cout << " \tclient-node-file = " << client_node_file << std::endl; cout << " \tverbose = " << debug_level << std::endl; cout << " \tlogfile = " << logfile << std::endl; cout << " ------------------------------------------- " << std::endl; } MPI_Barrier(MPI_COMM_WORLD); int *rank_map=(int*)malloc(sizeof(int) * npes); int *nid_map=(int*)malloc(sizeof(int) * npes); construct_graph( rank_map, nid_map, num_servers, num_clients, servers_per_node, clients_per_node, server_weight, client_weight, client_server_weight, graph_connection, 0); if (me == 0) { ofstream snf(server_node_file.c_str(), ios_base::out); ofstream cnf(client_node_file.c_str(), ios_base::out); for (i=0;i<npes;i++) { if (rank_map[i] < num_servers) snf << nid_map[i] << "\t" << i << "\t" << rank_map[i] << std::endl; } for (i=0;i<npes;i++) { if (rank_map[i] >= num_servers) cnf << nid_map[i] << "\t" << i << "\t" << rank_map[i] << std::endl; } snf.close(); cnf.close(); } MPI_Finalize(); return 0; }
int main (int argc, char *argv[]) { int rc; // command-line arguments int retries = 0; int sig = 0; int timeout = 1000; log_level debug_level = LOG_ERROR; string logfile(""); nssi_service svc; char my_url[NSSI_URL_LEN]; std::string server_url(""); char server_str[NSSI_URL_LEN]; std::string contact_file(""); /* the file where the server's url should be written */ try { Teuchos::CommandLineProcessor parser; // init parser parser.setDocString("Kill an NSSI server"); parser.setOption("verbose", (int *)(&debug_level), "Debug level."); parser.setOption("logfile", &logfile, "Path to file for debug statements"); parser.setOption("server-url", &server_url, "URL of NSSI service"); parser.setOption("contact-file", &contact_file, "Where to read the server's URL"); parser.setOption("timeout", &timeout, "Timout for contacting services (ms)"); parser.setOption("retries", &retries, "Number of times to retry before exiting"); parser.setOption("sig", &sig, "Signal to use for the kill command"); parser.recogniseAllOptions(); parser.throwExceptions(); Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } } catch (...) 
{ exit(-1); } /* initialize the logger */ logger_init(debug_level, logfile.c_str()); if (server_url.c_str()[0]=='\0') { sleep(1); log_debug(debug_level, "reading URL from file"); read_contact_info(contact_file.c_str(), server_str, NSSI_URL_LEN); } else { log_debug(debug_level, "using URL from command-line"); strcpy(server_str, server_url.c_str()); } nssi_rpc_init(NSSI_DEFAULT_TRANSPORT, NSSI_DEFAULT_ENCODE, NULL); nssi_get_url(NSSI_DEFAULT_TRANSPORT, my_url, NSSI_URL_LEN); // sleep(1); log_info(debug_level, "\nTrying to get service at %s", server_str); rc=nssi_get_service(NSSI_DEFAULT_TRANSPORT, server_str, timeout, &svc); if (rc != NSSI_OK) { log_error(admin_debug_level, "could not get svc description: %s", nssi_err_str(rc)); return rc; } rc = kill_svc(&svc, sig, timeout); if (rc == NSSI_ETIMEDOUT) { fprintf(stderr, "Timed out trying to kill (%s)\n", server_url.c_str()); return rc; } else if (rc != NSSI_OK) { log_error(admin_debug_level, "failed to kill service: %s", nssi_err_str(rc)); return rc; } nssi_rpc_fini(NSSI_DEFAULT_TRANSPORT); return 0; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); // int numa = 0; // clp.setOption("numa", &numa, "Number of numa node"); // int core_per_numa = 0; // clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); std::string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 0; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Level to prune tree from bottom"); int fill_level = -1; clp.setOption("fill-level", &fill_level, "Fill level"); int rows_per_team = 4096; clp.setOption("rows-per-team", &rows_per_team, "Workset size"); int max_concurrency = 250000; clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks"); int max_task_dependence = 3; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); int nrhs = 1; clp.setOption("nrhs", &team_size, "# of right hand side"); int mb = 0; clp.setOption("mb", &mb, "Dense nested blocks size"); int nb = 1; clp.setOption("nb", &nb, "Column block size of right hand side"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads); #if (defined(HAVE_SHYLUTACHO_SCOTCH) && defined(HAVE_SHYLUTACHO_CHOLMOD)) r_val = exampleCholSuperNodesByBlocks<exec_space> 
(file_input, treecut, prunecut, fill_level, rows_per_team, max_concurrency, max_task_dependence, team_size, nrhs, mb, nb, verbose); #else r_val = -1; std::cout << "Scotch or Cholmod is NOT configured in Trilinos" << std::endl; #endif exec_space::finalize(); } return r_val; }
int main_(Teuchos::CommandLineProcessor &clp, int argc, char *argv[]) { #include <MueLu_UseShortNames.hpp> using Teuchos::RCP; using Teuchos::rcp; using Teuchos::TimeMonitor; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); int numProc = comm->getSize(); int myRank = comm->getRank(); // ========================================================================= // Parameters initialization // ========================================================================= ::Xpetra::Parameters xpetraParameters(clp); bool runHeavyTests = false; clp.setOption("heavytests", "noheavytests", &runHeavyTests, "whether to exercise tests that take a long time to run"); clp.recogniseAllOptions(true); switch (clp.parse(argc,argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Xpetra::UnderlyingLib lib = xpetraParameters.GetLib(); // ========================================================================= // Problem construction // ========================================================================= ParameterList matrixParameters; matrixParameters.set("nx", Teuchos::as<GO>(9999)); matrixParameters.set("matrixType", "Laplace1D"); RCP<Matrix> A = MueLuTests::TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(matrixParameters.get<GO>("nx"), lib); RCP<MultiVector> coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D", A->getRowMap(), matrixParameters); std::string outDir = "Output/"; std::vector<std::string> dirList; if (runHeavyTests) { 
dirList.push_back("EasyParameterListInterpreter-heavy/"); dirList.push_back("FactoryParameterListInterpreter-heavy/"); } else { dirList.push_back("EasyParameterListInterpreter/"); dirList.push_back("FactoryParameterListInterpreter/"); } #if defined(HAVE_MPI) && defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_AMESOS2_KLU2) // The ML interpreter have internal ifdef, which means that the resulting // output would depend on configuration (reguarl interpreter does not have // that). Therefore, we need to stabilize the configuration here. // In addition, we run ML parameter list tests only if KLU is available dirList.push_back("MLParameterListInterpreter/"); dirList.push_back("MLParameterListInterpreter2/"); #endif int numLists = dirList.size(); bool failed = false; Teuchos::Time timer("Interpreter timer"); //double lastTime = timer.wallTime(); for (int k = 0; k < numLists; k++) { Teuchos::ArrayRCP<std::string> fileList = MueLuTests::TestHelpers::GetFileList(dirList[k], (numProc == 1 ? std::string(".xml") : std::string("_np" + Teuchos::toString(numProc) + ".xml"))); for (int i = 0; i < fileList.size(); i++) { // Set seed std::srand(12345); // Reset (potentially) cached value of the estimate A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one()); std::string xmlFile = dirList[k] + fileList[i]; std::string outFile = outDir + fileList[i]; std::string baseFile = outFile.substr(0, outFile.find_last_of('.')); std::size_t found = baseFile.find("_np"); if (numProc == 1 && found != std::string::npos) { #ifdef HAVE_MPI baseFile = baseFile.substr(0, found); #else std::cout << "Skipping \"" << xmlFile << "\" as MPI is not enabled" << std::endl; continue; #endif } baseFile = baseFile + (lib == Xpetra::UseEpetra ? "_epetra" : "_tpetra"); std::string goldFile = baseFile + ".gold"; std::ifstream f(goldFile.c_str()); if (!f.good()) { if (myRank == 0) std::cout << "Warning: comparison file " << goldFile << " not found. 
Skipping test" << std::endl; continue; } std::filebuf buffer; std::streambuf* oldbuffer = NULL; if (myRank == 0) { // Redirect output buffer.open((baseFile + ".out").c_str(), std::ios::out); oldbuffer = std::cout.rdbuf(&buffer); } // NOTE: we cannot use ParameterListInterpreter(xmlFile, comm), because we want to update the ParameterList // first to include "test" verbosity Teuchos::ParameterList paramList; Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr<Teuchos::ParameterList>(¶mList), *comm); if (dirList[k] == "EasyParameterListInterpreter/" || dirList[k] == "EasyParameterListInterpreter-heavy/") paramList.set("verbosity", "test"); else if (dirList[k] == "FactoryParameterListInterpreter/" || dirList[k] == "FactoryParameterListInterpreter-heavy/") paramList.sublist("Hierarchy").set("verbosity", "Test"); else if (dirList[k] == "MLParameterListInterpreter/") paramList.set("ML output", 42); else if (dirList[k] == "MLParameterListInterpreter2/") paramList.set("ML output", 10); try { timer.start(); Teuchos::RCP<HierarchyManager> mueluFactory; // create parameter list interpreter // here we have to distinguish between the general MueLu parameter list interpreter // and the ML parameter list interpreter. Note that the ML paramter interpreter also // works with Tpetra matrices. 
if (dirList[k] == "EasyParameterListInterpreter/" || dirList[k] == "EasyParameterListInterpreter-heavy/" || dirList[k] == "FactoryParameterListInterpreter/" || dirList[k] == "FactoryParameterListInterpreter-heavy/") { mueluFactory = Teuchos::rcp(new ParameterListInterpreter(paramList)); } else if (dirList[k] == "MLParameterListInterpreter/") { mueluFactory = Teuchos::rcp(new MLParameterListInterpreter(paramList)); } else if (dirList[k] == "MLParameterListInterpreter2/") { //std::cout << "ML ParameterList: " << std::endl; //std::cout << paramList << std::endl; RCP<ParameterList> mueluParamList = Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(paramList,"SA")); //std::cout << "MueLu ParameterList: " << std::endl; //std::cout << *mueluParamList << std::endl; mueluFactory = Teuchos::rcp(new ParameterListInterpreter(*mueluParamList)); } RCP<Hierarchy> H = mueluFactory->CreateHierarchy(); H->GetLevel(0)->template Set<RCP<Matrix> >("A", A); if (dirList[k] == "MLParameterListInterpreter/") { // MLParameterInterpreter needs the nullspace information if rebalancing is active! 
// add default constant null space vector RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getRowMap(), 1); nullspace->putScalar(1.0); H->GetLevel(0)->Set("Nullspace", nullspace); } H->GetLevel(0)->Set("Coordinates", coordinates); mueluFactory->SetupHierarchy(*H); if (strncmp(fileList[i].c_str(), "reuse", 5) == 0) { // Build the Hierarchy the second time // Should be faster if we actually do the reuse A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one()); mueluFactory->SetupHierarchy(*H); } timer.stop(); } catch (Teuchos::ExceptionBase& e) { std::string msg = e.what(); msg = msg.substr(msg.find_last_of('\n')+1); if (myRank == 0) { std::cout << "Caught exception: " << msg << std::endl; // Redirect output back std::cout.rdbuf(oldbuffer); buffer.close(); } if (msg == "Zoltan interface is not available" || msg == "Zoltan2 interface is not available" || msg == "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false.") { if (myRank == 0) std::cout << xmlFile << ": skipped (missing library)" << std::endl; continue; } } std::string cmd; if (myRank == 0) { // Redirect output back std::cout.rdbuf(oldbuffer); buffer.close(); // Create a copy of outputs cmd = "cp -f "; system((cmd + baseFile + ".gold " + baseFile + ".gold_filtered").c_str()); system((cmd + baseFile + ".out " + baseFile + ".out_filtered").c_str()); // Tpetra produces different eigenvalues in Chebyshev due to using // std::rand() for generating random vectors, which may be initialized // using different seed, and may have different algorithm from one // gcc version to another, or to anogther compiler (like clang) // This leads to us always failing this test. 
// NOTE1 : Epetra, on the other hand, rolls out its out random number // generator, which always produces same results // Ignore the value of "lambdaMax" run_sed("'s/lambdaMax: [0-9]*.[0-9]*/lambdaMax = <ignored>/'", baseFile); // Ignore the value of "lambdaMin" run_sed("'s/lambdaMin: [0-9]*.[0-9]*/lambdaMin = <ignored>/'", baseFile); // Ignore the value of "chebyshev: max eigenvalue" // NOTE: we skip lines with default value ([default]) run_sed("'/[default]/! s/chebyshev: max eigenvalue = [0-9]*.[0-9]*/chebyshev: max eigenvalue = <ignored>/'", baseFile); // Ignore the exact type of direct solver (it is selected semi-automatically // depending on how Trilinos was configured run_sed("'s/Amesos\\([2]*\\)Smoother{type = .*}/Amesos\\1Smoother{type = <ignored>}/'", baseFile); run_sed("'s/SuperLU solver interface, direct solve/<Direct> solver interface/'", baseFile); run_sed("'s/KLU2 solver interface/<Direct> solver interface/'", baseFile); run_sed("'s/Basker solver interface/<Direct> solver interface/'", baseFile); // Strip template args for some classes std::vector<std::string> classes; classes.push_back("Xpetra::Matrix"); classes.push_back("MueLu::Constraint"); classes.push_back("MueLu::SmootherPrototype"); for (size_t q = 0; q < classes.size(); q++) run_sed("'s/" + classes[q] + "<.*>/" + classes[q] + "<ignored> >/'", baseFile); #ifdef __APPLE__ // Some Macs print outs ptrs as 0x0 instead of 0, fix that run_sed("'/RCP/ s/=0x0/=0/g'", baseFile); #endif // Run comparison (ignoring whitespaces) cmd = "diff -u -w -I\"^\\s*$\" " + baseFile + ".gold_filtered " + baseFile + ".out_filtered"; int ret = system(cmd.c_str()); if (ret) failed = true; //std::ios_base::fmtflags ff(std::cout.flags()); //std::cout.precision(2); //std::cout << xmlFile << " (" << std::setiosflags(std::ios::fixed) // << timer.wallTime() - lastTime << " sec.) : " << (ret ? 
"failed" : "passed") << std::endl; //lastTime = timer.wallTime(); //std::cout.flags(ff); // reset flags to whatever they were prior to printing time std::cout << xmlFile << " : " << (ret ? "failed" : "passed") << std::endl; } } } if (myRank == 0) std::cout << std::endl << "End Result: TEST " << (failed ? "FAILED" : "PASSED") << std::endl; return (failed ? EXIT_FAILURE : EXIT_SUCCESS); }
int main_(Teuchos::CommandLineProcessor &clp, int argc, char *argv[]) { #include <MueLu_UseShortNames.hpp> using Teuchos::RCP; using Teuchos::rcp; using Teuchos::ArrayRCP; using Teuchos::RCP; using Teuchos::TimeMonitor; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); // ========================================================================= // Convenient definitions // ========================================================================= typedef Teuchos::ScalarTraits<SC> STS; SC one = STS::one(), zero = STS::zero(); RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); // ========================================================================= // Parameters initialization // ========================================================================= GO nx = 100, ny = 100, nz = 100; Galeri::Xpetra::Parameters<GO> galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra std::string xmlFileName = ""; clp.setOption("xml", &xmlFileName, "read parameters from a file"); int numRebuilds = 0; clp.setOption("rebuild", &numRebuilds, "#times to rebuild hierarchy"); bool useFilter = true; clp.setOption("filter", "nofilter", &useFilter, "Print out only Setup times"); bool modify = true; clp.setOption("modify", "nomodify", &modify, "Change values of the matrix used for reuse"); clp.recogniseAllOptions(true); switch (clp.parse(argc, argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case 
Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Xpetra::UnderlyingLib lib = xpetraParameters.GetLib(); ParameterList paramList; paramList.set("verbosity", "none"); if (xmlFileName != "") Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr<ParameterList>(¶mList), *comm); // Retrieve matrix parameters (they may have been changed on the command line) // [for instance, if we changed matrix type from 2D to 3D we need to update nz] ParameterList galeriList = galeriParameters.GetParameterList(); // ========================================================================= // Problem construction // ========================================================================= // For comments, see Driver.cpp out << "========================================================\n" << xpetraParameters << galeriParameters; std::string matrixType = galeriParameters.GetMatrixType(); RCP<Matrix> A, B; RCP<const Map> map; RCP<MultiVector> coordinates, nullspace; ConstructData(matrixType, galeriList, lib, comm, A, map, coordinates, nullspace); if (modify) { galeriList.set("stretchx", 2.2); galeriList.set("stretchy", 1.2); galeriList.set("stretchz", 0.3); } ConstructData(matrixType, galeriList, lib, comm, B, map, coordinates, nullspace); out << "Processor subdomains in x direction: " << galeriList.get<GO>("mx") << std::endl << "Processor subdomains in y direction: " << galeriList.get<GO>("my") << std::endl << "Processor subdomains in z direction: " << galeriList.get<GO>("mz") << std::endl << "========================================================" << std::endl; // ========================================================================= // Setups and solves // ========================================================================= RCP<Vector> X = VectorFactory::Build(map); RCP<Vector> Y = VectorFactory::Build(map); Y->setSeed(846930886); Y->randomize(); const int nIts = 9; 
std::string thickSeparator = "============================================================="; std::string thinSeparator = "-------------------------------------------------------------"; // ========================================================================= // Setup #1 (no reuse) // ========================================================================= out << thickSeparator << " no reuse " << thickSeparator << std::endl; { RCP<Hierarchy> H; // Run multiple builds for matrix A and time them RCP<Teuchos::Time> tm = TimeMonitor::getNewTimer("Setup #1: no reuse"); for (int i = 0; i <= numRebuilds; i++) { out << thinSeparator << " no reuse (rebuild #" << i << ") " << thinSeparator << std::endl; // Start timing (skip first build to reduce jitter) if (!(numRebuilds && i == 0)) tm->start(); A->SetMaxEigenvalueEstimate(-one); H = CreateHierarchy(A, paramList, coordinates); // Stop timing if (!(numRebuilds && i == 0)) { tm->stop(); tm->incrementNumCalls(); } } X->putScalar(zero); H->Iterate(*Y, *X, nIts); out << "residual(A) = " << Utilities::ResidualNorm(*A, *X, *Y)[0] << " [no reuse]" << std::endl; // Run a build for matrix B to record its convergence B->SetMaxEigenvalueEstimate(-one); H = CreateHierarchy(B, paramList, coordinates); X->putScalar(zero); H->Iterate(*Y, *X, nIts); out << "residual(B) = " << Utilities::ResidualNorm(*B, *X, *Y)[0] << " [no reuse]" << std::endl; } // ========================================================================= // Setup #2-inf (reuse) // ========================================================================= std::vector<std::string> reuseTypes, reuseNames; reuseTypes.push_back("S"); reuseNames.push_back("smoothers"); reuseTypes.push_back("tP"); reuseNames.push_back("tentative P"); reuseTypes.push_back("RP"); reuseNames.push_back("smoothed P and R"); for (size_t k = 0; k < reuseTypes.size(); k++) { out << thickSeparator << " " << reuseTypes[k] << " " << thickSeparator << std::endl; A->SetMaxEigenvalueEstimate(-one); 
paramList.set("reuse: type", reuseTypes[k]); out << thinSeparator << " " << reuseTypes[k] << " (initial) " << thinSeparator << std::endl; RCP<Hierarchy> H = CreateHierarchy(A, paramList, coordinates); X->putScalar(zero); H->Iterate(*Y, *X, nIts); out << "residual(A) = " << Utilities::ResidualNorm(*A, *X, *Y)[0] << " [reuse \"" << reuseNames[k] << "\"]" << std::endl; // Reuse setup RCP<Matrix> Bcopy = Xpetra::MatrixFactory2<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildCopy(B); RCP<Teuchos::Time> tm = TimeMonitor::getNewTimer("Setup #" + MueLu::toString(k+2) + ": reuse " + reuseNames[k]); for (int i = 0; i <= numRebuilds; i++) { out << thinSeparator << " " << reuseTypes[k] << " (rebuild #" << i << ") " << thinSeparator << std::endl; // Start timing (skip first build to reduce jitter) if (!(numRebuilds && i == 0)) tm->start(); B->SetMaxEigenvalueEstimate(-one); ReuseHierarchy(B, *H); // Stop timing if (!(numRebuilds && i == 0)) { tm->stop(); tm->incrementNumCalls(); } X->putScalar(zero); H->Iterate(*Y, *X, nIts); out << "residual(B) = " << Utilities::ResidualNorm(*B, *X, *Y)[0] << " [reuse \"" << reuseNames[k] << "\"]" << std::endl; // Change the pointers so that reuse is not a no-op B.swap(Bcopy); } } out << thickSeparator << thickSeparator << std::endl; { const bool alwaysWriteLocal = true; const bool writeGlobalStats = true; const bool writeZeroTimers = false; const bool ignoreZeroTimers = true; const std::string filter = (useFilter ? "Setup #" : ""); TimeMonitor::summarize(A->getRowMap()->getComm().ptr(), std::cout, alwaysWriteLocal, writeGlobalStats, writeZeroTimers, Teuchos::Union, filter, ignoreZeroTimers); } return EXIT_SUCCESS; }