int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); std::string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 0; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Level to prune tree from bottom"); int fill_level = -1; clp.setOption("fill-level", &fill_level, "Fill level"); int rows_per_team = 4096; clp.setOption("rows-per-team", &rows_per_team, "Workset size"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); #if (defined(HAVE_SHYLUTACHO_SCOTCH) && (defined(HAVE_SHYLUTACHO_CHOLMOD) \ || defined(HAVE_SHYLUTACHO_AMESOS))) r_val = exampleIncompleteSymbolicFactorization<exec_space> (file_input, treecut, prunecut, fill_level, rows_per_team, verbose); #else r_val = -1; std::cout << "Scotch or Cholmod is NOT configured in Trilinos" << std::endl; #endif exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program demonstrates symbolic factorization algorithm on Kokkos::Serial execution space.\n"); int fill_level = 0; clp.setOption("fill-level", &fill_level, "Fill level for incomplete factorization"); int league_size = 1; clp.setOption("league-size", &league_size, "League size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 0; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int minblksize = 0; clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering"); int seed = 0; clp.setOption("seed", &seed, "Seed for random number generator in graph partition"); bool scotch = true; clp.setOption("enable-scotch", "disable-scotch", &scotch, "Flag for Scotch"); bool camd = true; clp.setOption("enable-camd", "disable-camd", &camd, "Flag for CAMD"); bool symbolic = true; clp.setOption("enable-symbolic", "disable-symbolic", &symbolic, "Flag for sybolic factorization"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { Kokkos::initialize(); r_val = exampleSymbolicFactor <value_type,ordinal_type,size_type,exec_space,void> (file_input, treecut, minblksize, seed, fill_level, league_size, scotch, camd, symbolic, verbose); Kokkos::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of IChol algorithms on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); bool team_interface = false; clp.setOption("enable-team-interface", "disable-team-interface", &team_interface, "Flag for team interface"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int nrhs = 1; clp.setOption("nrhs", &nrhs, "Number of right hand side"); int nb = nrhs; clp.setOption("nb", &nb, "Blocksize of right hand side"); int niter = 100; clp.setOption("niter", &niter, "Number of iterations for testing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); r_val = exampleTriSolvePerformance <value_type,ordinal_type,size_type,exec_space,void> (file_input, nrhs, nb, niter, nthreads, max_task_dependence, team_size, team_interface, (nthreads != 1), verbose); exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); int mmin = 1000; clp.setOption("mmin", &mmin, "C(mmin,mmin)"); int mmax = 8000; clp.setOption("mmax", &mmax, "C(mmax,mmax)"); int minc = 1000; clp.setOption("minc", &minc, "Increment of m"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(); host_space::initialize(nthreads, numa, core_per_numa); r_val = exampleDenseMatrixBase<exec_space> (mmin, mmax, minc, verbose); exec_space::finalize(); host_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example interface of solver Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int nrhs = 1; clp.setOption("nrhs", &nrhs, "Numer of right hand side"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); exec_space::print_configuration(cout, true); r_val = exampleCholDirectSolver <value_type,ordinal_type,size_type,exec_space,void> (file_input, nrhs, nthreads, verbose); exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of task data parallelism (barrier) on Kokkos::Threads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); int league_size = 1; clp.setOption("league-size", &league_size, "League size"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); int ntasks = 100; clp.setOption("ntasks", &ntasks, "Number of tasks to be spawned"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); exec_space::print_configuration(cout, true); r_val = exampleKokkosDataData<exec_space,value_type>((ntasks > MAXTASKS ? MAXTASKS : ntasks), league_size, team_size, verbose); exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program demonstrates TriSolveUnblocked algorithm on Kokkos::Serial execution space.\n"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int nrhs = 1; clp.setOption("nrhs", &nrhs, "Number of right hand side"); int nb = nrhs; clp.setOption("nb", &nb, "Blocksize of right hand side"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { Kokkos::initialize(); r_val = exampleTriSolveByBlocks <value_type,ordinal_type,size_type,exec_space,void> (file_input, nrhs, nb, 1, max_task_dependence, team_size, verbose); Kokkos::finalize(); } return r_val; }
int main(int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Intrepid2::DynRankView_PerfTest01.\n"); int nworkset = 8; clp.setOption("nworkset", &nworkset, "# of worksets"); int C = 4096; clp.setOption("C", &C, "# of Cells in a workset"); int order = 2; clp.setOption("order", &order, "cubature order"); bool verbose = true; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; Kokkos::initialize(); if (verbose) std::cout << "Testing datatype double\n"; const int r_val_double = Intrepid2::Test::ComputeBasis_HGRAD <double,Kokkos::Cuda>(nworkset, C, order, verbose); return r_val_double; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of Chol algorithms on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); int fill_level = 0; clp.setOption("fill-level", &fill_level, "Fill level"); bool team_interface = true; clp.setOption("enable-team-interface", "disable-team-interface", &team_interface, "Flag for team interface"); bool mkl_interface = false; clp.setOption("enable-mkl-interface", "disable-mkl-interface", &mkl_interface, "Flag for MKL interface"); int stack_size = 8192; clp.setOption("stack-size", &stack_size, "Stack size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 15; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int minblksize = 0; clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Leve to prune tree from bottom"); int seed = 0; clp.setOption("seed", &seed, "Seed for random number generator in graph partition"); int niter = 10; clp.setOption("niter", &niter, "Number of iterations for testing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { const bool overwrite = true; const int nshepherds = (team_interface ? nthreads/team_size : nthreads); const int nworker_per_shepherd = nthreads/nshepherds; setenv("QT_HWPAR", to_string(nthreads).c_str(), overwrite); setenv("QT_NUM_SHEPHERDS", to_string(nshepherds).c_str(), overwrite); setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite); setenv("QT_STACK_SIZE", to_string(stack_size).c_str(), overwrite); exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); r_val = exampleCholPerformance <value_type,ordinal_type,size_type,exec_space,void> (file_input, treecut, minblksize, prunecut, seed, niter, nthreads, max_task_dependence, team_size, fill_level, nshepherds, team_interface, (nthreads != 1), mkl_interface, verbose); exec_space::finalize(); unsetenv("QT_HWPAR"); unsetenv("QT_NUM_SHEPHERDS"); unsetenv("QT_NUM_WORKERS_PER_SHEPHERD"); unsetenv("QT_STACK_SIZE"); } return r_val; }
int main(int argc, char *argv[]) { int r_val = 0; Teuchos::CommandLineProcessor clp; int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { cout << "Testing Kokkos::Qthread:: Failed in parsing command line input" << endl; return -1; } if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) { return 0; } unsigned threads_count = 0; if (Kokkos::hwloc::available()) { const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); const unsigned one = 1u; threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core); cout << " = Kokkos::hwloc = " << endl << "NUMA count = " << numa_count << endl << "Cores per NUMA = " << cores_per_numa << endl << "Threads per core = " << threads_per_core << endl << "Threads count = " << threads_count << endl; } else { threads_count = thread::hardware_concurrency(); cout << " = std::thread::hardware_concurrency = " << endl << "Threads count = " << threads_count << endl; } if (static_cast<unsigned int>(nthreads) > threads_count) { ++r_val; cout << "Testing Kokkos::Threads:: Failed that the given nthreads is greater than the number of threads counted" << endl; } else { Kokkos::Threads::initialize( nthreads ); Kokkos::Threads::print_configuration( cout , true /* detailed */ ); //__TestSuiteDoUnitTests__(float,int,unsigned int,Kokkos::Serial,void); //__TestSuiteDoUnitTests__(float,long,unsigned long,Kokkos::Serial,void); __TestSuiteDoUnitTests__(double,int,unsigned int,Kokkos::Threads,void); // __TestSuiteDoUnitTests__(double,long,unsigned long,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<float>,int,unsigned int,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<float>,long,unsigned long,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<double>,int,unsigned int,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<double>,long,unsigned long,Kokkos::Serial,void); Kokkos::Threads::finalize(); } string eval; __EVAL_STRING__(r_val, eval); cout << "Testing Kokkos::Threads::" << eval << endl; return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of dense Herk on Kokkos::Threads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); int max_concurrency = 250000; clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks"); int memory_pool_grain_size = 16; clp.setOption("memory-pool-grain-size", &memory_pool_grain_size, "Memorypool chunk size (12 - 16)"); int mkl_nthreads = 1; clp.setOption("mkl-nthreads", &mkl_nthreads, "MKL threads for nested parallelism"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); int mmin = 1000; clp.setOption("mmin", &mmin, "C(mmin,mmin)"); int mmax = 8000; clp.setOption("mmax", &mmax, "C(mmax,mmax)"); int minc = 1000; clp.setOption("minc", &minc, "Increment of m"); int k = 1024; clp.setOption("k", &k, "A(mmax,k) or A(k,mmax) according to transpose flags"); int mb = 256; clp.setOption("mb", &mb, "Blocksize"); bool check = true; clp.setOption("enable-check", "disable-check", &check, "Flag for check solution"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); std::cout << std::endl << "DenseHerkByBlocks:: Upper, ConjTranspose, Variant::One (external)" << std::endl; r_val = exampleDenseHerkByBlocks <Uplo::Upper,Trans::ConjTranspose,Variant::One,exec_space> (mmin, mmax, minc, k, mb, max_concurrency, memory_pool_grain_size, mkl_nthreads, check, verbose); exec_space::finalize(); } return r_val; }
int main(int argc, char *argv[]) { int np=1, rank=0; int splitrank, splitsize; int rc = 0; nssi_service xfer_svc; int server_index=0; int rank_in_server=0; int transport_index=-1; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Barrier(MPI_COMM_WORLD); Teuchos::oblackholestream blackhole; std::ostream &out = ( rank == 0 ? std::cout : blackhole ); struct xfer_args args; const int num_io_methods = 8; const int io_method_vals[] = { XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC, XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC, XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC, XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC}; const char * io_method_names[] = { "write-encode-sync", "write-encode-async", "write-rdma-sync", "write-rdma-async", "read-encode-sync", "read-encode-async", "read-rdma-sync", "read-rdma-async"}; const int nssi_transport_list[] = { NSSI_RPC_PTL, NSSI_RPC_PTL, NSSI_RPC_IB, NSSI_RPC_IB, NSSI_RPC_GEMINI, NSSI_RPC_GEMINI, NSSI_RPC_BGPDCMF, NSSI_RPC_BGPDCMF, NSSI_RPC_BGQPAMI, NSSI_RPC_BGQPAMI, NSSI_RPC_MPI}; const int num_nssi_transports = 11; const int nssi_transport_vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; const char * nssi_transport_names[] = { "portals", "ptl", "infiniband", "ib", "gemini", "gni", "bgpdcmf", "dcmf", "bgqpami", "pami", "mpi" }; // Initialize arguments args.transport=NSSI_DEFAULT_TRANSPORT; args.len = 1; args.delay = 1; args.io_method = XFER_WRITE_RDMA_SYNC; args.debug_level = LOG_WARN; args.num_trials = 1; args.num_reqs = 1; args.result_file_mode = "a"; args.result_file = ""; args.url_file = ""; args.logfile = ""; args.client_flag = true; args.server_flag = true; args.num_servers = 1; args.num_threads = 0; args.timeout = 500; args.num_retries = 5; args.validate_flag = true; args.kill_server_flag = true; args.block_distribution = true; bool success = true; /** * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line * options to control the behavior of the test code. To evaluate performance, * the "num-trials", "num-reqs", and "len" options control the amount of data transferred * between client and server. The "io-method" selects the type of data transfer. The * server-url specifies the URL of the server. If running as a server, the server-url * provides a recommended URL when initializing the network transport. */ try { //out << Teuchos::Teuchos_Version() << std::endl << std::endl; // Creating an empty command line processor looks like: Teuchos::CommandLineProcessor parser; parser.setDocString( "This example program demonstrates a simple data-transfer service " "built using the NEtwork Scalable Service Interface (Nessie)." ); /* To set and option, it must be given a name and default value. Additionally, each option can be given a help std::string. Although it is not necessary, a help std::string aids a users comprehension of the acceptable command line arguments. Some examples of setting command line options are: */ parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" ); parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" ); parser.setOption("server", "no-server", &args.server_flag, "Run the server" ); parser.setOption("client", "no-client", &args.client_flag, "Run the client"); parser.setOption("len", &args.len, "The number of structures in an input buffer"); parser.setOption("debug",(int*)(&args.debug_level), "Debug level"); parser.setOption("logfile", &args.logfile, "log file"); parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)"); parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial"); parser.setOption("result-file", &args.result_file, "Where to store results"); parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result"); parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server"); parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data"); parser.setOption("num-servers", &args.num_servers, "Number of server processes"); parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process"); parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment"); parser.setOption("block-distribution", "rr-distribution", &args.block_distribution, "Use a block distribution scheme to assign clients to servers"); // Set an enumeration command line option for the io_method parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names, "I/O Methods for the example: \n" "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n" "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n" "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n" "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n" "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n" "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n" "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n" "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous"); // Set an enumeration command line option for the NNTI transport parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names, "NSSI transports (not all are available on every platform): \n" "\t\t\tportals|ptl : Cray or Schutt\n" "\t\t\tinfiniband|ib : libibverbs\n" "\t\t\tgemini|gni : Cray libugni (Gemini or Aries)\n" "\t\t\tbgpdcmf|dcmf : IBM BG/P DCMF\n" "\t\t\tbgqpami|pami : IBM BG/Q PAMI\n" "\t\t\tmpi : isend/irecv implementation\n" ); /* There are also two methods that control the behavior of the command line processor. First, for the command line processor to allow an unrecognized a command line option to be ignored (and only have a warning printed), use: */ parser.recogniseAllOptions(true); /* Second, by default, if the parser finds a command line option it doesn't recognize or finds the --help option, it will throw an std::exception. If you want prevent a command line processor from throwing an std::exception (which is important in this program since we don't have an try/catch around this) when it encounters a unrecognized option or help is printed, use: */ parser.throwExceptions(false); /* We now parse the command line where argc and argv are passed to the parse method. Note that since we have turned off std::exception throwing above we had better grab the return argument so that we can see what happened and act accordingly. */ Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } // Here is where you would use these command line arguments but for this example program // we will just print the help message with the new values of the command-line arguments. //if (rank == 0) // out << "\nPrinting help message with new values of command-line arguments ...\n\n"; //parser.printHelpMessage(argv[0],out); } TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success); log_debug(args.debug_level, "transport_index=%d", transport_index); if (transport_index > -1) { args.transport =nssi_transport_list[transport_index]; args.transport_name=std::string(nssi_transport_names[transport_index]); } args.io_method_name=std::string(io_method_names[args.io_method]); log_debug(args.debug_level, "%d: Finished processing arguments", rank); if (!success) { MPI_Abort(MPI_COMM_WORLD, 1); } if (!args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && !args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } log_level debug_level = args.debug_level; // Communicator used for both client and server (may split if using client and server) MPI_Comm comm; log_debug(debug_level, "%d: Starting xfer-service test", rank); #ifdef TRIOS_ENABLE_COMMSPLITTER if (args.transport == NSSI_RPC_MPI) { MPI_Pcontrol(0); } #endif /** * Since this test can be run as a server, client, or both, we need to play some fancy * MPI games to get the communicators working correctly. If we're executing as both * a client and a server, we split the communicator so that the client thinks its * running by itself. */ int color = 0; // color=0-->server, color=1-->client if (args.client_flag && args.server_flag) { if (np < 2) { log_error(debug_level, "Must use at least 2 MPI processes for client and server mode"); MPI_Abort(MPI_COMM_WORLD, -1); } // Split the communicators. Put all the servers as the first ranks. if (rank < args.num_servers) { color = 0; log_debug(debug_level, "rank=%d is a server", rank); } else { color = 1; // all others are clients log_debug(debug_level, "rank=%d is a client", rank); } MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); } else { if (args.client_flag) { color=1; log_debug(debug_level, "rank=%d is a client", rank); } else if (args.server_flag) { color=0; log_debug(debug_level, "rank=%d is a server", rank); } else { log_error(debug_level, "Must be either a client or a server"); MPI_Abort(MPI_COMM_WORLD, -1); } MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); } MPI_Comm_rank(comm, &splitrank); MPI_Comm_size(comm, &splitsize); log_debug(debug_level, "%d: Finished splitting communicators", rank); /** * Initialize the Nessie interface by specifying a transport, encoding scheme, and a * recommended URL. \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it * is often the case that only one type of transport exists on a particular platform. * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and * \ref NSSI_RPC_IB. We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE * should always be used for the second argument. The URL can be specified (as we did for * the server, or NULL (as we did for the client). This is a recommended value. Use the * \ref nssi_get_url function to find the actual value. */ nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL); // Get the Server URL std::string my_url(NSSI_URL_LEN, '\0'); nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN); // If running as both client and server, gather and distribute // the server URLs to all the clients. if (args.server_flag && args.client_flag) { std::string all_urls; // This needs to be a vector of chars, not a string all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0'); // Have servers gather their URLs if (color == 0) { assert(args.num_servers == splitsize); // these should be equal log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str()); // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD) MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR, &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm); } // broadcast the full set of server urls to all processes MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD); log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size()); if (color == 1) { // For block distribution scheme use the utility function (in xfer_util.cpp) if (args.block_distribution) { // Use this utility function to calculate the server_index xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server); } // Use a simple round robin distribution scheme else { server_index = splitrank % args.num_servers; rank_in_server = splitrank / args.num_servers; } // Copy the server url out of the list of urls int offset = server_index * NSSI_URL_LEN; args.server_url = all_urls.substr(offset, NSSI_URL_LEN); log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str()); } log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str()); } // If running as a client only, have to get the list of servers from the urlfile. else if (!args.server_flag && args.client_flag){ sleep(args.delay); // give server time to get started std::vector< std::string > urlbuf; xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm); args.num_servers = urlbuf.size(); // For block distribution scheme use the utility function (in xfer_util.cpp) if (args.block_distribution) { // Use this utility function to calculate the server_index xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server); } // Use a simple round robin distribution scheme else { server_index = splitrank % args.num_servers; rank_in_server = splitrank / args.num_servers; } args.server_url = urlbuf[server_index]; log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str()); } else if (args.server_flag && !args.client_flag) { args.server_url = my_url; if (args.url_file.empty()) { log_error(debug_level, "Must set --url-file"); MPI_Abort(MPI_COMM_WORLD, -1); } xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm); } // Set the debug level for the xfer service. xfer_debug_level = args.debug_level; // Print the arguments after they've all been set. log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str()); print_args(out, args, "%"); log_debug(debug_level, "server_url=%s", args.server_url.c_str()); //------------------------------------------------------------------------------ /** If we're running this job with a server, the server always executes on node 0. * In this example, the server is a single process. */ if (color == 0) { rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm); log_debug(debug_level, "Server is finished"); } // ------------------------------------------------------------------------------ /** The parallel client will execute this branch. The root node, node 0, of the client connects * connects with the server, using the \ref nssi_get_service function. Then the root * broadcasts the service description to the other clients before starting the main * loop of the client code by calling \ref xfer_client_main. */ else { int i; int client_rank; // get rank within the client communicator MPI_Comm_rank(comm, &client_rank); nssi_init((nssi_rpc_transport)args.transport); // Only one process needs to connect to the service // TODO: Make get_service a collective call (some transports do not need a connection) //if (client_rank == 0) { { // connect to remote server for (i=0; i < args.num_retries; i++) { log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str()); rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc); if (rc == NSSI_OK) break; else if (rc != NSSI_ETIMEDOUT) { log_error(xfer_debug_level, "could not get svc description: %s", nssi_err_str(rc)); break; } } } // wait for all the clients to connect MPI_Barrier(comm); //MPI_Bcast(&rc, 1, MPI_INT, 0, comm); if (rc == NSSI_OK) { if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i); // Broadcast the service description to the other clients //log_debug(xfer_debug_level, "Bcasting svc to other clients"); //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm); log_debug(debug_level, "Starting client main"); // Start the client code xfer_client_main(args, xfer_svc, comm); MPI_Barrier(comm); // Tell one of the clients to kill the server if ((args.kill_server_flag) && (rank_in_server == 0)) { log_debug(debug_level, "%d: Halting xfer service", rank); rc = nssi_kill(&xfer_svc, 0, 5000); } rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc); if (rc != NSSI_OK) { log_error(xfer_debug_level, "could not free svc description: %s", nssi_err_str(rc)); } } else { if (client_rank == 0) log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i); success = false; //MPI_Abort(MPI_COMM_WORLD, -1); } nssi_fini((nssi_rpc_transport)args.transport); } log_debug(debug_level, "%d: clean up nssi", rank); MPI_Barrier(MPI_COMM_WORLD); // Clean up nssi_rpc rc = nssi_rpc_fini((nssi_rpc_transport)args.transport); if (rc != NSSI_OK) log_error(debug_level, "Error in nssi_rpc_fini"); log_debug(debug_level, "%d: MPI_Finalize()", rank); MPI_Finalize(); logger_fini(); if(success && (rc == NSSI_OK)) out << "\nEnd Result: TEST PASSED" << std::endl; else out << "\nEnd Result: TEST FAILED" << std::endl; return ((success && (rc==NSSI_OK)) ? 0 : 1 ); }
int main(int argc, char * argv[]) { using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcpFromRef; Teuchos::GlobalMPISession mpiSession(&argc,&argv,&std::cout); std::string output_file_name = "square_mesh.gen"; int xBlocks=1,yBlocks=1, zBlocks=1; int xElements=1,yElements=1, zElements=1; double x0=0.0, xf=1.0; double y0=0.0, yf=1.0; double z0=0.0, zf=1.0; bool threeD = false; // setup input arguments { Teuchos::CommandLineProcessor clp; clp.throwExceptions(false); clp.setOption("o", &output_file_name, "Mesh output filename"); clp.setOption("3d", "2d", &threeD, "Cube versus square mesh."); clp.setOption("x-blocks", &xBlocks, "Number of blocks in 'x' direction"); clp.setOption("y-blocks", &yBlocks, "Number of blocks in 'y' direction"); clp.setOption("z-blocks", &zBlocks, "Number of blocks in 'z' direction"); clp.setOption("x-elmts", &xElements, "Number of elements in 'x' direction in each block"); clp.setOption("y-elmts", &yElements, "Number of elements in 'y' direction in each block"); clp.setOption("z-elmts", &zElements, "Number of elements in 'z' direction in each block"); clp.setOption("x0", &x0, "Location of left edge"); clp.setOption("xf", &xf, "Location of right edge"); clp.setOption("y0", &y0, "Location of left edge"); clp.setOption("yf", &yf, "Location of right edge"); clp.setOption("z0", &z0, "Location of front(?) edge"); clp.setOption("zf", &zf, "Location of back(?) edge"); Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv,&std::cerr); if(parse_return==Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return -1; TEUCHOS_TEST_FOR_EXCEPTION(parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL, std::runtime_error, "Failed to parse command line!"); } RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList); pl->set("X Blocks",xBlocks); pl->set("Y Blocks",yBlocks); pl->set("X Elements",xElements); pl->set("Y Elements",yElements); pl->set("X0",x0); pl->set("Y0",y0); pl->set("Xf",xf); pl->set("Yf",yf); if(threeD) { pl->set("Z Blocks",zBlocks); pl->set("Z Elements",zElements); pl->set("Z0",z0); pl->set("Zf",zf); } int numprocs = stk_classic::parallel_machine_size(MPI_COMM_WORLD); int rank = stk_classic::parallel_machine_rank(MPI_COMM_WORLD); RCP<panzer_stk_classic::STK_MeshFactory> factory; if(!threeD) factory = Teuchos::rcp(new panzer_stk_classic::SquareQuadMeshFactory); else factory = Teuchos::rcp(new panzer_stk_classic::CubeHexMeshFactory); factory->setParameterList(pl); RCP<panzer_stk_classic::STK_Interface> mesh = factory->buildMesh(MPI_COMM_WORLD); mesh->writeToExodus(output_file_name); return 0; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); // int numa = 0; // clp.setOption("numa", &numa, "Number of numa node"); // int core_per_numa = 0; // clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); std::string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 0; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Level to prune tree from bottom"); int fill_level = -1; clp.setOption("fill-level", &fill_level, "Fill level"); int rows_per_team = 4096; clp.setOption("rows-per-team", &rows_per_team, "Workset size"); int max_concurrency = 250000; clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks"); int max_task_dependence = 3; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); int nrhs = 1; clp.setOption("nrhs", &team_size, "# of right hand side"); int mb = 0; clp.setOption("mb", &mb, "Dense nested blocks size"); int nb = 1; clp.setOption("nb", &nb, "Column block size of right hand side"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads); #if (defined(HAVE_SHYLUTACHO_SCOTCH) && defined(HAVE_SHYLUTACHO_CHOLMOD)) r_val = exampleCholSuperNodesByBlocks<exec_space> (file_input, treecut, prunecut, fill_level, rows_per_team, max_concurrency, max_task_dependence, team_size, nrhs, mb, nb, verbose); #else r_val = -1; std::cout << "Scotch or Cholmod is NOT configured in Trilinos" << std::endl; #endif exec_space::finalize(); } return r_val; }
int main(int argc, char *argv[]) { int np=1, rank=0; int splitrank, splitsize; int rc = 0; nssi_service multicast_svc[2]; int transport_index=-1; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Barrier(MPI_COMM_WORLD); Teuchos::oblackholestream blackhole; std::ostream &out = ( rank == 0 ? std::cout : blackhole ); struct multicast_args args; const int num_io_methods = 6; const int io_method_vals[] = { MULTICAST_EMPTY_REQUEST_SYNC, MULTICAST_EMPTY_REQUEST_ASYNC, MULTICAST_GET_SYNC, MULTICAST_GET_ASYNC, MULTICAST_PUT_SYNC, MULTICAST_PUT_ASYNC}; const char * io_method_names[] = { "empty-request-sync", "empty-request-async", "get-sync", "get-async", "put-sync", "put-async"}; const int nssi_transport_list[] = { NSSI_RPC_PTL, NSSI_RPC_PTL, NSSI_RPC_IB, NSSI_RPC_IB, NSSI_RPC_GEMINI, NSSI_RPC_GEMINI, NSSI_RPC_BGPDCMF, NSSI_RPC_BGPDCMF, NSSI_RPC_BGQPAMI, NSSI_RPC_BGQPAMI, NSSI_RPC_MPI}; const int num_nssi_transports = 11; const int nssi_transport_vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; const char * nssi_transport_names[] = { "portals", "ptl", "infiniband", "ib", "gemini", "gni", "bgpdcmf", "dcmf", "bgqpami", "pami", "mpi" }; // Initialize arguments args.transport=NSSI_DEFAULT_TRANSPORT; args.delay = 1; args.io_method = MULTICAST_EMPTY_REQUEST_SYNC; args.debug_level = LOG_WARN; args.num_trials = 1; args.num_reqs = 1; args.len = 1; args.result_file_mode = "a"; args.result_file = ""; args.url_file[0] = ""; args.url_file[1] = ""; args.logfile = ""; args.client_flag = true; args.server_flag = true; args.timeout = 500; args.num_retries = 5; args.validate_flag = true; args.server_url[0] = ""; args.server_url[1] = ""; bool success = true; /** * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line * options to control the behavior of the test code. To evaluate performance, * the "num-trials", "num-reqs", and "len" options control the amount of data transferred * between client and server. The "io-method" selects the type of data transfer. The * server-url specifies the URL of the server. If running as a server, the server-url * provides a recommended URL when initializing the network transport. */ try { //out << Teuchos::Teuchos_Version() << std::endl << std::endl; // Creating an empty command line processor looks like: Teuchos::CommandLineProcessor parser; parser.setDocString( "This example program demonstrates a simple data-transfer service " "built using the NEtwork Scalable Service Interface (Nessie)." ); /* To set and option, it must be given a name and default value. Additionally, each option can be given a help std::string. Although it is not necessary, a help std::string aids a users comprehension of the acceptable command line arguments. Some examples of setting command line options are: */ parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" ); parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" ); parser.setOption("server", "no-server", &args.server_flag, "Run the server" ); parser.setOption("client", "no-client", &args.client_flag, "Run the client"); parser.setOption("len", &args.len, "The number of structures in an input buffer"); parser.setOption("debug",(int*)(&args.debug_level), "Debug level"); parser.setOption("logfile", &args.logfile, "log file"); parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)"); parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial"); parser.setOption("result-file", &args.result_file, "Where to store results"); parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result"); parser.setOption("server-url-1", &args.server_url[0], "URL client uses to find the server 1"); parser.setOption("server-url-2", &args.server_url[1], "URL client uses to find the server 2"); parser.setOption("server-url-file-1", &args.url_file[0], "File that has URL client uses to find server 1"); parser.setOption("server-url-file-2", &args.url_file[1], "File that has URL client uses to find server 2"); parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data"); // Set an enumeration command line option for the io_method parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names, "I/O Methods for the example: \n" "\t\t\tempty-request-sync : Send an empty request - synchronous\n" "\t\t\tempty-request-async: Send an empty request - asynchronous\n" "\t\t\tget-sync : Servers pull data from client - synchronous\n" "\t\t\tget-async: Servers pull data from client - asynchronous\n" "\t\t\tput-sync : Servers push data from client - synchronous\n" "\t\t\tput-async: Servers push data from client - asynchronous" ); // Set an enumeration command line option for the NNTI transport parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names, "NSSI transports (not all are available on every platform): \n" "\t\t\tportals|ptl : Cray or Schutt\n" "\t\t\tinfiniband|ib : libibverbs\n" "\t\t\tgemini|gni : Cray libugni (Gemini or Aries)\n" "\t\t\tbgpdcmf|dcmf : IBM BG/P DCMF\n" "\t\t\tbgqpami|pami : IBM BG/Q PAMI\n" "\t\t\tmpi : isend/irecv implementation\n" ); /* There are also two methods that control the behavior of the command line processor. First, for the command line processor to allow an unrecognized a command line option to be ignored (and only have a warning printed), use: */ parser.recogniseAllOptions(true); /* Second, by default, if the parser finds a command line option it doesn't recognize or finds the --help option, it will throw an std::exception. If you want prevent a command line processor from throwing an std::exception (which is important in this program since we don't have an try/catch around this) when it encounters a unrecognized option or help is printed, use: */ parser.throwExceptions(false); /* We now parse the command line where argc and argv are passed to the parse method. Note that since we have turned off std::exception throwing above we had better grab the return argument so that we can see what happened and act accordingly. */ Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } // Here is where you would use these command line arguments but for this example program // we will just print the help message with the new values of the command-line arguments. //if (rank == 0) // out << "\nPrinting help message with new values of command-line arguments ...\n\n"; //parser.printHelpMessage(argv[0],out); } TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success); log_debug(LOG_ALL, "transport_index=%d", transport_index); if (transport_index > -1) { args.transport =nssi_transport_list[transport_index]; args.transport_name=std::string(nssi_transport_names[transport_index]); } args.io_method_name=io_method_names[args.io_method]; log_debug(args.debug_level, "%d: Finished processing arguments", rank); if (!success) { MPI_Abort(MPI_COMM_WORLD, 1); } if (!args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && !args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } log_level debug_level = args.debug_level; // Communicator used for both client and server (may split if using client and server) MPI_Comm comm; log_debug(debug_level, "%d: Starting multicast-service test", rank); /** * Since this test can be run as a server, client, or both, we need to play some fancy * MPI games to get the communicators working correctly. If we're executing as both * a client and a server, we split the communicator so that the client thinks its * running by itself. */ if (args.client_flag && args.server_flag) { if (np < 3) { log_error(debug_level, "Must use at least 3 MPI processes for client and server mode"); MPI_Abort(MPI_COMM_WORLD, -1); } // Split the communicators. Processors with color=0 are servers. int color = ((rank == 0)||(rank == 1)) ? 0 : 1; // two server MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); MPI_Comm_rank(comm, &splitrank); MPI_Comm_size(comm, &splitsize); // std::cout << "rank=" << rank << "/" << np << ", color=" << color << // ", new_rank=" << newrank << "/" << newsize << std::endl << std::endl; // // std::cout << "my_url=" << my_url << ", server_url=" << args.server_url << std::endl; } else { MPI_Comm_dup(MPI_COMM_WORLD, &comm); } /** * Initialize the Nessie interface by specifying a transport, encoding scheme, and a * recommended URL. \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it * is often the case that only one type of transport exists on a particular platform. * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and * \ref NSSI_RPC_IB. We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE * should always be used for the second argument. The URL can be specified (as we did for * the server, or NULL (as we did for the client). This is a recommended value. Use the * \ref nssi_get_url function to find the actual value. */ if (args.server_flag && !args.server_url[rank].empty()) { // use the server URL as suggested URL nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, args.server_url[rank].c_str()); } else { nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL); } // Get the Server URL std::string my_url(NSSI_URL_LEN, '\0'); nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN); // Broadcast the server URL to all the clients args.server_url[0].resize(NSSI_URL_LEN, '\0'); args.server_url[1].resize(NSSI_URL_LEN, '\0'); if (args.server_flag && args.client_flag) { args.server_url[0] = my_url; MPI_Bcast(&args.server_url[0][0], args.server_url[0].size(), MPI_CHAR, 0, MPI_COMM_WORLD); args.server_url[1] = my_url; MPI_Bcast(&args.server_url[1][0], args.server_url[1].size(), MPI_CHAR, 1, MPI_COMM_WORLD); } else if (!args.server_flag && args.client_flag){ if (args.server_url[0].empty()) { // check to see if we're supposed to get the URL from a file if (!args.url_file[0].empty()) { // Fetch the server URL from a file sleep(1); log_debug(debug_level, "Reading from file %s", args.url_file[0].c_str()); std::ifstream urlfile (args.url_file[0].c_str()); if (urlfile.is_open()) { if (urlfile.good()) getline(urlfile, args.server_url[0]); } else { log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[0].c_str()); exit(1); } urlfile.close(); log_debug(debug_level, "URL = %s", args.server_url[0].c_str()); } else { log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]"); } } if (args.server_url[1].empty()) { // check to see if we're supposed to get the URL from a file if (!args.url_file[1].empty()) { // Fetch the server URL from a file sleep(1); log_debug(debug_level, "Reading from file %s", args.url_file[1].c_str()); std::ifstream urlfile (args.url_file[1].c_str()); if (urlfile.is_open()) { if (urlfile.good()) getline(urlfile, args.server_url[1]); } else { log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[1].c_str()); exit(1); } urlfile.close(); log_debug(debug_level, "URL = %s", args.server_url[1].c_str()); } else { log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]"); } } } else if (args.server_flag && !args.client_flag) { args.server_url[0] = my_url; // If the url_file value is set, write the url to a file if (!args.url_file[0].empty()) { std::ofstream urlfile (args.url_file[0].c_str()); if (urlfile.is_open()) { urlfile << args.server_url[0].c_str() << std::endl; } urlfile.close(); log_debug(debug_level, "Wrote url to file %s", args.url_file[0].c_str()); } args.server_url[1] = my_url; // If the url_file value is set, write the url to a file if (!args.url_file[1].empty()) { std::ofstream urlfile (args.url_file[1].c_str()); if (urlfile.is_open()) { urlfile << args.server_url[1].c_str() << std::endl; } urlfile.close(); log_debug(debug_level, "Wrote url to file %s", args.url_file[1].c_str()); } } // Set the debug level for the multicast service. multicast_debug_level = args.debug_level; // Print the arguments after they've all been set. print_args(out, args, "%"); //------------------------------------------------------------------------------ /** If we're running this job with a server, the server always executes on nodes 0 and 1. * In this example, the server is two process. */ if (args.server_flag && ((rank == 0)|(rank == 1))) { rc = multicast_server_main(args, comm); log_debug(debug_level, "Server is finished"); } // ------------------------------------------------------------------------------ /** The parallel client will execute this branch. The root node, nodes 0 and 1, of the client connects * connects with the server, using the \ref nssi_get_service function. Then the root * broadcasts the service description to the other clients before starting the main * loop of the client code by calling \ref multicast_client_main. */ else { int i; int client_rank; // get rank within the client communicator MPI_Comm_rank(comm, &client_rank); nssi_init((nssi_rpc_transport)args.transport); // Only one process needs to connect to the service // TODO: Make get_service a collective call (some transports do not need a connection) //if (client_rank == 0) { { sleep(args.delay); // give server time to get started // connect to remote server for (i=0; i < args.num_retries; i++) { log_debug(debug_level, "Try to connect to server: attempt #%d", i); rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[0].c_str(), args.timeout, &multicast_svc[0]); if (rc == NSSI_OK) break; else if (rc != NSSI_ETIMEDOUT) { log_error(multicast_debug_level, "could not get svc description: %s", nssi_err_str(rc)); break; } } // connect to remote server for (i=0; i < args.num_retries; i++) { log_debug(debug_level, "Try to connect to server: attempt #%d", i); rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[1].c_str(), args.timeout, &multicast_svc[1]); if (rc == NSSI_OK) break; else if (rc != NSSI_ETIMEDOUT) { log_error(multicast_debug_level, "could not get svc description: %s", nssi_err_str(rc)); break; } } } //MPI_Bcast(&rc, 1, MPI_INT, 0, comm); if (rc == NSSI_OK) { if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i); // Broadcast the service description to the other clients //log_debug(multicast_debug_level, "Bcasting svc to other clients"); //MPI_Bcast(&multicast_svc, sizeof(nssi_service), MPI_BYTE, 0, comm); log_debug(debug_level, "Starting client main"); // Start the client code multicast_client_main(args, &multicast_svc[0], comm); MPI_Barrier(comm); // Tell one of the clients to kill the server if (client_rank == 0) { log_debug(debug_level, "%d: Halting multicast service", rank); rc = nssi_kill(&multicast_svc[0], 0, 5000); rc = nssi_kill(&multicast_svc[1], 0, 5000); } } else { if (client_rank == 0) log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i); success = false; //MPI_Abort(MPI_COMM_WORLD, -1); } nssi_fini((nssi_rpc_transport)args.transport); } log_debug(debug_level, "%d: clean up nssi", rank); MPI_Barrier(MPI_COMM_WORLD); // Clean up nssi_rpc rc = nssi_rpc_fini((nssi_rpc_transport)args.transport); if (rc != NSSI_OK) log_error(debug_level, "Error in nssi_rpc_fini"); log_debug(debug_level, "%d: MPI_Finalize()", rank); MPI_Finalize(); logger_fini(); if(success && (rc == NSSI_OK)) out << "\nEnd Result: TEST PASSED" << std::endl; else out << "\nEnd Result: TEST FAILED" << std::endl; return ((success && (rc==NSSI_OK)) ? 0 : 1 ); }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program show blockwise information on Kokkos::Serial execution space.\n"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int fill_level = 0; clp.setOption("fill-level", &fill_level, "Fill level"); int league_size = 1; clp.setOption("league-size", &league_size, "League size"); int treecut = 15; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int minblksize = 0; clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Level to prune the tree from bottom"); int seed = 0; clp.setOption("seed", &seed, "Seed for random number generator in graph partition"); int histogram_size = 0; clp.setOption("histogram-size", &histogram_size, "Histogram size"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { Kokkos::initialize(); r_val = exampleStatByBlocks <value_type,ordinal_type,size_type,exec_space,void> (file_input, treecut, minblksize, prunecut, seed, fill_level, league_size, histogram_size, verbose); Kokkos::finalize(); } return r_val; }
int main(int argc, char *argv[]) { int r_val = 0; Teuchos::CommandLineProcessor clp; int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { cout << "Testing Kokkos::Qthread:: Failed in parsing command line input" << endl; return -1; } if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) { return 0; } unsigned threads_count = 0; if (Kokkos::hwloc::available()) { const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); const unsigned one = 1u; threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core); cout << " = Kokkos::hwloc = " << endl << "NUMA count = " << numa_count << endl << "Cores per NUMA = " << cores_per_numa << endl << "Threads per core = " << threads_per_core << endl << "Threads count = " << threads_count << endl; } else { threads_count = thread::hardware_concurrency(); cout << " = std::thread::hardware_concurrency = " << endl << "Threads count = " << threads_count << endl; } if (static_cast<unsigned int>(nthreads) > threads_count) { ++r_val; cout << "Testing Kokkos::Threads:: Failed that the given nthreads is greater than the number of threads counted" << endl; } else { Kokkos::Threads::initialize( nthreads, numa, core_per_numa ); Kokkos::Threads::print_configuration( cout , true /* detailed */ ); const int blk_cnt = 6, blks[blk_cnt] = { 1, 2, 4, 8, 12, 16 }; const int nrhs_cnt = 6, nrhs[nrhs_cnt] = { 1, 2, 4, 8, 12, 16 }; r_val += testTriSolveByBlocksDebug<double,int,int,Kokkos::Threads,void> ("mm_crs_input.mtx", team_size, max_task_dependence, blks[0], nrhs[2]); Kokkos::Threads::finalize(); } string eval; __EVAL_STRING__(r_val, eval); cout << "Testing Kokkos::Threads::" << eval << endl; return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of IChol algorithms on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); bool team_interface = false; clp.setOption("enable-team-interface", "disable-team-interface", &team_interface, "Flag for team interface"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int niter = 10; clp.setOption("niter", &niter, "Number of iterations for testing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { const bool overwrite = true; const int nshepherds = (team_interface ? nthreads/team_size : nthreads); const int nworker_per_shepherd = nthreads/nshepherds; setenv("QT_HWPAR", to_string(nthreads).c_str(), overwrite); setenv("QT_NUM_SHEPHERDS", to_string(nshepherds).c_str(), overwrite); setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite); exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); // r_val = exampleICholPerformance // <value_type,ordinal_type,size_type,exec_space,void> // (file_input, niter, nthreads, max_task_dependence, team_size, team_interface, (nthreads != 1), verbose); exec_space::finalize(); unsetenv("QT_HWPAR"); unsetenv("QT_NUM_SHEPHERDS"); unsetenv("QT_NUM_WORKERS_PER_SHEPHERD"); } return r_val; }
int main (int argc, char *argv[]) { int rc; // command-line arguments int retries = 0; int sig = 0; int timeout = 1000; log_level debug_level = LOG_ERROR; string logfile(""); nssi_service svc; char my_url[NSSI_URL_LEN]; std::string server_url(""); char server_str[NSSI_URL_LEN]; std::string contact_file(""); /* the file where the server's url should be written */ try { Teuchos::CommandLineProcessor parser; // init parser parser.setDocString("Kill an NSSI server"); parser.setOption("verbose", (int *)(&debug_level), "Debug level."); parser.setOption("logfile", &logfile, "Path to file for debug statements"); parser.setOption("server-url", &server_url, "URL of NSSI service"); parser.setOption("contact-file", &contact_file, "Where to read the server's URL"); parser.setOption("timeout", &timeout, "Timout for contacting services (ms)"); parser.setOption("retries", &retries, "Number of times to retry before exiting"); parser.setOption("sig", &sig, "Signal to use for the kill command"); parser.recogniseAllOptions(); parser.throwExceptions(); Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } } catch (...) { exit(-1); } /* initialize the logger */ logger_init(debug_level, logfile.c_str()); if (server_url.c_str()[0]=='\0') { sleep(1); log_debug(debug_level, "reading URL from file"); read_contact_info(contact_file.c_str(), server_str, NSSI_URL_LEN); } else { log_debug(debug_level, "using URL from command-line"); strcpy(server_str, server_url.c_str()); } nssi_rpc_init(NSSI_DEFAULT_TRANSPORT, NSSI_DEFAULT_ENCODE, NULL); nssi_get_url(NSSI_DEFAULT_TRANSPORT, my_url, NSSI_URL_LEN); // sleep(1); log_info(debug_level, "\nTrying to get service at %s", server_str); rc=nssi_get_service(NSSI_DEFAULT_TRANSPORT, server_str, timeout, &svc); if (rc != NSSI_OK) { log_error(admin_debug_level, "could not get svc description: %s", nssi_err_str(rc)); return rc; } rc = kill_svc(&svc, sig, timeout); if (rc == NSSI_ETIMEDOUT) { fprintf(stderr, "Timed out trying to kill (%s)\n", server_url.c_str()); return rc; } else if (rc != NSSI_OK) { log_error(admin_debug_level, "failed to kill service: %s", nssi_err_str(rc)); return rc; } nssi_rpc_fini(NSSI_DEFAULT_TRANSPORT); return 0; }
int main (int argc, char **argv) { #ifdef HAVE_MPI MPI_Init(&argc, &argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif if (Comm.MyPID() == 0) { cout << "Converter from MatrixMarket files to HDF5 files" << endl; cout << "For notes on the usage, execute" << endl; cout << " ./HDF5Converter.exe --help" << endl; cout << endl; } // Creating an empty command line processor looks like: Teuchos::CommandLineProcessor CLP; string MapFileName = "not-set"; string XFileName = "not-set"; string BFileName = "not-set"; string MatrixFileName = "not-set"; string HDF5FileName = "myfile.f5"; string MapHDF5Name = "map"; string XHDF5Name = "X"; string BHDF5Name = "B"; string MatrixHDF5Name = "matrix"; CLP.setOption("in-map", &MapFileName, "map file name"); CLP.setOption("in-matrix", &MatrixFileName, "matrix file name"); CLP.setOption("in-x", &XFileName, "x vector file name"); CLP.setOption("in-b", &BFileName, "b vector file name"); CLP.setOption("output", &HDF5FileName, "name of HDF5 file"); CLP.setOption("out-map", &MapHDF5Name, "map name in HDF5 file"); CLP.setOption("out-matrix", &MatrixHDF5Name, "matrix name in HDF5 file"); CLP.setOption("out-x", &XHDF5Name, "x vector name in HDF5 file"); CLP.setOption("out-b", &BHDF5Name, "b vector name in HDF5 file"); CLP.throwExceptions(false); CLP.parse(argc,argv); Epetra_Map* Map = 0; Epetra_CrsMatrix* Matrix = 0; Epetra_MultiVector* X = 0; Epetra_MultiVector* B = 0; if (MapFileName != "not-set") { if (Comm.MyPID() == 0) cout << "Reading map from " << MapFileName << endl; EpetraExt::MatrixMarketFileToMap(MapFileName.c_str(), Comm, Map); } else { cerr << "You need to specify a map, sorry" << endl; #ifdef HAVE_MPI MPI_Finalize(); #endif exit(EXIT_SUCCESS); } if (XFileName != "not-set") { if (Comm.MyPID() == 0) cout << "Reading vector from " << XFileName << endl; EpetraExt::MatrixMarketFileToMultiVector(XFileName.c_str(), *Map, X); } if (BFileName != "not-set") { if (Comm.MyPID() == 0) cout << "Reading vector from " << BFileName << endl; EpetraExt::MatrixMarketFileToMultiVector(BFileName.c_str(), *Map, B); } if (MatrixFileName != "not-set") { if (Comm.MyPID() == 0) cout << "Reading matrix from " << MatrixFileName << endl; EpetraExt::MatrixMarketFileToCrsMatrix(MatrixFileName.c_str(), *Map, Matrix); } // ================================= // // Open HDF5 file and append data in // // ================================= // EpetraExt::HDF5 HDF5(Comm); HDF5.Create(HDF5FileName); if (Map) HDF5.Write(MapHDF5Name + EpetraExt::toString(Comm.NumProc()), *Map); if (Matrix) HDF5.Write(MatrixHDF5Name, *Matrix); if (X) HDF5.Write(XHDF5Name, *X); if (B) HDF5.Write(BHDF5Name, *B); HDF5.Close(); if (Map) delete Map; if (Matrix) delete Matrix; if (X) delete X; if (B) delete B; #ifdef HAVE_MPI MPI_Finalize(); #endif return(EXIT_SUCCESS); }
int main (int argc, char *argv[]) { // command-line arguments log_level debug_level = LOG_ERROR; string logfile(""); int npes, me, i; int num_servers=1; int num_clients=1; int servers_per_node=1; int clients_per_node=1; int client_weight=10; int server_weight=10; int client_server_weight=5; string server_node_file("SNF.txt"); string client_node_file("CNF.txt"); const int num_graphs = 4; const int graph_vals[] = { GRAPH_COMPLETE, GRAPH_CLIENT_COMPLETE, GRAPH_SERVER_COMPLETE, GRAPH_CLIENT_SERVER_ONLY }; const char * graph_names[] = { "complete", "client-complete", "server-complete", "client-server-only" }; enum graph_connection_t graph_connection=GRAPH_COMPLETE; MPI_Init(&argc, &argv); try { Teuchos::CommandLineProcessor parser; // init parser parser.setDocString("Find node placement of server and client ranks"); parser.setOption("strategy", &strategy, "LibTopoMap strategy (greedy, greedy_route, recursive, rcm, scotch, ascending)"); parser.setOption("num-servers", (int *)(&num_servers), "Number of servers to place"); parser.setOption("num-clients", (int *)(&num_clients), "Number of clients to place"); parser.setOption("servers-per-node", (int *)(&servers_per_node), "Number of server ranks per compute node"); parser.setOption("clients-per-node", (int *)(&clients_per_node), "Number of client ranks per compute node"); parser.setOption("server-weight", (int *)(&server_weight), "Edge weight of server-to-server communication"); parser.setOption("client-weight", (int *)(&client_weight), "Edge weight of client-to-client communication"); parser.setOption("client-server-weight", (int *)(&client_server_weight), "Edge weight of client-to-server communication"); parser.setOption("server-node-file", &server_node_file, "Where to write the server placement results"); parser.setOption("client-node-file", &client_node_file, "Where to write the client placement results"); parser.setOption("verbose", (int *)(&debug_level), "Debug level"); parser.setOption("logfile", &logfile, "Path to file for debug statements"); // Set an enumeration command line option for the connection graph parser.setOption("graph-connection", (int*)&graph_connection, num_graphs, graph_vals, graph_names, "Graph Connections for the example: \n" "\t\t\tcomplete : client-client graph is complete, server-server graph is complete\n" "\t\t\tclient-complete: client-client graph is complete, server-server graph is empty\n" "\t\t\tserver-complete : client-client graph is empty, server-server graph is complete\n" "\t\t\tclient-server-only: client-client graph is empty, server-server graph is empty\n" "\t\t\tIn all cases, each client has an edge to one of the servers\n" ); parser.recogniseAllOptions(); parser.throwExceptions(); Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } } catch (...) { exit(-1); } /* initialize the logger */ logger_init(debug_level, logfile.c_str()); MPI_Comm_size(MPI_COMM_WORLD, &npes); MPI_Comm_rank(MPI_COMM_WORLD, &me); if (me==0) { cout << " ---------------- ARGUMENTS --------------- " << std::endl; cout << " \tstrategy = " << strategy << std::endl; cout << " \tgraph-connection = " << graph_names[graph_connection] << std::endl; cout << " \tnum-servers = " << num_servers << std::endl; cout << " \tnum-clients = " << num_clients << std::endl; cout << " \tservers-per-node = " << servers_per_node << std::endl; cout << " \tclients-per-node = " << clients_per_node << std::endl; cout << " \tserver-weight = " << server_weight << std::endl; cout << " \tclient-weight = " << client_weight << std::endl; cout << " \tclient-server-weight = " << client_server_weight << std::endl; cout << " \tserver-node-file = " << server_node_file << std::endl; cout << " \tclient-node-file = " << client_node_file << std::endl; cout << " \tverbose = " << debug_level << std::endl; cout << " \tlogfile = " << logfile << std::endl; cout << " ------------------------------------------- " << std::endl; } MPI_Barrier(MPI_COMM_WORLD); int *rank_map=(int*)malloc(sizeof(int) * npes); int *nid_map=(int*)malloc(sizeof(int) * npes); construct_graph( rank_map, nid_map, num_servers, num_clients, servers_per_node, clients_per_node, server_weight, client_weight, client_server_weight, graph_connection, 0); if (me == 0) { ofstream snf(server_node_file.c_str(), ios_base::out); ofstream cnf(client_node_file.c_str(), ios_base::out); for (i=0;i<npes;i++) { if (rank_map[i] < num_servers) snf << nid_map[i] << "\t" << i << "\t" << rank_map[i] << std::endl; } for (i=0;i<npes;i++) { if (rank_map[i] >= num_servers) cnf << nid_map[i] << "\t" << i << "\t" << rank_map[i] << std::endl; } snf.close(); cnf.close(); } MPI_Finalize(); return 0; }