int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n");

  int nthreads = 0;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int numa = 0;
  clp.setOption("numa", &numa, "Number of numa node");

  int core_per_numa = 0;
  clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  std::string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int treecut = 0;
  clp.setOption("treecut", &treecut, "Level to cut tree from bottom");

  int prunecut = 0;
  clp.setOption("prunecut", &prunecut, "Level to prune tree from bottom");

  int fill_level = -1;
  clp.setOption("fill-level", &fill_level, "Fill level");

  int rows_per_team = 4096;
  clp.setOption("rows-per-team", &rows_per_team, "Workset size");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  int r_val = 0;
  {
    exec_space::initialize(nthreads, numa, core_per_numa);

#if (defined(HAVE_SHYLUTACHO_SCOTCH) && (defined(HAVE_SHYLUTACHO_CHOLMOD) \
        || defined(HAVE_SHYLUTACHO_AMESOS)))
    r_val = exampleIncompleteSymbolicFactorization<exec_space>
      (file_input, treecut, prunecut, fill_level, rows_per_team, verbose);
#else
    r_val = -1;
    std::cout << "Scotch or Cholmod is NOT configured in Trilinos" << std::endl;
#endif

    exec_space::finalize();
  }
  
  return r_val;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program demonstrates symbolic factorization algorithm on Kokkos::Serial execution space.\n");

  int fill_level = 0;
  clp.setOption("fill-level", &fill_level, "Fill level for incomplete factorization");

  int league_size = 1;
  clp.setOption("league-size", &league_size, "League size");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int treecut = 0;
  clp.setOption("treecut", &treecut, "Level to cut tree from bottom");

  int minblksize = 0;
  clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering");

  int seed = 0;
  clp.setOption("seed", &seed, "Seed for random number generator in graph partition");

  bool scotch = true;
  clp.setOption("enable-scotch", "disable-scotch", &scotch, "Flag for Scotch");

  bool camd = true;
  clp.setOption("enable-camd", "disable-camd", &camd, "Flag for CAMD");

  bool symbolic = true;
  clp.setOption("enable-symbolic", "disable-symbolic", &symbolic, "Flag for sybolic factorization");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  int r_val = 0;
  {
    Kokkos::initialize();

    r_val = exampleSymbolicFactor
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input, treecut, minblksize, seed,
       fill_level, league_size,
       scotch, camd, symbolic, verbose);

    Kokkos::finalize();
  }

  return r_val;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program measure the performance of IChol algorithms on Kokkos::Threads execution space.\n");

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  bool team_interface = false;
  clp.setOption("enable-team-interface", "disable-team-interface",
                &team_interface, "Flag for team interface");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int nrhs = 1;
  clp.setOption("nrhs", &nrhs, "Number of right hand side");

  int nb = nrhs;
  clp.setOption("nb", &nb, "Blocksize of right hand side");

  int niter = 100;
  clp.setOption("niter", &niter, "Number of iterations for testing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;
  
  int r_val = 0;
  {
    exec_space::initialize(nthreads);
    exec_space::print_configuration(cout, true);
    
    r_val = exampleTriSolvePerformance
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input, nrhs, nb, niter, nthreads, max_task_dependence, team_size, team_interface, (nthreads != 1), verbose);

    exec_space::finalize();
  }

  return r_val;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n");

  int nthreads = 0;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int numa = 0;
  clp.setOption("numa", &numa, "Number of numa node");

  int core_per_numa = 0;
  clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  int mmin = 1000;
  clp.setOption("mmin", &mmin, "C(mmin,mmin)");

  int mmax = 8000;
  clp.setOption("mmax", &mmax, "C(mmax,mmax)");

  int minc = 1000;
  clp.setOption("minc", &minc, "Increment of m");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  int r_val = 0;
  {
    exec_space::initialize();
    host_space::initialize(nthreads, numa, core_per_numa);

    r_val = exampleDenseMatrixBase<exec_space>
      (mmin, mmax, minc, 
       verbose);
    
    exec_space::finalize();
    host_space::finalize();
  }
  
  return r_val;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example interface of solver Kokkos::Threads execution space.\n");

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int numa = 0;
  clp.setOption("numa", &numa, "Number of numa node");

  int core_per_numa = 0;
  clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int nrhs = 1;
  clp.setOption("nrhs", &nrhs, "Numer of right hand side");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  int r_val = 0;
  {
    exec_space::initialize(nthreads, numa, core_per_numa);
    exec_space::print_configuration(cout, true);

    r_val = exampleCholDirectSolver
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input,
       nrhs,
       nthreads,
       verbose);

    exec_space::finalize();
  }

  return r_val;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program measure the performance of task data parallelism (barrier) on Kokkos::Threads execution space.\n");

  int nthreads = 0;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int numa = 0;
  clp.setOption("numa", &numa, "Number of numa node");

  int core_per_numa = 0;
  clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

  int league_size = 1;
  clp.setOption("league-size", &league_size, "League size");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  int ntasks = 100;
  clp.setOption("ntasks", &ntasks, "Number of tasks to be spawned");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  int r_val = 0;
  {
    exec_space::initialize(nthreads, numa, core_per_numa);
    exec_space::print_configuration(cout, true);

    r_val = exampleKokkosDataData<exec_space,value_type>((ntasks > MAXTASKS ? MAXTASKS : ntasks), league_size, team_size, verbose);

    exec_space::finalize();
  }

  return r_val;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program demonstrates TriSolveUnblocked algorithm on Kokkos::Serial execution space.\n");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int nrhs = 1; 
  clp.setOption("nrhs", &nrhs, "Number of right hand side"); 

  int nb = nrhs; 
  clp.setOption("nb", &nb, "Blocksize of right hand side"); 

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;
  
  int r_val = 0;
  {
    Kokkos::initialize();
    
    r_val = exampleTriSolveByBlocks
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input, nrhs, nb, 1, max_task_dependence, team_size, verbose);
    
    Kokkos::finalize();
  }

  return r_val;
}
Пример #8
0
int main(int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("Intrepid2::DynRankView_PerfTest01.\n");

  int nworkset = 8;
  clp.setOption("nworkset", &nworkset, "# of worksets");

  int C = 4096;
  clp.setOption("C", &C, "# of Cells in a workset");

  int order = 2;
  clp.setOption("order", &order, "cubature order");

  bool verbose = true;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  Kokkos::initialize();

  if (verbose) 
    std::cout << "Testing datatype double\n";

  const int r_val_double = Intrepid2::Test::ComputeBasis_HGRAD
    <double,Kokkos::Cuda>(nworkset,
                            C,
                            order,
                            verbose);
  return r_val_double;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program measure the performance of Chol algorithms on Kokkos::Threads execution space.\n");

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  int fill_level = 0;
  clp.setOption("fill-level", &fill_level, "Fill level");

  bool team_interface = true;
  clp.setOption("enable-team-interface", "disable-team-interface",
                &team_interface, "Flag for team interface");

  bool mkl_interface = false;
  clp.setOption("enable-mkl-interface", "disable-mkl-interface",
                &mkl_interface, "Flag for MKL interface");

  int stack_size = 8192;
  clp.setOption("stack-size", &stack_size, "Stack size");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int treecut = 15;
  clp.setOption("treecut", &treecut, "Level to cut tree from bottom");

  int minblksize = 0;
  clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering");

  int prunecut = 0;
  clp.setOption("prunecut", &prunecut, "Leve to prune tree from bottom");

  int seed = 0;
  clp.setOption("seed", &seed, "Seed for random number generator in graph partition");

  int niter = 10;
  clp.setOption("niter", &niter, "Number of iterations for testing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;
  
  int r_val = 0;
  {
    const bool overwrite = true;
    const int nshepherds = (team_interface ? nthreads/team_size : nthreads);
    const int nworker_per_shepherd = nthreads/nshepherds;

    setenv("QT_HWPAR",                    to_string(nthreads).c_str(),             overwrite);
    setenv("QT_NUM_SHEPHERDS",            to_string(nshepherds).c_str(),           overwrite);
    setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite);
    setenv("QT_STACK_SIZE",               to_string(stack_size).c_str(),           overwrite);

    exec_space::initialize(nthreads);
    exec_space::print_configuration(cout, true);
    
    r_val = exampleCholPerformance
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input, 
       treecut,
       minblksize,
       prunecut,
       seed,
       niter, 
       nthreads, 
       max_task_dependence, 
       team_size, 
       fill_level,
       nshepherds,
       team_interface, 
       (nthreads != 1), 
       mkl_interface,
       verbose);

    exec_space::finalize();

    unsetenv("QT_HWPAR");
    unsetenv("QT_NUM_SHEPHERDS");
    unsetenv("QT_NUM_WORKERS_PER_SHEPHERD");
    unsetenv("QT_STACK_SIZE");
  }

  return r_val;
}
Пример #10
0
int main(int argc, char *argv[]) {
  int r_val = 0;

  Teuchos::CommandLineProcessor clp;

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) {
    cout << "Testing Kokkos::Qthread:: Failed in parsing command line input" << endl;
    return -1;
  }
  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
    return 0;
  }

  unsigned threads_count = 0;
  if (Kokkos::hwloc::available())  {
    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
    
    const unsigned one = 1u;
    threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core);
 
    cout << " = Kokkos::hwloc = " << endl
         << "NUMA count       = " << numa_count << endl
         << "Cores per NUMA   = " << cores_per_numa << endl
         << "Threads per core = " << threads_per_core << endl
         << "Threads count    = " << threads_count << endl;
  } else {
    threads_count = thread::hardware_concurrency();

    cout << " = std::thread::hardware_concurrency = " << endl
         << "Threads count    = " << threads_count << endl;
  }

  if (static_cast<unsigned int>(nthreads) > threads_count) {
    ++r_val;
    cout << "Testing Kokkos::Threads:: Failed that the given nthreads is greater than the number of threads counted" << endl;
  } else {
    Kokkos::Threads::initialize( nthreads );
    Kokkos::Threads::print_configuration( cout , true /* detailed */ );
    
    //__TestSuiteDoUnitTests__(float,int,unsigned int,Kokkos::Serial,void);
    //__TestSuiteDoUnitTests__(float,long,unsigned long,Kokkos::Serial,void);
    
    __TestSuiteDoUnitTests__(double,int,unsigned int,Kokkos::Threads,void);
    // __TestSuiteDoUnitTests__(double,long,unsigned long,Kokkos::Serial,void);
    
    // __TestSuiteDoUnitTests__(complex<float>,int,unsigned int,Kokkos::Serial,void);
    // __TestSuiteDoUnitTests__(complex<float>,long,unsigned long,Kokkos::Serial,void);
    
    // __TestSuiteDoUnitTests__(complex<double>,int,unsigned int,Kokkos::Serial,void);
    // __TestSuiteDoUnitTests__(complex<double>,long,unsigned long,Kokkos::Serial,void);
    
    Kokkos::Threads::finalize();
  }

  string eval;
  __EVAL_STRING__(r_val, eval);
  cout << "Testing Kokkos::Threads::" << eval << endl;

  return r_val;
}
int main (int argc, char *argv[]) {

    Teuchos::CommandLineProcessor clp;
    clp.setDocString("This example program measure the performance of dense Herk on Kokkos::Threads execution space.\n");

    int nthreads = 0;
    clp.setOption("nthreads", &nthreads, "Number of threads");

    int numa = 0;
    clp.setOption("numa", &numa, "Number of numa node");

    int core_per_numa = 0;
    clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

    int max_concurrency = 250000;
    clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks");

    int memory_pool_grain_size = 16;
    clp.setOption("memory-pool-grain-size", &memory_pool_grain_size, "Memorypool chunk size (12 - 16)");

    int mkl_nthreads = 1;
    clp.setOption("mkl-nthreads", &mkl_nthreads, "MKL threads for nested parallelism");

    bool verbose = false;
    clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

    int mmin = 1000;
    clp.setOption("mmin", &mmin, "C(mmin,mmin)");

    int mmax = 8000;
    clp.setOption("mmax", &mmax, "C(mmax,mmax)");

    int minc = 1000;
    clp.setOption("minc", &minc, "Increment of m");

    int k = 1024;
    clp.setOption("k", &k, "A(mmax,k) or A(k,mmax) according to transpose flags");

    int mb = 256;
    clp.setOption("mb", &mb, "Blocksize");

    bool check = true;
    clp.setOption("enable-check", "disable-check", &check, "Flag for check solution");

    clp.recogniseAllOptions(true);
    clp.throwExceptions(false);

    Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

    if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
    if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

    int r_val = 0;
    {
        exec_space::initialize(nthreads, numa, core_per_numa);

        std::cout << std::endl << "DenseHerkByBlocks:: Upper, ConjTranspose, Variant::One (external)" << std::endl;
        r_val = exampleDenseHerkByBlocks
                <Uplo::Upper,Trans::ConjTranspose,Variant::One,exec_space>
                (mmin, mmax, minc, k, mb,
                 max_concurrency, memory_pool_grain_size, mkl_nthreads,
                 check,
                 verbose);

        exec_space::finalize();
    }

    return r_val;
}
Пример #12
0
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;
    int rc = 0;
    nssi_service xfer_svc;

    int server_index=0;
    int rank_in_server=0;

    int transport_index=-1;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct xfer_args args;

    const int num_io_methods = 8;
    const int io_method_vals[] = {
            XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC,
            XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC,
            XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC,
            XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC};
    const char * io_method_names[] = {
            "write-encode-sync", "write-encode-async",
            "write-rdma-sync", "write-rdma-async",
            "read-encode-sync", "read-encode-async",
            "read-rdma-sync", "read-rdma-async"};

    const int nssi_transport_list[] = {
            NSSI_RPC_PTL,
            NSSI_RPC_PTL,
            NSSI_RPC_IB,
            NSSI_RPC_IB,
            NSSI_RPC_GEMINI,
            NSSI_RPC_GEMINI,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_MPI};

    const int num_nssi_transports = 11;
    const int nssi_transport_vals[] = {
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7,
            8,
            9,
            10
            };
    const char * nssi_transport_names[] = {
            "portals",
            "ptl",
            "infiniband",
            "ib",
            "gemini",
            "gni",
            "bgpdcmf",
            "dcmf",
            "bgqpami",
            "pami",
            "mpi"
    };


    // Initialize arguments
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.len = 1;
    args.delay = 1;
    args.io_method = XFER_WRITE_RDMA_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.num_servers = 1;
    args.num_threads = 0;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.kill_server_flag = true;
    args.block_distribution = true;


    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;
        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set and option, it must be given a name and default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a users comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");
        parser.setOption("num-servers", &args.num_servers, "Number of server processes");
        parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process");
        parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment");
        parser.setOption("block-distribution", "rr-distribution", &args.block_distribution,
                "Use a block distribution scheme to assign clients to servers");

        // Set an enumeration command line option for the io_method
        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n"
                "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n"
                "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n"
                "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n"
                "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n"
                "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n"
                "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n"
                "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous");


        // Set an enumeration command line option for the NNTI transport
        parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals|ptl    : Cray or Schutt\n"
                "\t\t\tinfiniband|ib  : libibverbs\n"
                "\t\t\tgemini|gni     : Cray libugni (Gemini or Aries)\n"
                "\t\t\tbgpdcmf|dcmf   : IBM BG/P DCMF\n"
                "\t\t\tbgqpami|pami   : IBM BG/Q PAMI\n"
                "\t\t\tmpi            : isend/irecv implementation\n"
                );



        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized a command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw an
           std::exception.  If you want prevent a command line processor from
           throwing an std::exception (which is important in this program since
           we don't have an try/catch around this) when it encounters a
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!

        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(args.debug_level, "transport_index=%d", transport_index);
    if (transport_index > -1) {
    	args.transport     =nssi_transport_list[transport_index];
    	args.transport_name=std::string(nssi_transport_names[transport_index]);
    }
	args.io_method_name=std::string(io_method_names[args.io_method]);

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);


    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting xfer-service test", rank);

#ifdef TRIOS_ENABLE_COMMSPLITTER
    if (args.transport == NSSI_RPC_MPI) {
        MPI_Pcontrol(0);
    }
#endif

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks its
     * running by itself.
     */
    int color = 0;  // color=0-->server, color=1-->client
    if (args.client_flag && args.server_flag) {
        if (np < 2) {
            log_error(debug_level, "Must use at least 2 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Put all the servers as the first ranks.
        if (rank < args.num_servers) {
            color = 0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            color = 1;  // all others are clients
            log_debug(debug_level, "rank=%d is a client", rank);
        }

        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }
    else {
        if (args.client_flag) {
            color=1;
            log_debug(debug_level, "rank=%d is a client", rank);
        }
        else if (args.server_flag) {
            color=0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            log_error(debug_level, "Must be either a client or a server");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }

    MPI_Comm_rank(comm, &splitrank);
    MPI_Comm_size(comm, &splitsize);

    log_debug(debug_level, "%d: Finished splitting communicators", rank);

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server, or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // If running as both client and server, gather and distribute
    // the server URLs to all the clients.
    if (args.server_flag && args.client_flag) {

        std::string all_urls;

        // This needs to be a vector of chars, not a string
        all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0');

        // Have servers gather their URLs
        if (color == 0) {
            assert(args.num_servers == splitsize);  // these should be equal

            log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str());

            // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD)
            MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR,
                    &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm);
        }

        // broadcast the full set of server urls to all processes
        MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD);

        log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size());

        if (color == 1) {

            // For block distribution scheme use the utility function (in xfer_util.cpp)
            if (args.block_distribution) {
                // Use this utility function to calculate the server_index
                xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
            }

            // Use a simple round robin distribution scheme
            else {
                server_index   = splitrank % args.num_servers;
                rank_in_server = splitrank / args.num_servers;
            }

            // Copy the server url out of the list of urls
            int offset = server_index * NSSI_URL_LEN;

            args.server_url = all_urls.substr(offset, NSSI_URL_LEN);

            log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
        }


        log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str());
    }

    // If running as a client only, have to get the list of servers from the urlfile.
    else if (!args.server_flag && args.client_flag){

        sleep(args.delay);  // give server time to get started

        std::vector< std::string > urlbuf;
        xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm);
        args.num_servers = urlbuf.size();

        // For block distribution scheme use the utility function (in xfer_util.cpp)
        if (args.block_distribution) {
            // Use this utility function to calculate the server_index
            xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
        }

        // Use a simple round robin distribution scheme
        else {
            server_index   = splitrank % args.num_servers;
            rank_in_server = splitrank / args.num_servers;
        }

        args.server_url = urlbuf[server_index];
        log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
    }

    else if (args.server_flag && !args.client_flag) {
        args.server_url = my_url;

        if (args.url_file.empty()) {
            log_error(debug_level, "Must set --url-file");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm);
    }

    // Set the debug level for the xfer service.
    xfer_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str());

    print_args(out, args, "%");

    log_debug(debug_level, "server_url=%s", args.server_url.c_str());

    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on node 0.
     *  In this example, the server is a single process.
     */
    if (color == 0) {
        rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
     /**  The parallel client will execute this branch.  The root node, node 0, of the client connects
      *   connects with the server, using the \ref nssi_get_service function.  Then the root
      *   broadcasts the service description to the other clients before starting the main
      *   loop of the client code by calling \ref xfer_client_main.
      */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {


            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str());
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(xfer_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
        }

        // wait for all the clients to connect
        MPI_Barrier(comm);

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(xfer_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            xfer_client_main(args, xfer_svc, comm);


            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if ((args.kill_server_flag) && (rank_in_server == 0)) {
                log_debug(debug_level, "%d: Halting xfer service", rank);
                rc = nssi_kill(&xfer_svc, 0, 5000);
            }
            rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc);
            if (rc != NSSI_OK) {
                log_error(xfer_debug_level, "could not free svc description: %s",
                        nssi_err_str(rc));
            }
        }

        else {
            if (client_rank == 0)
                log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    if(success && (rc == NSSI_OK))
    	out << "\nEnd Result: TEST PASSED" << std::endl;
    else
    	out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
Пример #13
0
int main(int argc, char * argv[]) 
{
   using Teuchos::RCP;
   using Teuchos::rcp;
   using Teuchos::rcpFromRef;
 
   Teuchos::GlobalMPISession mpiSession(&argc,&argv,&std::cout);

   std::string output_file_name = "square_mesh.gen";

   int xBlocks=1,yBlocks=1, zBlocks=1;
   int xElements=1,yElements=1, zElements=1;
   double x0=0.0, xf=1.0;
   double y0=0.0, yf=1.0;
   double z0=0.0, zf=1.0;
   bool threeD = false;
 
   // setup input arguments
   { 
     Teuchos::CommandLineProcessor clp;
     clp.throwExceptions(false);

     clp.setOption("o", &output_file_name, "Mesh output filename");

     clp.setOption("3d", "2d", &threeD, "Cube versus square mesh.");

     clp.setOption("x-blocks", &xBlocks, "Number of blocks in 'x' direction");
     clp.setOption("y-blocks", &yBlocks, "Number of blocks in 'y' direction");
     clp.setOption("z-blocks", &zBlocks, "Number of blocks in 'z' direction");

     clp.setOption("x-elmts", &xElements, "Number of elements in 'x' direction in each block");
     clp.setOption("y-elmts", &yElements, "Number of elements in 'y' direction in each block");
     clp.setOption("z-elmts", &zElements, "Number of elements in 'z' direction in each block");

     clp.setOption("x0", &x0, "Location of left edge");
     clp.setOption("xf", &xf, "Location of right edge");

     clp.setOption("y0", &y0, "Location of left edge");
     clp.setOption("yf", &yf, "Location of right edge");

     clp.setOption("z0", &z0, "Location of front(?) edge");
     clp.setOption("zf", &zf, "Location of back(?) edge");
 
     Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv,&std::cerr);

     if(parse_return==Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED)
       return -1;
      
     TEUCHOS_TEST_FOR_EXCEPTION(parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL, 
                                std::runtime_error, "Failed to parse command line!");
   }
  

   RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList);
   pl->set("X Blocks",xBlocks);
   pl->set("Y Blocks",yBlocks);
   pl->set("X Elements",xElements);
   pl->set("Y Elements",yElements);
   pl->set("X0",x0);
   pl->set("Y0",y0);
   pl->set("Xf",xf);
   pl->set("Yf",yf);

   if(threeD) {
     pl->set("Z Blocks",zBlocks);
     pl->set("Z Elements",zElements);
     pl->set("Z0",z0);
     pl->set("Zf",zf);
   }

   int numprocs = stk_classic::parallel_machine_size(MPI_COMM_WORLD);
   int rank = stk_classic::parallel_machine_rank(MPI_COMM_WORLD);

   RCP<panzer_stk_classic::STK_MeshFactory> factory; 
   if(!threeD)
     factory = Teuchos::rcp(new panzer_stk_classic::SquareQuadMeshFactory); 
   else
     factory = Teuchos::rcp(new panzer_stk_classic::CubeHexMeshFactory); 

   factory->setParameterList(pl);

   RCP<panzer_stk_classic::STK_Interface> mesh = factory->buildMesh(MPI_COMM_WORLD);
   mesh->writeToExodus(output_file_name);

   return 0;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n");

  int nthreads = 0;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  // int numa = 0;
  // clp.setOption("numa", &numa, "Number of numa node");

  // int core_per_numa = 0;
  // clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  std::string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int treecut = 0;
  clp.setOption("treecut", &treecut, "Level to cut tree from bottom");

  int prunecut = 0;
  clp.setOption("prunecut", &prunecut, "Level to prune tree from bottom");

  int fill_level = -1;
  clp.setOption("fill-level", &fill_level, "Fill level");

  int rows_per_team = 4096;
  clp.setOption("rows-per-team", &rows_per_team, "Workset size");

  int max_concurrency = 250000;
  clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks");

  int max_task_dependence = 3;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  int nrhs = 1;
  clp.setOption("nrhs", &team_size, "# of right hand side");

  int mb = 0;
  clp.setOption("mb", &mb, "Dense nested blocks size");

  int nb = 1;
  clp.setOption("nb", &nb, "Column block size of right hand side");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  int r_val = 0;
  {
    exec_space::initialize(nthreads);

#if (defined(HAVE_SHYLUTACHO_SCOTCH) && defined(HAVE_SHYLUTACHO_CHOLMOD))
    r_val = exampleCholSuperNodesByBlocks<exec_space>
      (file_input, 
       treecut, prunecut, fill_level, rows_per_team, 
       max_concurrency, max_task_dependence, team_size,
       nrhs, mb, nb,
       verbose);
#else
    r_val = -1;
    std::cout << "Scotch or Cholmod is NOT configured in Trilinos" << std::endl;
#endif

    exec_space::finalize();
  }
  
  return r_val;
}
Пример #15
0
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;
    int rc = 0;
    nssi_service multicast_svc[2];

    int transport_index=-1;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct multicast_args args;

    const int num_io_methods = 6;
    const int io_method_vals[] = {
            MULTICAST_EMPTY_REQUEST_SYNC, MULTICAST_EMPTY_REQUEST_ASYNC,
            MULTICAST_GET_SYNC,           MULTICAST_GET_ASYNC,
            MULTICAST_PUT_SYNC,           MULTICAST_PUT_ASYNC};
    const char * io_method_names[] = {
            "empty-request-sync", "empty-request-async",
            "get-sync",           "get-async",
            "put-sync",           "put-async"};

    const int nssi_transport_list[] = {
            NSSI_RPC_PTL,
            NSSI_RPC_PTL,
            NSSI_RPC_IB,
            NSSI_RPC_IB,
            NSSI_RPC_GEMINI,
            NSSI_RPC_GEMINI,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_MPI};

    const int num_nssi_transports = 11;
    const int nssi_transport_vals[] = {
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7,
            8,
            9,
            10
            };
    const char * nssi_transport_names[] = {
            "portals",
            "ptl",
            "infiniband",
            "ib",
            "gemini",
            "gni",
            "bgpdcmf",
            "dcmf",
            "bgqpami",
            "pami",
            "mpi"
    };


    // Initialize arguments
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.delay = 1;
    args.io_method = MULTICAST_EMPTY_REQUEST_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.len = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file[0] = "";
    args.url_file[1] = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.server_url[0] = "";
    args.server_url[1] = "";

    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;
        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set and option, it must be given a name and default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a users comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-1", &args.server_url[0], "URL client uses to find the server 1");
        parser.setOption("server-url-2", &args.server_url[1], "URL client uses to find the server 2");
        parser.setOption("server-url-file-1", &args.url_file[0], "File that has URL client uses to find server 1");
        parser.setOption("server-url-file-2", &args.url_file[1], "File that has URL client uses to find server 2");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");

        // Set an enumeration command line option for the io_method

        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\tempty-request-sync : Send an empty request - synchronous\n"
                "\t\t\tempty-request-async: Send an empty request - asynchronous\n"
                "\t\t\tget-sync : Servers pull data from client - synchronous\n"
                "\t\t\tget-async: Servers pull data from client - asynchronous\n"
                "\t\t\tput-sync : Servers push data from client - synchronous\n"
                "\t\t\tput-async: Servers push data from client - asynchronous"
                );

        // Set an enumeration command line option for the NNTI transport
        parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals|ptl    : Cray or Schutt\n"
                "\t\t\tinfiniband|ib  : libibverbs\n"
                "\t\t\tgemini|gni     : Cray libugni (Gemini or Aries)\n"
                "\t\t\tbgpdcmf|dcmf   : IBM BG/P DCMF\n"
                "\t\t\tbgqpami|pami   : IBM BG/Q PAMI\n"
                "\t\t\tmpi            : isend/irecv implementation\n"
                );



        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized a command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw an
           std::exception.  If you want prevent a command line processor from
           throwing an std::exception (which is important in this program since
           we don't have an try/catch around this) when it encounters a
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!

        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(LOG_ALL, "transport_index=%d", transport_index);
    if (transport_index > -1) {
    	args.transport     =nssi_transport_list[transport_index];
    	args.transport_name=std::string(nssi_transport_names[transport_index]);
    }
    args.io_method_name=io_method_names[args.io_method];

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);


    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }


    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting multicast-service test", rank);

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks its
     * running by itself.
     */
    if (args.client_flag && args.server_flag) {
        if (np < 3) {
            log_error(debug_level, "Must use at least 3 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Processors with color=0 are servers.

        int color = ((rank == 0)||(rank == 1)) ? 0 : 1; // two server
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);

        MPI_Comm_rank(comm, &splitrank);
        MPI_Comm_size(comm, &splitsize);

        //    std::cout << "rank=" << rank << "/" << np << ", color=" << color <<
        //            ", new_rank=" << newrank << "/" << newsize << std::endl << std::endl;
        //
        //    std::cout << "my_url=" << my_url <<  ", server_url=" << args.server_url << std::endl;
    }
    else {
        MPI_Comm_dup(MPI_COMM_WORLD, &comm);
    }

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server, or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    if (args.server_flag && !args.server_url[rank].empty()) {
        // use the server URL as suggested URL
        nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, args.server_url[rank].c_str());
    }
    else {
        nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);
    }

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // Broadcast the server URL to all the clients
    args.server_url[0].resize(NSSI_URL_LEN, '\0');
    args.server_url[1].resize(NSSI_URL_LEN, '\0');
    if (args.server_flag && args.client_flag) {
        args.server_url[0] = my_url;
        MPI_Bcast(&args.server_url[0][0], args.server_url[0].size(), MPI_CHAR, 0, MPI_COMM_WORLD);
        args.server_url[1] = my_url;
        MPI_Bcast(&args.server_url[1][0], args.server_url[1].size(), MPI_CHAR, 1, MPI_COMM_WORLD);
    }

    else if (!args.server_flag && args.client_flag){
        if (args.server_url[0].empty()) {

            // check to see if we're supposed to get the URL from a file
            if (!args.url_file[0].empty()) {
                // Fetch the server URL from a file
                sleep(1);
                log_debug(debug_level, "Reading from file %s", args.url_file[0].c_str());
                std::ifstream urlfile (args.url_file[0].c_str());
                if (urlfile.is_open()) {
                    if (urlfile.good())
                        getline(urlfile, args.server_url[0]);
                }
                else {
                    log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[0].c_str());
                    exit(1);
                }
                urlfile.close();
                log_debug(debug_level, "URL = %s", args.server_url[0].c_str());
            }
            else {
                log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]");
            }
        }
        if (args.server_url[1].empty()) {

            // check to see if we're supposed to get the URL from a file
            if (!args.url_file[1].empty()) {
                // Fetch the server URL from a file
                sleep(1);
                log_debug(debug_level, "Reading from file %s", args.url_file[1].c_str());
                std::ifstream urlfile (args.url_file[1].c_str());
                if (urlfile.is_open()) {
                    if (urlfile.good())
                        getline(urlfile, args.server_url[1]);
                }
                else {
                    log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[1].c_str());
                    exit(1);
                }
                urlfile.close();
                log_debug(debug_level, "URL = %s", args.server_url[1].c_str());
            }
            else {
                log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]");
            }
        }
    }

    else if (args.server_flag && !args.client_flag) {
        args.server_url[0] = my_url;
        // If the url_file value is set, write the url to a file
        if (!args.url_file[0].empty()) {
            std::ofstream urlfile (args.url_file[0].c_str());
            if (urlfile.is_open()) {
                urlfile << args.server_url[0].c_str() << std::endl;
            }
            urlfile.close();
            log_debug(debug_level, "Wrote url to file %s", args.url_file[0].c_str());
        }

        args.server_url[1] = my_url;
        // If the url_file value is set, write the url to a file
        if (!args.url_file[1].empty()) {
            std::ofstream urlfile (args.url_file[1].c_str());
            if (urlfile.is_open()) {
                urlfile << args.server_url[1].c_str() << std::endl;
            }
            urlfile.close();
            log_debug(debug_level, "Wrote url to file %s", args.url_file[1].c_str());
        }
    }



    // Set the debug level for the multicast service.
    multicast_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    print_args(out, args, "%");


    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on nodes 0 and 1.
     *  In this example, the server is two process.
     */
    if (args.server_flag && ((rank == 0)|(rank == 1))) {
        rc = multicast_server_main(args, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
     /**  The parallel client will execute this branch.  The root node, nodes 0 and 1, of the client connects
      *   connects with the server, using the \ref nssi_get_service function.  Then the root
      *   broadcasts the service description to the other clients before starting the main
      *   loop of the client code by calling \ref multicast_client_main.
      */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {

            sleep(args.delay);  // give server time to get started

            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d", i);
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[0].c_str(), args.timeout, &multicast_svc[0]);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(multicast_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d", i);
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[1].c_str(), args.timeout, &multicast_svc[1]);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(multicast_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
        }

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(multicast_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&multicast_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            multicast_client_main(args, &multicast_svc[0], comm);


            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if (client_rank == 0) {
                log_debug(debug_level, "%d: Halting multicast service", rank);
                rc = nssi_kill(&multicast_svc[0], 0, 5000);
                rc = nssi_kill(&multicast_svc[1], 0, 5000);
            }
        }

        else {
            if (client_rank == 0)
                log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    if(success && (rc == NSSI_OK))
      out << "\nEnd Result: TEST PASSED" << std::endl;
    else
        out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program show blockwise information on Kokkos::Serial execution space.\n");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int fill_level = 0;
  clp.setOption("fill-level", &fill_level, "Fill level");

  int league_size = 1;
  clp.setOption("league-size", &league_size, "League size");

  int treecut = 15;
  clp.setOption("treecut", &treecut, "Level to cut tree from bottom");

  int minblksize = 0;
  clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering");

  int prunecut = 0;
  clp.setOption("prunecut", &prunecut, "Level to prune the tree from bottom");

  int seed = 0;
  clp.setOption("seed", &seed, "Seed for random number generator in graph partition");

  int histogram_size = 0;
  clp.setOption("histogram-size", &histogram_size, "Histogram size");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;
  
  int r_val = 0;
  {
    Kokkos::initialize();
    
    r_val = exampleStatByBlocks
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input,                                                                    
       treecut,                                                                          
       minblksize,
       prunecut,
       seed,                                                                             
       fill_level,                                                                       
       league_size,                                                                      
       histogram_size,                                                                      
       verbose);
    
    Kokkos::finalize();
  }

  return r_val;
}
Пример #17
0
int main(int argc, char *argv[]) {
  int r_val = 0;

  Teuchos::CommandLineProcessor clp;

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int numa = 0;
  clp.setOption("numa", &numa, "Number of numa node");

  int core_per_numa = 0;
  clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) {
    cout << "Testing Kokkos::Qthread:: Failed in parsing command line input" << endl;
    return -1;
  }
  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
    return 0;
  }

  unsigned threads_count = 0;
  if (Kokkos::hwloc::available())  {
    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
    
    const unsigned one = 1u;
    threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core);
 
    cout << " = Kokkos::hwloc = " << endl
         << "NUMA count       = " << numa_count << endl
         << "Cores per NUMA   = " << cores_per_numa << endl
         << "Threads per core = " << threads_per_core << endl
         << "Threads count    = " << threads_count << endl;
  } else {
    threads_count = thread::hardware_concurrency();

    cout << " = std::thread::hardware_concurrency = " << endl
         << "Threads count    = " << threads_count << endl;
  }

  if (static_cast<unsigned int>(nthreads) > threads_count) {
    ++r_val;
    cout << "Testing Kokkos::Threads:: Failed that the given nthreads is greater than the number of threads counted" << endl;
  } else {
    Kokkos::Threads::initialize( nthreads, numa, core_per_numa );
    Kokkos::Threads::print_configuration( cout , true /* detailed */ );

    const int blk_cnt = 6, blks[blk_cnt] = { 1, 2, 4, 8, 12, 16 };
    const int nrhs_cnt = 6, nrhs[nrhs_cnt] = { 1, 2, 4, 8, 12, 16 };

    r_val += testTriSolveByBlocksDebug<double,int,int,Kokkos::Threads,void>
      ("mm_crs_input.mtx", team_size, max_task_dependence, blks[0], nrhs[2]);
    
    Kokkos::Threads::finalize();
  }

  string eval;
  __EVAL_STRING__(r_val, eval);
  cout << "Testing Kokkos::Threads::" << eval << endl;

  return r_val;
}
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program measure the performance of IChol algorithms on Kokkos::Threads execution space.\n");

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  bool team_interface = false;
  clp.setOption("enable-team-interface", "disable-team-interface",
                &team_interface, "Flag for team interface");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int niter = 10;
  clp.setOption("niter", &niter, "Number of iterations for testing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;
  
  int r_val = 0;
  {
    const bool overwrite = true;
    const int nshepherds = (team_interface ? nthreads/team_size : nthreads);
    const int nworker_per_shepherd = nthreads/nshepherds;

    setenv("QT_HWPAR",                    to_string(nthreads).c_str(),             overwrite);
    setenv("QT_NUM_SHEPHERDS",            to_string(nshepherds).c_str(),           overwrite);
    setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite);

    exec_space::initialize(nthreads);
    exec_space::print_configuration(cout, true);
    
    // r_val = exampleICholPerformance
    //   <value_type,ordinal_type,size_type,exec_space,void>
    //   (file_input, niter, nthreads, max_task_dependence, team_size, team_interface, (nthreads != 1), verbose);

    exec_space::finalize();

    unsetenv("QT_HWPAR");
    unsetenv("QT_NUM_SHEPHERDS");
    unsetenv("QT_NUM_WORKERS_PER_SHEPHERD");
  }

  return r_val;
}
Пример #19
0
int
main (int argc, char *argv[])
{
    int rc;

    // command-line arguments
    int retries = 0;
    int sig = 0;
    int timeout = 1000;
    log_level debug_level = LOG_ERROR;
    string logfile("");

    nssi_service svc;
    char my_url[NSSI_URL_LEN];

    std::string server_url("");
    char        server_str[NSSI_URL_LEN];
    std::string contact_file("");   /* the file where the server's url should be written */


    try {
        Teuchos::CommandLineProcessor parser;

        // init parser
        parser.setDocString("Kill an NSSI server");

        parser.setOption("verbose", (int *)(&debug_level), "Debug level.");
        parser.setOption("logfile", &logfile, "Path to file for debug statements");
        parser.setOption("server-url", &server_url, "URL of NSSI service");
        parser.setOption("contact-file", &contact_file, "Where to read the server's URL");
        parser.setOption("timeout", &timeout, "Timout for contacting services (ms)");
        parser.setOption("retries", &retries, "Number of times to retry before exiting");
        parser.setOption("sig", &sig, "Signal to use for the kill command");

        parser.recogniseAllOptions();
        parser.throwExceptions();

        Teuchos::CommandLineProcessor::EParseCommandLineReturn
        parseReturn= parser.parse( argc, argv );
        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }
        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!
        }
    }
    catch (...) {
        exit(-1);
    }

    /* initialize the logger */
    logger_init(debug_level, logfile.c_str());

    if (server_url.c_str()[0]=='\0') {
        sleep(1);
        log_debug(debug_level, "reading URL from file");
        read_contact_info(contact_file.c_str(), server_str, NSSI_URL_LEN);
    } else {
        log_debug(debug_level, "using URL from command-line");
        strcpy(server_str, server_url.c_str());
    }

    nssi_rpc_init(NSSI_DEFAULT_TRANSPORT, NSSI_DEFAULT_ENCODE, NULL);

    nssi_get_url(NSSI_DEFAULT_TRANSPORT, my_url, NSSI_URL_LEN);

//    sleep(1);
    log_info(debug_level, "\nTrying to get service at %s", server_str);

    rc=nssi_get_service(NSSI_DEFAULT_TRANSPORT, server_str, timeout, &svc);
    if (rc != NSSI_OK) {
        log_error(admin_debug_level, "could not get svc description: %s",
                nssi_err_str(rc));
        return rc;
    }
    rc = kill_svc(&svc, sig, timeout);
    if (rc == NSSI_ETIMEDOUT) {
        fprintf(stderr, "Timed out trying to kill (%s)\n",
                server_url.c_str());
        return rc;
    }
    else if (rc != NSSI_OK) {
        log_error(admin_debug_level, "failed to kill service: %s",
                nssi_err_str(rc));
        return rc;
    }

    nssi_rpc_fini(NSSI_DEFAULT_TRANSPORT);

    return 0;
}
Пример #20
0
int main (int argc, char **argv)
{
#ifdef HAVE_MPI
  MPI_Init(&argc, &argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  if (Comm.MyPID() == 0)
  {
    cout << "Converter from MatrixMarket files to HDF5 files" << endl;
    cout << "For notes on the usage, execute" << endl;
    cout << "  ./HDF5Converter.exe --help" << endl;
    cout << endl;
  }

  // Creating an empty command line processor looks like:
  Teuchos::CommandLineProcessor CLP;

  string MapFileName    = "not-set";
  string XFileName      = "not-set";
  string BFileName      = "not-set";
  string MatrixFileName = "not-set";
  string HDF5FileName   = "myfile.f5";
  string MapHDF5Name    = "map";
  string XHDF5Name      = "X";
  string BHDF5Name      = "B";
  string MatrixHDF5Name = "matrix";

  CLP.setOption("in-map",    &MapFileName,    "map file name");
  CLP.setOption("in-matrix", &MatrixFileName, "matrix file name");
  CLP.setOption("in-x",      &XFileName,      "x vector file name");
  CLP.setOption("in-b",      &BFileName,      "b vector file name");
  CLP.setOption("output",    &HDF5FileName,   "name of HDF5 file");
  CLP.setOption("out-map",    &MapHDF5Name,    "map name in HDF5 file");
  CLP.setOption("out-matrix", &MatrixHDF5Name, "matrix name in HDF5 file");
  CLP.setOption("out-x",      &XHDF5Name,      "x vector name in HDF5 file");
  CLP.setOption("out-b",      &BHDF5Name,      "b vector name in HDF5 file");

  CLP.throwExceptions(false);
  CLP.parse(argc,argv);

  Epetra_Map* Map = 0;
  Epetra_CrsMatrix* Matrix = 0;
  Epetra_MultiVector* X = 0;
  Epetra_MultiVector* B = 0;

  if (MapFileName != "not-set")
  {
    if (Comm.MyPID() == 0)
      cout << "Reading map from " << MapFileName << endl;

    EpetraExt::MatrixMarketFileToMap(MapFileName.c_str(), Comm, Map);
  }
  else
  {
    cerr << "You need to specify a map, sorry" << endl;
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    exit(EXIT_SUCCESS);
  }

  if (XFileName != "not-set")
  {
    if (Comm.MyPID() == 0)
      cout << "Reading vector from " << XFileName << endl;

    EpetraExt::MatrixMarketFileToMultiVector(XFileName.c_str(), *Map, X);
  }

  if (BFileName != "not-set")
  {
    if (Comm.MyPID() == 0)
      cout << "Reading vector from " << BFileName << endl;

    EpetraExt::MatrixMarketFileToMultiVector(BFileName.c_str(), *Map, B);
  }

  if (MatrixFileName != "not-set")
  {
    if (Comm.MyPID() == 0)
      cout << "Reading matrix from " << MatrixFileName << endl;

    EpetraExt::MatrixMarketFileToCrsMatrix(MatrixFileName.c_str(), *Map, Matrix);
  }

  // ================================= //
  // Open HDF5 file and append data in //
  // ================================= //
  
  EpetraExt::HDF5 HDF5(Comm);

  HDF5.Create(HDF5FileName);

  if (Map)
    HDF5.Write(MapHDF5Name + EpetraExt::toString(Comm.NumProc()), *Map);
  if (Matrix)
    HDF5.Write(MatrixHDF5Name, *Matrix);
  if (X)
    HDF5.Write(XHDF5Name, *X);
  if (B)
    HDF5.Write(BHDF5Name, *B);
  HDF5.Close();

  if (Map) delete Map;
  if (Matrix) delete Matrix;
  if (X) delete X;
  if (B) delete B;

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return(EXIT_SUCCESS);
}
Пример #21
0
int
main (int argc, char *argv[])
{
    // command-line arguments
    log_level debug_level = LOG_ERROR;
    string logfile("");

    int npes, me, i;

    int num_servers=1;
    int num_clients=1;

    int servers_per_node=1;
    int clients_per_node=1;

    int client_weight=10;
    int server_weight=10;
    int client_server_weight=5;

    string server_node_file("SNF.txt");
    string client_node_file("CNF.txt");

    const int num_graphs = 4;
    const int graph_vals[] = {
            GRAPH_COMPLETE,
            GRAPH_CLIENT_COMPLETE,
            GRAPH_SERVER_COMPLETE,
            GRAPH_CLIENT_SERVER_ONLY
    };
    const char * graph_names[] = {
            "complete",
            "client-complete",
            "server-complete",
            "client-server-only"
    };
    enum graph_connection_t graph_connection=GRAPH_COMPLETE;

    MPI_Init(&argc, &argv);

    try {
        Teuchos::CommandLineProcessor parser;

        // init parser
        parser.setDocString("Find node placement of server and client ranks");

        parser.setOption("strategy", &strategy, "LibTopoMap strategy (greedy, greedy_route, recursive, rcm, scotch, ascending)");
        parser.setOption("num-servers", (int *)(&num_servers), "Number of servers to place");
        parser.setOption("num-clients", (int *)(&num_clients), "Number of clients to place");
        parser.setOption("servers-per-node", (int *)(&servers_per_node), "Number of server ranks per compute node");
        parser.setOption("clients-per-node", (int *)(&clients_per_node), "Number of client ranks per compute node");
        parser.setOption("server-weight", (int *)(&server_weight), "Edge weight of server-to-server communication");
        parser.setOption("client-weight", (int *)(&client_weight), "Edge weight of client-to-client communication");
        parser.setOption("client-server-weight", (int *)(&client_server_weight), "Edge weight of client-to-server communication");
        parser.setOption("server-node-file", &server_node_file, "Where to write the server placement results");
        parser.setOption("client-node-file", &client_node_file, "Where to write the client placement results");
        parser.setOption("verbose", (int *)(&debug_level), "Debug level");
        parser.setOption("logfile", &logfile, "Path to file for debug statements");
        // Set an enumeration command line option for the connection graph
        parser.setOption("graph-connection", (int*)&graph_connection, num_graphs, graph_vals, graph_names,
                "Graph Connections for the example: \n"
                "\t\t\tcomplete : client-client graph is complete, server-server graph is complete\n"
                "\t\t\tclient-complete: client-client graph is complete, server-server graph is empty\n"
                "\t\t\tserver-complete : client-client graph is empty, server-server graph is complete\n"
                "\t\t\tclient-server-only: client-client graph is empty, server-server graph is empty\n"
                "\t\t\tIn all cases, each client has an edge to one of the servers\n"
                );

        parser.recogniseAllOptions();
        parser.throwExceptions();

        Teuchos::CommandLineProcessor::EParseCommandLineReturn
        parseReturn= parser.parse( argc, argv );
        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }
        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!
        }
    }
    catch (...) {
        exit(-1);
    }

    /* initialize the logger */
    logger_init(debug_level, logfile.c_str());

    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    if (me==0) {
        cout << " ----------------  ARGUMENTS --------------- " << std::endl;
        cout << " \tstrategy             = " << strategy << std::endl;
        cout << " \tgraph-connection     = " << graph_names[graph_connection] << std::endl;
        cout << " \tnum-servers          = " << num_servers << std::endl;
        cout << " \tnum-clients          = " << num_clients << std::endl;
        cout << " \tservers-per-node     = " << servers_per_node << std::endl;
        cout << " \tclients-per-node     = " << clients_per_node << std::endl;
        cout << " \tserver-weight        = " << server_weight << std::endl;
        cout << " \tclient-weight        = " << client_weight << std::endl;
        cout << " \tclient-server-weight = " << client_server_weight << std::endl;
        cout << " \tserver-node-file     = " << server_node_file << std::endl;
        cout << " \tclient-node-file     = " << client_node_file << std::endl;
        cout << " \tverbose              = " << debug_level << std::endl;
        cout << " \tlogfile              = " << logfile << std::endl;
        cout << " ------------------------------------------- " << std::endl;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    int *rank_map=(int*)malloc(sizeof(int) * npes);
    int *nid_map=(int*)malloc(sizeof(int) * npes);

    construct_graph(
            rank_map,
            nid_map,
            num_servers,
            num_clients,
            servers_per_node,
            clients_per_node,
            server_weight,
            client_weight,
            client_server_weight,
            graph_connection,
            0);

    if (me == 0) {
        ofstream snf(server_node_file.c_str(), ios_base::out);
        ofstream cnf(client_node_file.c_str(), ios_base::out);

        for (i=0;i<npes;i++) {
            if (rank_map[i] < num_servers)
                snf << nid_map[i] << "\t" << i << "\t" << rank_map[i] << std::endl;
        }
        for (i=0;i<npes;i++) {
            if (rank_map[i] >= num_servers)
                cnf << nid_map[i] << "\t" << i << "\t" << rank_map[i] << std::endl;
        }

        snf.close();
        cnf.close();
    }

    MPI_Finalize();

    return 0;
}