Exemplo n.º 1
0
  void FC_FUNC(putintotrilinosmatrix,PUTINTOTRILINOSMATRIX)
               (int& rowInd, int& colInd, double& val) {
    // FC_FUNC-mangled entry point that stores the single value `val` at global
    // position (rowInd, colInd) of the operator held by `interface`.
    //
    //   rowInd  global row index; must satisfy MyGID() on the interface row map
    //   colInd  global column index
    //   val     value to store
    //
    // While the sparsity pattern is not yet set the entry is inserted; once it
    // is set the existing entry is replaced, and any nonzero return code from
    // the replace is raised as std::logic_error.  Exceptions are reported on
    // std::cerr by the Teuchos catch block, which clears `success` (a flag
    // declared outside this block); the process then exits with status 1.
    try {
      const Epetra_Map& map = interface->getRowMap();

      // Refuse rows that the row map does not list as local.
      TEUCHOS_TEST_FOR_EXCEPTION(!map.MyGID(rowInd), std::logic_error,
         "Error: Trilinos matrix has detected an invalide row entry (row="
          << rowInd << ",col=" << colInd << ",val=" << val << ").\n");

      Epetra_CrsMatrix& crs = *(interface->getOperator());

      if (interface->isSparsitySet()) {
        // Pattern already fixed: overwrite the stored value in place.
        const int ierr = crs.ReplaceGlobalValues(rowInd, 1, &val, &colInd);

        TEUCHOS_TEST_FOR_EXCEPTION(ierr != 0, std::logic_error,
           "Error: Trilinos matrix has detected a new entry ("
               << rowInd << ", " << colInd << ", " << val
               << ")\n\t that did not exist before.");
      }
      else {
        // Pattern still open: add the entry.  Negative codes are fatal,
        // positive codes are only reported.
        const int insertCode = crs.InsertGlobalValues(rowInd, 1, &val, &colInd);
        if (insertCode != 0) {
          cout << (insertCode < 0 ? "Error Code for " : "Warning Code for ")
               << rowInd << "  " << colInd << "  = ("<< insertCode <<")"<<endl;
          if (insertCode < 0) exit(1);
        }
      }
    }
    TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
    if (!success) exit(1);
  }
Exemplo n.º 2
0
  void FC_FUNC(sumintotrilinosmatrix,SUMINTOTRILINOSMATRIX)
	       (int& rowInd, int& numEntries, int* colInd, double* val) {
   // FC_FUNC-mangled entry point that adds `numEntries` values to global row
   // `rowInd` of the operator held by `interface`.
   //
   //   rowInd      global row index
   //   numEntries  number of (column, value) pairs in colInd / val
   //   colInd      global column indices, numEntries long
   //   val         values to add, numEntries long
   //
   // While the sparsity pattern is not yet set the values are inserted (the
   // original author notes that duplicates inserted at this stage are summed
   // later); afterwards they are summed into existing entries and a nonzero
   // return code is raised as std::logic_error.  Exceptions are reported on
   // std::cerr by the Teuchos catch block, which clears `success` (declared
   // outside this block); the process then exits with status 1.
   try {
    Epetra_CrsMatrix& matrix = *(interface->getOperator());

    if (!interface->isSparsitySet()) {
      // The matrix has not been "FillComplete()"ed. First fill of time step.
      // Inserted values at this stage will be summed together later
      int ierr = matrix.InsertGlobalValues(rowInd, numEntries, val, colInd);
      if (ierr != 0) {
        // Only read colInd[0] when the caller actually supplied an entry;
        // -1 stands in for "no column" on an empty row.
        const int firstCol = (numEntries > 0) ? colInd[0] : -1;
        if (ierr<0) {cout << "Error Code for " << rowInd << "  " << firstCol << "  = ("<< ierr <<")"<<endl; exit(1);}
        else cout << "Warning Code for " << rowInd << "  " << firstCol << "  = ("<< ierr <<")"<<endl;
      }
    }
    else {
      // Subsequent matrix fills of each time step.
      int ierr = matrix.SumIntoGlobalValues(rowInd, numEntries, val, colInd);

      // The message is only built when the test fires; guard the array reads
      // so an empty row cannot trigger an out-of-bounds access here.
      TEUCHOS_TEST_FOR_EXCEPTION(ierr != 0, std::logic_error,
	 "Error: Trilinos matrix has detected a new entry ("
             << rowInd << ", " << (numEntries > 0 ? colInd[0] : -1)
             << ", " << (numEntries > 0 ? val[0] : 0.0)
             << ")\n\t that did not exist before.");
    }
   }
   TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
   if (!success) exit(1);
  }
// Test driver: initialises MPI (and Kokkos when HAVE_MUELU_KOKKOSCORE is
// defined), runs every registered Teuchos unit test and returns the value
// reported by runUnitTestsFromMain(), or EXIT_FAILURE if an exception escaped.
//
// When ParallelDebug is defined each rank first prints its host name and PID
// (so a debugger can be attached) and rank 0 waits for a key press before all
// ranks continue past a barrier.
int main(int argc, char* argv[])
{
  Teuchos::GlobalMPISession mpiSession(&argc, &argv);

#ifdef HAVE_MUELU_KOKKOSCORE
  Kokkos::initialize(argc , argv);
#endif

  bool success = false;
  bool verbose = true;
  int ierr = -1;
  try {
    // Note: the command line parameter --linAlgebra= is taken into account.
    // Xpetra parameters are added to the Teuchos::CommandLineProcessor of Teuchos::UnitTestRepository in MueLu_TestHelpers.cpp

#ifdef ParallelDebug
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    int mypid = comm->getRank();

    if (mypid  == 0) std::cout << "Host and Process Ids for tasks" << std::endl;
    for (int i = 0; i <comm->getSize(); i++) {
      if (i == mypid ) {
        // Zero-initialise and force a terminator: gethostname() may fail or
        // truncate without writing a trailing NUL.
        char hostname[80] = {0};
        gethostname(hostname, sizeof(hostname));
        hostname[sizeof(hostname) - 1] = '\0';
        int pid = getpid();
        // Print directly instead of formatting into a fixed 80-byte buffer,
        // which a long host name could overflow.  The trailing blank line
        // matches the previous output.
        printf("Host: %s\tMPI rank: %d,\tPID: %d\n\tattach %d\n\tcontinue\n\n",
            hostname, mypid, pid, pid);
        fflush(stdout);
        sleep(1);
      }
    }

    if (mypid == 0) {
      printf( "** Enter a character to continue > "); fflush(stdout);
      char go = ' ';
      scanf("%c",&go);
    }
    comm->barrier();
#endif

    ierr = Teuchos::UnitTestRepository::runUnitTestsFromMain(argc, argv);

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MUELU_KOKKOSCORE
  Kokkos::finalize();
#endif

  return (success ? ierr : EXIT_FAILURE);
}
Exemplo n.º 4
0
 void FC_FUNC(zeroouttrilinosmatrix,ZEROOUTTRILINOSMATRIX)() {
  // FC_FUNC-mangled entry point that resets every stored value of the
  // interface operator to 0.0.  Nothing is done while the sparsity pattern has
  // not been set yet (first call, matrix still empty).  Exceptions are
  // reported on std::cerr by the Teuchos catch block, which clears `success`
  // (declared outside this block); the process then exits with status 1.
  try {
   const bool patternIsSet = interface->isSparsitySet();
   if (patternIsSet) {
     interface->getOperator()->PutScalar(0.0);
   }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
  if (!success) exit(1);
 }
Exemplo n.º 5
0
 void FC_FUNC(restoretrilinosmatrix,RESTORTRILINOSMATRIX) (int* i) {
  try {
   if (*i==0)
     interface->updateOperator(savedMatrix_A);
   else if (*i==1)
     interface->updateOperator(savedMatrix_C);
   else
     assert(false);
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
  if (!success) exit(1);
 }
Exemplo n.º 6
0
  void FC_FUNC(solvewithtrilinos,SOLVEWITHTRILINOS)
               (double* rhs, double* answer, double& elapsedTime) {
   // FC_FUNC-mangled entry point that solves the linear system defined by the
   // interface operator.
   //
   //   rhs          right-hand side; wrapped as a View on the interface row map
   //   answer       receives a copy of the solution vector `soln`
   //   elapsedTime  not written by this function (the timing code is disabled)
   //
   // Uses the file-level objects soln, thyraOper, lows and lowsFactory.
   // Exceptions are reported on std::cerr by the Teuchos catch block, which
   // clears `success` (declared outside this block); the process then exits
   // with status 1.
   try {
    // Timing is currently switched off:
    //Teuchos::Time linearTime("LinearTime"); linearTime.start();

    // The sparsity pattern must be locked in before the operator is used.
    if (!interface->isSparsitySet()) {
      interface->finalizeSparsity();
#ifdef CHECK_FOR_ROGUE_COLUMNS
      check_for_rogue_columns(*interface->getOperator());
#endif
    }

    const Epetra_Map& rowMap = interface->getRowMap();
    Teuchos::RCP<Epetra_Vector> solVec = soln;
    Teuchos::RCP<Epetra_Vector> rhsVec =
      Teuchos::rcp(new Epetra_Vector(View, rowMap, rhs));

    // Wrap operator and vectors for Thyra.
    thyraOper = Thyra::epetraLinearOp(interface->getOperator());
    Teuchos::RCP<Thyra::MultiVectorBase<double> > wrappedRhs =
      Thyra::create_Vector(rhsVec, thyraOper->range() );
    Teuchos::RCP<Thyra::MultiVectorBase<double> > wrappedSol =
      Thyra::create_Vector(solVec, thyraOper->domain() );

    lows = Thyra::linearOpWithSolve(*lowsFactory, thyraOper);

    // Define WRITE_OUT_LINEAR_SYSTEM to dump the first system to disk.
    // Per the original author: this function is called twice per Picard
    // iteration (twice per outer GMRES step for Newton solves), so
    // solvecount==1 is the first system and solvecount==51 would be the
    // 26th Picard iteration.
#ifdef WRITE_OUT_LINEAR_SYSTEM
    solvecount++;
    if (solvecount==1) {
      EpetraExt::RowMatrixToMatrixMarketFile("matrix1", *interface->getOperator());
      EpetraExt::MultiVectorToMatrixMarketFile("vector1", *rhsVec);
    }
#endif

    Thyra::SolveStatus<double> solveStatus =
      Thyra::solve(*lows, Thyra::NOTRANS, *wrappedRhs, wrappedSol.ptr());

    if (printDetails) linSolveDetails(solveStatus);

    // Hand the result back to the caller's array.
    soln->ExtractCopy(answer);

    //elapsedTime = linearTime.stop(); *out << "Total time elapsed for calling Solve(): " << elapsedTime << endl;
   }
   TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
   if (!success) exit(1);
  }
Exemplo n.º 7
0
  void FC_FUNC(matvecwithtrilinos,MATVECWITHTRILINOS)
	       (double* x, double* answer) {
   // FC_FUNC-mangled entry point that computes answer = A * x, where A is the
   // operator held by `interface`.
   //
   //   x       input vector; wrapped as a View on the interface row map
   //   answer  receives a copy of the product
   //
   // A negative return code from Multiply() is raised as std::logic_error so
   // that a failed product is not silently copied into `answer`.  Exceptions
   // are reported on std::cerr by the Teuchos catch block, which clears
   // `success` (declared outside this block); the process then exits with
   // status 1.
   try {
    const Epetra_Map& map = interface->getRowMap();

    // Non-owning view of the caller's data; no heap allocation is needed.
    const Epetra_Vector epetra_x(View, map, x);

    Epetra_Vector y(map);
    const int ierr = interface->getOperator()->Multiply(false, epetra_x, y);
    TEUCHOS_TEST_FOR_EXCEPTION(ierr < 0, std::logic_error,
	 "Error: Trilinos matrix-vector product failed with code "
             << ierr << ".\n");

    y.ExtractCopy(answer);
   }
   TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
   if (!success) exit(1);
  }
Exemplo n.º 8
0
// Skeleton NOX test driver.  Builds the "Printing" parameter sublist, creates
// a NOX::Utils object from it, prints a pass/fail line and returns
// EXIT_SUCCESS or EXIT_FAILURE.  If a first command-line argument is present
// and starts with "-v" the full set of output categories is enabled;
// any other first argument restricts output to errors; with no argument the
// "Output Information" entry is left unset.
int main(int argc, char *argv[]) {

  bool success = false;
  bool verbose = false;
  try {
    // Printing configuration
    Teuchos::ParameterList noxParams;
    Teuchos::ParameterList& printParams = noxParams.sublist("Printing");
    printParams.set("Output Precision", 5);

    if (argc > 1) {
      const bool verboseRequested = (argv[1][0]=='-' && argv[1][1]=='v');
      if (!verboseRequested) {
        printParams.set("Output Information", NOX::Utils::Error);
      }
      else {
        printParams.set("Output Information",
            NOX::Utils::OuterIteration +
            NOX::Utils::OuterIterationStatusTest +
            NOX::Utils::InnerIteration +
            NOX::Utils::Parameters +
            NOX::Utils::Details +
            NOX::Utils::Warning +
            NOX::Utils::TestDetails);
      }
    }
    NOX::Utils printing(printParams);

    // Announce which test is running when test details are requested.
    const bool showTestDetails = printing.isPrintType(NOX::Utils::TestDetails);
    if (showTestDetails) {
      std::cout << "Starting lapack/NOX_NewTest/NOX_NewTest.exe" << std::endl;
    }

    // *** Insert your testing here! ***

    success = true;

    std::cout << (success ? "Test passed!" : "Test failed!") << std::endl;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  if (success)
    return EXIT_SUCCESS;
  return EXIT_FAILURE;
}
// Runs the DenseLinAlgPack test suite with output sent to std::cout.
// Returns 0 (after printing "End Result: TEST PASSED") when the suite reports
// success and no exception was caught, and 1 otherwise.
int main(int argc, char* argv[]) {

  bool success = true;

  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  try {

    // A false return from the suite marks the run as failed.
    if (!DenseLinAlgPack::TestingPack::TestDenseLinAlgPack( &std::cout ))
      success = false;

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);

  if (!success)
    return 1;

  std::cout << "\nEnd Result: TEST PASSED" << std::endl;
  return 0;

}
Exemplo n.º 10
0
  void FC_FUNC(savetrilinosmatrix,SAVETRILINOSMATRIX) (int* i) {
   // FC_FUNC-mangled entry point that stores a copy of the current interface
   // operator for later use.
   //
   //   i  selector: *i == 0 copies into savedMatrix_A, *i == 1 into
   //      savedMatrix_C, *i == 2 into both (two independent copies);
   //      any other value trips assert(false).
   //
   // The sparsity pattern is finalized first if that has not happened yet.
   // Exceptions are reported on std::cerr by the Teuchos catch block, which
   // clears `success` (declared outside this block); the process then exits
   // with status 1.
   try {
    const bool patternIsSet = interface->isSparsitySet();
    if (!patternIsSet) {
      interface->finalizeSparsity();
#ifdef CHECK_FOR_ROGUE_COLUMNS
      check_for_rogue_columns(*interface->getOperator());
#endif
    }

    switch (*i) {
      case 0:
        savedMatrix_A = Teuchos::rcp(new Epetra_CrsMatrix(*(interface->getOperator())));
        break;
      case 1:
        savedMatrix_C = Teuchos::rcp(new Epetra_CrsMatrix(*(interface->getOperator())));
        break;
      case 2:
        savedMatrix_A = Teuchos::rcp(new Epetra_CrsMatrix(*(interface->getOperator())));
        savedMatrix_C = Teuchos::rcp(new Epetra_CrsMatrix(*(interface->getOperator())));
        break;
      default:
        assert(false);
        break;
    }
   }
   TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
   if (!success) exit(1);
  }
Exemplo n.º 11
0
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;
    int rc = 0;
    nssi_service multicast_svc[2];

    int transport_index=-1;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct multicast_args args;

    const int num_io_methods = 6;
    const int io_method_vals[] = {
            MULTICAST_EMPTY_REQUEST_SYNC, MULTICAST_EMPTY_REQUEST_ASYNC,
            MULTICAST_GET_SYNC,           MULTICAST_GET_ASYNC,
            MULTICAST_PUT_SYNC,           MULTICAST_PUT_ASYNC};
    const char * io_method_names[] = {
            "empty-request-sync", "empty-request-async",
            "get-sync",           "get-async",
            "put-sync",           "put-async"};

    const int nssi_transport_list[] = {
            NSSI_RPC_PTL,
            NSSI_RPC_PTL,
            NSSI_RPC_IB,
            NSSI_RPC_IB,
            NSSI_RPC_GEMINI,
            NSSI_RPC_GEMINI,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_MPI};

    const int num_nssi_transports = 11;
    const int nssi_transport_vals[] = {
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7,
            8,
            9,
            10
            };
    const char * nssi_transport_names[] = {
            "portals",
            "ptl",
            "infiniband",
            "ib",
            "gemini",
            "gni",
            "bgpdcmf",
            "dcmf",
            "bgqpami",
            "pami",
            "mpi"
    };


    // Initialize arguments
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.delay = 1;
    args.io_method = MULTICAST_EMPTY_REQUEST_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.len = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file[0] = "";
    args.url_file[1] = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.server_url[0] = "";
    args.server_url[1] = "";

    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;
        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set and option, it must be given a name and default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a users comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-1", &args.server_url[0], "URL client uses to find the server 1");
        parser.setOption("server-url-2", &args.server_url[1], "URL client uses to find the server 2");
        parser.setOption("server-url-file-1", &args.url_file[0], "File that has URL client uses to find server 1");
        parser.setOption("server-url-file-2", &args.url_file[1], "File that has URL client uses to find server 2");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");

        // Set an enumeration command line option for the io_method

        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\tempty-request-sync : Send an empty request - synchronous\n"
                "\t\t\tempty-request-async: Send an empty request - asynchronous\n"
                "\t\t\tget-sync : Servers pull data from client - synchronous\n"
                "\t\t\tget-async: Servers pull data from client - asynchronous\n"
                "\t\t\tput-sync : Servers push data from client - synchronous\n"
                "\t\t\tput-async: Servers push data from client - asynchronous"
                );

        // Set an enumeration command line option for the NNTI transport
        parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals|ptl    : Cray or Schutt\n"
                "\t\t\tinfiniband|ib  : libibverbs\n"
                "\t\t\tgemini|gni     : Cray libugni (Gemini or Aries)\n"
                "\t\t\tbgpdcmf|dcmf   : IBM BG/P DCMF\n"
                "\t\t\tbgqpami|pami   : IBM BG/Q PAMI\n"
                "\t\t\tmpi            : isend/irecv implementation\n"
                );



        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized a command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw an
           std::exception.  If you want prevent a command line processor from
           throwing an std::exception (which is important in this program since
           we don't have an try/catch around this) when it encounters a
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!

        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(LOG_ALL, "transport_index=%d", transport_index);
    if (transport_index > -1) {
    	args.transport     =nssi_transport_list[transport_index];
    	args.transport_name=std::string(nssi_transport_names[transport_index]);
    }
    args.io_method_name=io_method_names[args.io_method];

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);


    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }


    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting multicast-service test", rank);

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks its
     * running by itself.
     */
    if (args.client_flag && args.server_flag) {
        if (np < 3) {
            log_error(debug_level, "Must use at least 3 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Processors with color=0 are servers.

        int color = ((rank == 0)||(rank == 1)) ? 0 : 1; // two server
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);

        MPI_Comm_rank(comm, &splitrank);
        MPI_Comm_size(comm, &splitsize);

        //    std::cout << "rank=" << rank << "/" << np << ", color=" << color <<
        //            ", new_rank=" << newrank << "/" << newsize << std::endl << std::endl;
        //
        //    std::cout << "my_url=" << my_url <<  ", server_url=" << args.server_url << std::endl;
    }
    else {
        MPI_Comm_dup(MPI_COMM_WORLD, &comm);
    }

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server, or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    if (args.server_flag && !args.server_url[rank].empty()) {
        // use the server URL as suggested URL
        nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, args.server_url[rank].c_str());
    }
    else {
        nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);
    }

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // Broadcast the server URL to all the clients
    args.server_url[0].resize(NSSI_URL_LEN, '\0');
    args.server_url[1].resize(NSSI_URL_LEN, '\0');
    if (args.server_flag && args.client_flag) {
        args.server_url[0] = my_url;
        MPI_Bcast(&args.server_url[0][0], args.server_url[0].size(), MPI_CHAR, 0, MPI_COMM_WORLD);
        args.server_url[1] = my_url;
        MPI_Bcast(&args.server_url[1][0], args.server_url[1].size(), MPI_CHAR, 1, MPI_COMM_WORLD);
    }

    else if (!args.server_flag && args.client_flag){
        if (args.server_url[0].empty()) {

            // check to see if we're supposed to get the URL from a file
            if (!args.url_file[0].empty()) {
                // Fetch the server URL from a file
                sleep(1);
                log_debug(debug_level, "Reading from file %s", args.url_file[0].c_str());
                std::ifstream urlfile (args.url_file[0].c_str());
                if (urlfile.is_open()) {
                    if (urlfile.good())
                        getline(urlfile, args.server_url[0]);
                }
                else {
                    log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[0].c_str());
                    exit(1);
                }
                urlfile.close();
                log_debug(debug_level, "URL = %s", args.server_url[0].c_str());
            }
            else {
                log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]");
            }
        }
        if (args.server_url[1].empty()) {

            // check to see if we're supposed to get the URL from a file
            if (!args.url_file[1].empty()) {
                // Fetch the server URL from a file
                sleep(1);
                log_debug(debug_level, "Reading from file %s", args.url_file[1].c_str());
                std::ifstream urlfile (args.url_file[1].c_str());
                if (urlfile.is_open()) {
                    if (urlfile.good())
                        getline(urlfile, args.server_url[1]);
                }
                else {
                    log_error(debug_level, "Failed to open server_url_file=%s", args.url_file[1].c_str());
                    exit(1);
                }
                urlfile.close();
                log_debug(debug_level, "URL = %s", args.server_url[1].c_str());
            }
            else {
                log_error(debug_level, "Need to set --server-url-1=[ADDR] or --server-url-file-1=[PATH]");
            }
        }
    }

    else if (args.server_flag && !args.client_flag) {
        args.server_url[0] = my_url;
        // If the url_file value is set, write the url to a file
        if (!args.url_file[0].empty()) {
            std::ofstream urlfile (args.url_file[0].c_str());
            if (urlfile.is_open()) {
                urlfile << args.server_url[0].c_str() << std::endl;
            }
            urlfile.close();
            log_debug(debug_level, "Wrote url to file %s", args.url_file[0].c_str());
        }

        args.server_url[1] = my_url;
        // If the url_file value is set, write the url to a file
        if (!args.url_file[1].empty()) {
            std::ofstream urlfile (args.url_file[1].c_str());
            if (urlfile.is_open()) {
                urlfile << args.server_url[1].c_str() << std::endl;
            }
            urlfile.close();
            log_debug(debug_level, "Wrote url to file %s", args.url_file[1].c_str());
        }
    }



    // Set the debug level for the multicast service.
    multicast_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    print_args(out, args, "%");


    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on nodes 0 and 1.
     *  In this example, the server is two process.
     */
    if (args.server_flag && ((rank == 0)|(rank == 1))) {
        rc = multicast_server_main(args, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
     /**  The parallel client will execute this branch.  The root node, nodes 0 and 1, of the client connects
      *   connects with the server, using the \ref nssi_get_service function.  Then the root
      *   broadcasts the service description to the other clients before starting the main
      *   loop of the client code by calling \ref multicast_client_main.
      */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {

            sleep(args.delay);  // give server time to get started

            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d", i);
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[0].c_str(), args.timeout, &multicast_svc[0]);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(multicast_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d", i);
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url[1].c_str(), args.timeout, &multicast_svc[1]);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(multicast_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
        }

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(multicast_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&multicast_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            multicast_client_main(args, &multicast_svc[0], comm);


            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if (client_rank == 0) {
                log_debug(debug_level, "%d: Halting multicast service", rank);
                rc = nssi_kill(&multicast_svc[0], 0, 5000);
                rc = nssi_kill(&multicast_svc[1], 0, 5000);
            }
        }

        else {
            if (client_rank == 0)
                log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    if(success && (rc == NSSI_OK))
      out << "\nEnd Result: TEST PASSED" << std::endl;
    else
        out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
Exemplo n.º 12
0
int main(int argc, char *argv[])
{
  int ierr = 0, i;

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif

  bool success = false;
  bool verbose = true;
  try {
    //int myRank = Comm.MyPID();

    //int numGlobalElements = 10000000;
    int numGlobalElements = 100;

    Teuchos::CommandLineProcessor cmdp(false,true);
    cmdp.setOption("numGlobalElements",&numGlobalElements,"Global problem size.");
    if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      throw -1;
    }

    Epetra_Map Map(numGlobalElements, 0, Comm);

    int NumMyElements = Map.NumMyElements();

    std::vector<int> MyGlobalElements(NumMyElements);
    Map.MyGlobalElements(&MyGlobalElements[0]);

    int NumNz = 3;
    // std::vector<int> NumNz(NumMyElements);
    // for (i=0; i<NumMyElements; i++)
    //     if (MyGlobalElements[i]==0 || MyGlobalElements[i] == numGlobalElements-1)
    //       NumNz[i] = 2;
    //     else
    //       NumNz[i] = 3;
    //  Epetra_CrsMatrix A(Copy, Map, &NumNz[0]);

    MemoryUsageStart("Epetra");
    PrintMemoryUsage("Initial memory usage", "epetra-init.heap");

    Epetra_CrsMatrix A(Copy, Map, NumNz);

    PrintMemoryUsage("Memory after CrsMatrix constructor", "epetra-after-ctor.heap");

    std::vector<double> Values(2);
    Values[0] = -1.0; Values[1] = -1.0;
    std::vector<int> Indices(2);
    double two = 2.0;
    int NumEntries;

    for (i=0; i<NumMyElements; i++) {

      if (MyGlobalElements[i]==0) {
        Indices[0] = 1;
        NumEntries = 1;
      } else if (MyGlobalElements[i] == numGlobalElements-1) {
        Indices[0] = numGlobalElements-2;
        NumEntries = 1;
      } else {
        Indices[0] = MyGlobalElements[i]-1;
        Indices[1] = MyGlobalElements[i]+1;
        NumEntries = 2;
      }

      ierr = A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert(ierr==0);

      // Put in the diagonal entry
      ierr = A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i]);
      assert(ierr==0);
    }

    PrintMemoryUsage("Memory after InsertGlobalValues()", "epetra-after-insert.heap");

    ierr = A.FillComplete();
    assert(ierr == 0);

    PrintMemoryUsage("Memory after FillComplete()", "epetra-after-fillcomplete.heap");

    MemoryUsageStop();

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Exemplo n.º 13
0
/// Benchmark driver for forward-mode AD implementations on a simple Kokkos
/// kernel.
///
/// Registers the command-line options below with a
/// Teuchos::CommandLineProcessor, parses them, and then, for every Kokkos
/// back end that is both compiled in (KOKKOS_HAVE_*) and requested on the
/// command line, initializes the back end, optionally prints its
/// configuration, runs do_times_layout<>() and finalizes the back end again.
///
/// @param argc  Number of command-line arguments.
/// @param argv  Command-line arguments.
/// @return 0 when help was printed or the run completed without the catch
///         macro flagging a failure; 1 on a parse error, an unrecognized
///         option, or a caught exception.
int main(int argc, char* argv[]) {
    bool success = true;
    try {

        // Set up command line options
        Teuchos::CommandLineProcessor clp(false);
        clp.setDocString("This program tests the speed of various forward mode AD implementations for simple Kokkos kernel");
        int m = 100000;
        clp.setOption("m", &m, "Number of matrix rows");
        int n = 100;
        clp.setOption("n", &n, "Number of matrix columns");
        int p = SFadSize;
        clp.setOption("p", &p, "Number of derivative components");
        int nloop = 10;
        clp.setOption("nloop", &nloop, "Number of loops");
#ifdef KOKKOS_HAVE_SERIAL
        bool serial = false;
        clp.setOption("serial", "no-serial", &serial, "Whether to run Serial");
#endif
#ifdef KOKKOS_HAVE_OPENMP
        // A non-zero thread count both enables the OpenMP run and sizes it.
        int openmp = 0;
        clp.setOption("openmp", &openmp, "Number of OpenMP threads");
#endif
#ifdef KOKKOS_HAVE_PTHREAD
        // A non-zero thread count both enables the Threads run and sizes it.
        int threads = 0;
        clp.setOption("threads", &threads, "Number of pThreads threads");
#endif
#ifdef KOKKOS_HAVE_CUDA
        bool cuda = false;
        clp.setOption("cuda", "no-cuda", &cuda, "Whether to run CUDA");
#endif
        int numa = 0;
        clp.setOption("numa", &numa,
                      "Number of NUMA domains to use (set to 0 to use all NUMAs)");
        int cores_per_numa = 0;
        clp.setOption("cores-per-numa", &cores_per_numa,
                      "Number of CPU cores per NUMA to use (set to 0 to use all cores)");
        bool print_config = false;
        clp.setOption("print-config", "no-print-config", &print_config,
                      "Whether to print Kokkos device configuration");
        LayoutType layout = LAYOUT_DEFAULT;
        clp.setOption("layout", &layout, num_layout_types, layout_values,
                      layout_names, "View layout");
        bool vtune = false;
        clp.setOption("vtune", "no-vtune", &vtune, "Profile with vtune");
        bool value = true;
        clp.setOption("value", "no-value", &value, "Run value calculation");
        bool analytic = true;
        clp.setOption("analytic", "no-analytic", &analytic,
                      "Run analytic derivative calculation");
        bool sfad = true;
        clp.setOption("sfad", "no-sfad", &sfad, "Run SFad derivative calculation");
        bool slfad = true;
        clp.setOption("slfad", "no-slfad", &slfad, "Run SLFad derivative calculation");
        bool dfad = true;
        clp.setOption("dfad", "no-dfad", &dfad, "Run DFad derivative calculation");
        bool check = false;
        clp.setOption("check", "no-check", &check, "Check calculations are correct");

        // Parse options
        switch (clp.parse(argc, argv)) {
        case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:
            return 0;
        case Teuchos::CommandLineProcessor::PARSE_ERROR:
        case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION:
            return 1;
        case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:
            break;
        }

        if (vtune)
            connect_vtune();

#ifdef KOKKOS_HAVE_SERIAL
        if (serial) {
            Kokkos::Serial::initialize();
            if (print_config)
                Kokkos::Serial::print_configuration(std::cout, true);
            do_times_layout<SFadSize,SLFadSize,Kokkos::Serial>(
                m,n,p,nloop,value,analytic,sfad,slfad,dfad,check,layout,"Serial");
            Kokkos::Serial::finalize();
        }
#endif

#ifdef KOKKOS_HAVE_OPENMP
        if (openmp) {
            Kokkos::OpenMP::initialize(openmp, numa, cores_per_numa);
            if (print_config)
                Kokkos::OpenMP::print_configuration(std::cout, true);
            do_times_layout<SFadSize,SLFadSize,Kokkos::OpenMP>(
                m,n,p,nloop,value,analytic,sfad,slfad,dfad,check,layout,"OpenMP");
            Kokkos::OpenMP::finalize();
        }
#endif

#ifdef KOKKOS_HAVE_PTHREAD
        if (threads) {
            Kokkos::Threads::initialize(threads, numa, cores_per_numa);
            if (print_config)
                Kokkos::Threads::print_configuration(std::cout, true);
            do_times_layout<SFadSize,SLFadSize,Kokkos::Threads>(
                m,n,p,nloop,value,analytic,sfad,slfad,dfad,check,layout,"Threads");
            Kokkos::Threads::finalize();
        }
#endif

#ifdef KOKKOS_HAVE_CUDA
        if (cuda) {
            // The host execution space is brought up before the device ...
            Kokkos::HostSpace::execution_space::initialize();
            Kokkos::Cuda::initialize();
            if (print_config)
                Kokkos::Cuda::print_configuration(std::cout, true);
            do_times_layout<SFadSize,SLFadSize,Kokkos::Cuda>(
                m,n,p,nloop,value,analytic,sfad,slfad,dfad,check,layout,"Cuda");
            // ... and is therefore torn down after it (reverse order of
            // initialization).
            Kokkos::Cuda::finalize();
            Kokkos::HostSpace::execution_space::finalize();
        }
#endif

    }
    TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);

    // Exit code 0 on success, 1 on failure.
    return !success;
}
Exemplo n.º 14
0
int main(int argc, char *argv[]) {

  int status=0; // 0 = pass, failures are incremented
  bool success = true;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);
  Teuchos::RCP<Teuchos::FancyOStream> out(Teuchos::VerboseObjectBase::getDefaultOStream());

  // Command-line argument for input file
  Albany::CmdLineArgs cmd("input.xml", "inputSG.xml", "inputSG_adjoint.xml");
  cmd.parse_cmdline(argc, argv, *out);
  std::string xmlfilename;
  std::string sg_xmlfilename;
  std::string adjsg_xmlfilename;
  bool do_initial_guess;
  if (cmd.has_third_xml_file) {
    xmlfilename = cmd.xml_filename;
    sg_xmlfilename = cmd.xml_filename2;
    adjsg_xmlfilename = cmd.xml_filename3;
    do_initial_guess = true;
  }
  else if (cmd.has_second_xml_file) {
    xmlfilename = "";
    sg_xmlfilename = cmd.xml_filename;
    adjsg_xmlfilename = cmd.xml_filename2;
    do_initial_guess = false;
  }
  else {
    *out << argv[0] << ":  must supply at least 2 input files!\n";
    std::exit(1);
  }

  try {

    Teuchos::RCP<Teuchos::Time> totalTime =
      Teuchos::TimeMonitor::getNewTimer("AlbanySGAdjoint: ***Total Time***");
    Teuchos::TimeMonitor totalTimer(*totalTime); //start timer
    
    // Setup communication objects
    Teuchos::RCP<Epetra_Comm> globalComm = 
      Albany::createEpetraCommFromMpiComm(Albany_MPI_COMM_WORLD);

    Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> sg_forward_solution;

    //
    // Solve forward problem
    //
    {
      
    Teuchos::RCP<Teuchos::Time> forwardTime =
      Teuchos::TimeMonitor::getNewTimer("AlbanySGAdjoint: ***Forward Solver Time***");
    Teuchos::TimeMonitor forwardTimer(*forwardTime); //start timer

    // Parse parameters
    Teuchos::RCP<const Teuchos_Comm> comm =
      Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
    // Connect vtune for performance profiling
    if (cmd.vtune) {
      Albany::connect_vtune(comm->getRank());
    }
    Albany::SolverFactory sg_slvrfctry(sg_xmlfilename, comm);
    Teuchos::ParameterList& albanyParams = sg_slvrfctry.getParameters();
    Teuchos::RCP< Teuchos::ParameterList> piroParams = 
      Teuchos::rcp(&(albanyParams.sublist("Piro")),false);
    
    // Create stochastic Galerkin solver
    Teuchos::RCP<Piro::Epetra::StokhosSolver> sg_solver =
      Teuchos::rcp(new Piro::Epetra::StokhosSolver(piroParams, globalComm));

    // Get comm for spatial problem
    Teuchos::RCP<const Epetra_Comm> app_comm = sg_solver->getSpatialComm();

    //MP, can we use comm instead of tappComm?
    Teuchos::RCP<const Teuchos_Comm> tapp_comm = Albany::createTeuchosCommFromEpetraComm(app_comm);

    // Compute initial guess if requested
    Teuchos::RCP<Epetra_Vector> ig;
    if (do_initial_guess) {

      // Create solver
      Albany::SolverFactory slvrfctry(xmlfilename, tapp_comm);
      Teuchos::RCP<EpetraExt::ModelEvaluator> solver = 
         slvrfctry.create(tapp_comm, tapp_comm);

      // Setup in/out args
      EpetraExt::ModelEvaluator::InArgs params_in = solver->createInArgs();
      EpetraExt::ModelEvaluator::OutArgs responses_out = 
	solver->createOutArgs();
      int np = params_in.Np();
      for (int i=0; i<np; i++) {
	Teuchos::RCP<const Epetra_Vector> p = solver->get_p_init(i);
	params_in.set_p(i, p);
      }
      int ng = responses_out.Ng();
      for (int i=0; i<ng; i++) {
	Teuchos::RCP<Epetra_Vector> g = 
	  Teuchos::rcp(new Epetra_Vector(*(solver->get_g_map(i))));
	responses_out.set_g(i, g);
      }

      // Evaluate model
      solver->evalModel(params_in, responses_out);

      // Print responses (not last one since that is x)
      *out << std::endl;
      out->precision(8);
      for (int i=0; i<ng-1; i++) {
	if (responses_out.get_g(i) != Teuchos::null)
	  *out << "Response " << i << " = " << std::endl 
	       << *(responses_out.get_g(i)) << std::endl;
      }

    }

    // Create SG solver
    Teuchos::RCP<Albany::Application> app;
    Teuchos::RCP<EpetraExt::ModelEvaluator> model; {
      model = sg_slvrfctry.createAlbanyAppAndModel(
        app, tapp_comm, Petra::EpetraVector_To_TpetraVectorConst(
          *ig, tapp_comm));
    }
    sg_solver->setup(model);

    // Evaluate SG responses at SG parameters
    EpetraExt::ModelEvaluator::InArgs sg_inArgs = sg_solver->createInArgs();
    EpetraExt::ModelEvaluator::OutArgs sg_outArgs = 
      sg_solver->createOutArgs();
    int np = sg_inArgs.Np();
    for (int i=0; i<np; i++) {
      if (sg_inArgs.supports(EpetraExt::ModelEvaluator::IN_ARG_p_sg, i)) {
	Teuchos::RCP<const Stokhos::EpetraVectorOrthogPoly> p_sg = 
	  sg_solver->get_p_sg_init(i);
	sg_inArgs.set_p_sg(i, p_sg);
      }
    }

    int ng = sg_outArgs.Ng();
    for (int i=0; i<ng; i++) {
      if (sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_g_sg, i)) {
	Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> g_sg =
	  sg_solver->create_g_sg(i);
	sg_outArgs.set_g_sg(i, g_sg);
      }
    }

    sg_solver->evalModel(sg_inArgs, sg_outArgs);

    for (int i=0; i<ng-1; i++) {
      // Don't loop over last g which is x, since it is a long vector
      // to print out.
      if (sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_g_sg, i)) {

	// Print mean and standard deviation      
	Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> g_sg = 
	  sg_outArgs.get_g_sg(i);
	Epetra_Vector g_mean(*(sg_solver->get_g_map(i)));
	Epetra_Vector g_std_dev(*(sg_solver->get_g_map(i)));
	g_sg->computeMean(g_mean);
	g_sg->computeStandardDeviation(g_std_dev);
	out->precision(12);
	*out << "Response " << i << " Mean =      " << std::endl 
	     << g_mean << std::endl;
	*out << "Response " << i << " Std. Dev. = " << std::endl 
	     << g_std_dev << std::endl;

	status += sg_slvrfctry.checkSGTestResults(0, g_sg);
      }
    }
    *out << "\nNumber of Failed Comparisons: " << status << std::endl;
  
    sg_forward_solution = sg_outArgs.get_g_sg(ng-1);

    }


    /* Space reserved for the projection of the forward solution onto
       the higher order basis for the adjoint solution.  
       In general, this will require projecting is physical space
       as well as in stochastic space.
    */



    // 
    // Solve adjoint problem
    //
    {

    Teuchos::RCP<Teuchos::Time> adjointTime =
      Teuchos::TimeMonitor::getNewTimer("AlbanySG: ***Adjoint Solver Time***");
    Teuchos::TimeMonitor adjtotalTimer(*adjointTime); //start timer

    // Parse parameters
    Albany::SolverFactory sg_slvrfctry(adjsg_xmlfilename, 
      Albany::createTeuchosCommFromMpiComm(Albany_MPI_COMM_WORLD));
    Teuchos::ParameterList& albanyParams = sg_slvrfctry.getParameters();
    Teuchos::RCP< Teuchos::ParameterList> piroParams = 
      Teuchos::rcp(&(albanyParams.sublist("Piro")),false);
    
    // Create stochastic Galerkin solver
    Teuchos::RCP<Piro::Epetra::StokhosSolver> sg_solver =
      Teuchos::rcp(new Piro::Epetra::StokhosSolver(piroParams, globalComm));

    // Get comm for spatial problem
    Teuchos::RCP<const Epetra_Comm> app_comm = sg_solver->getSpatialComm();
    Teuchos::RCP<const Teuchos_Comm> tapp_comm = Albany::createTeuchosCommFromEpetraComm(app_comm);

    // Create SG solver
    Teuchos::RCP<Albany::Application> app;
    Teuchos::RCP<EpetraExt::ModelEvaluator> model = 
      sg_slvrfctry.createAlbanyAppAndModel(app, tapp_comm);
    sg_solver->setup(model);

    // Set projected forward solution as the initial guess
    sg_solver->set_x_sg_init(*sg_forward_solution);

    // Evaluate SG responses at SG parameters
    EpetraExt::ModelEvaluator::InArgs sg_inArgs = sg_solver->createInArgs();
    EpetraExt::ModelEvaluator::OutArgs sg_outArgs = 
      sg_solver->createOutArgs();
    int np = sg_inArgs.Np();
    for (int i=0; i<np; i++) {
      if (sg_inArgs.supports(EpetraExt::ModelEvaluator::IN_ARG_p_sg, i)) {
	Teuchos::RCP<const Stokhos::EpetraVectorOrthogPoly> p_sg = 
	  sg_solver->get_p_sg_init(i);
	sg_inArgs.set_p_sg(i, p_sg);
      }
    }

    int ng = sg_outArgs.Ng();
    for (int i=0; i<ng; i++) {
      if (sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_g_sg, i)) {
	Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> g_sg =
	  sg_solver->create_g_sg(i);
	sg_outArgs.set_g_sg(i, g_sg);
      }
    }

    sg_solver->evalModel(sg_inArgs, sg_outArgs);

    for (int i=0; i<ng-1; i++) {
      // Don't loop over last g which is x, since it is a long vector
      // to print out.
      if (sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_g_sg, i)) {

	// Print mean and standard deviation      
	Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> g_sg = 
	  sg_outArgs.get_g_sg(i);
	Epetra_Vector g_mean(*(sg_solver->get_g_map(i)));
	Epetra_Vector g_std_dev(*(sg_solver->get_g_map(i)));
	g_sg->computeMean(g_mean);
	g_sg->computeStandardDeviation(g_std_dev);
	out->precision(12);
	*out << "Response " << i << " Mean =      " << std::endl 
	     << g_mean << std::endl;
	*out << "Response " << i << " Std. Dev. = " << std::endl 
	     << g_std_dev << std::endl;

	status += sg_slvrfctry.checkSGTestResults(0, g_sg);
      }
    }
    *out << "\nNumber of Failed Comparisons: " << status << std::endl;

    /* Space reserved for computing the error representation which involves
       integrating over both physical and stochastic space and may require
       a number of projections.
    */

    }

    totalTimer.~TimeMonitor();
    Teuchos::TimeMonitor::summarize(std::cout,false,true,false);

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
  if (!success) status+=10000;

  return status;
}
Exemplo n.º 15
0
int main(int argc, char *argv[]) {

  int status=0; // 0 = pass, failures are incremented
  bool success = true;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);
  Teuchos::RCP<Teuchos::FancyOStream> out(Teuchos::VerboseObjectBase::getDefaultOStream());

  // Command-line argument for input file
  Albany::CmdLineArgs cmd("input.xml", "inputSG.xml");
  cmd.parse_cmdline(argc, argv, *out);
  std::string xmlfilename;
  std::string sg_xmlfilename;
  bool do_initial_guess;
  if (cmd.has_second_xml_file) {
    xmlfilename = cmd.xml_filename;
    sg_xmlfilename = cmd.xml_filename2;
    do_initial_guess = true;
  }
  else if (cmd.has_first_xml_file) {
    xmlfilename = "";
    sg_xmlfilename = cmd.xml_filename;
    do_initial_guess = false;
  }
  else {
    xmlfilename = "";
    sg_xmlfilename = "inputSG.xml";
    do_initial_guess = false;
  }

  try {

    Teuchos::RCP<Teuchos::Time> totalTime =
      Teuchos::TimeMonitor::getNewTimer("AlbanySG: ***Total Time***");
    Teuchos::TimeMonitor totalTimer(*totalTime); //start timer

    // Setup communication objects
    Teuchos::RCP<Epetra_Comm> globalComm =
      Albany::createEpetraCommFromMpiComm(Albany_MPI_COMM_WORLD);
    Teuchos::RCP<const Teuchos_Comm> tcomm =
      Tpetra::DefaultPlatform::getDefaultPlatform().getComm();

    // Connect vtune for performance profiling
    if (cmd.vtune) {
      Albany::connect_vtune(tcomm->getRank());
    }

    // Parse parameters
    Albany::SolverFactory sg_slvrfctry(sg_xmlfilename, tcomm);
    Teuchos::ParameterList& albanyParams = sg_slvrfctry.getParameters();
    Teuchos::RCP< Teuchos::ParameterList> piroParams =
      Teuchos::rcp(&(albanyParams.sublist("Piro")),false);

    // If SG is not enabled and user chose "Direct" or "AD",
    // for "SG Method", change this "Global", which always works
#ifndef ALBANY_SG
    std::string sg_method =
      piroParams->sublist("Stochastic Galerkin").get("SG Method", "Global");
    if (sg_method == "Direct" || sg_method == "AD") {
      piroParams->sublist("Stochastic Galerkin").set("SG Method", "Global");
      *out << "**********************************************************\n"
           << "* WARNING!  Direct SG method was chosen, however         *\n"
           << "* ALBANY_SG is not enabled.  Changing to Global method.  *\n"
           << "**********************************************************\n"
           << std::endl;
    }
#endif

    // Create stochastic Galerkin solver
    Teuchos::RCP<Piro::Epetra::StokhosSolver> sg_solver =
      Teuchos::rcp(new Piro::Epetra::StokhosSolver(piroParams, globalComm));

    // Get comm for spatial problem
    Teuchos::RCP<const Epetra_Comm> app_comm = sg_solver->getSpatialComm();
    Teuchos::RCP<const Teuchos_Comm> tapp_comm = Albany::createTeuchosCommFromEpetraComm(app_comm);

    // Compute initial guess if requested
    Teuchos::RCP<Epetra_Vector> ig;
    if (do_initial_guess) {

      // Create solver
      Albany::SolverFactory slvrfctry(
	xmlfilename, tcomm);
      Teuchos::RCP<EpetraExt::ModelEvaluator> solver =
	slvrfctry.create(tapp_comm, tapp_comm);

      // Setup in/out args
      EpetraExt::ModelEvaluator::InArgs params_in = solver->createInArgs();
      EpetraExt::ModelEvaluator::OutArgs responses_out =
	solver->createOutArgs();
      int np = params_in.Np();
      for (int i=0; i<np; i++) {
	Teuchos::RCP<const Epetra_Vector> p = solver->get_p_init(i);
	params_in.set_p(i, p);
      }
      int ng = responses_out.Ng();
      for (int i=0; i<ng; i++) {
	Teuchos::RCP<Epetra_Vector> g =
	  Teuchos::rcp(new Epetra_Vector(*(solver->get_g_map(i))));
	responses_out.set_g(i, g);
      }

      // Evaluate model
      solver->evalModel(params_in, responses_out);

      // Print responses (not last one since that is x)
      *out << std::endl;
      out->precision(8);
      for (int i=0; i<ng-1; i++) {
	if (responses_out.get_g(i) != Teuchos::null)
	  *out << "Response " << i << " = " << std::endl
	       << *(responses_out.get_g(i)) << std::endl;
      }

      // Get final solution as initial guess
      ig = responses_out.get_g(ng-1);

      Teuchos::TimeMonitor::summarize(std::cout,false,true,false);
    }

    // Create SG solver
    Teuchos::RCP<Albany::Application> app;
    Teuchos::RCP<const Tpetra_Vector> initial_guessT;
    if (Teuchos::nonnull(ig)) {
      initial_guessT = Petra::EpetraVector_To_TpetraVectorConst(*ig, tcomm);
    }
    Teuchos::RCP<EpetraExt::ModelEvaluator> model =
      sg_slvrfctry.createAlbanyAppAndModel(app, tcomm, initial_guessT);

    // Hack in rigid body modes for ML
    {
      Teuchos::RCP<Teuchos::ParameterList> sg_solver_params =
        Teuchos::sublist(Teuchos::sublist(piroParams, "Stochastic Galerkin"), "SG Solver Parameters");
      Teuchos::RCP<Teuchos::ParameterList> sg_prec_params =
        Teuchos::sublist(sg_solver_params, "SG Preconditioner");

      if (sg_prec_params->isParameter("Mean Preconditioner Type")) {
        if (sg_prec_params->get<std::string>("Mean Preconditioner Type") == "ML") {
          Teuchos::RCP<Teuchos::ParameterList> ml_params =
            Teuchos::sublist(sg_prec_params, "Mean Preconditioner Parameters");
          const Teuchos::RCP<Albany::RigidBodyModes>&
            rbm = app->getProblem()->getNullSpace();
          // Previously, updateMLPL called importML, but there was no coordinate
          // data yet. Now we just update the parameter list.
          rbm->updatePL(ml_params);          
          sg_solver->resetSolverParameters(*sg_solver_params);
        }
      }
    }

    // Setup SG solver
    {
      const Teuchos::RCP<NOX::Epetra::Observer > NOX_observer =
        Teuchos::rcp(new Albany_NOXObserver(app));
      sg_solver->setup(model, NOX_observer);
    }

    // Evaluate SG responses at SG parameters
    EpetraExt::ModelEvaluator::InArgs sg_inArgs = sg_solver->createInArgs();
    EpetraExt::ModelEvaluator::OutArgs sg_outArgs =
      sg_solver->createOutArgs();
    int np = sg_inArgs.Np();
    for (int i=0; i<np; i++) {
      if (sg_inArgs.supports(EpetraExt::ModelEvaluator::IN_ARG_p_sg, i)) {
	Teuchos::RCP<const Stokhos::EpetraVectorOrthogPoly> p_sg =
	  sg_solver->get_p_sg_init(i);
	sg_inArgs.set_p_sg(i, p_sg);
      }
    }

    // By default, request the sensitivities if not explicitly disabled
    const bool computeSensitivities =
      sg_slvrfctry.getAnalysisParameters().sublist("Solve").get("Compute Sensitivities", true);
    int ng = sg_outArgs.Ng();
    for (int i=0; i<ng; i++) {
      if (sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_g_sg, i)) {
	Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> g_sg =
	  sg_solver->create_g_sg(i);
	sg_outArgs.set_g_sg(i, g_sg);
      }

      for (int j=0; j<np; j++) {
	EpetraExt::ModelEvaluator::DerivativeSupport ds =
	  sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_DgDp_sg,i,j);
	if (computeSensitivities &&
	    ds.supports(EpetraExt::ModelEvaluator::DERIV_MV_BY_COL)) {
	  int ncol = sg_solver->get_p_map(j)->NumMyElements();
	  Teuchos::RCP<Stokhos::EpetraMultiVectorOrthogPoly> dgdp_sg =
	    sg_solver->create_g_mv_sg(i,ncol);
	  sg_outArgs.set_DgDp_sg(i, j, dgdp_sg);
	}
      }
    }

    sg_solver->evalModel(sg_inArgs, sg_outArgs);

    bool printResponse =
      albanyParams.sublist("Problem").get("Print Response Expansion", true);
    for (int i=0; i<ng-1; i++) {
      // Don't loop over last g which is x, since it is a long vector
      // to print out.
      if (sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_g_sg, i)) {

	// Print mean and standard deviation
	Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> g_sg =
	  sg_outArgs.get_g_sg(i);
	if (g_sg != Teuchos::null && app->getResponse(i)->isScalarResponse()) {
	  Epetra_Vector g_mean(*(sg_solver->get_g_map(i)));
	  Epetra_Vector g_std_dev(*(sg_solver->get_g_map(i)));
	  g_sg->computeMean(g_mean);
	  g_sg->computeStandardDeviation(g_std_dev);
	  out->precision(12);
	  out->setf(std::ios::scientific);
	  *out << "Response " << i << " Mean =      " << std::endl
	       << g_mean << std::endl;
	  *out << "Response " << i << " Std. Dev. = " << std::endl
	       << g_std_dev << std::endl;
	  if (printResponse) {
	    *out << "Response " << i << "           = " << std::endl
		 << *g_sg << std::endl;
	    for (int j=0; j<np; j++) {
	      EpetraExt::ModelEvaluator::DerivativeSupport ds =
		sg_outArgs.supports(EpetraExt::ModelEvaluator::OUT_ARG_DgDp_sg,i,j);
	      if (!ds.none()) {
		Teuchos::RCP<Stokhos::EpetraMultiVectorOrthogPoly> dgdp_sg =
		  sg_outArgs.get_DgDp_sg(i,j).getMultiVector();
		if (dgdp_sg != Teuchos::null)
		  *out << "Response " << i << " Derivative " << j << " = "
		       << std::endl << *dgdp_sg << std::endl;
	      }
	    }
	  }

	  status += sg_slvrfctry.checkSGTestResults(i, g_sg, &g_mean, &g_std_dev);
	}
      }
    }
    *out << "\nNumber of Failed Comparisons: " << status << std::endl;

    totalTimer.~TimeMonitor();
    Teuchos::TimeMonitor::summarize(std::cout,false,true,false);
    Teuchos::TimeMonitor::zeroOutTimers();

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
  if (!success) status+=10000;

  return status;
}
Exemplo n.º 16
0
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;
    int rc = 0;
    nssi_service xfer_svc;

    int server_index=0;
    int rank_in_server=0;

    int transport_index=-1;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct xfer_args args;

    const int num_io_methods = 8;
    const int io_method_vals[] = {
            XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC,
            XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC,
            XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC,
            XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC};
    const char * io_method_names[] = {
            "write-encode-sync", "write-encode-async",
            "write-rdma-sync", "write-rdma-async",
            "read-encode-sync", "read-encode-async",
            "read-rdma-sync", "read-rdma-async"};

    const int nssi_transport_list[] = {
            NSSI_RPC_PTL,
            NSSI_RPC_PTL,
            NSSI_RPC_IB,
            NSSI_RPC_IB,
            NSSI_RPC_GEMINI,
            NSSI_RPC_GEMINI,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_MPI};

    const int num_nssi_transports = 11;
    const int nssi_transport_vals[] = {
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7,
            8,
            9,
            10
            };
    const char * nssi_transport_names[] = {
            "portals",
            "ptl",
            "infiniband",
            "ib",
            "gemini",
            "gni",
            "bgpdcmf",
            "dcmf",
            "bgqpami",
            "pami",
            "mpi"
    };


    // Initialize arguments
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.len = 1;
    args.delay = 1;
    args.io_method = XFER_WRITE_RDMA_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.num_servers = 1;
    args.num_threads = 0;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.kill_server_flag = true;
    args.block_distribution = true;


    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;
        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set and option, it must be given a name and default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a users comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");
        parser.setOption("num-servers", &args.num_servers, "Number of server processes");
        parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process");
        parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment");
        parser.setOption("block-distribution", "rr-distribution", &args.block_distribution,
                "Use a block distribution scheme to assign clients to servers");

        // Set an enumeration command line option for the io_method
        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n"
                "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n"
                "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n"
                "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n"
                "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n"
                "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n"
                "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n"
                "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous");


        // Set an enumeration command line option for the NNTI transport
        parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals|ptl    : Cray or Schutt\n"
                "\t\t\tinfiniband|ib  : libibverbs\n"
                "\t\t\tgemini|gni     : Cray libugni (Gemini or Aries)\n"
                "\t\t\tbgpdcmf|dcmf   : IBM BG/P DCMF\n"
                "\t\t\tbgqpami|pami   : IBM BG/Q PAMI\n"
                "\t\t\tmpi            : isend/irecv implementation\n"
                );



        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized a command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw an
           std::exception.  If you want prevent a command line processor from
           throwing an std::exception (which is important in this program since
           we don't have an try/catch around this) when it encounters a
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!

        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(args.debug_level, "transport_index=%d", transport_index);
    if (transport_index > -1) {
    	args.transport     =nssi_transport_list[transport_index];
    	args.transport_name=std::string(nssi_transport_names[transport_index]);
    }
	args.io_method_name=std::string(io_method_names[args.io_method]);

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);


    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting xfer-service test", rank);

#ifdef TRIOS_ENABLE_COMMSPLITTER
    if (args.transport == NSSI_RPC_MPI) {
        MPI_Pcontrol(0);
    }
#endif

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks its
     * running by itself.
     */
    int color = 0;  // color=0-->server, color=1-->client
    if (args.client_flag && args.server_flag) {
        if (np < 2) {
            log_error(debug_level, "Must use at least 2 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Put all the servers as the first ranks.
        if (rank < args.num_servers) {
            color = 0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            color = 1;  // all others are clients
            log_debug(debug_level, "rank=%d is a client", rank);
        }

        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }
    else {
        if (args.client_flag) {
            color=1;
            log_debug(debug_level, "rank=%d is a client", rank);
        }
        else if (args.server_flag) {
            color=0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            log_error(debug_level, "Must be either a client or a server");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }

    MPI_Comm_rank(comm, &splitrank);
    MPI_Comm_size(comm, &splitsize);

    log_debug(debug_level, "%d: Finished splitting communicators", rank);

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server, or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // If running as both client and server, gather and distribute
    // the server URLs to all the clients.
    if (args.server_flag && args.client_flag) {

        std::string all_urls;

        // This needs to be a vector of chars, not a string
        all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0');

        // Have servers gather their URLs
        if (color == 0) {
            assert(args.num_servers == splitsize);  // these should be equal

            log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str());

            // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD)
            MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR,
                    &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm);
        }

        // broadcast the full set of server urls to all processes
        MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD);

        log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size());

        if (color == 1) {

            // For block distribution scheme use the utility function (in xfer_util.cpp)
            if (args.block_distribution) {
                // Use this utility function to calculate the server_index
                xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
            }

            // Use a simple round robin distribution scheme
            else {
                server_index   = splitrank % args.num_servers;
                rank_in_server = splitrank / args.num_servers;
            }

            // Copy the server url out of the list of urls
            int offset = server_index * NSSI_URL_LEN;

            args.server_url = all_urls.substr(offset, NSSI_URL_LEN);

            log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
        }


        log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str());
    }

    // If running as a client only, have to get the list of servers from the urlfile.
    else if (!args.server_flag && args.client_flag){

        sleep(args.delay);  // give server time to get started

        std::vector< std::string > urlbuf;
        xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm);
        args.num_servers = urlbuf.size();

        // For block distribution scheme use the utility function (in xfer_util.cpp)
        if (args.block_distribution) {
            // Use this utility function to calculate the server_index
            xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
        }

        // Use a simple round robin distribution scheme
        else {
            server_index   = splitrank % args.num_servers;
            rank_in_server = splitrank / args.num_servers;
        }

        args.server_url = urlbuf[server_index];
        log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
    }

    else if (args.server_flag && !args.client_flag) {
        args.server_url = my_url;

        if (args.url_file.empty()) {
            log_error(debug_level, "Must set --url-file");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm);
    }

    // Set the debug level for the xfer service.
    xfer_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str());

    print_args(out, args, "%");

    log_debug(debug_level, "server_url=%s", args.server_url.c_str());

    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on node 0.
     *  In this example, the server is a single process.
     */
    if (color == 0) {
        rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
     /**  The parallel client will execute this branch.  The root node, node 0, of the client connects
      *   connects with the server, using the \ref nssi_get_service function.  Then the root
      *   broadcasts the service description to the other clients before starting the main
      *   loop of the client code by calling \ref xfer_client_main.
      */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {


            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str());
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(xfer_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
        }

        // wait for all the clients to connect
        MPI_Barrier(comm);

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(xfer_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            xfer_client_main(args, xfer_svc, comm);


            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if ((args.kill_server_flag) && (rank_in_server == 0)) {
                log_debug(debug_level, "%d: Halting xfer service", rank);
                rc = nssi_kill(&xfer_svc, 0, 5000);
            }
            rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc);
            if (rc != NSSI_OK) {
                log_error(xfer_debug_level, "could not free svc description: %s",
                        nssi_err_str(rc));
            }
        }

        else {
            if (client_rank == 0)
                log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    if(success && (rc == NSSI_OK))
    	out << "\nEnd Result: TEST PASSED" << std::endl;
    else
    	out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
Exemplo n.º 17
0
int main(int argc, char *argv[]) {

  bool success = true;
  bool verbose = true;
  try {
    // Set up the printing utilities
    Teuchos::RCP<Teuchos::ParameterList> noxParamsPtr =
      Teuchos::rcp(new Teuchos::ParameterList);
    Teuchos::ParameterList& noxParams = *noxParamsPtr.get();
    Teuchos::ParameterList& printParams = noxParams.sublist("Printing");
    printParams.set("Output Precision", 5);

    std::string paramFilename;
    bool   usingParamInputFile = false;

    if (argc > 1) {
      if (argv[1][0]=='-' && argv[1][1]=='v')
        printParams.set("Output Information",
             NOX::Utils::OuterIteration +
             NOX::Utils::OuterIterationStatusTest +
             NOX::Utils::InnerIteration +
             NOX::Utils::Parameters +
             NOX::Utils::Details +
             NOX::Utils::Warning +
             NOX::Utils::TestDetails);
      else if (argv[1][0]=='-' && argv[1][1]=='p') {
        if (argc < 3) {
          std::cout << "Error: An input parameter file was expected but not found. \n" << std::endl;
          printParams.set("Output Information", NOX::Utils::Error);
          NOX::Utils printing(printParams);
          return EXIT_FAILURE;
          }
        paramFilename = argv[2];
        std::cout << "Reading parameter information from file \"" << paramFilename << "\""<< std::endl;
        usingParamInputFile = true;
        }
      else
        printParams.set("Output Information", NOX::Utils::Error);
    }
    NOX::Utils printing(printParams);

    // Identify the test
    if (printing.isPrintType(NOX::Utils::TestDetails)) {
      std::cout << "Starting lapack/NOX_NewTest/NOX_NewTest.exe" << std::endl;
    }

    // Final return value (0 = succefull, non-zero = failure)
    //int status = 0;

    // *** Insert your testing here! ***

    // Set up the problem interface
    Rosenbrock rosenbrock;

    // Create a group which uses that problem interface. The group will
    // be initialized to contain the default initial guess for the
    // specified problem.
    Teuchos::RCP<NOX::LAPACK::Group> grp =
      Teuchos::rcp(new NOX::LAPACK::Group(rosenbrock));

    // Set up the status tests
    Teuchos::RCP<NOX::StatusTest::NormF> statusTestA =
      Teuchos::rcp(new NOX::StatusTest::NormF(1.0e-4));
    Teuchos::RCP<NOX::StatusTest::MaxIters> statusTestB =
      Teuchos::rcp(new NOX::StatusTest::MaxIters(20));
    Teuchos::RCP<NOX::StatusTest::Combo> statusTestsCombo =
      Teuchos::rcp(new NOX::StatusTest::Combo(NOX::StatusTest::Combo::OR,
                          statusTestA, statusTestB));

    // Read parameters from file paramFilename - command line arg#1
    if (usingParamInputFile && !NOX::parseTextInputFile(paramFilename, noxParams))
       std::cout << "Using unchanged parameters " << std::endl;

    // Create the solver
    Teuchos::RCP<NOX::Solver::Generic> solver =
      NOX::Solver::buildSolver(grp, statusTestsCombo, noxParamsPtr);

    // Solve the nonlinesar system
    NOX::StatusTest::StatusType status = solver->solve();

    // Print the answer
    std::cout << "\n" << "-- Parameter List From Solver --" << "\n";
    solver->getList().print(std::cout);

    // Get the answer
    NOX::LAPACK::Group solnGrp =
      dynamic_cast<const NOX::LAPACK::Group&>(solver->getSolutionGroup());

    // Final return value (0 = succefull, non-zero = failure)
    if (status == NOX::StatusTest::Converged) {
      std::cout << "Test passed!" << std::endl;
      success = true;
    }
    else {
      std::cout << "Test failed!" << std::endl;
      success = false;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Exemplo n.º 18
0
// MueLu driver: builds a Galeri test matrix, sets up a multigrid hierarchy
// from an XML parameter file, checks that every level carries the expected
// data, and runs 20 iterations of the hierarchy as a solver on a generated
// right-hand side.
//
// Returns EXIT_SUCCESS when all level checks pass and no exception is caught;
// EXIT_FAILURE otherwise (also on command-line parse errors).
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  Teuchos::oblackholestream nullStream;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&nullStream);

  bool success = true;
  bool verbose = true;
  try {
    Teuchos::RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    // ---------------------------------------------------------------------
    // Test parameters (run with --help to list the available options)
    // ---------------------------------------------------------------------
    Teuchos::CommandLineProcessor clp(false);

    // 8748 (= 4*3^7) is passed as the size argument of the Galeri parameters.
    Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 8748);
    Xpetra::Parameters xpetraParameters(clp);

    // Driver-specific options and their defaults.
    int         debugPause    = 0;
    int         minAggSize    = 2;
    int         maxNbrChosen  = 0;
    int         showTimings   = 0;
    std::string xmlFileName   = "parameters.xml";

    clp.setOption("debug",&debugPause,"pause to attach debugger");
    clp.setOption("maxNbrSel",&maxNbrChosen,"maximum # of nbrs allowed to be in other aggregates");
    clp.setOption("minPerAgg",&minAggSize,"minimum #DOFs per aggregate");
    clp.setOption("timings",&showTimings,"print timings to screen");
    clp.setOption("xmlFile",&xmlFileName,"file name containing MueLu multigrid parameters in XML format");

    // Help → clean exit; parse error or unknown option → failure.
    const Teuchos::CommandLineProcessor::EParseCommandLineReturn parseStatus =
      clp.parse(argc,argv);
    if (parseStatus == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
      return EXIT_SUCCESS;
    }
    if (parseStatus == Teuchos::CommandLineProcessor::PARSE_ERROR ||
        parseStatus == Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION) {
      return EXIT_FAILURE;
    }

    // Global timer; released explicitly before the timing summary below.
    Teuchos::RCP<Teuchos::TimeMonitor> globalTimeMonitor =
      Teuchos::rcp (new Teuchos::TimeMonitor(
        *Teuchos::TimeMonitor::getNewTimer("Timings: Global Time")));

    if (debugPause) {
      Utilities::PauseForDebugger();
    }

    matrixParameters.check();
    xpetraParameters.check();
    Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

    // Only rank 0 echoes the parameters.
    if (comm->getRank() == 0) {
      std::cout << xpetraParameters << matrixParameters;
    }

    // ---------------------------------------------------------------------
    // Initial matrix
    // ---------------------------------------------------------------------
    Teuchos::RCP<const Map> map;
    Teuchos::RCP<Matrix> A;

    {
      // Scoped so the build timer stops once the matrix exists.
      Teuchos::TimeMonitor buildTimer(*Teuchos::TimeMonitor::getNewTimer("Timings: Matrix Build"));

      map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm);
      Teuchos::RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > problem =
        Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(
          matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList());
      A = problem->BuildMatrix();
    }

    // ---------------------------------------------------------------------
    // Hierarchy setup from the XML parameter file
    // ---------------------------------------------------------------------
    Teuchos::ParameterList paramList;
    Teuchos::updateParametersFromXmlFileAndBroadcast(
      xmlFileName, Teuchos::Ptr<Teuchos::ParameterList>(&paramList), *comm);

    Teuchos::RCP<HierarchyManager> mueluFactory =
      Teuchos::rcp(new ParameterListInterpreter(paramList));

    Teuchos::RCP<Hierarchy> H = mueluFactory->CreateHierarchy();

    // Seed the finest level: operator, constant near-nullspace vector, and
    // the "nz" value from the matrix parameters as "NumZLayers".
    Teuchos::RCP<MueLu::Level> fineLevel = H->GetLevel(0);
    fineLevel->Set< Teuchos::RCP<Matrix> >("A", A);

    Teuchos::RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getRowMap(), 1);
    nullspace->putScalar(1.0);
    fineLevel->Set("Nullspace", nullspace);

    fineLevel->Set("NumZLayers",matrixParameters.GetParameterList().get<GO>("nz"));

    mueluFactory->SetupHierarchy(*H);

    // Verify the contents of every level. Each failed check marks the test
    // as failed and dumps the level; the level is dumped once more at the
    // end of each iteration regardless.
    for (int lvl = 0; lvl < H->GetNumLevels(); ++lvl) {
      Teuchos::RCP<MueLu::Level> level = H->GetLevel(lvl);

      if (!level->IsAvailable("A", MueLu::NoFactory::get())) {
        success = false;
        level->print(std::cout, MueLu::Debug);
      }
      if (!level->IsAvailable("P", MueLu::NoFactory::get()) && lvl > 0) {
        success = false;
        level->print(std::cout, MueLu::Debug);
      }
      if (!level->IsAvailable("R", MueLu::NoFactory::get()) && lvl > 0) {
        success = false;
        level->print(std::cout, MueLu::Debug);
      }
      if (!level->IsAvailable("PreSmoother", MueLu::NoFactory::get())) {
        success = false;
        level->print(std::cout, MueLu::Debug);
      }
      if (!level->IsAvailable("PostSmoother", MueLu::NoFactory::get()) && lvl < H->GetNumLevels()-1) {
        success = false;
        level->print(std::cout, MueLu::Debug);
      }
      // "NumZLayers" must not be present on any level other than the finest.
      if (level->IsAvailable("NumZLayers", MueLu::NoFactory::get()) && lvl > 0) {
        success = false;
        level->print(std::cout, MueLu::Debug);
      }

      level->print(std::cout, MueLu::Debug);
    }

    // ---------------------------------------------------------------------
    // System solution (Ax = b)
    // ---------------------------------------------------------------------
    comm->barrier();
    typedef Teuchos::ScalarTraits<SC> STS;
    SC zero = STS::zero(), one = STS::one();

    Teuchos::RCP<Vector> X = VectorFactory::Build(A->getRowMap());
    Teuchos::RCP<Vector> B = VectorFactory::Build(A->getRowMap());

    {
      // Fixed seed for reproducibility. B = A * (random X), scaled to unit
      // 2-norm; X is then reset to zero as the initial guess.
      Utilities::SetRandomSeed(*comm);
      X->randomize();
      A->apply(*X, *B, Teuchos::NO_TRANS, one, zero);

      Teuchos::Array<STS::magnitudeType> norms(1);
      B->norm2(norms);
      B->scale(one/norms[0]);
      X->putScalar(zero);
    }

    comm->barrier();

    // Use the hierarchy as a stand-alone solver for 20 iterations.
    H->IsPreconditioner(false);
    H->Iterate(*B, *X, 20);

    // Stop the global timer before the summary is printed.
    globalTimeMonitor = Teuchos::null;

    if (showTimings)
      Teuchos::TimeMonitor::summarize();
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Exemplo n.º 19
0
int main(int argc, char* argv[])
{
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);
  const int procRank = Teuchos::GlobalMPISession::getRank();

  Teuchos::oblackholestream blackhole;
  std::ostream &out = ( procRank == 0 ? std::cout : blackhole );

  bool success = true;

  try {

    out << Teuchos::Teuchos_Version() << std::endl << std::endl;

    // Creating an empty command line processor looks like:
    Teuchos::CommandLineProcessor My_CLP;

    My_CLP.setDocString(
      "This example program demonstrates how to use this Teuchos::CommandLineProcessor class\n"
      "to get options from the command-line and print this help messange automatically.\n"
      );

    /* To set and option, it must be given a name and default value.  Additionally,
       each option can be given a help std::string.  Although it is not necessary, a help
       std::string aids a users comprehension of the acceptable command line arguments.
       Some examples of setting command line options are:
    */
    // Set an integer command line option.
    int NumIters = 1550;
    My_CLP.setOption("iterations", &NumIters, "Number of iterations");
    // Set a long integer command line option
    long int MatrixDim = Teuchos::OrdinalTraits<long int>::max();
    My_CLP.setOption("long-matrix-dim", &MatrixDim, "Matrix dimension (long)");
#ifdef HAVE_TEUCHOS_LONG_LONG_INT
    long long int MatrixDim2 = Teuchos::OrdinalTraits<long long int>::max();
    My_CLP.setOption("long-long-matrix-dim", &MatrixDim2, "Matrix dimension (long long)");
#endif
    // Set a double-precision command line option.
    double Tolerance = 1e-10;
    My_CLP.setOption("tolerance", &Tolerance, "Tolerance");
    // Set a std::string command line option.
    std::string Solver = "GMRES";
    My_CLP.setOption("solver", &Solver, "Linear solver");
    // Set a boolean command line option.
    bool Precondition = true;
    My_CLP.setOption("precondition","no-precondition",
                     &Precondition,"Preconditioning flag");
    // Set an enumeration command line option
    const int    num_speed_values  = 3;
    const ESpeed speed_opt_values[] = { SPEED_SLOW, SPEED_MEDIUM, SPEED_FAST };
    const char*  speed_opt_names[]  = { "slow",     "medium",     "fast"     };
    ESpeed       Speed = SPEED_MEDIUM;
    My_CLP.setOption(
      "speed", &Speed,
      num_speed_values, speed_opt_values, speed_opt_names,
      "Speed of our solver"
      );

    /* There are also two methods that control the behavior of the
       command line processor.  First, for the command line processor to
       allow an unrecognized a command line option to be ignored (and
       only have a warning printed), use:
    */
    My_CLP.recogniseAllOptions(true);

    /* Second, by default, if the parser finds a command line option it
       doesn't recognize or finds the --help option, it will throw an
       std::exception.  If you want prevent a command line processor from
       throwing an std::exception (which is important in this program since
       we don't have an try/catch around this) when it encounters a
       unrecognized option or help is printed, use:
    */
    My_CLP.throwExceptions(false);

    /* We now parse the command line where argc and argv are passed to
       the parse method.  Note that since we have turned off std::exception
       throwing above we had better grab the return argument so that
       we can see what happened and act accordingly.
    */
    Teuchos::CommandLineProcessor::EParseCommandLineReturn
      parseReturn= My_CLP.parse( argc, argv );
    if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
      return 0;
    }
    if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
      return 1; // Error!
    }
    // Here is where you would use these command line arguments but for this example program
    // we will just print the help message with the new values of the command-line arguments.
    if (procRank == 0)
      out << "\nPrinting help message with new values of command-line arguments ...\n\n";
    My_CLP.printHelpMessage(argv[0],out);

    // Now we will print the option values
    if (procRank == 0) {
      out << "\nPrinting user options after parsing ...\n\n";
      out << "NumIters     = " << NumIters << std::endl;
      out << "MatrixDim    = " << MatrixDim << std::endl;
#ifdef HAVE_TEUCHOS_LONG_LONG_INT
      out << "MatrixDim2   = " << MatrixDim2 << std::endl;
#endif
      out << "Tolerance    = " << Tolerance << std::endl;
      out << "Solver       = \"" << Solver << "\"\n";
      out << "Precondition = " << Precondition << std::endl;
      out << "Speed        = " << Speed << std::endl;
    }

  } // try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

  if(success)
    out << "\nEnd Result: TEST PASSED" << std::endl;	

  return ( success ? 0 : 1 );
}
Exemplo n.º 20
0
// Benchmark of cusp::MVmultiply for a CSR matrix times a block of right-hand
// sides, comparing row-major and column-major storage of the dense blocks.
//
// The matrix comes from cusp::gallery::poisson27pt(A, n, n, n). For each
// number of right-hand sides in [begin, end] (stepping by `step`) the multiply
// is run `nits` times per layout and one comma-separated line is printed with
// the average time and the GFLOP rate, using 2 * num_entries * nrhs flops per
// multiply.
//
// Command-line options: --n, --begin, --end, --step, --nits, --device.
// Returns 0 on success (or when the parser reports that help was printed),
// -1 on invalid options or when an exception is caught.
int main(int argc, char *argv[])
{
  typedef int                 IndexType;
  typedef double              ValueType;
  typedef cusp::device_memory MemorySpace;
  //typedef cusp::row_major     Orientation;

  bool success = true;
  bool verbose = false;
  try {

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString("This test performance of block multiply routines.\n");
    IndexType n = 32;
    CLP.setOption("n", &n, "Number of mesh points in the each direction");
    IndexType nrhs_begin = 32;
    CLP.setOption("begin", &nrhs_begin,
                  "Staring number of right-hand-sides");
    IndexType nrhs_end = 512;
    CLP.setOption("end", &nrhs_end,
                  "Ending number of right-hand-sides");
    IndexType nrhs_step = 32;
    CLP.setOption("step", &nrhs_step,
                  "Increment in number of right-hand-sides");
    IndexType nits = 10;
    CLP.setOption("nits", &nits,
                  "Number of multiply iterations");
    int device_id = 0;
    CLP.setOption("device", &device_id, "CUDA device ID");

    // Act on the parser's verdict instead of discarding it: do not run the
    // benchmark after help was printed or when parsing did not succeed.
    switch (CLP.parse( argc, argv )) {
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:
        break;
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:
        return 0;
      default:
        return -1;
    }

    // A non-positive step would keep the sweep below from ever terminating,
    // and a non-positive iteration count would make the averaged times
    // meaningless (division by zero or by a negative count).
    if (nrhs_step <= 0 || nits <= 0) {
      std::cerr << "Error: --step and --nits must be positive (step="
                << nrhs_step << ", nits=" << nits << ")" << std::endl;
      return -1;
    }

    // Set CUDA device
    cudaSetDevice(device_id);
    cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);

    // create 3D Poisson problem
    cusp::csr_matrix<IndexType, ValueType, MemorySpace> A;
    cusp::gallery::poisson27pt(A, n, n, n);

    // Header of the comma-separated report.
    std::cout << "nrhs , num_rows , num_entries , row_time , row_gflops , "
              << "col_time , col_gflops" << std::endl;

    for (IndexType nrhs = nrhs_begin; nrhs <= nrhs_end; nrhs += nrhs_step) {

      // Flop count used for the rate: two per stored entry per right-hand side.
      double flops =
        2.0 * static_cast<double>(A.num_entries) * static_cast<double>(nrhs);

      // test row-major storage
      cusp::array2d<ValueType, MemorySpace, cusp::row_major> x_row(
        A.num_rows, nrhs, 1);
      cusp::array2d<ValueType, MemorySpace, cusp::row_major> y_row(
        A.num_rows, nrhs, 0);

      cusp::detail::timer row_timer;
      row_timer.start();
      for (IndexType iter=0; iter<nits; ++iter) {
        cusp::MVmultiply(A, x_row, y_row);
      }
      // Wait for the device before reading the timer.
      cudaDeviceSynchronize();
      double row_time = row_timer.seconds_elapsed() / nits;
      double row_gflops = 1.0e-9 * flops / row_time;

      // test column-major storage
      cusp::array2d<ValueType, MemorySpace, cusp::column_major> x_col(
        A.num_rows, nrhs, 1);
      cusp::array2d<ValueType, MemorySpace, cusp::column_major> y_col(
        A.num_rows, nrhs, 0);

      cusp::detail::timer col_timer;
      col_timer.start();
      for (IndexType iter=0; iter<nits; ++iter) {
        cusp::MVmultiply(A, x_col, y_col);
      }
      // Wait for the device before reading the timer.
      cudaDeviceSynchronize();
      double col_time = col_timer.seconds_elapsed() / nits;
      double col_gflops = 1.0e-9 * flops / col_time;

      std::cout << nrhs << " , "
                << A.num_rows << " , "  << A.num_entries << " , "
                << row_time << " , " << row_gflops << " , "
                << col_time << " , " << col_gflops
                << std::endl;

    }

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  if (success)
    return 0;
  return -1;
}
Exemplo n.º 21
0
  //================================================================
  //================================================================
  // RN_20091215: This needs to be called only once per time step
  // in the beginning to set up the problem.
  //
  // Steps performed, in order:
  //   1. wrap the communicator (MPI or serial),
  //   2. build the row map from myIndicies and check it,
  //   3. allocate the solution vector,
  //   4. read and validate trilinosOptions.xml,
  //   5. create the matrix interface and the Stratimikos solver factory.
  //
  // Parameters:
  //   bandwidth   - forwarded unchanged to TrilinosMatrix_Interface.
  //   mySize      - number of entries in myIndicies.
  //   myIndicies  - global IDs handed to the Epetra_Map constructor
  //                 (the map is built with index base 1).
  //   myX/myY/myZ - coordinate arrays registered with ML when the
  //                 "Preconditioner Type" is "ML".
  //                 NOTE(review): the raw pointers are stored in the
  //                 parameter list, so the arrays presumably have to stay
  //                 alive while the preconditioner uses them -- confirm
  //                 against the caller.
  //   mpi_comm_f  - CISM's fortran mpi communicator (only read when
  //                 GLIMMER_MPI is defined).
  //
  // Errors detected here are reported and end the process with exit(1).
  //================================================================
  void FC_FUNC(inittrilinos,INITTRILINOS) (int& bandwidth, int& mySize,
	       int* myIndicies, double* myX, double* myY, double* myZ,
	       int* mpi_comm_f) {

#ifdef GLIMMER_MPI
    // Make sure the MPI_Init in Fortran is recognized by C++.
    // We used to call an extra MPI_Init if (!flag), but the behavior of doing so is uncertain,
    // especially if CISM's MPI communicator is a subset of MPI_COMM_WORLD (as can be the case in CESM).
    // Thus, for now, we die with an error message if C++ perceives MPI to be uninitialized.
    // If this causes problems (e.g., if certain MPI implementations seem not to recognize 
    // that MPI has already been initialized), then we will revisit how to handle this.
    int flag;
    MPI_Initialized(&flag);
    if (!flag) {
      cout << "ERROR in inittrilinos: MPI not initialized according to C++ code" << endl;
      exit(1);
    }
    MPI_Comm mpi_comm_c = MPI_Comm_f2c(*mpi_comm_f);
    Epetra_MpiComm comm(mpi_comm_c);
    Teuchos::MpiComm<int> tcomm(Teuchos::opaqueWrapper(mpi_comm_c));
#else
    Epetra_SerialComm comm;
    Teuchos::SerialComm<int> tcomm;
#endif

    // Build and check the row map inside a try block: this routine is an
    // entry point for non-C++ callers, so an exception must not be allowed
    // to propagate out of it. A failure is reported on std::cerr and ends
    // the run, like every other error path in this function.
    Teuchos::RCP<const Epetra_Map> rowMap;
    try {
      rowMap = Teuchos::rcp(new Epetra_Map(-1,mySize,myIndicies,1,comm) );

      TEUCHOS_TEST_FOR_EXCEPTION(!rowMap->UniqueGIDs(), std::logic_error,
         "Error: inittrilinos, myIndices array needs to have Unique entries" 
          << " across all processor.");
    }
    TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
    if (!success) exit(1);

    // Diagnostic output for partitioning
    int minSize, maxSize;
    comm.MinAll(&mySize, &minSize, 1);
    comm.MaxAll(&mySize, &maxSize, 1);
    if (comm.MyPID()==0) 
      cout << "\nPartition Info in init_trilinos: Total nodes = " << rowMap->NumGlobalElements()
           << "  Max = " << maxSize << "  Min = " << minSize 
           << "  Ave = " << rowMap->NumGlobalElements() / comm.NumProc() << endl;

    soln = Teuchos::rcp(new Epetra_Vector(*rowMap));

    // Read parameter list once
    try { 
       pl = Teuchos::rcp(new Teuchos::ParameterList("Trilinos Options"));
       Teuchos::updateParametersFromXmlFileAndBroadcast("trilinosOptions.xml", pl.ptr(), tcomm);

       // Only the two top-level sublists below are accepted; the depth
       // argument 0 passed to validateParameters is kept from the original.
       Teuchos::ParameterList validPL("Valid List");
       validPL.sublist("Stratimikos"); validPL.sublist("Piro");
       pl->validateParameters(validPL, 0);
    }
    catch (std::exception& e) {
      cout << "\nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n" 
           << e.what() << "\nExiting: Invalid trilinosOptions.xml file."
           << "\nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << endl;
      exit(1);
    }
    catch (...) {
      cout << "\nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n" 
           << "\nExiting: Invalid trilinosOptions.xml file."
           << "\nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << endl;
      exit(1);
    }

    try { 
      // Set the coordinate position of the nodes for ML for repartitioning (important for #procs > 100s)
      if (pl->sublist("Stratimikos").isParameter("Preconditioner Type")) {
         if ("ML" == pl->sublist("Stratimikos").get<string>("Preconditioner Type")) {
           Teuchos::ParameterList& mlList =
              pl->sublist("Stratimikos").sublist("Preconditioner Types").sublist("ML").sublist("ML Settings");
           mlList.set("x-coordinates",myX);
           mlList.set("y-coordinates",myY);
           mlList.set("z-coordinates",myZ);
           mlList.set("PDE equations", 1);
         }
      }

      out = Teuchos::VerboseObjectBase::getDefaultOStream();

      // Reset counters every time step: can remove these lines to have averages over entire run
      linearSolveIters_total = 0;
      linearSolveCount=0;
      linearSolveSuccessCount = 0;

      // Create an interface that holds a CrsMatrix instance and some useful methods.
      interface = Teuchos::rcp(new TrilinosMatrix_Interface(rowMap, bandwidth, comm));

      Stratimikos::DefaultLinearSolverBuilder linearSolverBuilder;
      linearSolverBuilder.setParameterList(Teuchos::sublist(pl, "Stratimikos"));
      lowsFactory = linearSolverBuilder.createLinearSolveStrategy("");
      lowsFactory->setOStream(out);
      lowsFactory->setVerbLevel(Teuchos::VERB_LOW);

      // Drop the solver objects left over from the previous call.
      lows=Teuchos::null;
      thyraOper=Teuchos::null;
    }
    TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
    if (!success) exit(1);
  }
Exemplo n.º 22
0
int main(int argc, char *argv[]) {
  // Initialize MPI
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
#endif

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif

  bool verbose = false;
  if (argc > 1)
    if (argv[1][0]=='-' && argv[1][1]=='v')
      verbose = true;

  bool success = true;
  try {
    // Get the process ID and the total number of processors
    int MyPID = Comm.MyPID();
#ifdef HAVE_MPI
    int NumProc = Comm.NumProc();
#endif

    // Set up the printing utilities
    Teuchos::RCP<Teuchos::ParameterList> noxParamsPtr =
      Teuchos::rcp(new Teuchos::ParameterList);
    Teuchos::ParameterList& noxParams = *(noxParamsPtr.get());
    // Only print output if the "-v" flag is set on the command line
    Teuchos::ParameterList& printParams = noxParams.sublist("Printing");
    printParams.set("MyPID", MyPID);
    printParams.set("Output Precision", 5);
    printParams.set("Output Processor", 0);
    if( verbose )
      printParams.set("Output Information",
          NOX::Utils::OuterIteration +
          NOX::Utils::OuterIterationStatusTest +
          NOX::Utils::InnerIteration +
          NOX::Utils::Parameters +
          NOX::Utils::Details +
          NOX::Utils::Warning +
          NOX::Utils::TestDetails);
    else
      printParams.set("Output Information", NOX::Utils::Error +
          NOX::Utils::TestDetails);

    NOX::Utils printing(printParams);

    // Identify the test problem
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Starting epetra/NOX_NewTest/NOX_NewTest.exe" << std::endl;

    // Identify processor information
#ifdef HAVE_MPI
    if (printing.isPrintType(NOX::Utils::TestDetails)) {
      printing.out() << "Parallel Run" << std::endl;
      printing.out() << "Number of processors = " << NumProc << std::endl;
      printing.out() << "Print Process = " << MyPID << std::endl;
    }
    Comm.Barrier();
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Process " << MyPID << " is alive!" << std::endl;
    Comm.Barrier();
#else
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Serial Run" << std::endl;
#endif

    // *** Insert your testing here! ***

    // Final return value (0 = successfull, non-zero = failure)
    int status = 0;

    // Summarize test results
    if (status == 0)
    {
      success = true;
      printing.out() << "Test passed!" << std::endl;
    }
    else
    {
      success = false;
      printing.out() << "Test failed!" << std::endl;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MPI
  MPI_Finalize();
#endif
  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Exemplo n.º 23
0
// Test driver for NOX::Epetra::Vector.
//
// Builds a 100-entry distributed vector and checks, through NOX::TestCompare
// with an absolute tolerance of 1e-12:
//   - the one-, two- and max-norms of a vector of ones,
//   - random() / reciprocal() / innerProduct(),
//   - operator=, abs(), update() and scale().
// Passing "-v" as the first command-line argument selects verbose output.
//
// Returns EXIT_SUCCESS when every check passes and no exception was caught,
// EXIT_FAILURE otherwise.
int main(int argc, char *argv[]) {

  // Initialize MPI
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
#endif

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif

  bool verbose = false;
  bool success = false;
  try {
    int globalLength = 100; // This should suffice

    if (argc > 1)
      if (argv[1][0]=='-' && argv[1][1]=='v')
        verbose = true;

    // Get the process ID and the total number of processors
    int MyPID = Comm.MyPID();

    // Set up the printing utilities
    Teuchos::RCP<Teuchos::ParameterList> noxParamsPtr =
      Teuchos::rcp(new Teuchos::ParameterList);
    Teuchos::ParameterList& noxParams = *(noxParamsPtr.get());
    // Only print output if the "-v" flag is set on the command line
    Teuchos::ParameterList& printParams = noxParams.sublist("Printing");
    printParams.set("MyPID", MyPID);
    printParams.set("Output Precision", 5);
    printParams.set("Output Processor", 0);
    if( verbose )
      printParams.set("Output Information",
          NOX::Utils::OuterIteration +
          NOX::Utils::OuterIterationStatusTest +
          NOX::Utils::InnerIteration +
          NOX::Utils::Parameters +
          NOX::Utils::Details +
          NOX::Utils::Warning +
          NOX::Utils::TestDetails);
    else
      printParams.set("Output Information", NOX::Utils::Error);

    NOX::Utils printing(printParams);

    // Identify the test problem
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Starting epetra/NOX_Vector/NOX_Vector.exe" << std::endl;

    // Create a TestCompare class
    NOX::TestCompare tester( printing.out(), printing);
    double tolerance = 1.e-12;
    NOX::TestCompare::CompareType aComp = NOX::TestCompare::Absolute;

    // Identify processor information
#ifdef HAVE_MPI
    printing.out() << "Parallel Run" << std::endl;
    printing.out() << "Number of processors = " << Comm.NumProc() << std::endl;
    printing.out() << "Print Process = " << MyPID << std::endl;
    Comm.Barrier();
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Process " << MyPID << " is alive!" << std::endl;
    Comm.Barrier();
#else
    printing.out() << "Serial Run" << std::endl;
#endif

    // Create a map describing data distribution.
    // The map and the vectors below are held by Teuchos::RCP so that they
    // are released even when one of the test steps throws.
    Teuchos::RCP<Epetra_Map> standardMap =
      Teuchos::rcp(new Epetra_Map(globalLength, 0, Comm));

    // Return value
    int status = 0;

    // *** Start Testing Here!!! ***

    // First create the Epetra_Vector needed to construct our NOX vector
    Teuchos::RCP<Epetra_Vector> epetraVec =
      Teuchos::rcp(new Epetra_Vector(*standardMap, true));

    Teuchos::RCP<NOX::Epetra::Vector> noxVec1 =
      Teuchos::rcp(new NOX::Epetra::Vector(*epetraVec, NOX::DeepCopy));
    // Release the source vector right away, as the original test did: the
    // deep copy has to keep working without it.
    epetraVec = Teuchos::null;

    Teuchos::RCP<NOX::Epetra::Vector> noxVec2 =
      Teuchos::rcp(new NOX::Epetra::Vector(*noxVec1));
    noxVec2->init(1.0);

    // Test our norms
    NOX::Abstract::Vector::NormType
      oneNorm = NOX::Abstract::Vector::OneNorm,
      twoNorm = NOX::Abstract::Vector::TwoNorm,
      infNorm = NOX::Abstract::Vector::MaxNorm;

    // Expected norms of a vector of globalLength ones.
    double expectedOneNorm = (double) globalLength,
           expectedTwoNorm = sqrt( (double) globalLength),
           expectedInfNorm = 1.0;

    status += tester.testValue( noxVec2->norm(oneNorm), expectedOneNorm,
                                tolerance, "One-Norm Test", aComp);
    status += tester.testValue( noxVec2->norm(twoNorm), expectedTwoNorm,
                                tolerance, "Two-Norm Test", aComp);
    status += tester.testValue( noxVec2->norm(infNorm), expectedInfNorm,
                                tolerance, "Max-Norm Test", aComp);


    // Test random, reciprocal and dot methods
    noxVec1->random();
    // Threshold values since we want to do a reciprocal
    int myLength = standardMap->NumMyElements();
    for( int i = 0; i < myLength; ++i )
      if( fabs(noxVec1->getEpetraVector()[i]) < 1.e-8 ) noxVec1->getEpetraVector()[i] = 1.e-8;

    // x . (1/x) sums one per entry, i.e. globalLength.
    noxVec2->reciprocal(*noxVec1);
    double product = noxVec1->innerProduct(*noxVec2);

    status += tester.testValue( product, expectedOneNorm,
                                tolerance, "Random, Reciprocal and Dot Test", aComp);


    // Test abs and weighted-norm methods
    /*  ----------------------------
        NOT SUPPORTED AT THIS TIME
        ----------------------------
    noxVec2->abs(*noxVec2);
    double wNorm = noxVec1->norm(*noxVec2);
    status += tester.testValue( wNorm, noxVec1->norm(oneNorm),
                                tolerance, "Abs and Weighted-Norm Test", aComp);
    */

    // Test operator= , abs, update and scale methods.
    // (|x| + x)/2 and (|x| - x)/2 are the positive and negative parts of x;
    // their one-norms must add up to the one-norm of x.
    (*noxVec2) = (*noxVec1);
    noxVec2->abs(*noxVec2);
    double sumAll = noxVec1->norm(oneNorm);
    noxVec2->update( 1.0, *noxVec1, 1.0 );
    noxVec2->scale(0.5);
    double sumPositive = noxVec2->norm(oneNorm);
    (*noxVec2) = (*noxVec1);
    noxVec2->abs(*noxVec2);
    noxVec2->update( 1.0, *noxVec1, -1.0 );
    noxVec2->scale(0.5);
    double sumNegative = noxVec2->norm(oneNorm);

    status += tester.testValue( (sumPositive + sumNegative), sumAll,
                                tolerance, "Abs, Operator= , Update and Scale Test", aComp);

    success = status==0;

    if (success)
      printing.out() << "Test passed!" << std::endl;
    else
      printing.out() << "Test failed!" << std::endl;

    // noxVec2, noxVec1 and standardMap are released here, in that order,
    // when their RCP handles go out of scope.
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Exemplo n.º 24
0
// Test driver for the Anasazi Epetra adapters.
//
// Assembles a 100x100 tridiagonal (-1 2 -1) Epetra_CrsMatrix, wraps it in an
// Anasazi::EpetraSymOp and runs Anasazi::TestMultiVecTraits and
// Anasazi::TestOperatorTraits on it. Passing "-v" as the first command-line
// argument raises the output manager's verbosity and makes the catch macro
// report exceptions verbosely.
//
// Returns EXIT_SUCCESS when both trait tests pass and no exception was
// caught, EXIT_FAILURE otherwise.
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  // Initialize MPI and setup an Epetra communicator
  MPI_Init(&argc,&argv);
  Teuchos::RCP<Epetra_MpiComm> Comm = Teuchos::rcp( new Epetra_MpiComm(MPI_COMM_WORLD) );
#else
  // If we aren't using MPI, then setup a serial communicator.
  Teuchos::RCP<Epetra_SerialComm> Comm = Teuchos::rcp( new Epetra_SerialComm() );
#endif

  bool success = false;
  bool verbose = false;
  try {
    int i, epetra_ierr;
    bool ierr, gerr = true;

    // number of global elements
    int dim = 100;
    int blockSize = 5;

    // Set the function-level flag (do not redeclare it here): the catch
    // macro after the try block reads the same variable, so a local copy
    // would hide the "-v" request from it.
    if (argc>1) {
      if (argv[1][0]=='-' && argv[1][1]=='v') {
        verbose = true;
      }
    }

    // Construct a Map that puts approximately the same number of
    // equations on each processor.
    Teuchos::RCP<Epetra_Map> Map = Teuchos::rcp( new Epetra_Map(dim, 0, *Comm) );

    // Get update list and number of local equations from newly created Map.
    int NumMyElements = Map->NumMyElements();
    std::vector<int> MyGlobalElements(NumMyElements);
    Map->MyGlobalElements(&MyGlobalElements[0]);

    // Create an integer vector NumNz that is used to build the Petra Matrix.
    // NumNz[i] is the number of nonzero terms (diagonal included) for the
    // ith global equation on this processor
    std::vector<int> NumNz(NumMyElements);

    // We are building a tridiagonal matrix where each row has (-1 2 -1)
    // So we need 3 entries per row (2 for the first and last equation)
    for (i=0; i<NumMyElements; i++) {
      if (MyGlobalElements[i]==0 || MyGlobalElements[i] == dim-1) {
        NumNz[i] = 2;
      }
      else {
        NumNz[i] = 3;
      }
    }

    // Create an Epetra_Matrix
    Teuchos::RCP<Epetra_CrsMatrix> A = Teuchos::rcp( new Epetra_CrsMatrix(Copy, *Map, &NumNz[0]) );

    // Add  rows one-at-a-time
    // Need some vectors to help
    // Off diagonal Values will always be -1
    std::vector<double> Values(2);
    Values[0] = -1.0; Values[1] = -1.0;
    std::vector<int> Indices(2);
    double two = 2.0;
    int NumEntries;
    for (i=0; i<NumMyElements; i++) {
      if (MyGlobalElements[i]==0) {
        // First row: only the right neighbour exists.
        Indices[0] = 1;
        NumEntries = 1;
      }
      else if (MyGlobalElements[i] == dim-1) {
        // Last row: only the left neighbour exists.
        Indices[0] = dim-2;
        NumEntries = 1;
      }
      else {
        Indices[0] = MyGlobalElements[i]-1;
        Indices[1] = MyGlobalElements[i]+1;
        NumEntries = 2;
      }
      // Note: these return codes are only checked through assert().
      epetra_ierr = A->InsertGlobalValues(MyGlobalElements[i],NumEntries,&Values[0],&Indices[0]);
      assert(epetra_ierr==0);
      // Put in the diagonal entry
      epetra_ierr = A->InsertGlobalValues(MyGlobalElements[i],1,&two,&MyGlobalElements[i]);
      assert(epetra_ierr==0);
    }

    // Finish building the epetra matrix A
    epetra_ierr = A->FillComplete();
    assert(epetra_ierr==0);

    // Create an Anasazi::EpetraSymOp from this Epetra_CrsMatrix
    Teuchos::RCP<Anasazi::EpetraSymOp> op = Teuchos::rcp(new Anasazi::EpetraSymOp(A));

    // Issue several useful typedefs;
    typedef Anasazi::MultiVec<double> EMV;
    typedef Anasazi::Operator<double> EOP;

    // Create an Epetra_MultiVector for an initial vector to start the solver.
    // Note that this needs to have the same number of columns as the blocksize.
    Teuchos::RCP<Anasazi::EpetraMultiVec> ivec = Teuchos::rcp( new Anasazi::EpetraMultiVec(*Map, blockSize) );
    ivec->Random();

    // Create an output manager to handle the I/O from the solver
    Teuchos::RCP<Anasazi::OutputManager<double> > MyOM = Teuchos::rcp( new Anasazi::BasicOutputManager<double>() );
    if (verbose) {
      MyOM->setVerbosity( Anasazi::Warnings );
    }

    // test the Epetra adapter multivector
    ierr = Anasazi::TestMultiVecTraits<double,EMV>(MyOM,ivec);
    gerr &= ierr;
    if (ierr) {
      MyOM->print(Anasazi::Warnings,"*** EpetraAdapter PASSED TestMultiVecTraits()\n");
    }
    else {
      MyOM->print(Anasazi::Warnings,"*** EpetraAdapter FAILED TestMultiVecTraits() ***\n\n");
    }

    // test the Epetra adapter operator
    ierr = Anasazi::TestOperatorTraits<double,EMV,EOP>(MyOM,ivec,op);
    gerr &= ierr;
    if (ierr) {
      MyOM->print(Anasazi::Warnings,"*** EpetraAdapter PASSED TestOperatorTraits()\n");
    }
    else {
      MyOM->print(Anasazi::Warnings,"*** EpetraAdapter FAILED TestOperatorTraits() ***\n\n");
    }

    // Overall result: both trait tests must have passed.
    success = gerr;
    if (success) {
      MyOM->print(Anasazi::Warnings,"End Result: TEST PASSED\n");
    } else {
      MyOM->print(Anasazi::Warnings,"End Result: TEST FAILED\n");
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Exemplo n.º 25
0
int main(int argc, char *argv[])
{
  bool success = true;
  bool verbose = false;
  try {

    const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
    const size_t num_cores_per_socket =
      Kokkos::hwloc::get_available_cores_per_numa();
    const size_t num_threads_per_core =
      Kokkos::hwloc::get_available_threads_per_core();

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString(
      "This test performance of MP::Vector multiply routines.\n");
    int nGrid = 32;
    CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
    int nIter = 10;
    CLP.setOption("ni", &nIter, "Number of multiply iterations");
#ifdef KOKKOS_HAVE_PTHREAD
    bool threads = true;
    CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
    int num_cores = num_cores_per_socket * num_sockets;
    CLP.setOption("cores", &num_cores,
                  "Number of CPU cores to use (defaults to all)");
    int num_hyper_threads = num_threads_per_core;
    CLP.setOption("hyperthreads", &num_hyper_threads,
                  "Number of hyper threads per core to use (defaults to all)");
    int threads_per_vector = 1;
    CLP.setOption("threads_per_vector", &threads_per_vector,
                  "Number of threads to use within each vector");
#endif
#ifdef KOKKOS_HAVE_CUDA
    bool cuda = true;
    CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
    int cuda_threads_per_vector = 16;
    CLP.setOption("cuda_threads_per_vector", &cuda_threads_per_vector,
                  "Number of Cuda threads to use within each vector");
    int cuda_block_size = 0;
    CLP.setOption("cuda_block_size", &cuda_block_size,
                  "Cuda block size (0 implies the default choice)");
    int num_cuda_blocks = 0;
    CLP.setOption("num_cuda_blocks", &num_cuda_blocks,
                  "Number of Cuda blocks (0 implies the default choice)");
    int device_id = 0;
    CLP.setOption("device", &device_id, "CUDA device ID");
#endif
    CLP.parse( argc, argv );

    typedef int Ordinal;
    typedef double Scalar;

#ifdef KOKKOS_HAVE_PTHREAD
    if (threads) {
      typedef Kokkos::Threads Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      Kokkos::Threads::initialize(num_cores*num_hyper_threads);

      std::cout << std::endl
                << "Threads performance with " << num_cores*num_hyper_threads
                << " threads:" << std::endl;

      Kokkos::DeviceConfig dev_config(num_cores,
                                       threads_per_vector,
                                       num_hyper_threads / threads_per_vector);

      mainHost<Storage>(nGrid, nIter, dev_config);

      Kokkos::Threads::finalize();
    }
#endif

#ifdef KOKKOS_HAVE_CUDA
    if (cuda) {
      typedef Kokkos::Cuda Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      Kokkos::Cuda::host_mirror_device_type::initialize();
      Kokkos::Cuda::initialize(Kokkos::Cuda::SelectDevice(device_id));

      cudaDeviceProp deviceProp;
      cudaGetDeviceProperties(&deviceProp, device_id);
      std::cout << std::endl
                << "CUDA performance for device " << device_id << " ("
                << deviceProp.name << "):"
                << std::endl;

      Kokkos::DeviceConfig dev_config(
        num_cuda_blocks,
        cuda_threads_per_vector,
        cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector);

      mainCuda<Storage>(nGrid,nIter,dev_config);

      Kokkos::Cuda::host_mirror_device_type::finalize();
      Kokkos::Cuda::finalize();
    }
#endif

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  if (success)
    return 0;
  return -1;
}
Exemplo n.º 26
0
    // Parses the command line with the member command-line processor `clp`.
    //
    //   procRank - rank of the calling process; only rank 0 writes to
    //              std::cout, every other rank writes to a black-hole stream.
    //   argc/argv - the program's command-line arguments.
    //
    // Returns 0 when parsing succeeded. Returns 1 when the help text was
    // printed, an option was not recognized, or an exception was caught.
    int RunEnvironment::
    processCLP(int procRank, int argc, char* argv[])
    {
      Teuchos::oblackholestream blackhole;
      std::ostream &out = ( procRank == 0 ? std::cout : blackhole );

      bool success = true;
  
      try {

        /* Two settings control how the command line processor reacts to
           problems.
           NOTE(review): the original comment described this call as letting
           unrecognized options be ignored with a warning, yet the flag is
           set to true and PARSE_UNRECOGNIZED_OPTION is treated as an error
           below -- confirm the intended setting against the Teuchos docs.
        */

        clp.recogniseAllOptions(true);
  
        /* Second, ask the processor to report problems through the return
           value of parse() rather than by throwing, so that the outcome
           can be inspected and acted on below.
        */
        clp.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
        */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ;
        try {
          parseReturn = clp.parse( argc, argv );
          //std::cout << "tmp srk parseReturn = " << parseReturn << std::endl;
        }
        // Catch by const reference: catching by value slices the exception,
        // and what() would then no longer return the derived class's message.
        catch (const std::exception& exc)
          {
            out << "RunEnvironment::processCLP error, exc= " << exc.what() << std::endl;
            return 1;
          }

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {

          //std::cout << "tmp srk parseReturn = PARSE_HELP_PRINTED " << parseReturn << std::endl;

          return 1;
        }

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION   ) {

          // std::cout << "tmp srk parseReturn = PARSE_UNRECOGNIZED_OPTION " << parseReturn << std::endl;

          if (m_debug)
            out << "RunEnvironment::processCLP error, unrecognized option" << std::endl;
          return 1; // Error!
        }

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
          if (m_debug)
            out << "RunEnvironment::processCLP success" << std::endl;
        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        if (procRank == 0 && m_debug)
          {
            out << "\nPrinting help message with new values of command-line arguments ...\n\n";

            clp.throwExceptions(false);

            clp.printHelpMessage(argv[0],out);

            // Note: this leaves exception throwing switched on in clp.
            clp.throwExceptions(true);
          }

        // Now we will print the option values
        if (procRank == 0 && m_debug) {
          out << "\nPrinting user options after parsing ...\n\n";
          out << " output_log_opt= " <<  output_log_opt << std::endl;
          out << " dw_opt= " <<  dw_opt << std::endl;
          out << " timer_opt= " <<  timer_opt << std::endl;
          out << " directory_opt= " <<  directory_opt << std::endl;
          out << " help_opt= " <<  help_opt << std::endl;
        }

      } // try
      TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);
  
      if(success && m_debug)
        out << "\nEnd Result: TEST PASSED" << std::endl;	

      return ( success ? 0 : 1 );
    }
Exemplo n.º 27
0
int main(int argc, char *argv[])
{
  // Create output stream. (Handy for multicore output.)
  auto out = Teuchos::VerboseObjectBase::getDefaultOStream();

  Teuchos::GlobalMPISession session(&argc, &argv, NULL);

  auto comm = Teuchos::DefaultComm<int>::getComm();

  // Wrap the whole code in a big try-catch-statement.
  bool success = true;
  try {
    // =========================================================================
    // Handle command line arguments.
    // Boost::program_options is somewhat more complete here (e.g. you can
    // specify options without the "--" syntax), but it isn't less complicated
    // to use. Stick with Teuchos for now.
    Teuchos::CommandLineProcessor myClp;

    myClp.setDocString(
      "Numerical parameter continuation for nonlinear Schr\"odinger equations.\n"
    );

    std::string xmlInputPath = "";
    myClp.setOption("xml-input-file", &xmlInputPath,
                    "XML file containing the parameter list", true );

    // Print warning for unrecognized arguments and make sure to throw an
    // exception if something went wrong.
    //myClp.throwExceptions(false);
    //myClp.recogniseAllOptions ( true );

    // Finally, parse the command line.
    myClp.parse(argc, argv);

    // Retrieve Piro parameter list from given file.
    std::shared_ptr<Teuchos::ParameterList> piroParams(
        new Teuchos::ParameterList()
        );
    Teuchos::updateParametersFromXmlFile(
        xmlInputPath,
        Teuchos::rcp(piroParams).ptr()
        );
    // =======================================================================
    // Extract the location of input and output files.
    const Teuchos::ParameterList outputList =
      piroParams->sublist("Output", true);

    // Set default directory to be the directory of the XML file itself
    const std::string xmlDirectory =
      xmlInputPath.substr(0, xmlInputPath.find_last_of( "/" ) + 1);

    // By default, take the current directory.
    std::string prefix = "./";
    if (!xmlDirectory.empty())
      prefix = xmlDirectory + "/";

    const std::string outputDirectory = prefix;

    const std::string contFilePath =
      prefix + outputList.get<std::string>("Continuation data file name");

    Teuchos::ParameterList & inputDataList = piroParams->sublist("Input", true);

    const std::string inputExodusFile =
      prefix + inputDataList.get<std::string>("File");
    const int step = inputDataList.get<int>("Initial Psi Step");

    //const bool useBordering = piroParams->get<bool>("Bordering");
    // =======================================================================
    // Read the data from the file.
    auto mesh = std::make_shared<Nosh::StkMesh>(
        Teuchos::get_shared_ptr(comm),
        inputExodusFile,
        step
        );

    // Cast the data into something more accessible.
    auto psi = mesh->getComplexVector("psi");
    //psi->Random();

    // Set the output directory for later plotting with this.
    std::stringstream outputFile;
    outputFile << outputDirectory << "/solution.e";
    mesh->openOutputChannel(outputFile.str());

    // Create a parameter map from the initial parameter values.
    Teuchos::ParameterList initialParameterValues =
      piroParams->sublist("Initial parameter values", true);

    // Check if we need to interpret the time value stored in the file
    // as a parameter.
    const std::string & timeName =
      piroParams->get<std::string>("Interpret time as", "");
    if (!timeName.empty()) {
      initialParameterValues.set(timeName, mesh->getTime());
    }

    // Explicitly set the initial parameter value for this list.
    const std::string & paramName =
      piroParams->sublist( "LOCA" )
      .sublist( "Stepper" )
      .get<std::string>("Continuation Parameter");
    *out << "Setting the initial parameter value of \""
         << paramName << "\" to " << initialParameterValues.get<double>(paramName) << "." << std::endl;
    piroParams->sublist( "LOCA" )
    .sublist( "Stepper" )
    .set("Initial Value", initialParameterValues.get<double>(paramName));

    // Set the thickness field.
    auto thickness = std::make_shared<Nosh::ScalarField::Constant>(*mesh, 1.0);

    // Some alternatives for the positive-definite operator.
    // (a) -\Delta (Laplace operator with Neumann boundary)
    //const std::shared_ptr<Nosh::ParameterMatrix::Virtual> matrixBuilder =
    //  rcp(new Nosh::ParameterMatrix::Laplace(mesh, thickness));

    // (b) (-i\nabla-A)^2 (Kinetic energy of a particle in magnetic field)
    // (b1) 'A' explicitly given in file.
    const double mu = initialParameterValues.get<double>("mu");
    auto mvp = std::make_shared<Nosh::VectorField::ExplicitValues>(*mesh, "A", mu);

    //const std::shared_ptr<Nosh::ParameterMatrix::Virtual> keoBuilder(
    //    new Nosh::ParameterMatrix::Keo(mesh, thickness, mvp)
    //    );
    //const std::shared_ptr<Nosh::ParameterMatrix::Virtual> DKeoDPBuilder(
    //    new Nosh::ParameterMatrix::DKeoDP(mesh, thickness, mvp, "mu")
    //    );

    // (b2) 'A' analytically given (here with constant curl).
    //      Optionally add a rotation axis u. This is important
    //      if continuation happens as a rotation of the vector
    //      field around an axis.
    //const std::shared_ptr<DoubleVector> b = rcp(new DoubleVector(3));
    //std::shared_ptr<Teuchos::SerialDenseVector<int,double> > u = Teuchos::null;
    //if ( piroParams->isSublist("Rotation vector") )
    //{
    //    u = rcp(new Teuchos::SerialDenseVector<int,double>(3));
    //    Teuchos::ParameterList & rotationVectorList =
    //        piroParams->sublist( "Rotation vector", false );
    //    (*u)[0] = rotationVectorList.get<double>("x");
    //    (*u)[1] = rotationVectorList.get<double>("y");
    //    (*u)[2] = rotationVectorList.get<double>("z");
    //}
    //std::shared_ptr<Nosh::VectorField::Virtual> mvp =
    //  rcp(new Nosh::VectorField::ConstantCurl(mesh, b, u));
    //const std::shared_ptr<Nosh::ParameterMatrix::Virtual> matrixBuilder =
    //  rcp(new Nosh::ParameterMatrix::Keo(mesh, thickness, mvp));
    // (b3) 'A' analytically given in a class you write yourself, derived
    //      from Nosh::ParameterMatrix::Virtual.
    // [...]
    //
    // Setup the scalar potential V.
    // (a) A constant potential.
    //std::shared_ptr<Nosh::ScalarField::Virtual> sp =
    //rcp(new Nosh::ScalarField::Constant(*mesh, -1.0));
    //const double T = initialParameterValues.get<double>("T");
    // (b) With explicit values.
    //std::shared_ptr<Nosh::ScalarField::Virtual> sp =
    //rcp(new Nosh::ScalarField::ExplicitValues(*mesh, "V"));
    // (c) One you built yourself by deriving from Nosh::ScalarField::Virtual.
    auto sp = std::make_shared<MyScalarField>(mesh);


    const double g = initialParameterValues.get<double>("g");
    // Finally, create the model evaluator.
    // This is the most important object in the whole stack.
    auto modelEvaluator = std::make_shared<Nosh::ModelEvaluator::Nls>(
        mesh,
        mvp,
        sp,
        g,
        thickness,
        psi,
        "mu"
        );

    // Build the Piro model evaluator. It's used to hook up with
    // several different backends (NOX, LOCA, Rhythmos,...).
    std::shared_ptr<Thyra::ModelEvaluator<double>> piro;

    // Declare the eigensaver; it will be used only for LOCA solvers, though.
    std::shared_ptr<Nosh::SaveEigenData> glEigenSaver;

    // Switch by solver type.
    std::string & solver = piroParams->get<std::string>("Piro Solver");

    if (solver == "NOX") {
      auto observer = std::make_shared<Nosh::Observer>(modelEvaluator);

      piro = std::make_shared<Piro::NOXSolver<double>>(
            Teuchos::rcp(piroParams),
            Teuchos::rcp(modelEvaluator),
            Teuchos::rcp(observer)
            );
    } else if (solver == "LOCA") {
      auto observer = std::make_shared<Nosh::Observer>(
          modelEvaluator,
          contFilePath,
          piroParams->sublist("LOCA")
          .sublist("Stepper")
          .get<std::string>("Continuation Parameter")
          );

      // Setup eigen saver.
#ifdef HAVE_LOCA_ANASAZI
      bool computeEigenvalues = piroParams->sublist( "LOCA" )
                                .sublist( "Stepper" )
                                .get<bool>("Compute Eigenvalues");
      if (computeEigenvalues) {
        Teuchos::ParameterList & eigenList = piroParams->sublist("LOCA")
                                             .sublist("Stepper")
                                             .sublist("Eigensolver");
        std::string eigenvaluesFilePath =
          xmlDirectory + "/" + outputList.get<std::string> ( "Eigenvalues file name" );

        glEigenSaver = std::make_shared<Nosh::SaveEigenData>(
            eigenList,
            modelEvaluator,
            eigenvaluesFilePath
            );

        std::shared_ptr<LOCA::SaveEigenData::AbstractStrategy>
          glSaveEigenDataStrategy = glEigenSaver;
        eigenList.set("Save Eigen Data Method",
                      "User-Defined");
        eigenList.set("User-Defined Save Eigen Data Name",
                      "glSaveEigenDataStrategy");
        eigenList.set("glSaveEigenDataStrategy",
                      glSaveEigenDataStrategy);
      }
#endif
      // Get the solver.
      std::shared_ptr<Piro::LOCASolver<double>> piroLOCASolver(
          new Piro::LOCASolver<double>(
            Teuchos::rcp(piroParams),
            Teuchos::rcp(modelEvaluator),
            Teuchos::null
            //Teuchos::rcp(observer)
            )
          );

//      // Get stepper and inject it into the eigensaver.
//      std::shared_ptr<LOCA::Stepper> stepper = Teuchos::get_shared_ptr(
//          piroLOCASolver->getLOCAStepperNonConst()
//          );
//#ifdef HAVE_LOCA_ANASAZI
//      if (computeEigenvalues)
//        glEigenSaver->setLocaStepper(stepper);
//#endif
      piro = piroLOCASolver;
    }
#if 0
    else if ( solver == "Turning Point" ) {
      std::shared_ptr<Nosh::Observer> observer;

      Teuchos::ParameterList & bifList =
        piroParams->sublist("LOCA").sublist("Bifurcation");

      // Fetch the (approximate) null state.
      auto nullstateZ = mesh->getVector("null");

      // Set the length normalization vector to be the initial null vector.
      TEUCHOS_ASSERT(nullstateZ);
      auto lengthNormVec = Teuchos::rcp(new NOX::Thyra::Vector(*nullstateZ));
      //lengthNormVec->init(1.0);
      bifList.set("Length Normalization Vector", lengthNormVec);

      // Set the initial null vector.
      auto initialNullAbstractVec =
        Teuchos::rcp(new NOX::Thyra::Vector(*nullstateZ));
      // initialNullAbstractVec->init(1.0);
      bifList.set("Initial Null Vector", initialNullAbstractVec);

      piro = std::make_shared<Piro::LOCASolver<double>>(
            Teuchos::rcp(piroParams),
            Teuchos::rcp(modelEvaluator),
            Teuchos::null
            //Teuchos::rcp(observer)
            );
    }
#endif
    else {
      TEUCHOS_TEST_FOR_EXCEPT_MSG(
          true,
          "Unknown solver type \"" << solver << "\"."
          );
    }
    // ----------------------------------------------------------------------

    // Now the setting of inputs and outputs.
    Thyra::ModelEvaluatorBase::InArgs<double> inArgs = piro->createInArgs();
    inArgs.set_p(
        0,
        piro->getNominalValues().get_p(0)
        );

    // Set output arguments to evalModel call.
    Thyra::ModelEvaluatorBase::OutArgs<double> outArgs = piro->createOutArgs();

    // Now solve the problem and return the responses.
    const Teuchos::RCP<Teuchos::Time> piroSolveTime =
      Teuchos::TimeMonitor::getNewTimer("Piro total solve time");;
    {
      Teuchos::TimeMonitor tm(*piroSolveTime);
      piro->evalModel(inArgs, outArgs);
    }

    // Manually release LOCA stepper.
#ifdef HAVE_LOCA_ANASAZI
    if (glEigenSaver)
      glEigenSaver->releaseLocaStepper();
#endif

    // Print timing data.
    Teuchos::TimeMonitor::summarize();
  } catch (Teuchos::CommandLineProcessor::HelpPrinted) {
  } catch (Teuchos::CommandLineProcessor::ParseError) {
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, *out, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
// Test driver: builds a Galeri test matrix, wraps it in a MueLu hierarchy and
// runs the uncoupled aggregation factory on the finest level.
//
// Returns EXIT_SUCCESS when the help text was printed or the run completed,
// and EXIT_FAILURE on a command-line error or when `success` is still false
// after the catch statements.
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  Teuchos::oblackholestream nullStream;
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, &nullStream);

  bool success = false;
  bool verbose = true;
  try {
    Teuchos::RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    // ------------------------------------------------------------------
    // Test parameters (run with --help to list the available options).
    // ------------------------------------------------------------------
    Teuchos::CommandLineProcessor clp(false);

    // Default problem size is 8748 (= 4*3^7). Per the original note this is a
    // nice size for 1D and perfect aggregation on small numbers of processors.
    Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 8748); // parameters of the test case
    Xpetra::Parameters xpetraParameters(clp);                   // parameters of Xpetra

    // Options specific to this driver.
    int waitForDebugger = 0;
    int minAggSize      = 2; // was 3 in simple
    int maxSelectedNbrs = 0;
    int showTimings     = 0;

    clp.setOption("debug", &waitForDebugger, "pause to attach debugger");
    clp.setOption("maxNbrSel", &maxSelectedNbrs, "maximum # of nbrs allowed to be in other aggregates");
    clp.setOption("minPerAgg", &minAggSize, "minimum #DOFs per aggregate");
    clp.setOption("timings", &showTimings, "print timings to screen");

    // Leave right away if only help was requested or the command line is bad.
    const Teuchos::CommandLineProcessor::EParseCommandLineReturn parseOutcome =
      clp.parse(argc, argv);
    if (parseOutcome == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED)
      return EXIT_SUCCESS;
    if (parseOutcome == Teuchos::CommandLineProcessor::PARSE_ERROR ||
        parseOutcome == Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION)
      return EXIT_FAILURE;

    // Global timer; it is released explicitly before the summary is printed.
    Teuchos::RCP<Teuchos::Time> globalTimer =
      Teuchos::TimeMonitor::getNewTimer("Timings: Global Time");
    Teuchos::RCP<Teuchos::TimeMonitor> globalTimeMonitor =
      Teuchos::rcp(new Teuchos::TimeMonitor(*globalTimer));

    if (waitForDebugger)
      Utilities::PauseForDebugger();

    matrixParameters.check();
    xpetraParameters.check();
    Xpetra::UnderlyingLib underlyingLib = xpetraParameters.GetLib();

    // Only rank 0 echoes the parameters.
    if (comm->getRank() == 0)
      std::cout << xpetraParameters << matrixParameters;

    // ------------------------------------------------------------------
    // Initial matrix.
    // ------------------------------------------------------------------
    Teuchos::RCP<const Map> rowMap;
    Teuchos::RCP<Matrix> A;

    {
      // Scope limits the lifetime of the "Matrix Build" timer.
      Teuchos::TimeMonitor buildTimer(*Teuchos::TimeMonitor::getNewTimer("Timings: Matrix Build"));

      rowMap = MapFactory::Build(underlyingLib, matrixParameters.GetNumGlobalElements(), 0, comm);
      // TODO: Matrix vs. CrsMatrixWrap
      Teuchos::RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > problem =
        Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(
          matrixParameters.GetMatrixType(), rowMap, matrixParameters.GetParameterList());
      A = problem->BuildMatrix();
    }

    // ------------------------------------------------------------------
    // Hierarchy and finest level.
    // ------------------------------------------------------------------
    Teuchos::RCP<Hierarchy> hierarchy = Teuchos::rcp(new Hierarchy(A));
    hierarchy->SetDefaultVerbLevel(MueLu::toMueLuVerbLevel(Teuchos::VERB_EXTREME));
    hierarchy->SetMaxCoarseSize(100);

    Teuchos::RCP<MueLu::Level> finestLevel = hierarchy->GetLevel();
    finestLevel->Set("A", A);

    // The drop factory takes its "UnAmalgamationInfo" from the amalgamation factory.
    Teuchos::RCP<AmalgamationFactory> amalgamationFact = Teuchos::rcp(new AmalgamationFactory());
    Teuchos::RCP<CoalesceDropFactory> coalesceDropFact = Teuchos::rcp(new CoalesceDropFactory());
    coalesceDropFact->SetFactory("UnAmalgamationInfo", amalgamationFact);

    // Aggregation factory: "Graph" and "DofsPerNode" both come from the drop
    // factory. The aggregation options are passed as parameters (the older
    // setter calls were already disabled in the original code).
    Teuchos::RCP<UncoupledAggregationFactory> aggregationFact =
      Teuchos::rcp(new UncoupledAggregationFactory());
    aggregationFact->SetFactory("Graph", coalesceDropFact);
    aggregationFact->SetFactory("DofsPerNode", coalesceDropFact);
    aggregationFact->SetParameter("aggregation: max selected neighbors",
                                  Teuchos::ParameterEntry(maxSelectedNbrs));
    aggregationFact->SetParameter("aggregation: min agg size",
                                  Teuchos::ParameterEntry(minAggSize));
    aggregationFact->SetParameter("aggregation: ordering",
                                  Teuchos::ParameterEntry(std::string("graph")));

    // Request the data on the finest level, then build the aggregates.
    finestLevel->Request("Graph", coalesceDropFact.get());
    finestLevel->Request("DofsPerNode", coalesceDropFact.get());
    finestLevel->Request("UnAmalgamationInfo", amalgamationFact.get());
    aggregationFact->Build(*finestLevel);

    // Stop the global timer before the summary so it is included in it.
    globalTimeMonitor = Teuchos::null;

    if (showTimings)
      Teuchos::TimeMonitor::summarize();

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
Exemplo n.º 29
0
int main(int argc, char *argv[])
{
    bool success = true;
    bool verbose = false;
    try {

        const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
        const size_t num_cores_per_socket =
            Kokkos::hwloc::get_available_cores_per_numa();
        const size_t num_threads_per_core =
            Kokkos::hwloc::get_available_threads_per_core();

        // Setup command line options
        Teuchos::CommandLineProcessor CLP;
        CLP.setDocString(
            "This test performance of MP::Vector multiply routines.\n");
        int nGrid = 32;
        CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
        int nIter = 10;
        CLP.setOption("ni", &nIter, "Number of multiply iterations");
        int ensemble_min = 4;
        CLP.setOption("emin", &ensemble_min, "Staring ensemble size");
        int ensemble_max = 24;
        CLP.setOption("emax", &ensemble_max, "Stoping ensemble size");
        int ensemble_step = 4;
        CLP.setOption("estep", &ensemble_step, "Ensemble increment");
#ifdef KOKKOS_HAVE_PTHREAD
        bool threads = true;
        CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
        int num_cores = num_cores_per_socket * num_sockets;
        CLP.setOption("cores", &num_cores,
                      "Number of CPU cores to use (defaults to all)");
        int num_hyper_threads = num_threads_per_core;
        CLP.setOption("hyperthreads", &num_hyper_threads,
                      "Number of hyper threads per core to use (defaults to all)");
#endif
#ifdef KOKKOS_HAVE_CUDA
        bool cuda = true;
        CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
        int device_id = 0;
        CLP.setOption("device", &device_id, "CUDA device ID");
#endif
        CLP.parse( argc, argv );

        typedef int Ordinal;
        typedef double Scalar;

#ifdef KOKKOS_HAVE_PTHREAD
        if (threads) {
            typedef Kokkos::Threads Device;

            Kokkos::Threads::initialize(num_cores*num_hyper_threads);

            std::cout << std::endl
                      << "Threads performance with " << num_cores*num_hyper_threads
                      << " threads:" << std::endl;

            performance_test_driver<Scalar,Ordinal,Device>(
                nGrid, nIter, ensemble_min, ensemble_max, ensemble_step);

            Kokkos::Threads::finalize();
        }
#endif

#ifdef KOKKOS_HAVE_CUDA
        if (cuda) {
            typedef Kokkos::Cuda Device;

            Kokkos::HostSpace::execution_space::initialize();
            Kokkos::Cuda::initialize(Kokkos::Cuda::SelectDevice(device_id));

            cudaDeviceProp deviceProp;
            cudaGetDeviceProperties(&deviceProp, device_id);
            std::cout << std::endl
                      << "CUDA performance for device " << device_id << " ("
                      << deviceProp.name << "):"
                      << std::endl;

            performance_test_driver<Scalar,Ordinal,Device>(
                nGrid, nIter, ensemble_min, ensemble_max, ensemble_step);

            Kokkos::HostSpace::execution_space::finalize();
            Kokkos::Cuda::finalize();
        }
#endif

    }
    TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

    if (success)
        return 0;
    return -1;
}
Exemplo n.º 30
0
int main(int argc, char *argv[])
{
  bool success = true;
  bool verbose = false;
  try {

    Teuchos::oblackholestream blackHole;
    Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);

    Teuchos::RCP<const Teuchos::Comm<int> > comm =
      Teuchos::DefaultComm<int>::getComm();

    const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
    const size_t num_cores_per_socket =
      Kokkos::hwloc::get_available_cores_per_numa();
    const size_t num_threads_per_core =
      Kokkos::hwloc::get_available_threads_per_core();

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString(
      "This test performance of MP::Vector FEM assembly.\n");
    int nGrid = 32;
    CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
    int nIter = 10;
    CLP.setOption("ni", &nIter, "Number of assembly iterations");
    bool print = false;
    CLP.setOption("print", "no-print", &print, "Print debugging output");
    bool check = false;
    int num_cores = num_cores_per_socket * num_sockets;
    CLP.setOption("cores", &num_cores,
                  "Number of CPU cores to use (defaults to all)");
    int num_hyper_threads = num_threads_per_core;
    CLP.setOption("hyperthreads", &num_hyper_threads,
                  "Number of hyper threads per core to use (defaults to all)");
    int threads_per_vector = 1;
    CLP.setOption("threads_per_vector", &threads_per_vector,
                  "Number of threads to use within each vector");
    CLP.setOption("check", "no-check", &check, "Check correctness");
#ifdef KOKKOS_HAVE_SERIAL
    bool serial = true;
    CLP.setOption("serial", "no-serial", &serial, "Enable Serial device");
#endif
#ifdef KOKKOS_HAVE_PTHREAD
    bool threads = true;
    CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
#endif
#ifdef KOKKOS_HAVE_OPENMP
    bool openmp = true;
    CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
#endif
#ifdef KOKKOS_HAVE_CUDA
    bool cuda = true;
    CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
    int cuda_threads_per_vector = 16;
    CLP.setOption("cuda_threads_per_vector", &cuda_threads_per_vector,
                  "Number of Cuda threads to use within each vector");
    int cuda_block_size = 256;
    CLP.setOption("cuda_block_size", &cuda_block_size,
                  "Cuda block size");
    int num_cuda_blocks = 0;
    CLP.setOption("num_cuda_blocks", &num_cuda_blocks,
                  "Number of Cuda blocks (0 implies the default choice)");
    int device_id = -1;
    CLP.setOption("device", &device_id, "CUDA device ID.  Set to default of -1 to use the default device as determined by the local node MPI rank and --ngpus");
    int ngpus = 1;
    CLP.setOption("ngpus", &ngpus, "Number of GPUs per node for multi-GPU runs via MPI");
#endif
    CLP.parse( argc, argv );

    int use_nodes[3];
    use_nodes[0] = nGrid; use_nodes[1] = nGrid; use_nodes[2] = nGrid;

    typedef int Ordinal;
    typedef double Scalar;
    const Kokkos::Example::FENL::AssemblyMethod Method =
      Kokkos::Example::FENL::FadElementOptimized;
    // const Kokkos::Example::FENL::AssemblyMethod Method =
    //   Kokkos::Example::FENL::Analytic;

#ifdef KOKKOS_HAVE_SERIAL
    if (serial) {
      typedef Kokkos::Serial Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      Kokkos::Serial::initialize();

      if (comm->getRank() == 0)
        std::cout << std::endl
                  << "Serial performance with " << comm->getSize()
                  << " MPI ranks" << std::endl;

      Kokkos::Example::FENL::DeviceConfig dev_config(1, 1, 1);

      mainHost<Storage,Method>(comm, print, nIter, use_nodes, check,
                               dev_config);

      Kokkos::Serial::finalize();
    }
#endif

#ifdef KOKKOS_HAVE_PTHREAD
    if (threads) {
      typedef Kokkos::Threads Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      Kokkos::Threads::initialize(num_cores*num_hyper_threads);

      if (comm->getRank() == 0)
        std::cout << std::endl
                  << "Threads performance with " << comm->getSize()
                  << " MPI ranks and " << num_cores*num_hyper_threads
                  << " threads per rank:" << std::endl;

      Kokkos::Example::FENL::DeviceConfig dev_config(num_cores,
                                       threads_per_vector,
                                       num_hyper_threads / threads_per_vector);

      mainHost<Storage,Method>(comm, print, nIter, use_nodes, check,
                               dev_config);

      Kokkos::Threads::finalize();
    }
#endif

#ifdef KOKKOS_HAVE_OPENMP
    if (openmp) {
      typedef Kokkos::OpenMP Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      Kokkos::OpenMP::initialize(num_cores*num_hyper_threads);

      if (comm->getRank() == 0)
        std::cout << std::endl
                  << "OpenMP performance with " << comm->getSize()
                  << " MPI ranks and " << num_cores*num_hyper_threads
                  << " threads per rank:" << std::endl;

      Kokkos::Example::FENL::DeviceConfig dev_config(num_cores,
                                       threads_per_vector,
                                       num_hyper_threads / threads_per_vector);

      mainHost<Storage,Method>(comm, print, nIter, use_nodes, check,
                               dev_config);

      Kokkos::OpenMP::finalize();
    }
#endif

#ifdef KOKKOS_HAVE_CUDA
    if (cuda) {
      typedef Kokkos::Cuda Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      if (device_id == -1) {
        int local_rank = 0;
        char *str;
        if ((str = std::getenv("SLURM_LOCALID")))
          local_rank = std::atoi(str);
        else if ((str = std::getenv("MV2_COMM_WORLD_LOCAL_RANK")))
          local_rank = std::atoi(str);
        else if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")))
          local_rank = std::atoi(str);
        device_id = local_rank % ngpus;

        // Check device is valid
        int num_device; cudaGetDeviceCount(&num_device);
        TEUCHOS_TEST_FOR_EXCEPTION(
          device_id >= num_device, std::logic_error,
          "Invalid device ID " << device_id << ".  You probably are trying" <<
          " to run with too many GPUs per node");
      }

      Kokkos::HostSpace::execution_space::initialize();
      Kokkos::Cuda::initialize(Kokkos::Cuda::SelectDevice(device_id));

      cudaDeviceProp deviceProp;
      cudaGetDeviceProperties(&deviceProp, device_id);
      if (comm->getRank() == 0)
        std::cout << std::endl
                  << "CUDA performance performance with " << comm->getSize()
                  << " MPI ranks and device " << device_id << " ("
                  << deviceProp.name << "):"
                  << std::endl;

      Kokkos::Example::FENL::DeviceConfig dev_config(
        num_cuda_blocks,
        cuda_threads_per_vector,
        cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector);

      mainCuda<Storage,Method>(comm, print, nIter, use_nodes, check,
                               dev_config);

      Kokkos::HostSpace::execution_space::finalize();
      Kokkos::Cuda::finalize();
    }
#endif

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  if (success)
    return 0;
  return -1;
}