Example #1
int gasneti_bootstrapInit_mpi(int *argc, char ***argv, gasnet_node_t *nodes, gasnet_node_t *mynode) {
  MPI_Group world;
  int err;

  /* Call MPI_Init exactly once */
  err = MPI_Initialized(&gasnetc_mpi_preinitialized);
  if (MPI_SUCCESS != err) return GASNET_ERR_NOT_INIT;
  if (!gasnetc_mpi_preinitialized) {
    err = MPI_Init(argc, argv);
    if (MPI_SUCCESS != err) return GASNET_ERR_NOT_INIT;
  }

  /* Create private communicator */
  err = MPI_Comm_group(MPI_COMM_WORLD, &world);
  gasneti_assert(err == MPI_SUCCESS);
  err = MPI_Comm_create(MPI_COMM_WORLD, world, &gasnetc_mpi_comm);
  gasneti_assert(err == MPI_SUCCESS);
  err = MPI_Group_free(&world);
  gasneti_assert(err == MPI_SUCCESS);

  /* Get size and rank */
  err = MPI_Comm_size(gasnetc_mpi_comm, &gasnetc_mpi_size);
  gasneti_assert(err == MPI_SUCCESS);
  *nodes = gasnetc_mpi_size;
  if ((int)(*nodes) != gasnetc_mpi_size) *nodes = 0; /* Overflow! */

  err = MPI_Comm_rank(gasnetc_mpi_comm, &gasnetc_mpi_rank);
  gasneti_assert(err == MPI_SUCCESS);
  *mynode = gasnetc_mpi_rank;

  gasneti_setupGlobalEnvironment(*nodes, *mynode,
				 &gasneti_bootstrapExchange_mpi,
				 &gasneti_bootstrapBroadcast_mpi);

  return GASNET_OK;
}
Example #2
std::ostream& pout()
{
#ifdef CH_MPI
  // the common case is _open == true, which just returns s_pout
  if ( ! s_pout_open )
  {
    // the uncommon case: the file isn't opened, MPI may not be
    // initialized, and the basename may not have been set
    int flag_i, flag_f;
    MPI_Initialized(&flag_i);
    MPI_Finalized(&flag_f);
    // app hasn't set a basename yet, so set the default
    if ( ! s_pout_init )
    {
      s_pout_basename = "pout" ;
      s_pout_init = true ;
    }
    // if MPI is not initialized, we can't open the file, so return cout
    if ( ! flag_i || flag_f)
    {
      return std::cout; // MPI hasn't been started yet, or has ended....
    }
    // MPI is initialized, so file must not be, so open it
    setFileName() ;
    openFile() ;
    // finally, in case the open failed, return cout
    if ( ! s_pout_open )
    {
      return std::cout ;
    }
  }
  return s_pout ;
#else
  return std::cout;
#endif
}
Example #3
/** Finalize DALEC.  Must be called before MPI is finalized.  DALEC calls are
  * not valid after finalization.  Collective on world group.
  *
  * @return            Zero on success
  */
int PDALEC_Finalize(void)
{
    int dalec_alive = atomic_fetch_sub_explicit(&(DALECI_GLOBAL_STATE.alive),
                                                1,memory_order_seq_cst);

    if (dalec_alive == 1) {
        /* Check for MPI initialization */
        int mpi_is_init, mpi_is_fin;
        MPI_Initialized(&mpi_is_init);
        MPI_Finalized(&mpi_is_fin);

        /* Free communicator if possible and return */
        if (!mpi_is_init || mpi_is_fin) {
            DALECI_Warning("MPI must be active when calling DALEC_Finalize");
            return DALEC_ERROR_MPI_USAGE;
        } else {
            int rc = MPI_Comm_free(&DALECI_GLOBAL_STATE.mpi_comm);
            return DALECI_Check_MPI("DALEC_Finalize", "MPI_Comm_free", rc);
        }
    } else {
        /* Library is still active. */
        return DALEC_SUCCESS;
    }
}
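For context, a minimal caller-side sketch of the ordering rule stated in the header comment above (DALEC must be torn down while MPI is still active) is shown below. This is an illustration only, not part of the DALEC sources: the library's initialization call is elided because its name does not appear in this example, and only PDALEC_Finalize from the snippet above plus standard MPI calls are used.

/* Hedged sketch of the documented teardown order: finalize DALEC first,
 * then MPI, so that MPI_Initialized/MPI_Finalized still report an active
 * MPI environment inside PDALEC_Finalize(). */
#include <mpi.h>
#include <stdio.h>

int PDALEC_Finalize(void);   /* from the example above */

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    /* ... DALEC initialization and application work elided ... */

    int rc = PDALEC_Finalize();   /* collective on the world group */
    if (rc != 0)
        fprintf(stderr, "DALEC finalization returned %d\n", rc);

    MPI_Finalize();               /* only after the library is down */
    return 0;
}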
Example #4
        int MpiCommunicator::init( int minId, long thecomm_ )
        {
            VT_FUNC_I( "MpiCommunicator::init" );

            assert( sizeof(thecomm_) >= sizeof(MPI_Comm) );
            MPI_Comm thecomm = (MPI_Comm)thecomm_;

            // turn wait mode on for intel mpi if possible
            // this should greatly improve performance for intel mpi
            PAL_SetEnvVar( "I_MPI_WAIT_MODE", "enable", 0);

            int flag;
            MPI_Initialized( &flag );
            if ( ! flag ) {
                int p;
                //!! FIXME passing NULL ptr breaks mvapich1 mpi implementation
                MPI_Init_thread( 0, NULL, MPI_THREAD_MULTIPLE, &p );
                if( p != MPI_THREAD_MULTIPLE ) {
                    // can't use Speaker yet, need Channels to be inited
                    std::cerr << "[CnC] Warning: not MPI_THREAD_MULTIPLE (" << MPI_THREAD_MULTIPLE << "), but " << p << std::endl;
                }
            } else if( thecomm == 0 ) {
                CNC_ABORT( "Process has already been initialized" );
            }


            MPI_Comm myComm = MPI_COMM_WORLD;
            int rank;
            MPI_Comm parentComm = MPI_COMM_NULL; // initialized so the spawn check below is well-defined when a custom comm is passed
            if( thecomm == 0 ) {
                MPI_Comm_get_parent( &parentComm );
            } else {
                m_customComm = true;
                m_exit0CallOk = false;
                myComm = thecomm;
            }
            MPI_Comm_rank( myComm, &rank );
            
            // father of all checks if he's requested to spawn processes:
            if ( rank == 0 && parentComm == MPI_COMM_NULL ) {
                // Ok, let's spawn the clients.
                // I need some information for the startup.
                // 1. Name of the executable (default is the current exe)
                const char * _tmp = getenv( "CNC_MPI_SPAWN" );
                if ( _tmp ) {
                    int nClientsToSpawn = atol( _tmp );
                    _tmp = getenv( "CNC_MPI_EXECUTABLE" );
                    std::string clientExe( _tmp ? _tmp : "" );
                    if( clientExe.empty() ) clientExe = PAL_GetProgname();
                    CNC_ASSERT( ! clientExe.empty() );
                    // 3. Special setting for MPI_Info: hosts
                    const char * clientHost = getenv( "CNC_MPI_HOSTS" );
                    
                    // Prepare MPI_Info object:
                    MPI_Info clientInfo = MPI_INFO_NULL;
                    if ( clientHost ) {
                        MPI_Info_create( &clientInfo );
                        if ( clientHost ) {
                            MPI_Info_set( clientInfo, const_cast< char * >( "host" ), const_cast< char * >( clientHost ) );
                            // can't use Speaker yet, need Channels to be inited
                            std::cerr << "[CnC " << rank << "] Set MPI_Info_set( \"host\", \"" << clientHost << "\" )\n";
                        }
                    }
                    // Now spawn the client processes:
                    // can't use Speaker yet, need Channels to be inited
                    std::cerr << "[CnC " << rank << "] Spawning " << nClientsToSpawn << " MPI processes" << std::endl;
                    int* errCodes = new int[nClientsToSpawn];
                    MPI_Comm interComm;
                    int err = MPI_Comm_spawn( const_cast< char * >( clientExe.c_str() ),
                                              MPI_ARGV_NULL, nClientsToSpawn,
                                              clientInfo, 0, MPI_COMM_WORLD,
                                              &interComm, errCodes );
                    delete [] errCodes;
                    if ( err ) {
                        // can't use Speaker yet, need Channels to be inited
                        std::cerr << "[CnC " << rank << "] Error in MPI_Comm_spawn. Skipping process spawning";
                    } else {
                        MPI_Intercomm_merge( interComm, 0, &myComm );
                    }
                } // else {
                // No process spawning
                // MPI-1 situation: all clients to be started by mpiexec
                //                    myComm = MPI_COMM_WORLD;
                //}
            }
            if ( thecomm == 0 && parentComm != MPI_COMM_NULL ) {
                // I am a child. Build intra-comm to the parent.
                MPI_Intercomm_merge( parentComm, 1, &myComm );
            }
            MPI_Comm_rank( myComm, &rank );

            CNC_ASSERT( m_channel == NULL );
            MpiChannelInterface* myChannel = new MpiChannelInterface( use_crc(), myComm );
            m_channel = myChannel;

            int size;
            MPI_Comm_size( myComm, &size );
            // Are we on the host or on the remote side?
            if ( rank == 0 ) {
                if( size <= 1 ) {
                    Speaker oss( std::cerr ); oss << "Warning: no clients available. Forgot to set CNC_MPI_SPAWN?";
                }
                // ==> HOST startup: 
                // This initializes the mpi environment in myChannel.
                MpiHostInitializer hostInitializer( *myChannel );
                hostInitializer.init_mpi_comm( myComm );
            } else {
                // ==> CLIENT startup:
                // This initializes the mpi environment in myChannel.
                MpiClientInitializer clientInitializer( *myChannel );
                clientInitializer.init_mpi_comm( myComm );
            }

            { Speaker oss( std::cerr ); oss << "MPI initialization complete (rank " << rank << ")."; }

            //            MPI_Barrier( myComm );

            // Now the mpi specific setup is finished.
            // Do the generic initialization stuff.
            GenericCommunicator::init( minId );

            return 0;
        }
Example #5
LibMeshInit::LibMeshInit (int argc, const char* const* argv,
                          MPI_Comm COMM_WORLD_IN)
#endif
{
  // should _not_ be initialized already.
  libmesh_assert (!libMesh::initialized());

  // Build a command-line parser.
  command_line.reset (new GetPot (argc, argv));

  // Disable performance logging upon request
  {
    if (libMesh::on_command_line ("--disable-perflog"))
      libMesh::perflog.disable_logging();
  }

  // Build a task scheduler
  {
    // Get the requested number of threads, defaults to 1 to avoid MPI and
    // multithreading competition.  If you would like to use MPI and multithreading
    // at the same time then (n_mpi_processes_per_node)x(n_threads) should be the
    //  number of processing cores per node.
    std::vector<std::string> n_threads(2);
    n_threads[0] = "--n_threads";
    n_threads[1] = "--n-threads";
    libMesh::libMeshPrivateData::_n_threads =
      libMesh::command_line_value (n_threads, 1);

    // Set the number of OpenMP threads to the same as the number of threads libMesh is going to use
#ifdef LIBMESH_HAVE_OPENMP
    omp_set_num_threads(libMesh::libMeshPrivateData::_n_threads);
#endif

    task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
  }

  // Construct singletons who may be at risk of the
  // "static initialization order fiasco"
  Singleton::setup();

  // Make sure the construction worked
  libmesh_assert(remote_elem);

#if defined(LIBMESH_HAVE_MPI)

  // Allow the user to bypass MPI initialization
  if (!libMesh::on_command_line ("--disable-mpi"))
    {
      // Check whether the calling program has already initialized
      // MPI, and avoid duplicate Init/Finalize
      int flag;
      MPI_Initialized (&flag);

      if (!flag)
        {
#if MPI_VERSION > 1
          int mpi_thread_provided;
          const int mpi_thread_requested = libMesh::n_threads() > 1 ?
                                           MPI_THREAD_FUNNELED :
                                           MPI_THREAD_SINGLE;

          MPI_Init_thread (&argc, const_cast<char***>(&argv),
                           mpi_thread_requested, &mpi_thread_provided);

          if ((libMesh::n_threads() > 1) &&
              (mpi_thread_provided < MPI_THREAD_FUNNELED))
            {
              libmesh_warning("Warning: MPI failed to guarantee MPI_THREAD_FUNNELED\n" << 
                              "for a threaded run.\n" <<
                              "Be sure your library is funneled-thread-safe..." <<
                               std::endl);

              // Ideally, if an MPI stack tells us it's unsafe for us
              // to use threads, we shouldn't use threads.
              // In practice, we've encountered one MPI stack (an
              // mvapich2 configuration) that returned
              // MPI_THREAD_SINGLE as a proper warning, two stacks
              // that handle MPI_THREAD_FUNNELED properly, and two
              // current stacks plus a couple old stacks that return
              // MPI_THREAD_SINGLE but support libMesh threaded runs
              // anyway.

              // libMesh::libMeshPrivateData::_n_threads = 1;
              // task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
            }
#else
          if (libMesh::libMeshPrivateData::_n_threads > 1)
            {
              libmesh_warning("Warning: using MPI1 for threaded code.\n" <<
                              "Be sure your library is funneled-thread-safe..." <<
                              std::endl);
            }

          MPI_Init (&argc, const_cast<char***>(&argv));
#endif
          libmesh_initialized_mpi = true;
        }

      // Duplicate the input communicator for internal use
      // And get a Parallel::Communicator copy too, to use
      // as a default for that API
      this->_comm = COMM_WORLD_IN;

      libMesh::GLOBAL_COMM_WORLD = COMM_WORLD_IN;

#ifndef LIBMESH_DISABLE_COMMWORLD
      libMesh::COMM_WORLD = COMM_WORLD_IN;
      Parallel::Communicator_World = COMM_WORLD_IN;
#endif

      //MPI_Comm_set_name not supported in at least SGI MPT's MPI implementation
      //MPI_Comm_set_name (libMesh::COMM_WORLD, "libMesh::COMM_WORLD");

      libMeshPrivateData::_processor_id =
        libmesh_cast_int<processor_id_type>(this->comm().rank());
      libMeshPrivateData::_n_processors =
        libmesh_cast_int<processor_id_type>(this->comm().size());

      // Set up an MPI error handler if requested.  This helps us get
      // into a debugger with a proper stack when an MPI error occurs.
      if (libMesh::on_command_line ("--handle-mpi-errors"))
        {
#if MPI_VERSION > 1
          MPI_Comm_create_errhandler(libMesh_MPI_Handler, &libmesh_errhandler);
          MPI_Comm_set_errhandler(libMesh::GLOBAL_COMM_WORLD, libmesh_errhandler);
          MPI_Comm_set_errhandler(MPI_COMM_WORLD, libmesh_errhandler);
#else
          MPI_Errhandler_create(libMesh_MPI_Handler, &libmesh_errhandler);
          MPI_Errhandler_set(libMesh::GLOBAL_COMM_WORLD, libmesh_errhandler);
          MPI_Errhandler_set(MPI_COMM_WORLD, libmesh_errhandler);
#endif // #if MPI_VERSION > 1
        }
    }

  // Could we have gotten bad values from the above calls?
  libmesh_assert_greater (libMeshPrivateData::_n_processors, 0);

  // The libmesh_cast_int already tested _processor_id>=0
  // libmesh_assert_greater_equal (libMeshPrivateData::_processor_id, 0);

  // Let's be sure we properly initialize on every processor at once:
  libmesh_parallel_only(this->comm());

#endif

#if defined(LIBMESH_HAVE_PETSC)

  // Allow the user to bypass PETSc initialization
  if (!libMesh::on_command_line ("--disable-petsc")

#if defined(LIBMESH_HAVE_MPI)
      // If the user bypassed MPI, we'd better be safe and assume that
      // PETSc was built to require it; otherwise PETSc initialization
      // dies.
      && !libMesh::on_command_line ("--disable-mpi")
#endif
      )
    {
      int ierr=0;

      PETSC_COMM_WORLD = libMesh::GLOBAL_COMM_WORLD;

      // Check whether the calling program has already initialized
      // PETSc, and avoid duplicate Initialize/Finalize
      PetscBool petsc_already_initialized;
      ierr = PetscInitialized(&petsc_already_initialized);
      CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
      if (petsc_already_initialized != PETSC_TRUE)
        libmesh_initialized_petsc = true;
# if defined(LIBMESH_HAVE_SLEPC)

      // If SLEPc allows us to check whether the calling program
      // has already initialized it, we do that, and avoid
      // duplicate Initialize/Finalize.
      // We assume that SLEPc will handle PETSc appropriately,
      // which it does in the versions we've checked.
#  if !SLEPC_VERSION_LESS_THAN(2,3,3)
      if (!SlepcInitializeCalled)
#  endif
        {
          ierr = SlepcInitialize  (&argc, const_cast<char***>(&argv), NULL, NULL);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
          libmesh_initialized_slepc = true;
        }
# else
      if (libmesh_initialized_petsc)
        {
          ierr = PetscInitialize (&argc, const_cast<char***>(&argv), NULL, NULL);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
        }
# endif
    }
#endif

  // Re-parse the command-line arguments.  Note that PETSc and MPI
  // initialization above may have removed command line arguments
  // that are not relevant to this application in the above calls.
  // We don't want a false-positive by detecting those arguments.
  command_line->parse_command_line (argc, argv);

  // The following line is an optimization when simultaneous
  // C and C++ style access to output streams is not required.
  // The amount of benefit which occurs is probably implementation
  // defined, and may be nothing.  On the other hand, I have seen
  // some IO tests where IO performance improves by a factor of two.
  if (!libMesh::on_command_line ("--sync-with-stdio"))
    std::ios::sync_with_stdio(false);

  // Honor the --separate-libmeshout command-line option.
  // When this is specified, the library uses an independent ostream
  // for libMesh::out/libMesh::err messages, and
  // std::cout and std::cerr are untouched by any other options
  if (libMesh::on_command_line ("--separate-libmeshout"))
    {
      // Redirect.  We'll share streambufs with cout/cerr for now, but
      // presumably anyone using this option will want to replace the
      // bufs later.
      std::ostream* newout = new std::ostream(std::cout.rdbuf());
      libMesh::out = *newout;
      std::ostream* newerr = new std::ostream(std::cerr.rdbuf());
      libMesh::err = *newerr;
    }

  // Honor the --redirect-stdout command-line option.
  // When this is specified each processor sends
  // libMesh::out/libMesh::err messages to
  // stdout.processor.####
  if (libMesh::on_command_line ("--redirect-stdout"))
    {
      std::ostringstream filename;
      filename << "stdout.processor." << libMesh::global_processor_id();
      _ofstream.reset (new std::ofstream (filename.str().c_str()));
      // Redirect, saving the original streambufs!
      out_buf = libMesh::out.rdbuf (_ofstream->rdbuf());
      err_buf = libMesh::err.rdbuf (_ofstream->rdbuf());
    }

  // redirect libMesh::out to nothing on all
  // other processors unless explicitly told
  // not to via the --keep-cout command-line argument.
  if (libMesh::global_processor_id() != 0)
    if (!libMesh::on_command_line ("--keep-cout"))
      libMesh::out.rdbuf (NULL);

  // Check command line to override printing
  // of reference count information.
  if(libMesh::on_command_line("--disable-refcount-printing") )
    ReferenceCounter::disable_print_counter_info();

#ifdef LIBMESH_ENABLE_EXCEPTIONS
  // Set our terminate handler to write stack traces in the event of a
  // crash
  old_terminate_handler = std::set_terminate(libmesh_terminate_handler);
#endif


  if (libMesh::on_command_line("--enable-fpe"))
    libMesh::enableFPE(true);

  // The library is now ready for use
  libMeshPrivateData::_is_initialized = true;


  // Make sure these work.  Library methods
  // depend on these being implemented properly,
  // so this is a good time to test them!
  libmesh_assert (libMesh::initialized());
  libmesh_assert (!libMesh::closed());
}
Example #6
int
main(int argc, char *argv[])
{
    json_object *main_obj = json_object_new_object();
    json_object *parallel_obj = json_object_new_object();
    json_object *problem_obj = 0;
    json_object *clargs_obj = 0;
    MACSIO_TIMING_GroupMask_t main_grp;
    MACSIO_TIMING_TimerId_t main_tid;
    int i, argi, exercise_scr = 0;
    int size = 1, rank = 0;

    /* quick pre-scan for scr cl flag */
    for (i = 0; i < argc && !exercise_scr; i++)
        exercise_scr = !strcmp("exercise_scr", argv[i]);

#warning SHOULD WE BE USING MPI-3 API
#ifdef HAVE_MPI
    MPI_Init(&argc, &argv);
#ifdef HAVE_SCR
#warning SANITY CHECK WITH MIFFPP
    if (exercise_scr)
        SCR_Init();
#endif
    MPI_Comm_dup(MPI_COMM_WORLD, &MACSIO_MAIN_Comm);
    MPI_Errhandler_set(MACSIO_MAIN_Comm, MPI_ERRORS_RETURN);
    MPI_Comm_size(MACSIO_MAIN_Comm, &MACSIO_MAIN_Size);
    MPI_Comm_rank(MACSIO_MAIN_Comm, &MACSIO_MAIN_Rank);
    mpi_errno = MPI_SUCCESS;
#endif
    errno = 0;

    main_grp = MACSIO_TIMING_GroupMask("MACSIO main()");
    main_tid = MT_StartTimer("main", main_grp, MACSIO_TIMING_ITER_AUTO);

    MACSIO_LOG_StdErr = MACSIO_LOG_LogInit(MACSIO_MAIN_Comm, 0, 0, 0, 0);

    /* Process the command line and put the results in the problem */
    clargs_obj = ProcessCommandLine(argc, argv, &argi);
    json_object_object_add(main_obj, "clargs", clargs_obj);

    strncpy(MACSIO_UTILS_UnitsPrefixSystem, JsonGetStr(clargs_obj, "units_prefix_system"),
        sizeof(MACSIO_UTILS_UnitsPrefixSystem));

    MACSIO_LOG_MainLog = MACSIO_LOG_LogInit(MACSIO_MAIN_Comm,
        JsonGetStr(clargs_obj, "log_file_name"),
        JsonGetInt(clargs_obj, "log_line_length"),
        JsonGetInt(clargs_obj, "log_line_cnt/0"),
        JsonGetInt(clargs_obj, "log_line_cnt/1"));

#warning THESE INITIALIZATIONS SHOULD BE IN MACSIO_LOG
    MACSIO_LOG_DebugLevel = JsonGetInt(clargs_obj, "debug_level");

    /* Setup parallel information */
    json_object_object_add(parallel_obj, "mpi_size", json_object_new_int(MACSIO_MAIN_Size));
    json_object_object_add(parallel_obj, "mpi_rank", json_object_new_int(MACSIO_MAIN_Rank));
    json_object_object_add(main_obj, "parallel", parallel_obj);

#warning SHOULD WE INCLUDE TOP-LEVEL INFO ON VAR NAMES AND WHETHER THEYRE RESTRICTED
#warning CREATE AN IO CONTEXT OBJECT
    /* Acquire an I/O context handle from the plugin */

    /* Do a read or write test */
    if (strcmp(JsonGetStr(clargs_obj, "read_path"),"null"))
        main_read(argi, argc, argv, main_obj);
    else
        main_write(argi, argc, argv, main_obj);

    /* stop total timer */
    MT_StopTimer(main_tid);

    /* Write timings data file if requested */
    if (strlen(JsonGetStr(clargs_obj, "timings_file_name")))
        write_timings_file(JsonGetStr(clargs_obj, "timings_file_name"));

    MACSIO_TIMING_ClearTimers(MACSIO_TIMING_ALL_GROUPS);

#warning ATEXIT THESE
    if (json_object_put(main_obj) != 1)
    {
        MACSIO_LOG_MSG(Info, ("Unable to free main JSON object"));
    }
    MACSIO_TIMING_GroupMask(0);
    MACSIO_TIMING_ReduceTimers(MACSIO_MAIN_Comm, -1);
    json_object_apath_get_string(0,0); /* free circ cache */
    MACSIO_LOG_LogFinalize(MACSIO_LOG_MainLog);
    MACSIO_LOG_LogFinalize(MACSIO_LOG_StdErr);

#ifdef HAVE_SCR
    if (exercise_scr)
        SCR_Finalize();
#endif

#ifdef HAVE_MPI
    {   int result;
        if ((MPI_Initialized(&result) == MPI_SUCCESS) && result)
            MPI_Finalize();
    }
#endif

#warning FIX RETVAL OF MAIN TO BE NON-ZERO WHEN ERRORS OCCUR
    return (0);
}
Example #7
void myhbwmalloc_init(void)
{
    /* set to NULL before trying to initialize.  if we return before
     * successful creation of the mspace, then it will still be NULL,
     * and we can use that in subsequent library calls to determine
     * that the library failed to initialize. */
    myhbwmalloc_mspace = NULL;

    /* verbose printout? */
    myhbwmalloc_verbose = 0;
    {
        char * env_char = getenv("HBWMALLOC_VERBOSE");
        if (env_char != NULL) {
            myhbwmalloc_verbose = 1;
            printf("hbwmalloc: HBWMALLOC_VERBOSE set\n");
        }
    }

    /* fail hard or soft? */
    myhbwmalloc_hardfail = 1;
    {
        char * env_char = getenv("HBWMALLOC_SOFTFAIL");
        if (env_char != NULL) {
            myhbwmalloc_hardfail = 0;
            printf("hbwmalloc: HBWMALLOC_SOFTFAIL set\n");
        }
    }

    /* set the atexit handler that will destroy the mspace and free the numa allocation */
    atexit(myhbwmalloc_final);

    /* detect and configure use of NUMA memory nodes */
    {
        int max_possible_node        = numa_max_possible_node();
        int num_possible_nodes       = numa_num_possible_nodes();
        int max_numa_nodes           = numa_max_node();
        int num_configured_nodes     = numa_num_configured_nodes();
        int num_configured_cpus      = numa_num_configured_cpus();
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_max_possible_node()    = %d\n", max_possible_node);
            printf("hbwmalloc: numa_num_possible_nodes()   = %d\n", num_possible_nodes);
            printf("hbwmalloc: numa_max_node()             = %d\n", max_numa_nodes);
            printf("hbwmalloc: numa_num_configured_nodes() = %d\n", num_configured_nodes);
            printf("hbwmalloc: numa_num_configured_cpus()  = %d\n", num_configured_cpus);
        }
        /* FIXME this is a hack.  assumes HBW is only numa node 1. */
        if (num_configured_nodes <= 2) {
            myhbwmalloc_numa_node = num_configured_nodes-1;
        } else {
            fprintf(stderr,"hbwmalloc: we support only 2 numa nodes, not %d\n", num_configured_nodes);
        }

        if (myhbwmalloc_verbose) {
            for (int i=0; i<num_configured_nodes; i++) {
                unsigned max_numa_cpus = numa_num_configured_cpus();
                struct bitmask * mask = numa_bitmask_alloc( max_numa_cpus );
                int rc = numa_node_to_cpus(i, mask);
                if (rc != 0) {
                    fprintf(stderr, "hbwmalloc: numa_node_to_cpus failed\n");
                } else {
                    printf("hbwmalloc: numa node %d cpu mask:", i);
                    for (unsigned j=0; j<max_numa_cpus; j++) {
                        int bit = numa_bitmask_isbitset(mask,j);
                        printf(" %d", bit);
                    }
                    printf("\n");
                }
                numa_bitmask_free(mask);
            }
            fflush(stdout);
        }
    }

#if 0 /* unused */
    /* see if the user specifies a slab size */
    size_t slab_size_requested = 0;
    {
        char * env_char = getenv("HBWMALLOC_BYTES");
        if (env_char!=NULL) {
            long units = 1L;
            if      ( NULL != strstr(env_char,"G") ) units = 1000000000L;
            else if ( NULL != strstr(env_char,"M") ) units = 1000000L;
            else if ( NULL != strstr(env_char,"K") ) units = 1000L;
            else                                     units = 1L;

            int num_count = strspn(env_char, "0123456789");
            memset( &env_char[num_count], ' ', strlen(env_char)-num_count);
            slab_size_requested = units * atol(env_char);
        }
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: requested slab_size_requested = %zu\n", slab_size_requested);
        }
    }
#endif

    /* see what libnuma says is available */
    size_t myhbwmalloc_slab_size;
    {
        int node = myhbwmalloc_numa_node;
        long long freemem;
        long long maxmem = numa_node_size64(node, &freemem);
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_node_size64 says maxmem=%lld freemem=%lld for numa node %d\n",
                    maxmem, freemem, node);
        }
        myhbwmalloc_slab_size = freemem;
    }

    /* assume threads, disable if MPI knows otherwise, then allow user to override. */
    int multithreaded = 1;
#ifdef HAVE_MPI
    int nprocs = 1;   /* default if MPI is not active */
    {
        int is_init, is_final;
        MPI_Initialized(&is_init);
        MPI_Finalized(&is_final);
        if (is_init && !is_final) {
            MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
        }

        /* give equal portion to every MPI process */
        myhbwmalloc_slab_size /= nprocs;

        /* if the user initializes MPI with MPI_Init or
         * MPI_Init_thread(MPI_THREAD_SINGLE), they assert there
         * are no threads at all, which means we can skip the
         * malloc mspace lock.
         *
         * if the user lies to MPI, they deserve any bad thing
         * that comes of it. */
        int provided;
        MPI_Query_thread(&provided);
        if (provided==MPI_THREAD_SINGLE) {
            multithreaded = 0;
        } else {
            multithreaded = 1;
        }

        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: MPI processes = %d (threaded = %d)\n", nprocs, multithreaded);
            printf("hbwmalloc: myhbwmalloc_slab_size = %zu\n", myhbwmalloc_slab_size);
        }
    }
#endif

    /* user can assert that hbwmalloc and friends need not be thread-safe */
    {
        char * env_char = getenv("HBWMALLOC_LOCKLESS");
        if (env_char != NULL) {
            multithreaded = 0;
            if (myhbwmalloc_verbose) {
                printf("hbwmalloc: user has disabled locking in mspaces by setting HBWMALLOC_LOCKLESS\n");
            }
        }
    }

    myhbwmalloc_slab = numa_alloc_onnode( myhbwmalloc_slab_size, myhbwmalloc_numa_node);
    if (myhbwmalloc_slab==NULL) {
        fprintf(stderr, "hbwmalloc: numa_alloc_onnode returned NULL for size = %zu\n", myhbwmalloc_slab_size);
        return;
    } else {
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_alloc_onnode succeeded for size %zu\n", myhbwmalloc_slab_size);
        }

        /* part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping,
         * so the capacity must be at least this large */
        if (myhbwmalloc_slab_size < 128*sizeof(size_t)) {
            fprintf(stderr, "hbwmalloc: not enough space for mspace bookkeeping\n");
            return;
        }

        /* see above regarding if the user lies to MPI. */
        int locked = multithreaded;
        myhbwmalloc_mspace = create_mspace_with_base( myhbwmalloc_slab, myhbwmalloc_slab_size, locked);
        if (myhbwmalloc_mspace == NULL) {
            fprintf(stderr, "hbwmalloc: create_mspace_with_base returned NULL\n");
            return;
        } else if (myhbwmalloc_verbose) {
            printf("hbwmalloc: create_mspace_with_base succeeded for size %zu\n", myhbwmalloc_slab_size);
        }
    }
}
Example #8
/* Fortran-callable wrapper around MPI_Initialized; the trailing underscore
 * follows the common Fortran name-mangling convention. */
void mpi_initialized_(int* flag, int* ierr){
  *ierr = MPI_Initialized(flag);
}
Example #9
LIS_INT lis_initialize(LIS_INT* argc, char** argv[])
{
  LIS_ARGS  p;
  LIS_INT    i,nprocs;

  LIS_DEBUG_FUNC_IN;

/*  lis_memory_init();*/

  #ifdef USE_MPI
    MPI_Initialized(&lis_mpi_initialized);
    if (!lis_mpi_initialized) MPI_Init(argc, argv);

    #ifdef USE_QUAD_PRECISION
      MPI_Type_contiguous( LIS_MPI_MSCALAR_LEN, MPI_DOUBLE, &LIS_MPI_MSCALAR );
      MPI_Type_commit( &LIS_MPI_MSCALAR );
      MPI_Op_create((MPI_User_function *)lis_mpi_msum, LIS_TRUE, &LIS_MPI_MSUM);
    #endif
  #endif

  #ifdef _OPENMP
    nprocs = omp_get_max_threads();
  #endif

  lis_arg2args(*argc,*argv,&cmd_args);
  p = cmd_args->next;
  while( p!=cmd_args )
  {
    for(i=0;i<LIS_INIT_OPTIONS_LEN;i++)
    {
      if( strcmp(p->arg1, LIS_INIT_OPTNAME[i])==0 )
      {
        switch( LIS_INIT_OPTACT[i] )
        {
        case LIS_INIT_OPTIONS_OMPNUMTHREADS:
#ifdef _LONGLONG
          sscanf(p->arg2, "%lld", &nprocs);
#else
          sscanf(p->arg2, "%d", &nprocs);
#endif
          break;
        }
      }
    }
    p = p->next;
  }

  #ifdef _OPENMP
    omp_set_num_threads(nprocs);
    lis_vec_tmp = (LIS_SCALAR *)lis_malloc( nprocs*LIS_VEC_TMP_PADD*sizeof(LIS_QUAD),"lis_initialize::lis_vec_tmp" );
    if( lis_vec_tmp==NULL )
    {
      LIS_SETERR_MEM(nprocs*LIS_VEC_TMP_PADD*sizeof(LIS_QUAD));
      return LIS_ERR_OUT_OF_MEMORY;
    }
  #endif

  #ifdef USE_QUAD_PRECISION
    lis_quad_scalar_tmp = (LIS_SCALAR *)lis_malloc( LIS_QUAD_SCALAR_SIZE*sizeof(LIS_SCALAR),"lis_initialize::lis_quad_scalar_tmp" );
    if( lis_quad_scalar_tmp==NULL )
    {
      LIS_SETERR_MEM(LIS_QUAD_SCALAR_SIZE*sizeof(LIS_SCALAR));
      return LIS_ERR_OUT_OF_MEMORY;
    }
    lis_quad_x87_fpu_init(&lis_x87_fpu_cw);
  #endif

  for(i=1;i<*argc;i++)
  {
    if( strncmp(argv[0][i], "-help", 5)==0 )
    {
/*      lis_display();*/
      CHKERR(1);
    }
    else if( strncmp(argv[0][i], "-ver", 4)==0 )
    {
      lis_version();
      CHKERR(1);
    }
  }

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
Example #10
int A1D_Initialize()
{

#ifdef DMAPPD_USES_MPI
    int mpi_initialized, mpi_provided;
    int mpi_status = MPI_SUCCESS;

    int namelen;
    char procname[MPI_MAX_PROCESSOR_NAME];
#endif

#ifdef __CRAYXE
    int                                 pmi_status  = PMI_SUCCESS;
    int                                 nodeid = -1;
    rca_mesh_coord_t                    rca_xyz;

    dmapp_return_t                      dmapp_status = DMAPP_RC_SUCCESS;

    dmapp_rma_attrs_ext_t               dmapp_config_in, dmapp_config_out;

    dmapp_jobinfo_t                     dmapp_info;
    dmapp_pe_t                          dmapp_rank = -1;
    int                                 dmapp_size = -1;
#endif
    int                                 sheapflag = 0;

#ifdef DEBUG_FUNCTION_ENTER_EXIT
    fprintf(stderr,"entering A1D_Initialize() \n");
#endif

#ifdef DMAPPD_USES_MPI

    /***************************************************
     *
     * configure MPI
     *
     ***************************************************/

    /* MPI has to be Initialized for this implementation to work */
    MPI_Initialized(&mpi_initialized);
    assert(mpi_initialized==1);

    /* MPI has to tolerate threads because A1 supports them */
    MPI_Query_thread(&mpi_provided);
    //assert(mpi_provided>MPI_THREAD_SINGLE);

    /* have to use our own communicator for collectives to be proper */
    mpi_status = MPI_Comm_dup(MPI_COMM_WORLD,&A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* get my MPI rank */
    mpi_status = MPI_Comm_rank(A1D_COMM_WORLD,&mpi_rank);
    assert(mpi_status==0);

    /* get MPI world size */
    mpi_status = MPI_Comm_size(A1D_COMM_WORLD,&mpi_size);
    assert(mpi_status==0);

    /* in a perfect world, this would provide topology information like BG */
    MPI_Get_processor_name( procname, &namelen );
    printf( "%d: MPI_Get_processor_name = %s\n" , mpi_rank, procname );
    fflush( stdout );

    /* barrier to make sure MPI is ready everywhere */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

#endif

#ifdef __CRAYXE

    /***************************************************
     *
     * query topology
     *
     ***************************************************/

    pmi_status = PMI_Get_nid( mpi_rank, &nodeid );
    assert(pmi_status==PMI_SUCCESS);

    rca_get_meshcoord((uint16_t)nodeid, &rca_xyz);
    printf("%d: rca_get_meshcoord returns (%2u,%2u,%2u)\n", mpi_rank, rca_xyz.mesh_x, rca_xyz.mesh_y, rca_xyz.mesh_z );

#endif

#ifdef __CRAYXE

    /***************************************************
     *
     * configure DMAPP
     *
     ***************************************************/

    dmapp_config_in.max_outstanding_nb   = DMAPP_DEF_OUTSTANDING_NB; /*  512 */
    dmapp_config_in.offload_threshold    = DMAPP_OFFLOAD_THRESHOLD;  /* 4096 */
#ifdef DETERMINISTIC_ROUTING
    dmapp_config_in.put_relaxed_ordering = DMAPP_ROUTING_DETERMINISTIC;
    dmapp_config_in.get_relaxed_ordering = DMAPP_ROUTING_DETERMINISTIC;
#else
    dmapp_config_in.put_relaxed_ordering = DMAPP_ROUTING_ADAPTIVE;
    dmapp_config_in.get_relaxed_ordering = DMAPP_ROUTING_ADAPTIVE;
#endif
    dmapp_config_in.max_concurrency      = 1; /* not thread-safe */
#ifdef FLUSH_IMPLEMENTED
    dmapp_config_in.PI_ordering          = DMAPP_PI_ORDERING_RELAXED;
#else
    dmapp_config_in.PI_ordering          = DMAPP_PI_ORDERING_STRICT;
#endif

    dmapp_status = dmapp_init_ext( &dmapp_config_in, &dmapp_config_out );
    assert(dmapp_status==DMAPP_RC_SUCCESS);

#ifndef FLUSH_IMPLEMENTED
    /* without strict PI ordering, we have to flush remote stores with a get packet to force global visibility */
    assert( dmapp_config_out.PI_ordering == DMAPP_PI_ORDERING_STRICT);
#endif

    dmapp_status = dmapp_get_jobinfo(&dmapp_info);
    assert(dmapp_status==DMAPP_RC_SUCCESS);

    dmapp_rank     = dmapp_info.pe;
    dmapp_size     = dmapp_info.npes;
    A1D_Sheap_desc = dmapp_info.sheap_seg;

    /* make sure PMI and DMAPP agree */
    assert(mpi_rank==dmapp_rank);
    assert(mpi_size==dmapp_size);

#endif

    /***************************************************
     *
     * setup protocols
     *
     ***************************************************/

#ifdef FLUSH_IMPLEMENTED
    /* allocate Put list */
    A1D_Put_flush_list = malloc( mpi_size * sizeof(int32_t) );
    assert(A1D_Put_flush_list != NULL);
#endif

#ifdef __CRAYXE
    A1D_Acc_lock = dmapp_sheap_malloc( sizeof(int64_t) );
#endif

    A1D_Allreduce_issame64((size_t)A1D_Acc_lock, &sheapflag);
    assert(sheapflag==1);

#ifdef DEBUG_FUNCTION_ENTER_EXIT
    fprintf(stderr,"exiting A1D_Initialize() \n");
#endif

    return(0);
}
Example #11
 static bool initialized()
 {
     int ini;
     MPI_Initialized(&ini);
     return ini;
 }
Example #12
File: main.c Project: fenech/2d
int main(int argc, char **argv)
{
    double fret;            /* Frank energy */
    double start, end;      /* for timing */
    double **grid;          /* 2D grid */
    int **lock;             /* locked cells */
    double **fgrid;         /* fullgrid */
    int **flock;            /* locked cells (fullgrid) */
    char fname[128] = "log";
    char gname[128] = "grid";
    char suffix[128];
    int iter = 0;
    long maxiter;
    int flag, rank, np;     /* MPI variables */
    float t0;               /* starting "temperature" */
    FILE *log_fp = NULL;    /* opened on rank 0 only */
    t_par par[2];
    int sep, ba;

    MPI_Init(&argc, &argv);
    MPI_Initialized(&flag);
    if (flag != 1) MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    if (argc < 11 || argc > 12) {
        if (rank == 0) printf("Usage: <%s> <x length> <y length> <monte carlo steps> <temp> <major axis> <minor axis> <align> <theta> <separation> <boundary angle> [id]\n", argv[0]);
        MPI_Finalize();
        exit(1);
    }

    nx = atoi(argv[1]);
    ny = atoi(argv[2]);
    maxiter = atoi(argv[3]);
    t0 = atof(argv[4]);
    par[0].major = par[1].major = atoi(argv[5]);
    par[0].minor = par[1].minor = atoi(argv[6]);
    if (strcmp(argv[7], "para") != 0 && strcmp(argv[7], "perp") != 0) {
        if (rank == 0) printf("Alignment must be para or perp\n");
        MPI_Finalize();
        exit(1);
    }
    strcpy(par[0].align, argv[7]);
    strcpy(par[1].align, argv[7]);
    par[0].theta = 0;
    par[1].theta = atof(argv[8]);
    sep = atoi(argv[9]);
    ba = atoi(argv[10]);

    if (argc == 12) id = atoi(argv[11]);
    else id = 1;

    par[0].cy = par[1].cy = ny / 2;
    par[0].cx = nx / 2 - par[0].major - sep / 2;
    par[1].cx = nx / 2 + par[1].major + sep / 2 - 1;

    sprintf(suffix, "r%dx%d_t%.0f_s%d_a%d_%d_%s",
            par[0].major, par[0].minor, par[1].theta, sep, ba, id, par[0].align);
    par[1].theta = PI * par[1].theta / 180.0;

    random_key key;

    int success = initialise(&grid, &lock, par, sep, ba, suffix, &key);
    int all_succeeded;
    MPI_Allreduce(&success, &all_succeeded, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (all_succeeded != np) {
        MPI_Finalize();
        return 0;
    }

    strcat(gname, suffix);
    print(grid, gname);

    fret = func(grid, lock, 0);
    if (rank == 0) {
        printf("Initial Frank Energy: %f\n", fret);
        strcat(fname, suffix);
        log_fp = fopen(fname, "w");
    }

    start = MPI_Wtime();
    monte(grid, lock, maxiter, t0, log_fp, suffix, key);
    end = MPI_Wtime();

    grid2root(grid, &fgrid);
    lock2root(lock, &flock);
    prncont(fgrid, "testgrid");

    fret = func(grid, lock, 0);
    if (rank == 0) conjgrad(fgrid, flock, 10000, &fret, log_fp, suffix, par, sep);

    if (rank == 0) {
        printf("End Frank Energy:     %f\n", fret);
        printf("No. iterations:       %d\n", iter);
        printf("Time taken:           %f\n", end - start);
    }

    MPI_Finalize();
    return 0;
}
Example #13
void ComponentInterface::init_done( bool pane_changed) throw(COM_exception) {
  // Loop through the dataitems.
  if ( _status == STATUS_SHRUNK) {
    int max_id=0;
    for (Attr_map::iterator it=_attr_map.begin(); it!=_attr_map.end(); ++it) {
      int id = it->second->id(), ncomp=it->second->size_of_components();
      max_id = std::max( max_id, id+ncomp+(ncomp>1));
    }
    if (max_id < _last_id) _last_id = max_id;
  }

  int npanes = _pane_map.size();
  std::vector< int> pane_ids; pane_ids.reserve(npanes);

  for (Pane_map::iterator it=_pane_map.begin(); it!=_pane_map.end(); ++it) {
    it->second->init_done();
    pane_ids.push_back( it->second->id());
  }
  
  _status = STATUS_NOCHANGE;

  if ( !pane_changed) {
    if ( npanes>int(_pane_map.size())) {
      throw COM_exception( COM_ERR_INIT_DONE_PANEMAP, append_frame
			   (_name,ComponentInterface::init_done));
    }
    return;
  }

  // communicate pane mapping
  int flag; MPI_Initialized( &flag);
  if ( _comm == MPI_COMM_NULL) flag = 0;

  // Compute proc_map
  int nprocs;
  if ( flag) MPI_Comm_size( _comm, &nprocs); else nprocs = 1;

  // Obtain the number of panes.
  std::vector<int> npanes_all(nprocs);
  if ( flag)
    MPI_Allgather( &npanes, 1, MPI_INT, &npanes_all[0], 1, MPI_INT, _comm);
  else
    npanes_all[0] = npanes;
  
  std::vector<int> disps(nprocs+1); 
  disps[0]=0;
  for ( int i=0; i<nprocs; ++i) disps[i+1] = disps[i]+npanes_all[i];

  std::vector<int> pane_ids_all( disps[nprocs]);
  if ( flag) 
    MPI_Allgatherv( &pane_ids[0], npanes, MPI_INT, &pane_ids_all[0], 
		    &npanes_all[0], &disps[0], MPI_INT, _comm);
  else
    pane_ids_all = pane_ids;

  // Build process map
  _proc_map.clear();
  for ( int p=0; p<nprocs; ++p) {
    for ( int j=disps[p], jn=disps[p+1]; j<jn; ++j)
      _proc_map[ pane_ids_all[j]] = p;
  }
}
Example #14
static void
na_test_mpi_init(struct na_test_info *na_test_info)
{
    int mpi_initialized = 0;
    int mpi_finalized = 0;

    na_test_info->mpi_comm = MPI_COMM_WORLD; /* default */

    MPI_Initialized(&mpi_initialized);
    if (mpi_initialized) {
        NA_LOG_WARNING("MPI was already initialized");
        goto done;
    }
    MPI_Finalized(&mpi_finalized);
    if (mpi_finalized) {
        NA_LOG_ERROR("MPI was already finalized");
        goto done;
    }

#ifdef NA_MPI_HAS_GNI_SETUP
    /* Setup GNI job before initializing MPI */
    if (NA_MPI_Gni_job_setup() != NA_SUCCESS) {
        NA_LOG_ERROR("Could not setup GNI job");
        return;
    }
#endif
    if (na_test_info->listen || na_test_info->mpi_static) {
        int provided;

        MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &provided);
        if (provided != MPI_THREAD_MULTIPLE) {
            NA_LOG_ERROR("MPI_THREAD_MULTIPLE cannot be set");
        }

        /* Only if we do static MPMD MPI */
        if (na_test_info->mpi_static) {
            int mpi_ret, color, global_rank;

            MPI_Comm_rank(MPI_COMM_WORLD, &global_rank);
            /* Color is 1 for server, 2 for client */
            color = (na_test_info->listen) ? 1 : 2;

            /* Assume that the application did not split MPI_COMM_WORLD already */
            mpi_ret = MPI_Comm_split(MPI_COMM_WORLD, color, global_rank,
                &na_test_info->mpi_comm);
            if (mpi_ret != MPI_SUCCESS) {
                NA_LOG_ERROR("Could not split communicator");
            }
#ifdef NA_HAS_MPI
            /* Set init comm that will be used to setup NA MPI */
            NA_MPI_Set_init_intra_comm(na_test_info->mpi_comm);
#endif
        }
    } else {
        MPI_Init(NULL, NULL);
    }

done:
    MPI_Comm_rank(na_test_info->mpi_comm, &na_test_info->mpi_comm_rank);
    MPI_Comm_size(na_test_info->mpi_comm, &na_test_info->mpi_comm_size);

    return;
}
Example #15
/*@C
   PetscInitialize - Initializes the PETSc database and MPI.
   PetscInitialize() calls MPI_Init() if that has yet to be called,
   so this routine should always be called near the beginning of
   your program -- usually the very first line!

   Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set

   Input Parameters:
+  argc - count of number of command line arguments
.  args - the command line arguments
.  file - [optional] PETSc database file; also checks ~username/.petscrc and .petscrc. Use NULL to not check for a
          code specific file. Use -skip_petscrc in the code specific file to skip the .petscrc files
-  help - [optional] Help message to print, use NULL for no message

   If you wish PETSc code to run ONLY on a subcommunicator of MPI_COMM_WORLD, create that
   communicator first and assign it to PETSC_COMM_WORLD BEFORE calling PetscInitialize(). Thus if you are running a
   four process job and two processes will run PETSc and have PetscInitialize() and PetscFinalize() and two process will not,
   then do this. If ALL processes in the job are using PetscInitialize() and PetscFinalize() then you don't need to do this, even
   if different subcommunicators of the job are doing different things with PETSc.

   Options Database Keys:
+  -start_in_debugger [noxterm,dbx,xdb,gdb,...] - Starts program in debugger
.  -on_error_attach_debugger [noxterm,dbx,xdb,gdb,...] - Starts debugger when error detected
.  -on_error_emacs <machinename> causes emacsclient to jump to error file
.  -on_error_abort calls abort() when error detected (no traceback)
.  -on_error_mpiabort calls MPI_abort() when error detected
.  -error_output_stderr prints error messages to stderr instead of the default stdout
.  -error_output_none does not print the error messages (but handles errors in the same way as if this was not called)
.  -debugger_nodes [node1,node2,...] - Indicates nodes to start in debugger
.  -debugger_pause [sleeptime] (in seconds) - Pauses debugger
.  -stop_for_debugger - Print message on how to attach debugger manually to
                        process and wait (-debugger_pause) seconds for attachment
.  -malloc - Indicates use of PETSc error-checking malloc (on by default for debug version of libraries)
.  -malloc no - Indicates not to use error-checking malloc
.  -malloc_debug - check for memory corruption at EVERY malloc or free
.  -malloc_test - like -malloc_dump -malloc_debug, but only active for debugging builds
.  -fp_trap - Stops on floating point exceptions (Note that on the
              IBM RS6000 this slows code by at least a factor of 10.)
.  -no_signal_handler - Indicates not to trap error signals
.  -shared_tmp - indicates /tmp directory is shared by all processors
.  -not_shared_tmp - each processor has own /tmp
.  -tmp - alternative name of /tmp directory
.  -get_total_flops - returns total flops done by all processors
.  -memory_info - Print memory usage at end of run
-  -server <port> - start PETSc webserver (default port is 8080)

   Options Database Keys for Profiling:
   See the <a href="../../docs/manual.pdf#nameddest=ch_profiling">profiling chapter of the users manual</a> for details.
+  -info <optional filename> - Prints verbose information to the screen
.  -info_exclude <null,vec,mat,pc,ksp,snes,ts> - Excludes some of the verbose messages
.  -log_sync - Log the synchronization in scatters, inner products and norms
.  -log_trace [filename] - Print traces of all PETSc calls to the screen (useful to determine where a program
        hangs without running in the debugger).  See PetscLogTraceBegin().
.  -log_summary [filename] - Prints summary of flop and timing information to screen. If the filename is specified the
        summary is written to the file.  See PetscLogView().
.  -log_summary_python [filename] - Prints data on flop and timing usage to a file or screen. See PetscLogPrintSViewPython().
.  -log_all [filename] - Logs extensive profiling information  See PetscLogDump().
.  -log [filename] - Logs basic profiling information  See PetscLogDump().
-  -log_mpe [filename] - Creates a logfile viewable by the utility Jumpshot (in MPICH distribution)

    Only one of -log_trace, -log_summary, -log_all, -log, or -log_mpe may be used at a time

   Environmental Variables:
+   PETSC_TMP - alternative tmp directory
.   PETSC_SHARED_TMP - tmp is shared by all processes
.   PETSC_NOT_SHARED_TMP - each process has its own private tmp
.   PETSC_VIEWER_SOCKET_PORT - socket number to use for socket viewer
-   PETSC_VIEWER_SOCKET_MACHINE - machine to use for socket viewer to connect to


   Level: beginner

   Notes:
   If for some reason you must call MPI_Init() separately, call
   it before PetscInitialize().

   Fortran Version:
   In Fortran this routine has the format
$       call PetscInitialize(file,ierr)

+   ierr - error return code
-  file - [optional] PETSc database file; also checks ~username/.petscrc and .petscrc. Use NULL_CHARACTER to not check for a
          code specific file. Use -skip_petscrc in the code specific file to skip the .petscrc files

   Important Fortran Note:
   In Fortran, you MUST use NULL_CHARACTER to indicate a
   null character string; you CANNOT just use NULL as
   in the C version. See the <a href="../../docs/manual.pdf">users manual</a> for details.

   If your main program is C but you call Fortran code that also uses PETSc you need to call PetscInitializeFortran() soon after
   calling PetscInitialize().

   Concepts: initializing PETSc

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscInitializeNoArguments()

@*/
PetscErrorCode  PetscInitialize(int *argc,char ***args,const char file[],const char help[])
{
  PetscErrorCode ierr;
  PetscMPIInt    flag, size;
  PetscInt       nodesize;
  PetscBool      flg;
  char           hostname[256];

  PetscFunctionBegin;
  if (PetscInitializeCalled) PetscFunctionReturn(0);

  /* these must be initialized in a routine, not as a constant declaration*/
  PETSC_STDOUT = stdout;
  PETSC_STDERR = stderr;

  ierr = PetscOptionsCreate();CHKERRQ(ierr);

  /*
     We initialize the program name here (before MPI_Init()) because MPICH has a bug
     that makes it set args[0] on all processors to args[0] from the first processor.
  */
  if (argc && *argc) {
    ierr = PetscSetProgramName(**args);CHKERRQ(ierr);
  } else {
    ierr = PetscSetProgramName("Unknown Name");CHKERRQ(ierr);
  }

  ierr = MPI_Initialized(&flag);CHKERRQ(ierr);
  if (!flag) {
    if (PETSC_COMM_WORLD != MPI_COMM_NULL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You cannot set PETSC_COMM_WORLD if you have not initialized MPI first");
#if defined(PETSC_HAVE_MPI_INIT_THREAD)
    {
      PetscMPIInt provided;
      ierr = MPI_Init_thread(argc,args,MPI_THREAD_FUNNELED,&provided);CHKERRQ(ierr);
    }
#else
    ierr = MPI_Init(argc,args);CHKERRQ(ierr);
#endif
    PetscBeganMPI = PETSC_TRUE;
  }
  if (argc && args) {
    PetscGlobalArgc = *argc;
    PetscGlobalArgs = *args;
  }
  PetscFinalizeCalled = PETSC_FALSE;

  if (PETSC_COMM_WORLD == MPI_COMM_NULL) PETSC_COMM_WORLD = MPI_COMM_WORLD;
  ierr = MPI_Comm_set_errhandler(PETSC_COMM_WORLD,MPI_ERRORS_RETURN);CHKERRQ(ierr);

  /* Done after init due to a bug in MPICH-GM? */
  ierr = PetscErrorPrintfInitialize();CHKERRQ(ierr);

  ierr = MPI_Comm_rank(MPI_COMM_WORLD,&PetscGlobalRank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(MPI_COMM_WORLD,&PetscGlobalSize);CHKERRQ(ierr);

  MPIU_BOOL = MPI_INT;
  MPIU_ENUM = MPI_INT;

  /*
     Initialized the global complex variable; this is because with
     shared libraries the constructors for global variables
     are not called; at least on IRIX.
  */
#if defined(PETSC_HAVE_COMPLEX)
  {
#if defined(PETSC_CLANGUAGE_CXX)
    PetscComplex ic(0.0,1.0);
    PETSC_i = ic;
#elif defined(PETSC_CLANGUAGE_C)
    PETSC_i = _Complex_I;
#endif
  }

#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
  ierr = MPI_Type_contiguous(2,MPI_DOUBLE,&MPIU_C_DOUBLE_COMPLEX);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_C_DOUBLE_COMPLEX);CHKERRQ(ierr);
  ierr = MPI_Type_contiguous(2,MPI_FLOAT,&MPIU_C_COMPLEX);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_C_COMPLEX);CHKERRQ(ierr);
#endif
#endif /* PETSC_HAVE_COMPLEX */

  /*
     Create the PETSc MPI reduction operator that sums of the first
     half of the entries and maxes the second half.
  */
  ierr = MPI_Op_create(PetscMaxSum_Local,1,&PetscMaxSum_Op);CHKERRQ(ierr);

#if defined(PETSC_USE_REAL___FLOAT128)
  ierr = MPI_Type_contiguous(2,MPI_DOUBLE,&MPIU___FLOAT128);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU___FLOAT128);CHKERRQ(ierr);
#if defined(PETSC_HAVE_COMPLEX)
  ierr = MPI_Type_contiguous(4,MPI_DOUBLE,&MPIU___COMPLEX128);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU___COMPLEX128);CHKERRQ(ierr);
#endif
  ierr = MPI_Op_create(PetscMax_Local,1,&MPIU_MAX);CHKERRQ(ierr);
  ierr = MPI_Op_create(PetscMin_Local,1,&MPIU_MIN);CHKERRQ(ierr);
#endif

#if (defined(PETSC_HAVE_COMPLEX) && !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)) || defined(PETSC_USE_REAL___FLOAT128)
  ierr = MPI_Op_create(PetscSum_Local,1,&MPIU_SUM);CHKERRQ(ierr);
#endif

  ierr = MPI_Type_contiguous(2,MPIU_SCALAR,&MPIU_2SCALAR);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_2SCALAR);CHKERRQ(ierr);
  ierr = MPI_Op_create(PetscADMax_Local,1,&PetscADMax_Op);CHKERRQ(ierr);
  ierr = MPI_Op_create(PetscADMin_Local,1,&PetscADMin_Op);CHKERRQ(ierr);

#if defined(PETSC_USE_64BIT_INDICES) || !defined(MPI_2INT)
  ierr = MPI_Type_contiguous(2,MPIU_INT,&MPIU_2INT);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_2INT);CHKERRQ(ierr);
#endif

  /*
     Attributes to be set on PETSc communicators
  */
  ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelCounter,&Petsc_Counter_keyval,(void*)0);CHKERRQ(ierr);
  ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelComm_Outer,&Petsc_InnerComm_keyval,(void*)0);CHKERRQ(ierr);
  ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelComm_Inner,&Petsc_OuterComm_keyval,(void*)0);CHKERRQ(ierr);

  /*
     Build the options database
  */
  ierr = PetscOptionsInsert(argc,args,file);CHKERRQ(ierr);


  /*
     Print main application help message
  */
  ierr = PetscOptionsHasName(NULL,"-help",&flg);CHKERRQ(ierr);
  if (help && flg) {
    ierr = PetscPrintf(PETSC_COMM_WORLD,help);CHKERRQ(ierr);
  }
  ierr = PetscOptionsCheckInitial_Private();CHKERRQ(ierr);

  /* SHOULD PUT IN GUARDS: Make sure logging is initialized, even if we do not print it out */
#if defined(PETSC_USE_LOG)
  ierr = PetscLogBegin_Private();CHKERRQ(ierr);
#endif

  /*
     Load the dynamic libraries (on machines that support them), this registers all
     the solvers etc. (On non-dynamic machines this initializes the PetscDraw and PetscViewer classes)
  */
  ierr = PetscInitialize_DynamicLibraries();CHKERRQ(ierr);

  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = PetscInfo1(0,"PETSc successfully started: number of processors = %d\n",size);CHKERRQ(ierr);
  ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
  ierr = PetscInfo1(0,"Running on machine: %s\n",hostname);CHKERRQ(ierr);

  ierr = PetscOptionsCheckInitial_Components();CHKERRQ(ierr);
  /* Check the options database for options related to the options database itself */
  ierr = PetscOptionsSetFromOptions();CHKERRQ(ierr);

#if defined(PETSC_USE_PETSC_MPI_EXTERNAL32)
  /*
      Tell MPI about our own data representation converter, this would/should be used if extern32 is not supported by the MPI

      Currently not used because it is not supported by MPICH.
  */
#if !defined(PETSC_WORDS_BIGENDIAN)
  ierr = MPI_Register_datarep((char*)"petsc",PetscDataRep_read_conv_fn,PetscDataRep_write_conv_fn,PetscDataRep_extent_fn,NULL);CHKERRQ(ierr);
#endif
#endif

  ierr = PetscOptionsGetInt(NULL,"-hmpi_spawn_size",&nodesize,&flg);CHKERRQ(ierr);
  if (flg) {
#if defined(PETSC_HAVE_MPI_COMM_SPAWN)
    ierr = PetscHMPISpawn((PetscMPIInt) nodesize);CHKERRQ(ierr); /* worker nodes never return from here; they go directly to PetscEnd() */
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PETSc built without MPI 2 (MPI_Comm_spawn) support, use -hmpi_merge_size instead");
#endif
  } else {
    ierr = PetscOptionsGetInt(NULL,"-hmpi_merge_size",&nodesize,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PetscHMPIMerge((PetscMPIInt) nodesize,NULL,NULL);CHKERRQ(ierr);
      if (PetscHMPIWorker) { /* if worker then never enter user code */
        PetscInitializeCalled = PETSC_TRUE;
        PetscEnd();
      }
    }
  }

#if defined(PETSC_HAVE_CUDA)
  {
    PetscMPIInt p;
    for (p = 0; p < PetscGlobalSize; ++p) {
      if (p == PetscGlobalRank) cublasInit();
      ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscOptionsHasName(NULL,"-python",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscInitializeCalled = PETSC_TRUE;
    ierr = PetscPythonInitialize(NULL,NULL);CHKERRQ(ierr);
  }

  ierr = PetscThreadCommInitializePackage();CHKERRQ(ierr);

  /*
      Setup building of stack frames for all function calls
  */
#if defined(PETSC_USE_DEBUG)
  PetscThreadLocalRegister((PetscThreadKey*)&petscstack); /* Creates petscstack_key if needed */
  ierr = PetscStackCreate();CHKERRQ(ierr);
#endif

#if defined(PETSC_SERIALIZE_FUNCTIONS)
  ierr = PetscFPTCreate(10000);CHKERRQ(ierr);
#endif

  /*
      Once we are completely initialized we can set this variable
  */
  PetscInitializeCalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
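
For context, here is a minimal sketch of the caller side of this initialization path, assuming the standard PetscInitialize/PetscFinalize usage of this PETSc era; it is not part of the example above.

#include <petscsys.h>

static const char help[] = "Minimal PETSc driver sketch.\n";

int main(int argc, char **argv)
{
  PetscErrorCode ierr;

  /* Runs the initialization shown above: MPI setup, options database, logging, ... */
  ierr = PetscInitialize(&argc, &argv, NULL, help); if (ierr) return ierr;

  /* ... user code; PETSC_COMM_WORLD and the options database are now available ... */

  ierr = PetscFinalize();
  return ierr;
}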
Example #16
0
/*
   Initialize MTest, initializing MPI if necessary.

 Environment Variables:
+ MPITEST_DEBUG - If set (to any value), turns on debugging output
. MPITEST_THREADLEVEL_DEFAULT - If set, use as the default "provided"
                                level of thread support.  Applies to
                                MTest_Init but not MTest_Init_thread.
- MPITEST_VERBOSE - If set to a numeric value, turns on that level of
  verbose output.  This is used by the routine 'MTestPrintfMsg'

*/
void MTest_Init_thread(int *argc, char ***argv, int required, int *provided)
{
    int flag;
    char *envval = 0;

    MPI_Initialized(&flag);
    if (!flag) {
        /* Permit an MPI that claims only MPI 1 but includes the
         * MPI_Init_thread routine (e.g., IBM MPI) */
#if MPI_VERSION >= 2 || defined(HAVE_MPI_INIT_THREAD)
        MPI_Init_thread(argc, argv, required, provided);
#else
        MPI_Init(argc, argv);
        *provided = -1;
#endif
    }
    /* Check for debugging control */
    if (getenv("MPITEST_DEBUG")) {
        dbgflag = 1;
        MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    }

    /* Check for verbose control */
    envval = getenv("MPITEST_VERBOSE");
    if (envval) {
        char *s;
        long val = strtol(envval, &s, 0);
        if (s == envval) {
            /* This is the error case for strtol */
            fprintf(stderr, "Warning: %s not valid for MPITEST_VERBOSE\n", envval);
            fflush(stderr);
        }
        else {
            if (val >= 0) {
                verbose = val;
            }
            else {
                fprintf(stderr, "Warning: %s not valid for MPITEST_VERBOSE\n", envval);
                fflush(stderr);
            }
        }
    }
    /* Check for option to return success/failure in the return value of main */
    envval = getenv("MPITEST_RETURN_WITH_CODE");
    if (envval) {
        if (strcmp(envval, "yes") == 0 ||
            strcmp(envval, "YES") == 0 ||
            strcmp(envval, "true") == 0 || strcmp(envval, "TRUE") == 0) {
            returnWithVal = 1;
        }
        else if (strcmp(envval, "no") == 0 ||
                 strcmp(envval, "NO") == 0 ||
                 strcmp(envval, "false") == 0 || strcmp(envval, "FALSE") == 0) {
            returnWithVal = 0;
        }
        else {
            fprintf(stderr, "Warning: %s not valid for MPITEST_RETURN_WITH_CODE\n", envval);
            fflush(stderr);
        }
    }

    /* Print rusage data if set */
    if (getenv("MPITEST_RUSAGE")) {
        usageOutput = 1;
    }
}
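
A minimal sketch of how a test might drive the routine above, assuming the usual conventions of this test harness (MTest_Finalize and MTestPrintfMsg come from the same harness):

#include "mpitest.h"   /* assumed harness header declaring the MTest_* routines */
#include <mpi.h>

int main(int argc, char *argv[])
{
    int errs = 0, provided;

    MTest_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
    MTestPrintfMsg(1, "provided thread level = %d\n", provided);

    /* ... test body accumulates failures in errs ... */

    MTest_Finalize(errs);   /* rank 0 reports success when errs == 0 everywhere */
    return 0;
}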
Example #17
0
FC_FUNC( mpi_initialized , MPI_INITIALIZED )(int *flag, int *ierror)
{
  *ierror=MPI_Initialized(flag);
}
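
Following the same FC_FUNC name-mangling pattern, a companion wrapper for MPI_Finalized could look like this (a sketch; the FC_FUNC macro is assumed from the same build system):

FC_FUNC( mpi_finalized , MPI_FINALIZED )(int *flag, int *ierror)
{
  *ierror=MPI_Finalized(flag);
}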
Example #18
0
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int initFlag;
    MPI_Initialized(&initFlag);
    if (!initFlag) {
        printf("MPI init failed\n");
        return 8;
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int l,mm=5;
    int nx,ny,nz,lt,nedge;
    float frequency;
    float velmax;
    float dt;
    int ncx_shot1,ncy_shot1,ncz_shot;
    int ishot,ncy_shot,ncx_shot;
    float unit;
    int nxshot,nyshot,dxshot,dyshot;
    char infile[80],outfile[80],logfile[80],tmp[80], nodelog[84];
    FILE  *fin, *fout, *flog, *fnode;
    MPI_File mpi_flog, mpi_fout;
    MPI_Status mpi_status;
    struct timeval start,end;
    float all_time;

    float *u, *v, *w, *up, *up1, *up2,
            *vp, *vp1, *vp2, *wp, *wp1, *wp2,
            *us, *us1, *us2, *vs, *vs1, *vs2,
            *ws, *ws1, *ws2, *vpp, *density, *vss;
    float c[5][7];
    float *wave;
    float nshot,t0,tt,c0;
    float dtx,dtz,dtxz,dr1,dr2,dtx4,dtz4,dtxz4;
    char message[100];

    if(argc<4)
    {
        printf("please add 3 parameters: inputfile, outfile, logfile\n");
        exit(1);
    }

    message[99] = 0;    // Avoid string buffer overrun

    strcpy(infile,argv[1]);
    strcpy(outfile,argv[2]);
    strcpy(logfile,argv[3]);
    strcpy(nodelog,logfile);
    strcat(nodelog, ".node");

    strcpy(tmp,"date ");
    strncat(tmp, ">> ",3);
    strncat(tmp, logfile, strlen(logfile));
    if (proc_rank == 0) {
        flog = fopen(logfile,"w");
        fprintf(flog,"------------start time------------\n");
        fclose(flog);
        system(tmp);
        gettimeofday(&start,NULL);
    }
    fin = fopen(infile,"r");
    if(fin == NULL)
    {
        printf("file %s does not exist\n",infile);
        exit(2);
    }
    fscanf(fin,"nx=%d\n",&nx);
    fscanf(fin,"ny=%d\n",&ny);
    fscanf(fin,"nz=%d\n",&nz);
    fscanf(fin,"lt=%d\n",&lt);
    fscanf(fin,"nedge=%d\n",&nedge);
    fscanf(fin,"ncx_shot1=%d\n",&ncx_shot1);
    fscanf(fin,"ncy_shot1=%d\n",&ncy_shot1);
    fscanf(fin,"ncz_shot=%d\n",&ncz_shot);
    fscanf(fin,"nxshot=%d\n",&nxshot);
    fscanf(fin,"nyshot=%d\n",&nyshot);
    fscanf(fin,"frequency=%f\n",&frequency);
    fscanf(fin,"velmax=%f\n",&velmax);
    fscanf(fin,"dt=%f\n",&dt);
    fscanf(fin,"unit=%f\n",&unit);
    fscanf(fin,"dxshot=%d\n",&dxshot);
    fscanf(fin,"dyshot=%d\n",&dyshot);
    fclose(fin);
    if (proc_rank == 0) {   // Master
        printf("\n--------workload parameter--------\n");
        printf("nx=%d\n",nx);
        printf("ny=%d\n",ny);
        printf("nz=%d\n",nz);
        printf("lt=%d\n",lt);
        printf("nedge=%d\n",nedge);
        printf("ncx_shot1=%d\n",ncx_shot1);
        printf("ncy_shot1=%d\n",ncy_shot1);
        printf("ncz_shot=%d\n",ncz_shot);
        printf("nxshot=%d\n",nxshot);
        printf("nyshot=%d\n",nyshot);
        printf("frequency=%f\n",frequency);
        printf("velmax=%f\n",velmax);
        printf("dt=%f\n",dt);
        printf("unit=%f\n",unit);
        printf("dxshot=%d\n",dxshot);
        printf("dyshot=%d\n\n",dyshot);

        flog = fopen(logfile,"a");
        fprintf(flog,"\n--------workload parameter--------\n");
        fprintf(flog,"nx=%d\n",nx);
        fprintf(flog,"ny=%d\n",ny);
        fprintf(flog,"nz=%d\n",nz);
        fprintf(flog,"lt=%d\n",lt);
        fprintf(flog,"nedge=%d\n",nedge);
        fprintf(flog,"ncx_shot1=%d\n",ncx_shot1);
        fprintf(flog,"ncy_shot1=%d\n",ncy_shot1);
        fprintf(flog,"ncz_shot=%d\n",ncz_shot);
        fprintf(flog,"nxshot=%d\n",nxshot);
        fprintf(flog,"nyshot=%d\n",nyshot);
        fprintf(flog,"frequency=%f\n",frequency);
        fprintf(flog,"velmax=%f\n",velmax);
        fprintf(flog,"dt=%f\n",dt);
        fprintf(flog,"unit=%f\n",unit);
        fprintf(flog,"dxshot=%d\n",dxshot);
        fprintf(flog,"dyshot=%d\n\n",dyshot);
        fclose(flog);
        fnode = fopen(nodelog, "a");
        fprintf(fnode,"World size: %d\n", world_size);
        fclose(fnode);
    }

#ifdef _WITH_PHI
    // [Afa] It is recommended that for Intel Xeon Phi data is 64-byte aligned.
    // Upon successful completion, posix_memalign() shall return zero
    if (posix_memalign((void **)&u  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&v  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&w  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws2, 64, sizeof(float)*nz*ny*nx)) return 2;
#else
    u       = (float*)malloc(sizeof(float)*nz*ny*nx);
    v       = (float*)malloc(sizeof(float)*nz*ny*nx);
    w       = (float*)malloc(sizeof(float)*nz*ny*nx);
    up      = (float*)malloc(sizeof(float)*nz*ny*nx);
    up1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    up2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp      = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp      = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    us      = (float*)malloc(sizeof(float)*nz*ny*nx);
    us1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    us2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs      = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws      = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws2     = (float*)malloc(sizeof(float)*nz*ny*nx);
#endif
    // [Afa] Those are not offloaded to phi yet
    vpp     = (float*)malloc(sizeof(float)*nz*ny*nx);
    density = (float*)malloc(sizeof(float)*nz*ny*nx);
    vss     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wave = (float*)malloc(sizeof(float)*lt);

    nshot=nxshot*nyshot;
    t0=1.0/frequency;

    // [Afa] Branch optimization
    // TODO: Will the compiler optimize the `condition'?
    //       i.e. Can I write `for(i=0;i< (nz < 210 ? nz : 210);i++)'?
    int condition = nz < 210 ? nz : 210;
    for(int i=0; i < condition;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++) {
                vpp[i*ny*nx+j*nx+k]=2300.;
                vss[i*ny*nx+j*nx+k]=1232.;
                density[i*ny*nx+j*nx+k]=1.;
            }
        }
    }

    condition = nz < 260 ? nz : 260;
    for(int i=210; i < condition;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++) {
                vpp[i*ny*nx+j*nx+k]=2800.;
                vss[i*ny*nx+j*nx+k]=1509.;
                density[i*ny*nx+j*nx+k]=2.;
            }
        }
    }

    for(int i=260;i<nz;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++)
            {
                vpp[i*ny*nx+j*nx+k]=3500.;
                vss[i*ny*nx+j*nx+k]=1909.;
                density[i*ny*nx+j*nx+k]=2.5;
            }
        }
    }

    for(l=0;l<lt;l++)
    {
        tt=l*dt;
        tt=tt-t0;
        float sp=PIE*frequency*tt;
        float fx=100000.*exp(-sp*sp)*(1.-2.*sp*sp);
        wave[l]=fx;
    }

    // TODO: [Afa] Data produced by code below are static. See table below
    if(mm==5)
    {
        c0=-2.927222164;
        c[0][0]=1.66666665;
        c[1][0]=-0.23809525;
        c[2][0]=0.03968254;
        c[3][0]=-0.004960318;
        c[4][0]=0.0003174603;
    }

    c[0][1]=0.83333;
    c[1][1]=-0.2381;
    c[2][1]=0.0595;
    c[3][1]=-0.0099;
    c[4][1]=0.0008;

    for(int i=0;i<5;i++)
        for(int j=0;j<5;j++)
            c[j][2+i]=c[i][1]*c[j][1];
    /*
     * mm == 5, c =
     * 1.666667    0.833330    0.694439    -0.198416   0.049583    -0.008250   0.000667
     * -0.238095   -0.238100   -0.198416   0.056692    -0.014167   0.002357    -0.000190
     * 0.039683    0.059500    0.049583    -0.014167   0.003540    -0.000589   0.000048
     * -0.004960   -0.009900   -0.008250   0.002357    -0.000589   0.000098    -0.000008
     * 0.000317    0.000800    0.000667    -0.000190   0.000048    -0.000008   0.000001
    */

    /*
     * mm != 5, c =
     * 0.000000    0.833330    0.694439    -0.198416   0.049583    -0.008250   0.000667
     * 0.000000    -0.238100   -0.198416   0.056692    -0.014167   0.002357    -0.000190
     * 0.000000    0.059500    0.049583    -0.014167   0.003540    -0.000589   0.000048
     * 0.000000    -0.009900   -0.008250   0.002357    -0.000589   0.000098    -0.000008
     * 0.000000    0.000800    0.000667    -0.000190   0.000048    -0.000008   0.000001
     */

    dtx=dt/unit;
    dtz=dt/unit;
    dtxz=dtx*dtz;

    dr1=dtx*dtx/2.;
    dr2=dtz*dtz/2.;

    dtx4=dtx*dtx*dtx*dtx;
    dtz4=dtz*dtz*dtz*dtz;
    dtxz4=dtx*dtx*dtz*dtz;

    if (proc_rank == 0) {
        fout = fopen(outfile, "wb");
        fclose(fout);
    }   // [Afa] Truncate file. We need a prettier way

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_File_open(MPI_COMM_WORLD, outfile, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_fout);
    MPI_File_open(MPI_COMM_WORLD, nodelog, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_flog);
    // [Afa] *About Nodes Number* nshot (i.e nxshot * nyshot) should be multiple of node numbers,
    //       or there will be hungry processes
    int loop_per_proc = ((int)nshot % world_size == 0) ? (nshot / world_size) : (nshot / world_size + 1);
    printf("\x1B[31mDEBUG:\x1b[39;49m World size %d, Loop per Proc %d, nshot %f, I am No. %d\n",
           world_size, loop_per_proc, nshot, proc_rank);

    //    for(ishot=1;ishot<=nshot;ishot++)   // [Afa] nshot is 20 in para1.in, but 200 in para2.in
    for (int loop_index = 0; loop_index < loop_per_proc; ++loop_index)
    {
        ishot = loop_index + proc_rank * loop_per_proc + 1; // [Afa] See commented code 2 lines above to understand this line
        if (ishot <= nshot) { // [Afa] ishot <= nshot
            printf("shot %d, process %d\n",ishot, proc_rank);
            snprintf(message, 29, "shot %6d, process %6d\n", ishot, proc_rank);     // [Afa] Those numbers:
            MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET);                // 28: string without '\0'
            MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status);           // 29: with '\0'
        } else {
            printf("shot HUNGRY, process %d\n", proc_rank);
            snprintf(message, 29, "shot HUNGRY, process %6d\n", proc_rank);
            MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET);
            MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status);
            continue;
        }
        ncy_shot=ncy_shot1+(ishot/nxshot)*dyshot;
        ncx_shot=ncx_shot1+(ishot%nxshot)*dxshot;

        // [Afa] Matrix is zeroed in every loop
        // i.e. The relation between those matrices in each loop is pretty loose
        // Matrices not zeroed are: vpp, density, vss and wave, and they're not changed (read-only)
        // We only need to partially collect matrix `up'

        // TODO: [Afa] Get a better way to pass those pointers, and mark them as `restrict'
        // And WHY are they using cpp as extension? C++11 doesn't support `restrict'
        zero_matrices(u, w, ws2, up2, vp1, wp1, us, ws, wp, us2, us1, wp2,
                      v, up1, nz, nx, up, ny, ws1, vs, vp2, vs1, vs2, vp);

        for(l=1;l<=lt;l++)
        {
            float xmax=l*dt*velmax;
            int nleft=ncx_shot-xmax/unit-10;
            int nright=ncx_shot+xmax/unit+10;
            int nfront=ncy_shot-xmax/unit-10;
            int nback=ncy_shot+xmax/unit+10;
            int ntop=ncz_shot-xmax/unit-10;
            int nbottom=ncz_shot+xmax/unit+10;
            if(nleft<5) nleft=5;
            if(nright>nx-5) nright=nx-5;
            if(nfront<5) nfront=5;
            if(nback>ny-5) nback=ny-5;
            if(ntop<5) ntop=5;
            if(nbottom>nz-5) nbottom=nz-5;
            ntop = ntop-1;
            nfront = nfront-1;
            nleft = nleft-1;

            // Although up, vp, wp, us, vs, ws are modified below, we're sure there's no race condition.
            // Each loop accesses a UNIQUE element in the array, and the value is not used, no need to worry about the dirty cache
#pragma omp parallel for shared(u) shared(v) shared(w) shared(up1) shared(up2) shared(vp1) shared(vp2) shared(wp1) \
    shared(wp2) shared(us) shared(us1) shared(us2) shared(vs) shared(vs1) shared(vs2) shared(ws) shared(ws1) shared(ws2) \
    shared(vss) shared(vpp) shared(dr1) shared(dr2) shared(dtz) shared(dtx) shared(ncx_shot) shared(ncy_shot) shared(ncz_shot) \
    shared(wave)
            for(int k=ntop;k<nbottom;k++) {
                for(int j=nfront;j<nback;j++) {
                    for(int i=nleft;i<nright;i++)
                    {
                        float vvp2,drd1,drd2,vvs2;
                        float px,sx;
                        if(i==ncx_shot-1&&j==ncy_shot-1&&k==ncz_shot-1)
                        {
                            px=1.;
                            sx=0.;
                        }
                        else
                        {
                            px=0.;
                            sx=0.;
                        }
                        vvp2=vpp[k*ny*nx+j*nx+i]*vpp[k*ny*nx+j*nx+i];
                        drd1=dr1*vvp2;
                        drd2=dr2*vvp2;

                        vvs2=vss[k*ny*nx+j*nx+i]*vss[k*ny*nx+j*nx+i];
                        drd1=dr1*vvs2;
                        drd2=dr2*vvs2;

                        float tempux2=0.0f;
                        float tempuy2=0.0f;
                        float tempuz2=0.0f;
                        float tempvx2=0.0f;
                        float tempvy2=0.0f;
                        float tempvz2=0.0f;
                        float tempwx2=0.0f;
                        float tempwy2=0.0f;
                        float tempwz2=0.0f;
                        float tempuxz=0.0f;
                        float tempuxy=0.0f;
                        float tempvyz=0.0f;
                        float tempvxy=0.0f;
                        float tempwxz=0.0f;
                        float tempwyz=0.0f;

                        // This will make the compiler do the vectorization
                        for(int kk=1;kk<=mm;kk++) {
                            tempux2 += c[kk-1][0]*(u[k*ny*nx+j*nx+(i+kk)]+u[k*ny*nx+j*nx+(i-kk)]);
                            tempuy2 += c[kk-1][0]*(u[k*ny*nx+(j+kk)*nx+i]+u[k*ny*nx+(j-kk)*nx+i]);
                            tempuz2 += c[kk-1][0]*(u[(k+kk)*ny*nx+j*nx+i]+u[(k-kk)*ny*nx+j*nx+i]);
                        }
                        for(int kk=1;kk<=mm;kk++) {
                            tempvx2 += c[kk-1][0]*(v[k*ny*nx+j*nx+(i+kk)]+v[k*ny*nx+j*nx+(i-kk)]);
                            tempvy2 += c[kk-1][0]*(v[k*ny*nx+(j+kk)*nx+i]+v[k*ny*nx+(j-kk)*nx+i]);
                            tempvz2 += c[kk-1][0]*(v[(k+kk)*ny*nx+j*nx+i]+v[(k-kk)*ny*nx+j*nx+i]);
                        }
                        for(int kk=1;kk<=mm;kk++) {
                            tempwx2 += c[kk-1][0]*(w[k*ny*nx+j*nx+(i+kk)]+w[k*ny*nx+j*nx+(i-kk)]);
                            tempwy2 += c[kk-1][0]*(w[k*ny*nx+(j+kk)*nx+i]+w[k*ny*nx+(j-kk)*nx+i]);
                            tempwz2 += c[kk-1][0]*(w[(k+kk)*ny*nx+j*nx+i]+w[(k-kk)*ny*nx+j*nx+i]);
                        }

                         //for(kk=1;kk<=mm;kk++) end

                        tempux2=(tempux2+c0*u[k*ny*nx+j*nx+i])*vvp2*dtx*dtx;
                        // u[k][j][i]
                        tempuy2=(tempuy2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        // u[k][j][i]
                        tempuz2=(tempuz2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtz*dtz;
                        // u[k][j][i]

                        tempvx2=(tempvx2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempvy2=(tempvy2+c0*v[k*ny*nx+j*nx+i])*vvp2*dtx*dtx;
                        tempvz2=(tempvz2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtz*dtz;

                        tempwx2=(tempwx2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempwy2=(tempwy2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempwz2=(tempwz2+c0*w[k*ny*nx+j*nx+i])*vvp2*dtz*dtz;

                        // This loop is auto-vectorized
                        for(int kk=1;kk<=mm;kk++)
                        {
                            for(int kkk=1;kkk<=mm;kkk++)
                            {
                                tempuxz=tempuxz+c[kkk-1][1+kk]*(u[(k+kkk)*ny*nx+j*nx+(i+kk)]
                                        -u[(k-kkk)*ny*nx+j*nx+(i+kk)]
                                        +u[(k-kkk)*ny*nx+j*nx+(i-kk)]
                                        -u[(k+kkk)*ny*nx+j*nx+(i-kk)]);
                                // u[k+kkk][j][i+kk], u[k-kkk][j][i+kk], u[k-kkk][j][i-kk], u[k+kkk][j][i-kk]
                                tempuxy=tempuxy+c[kkk-1][1+kk]*(u[k*ny*nx+(j+kkk)*nx+(i+kk)]
                                        -u[k*ny*nx+(j-kkk)*nx+(i+kk)]
                                        +u[k*ny*nx+(j-kkk)*nx+(i-kk)]
                                        -u[k*ny*nx+(j+kkk)*nx+(i-kk)]);

                                tempvyz=tempvyz+c[kkk-1][1+kk]*(v[(k+kkk)*ny*nx+(j+kk)*nx+i]
                                        -v[(k-kkk)*ny*nx+(j+kk)*nx+i]
                                        +v[(k-kkk)*ny*nx+(j-kk)*nx+i]
                                        -v[(k+kkk)*ny*nx+(j-kk)*nx+i]);
                                tempvxy=tempvxy+c[kkk-1][1+kk]*(v[k*ny*nx+(j+kkk)*nx+(i+kk)]
                                        -v[k*ny*nx+(j-kkk)*nx+(i+kk)]
                                        +v[k*ny*nx+(j-kkk)*nx+(i-kk)]
                                        -v[k*ny*nx+(j+kkk)*nx+(i-kk)]);

                                tempwyz=tempwyz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+(j+kk)*nx+i]
                                        -w[(k-kkk)*ny*nx+(j+kk)*nx+i]
                                        +w[(k-kkk)*ny*nx+(j-kk)*nx+i]
                                        -w[(k+kkk)*ny*nx+(j-kk)*nx+i]);
                                tempwxz=tempwxz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+j*nx+(i+kk)]
                                        -w[(k-kkk)*ny*nx+j*nx+(i+kk)]
                                        +w[(k-kkk)*ny*nx+j*nx+(i-kk)]
                                        -w[(k+kkk)*ny*nx+j*nx+(i-kk)]);
                            } // for(kkk=1;kkk<=mm;kkk++) end
                        } //for(kk=1;kk<=mm;kk++) end

                        // LValues below are only changed here
                        up[k*ny*nx+j*nx+i]=2.*up1[k*ny*nx+j*nx+i]-up2[k*ny*nx+j*nx+i]
                                +tempux2+tempwxz*vvp2*dtz*dtx
                                +tempvxy*vvp2*dtz*dtx;
                        // up1[k][j][j], up2[k][j][i], up[k][j][i]
                        vp[k*ny*nx+j*nx+i]=2.*vp1[k*ny*nx+j*nx+i]-vp2[k*ny*nx+j*nx+i]
                                +tempvy2+tempuxy*vvp2*dtz*dtx
                                +tempwyz*vvp2*dtz*dtx;
                        wp[k*ny*nx+j*nx+i]=2.*wp1[k*ny*nx+j*nx+i]-wp2[k*ny*nx+j*nx+i]
                                +tempwz2+tempuxz*vvp2*dtz*dtx
                                +tempvyz*vvp2*dtz*dtx
                                +px*wave[l-1];
                        us[k*ny*nx+j*nx+i]=2.*us1[k*ny*nx+j*nx+i]-us2[k*ny*nx+j*nx+i]+tempuy2+tempuz2
                                -tempvxy*vvs2*dtz*dtx-tempwxz*vvs2*dtz*dtx;
                        vs[k*ny*nx+j*nx+i]=2.*vs1[k*ny*nx+j*nx+i]-vs2[k*ny*nx+j*nx+i]+tempvx2+tempvz2
                                -tempuxy*vvs2*dtz*dtx-tempwyz*vvs2*dtz*dtx;
                        ws[k*ny*nx+j*nx+i]=2.*ws1[k*ny*nx+j*nx+i]-ws2[k*ny*nx+j*nx+i]+tempwx2+tempwy2
                                -tempuxz*vvs2*dtz*dtx-tempvyz*vvs2*dtz*dtx;
                    }//for(i=nleft;i<nright;i++) end
                }
            }

            // Again, those are UNIQUE access. Safe to share
#pragma omp parallel for shared(up) shared(us) shared(vp) shared(vs) shared(wp) shared(ws) shared(u) shared(v) shared(w) \
    shared(up2) shared(up1) shared(us2) shared(us1) shared(vp2) shared(vp1) shared(wp2) shared(wp1) shared(ws2) shared(ws1)
            for(int k=ntop;k<nbottom;k++)
                for(int j=nfront;j<nback;j++)
                    for(int i=nleft;i<nright;i++)
                    {
                        u[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i]+us[k*ny*nx+j*nx+i];
                        v[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i]+vs[k*ny*nx+j*nx+i];
                        w[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i]+ws[k*ny*nx+j*nx+i];

                        up2[k*ny*nx+j*nx+i]=up1[k*ny*nx+j*nx+i];
                        up1[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i];
                        us2[k*ny*nx+j*nx+i]=us1[k*ny*nx+j*nx+i];
                        us1[k*ny*nx+j*nx+i]=us[k*ny*nx+j*nx+i];
                        vp2[k*ny*nx+j*nx+i]=vp1[k*ny*nx+j*nx+i];
                        vp1[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i];
                        vs2[k*ny*nx+j*nx+i]=vs1[k*ny*nx+j*nx+i];
                        vs1[k*ny*nx+j*nx+i]=vs[k*ny*nx+j*nx+i];
                        wp2[k*ny*nx+j*nx+i]=wp1[k*ny*nx+j*nx+i];
                        wp1[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i];
                        ws2[k*ny*nx+j*nx+i]=ws1[k*ny*nx+j*nx+i];
                        ws1[k*ny*nx+j*nx+i]=ws[k*ny*nx+j*nx+i];
                    }//for(i=nleft;i<nright;i++) end
        }//for(l=1;l<=lt;l++) end
        // [Afa] Do we need to keep the order of data?
        // [Afa Update] Yes, we do need to KEEP THE ORDER of data
        //        fwrite(up+169*ny*nx,sizeof(float),ny*nx,fout);    // This is the original fwrite
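        // Each shot writes one z-slice of ny*nx floats at byte offset
        // (ishot - 1) * ny * nx * sizeof(float), so the output file keeps the
        // original shot order no matter which rank computed the shot.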

        MPI_File_seek(mpi_fout, (ishot - 1) * ny * nx * sizeof(float), MPI_SEEK_SET);
        MPI_File_write(mpi_fout, up + 169 * ny * nx, ny * nx, MPI_FLOAT, &mpi_status);

    }//for(ishot=1;ishot<=nshot;ishot++) end

    MPI_File_close(&mpi_fout);
    MPI_File_close(&mpi_flog);

    free(u);
    free(v);
    free(w);
    free(up);
    free(up1);
    free(up2);
    free(vp);
    free(vp1);
    free(vp2);
    free(wp);
    free(wp1);
    free(wp2);
    free(us);
    free(us1);
    free(us2);
    free(vs);
    free(vs1);
    free(vs2);
    free(ws);
    free(ws1);
    free(ws2);
    free(vpp);
    free(density);
    free(vss);
    free(wave);

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();

    if (proc_rank == 0) {
        gettimeofday(&end,NULL);
        all_time = (end.tv_sec-start.tv_sec)+(float)(end.tv_usec-start.tv_usec)/1000000.0;
        printf("run time:\t%f s\n",all_time);
        flog = fopen(logfile,"a");
        fprintf(flog,"\nrun time:\t%f s\n\n",all_time);
        fclose(flog);
        flog = fopen(logfile,"a");
        fprintf(flog,"------------end time------------\n");
        fclose(flog);
        system(tmp);
    }


    // Why return 1?
    return 0;
}
Example #19
0
int A1D_Initialize()
{
    int mpi_initialized, mpi_provided;
    int mpi_status;
    int i;
    size_t bytes_in, bytes_out;
    DCMF_Result dcmf_result;
    DCMF_Configure_t dcmf_config;
    DCMF_Memregion_t local_memregion;

    /***************************************************
     *
     * configure MPI
     *
     ***************************************************/

    /* MPI has to be initialized for this implementation to work */
    MPI_Initialized(&mpi_initialized);
    assert(mpi_initialized==1);

    /* MPI has to be thread-safe so that DCMF doesn't explode */
    MPI_Query_thread(&mpi_provided);
    assert(mpi_provided==MPI_THREAD_MULTIPLE);

    /* have to use our own communicator for collectives to be proper */
    mpi_status = MPI_Comm_dup(MPI_COMM_WORLD,&A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* get my MPI rank */
    mpi_status = MPI_Comm_rank(A1D_COMM_WORLD,&myrank);
    assert(mpi_status==0);

    /* get MPI world size */
    mpi_status = MPI_Comm_size(A1D_COMM_WORLD,&mpi_size);
    assert(mpi_status==0);

    /* make sure MPI and DCMF agree */
    assert(myrank==DCMF_Messager_rank());
    assert(mpi_size==DCMF_Messager_size());

    /* barrier before DCMF_Messager_configure to make sure MPI is ready everywhere */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

    /***************************************************
     *
     * configure DCMF
     *
     ***************************************************/

    /* to be safe, but perhaps not necessary */
    dcmf_config.thread_level = DCMF_THREAD_MULTIPLE;
#ifdef ACCUMULATE_IMPLEMENTED
    /* interrupts required for accumulate only, Put/Get use DMA
     * if accumulate not used, MPI will query environment for DCMF_INTERRUPTS */
    dcmf_config.interrupts = DCMF_INTERRUPTS_ON;
#endif

    /* reconfigure DCMF with interrupts on */
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Messager_configure(&dcmf_config, &dcmf_config);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* barrier after DCMF_Messager_configure to make sure everyone has the new DCMF config */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

    /***************************************************
     *
     * setup DCMF memregions
     *
     ***************************************************/

    /* allocate memregion list */
    A1D_Memregion_list = malloc( mpi_size * sizeof(DCMF_Memregion_t) );
    assert(A1D_Memregion_list != NULL);

    /* allocate base pointer list */
    A1D_Baseptr_list = malloc( mpi_size * sizeof(void*) );
    assert(A1D_Baseptr_list != NULL);

    /* create memregions */
    bytes_in = -1;
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Memregion_create(&local_memregion,&bytes_out,bytes_in,NULL,0);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* exchange memregions because we don't use symmetry heap */
    mpi_status = MPI_Allgather(&local_memregion,sizeof(DCMF_Memregion_t),MPI_BYTE,
                               A1D_Memregion_list,sizeof(DCMF_Memregion_t),MPI_BYTE,
                               A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* destroy temporary local memregion */
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Memregion_destroy(&local_memregion);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* check for valid memregions */
    DCMF_CriticalSection_enter(0);
    for (i = 0; i < mpi_size; i++)
    {
        dcmf_result = DCMF_Memregion_query(&A1D_Memregion_list[i],
                                           &bytes_out,
                                           &A1D_Baseptr_list[i]);
        assert(dcmf_result==DCMF_SUCCESS);
    }
    DCMF_CriticalSection_exit(0);

#ifdef FLUSH_IMPLEMENTED
    /***************************************************
     *
     * setup flush list(s)
     *
     ***************************************************/

    /* allocate Put list */
    A1D_Put_flush_list = malloc( mpi_size * sizeof(int) );
    assert(A1D_Put_flush_list != NULL);

  #ifdef ACCUMULATE_IMPLEMENTED
    /* allocate Acc list */
    A1D_Send_flush_list = malloc( mpi_size * sizeof(int) );
    assert(A1D_Send_flush_list != NULL);
  #endif

#endif

    /***************************************************
     *
     * define null callback
     *
     ***************************************************/

    A1D_Nocallback.function = NULL;
    A1D_Nocallback.clientdata = NULL;

    return(0);
}
Example #20
0
/*--------------------------------------------------------------------*/
void cppinit2(int *idproc, int *nvp, int argc, char *argv[]) {
/* this subroutine initializes parallel processing
   lgrp communicator = MPI_COMM_WORLD
   output: idproc, nvp
   idproc = processor id in lgrp communicator
   nvp = number of real or virtual processors obtained
local data */
   static int ibig = 2147483647;
   static float small = 1.0e-12;
   int ierror, flag, ndprec, idprec, iprec;
   float prec;
   prec = 1.0 + small;
   iprec = ibig + 1;
/* ndprec = (0,1) = (no,yes) use (normal,autodouble) precision */
   if (vresult(prec) > 1.0)
      ndprec = 1;
   else
      ndprec = 0;
/* idprec = (0,1) = (no,yes) use (normal,autodouble) integer precision */
   if (iresult(iprec) > 0)
      idprec = 1;
   else
      idprec = 0;
/* Open error file */
   unit2 = fopen("C.2","w");
/* indicate whether MPI_INIT has been called */
   ierror = MPI_Initialized(&flag);
   if (!flag) {
/* initialize the MPI execution environment */
      ierror = MPI_Init(&argc,&argv);
      if (ierror) exit(1);
   }
   lworld = MPI_COMM_WORLD;
   lgrp = lworld;
/* determine the rank of the calling process in the communicator */
   ierror = MPI_Comm_rank(lgrp,idproc);
/* determine the size of the group associated with a communicator */
   ierror = MPI_Comm_size(lgrp,&nproc);
/* set default datatypes */
   mint = MPI_INT;
   mdouble = MPI_DOUBLE;
/* single precision real */
   if (ndprec==0) {
      mreal = MPI_FLOAT;
      mcplx = MPI_COMPLEX;
   }
/* double precision real */
   else {
      mreal = MPI_DOUBLE;
      mcplx = MPI_DOUBLE_COMPLEX;
   }
/* single precision integer */
/* if (idprec==0)           */
/*    mint = MPI_INT;       */
/* double precision integer */
/* else                     */
/*    mint = MPI_LONG;      */
/* operators */
   msum = MPI_SUM;
   mmax = MPI_MAX;
   *nvp = nproc;
   return;
}
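
A short hypothetical caller for the routine above (names other than cppinit2 are illustrative; plain MPI_Finalize is used since no shutdown wrapper appears in this excerpt):

#include <stdio.h>
#include <mpi.h>

void cppinit2(int *idproc, int *nvp, int argc, char *argv[]);  /* from the example above */

int main(int argc, char *argv[])
{
    int idproc, nvp;

    cppinit2(&idproc, &nvp, argc, argv);   /* initializes MPI if it is not already running */
    printf("process %d of %d started\n", idproc, nvp);

    /* ... parallel work using the datatypes and operators selected above ... */

    MPI_Finalize();
    return 0;
}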
Example #21
0
/*-------------------------------------------------------------------------
 * Function:  h5_show_hostname
 *
 * Purpose:  Show hostname.  Show process ID if in MPI environment.
 *
 * Return:  void
 *
 * Programmer:  Albert Cheng
 *              2002/04/22
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
void
h5_show_hostname(void)
{
    char  hostname[80];
#ifdef H5_HAVE_WIN32_API
    WSADATA wsaData;
    int err;
#endif

    /* try to show the process or thread id in multi-process cases */
#ifdef H5_HAVE_PARALLEL
    {
        int mpi_rank, mpi_initialized, mpi_finalized;

        MPI_Initialized(&mpi_initialized);
        MPI_Finalized(&mpi_finalized);

        if(mpi_initialized && !mpi_finalized) {
            MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);
            printf("MPI-process %d.", mpi_rank);
        }
        else
            printf("thread 0.");
    }
#elif defined(H5_HAVE_THREADSAFE)
    printf("thread %lu.", HDpthread_self_ulong());
#else
    printf("thread 0.");
#endif
#ifdef H5_HAVE_WIN32_API

    err = WSAStartup( MAKEWORD(2,2), &wsaData );
    if ( err != 0 ) {
        /* could not find a usable WinSock DLL */
        return;
    }

    /* Confirm that the WinSock DLL supports 2.2.*/
    /* Note that if the DLL supports versions greater    */
    /* than 2.2 in addition to 2.2, it will still return */
    /* 2.2 in wVersion since that is the version we      */
    /* requested.                                        */

    if ( LOBYTE( wsaData.wVersion ) != 2 ||
            HIBYTE( wsaData.wVersion ) != 2 ) {
        /* could not find a usable WinSock DLL */
        WSACleanup( );
        return;
    }

#endif
#ifdef H5_HAVE_GETHOSTNAME
    if (gethostname(hostname, (size_t)80) < 0)
        printf(" gethostname failed\n");
    else
        printf(" hostname=%s\n", hostname);
#else
    printf(" gethostname not supported\n");
#endif
#ifdef H5_HAVE_WIN32_API
    WSACleanup();
#endif
}
Example #22
0
//SEXP pboot(SEXP data, SEXP statistic, SEXP ind, SEXP lt0, SEXP varg){
SEXP pboot(SEXP scenario,...){
   
  SEXP result;
  double *func_results;
  int response, worldSize;
  enum commandCodes commandCode;
  int scene = asInteger(scenario);
  va_list ap;
  int c;
  // get the function arguments common to all scenarios 
  // 1, R, lt0, vargs, strdata, strstatistic
  va_start(ap, scenario); 
  int r = asInteger(va_arg(ap, SEXP)); // the number of replications to perform
  int ltn = asInteger(va_arg(ap, SEXP)); // the number of results the statistical function returns
  SEXP varg = va_arg(ap, SEXP);
  SEXP data = va_arg(ap, SEXP);
  SEXP statistic = va_arg(ap, SEXP);

  MPI_Initialized(&response);
  if (response) {
      DEBUG("MPI is init'ed in ptest\n");
  } else {

      DEBUG("MPI is NOT init'ed in ptest\n");
      PROTECT(result = NEW_INTEGER(1));
      INTEGER(result)[0] = -1;
      UNPROTECT(1);

      return result;
  }

  MPI_Comm_size(MPI_COMM_WORLD, &worldSize);
 
  // broadcast command to other processors
  commandCode = PBOOT;
  MPI_Bcast(&commandCode, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);

  // initialise the memory to store the results
  func_results = (double *)malloc(sizeof(double) * r * ltn); 

  int * find;
  int * ind;
  int * wind;
  SEXP f, w, Spred, Sind;
  int m; 
  int * pred;

  switch(scene) {   
    case 1:;
      SEXP rangen, mle;
      rangen = va_arg(ap, SEXP);
      mle = va_arg(ap, SEXP);
      response = boot(1, func_results, r, ltn, varg, CHAR(STRING_ELT(data,0)), translateChar(PRINTNAME(statistic)), rangen, mle);
      break; 
    case 2:;
      f = va_arg(ap, SEXP); 
      c = ncols(f); // number of columns in the index
      find = (int *)malloc(sizeof(int) * r * c);
      Rmatrix2Carray(f, find, r, c);
      response = boot(2, func_results, r, ltn, varg, CHAR(STRING_ELT(data,0)), translateChar(PRINTNAME(statistic)), c, find);
      free(find);
      break; 
    case 3:;
      f = va_arg(ap, SEXP); 
      Spred = va_arg(ap, SEXP); 
      c = ncols(f); // number of columns in the index
      m = ncols(Spred); 
      find = (int *)malloc(sizeof(int) * r * c);
      pred = (int *)malloc(sizeof(int) * r * m);
      Rmatrix2Carray(f, find, r, c);
      Rmatrix2Carray(Spred, pred, r, m);
      response = boot(3, func_results, r, ltn, varg, CHAR(STRING_ELT(data,0)), translateChar(PRINTNAME(statistic)), c, find, pred, m);
      free(find);
      free(pred);
      break; 
    case 4:
      w = va_arg(ap, SEXP);
      c = ncols(w); // number of columns in the index
      wind = (int *)malloc(sizeof(int) * r * c);
      Rmatrix2Carray(w, wind, r, c);
      response = boot(4, func_results, r, ltn, varg, CHAR(STRING_ELT(data,0)), translateChar(PRINTNAME(statistic)), c, wind);
      free(wind);
      break; 
    case 5:;
      w = va_arg(ap, SEXP);
      Spred = va_arg(ap, SEXP);
      c = ncols(w); // number of columns in the index
      m = ncols(Spred);
      wind = (int *)malloc(sizeof(int) * r * c);
      pred = (int *)malloc(sizeof(int) * r * m);
      Rmatrix2Carray(w, wind, r, c);
      Rmatrix2Carray(Spred, pred, r, m);
      response = boot(5, func_results, r, ltn, varg, CHAR(STRING_ELT(data,0)), translateChar(PRINTNAME(statistic)), c, wind, pred, m);
      free(wind);
      free(pred);
      break; 
    case 8:
      ;// work around for gcc bug
      // retrieve function arguments 
      SEXP ind = va_arg(ap, SEXP);
      c = ncols(ind); // replications are the number of columns in the index
      // convert the ind from (horrible) SEXP format to C array
      int * cind;
      cind = (int *)malloc(sizeof(int) * r * c);
      Rmatrix2Carray(ind, cind, r, c);
      
      // sending everything to the implementation function
      response = boot(8, func_results, r, ltn, varg, CHAR(STRING_ELT(data,0)), translateChar(PRINTNAME(statistic)), c, cind);
      free(cind);
      break; // end of scenario 8
    default:
      break; 
  }// end of switch
  va_end(ap);
  // Turn the array passed back from the implementation into 
  // a SEXP object that can be returned to R.
  PROTECT(result = allocMatrix(REALSXP,r ,ltn)); // t.star <- matrix(NA, sum(R), lt0)
  Carray2Rmatrix(func_results, result, r, ltn);
  free(func_results);
  UNPROTECT(1); 
  return result;
}
Example #23
0
/*-------------------------------------------------------------------------
 * Function:  getenv_all
 *
 * Purpose:  Used to get the environment that the root MPI task has.
 *     name specifies which environment variable to look for
 *     val is the string to which the value of that environment
 *     variable will be copied.
 *
 *     NOTE: The pointer returned by this function is only
 *     valid until the next call to getenv_all and the data
 *     stored there must be copied somewhere else before any
 *     further calls to getenv_all take place.
 *
 * Return:  pointer to a string containing the value of the environment variable
 *     NULL if the variable doesn't exist in task 'root's environment.
 *
 * Programmer:  Leon Arber
 *              4/4/05
 *
 * Modifications:
 *    Use original getenv if MPI is not initialized. This happens when
 *    one uses the PHDF5 library to build a serial version of the code.
 *    Albert 2006/04/07
 *
 *-------------------------------------------------------------------------
 */
char *
getenv_all(MPI_Comm comm, int root, const char* name)
{
    int mpi_size, mpi_rank, mpi_initialized;
    int len;
    static char* env = NULL;

    assert(name);

    MPI_Initialized(&mpi_initialized);
    if(!mpi_initialized) {
        /* use original getenv */
        if(env)
            HDfree(env);
        env = HDgetenv(name);
    } /* end if */
    else {
        MPI_Comm_rank(comm, &mpi_rank);
        MPI_Comm_size(comm, &mpi_size);
        assert(root < mpi_size);

        /* The root task does the getenv call
         * and sends the result to the other tasks */
        if(mpi_rank == root) {
            env = HDgetenv(name);
            if(env) {
                len = (int)HDstrlen(env);
                MPI_Bcast(&len, 1, MPI_INT, root, comm);
                MPI_Bcast(env, len, MPI_CHAR, root, comm);
            }
            else {
                /* len -1 indicates that the variable was not in the environment */
                len = -1;
                MPI_Bcast(&len, 1, MPI_INT, root, comm);
            }
        }
        else {
            MPI_Bcast(&len, 1, MPI_INT, root, comm);
            if(len >= 0) {
                if(env == NULL)
                    env = (char*) HDmalloc((size_t)len+1);
                else if(HDstrlen(env) < (size_t)len)
                    env = (char*) HDrealloc(env, (size_t)len+1);

                MPI_Bcast(env, len, MPI_CHAR, root, comm);
                env[len] = '\0';
            }
            else {
                if(env)
                    HDfree(env);
                env = NULL;
            }
        }
    }

#ifndef NDEBUG
    MPI_Barrier(comm);
#endif

    return env;
}
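
A small usage sketch for the routine above; the environment-variable name is only an illustration, and the declarations are assumed to come from the surrounding test header:

/* Every rank receives the value from the root task's environment. */
static void show_root_env(void)
{
    char *val = getenv_all(MPI_COMM_WORLD, 0, "HDF5_PARAPREFIX");

    if (val)
        printf("root sees HDF5_PARAPREFIX=%s\n", val);
    /* Copy the string before the next getenv_all() call invalidates it. */
}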
Example #24
0
int fastdebug_(const char *str, int *lineno ,int str_len) {

    int flag;
    mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
    char *msg, *nstr;
    char hostname[30], fname[50];
//	char prefix[] = "/tmp/gmdebug/debug-";
    char *prefix = "/scratch/gmdebug-";
    char preamble[ROUGH_PREAMBLE_LENGTH];

    // Bail immediately if there has been an error, or we're simply deactivated
    // and acting as a stub file.
    if (fast_debug_error || is_stubbed) return 0;

    // Get the hostname
    gethostname(hostname, sizeof hostname);

    MPI_Initialized(&flag);
    if(flag==0) return 1;
    if (fp == -1) {
        if(NULL!=getenv("MATT_PREFIX")) prefix=getenv("MATT_PREFIX");
        mypid=getpid();
        MPI_Initialized(&flag);
        if(flag) MPI_Comm_rank(MPI_COMM_WORLD, &myrank );
        else fprintf(stderr,"[fastdebug] MPI NOT STARTED\n");

        // just for debug..
        if (VERBOSE) mypid=0;

        snprintf(fname,sizeof(fname)-1,"%s%5.5d-%3.3d",prefix,mypid,myrank);
        if (VERBOSE) printf("Opening file %s\n", fname);
        if ((fp=open(fname, O_WRONLY | O_CREAT | O_APPEND, mode)) < 0) {
            fprintf(stderr, "[fastdebug]: Cannot open file %s on %s.  Deactivating fastdebug.\n", fname, hostname);
            //exit(1);
            fast_debug_error=1;
            return 0;
        }
    }

    // Write the preamble: time, hostname, etc.
    snprintf(preamble, sizeof(preamble),
             "%lld [%s:%05d:%02d]",
             f_gettimeofday_(),
             hostname, mypid, myrank);

    // Magic number ROUGH_PREAMBLE_LENGTH is estimated length of all the
    // host/pid/rank info in the debug line
    if ((str_len + strlen(preamble)) > MAX_STR_LEN) {
        // Truncate message
        str_len = MAX_STR_LEN - strlen(preamble);
    }
    str_len++; // null character

    // Trim white space
    if (VERBOSE) printf("[fastdebug] Trying to allocate %d characters for nstr \"%s\"\n", str_len, str);
    if ((nstr=(char *)alloca(str_len)) == NULL) {
        fprintf(stderr, "[fastdebug]: Cannot allocate msg %d characters for string.\n", (str_len+2));
        exit(1);
    }
    snprintf(nstr, str_len, "%s", str); // copy it to a mutable string
    nstr=trim(nstr); // trim white space
    str_len=strlen(nstr);


    // str_len will now be the entire output message (preample and all)
    // The 128 is a magic number buffer, for the spaces and line breaks, etc..
    str_len=str_len+strlen(preamble)+128;

    if (VERBOSE) printf("[fastdebug] Trying to allocate %d characters for combined string \"%s %s\"\n", str_len+2, preamble, nstr);
    if ((msg=(char *)alloca(str_len)) == NULL) {
        fprintf(stderr, "[fastdebug]: Cannot allocate msg %d characters for string.\n", (str_len+2));
        exit(1);
    }

    // Combine the string
    snprintf(msg, str_len, "%s(%5d) %s\n", preamble,*lineno, nstr);
    //snprintf(msg, str_len+2, "%s %s\n", nstr);

    if (VERBOSE) printf("Writing \"%s\" to %s\n", msg, fname);
    write(fp,msg,strlen(msg));

    /*close(fp);*/

    return 1;
}
Example #25
0
File: cfio.c Project: CFIO/CFIO
int cfio_init(int x_proc_num, int y_proc_num, int ratio)
{
    int rc, i;
    int size;
    int root = 0;
    int error, ret;
    int server_proc_num;
    int best_server_amount;
    MPI_Group group, client_group, server_group;
    int *ranks;

    //set_debug_mask(DEBUG_CFIO | DEBUG_SERVER);// | DEBUG_MSG | DEBUG_SERVER);
    rc = MPI_Initialized(&i); 
    if( !i )
    {
	error("MPI should be initialized before CFIO\n");
	return -1;
    }

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    //if(rank == 100)
    //{
    //    set_debug_mask(DEBUG_MAP);
    //}
    
    client_num = x_proc_num * y_proc_num;
    server_proc_num = size - client_num;
    if(server_proc_num < 0)
    {
	server_proc_num = 0;
    }
    
    best_server_amount = (int)((double)client_num / ratio);
    if(best_server_amount <= 0)
    {
	best_server_amount = 1;
    }

    MPI_Comm_group(MPI_COMM_WORLD, &group);
    
    //ranks = malloc(client_num * sizeof(int));
    //for(i = 0; i < client_num; i ++)
    //{
    //    ranks[i] = i;
    //}
    //MPI_Group_incl(group, client_num, ranks, &client_group);
    //MPI_Comm_create(MPI_COMM_WORLD, client_group, &client_comm);
    //free(ranks);

    ranks = malloc(server_proc_num * sizeof(int));
    for(i = 0; i < server_proc_num; i ++)
    {
	ranks[i] = i + client_num;
    }
    MPI_Group_incl(group, server_proc_num, ranks, &server_group);
    MPI_Comm_create(MPI_COMM_WORLD, server_group, &server_comm);
    free(ranks);

    //times_start();

    if((ret = cfio_map_init(
		    x_proc_num, y_proc_num, server_proc_num, 
		    best_server_amount, MPI_COMM_WORLD, server_comm)) < 0)
    {
	error("Map Init Fail.");
	return ret;
    }

    if(cfio_map_proc_type(rank) == CFIO_MAP_TYPE_SERVER)
    {
	if((ret = cfio_server_init()) < 0)
	{
	    error("");
	    return ret;
	}
	if((ret = cfio_server_start()) < 0)
	{
	    error("");
	    return ret;
	}
    }else if(cfio_map_proc_type(rank) == CFIO_MAP_TYPE_CLIENT)
    {
	if((ret = cfio_send_init(CLIENT_BUF_SIZE)) < 0)
	{
	    error("");
	    return ret;
	}

	if((ret = cfio_id_init(CFIO_ID_INIT_CLIENT)) < 0)
	{
	    error("");
	    return ret;
	}
	
    }

    debug(DEBUG_CFIO, "success return.");
    return CFIO_ERROR_NONE;
}
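
A hypothetical driver for cfio_init(); the 4x4 client grid and ratio of 8 are illustration values, and no CFIO shutdown call is shown because none appears in this excerpt:

#include <stdio.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    if (cfio_init(4, 4, 8) < 0) {      /* ranks beyond the 4*4 clients act as I/O servers */
        fprintf(stderr, "cfio_init failed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Client ranks continue here; server ranks are driven inside cfio_init()
       via cfio_server_init()/cfio_server_start(), as shown above. */

    MPI_Finalize();
    return 0;
}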
Example #26
0
int hybrid_report_mask(){

                        // General
int i,j,ierr;
int id, rid,tid;
int in_mpi, in_omp;
int thrd, nthrds;
int ncpus, nel_set;

                        // Mask storage
static int ** omp_proc_mask;
static int  * omp_mask_pac;
char *dummy;

                       // MPI specific Variables
int rank, nranks;
MPI_Request *request; 
MPI_Status  *status;

static int   multi_node = 0;
static char *all_names;
static int   max_name_len;
     int   name_len;
     char  proc_name[MPI_MAX_PROCESSOR_NAME];

char l,p;
int  tpc;   // hwthreads/core

   Maskopts opts;
                          // get print_speed fast or slow (f|c);   listing cores or SMT (c|s)
   p = opts.get_p();
   l = opts.get_l();

   tpc=get_threads_per_node();
                                         // In MPI and parallel region ?
   MPI_Initialized(&in_mpi);
   in_omp = omp_in_parallel();
   if(in_mpi == 0){
     printf("ERROR: ***** Must call hybrid_report_mask() in MPI program. ***** \n");
     exit(1);
   }

                                        // Get rank number & no of ranks via MPI
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &nranks);

   if(in_omp == 0){
     if(rank == 0){
        printf("         ***** When using 1 thread, Intel OpenMP MAY report "
                              "\"not in a parallel region\" (Uh!)***** \n");
        printf("         ***** Each row will only have a rank number (no \"0\" thread_id). \n");
        printf("WARNING: ***** Unspecified results if hybrid_report_mask "
                              "not called in parallel region of MPI code section. ***** \n");
     }
   }
 
   thrd  =  omp_get_thread_num();        // thread id
   nthrds =  omp_get_num_threads();      // Number of Threads
                                         // Get number of cpus (this gives no. 
                                         // of cpu_ids in /proc/cpuinfo)
   ncpus = (int) sysconf(_SC_NPROCESSORS_ONLN);
 
 
                                  // Working only with MPI processes (masters)
   #pragma omp master
   {
                                  // Get a list of nodes from all ranks.
     MPI_Get_processor_name(proc_name,&name_len);
     MPI_Allreduce(&name_len, &max_name_len, 1,MPI_INT, MPI_MAX, MPI_COMM_WORLD);
     all_names = (char *) malloc(sizeof(char)*nranks*(max_name_len+1));
     MPI_Gather( proc_name, max_name_len+1 , MPI_CHAR,
                 all_names, max_name_len+1, MPI_CHAR,
                 0, MPI_COMM_WORLD);
  
                                  // If multiple nodes, make multi_node non-zero.
     if(rank == 0){
       for(id=0;id<nranks;id++){
         if( strcmp(&all_names[id*(max_name_len+1)],&all_names[0]) ) multi_node++; }

     }
   
                                  // Create shared storage for masks (only master allocates)
 
     omp_proc_mask =                          (int **) malloc(sizeof(int*)*nthrds);
     for(i=0;i<nthrds;i++) omp_proc_mask[i] = (int * ) malloc(sizeof(int )*ncpus  );
     for(i=0;i<nthrds;i++) for(j=0;j<ncpus;j++) omp_proc_mask[i][j] =0;
 
   }
   #pragma omp barrier

   #pragma omp critical           // (boundto -- may not be thread safe)
     ierr = boundto(&nel_set,omp_proc_mask[thrd]);
   #pragma omp barrier
 
   #pragma omp master
   {
 
     omp_mask_pac = (int *) malloc(sizeof(int)*nranks*nthrds*ncpus);  // need packing space for mpi send/recv

     if(rank == 0){
       request = (MPI_Request *) malloc(sizeof(MPI_Request)*nranks);
       status  = (MPI_Status  *) malloc(sizeof(MPI_Status )*nranks);

       print_mask(1,  dummy,  multi_node,     0, 0,   ncpus, nranks,nthrds, omp_proc_mask[0],tpc,l);  //print header 
       fflush(stdout);

       for(tid=0;tid<nthrds;tid++){
          print_mask(0,  &all_names[tid*(max_name_len+1)], multi_node,  0,tid,   ncpus, nranks,nthrds, omp_proc_mask[tid],tpc,l);
       }
       fflush(stdout);
         
       for(rid=1;rid<nranks;rid++){ // Receive other rank's packed mask arrays
         MPI_Irecv(&omp_mask_pac[rid*nthrds*ncpus], nthrds*ncpus, MPI_INT, rid, 99, MPI_COMM_WORLD, &request[rid-1]);
       } 

       MPI_Waitall(nranks-1,&request[0],&status[0]);

       for(rid=1;rid<nranks;rid++){ // Print for each rank
            for(tid=0;tid<nthrds;tid++){
               print_mask(0,  &all_names[tid*(max_name_len+1)], multi_node,  rid,tid,   ncpus, nranks,nthrds, &omp_mask_pac[rid*nthrds*ncpus + tid*ncpus],tpc,l);
               if(p == 's') ierr=usleep(300000);
            }
       }

       if(nranks*nthrds > 50)
          print_mask(2,  dummy,  multi_node,     0, 0,   ncpus, nranks,nthrds, omp_proc_mask[0],tpc,l);  //print header 

       fflush(stdout);

     }   // end root printing

     else{  //all non-root ranks

                                       // Pack up the ranks' mask arrays (Uh, should have made one array from beginning!) 
          for(   tid=0;tid<nthrds;tid++){
             for( id=0; id<ncpus;  id++)  omp_mask_pac[(tid*ncpus)+id] = omp_proc_mask[tid][id];
             if(p == 's') ierr=usleep(300000);
          }
                                       // Send to root
          MPI_Send(omp_mask_pac, nthrds*ncpus, MPI_INT, 0, 99, MPI_COMM_WORLD);
               

     } // end non-root printing

                                  // Return allocated space

     for(i=0;i<nthrds;i++) free(omp_proc_mask[i]);
                           free(omp_proc_mask);
     free(omp_mask_pac);
     if(rank == 0 ){ free(request); free(status);}
     free(all_names);

   } // end of Master

   #pragma omp barrier            // JIC, so that all threads leave at the same time.

   return 0;
}
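
A hypothetical call site that satisfies the function's own requirements (inside an MPI program and inside an OpenMP parallel region):

#include <mpi.h>
#include <omp.h>

int hybrid_report_mask(void);   /* the routine above */

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    #pragma omp parallel
    {
        hybrid_report_mask();   /* every thread calls it; the master threads do the MPI work */
    }

    MPI_Finalize();
    return 0;
}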
Example #27
0
GlobalMPISession::GlobalMPISession( int* argc, char*** argv, std::ostream *out )
{
  std::ostringstream oss;

  // Above is used to create all output before sending to *out to avoid
  // jumbled parallel output between processors

#ifdef HAVE_MPI

  int mpierr = 0;

  // Assert that MPI is not already initialized
  int mpiHasBeenStarted = 0;
  MPI_Initialized(&mpiHasBeenStarted);
  if (mpiHasBeenStarted) {
    if (out) {
      *out << "GlobalMPISession(): Error, MPI_Initialized() returned true,"
           << " calling std::terminate()!\n"
           << std::flush;
    }
    std::terminate();
  }

  // Initialize MPI
  mpierr = ::MPI_Init(argc, (char ***) argv);
  if (mpierr != 0) {
    if (out) {
      *out << "GlobalMPISession(): Error, MPI_Init() returned error code="
           << mpierr << "!=0, calling std::terminate()!\n"
           << std::flush;
    }
    std::terminate();
  }

  initialize(out); // Get NProc_ and rank_

  int nameLen;
  char procName[MPI_MAX_PROCESSOR_NAME];
  mpierr = ::MPI_Get_processor_name(procName, &nameLen);
  if (mpierr != 0) {
    if (out) {
      *out << "GlobalMPISession():  Error, MPI_Get_processor_name() error code="
           << mpierr << "!=0, calling std::terminate()!\n"
           << std::flush;
    }
    std::terminate();
  }

  oss << "Teuchos::GlobalMPISession::GlobalMPISession(): started processor with name "
      << procName << " and rank " << rank_ << "!" << std::endl;

#else

  oss << "Teuchos::GlobalMPISession::GlobalMPISession(): started serial run"
      << std::endl;

#endif

#ifndef TEUCHOS_SUPPRESS_PROC_STARTUP_BANNER

  // See if we should suppress the startup banner
  bool printStartupBanner = true;
  const std::string suppress_option("--teuchos-suppress-startup-banner");
  for ( int opt_i = 0; opt_i < *argc; ++opt_i ) {
    if ( suppress_option == (*argv)[opt_i] ) {
      // We are suppressing the output!
      printStartupBanner = false;
      // Remove this option!
      // Note that (*argv)[*argc]==0 by convention, so we copy it too!
      for( int i = opt_i; i < *argc; ++i )
        (*argv)[i] = (*argv)[i+1];
      --*argc;
    }
  }
  if (out && printStartupBanner) {
    *out << oss.str() << std::flush;
  }

#endif

}
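
A sketch of the intended RAII usage of this class, assuming the usual Teuchos accessors getRank()/getNProc(); MPI_Finalize() is issued by the session destructor:

#include <Teuchos_GlobalMPISession.hpp>
#include <iostream>

int main(int argc, char* argv[])
{
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, &std::cout);

  std::cout << "rank " << Teuchos::GlobalMPISession::getRank()
            << " of "  << Teuchos::GlobalMPISession::getNProc() << std::endl;

  return 0;  // the session destructor shuts MPI down
}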
Example #28
0
/* so src/nrnpython/inithoc.cpp does not have to include a c++ mpi.h */
int nrnmpi_wrap_mpi_init(int* flag) {
	return MPI_Initialized(flag);
}
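
A hypothetical caller, e.g. from a translation unit that must not include the C++ mpi.h directly (in the real code the declaration would come from a shared header):

int nrnmpi_wrap_mpi_init(int* flag);   /* declared elsewhere in the real code */

static int mpi_already_initialized(void) {
    int flag = 0;
    nrnmpi_wrap_mpi_init(&flag);       /* forwards to MPI_Initialized() */
    return flag;
}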
Example #29
0
int pmrrr
(char *jobz, char *range, int *np, double  *D,
 double *E, double *vl, double *vu, int *il,
 int *iu, int *tryracp, MPI_Comm comm, int *nzp,
 int *offsetp, double *W, double *Z, int *ldz, int *Zsupp)
{
  /* Input parameter */
  int  n      = *np;
  bool onlyW = toupper(jobz[0]) == 'N';
  bool wantZ = toupper(jobz[0]) == 'V';
  bool cntval = toupper(jobz[0]) == 'C';
  bool alleig = toupper(range[0]) == 'A';
  bool valeig = toupper(range[0]) == 'V';
  bool indeig = toupper(range[0]) == 'I';

  /* Check input parameters */
  if(!(onlyW  || wantZ  || cntval)) return 1;
  if(!(alleig || valeig || indeig)) return 1;
  if(n <= 0) return 1;
  if (valeig) {
    if(*vu<=*vl) return 1;
  } else if (indeig) {
    if (*il<1 || *il>n || *iu<*il || *iu>n) return 1;
  }
  
  /* MPI & multithreading info */
  int is_init, is_final;
  MPI_Initialized(&is_init);
  MPI_Finalized(&is_final);
  if (is_init!=1 || is_final==1) {
    fprintf(stderr, "ERROR: MPI is not active! (init=%d, final=%d) \n", 
      is_init, is_final);
    return 1;
  }
  MPI_Comm comm_dup;
  MPI_Comm_dup(comm, &comm_dup);
  int nproc, pid, thread_support;
  MPI_Comm_size(comm_dup, &nproc);
  MPI_Comm_rank(comm_dup, &pid);
  MPI_Query_thread(&thread_support);

  int nthreads;
  if ( !(thread_support == MPI_THREAD_MULTIPLE ||
         thread_support == MPI_THREAD_FUNNELED) ) {
    /* Disable multithreading; note: to support multithreading with 
     * MPI_THREAD_SERIALIZED the code must be changed slightly; this 
     * is not supported at the moment */
    nthreads = 1;
  } else {
    char *ompvar = getenv("PMR_NUM_THREADS");
    if (ompvar == NULL) {
      nthreads = DEFAULT_NUM_THREADS;
    } else {
      nthreads = atoi(ompvar);
    }
  }

#if defined(MVAPICH2_VERSION)
  if (nthreads>1) {
    int mv2_affinity=1;
    char *mv2_string = getenv("MV2_ENABLE_AFFINITY");
    if (mv2_string != NULL) 
      mv2_affinity = atoi(mv2_string);
    if (mv2_affinity!=0) {
      nthreads = 1;
      if (pid==0) {
        fprintf(stderr, "WARNING: PMRRR incurs a significant performance penalty when multithreaded with MVAPICH2 with affinity enabled. The number of threads has been reduced to one; please rerun with MV2_ENABLE_AFFINITY=0 or PMR_NUM_THREADS=1 in the future.\n");
        fflush(stderr);
      }
    }
  }
#endif

  /* If only the maximal number of local eigenvectors is queried,
   * return here if possible */
  *nzp     = 0;
  *offsetp = 0;
  if (cntval) {
    if ( alleig || n < DSTEMR_IF_SMALLER ) {
      *nzp = iceil(n,nproc);
      MPI_Comm_free(&comm_dup);
      return 0;
    } else if (indeig) {
      *nzp = iceil(*iu-*il+1,nproc);
      MPI_Comm_free(&comm_dup);
      return 0;
    }
  }

  /* Check if computation should be done by multiple processes */
  int info;
  if (n < DSTEMR_IF_SMALLER) {
    info = handle_small_cases(jobz, range, np, D, E, vl, vu, il,
			      iu, tryracp, comm, nzp, offsetp, W,
			      Z, ldz, Zsupp);
    MPI_Comm_free(&comm_dup);
    return info;
  }

  /* Allocate memory */
  double *Werr = (double*)malloc(n*sizeof(double)); assert(Werr!=NULL);
  double *Wgap = (double*)malloc(n*sizeof(double)); assert(Wgap!=NULL);
  double *gersch = (double*)malloc(2*n*sizeof(double)); assert(gersch!=NULL);
  int *iblock = (int*)calloc(n,sizeof(int)); assert(iblock!=NULL);
  int *iproc  = (int*)malloc(n*sizeof(int)); assert(iproc!=NULL);
  int *Windex = (int*)malloc(n*sizeof(int)); assert(Windex!=NULL);
  int *isplit = (int*)malloc(n*sizeof(int)); assert(isplit!=NULL);
  int *Zindex = (int*)malloc(n*sizeof(int)); assert(Zindex!=NULL);
  proc_t *procinfo = (proc_t*)malloc(sizeof(proc_t)); assert(procinfo!=NULL);
  in_t *Dstruct = (in_t*)malloc(sizeof(in_t)); assert(Dstruct!=NULL);
  val_t *Wstruct = (val_t*)malloc(sizeof(val_t)); assert(Wstruct!=NULL);
  vec_t *Zstruct = (vec_t*)malloc(sizeof(vec_t)); assert(Zstruct!=NULL);
  tol_t *tolstruct = (tol_t*)malloc(sizeof(tol_t)); assert(tolstruct!=NULL);

  /* Bundle variables into structures */
  procinfo->pid            = pid;
  procinfo->nproc          = nproc;
  procinfo->comm           = comm_dup;
  procinfo->nthreads       = nthreads;
  procinfo->thread_support = thread_support;

  Dstruct->n      = n;
  Dstruct->D      = D;
  Dstruct->E      = E;
  Dstruct->isplit = isplit;

  Wstruct->n      = n;
  Wstruct->vl     = vl;
  Wstruct->vu     = vu;
  Wstruct->il     = il;
  Wstruct->iu     = iu;
  Wstruct->W      = W;
  Wstruct->Werr   = Werr;
  Wstruct->Wgap   = Wgap;
  Wstruct->Windex = Windex;
  Wstruct->iblock = iblock;
  Wstruct->iproc  = iproc;
  Wstruct->gersch = gersch;

  Zstruct->ldz    = *ldz;
  Zstruct->nz     = 0;
  Zstruct->Z      = Z;
  Zstruct->Zsupp  = Zsupp;
  Zstruct->Zindex = Zindex;

  /* Scale matrix to allowable range, returns 1.0 if not scaled */
  double scale = scale_matrix(Dstruct, Wstruct, valeig);

  /* Test whether the matrix warrants the more expensive computation
   * that guarantees high relative accuracy */
  if (*tryracp)
    odrrr(&n, D, E, &info); /* 0 - rel acc */
  else info = -1;

  int i;
  double *Dcopy, *E2copy;
  if (info == 0) {
    /* This case is extremely rare in practice */ 
    tolstruct->split = DBL_EPSILON;
    /* Copy original data needed for refinement later */
    Dcopy = (double*)malloc(n*sizeof(double)); assert(Dcopy!=NULL);
    memcpy(Dcopy, D, n*sizeof(double));  
    E2copy = (double*)malloc(n*sizeof(double)); assert(E2copy!=NULL);
    for (i=0; i<n-1; i++) 
      E2copy[i] = E[i]*E[i];
  } else {
    /* Neg. threshold forces old splitting criterion */
    tolstruct->split = -DBL_EPSILON; 
    *tryracp = 0;
  }

  if (!wantZ) {
    /* Compute eigenvalues to full precision */
    tolstruct->rtol1 = 4.0 * DBL_EPSILON;
    tolstruct->rtol2 = 4.0 * DBL_EPSILON;
  } else {
    /* Do not compute to full accuracy first, but refine later */
    tolstruct->rtol1 = sqrt(DBL_EPSILON);
    tolstruct->rtol1 = fmin(1e-2*MIN_RELGAP, tolstruct->rtol1);
    tolstruct->rtol2 = sqrt(DBL_EPSILON)*5.0E-3;
    tolstruct->rtol2 = fmin(5e-6*MIN_RELGAP, tolstruct->rtol2);
    tolstruct->rtol2 = fmax(4.0 * DBL_EPSILON, tolstruct->rtol2);
  }

  /*  Compute all eigenvalues: sorted by block */
  info = plarre(procinfo,jobz,range,Dstruct,Wstruct,tolstruct,nzp,offsetp);
  assert(info == 0);

  /* If just the number of local eigenvectors is queried */
  if (cntval && valeig) {
    clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
	     isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
	     tolstruct);
    return 0;
  }

  /* If only eigenvalues are to be computed */
  if (!wantZ) {

    /* Refine to high relative accuracy with respect to the input T */
    if (*tryracp) {
      info = 
        refine_to_highrac
        (procinfo, jobz, Dcopy, E2copy, Dstruct, nzp, Wstruct, tolstruct);
      assert(info == 0);
    }

    /* Sort eigenvalues */
    qsort(W, n, sizeof(double), cmp);

    /* Only keep subset ifirst:ilast */
    int ifirst, ilast, isize;
    int iil = *il;
    int iiu = *iu;
    int ifirst_tmp=iil;
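    /* Distribute the index range iil..iiu evenly over the nproc processes:
     * each process gets (iiu-iil+1)/nproc indices and the first
     * (iiu-iil+1)%nproc processes get one extra.  For example, 10 requested
     * eigenvalues on 3 processes are split 4/3/3, i.e. 1-based indices
     * 1-4, 5-7 and 8-10. */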
    for (i=0; i<nproc; i++) {
      int chunk  = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
      int ilast_tmp;
      if (i == nproc-1) {
	ilast_tmp = iiu;
      } else {
	ilast_tmp = ifirst_tmp + chunk - 1;
	ilast_tmp = imin(ilast_tmp, iiu);
      }
      if (i == pid) {
	ifirst    = ifirst_tmp;
	ilast     = ilast_tmp;
	isize     = ilast - ifirst + 1;
	*offsetp = ifirst - iil;
	*nzp      = isize;
      }
      ifirst_tmp = ilast_tmp + 1;
      ifirst_tmp = imin(ifirst_tmp, iiu + 1);
    }
    if (isize > 0) {
      memmove(W, &W[ifirst-1], *nzp * sizeof(double));
    }

    /* If matrix was scaled, rescale eigenvalues */
    invscale_eigenvalues(Wstruct, scale, *nzp);

    clean_up
    (comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
     isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, tolstruct);

    return 0;
  } /* end of only eigenvalues to compute */

  /* Compute eigenvectors */
  info = plarrv(procinfo, Dstruct, Wstruct, Zstruct, tolstruct, 
		nzp, offsetp);
  assert(info == 0);

  /* Refine to high relative accuracy with respect to the input matrix */
  if (*tryracp) {
    info = refine_to_highrac(procinfo, jobz, Dcopy, E2copy, 
			     Dstruct, nzp, Wstruct, tolstruct);
    assert(info == 0);
  }

  /* If matrix was scaled, rescale eigenvalues */
  invscale_eigenvalues(Wstruct, scale, n);

  /* Make the first nz elements of W contain the eigenvalues
   * assigned to this process */
  int j, im=0;
  for (j=0; j<n; j++) {
    if (iproc[j] == pid) {
      W[im]      = W[j];
      Windex[im] = Windex[j];
      Zindex[im] = Zindex[j];
      im++;
    }
  }

  clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
	   isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
	   tolstruct);
  if (*tryracp) {
    free(Dcopy);
    free(E2copy);
  }

  return 0;
} /* end pmrrr */
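The thread-count decision near the top of pmrrr (MPI_Query_thread combined with the PMR_NUM_THREADS environment variable) can be isolated into a small helper. Here is a sketch under the same assumptions, with the fallback value passed in by the caller rather than taken from the library's DEFAULT_NUM_THREADS:

#include <mpi.h>
#include <stdlib.h>

/* Returns how many threads may safely run alongside MPI: 1 unless the MPI
 * library provides MPI_THREAD_FUNNELED or MPI_THREAD_MULTIPLE, otherwise
 * the value of PMR_NUM_THREADS or the caller-supplied default. */
static int choose_thread_count(int fallback_default)
{
    int provided;
    MPI_Query_thread(&provided);
    if (provided != MPI_THREAD_MULTIPLE && provided != MPI_THREAD_FUNNELED)
        return 1;

    const char *env = getenv("PMR_NUM_THREADS");
    return (env != NULL) ? atoi(env) : fallback_default;
}

pmrrr itself stores the result in procinfo->nthreads; the helper above only shows the decision in isolation.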
Example #30
void nrnmpi_init(int nrnmpi_under_nrncontrol, int* pargc, char*** pargv) {
#if NRNMPI
	int i, b, flag;
	static int called = 0;
	if (called) { return; }
	called = 1;
	nrnmpi_use = 1;
	nrnmpi_under_nrncontrol_ = nrnmpi_under_nrncontrol;
	if( nrnmpi_under_nrncontrol_ ) {
#if 0
{int i;
printf("nrnmpi_init: argc=%d\n", *pargc);
for (i=0; i < *pargc; ++i) {
	printf("%d |%s|\n", i, (*pargv)[i]);
}
}
#endif

#if NRN_MUSIC
	nrnmusic_init(pargc, pargv); /* see src/nrniv/nrnmusic.cpp */
#endif

#if !ALWAYS_CALL_MPI_INIT
	/* This is not ideal: it depends on mpirun adding at least one
	   argument that starts with -p4, which is probably specific to
	   MPICH with the ch_p4 device. We are working around the fact
	   that MPI_Init may change the working directory, so when not
	   launched under mpirun we would rather NOT call MPI_Init.
	*/
		b = 0;
		for (i=0; i < *pargc; ++i) {
			if (strncmp("-p4", (*pargv)[i], 3) == 0) {
				b = 1;
				break;
			}
			if (strcmp("-mpi", (*pargv)[i]) == 0) {
				b = 1;
				break;
			}
		}
		if (nrnmusic) { b = 1; }
		if (!b) {
			nrnmpi_use = 0;
			nrnmpi_under_nrncontrol_ = 0;
			return;
		}
#endif
		MPI_Initialized(&flag);

		if (!flag && MPI_Init(pargc, pargv) != MPI_SUCCESS) {
		  printf("MPI_INIT failed\n");
		}

#if NRN_MUSIC
		if (nrnmusic) {
			asrt(MPI_Comm_dup(nrnmusic_comm, &nrnmpi_world_comm));
		}else{
#else
		{
#endif
			asrt(MPI_Comm_dup(MPI_COMM_WORLD, &nrnmpi_world_comm));
		}
	}
	grp_bbs = MPI_GROUP_NULL;
	grp_net = MPI_GROUP_NULL;
	asrt(MPI_Comm_dup(nrnmpi_world_comm, &nrnmpi_comm));
	asrt(MPI_Comm_dup(nrnmpi_world_comm, &nrn_bbs_comm));
	asrt(MPI_Comm_rank(nrnmpi_world_comm, &nrnmpi_myid_world));
	asrt(MPI_Comm_size(nrnmpi_world_comm, &nrnmpi_numprocs_world));
	nrnmpi_numprocs = nrnmpi_numprocs_bbs = nrnmpi_numprocs_world;
	nrnmpi_myid = nrnmpi_myid_bbs = nrnmpi_myid_world;
	nrnmpi_spike_initialize();
	/*begin instrumentation*/
#if USE_HPM
	hpmInit( nrnmpi_myid_world, "mpineuron" );
#endif
#if 0
{int i;
printf("nrnmpi_init: argc=%d\n", *pargc);
for (i=0; i < *pargc; ++i) {
	printf("%d |%s|\n", i, (*pargv)[i]);
}
}
#endif
#if 1
	if (nrnmpi_myid == 0) {
		printf("numprocs=%d\n", nrnmpi_numprocs_world);
	}
#endif

#endif /* NRNMPI */



}
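The core initialization pattern in nrnmpi_init, namely call MPI_Init only if no embedding application has done so already and then work on a private duplicate of MPI_COMM_WORLD, can be sketched on its own. The function and variable names below are illustrative, not part of NEURON's API:

#include <mpi.h>
#include <stdio.h>

static MPI_Comm my_comm = MPI_COMM_NULL;   /* library-private communicator */

/* Returns 0 on success; safe whether or not MPI_Init was already called. */
static int init_private_comm(int *argc, char ***argv)
{
    int already = 0;
    MPI_Initialized(&already);
    if (!already && MPI_Init(argc, argv) != MPI_SUCCESS) {
        fprintf(stderr, "MPI_Init failed\n");
        return 1;
    }
    /* Duplicate the world communicator so this library's traffic cannot
     * collide with messages sent by the embedding application. */
    return (MPI_Comm_dup(MPI_COMM_WORLD, &my_comm) == MPI_SUCCESS) ? 0 : 1;
}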

double nrnmpi_wtime() {
#if NRNMPI
	if (nrnmpi_use) {
		return MPI_Wtime();
	}
#endif
	return nrn_timeus();
}
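nrnmpi_wtime falls back to a non-MPI timer when MPI is not in use. A minimal sketch of the same idea with POSIX gettimeofday as the fallback; the function name is illustrative:

#include <mpi.h>
#include <sys/time.h>

/* Wall-clock seconds, usable before MPI_Init and after MPI_Finalize. */
static double wall_seconds(void)
{
    int is_init = 0, is_final = 0;
    MPI_Initialized(&is_init);
    MPI_Finalized(&is_final);
    if (is_init && !is_final)
        return MPI_Wtime();

    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (double)tv.tv_sec + 1e-6 * (double)tv.tv_usec;
}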

void nrnmpi_terminate() {
#if NRNMPI
	if (nrnmpi_use) {
#if 0
		printf("%d nrnmpi_terminate\n", nrnmpi_myid_world);
#endif
#if USE_HPM
		hpmTerminate( nrnmpi_myid_world );
#endif
		if( nrnmpi_under_nrncontrol_ ) {
#if NRN_MUSIC
			if (nrnmusic) {
				nrnmusic_terminate();
			}else
#endif
			MPI_Finalize();
		}
		nrnmpi_use = 0;
#if nrnmpidebugleak
		nrnmpi_checkbufleak();
#endif
	}
#endif /*NRNMPI*/
}

void nrnmpi_abort(int errcode) {
#if NRNMPI
	int flag;
	MPI_Initialized(&flag);
	if (flag) {
		MPI_Abort(MPI_COMM_WORLD, errcode);
	}else{
		abort();
	}
#else
	abort();
#endif
}