Example #1
int MPI_Init( int *argc, char **argv[] )
{

    int i;
    int ret;

    // call the real init function and keep its return code
    ret = PMPI_Init( argc, argv );

    // get current rank
    PMPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // get total number of processes
    PMPI_Comm_size(MPI_COMM_WORLD, &proc_num);

    if(my_rank == 0){
        send_count = (int *)malloc(proc_num * proc_num * sizeof(int));
        send_size = (int *)malloc(proc_num * proc_num * sizeof(int));
        
    }

    // allocate space for local statistic storage
    my_send_count = (int *)malloc(proc_num * sizeof(int));
    my_send_size = (int *)malloc(proc_num * sizeof(int));

    // initialize statistics storage
    for(i = 0; i < proc_num; i++){
        my_send_count[i] = 0;
        my_send_size[i] = 0;        
    }

    return ret;
}
Example #2
void ompi_igatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
                     char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs,
                     MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm,
                     MPI_Fint *request, MPI_Fint *ierr)
{
    MPI_Comm c_comm;
    MPI_Datatype c_sendtype, c_recvtype;
    MPI_Request c_request;
    int size, c_ierr;
    OMPI_ARRAY_NAME_DECL(recvcounts);
    OMPI_ARRAY_NAME_DECL(displs);

    c_comm = PMPI_Comm_f2c(*comm);
    c_sendtype = PMPI_Type_f2c(*sendtype);
    c_recvtype = PMPI_Type_f2c(*recvtype);

    PMPI_Comm_size(c_comm, &size);
    OMPI_ARRAY_FINT_2_INT(recvcounts, size);
    OMPI_ARRAY_FINT_2_INT(displs, size);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    c_ierr = PMPI_Igatherv(sendbuf, OMPI_FINT_2_INT(*sendcount),
                           c_sendtype, recvbuf,
                           OMPI_ARRAY_NAME_CONVERT(recvcounts),
                           OMPI_ARRAY_NAME_CONVERT(displs),
                           c_recvtype,
                           OMPI_FINT_2_INT(*root),
                           c_comm, &c_request);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
    if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request);
}
Example #3
void ompi_ireduce_scatter_f(char *sendbuf, char *recvbuf,
                            MPI_Fint *recvcounts, MPI_Fint *datatype,
                            MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request,
                            MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Comm c_comm;
    MPI_Datatype c_type;
    MPI_Request c_request;
    MPI_Op c_op;
    int size;
    OMPI_ARRAY_NAME_DECL(recvcounts);

    c_comm = PMPI_Comm_f2c(*comm);
    c_type = PMPI_Type_f2c(*datatype);
    c_op = PMPI_Op_f2c(*op);

    PMPI_Comm_size(c_comm, &size);
    OMPI_ARRAY_FINT_2_INT(recvcounts, size);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    c_ierr = PMPI_Ireduce_scatter(sendbuf, recvbuf,
                                 OMPI_ARRAY_NAME_CONVERT(recvcounts),
                                 c_type, c_op, c_comm, &c_request);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
    if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request);
}
Example #4
void ompi_reduce_scatter_block_f(char *sendbuf, char *recvbuf,
                                 MPI_Fint *recvcount, MPI_Fint *datatype,
                                 MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Comm c_comm;
    MPI_Datatype c_type;
    MPI_Op c_op;
    int size;

    c_comm = PMPI_Comm_f2c(*comm);
    c_type = PMPI_Type_f2c(*datatype);
    c_op = PMPI_Op_f2c(*op);

    PMPI_Comm_size(c_comm, &size);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    c_ierr = PMPI_Reduce_scatter_block(sendbuf, recvbuf,
                                      OMPI_FINT_2_INT(*recvcount),
                                      c_type, c_op, c_comm);
   if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
}
Example #5
int MPI_Init(int* argc, char*** argv)
{
    int result, MPIT_result;
    int provided;

    result = PMPI_Init(argc, argv);

    PMPI_Comm_size(MPI_COMM_WORLD, &comm_world_size);
    PMPI_Comm_rank(MPI_COMM_WORLD, &comm_world_rank);

    MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
    if (MPIT_result != MPI_SUCCESS) {
        fprintf(stderr, "ERROR : failed to intialize MPI_T interface, preventing to get monitoring results: check your OpenMPI installation\n");
        PMPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    MPIT_result = MPI_T_pvar_session_create(&session);
    if (MPIT_result != MPI_SUCCESS) {
        fprintf(stderr, "ERROR : failed to create MPI_T session, preventing to get monitoring results: check your OpenMPI installation\n");
        PMPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    init_monitoring_result("pml_monitoring_messages_count", &counts);
    init_monitoring_result("pml_monitoring_messages_size", &sizes);

    start_monitoring_result(&counts);
    start_monitoring_result(&sizes);

    return result;
}
Example #6
int MPI_Comm_size(MPI_Comm comm, int* size_ptr)
{
    if (handle_get_type(comm) == COMM_EP)
    {
        *size_ptr = handle_get_size(comm);
        return MPI_SUCCESS;
    }
    else
       return PMPI_Comm_size(comm, size_ptr);
}
Example #7
void ompi_comm_spawn_f(char *command, char *argv, MPI_Fint *maxprocs,
		      MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm,
		      MPI_Fint *intercomm, MPI_Fint *array_of_errcodes,
		      MPI_Fint *ierr, int cmd_len, int string_len)
{
    MPI_Comm c_comm, c_new_comm;
    MPI_Info c_info;
    int size, c_ierr;
    int *c_errs;
    char **c_argv;
    char *c_command;
    OMPI_ARRAY_NAME_DECL(array_of_errcodes);

    c_comm = PMPI_Comm_f2c(*comm);
    c_info = PMPI_Info_f2c(*info);
    PMPI_Comm_size(c_comm, &size);
    ompi_fortran_string_f2c(command, cmd_len, &c_command);

    /* It's allowed to ignore the errcodes */

    if (OMPI_IS_FORTRAN_ERRCODES_IGNORE(array_of_errcodes)) {
        c_errs = MPI_ERRCODES_IGNORE;
    } else {
        OMPI_ARRAY_FINT_2_INT_ALLOC(array_of_errcodes, size);
        c_errs = OMPI_ARRAY_NAME_CONVERT(array_of_errcodes);
    }

    /* It's allowed to have no argv */

    if (OMPI_IS_FORTRAN_ARGV_NULL(argv)) {
        c_argv = MPI_ARGV_NULL;
    } else {
        ompi_fortran_argv_blank_f2c(argv, string_len, string_len, &c_argv);
    }

    c_ierr = PMPI_Comm_spawn(c_command, c_argv,
                            OMPI_FINT_2_INT(*maxprocs),
                            c_info,
                            OMPI_FINT_2_INT(*root),
                            c_comm, &c_new_comm, c_errs);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    if (MPI_SUCCESS == c_ierr) {
        *intercomm = PMPI_Comm_c2f(c_new_comm);
    }
    free(c_command);
    if (MPI_ARGV_NULL != c_argv && NULL != c_argv) {
        opal_argv_free(c_argv);
    }
    if (!OMPI_IS_FORTRAN_ERRCODES_IGNORE(array_of_errcodes)) {
	OMPI_ARRAY_INT_2_FINT(array_of_errcodes, size);
    } else {
	OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_errcodes);
    }
}
Example #8
void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts,
		     MPI_Fint *sdispls, MPI_Fint *sendtypes,
		     char *recvbuf, MPI_Fint *recvcounts,
		     MPI_Fint *rdispls, MPI_Fint *recvtypes,
		     MPI_Fint *comm, MPI_Fint *ierr)
{
    MPI_Comm c_comm;
    MPI_Datatype *c_sendtypes, *c_recvtypes;
    int size, c_ierr;
    OMPI_ARRAY_NAME_DECL(sendcounts);
    OMPI_ARRAY_NAME_DECL(sdispls);
    OMPI_ARRAY_NAME_DECL(recvcounts);
    OMPI_ARRAY_NAME_DECL(rdispls);

    c_comm = PMPI_Comm_f2c(*comm);
    PMPI_Comm_size(c_comm, &size);

    c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype));
    c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype));

    OMPI_ARRAY_FINT_2_INT(sendcounts, size);
    OMPI_ARRAY_FINT_2_INT(sdispls, size);
    OMPI_ARRAY_FINT_2_INT(recvcounts, size);
    OMPI_ARRAY_FINT_2_INT(rdispls, size);

    while (size > 0) {
        c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]);
        c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]);
        --size;
    }

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    c_ierr = PMPI_Alltoallw(sendbuf,
                           OMPI_ARRAY_NAME_CONVERT(sendcounts),
                           OMPI_ARRAY_NAME_CONVERT(sdispls),
                           c_sendtypes,
                           recvbuf,
                           OMPI_ARRAY_NAME_CONVERT(recvcounts),
                           OMPI_ARRAY_NAME_CONVERT(rdispls),
                           c_recvtypes, c_comm);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    OMPI_ARRAY_FINT_2_INT_CLEANUP(sendcounts);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(sdispls);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(rdispls);
    free(c_sendtypes);
    free(c_recvtypes);
}
Example #9
/**
 * Measurement wrapper for MPI_Comm_size
 * @note Auto-generated by wrapgen from template: std.w
 * @note C interface
 * @note Introduced with MPI 1.0
 * @ingroup cg
 */
int MPI_Comm_size(MPI_Comm comm,
                  int*     size)
{
  int return_val;

  if (IS_EVENT_GEN_ON_FOR(CG))
  {
    EVENT_GEN_OFF();
    esd_enter(epk_mpi_regid[EPK__MPI_COMM_SIZE]);

    return_val = PMPI_Comm_size(comm, size);

    esd_exit(epk_mpi_regid[EPK__MPI_COMM_SIZE]);
    EVENT_GEN_ON();
  }
  else
  {
    return_val = PMPI_Comm_size(comm, size);
  }

  return return_val;
}
Example #10
void ompi_comm_size_f(MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Comm c_comm = PMPI_Comm_f2c( *comm );
    OMPI_SINGLE_NAME_DECL(size);

    c_ierr = PMPI_Comm_size( c_comm, OMPI_SINGLE_NAME_CONVERT(size) );
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    if (MPI_SUCCESS == c_ierr) {
       OMPI_SINGLE_INT_2_FINT(size);
    }
}
Example #11
#ifdef USES_PSINSTRACER
void __give_pebil_name(mpi_init_)(int* ierr){
#else
void __wrapper_name(mpi_init_)(int* ierr){
    pmpi_init_(ierr);
#endif // USES_PSINSTRACER

    PMPI_Comm_rank(MPI_COMM_WORLD, &__taskid);
    PMPI_Comm_size(MPI_COMM_WORLD, &__ntasks);

    mpiValid = 1;

    fprintf(stdout, "-[p%d]- remapping to taskid %d/%d on host %u in mpi_init_ wrapper\n", getpid(), __taskid, __ntasks, gethostid());
    tool_mpi_init();
}
Example #12
#ifdef USES_PSINSTRACER
int __give_pebil_name(MPI_Init)(int* argc, char*** argv){
    int retval = 0;
#else
int __wrapper_name(MPI_Init)(int* argc, char*** argv){
    int retval = PMPI_Init(argc, argv);
#endif // USES_PSINSTRACER

    PMPI_Comm_rank(MPI_COMM_WORLD, &__taskid);
    PMPI_Comm_size(MPI_COMM_WORLD, &__ntasks);

    mpiValid = 1;

    fprintf(stdout, "-[p%d]- remapping to taskid %d/%d on host %u in MPI_Init wrapper\n", getpid(), __taskid, __ntasks, gethostid());
    tool_mpi_init();

    return retval;
}
Example #13
/* MPI_Bcast_user: This is our version of MPI function. */
int MPI_Bcast_user(void *buf, int count, MPI_Datatype datatype, int root,
                   MPI_Comm comm)
{
	int i, rank, commsize;

	PMPI_Comm_size(comm, &commsize);
	PMPI_Comm_rank(comm, &rank);

	/* Simple linear algorithm for broadcast */
	if (rank == root) {
		for (i = 0; i < commsize; i++) {
			if (i != root)
				PMPI_Send(buf, count, datatype, i, 4321, comm);
		}
	} else {
		PMPI_Recv(buf, count, datatype, root, 4321, comm, MPI_STATUS_IGNORE);
	}
	return MPI_SUCCESS;
}
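All of the wrappers collected here rely on the MPI profiling interface: the tool defines its own MPI_Xxx entry point and forwards to the real implementation through the matching PMPI_Xxx symbol. As a minimal sketch of how the linear broadcast above could be plugged in as an interposer (the selection flag below is hypothetical, not part of the quoted project):

#include <mpi.h>

/* Defined in the example above. */
int MPI_Bcast_user(void *buf, int count, MPI_Datatype datatype, int root,
                   MPI_Comm comm);

/* Hypothetical switch: use the custom linear algorithm, or fall back to
 * the library's native broadcast via the profiling entry point. */
static int use_linear_bcast = 1;

int MPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root,
              MPI_Comm comm)
{
    if (use_linear_bcast)
        return MPI_Bcast_user(buf, count, datatype, root, comm);

    /* PMPI_Bcast is the unwrapped implementation. */
    return PMPI_Bcast(buf, count, datatype, root, comm);
}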
Example #14
  void synchronize_effort_keys(effort_data& effort_log, MPI_Comm comm) {
    int rank, size;
    PMPI_Comm_rank(comm, &rank);
    PMPI_Comm_size(comm, &size);

    relatives rels = get_radix_relatives(rank, size);
    if (rels.left >= 0)  receive_keys(effort_log, rels.left, comm);
    if (rels.right >= 0) receive_keys(effort_log, rels.right, comm);
  
    if (rels.parent >= 0) {
      send_keys(effort_log, rels.parent, comm);
      receive_keys(effort_log, rels.parent, comm);
    }

    // TODO: be more efficient and only propagate the difference 
    // back up the tree.
    if (rels.left >= 0)  send_keys(effort_log, rels.left, comm);
    if (rels.right >= 0) send_keys(effort_log, rels.right, comm);
  }
Example #15
void ompi_neighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls,
                               MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts,
                               MPI_Fint *rdispls, MPI_Fint *recvtype,
                               MPI_Fint *comm, MPI_Fint *ierr)
{
    MPI_Comm c_comm;
    MPI_Datatype c_sendtype, c_recvtype;
    int size, c_ierr;
    OMPI_ARRAY_NAME_DECL(sendcounts);
    OMPI_ARRAY_NAME_DECL(sdispls);
    OMPI_ARRAY_NAME_DECL(recvcounts);
    OMPI_ARRAY_NAME_DECL(rdispls);

    c_comm = PMPI_Comm_f2c(*comm);
    c_sendtype = PMPI_Type_f2c(*sendtype);
    c_recvtype = PMPI_Type_f2c(*recvtype);

    PMPI_Comm_size(c_comm, &size);
    OMPI_ARRAY_FINT_2_INT(sendcounts, size);
    OMPI_ARRAY_FINT_2_INT(sdispls, size);
    OMPI_ARRAY_FINT_2_INT(recvcounts, size);
    OMPI_ARRAY_FINT_2_INT(rdispls, size);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    c_ierr = PMPI_Neighbor_alltoallv(sendbuf,
                                    OMPI_ARRAY_NAME_CONVERT(sendcounts),
                                    OMPI_ARRAY_NAME_CONVERT(sdispls),
                                    c_sendtype,
                                    recvbuf,
                                    OMPI_ARRAY_NAME_CONVERT(recvcounts),
                                    OMPI_ARRAY_NAME_CONVERT(rdispls),
                                    c_recvtype, c_comm);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    OMPI_ARRAY_FINT_2_INT_CLEANUP(sendcounts);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(sdispls);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(rdispls);
}
Example #16
void COMM_ALL_INIT(int argc, char **argv)
{
  PMPI_Comm_size(MPI_COMM_WORLD, &critpath_tasks);
  PMPI_Comm_rank(MPI_COMM_WORLD, &critpath_myid);

  local_timestamp = 0;
  critpath = 1;
  after_barrier = 0;
  barrier_number = 1;

  graphlib_newGraph(&lgr);

  barrier_number = 0;
  //  cp_setnode(-1,-1,-1,0);

  barrier_number = 1;
  cp_setnode(-1, local_timestamp, -1, critpath);
  after_barrier = 1;
  barrier_number++;
}
Example #17
int check_comm_registry(MPI_Comm comm)
{
    MPID_Comm* comm_ptr;
    MPIU_THREADPRIV_DECL;
    MPIU_THREADPRIV_GET;
    MPID_Comm_get_ptr( comm, comm_ptr );
    int context_id = 0, i = 0, my_rank, size;
    context_id = comm_ptr->context_id;

    PMPI_Comm_rank(comm, &my_rank);
    PMPI_Comm_size(comm, &size);

    for (i = 0; i < comm_registered; i++){
        if (comm_registry[i] == context_id){
            return 1;
        }
    }

    return 0;
}
Example #18
int main( int argc, char* argv[] )
{
  int i, j;
  int myrank, nprocs;
  char buf[256];

  MPI_Init( &argc, &argv);

  PMPI_Comm_rank( MPI_COMM_WORLD, &myrank );
  PMPI_Comm_size( MPI_COMM_WORLD, &nprocs );

  MPI_Bcast(buf, 1, MPI_BYTE, 0, MPI_COMM_WORLD);

  for( i=0; i<OUTER; i++ ) {
    for( j=0; j<INNER; j++ ) {
      MPI_Bcast(buf, 2, MPI_BYTE, 0, MPI_COMM_WORLD);
      MPI_Bcast(buf, 3, MPI_BYTE, 0, MPI_COMM_WORLD);
    }

    for( j=0; j<INNER; j++ ) {
      MPI_Bcast(buf, 4, MPI_BYTE, 0, MPI_COMM_WORLD);
      MPI_Bcast(buf, 5, MPI_BYTE, 0, MPI_COMM_WORLD);
    }

    if( i==OUTER-1 ) {
      MPI_Bcast(buf, 7, MPI_BYTE, 0, MPI_COMM_WORLD);
    } 
    
    MPI_Bcast(buf, 6, MPI_BYTE, 0, MPI_COMM_WORLD);
    
    MPI_Bcast(buf, 8, MPI_BYTE, 0, MPI_COMM_WORLD);
  }

  MPI_Bcast(buf, 9, MPI_BYTE, 0, MPI_COMM_WORLD);

 
  MPI_Finalize();
  return 0;
}
Example #19
int MPI_Allgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype,
                  void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
{
    int ret;
    cqueue_t* mycqueue = handle_get_cqueue(comm);

    if (mycqueue != NULL)
    {
        MPI_Request tmprequest;
        ret = cqueue_iallgather(mycqueue, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, &tmprequest);
        return MPI_Wait(&tmprequest, MPI_STATUS_IGNORE);
    }
    else
    {
        if (std_mpi_mode == STD_MPI_MODE_IMPLICIT && max_ep > 0)
        {
            int commsize;
            int typesize;
            MPI_Type_size(recvtype, &typesize);
            if (sendbuf == MPI_IN_PLACE)
            {
                PMPI_Comm_size(comm, &commsize);
                for (int rank = 0; rank < commsize; rank++)
                {
                    block_coll_request[rank] = MPI_REQUEST_NULL;
                    cqueue_ibcast(client_get_cqueue(rank % max_ep),
                                  (char *) recvbuf + (size_t) rank * typesize * recvcount,
                                  recvcount, recvtype, rank, comm, &block_coll_request[rank]);
                }
                return MPI_Waitall(commsize, block_coll_request, MPI_STATUSES_IGNORE);
            }
            else
              return PMPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
        }
        return PMPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
    }

    return ret;
}
Example #20
int EPLIB_init()
{
    process_env_vars();

    init_sig_handlers();

    PMPI_Comm_rank(MPI_COMM_WORLD, &taskid);
    PMPI_Comm_size(MPI_COMM_WORLD, &num_tasks);

    set_local_uuid(taskid);

    allocator_init();

    if (max_ep == 0) return MPI_SUCCESS;

    /* Initialize client */
    client_init(taskid, num_tasks);

    /* Register MPI type and MPI Op before any other cqueue commands */
    cqueue_mpi_type_register();
    cqueue_mpi_op_register();

    /* Initialize communicator handles table */
    handle_init();

    /* Initialize window object table */
    window_init();

    /* Create server world and peer comm for MPI_COMM_WORLD */
    EPLIB_split_comm(MPI_COMM_WORLD, 0, taskid, MPI_COMM_WORLD);

    if (std_mpi_mode == STD_MPI_MODE_IMPLICIT)
        block_coll_request = malloc(max_ep*sizeof(MPI_Request));

    return MPI_SUCCESS;
}
Example #21
int MPI_Win_flush_all(MPI_Win win)
{
    MTCORE_Win *uh_win;
    int mpi_errno = MPI_SUCCESS;
    int user_rank, user_nprocs;
    int i, j, k;

    MTCORE_DBG_PRINT_FCNAME();

    MTCORE_Fetch_uh_win_from_cache(win, uh_win);

    if (uh_win == NULL) {
        /* normal window */
        return PMPI_Win_flush_all(win);
    }

    /* mtcore window starts */

    MTCORE_Assert((uh_win->info_args.epoch_type & MTCORE_EPOCH_LOCK) ||
                  (uh_win->info_args.epoch_type & MTCORE_EPOCH_LOCK_ALL));

    PMPI_Comm_rank(uh_win->user_comm, &user_rank);
    PMPI_Comm_size(uh_win->user_comm, &user_nprocs);

    if (!(uh_win->info_args.epoch_type & MTCORE_EPOCH_LOCK)) {
        /* In lock_all only epoch, single window is shared by multiple targets. */

#ifdef MTCORE_ENABLE_SYNC_ALL_OPT

        /* Optimization for MPI implementations that provide an optimized lock_all.
         * Note, however, that if the MPI implementation issues lock messages for
         * every target even when no operation is pending, this optimization can
         * hurt performance and even lose asynchronous progress. */
        MTCORE_DBG_PRINT("[%d]flush_all(uh_win 0x%x)\n", user_rank, uh_win->uh_wins[0]);
        mpi_errno = PMPI_Win_flush_all(uh_win->uh_wins[0]);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
#else
        /* Flush every helper once in the single window.
         * TODO: track op issuing, only flush the helpers which receive ops. */
        for (i = 0; i < uh_win->num_h_ranks_in_uh; i++) {
            mpi_errno = PMPI_Win_flush(uh_win->h_ranks_in_uh[i], uh_win->uh_wins[0]);
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;
        }
#endif

#ifdef MTCORE_ENABLE_LOCAL_LOCK_OPT
        mpi_errno = MTCORE_Win_flush_self_impl(uh_win);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
#endif

    }
    else {

        /* In lock_all/lock mixed epoch, separate windows are bound with each target. */
        mpi_errno = MTCORE_Win_mixed_flush_all_impl(win, uh_win);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

#if defined(MTCORE_ENABLE_RUNTIME_LOAD_OPT)
    for (i = 0; i < user_nprocs; i++) {
        for (j = 0; j < uh_win->targets[i].num_segs; j++) {
            /* Lock of main helper is granted, we can start load balancing from the next flush/unlock.
             * Note that only target which was issued operations to is guaranteed to be granted. */
            if (uh_win->targets[i].segs[j].main_lock_stat == MTCORE_MAIN_LOCK_OP_ISSUED) {
                uh_win->targets[i].segs[j].main_lock_stat = MTCORE_MAIN_LOCK_GRANTED;
                MTCORE_DBG_PRINT("[%d] main lock (rank %d, seg %d) granted\n", user_rank, i, j);
            }

            MTCORE_Reset_win_target_load_opt(i, uh_win);
        }
    }
#endif

    /* TODO: All the operations which we have not wrapped up will be failed, because they
     * are issued to user window. We need wrap up all operations.
     */

  fn_exit:
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}
Example #22
static int MTCORE_Win_mixed_flush_all_impl(MPI_Win win, MTCORE_Win * uh_win)
{
    int mpi_errno = MPI_SUCCESS;
    int user_rank, user_nprocs;
    int i, j, k;

    PMPI_Comm_rank(uh_win->user_comm, &user_rank);
    PMPI_Comm_size(uh_win->user_comm, &user_nprocs);

    /* Flush all helpers in the corresponding uh-window of each target process. */
#ifdef MTCORE_ENABLE_SYNC_ALL_OPT

    /* Optimization for MPI implementations that provide an optimized lock_all.
     * Note, however, that if the MPI implementation issues lock messages for
     * every target even when no operation is pending, this optimization can
     * hurt performance and even lose asynchronous progress. */
    for (i = 0; i < uh_win->num_uh_wins; i++) {
        MTCORE_DBG_PRINT("[%d]flush_all(uh_win 0x%x)\n", user_rank, uh_win->uh_wins[i]);
        mpi_errno = PMPI_Win_flush_all(uh_win->uh_wins[i]);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }
#else

    /* TODO: track op issuing, only flush the helpers which receive ops. */
    for (i = 0; i < user_nprocs; i++) {
#if !defined(MTCORE_ENABLE_RUNTIME_LOAD_OPT)
        /* RMA operations are only issued to the main helper, so we only flush it. */
        for (j = 0; j < uh_win->targets[i].num_segs; j++) {
            int main_h_off = uh_win->targets[i].segs[j].main_h_off;
            int target_h_rank_in_uh = uh_win->targets[i].h_ranks_in_uh[main_h_off];
            MTCORE_DBG_PRINT("[%d]flush(Helper(%d), uh_wins 0x%x), instead of "
                             "target rank %d seg %d\n", user_rank, target_h_rank_in_uh,
                             uh_win->targets[i].segs[j].uh_win, i, j);

            mpi_errno = PMPI_Win_flush(target_h_rank_in_uh, uh_win->targets[i].segs[j].uh_win);
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;
        }
#else
        /* RMA operations may be distributed to all helpers, so we should
         * flush all helpers on all windows. See discussion in win_flush. */
        for (k = 0; k < MTCORE_ENV.num_h; k++) {
            int target_h_rank_in_uh = uh_win->targets[i].h_ranks_in_uh[k];
            MTCORE_DBG_PRINT("[%d]flush(Helper(%d), uh_win 0x%x), instead of "
                             "target rank %d\n", user_rank, target_h_rank_in_uh,
                             uh_win->targets[i].uh_win, i);

            mpi_errno = PMPI_Win_flush(target_h_rank_in_uh, uh_win->targets[i].uh_win);
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;
        }
#endif /*end of MTCORE_ENABLE_RUNTIME_LOAD_OPT */
    }
#endif /*end of MTCORE_ENABLE_SYNC_ALL_OPT */

#ifdef MTCORE_ENABLE_LOCAL_LOCK_OPT
    mpi_errno = MTCORE_Win_flush_self_impl(uh_win);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;
#endif

  fn_exit:
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}
Example #23
int main(int argc, char *argv[]) {

    int   numproc, rank, len;
    char  hostname[MPI_MAX_PROCESSOR_NAME];

    PMPI_Init(&argc, &argv);
    PMPI_Comm_size(MPI_COMM_WORLD, &numproc);
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
    PMPI_Get_processor_name(hostname, &len);

    if (rank==0) {
        int *freq,i,j;
        freq=(int *)malloc(sizeof(int)*numproc*numproc);
        char *temp;
        temp=(char*)malloc(sizeof(char)*(numproc-1));
        MPI_Status *stat, *stat1;
        stat = (MPI_Status*)malloc(sizeof(MPI_Status)*(numproc-1));
        stat1 = (MPI_Status*)malloc(sizeof(MPI_Status)*(numproc-1));
        MPI_Request *req;
        req = (MPI_Request *)malloc(sizeof(MPI_Request)*(numproc-1));
        int N=numproc*numproc;

        for(i=1; i<numproc; i++) {
            PMPI_Recv(temp+i-1, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD, stat+(i-1));//, req+(i-1)*2);
        }

        for(i=1; i<numproc; i++) {
            PMPI_Recv(freq+i*numproc, numproc, MPI_INT, i, 1, MPI_COMM_WORLD,
                      stat1+(i-1));
        }

        printf("echo\n");
        // MPI_Waitall((numproc-1), req, stat);
        for (i=1; i<numproc; i++) {
            printf("Rank %d ", i);
            for (j=0; j<numproc; j++) {
                if(j!=i) {
                    int loc = i*numproc+j;
                    printf("%d ",freq[loc]);
                }
            }
            printf("\n");
        }
    }

    else {
        int i, *nsend;
        char *rMsg, msg='x';
        rMsg=(char*)malloc(sizeof(char));
        nsend=(int*)malloc(sizeof(int)*numproc);
        // msg=(char*)malloc(sizeof(char));
        // memset(msg, 'z', sizeof(char));
        memset(nsend, 0, sizeof(int)*numproc);
        MPI_Request *req;
        req = (MPI_Request *)malloc(sizeof(MPI_Request)*(numproc));
        MPI_Status *stat;
        stat = (MPI_Status*)malloc(sizeof(MPI_Status)*(numproc-1));
        for (i=0; i<numproc; i++) {
            if(i!=rank) {
                *(nsend+i)+=*(nsend+i)+1;
                PMPI_Isend(&msg, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD, &(req[i]));
            }
        }
        // printf("Echo-1\n");
        for (i=1; i<numproc; i++) {
            if (i!=rank)
                PMPI_Recv(rMsg, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD, stat+i-1);
        }
        // printf("Echo-2\n");
        MPI_Isend(nsend, numproc, MPI_INT, 0, 1, MPI_COMM_WORLD, &req[rank]);
        // MPI_Isend(msg, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD, req+numproc);
        // printf("Echo-3\n");
    }
    PMPI_Finalize();
    return(0);
}
Example #24
/******************************************************************
*                                                                 *
*                   MPI Functions for Management                  *
*                                                                 *
******************************************************************/
double E_MPI_Init(int * argc, char*** argv)
{
    // assume all data files already exist;
    // users may run IMB manually
    // and copy the data to all machines manually
    parse_loggpo("paras/cmp_para", &log_cmp);
    parse_loggpo("paras/net_para", &log_net);
    parse_loggpo("paras/smp_para", &log_smp);
    parse_imb("paras/coll_para", &imb);
    // get self location HOSTNAME:CORE
    char proc_file_name[50];
    sprintf(proc_file_name, "/proc/%d/stat", getpid());
    FILE* proc_file = fopen(proc_file_name, "r");
    if (! proc_file) {
        printf("Proc File %s Open Failed!\n", proc_file_name);
    }
    int core;
    if (1 != fscanf(proc_file, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %d %*u %*u %*u",&core))
        printf("Read Core ID Failed!\n");
    char hostname[40];
    gethostname(hostname,40);
    // Send their info to rank:0
    int gsize;
    PMPI_Comm_size( MPI_COMM_WORLD, &gsize);
    location = (pLocation)malloc(gsize*sizeof(Location));
    int myrank;
    PMPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    if (myrank != 0) {
        char sendbuf[100];
        sprintf(sendbuf, "%s %d",hostname,core);
        PMPI_Send( sendbuf, 100, MPI_CHAR, 0, myrank, MPI_COMM_WORLD);
    }
    else {
        char rbuf[100];
        MPI_Status ms;
        char** ls = (char**)malloc(gsize*sizeof(char*));
        int cnt = 0;
        ls[0] = (char*)malloc(40);
        if (ls[0] == strcpy(ls[0],hostname)) 
            ++ cnt;
        location[0].node = 0;
        location[0].core = core;
        char r_hn[40];
        int r_core;
        for (int rank = 1; rank < gsize; ++ rank) {
            PMPI_Recv(rbuf, 100, MPI_CHAR, rank, rank, MPI_COMM_WORLD, &ms);
            sscanf(rbuf,"%s %d",r_hn,&r_core);
            int i;
            for (i = 0; i < cnt; ++ i) {
                if(strcmp(ls[i],r_hn) == 0) {
                    location[rank].node = i;
                    location[rank].core = r_core;
                    break;
                }
            }
            if (i == cnt) {
                ls[i] = (char*)malloc(40);
                if (ls[i] == strcpy(ls[i],r_hn)) 
                    ++ cnt;
                location[rank].node = i;
                location[rank].core = r_core;
            }
        }
        for (int i = 0; i < cnt; ++ i) 
            free(ls[i]);
        free(ls);
#if 0
        printf("from RANK 0\n");
        for (int i = 0; i < gsize; ++ i) {
            printf ("rank:%d, node:%d, core:%d\n", i, location[i].node, location[i].core );
        }
#endif 
    }
    // broadcast the location table to all MPI ranks
    PMPI_Bcast(location, 2*gsize, MPI_INT, 0, MPI_COMM_WORLD);
#if 0
    if (myrank == 20) {
        printf("from RANK 20\n");
        for (int i = 0; i < gsize; ++ i) {
            printf ("rank:%d, node:%d, core:%d\n", i, location[i].node, location[i].core );
        }
    }
#endif
    req_list.len = 0;
    req_list.head = NULL;
	return 0;
}
Example #25
/* task level init 
   - executed by each MPI task only once immediately after MPI_Init
*/
void
mpiPi_init (char *appName)
{
  if (time (&mpiPi.start_timeofday) == (time_t) - 1)
    {
      mpiPi_msg_warn ("Could not get time of day from time()\n");
    }

  mpiPi.toolname = "mpiP";
  mpiPi.comm = MPI_COMM_WORLD;
  mpiPi.tag = 9821;
  mpiPi.procID = getpid ();
  mpiPi.appName = strdup (appName);
  PMPI_Comm_rank (mpiPi.comm, &mpiPi.rank);
  PMPI_Comm_size (mpiPi.comm, &mpiPi.size);
  PMPI_Get_processor_name (mpiPi.hostname, &mpiPi.hostnamelen);
  mpiPi.stdout_ = stdout;
  mpiPi.stderr_ = stderr;
  mpiPi.lookup = mpiPi_lookup;

  mpiPi.enabled = 1;
  mpiPi.enabledCount = 1;
  mpiPi.cumulativeTime = 0.0;
  mpiPi.global_app_time = 0.0;
  mpiPi.global_mpi_time = 0.0;
  mpiPi.global_mpi_size = 0.0;
  mpiPi.global_mpi_io = 0.0;
  mpiPi.global_mpi_msize_threshold_count = 0;
  mpiPi.global_mpi_sent_count = 0;
  mpiPi.global_time_callsite_count = 0;
  mpiPi.global_task_info = NULL;
  
  char tmpfilename[64];
  sprintf(tmpfilename, "%d.trace", mpiPi.rank);
  mpiPi.recfile = fopen(tmpfilename, "wb");
  printf("Opened record file %s\n", tmpfilename);

  /* set some default values */
  mpiPi.collectorRank = 0;
  mpiPi.tableSize = 256;
  mpiPi.stackDepth = 1;		/* the value 2 includes parent wrapper function */
  mpiPi.reportPrintThreshold = 0.0;
  mpiPi.baseNames = 0;
  mpiPi.reportFormat = MPIP_REPORT_SCI_FORMAT;
  mpiPi.calcCOV = 1;
  mpiPi.inAPIrtb = 0;
  mpiPi.do_lookup = 1;
  mpiPi.messageCountThreshold = -1;
  mpiPi.report_style = mpiPi_style_verbose;
  mpiPi.print_callsite_detail = 1;
#ifdef COLLECTIVE_REPORT_DEFAULT
  mpiPi.collective_report = 1;
#else
  mpiPi.collective_report = 0;
#endif
  mpiPi_getenv ();

  mpiPi.task_callsite_stats =
    h_open (mpiPi.tableSize, mpiPi_callsite_stats_pc_hashkey,
	    mpiPi_callsite_stats_pc_comparator);

  /* -- welcome msg only collector  */
  if (mpiPi.collectorRank == mpiPi.rank)
    {
      mpiPi_msg ("\n");
      mpiPi_msg ("%s V%d.%d.%d (Build %s/%s)\n", mpiPi.toolname, mpiPi_vmajor,
		 mpiPi_vminor, mpiPi_vpatch, mpiPi_vdate, mpiPi_vtime);
      mpiPi_msg ("Direct questions and errors to %s\n", MPIP_HELP_LIST);
      mpiPi_msg ("\n");
    }

  mpiPi_msg_debug ("appName is %s\n", appName);
  mpiPi_msg_debug ("successful init on %d, %s\n", mpiPi.rank, mpiPi.hostname);

  if (mpiPi.enabled)
    {
      mpiPi_GETTIME (&mpiPi.startTime);
    }

  return;
}
Example #26
int create_2level_comm (MPI_Comm comm, int size, int my_rank)
{
    static const char FCNAME[] = "create_2level_comm";
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm* comm_ptr;
    MPID_Comm* comm_world_ptr;
    MPI_Group subgroup1, comm_group;
    MPID_Group *group_ptr=NULL;
    int leader_comm_size, my_local_size, my_local_id, input_flag =0, output_flag=0;
    int errflag = FALSE;
    int leader_group_size=0;
  
    MPIU_THREADPRIV_DECL;
    MPIU_THREADPRIV_GET;
    MPID_Comm_get_ptr( comm, comm_ptr );
    MPID_Comm_get_ptr( MPI_COMM_WORLD, comm_world_ptr );

    int* shmem_group = MPIU_Malloc(sizeof(int) * size);
    if (NULL == shmem_group){
        printf("Couldn't malloc shmem_group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    /* Creating local shmem group */
    int i = 0;
    int local_rank = 0;
    int grp_index = 0;
    comm_ptr->ch.leader_comm=MPI_COMM_NULL;
    comm_ptr->ch.shmem_comm=MPI_COMM_NULL;

    MPIDI_VC_t* vc = NULL;
    for (; i < size ; ++i){
       MPIDI_Comm_get_vc(comm_ptr, i, &vc);
       if (my_rank == i || vc->smp.local_rank >= 0){
           shmem_group[grp_index] = i;
           if (my_rank == i){
               local_rank = grp_index;
           }
           ++grp_index;
       }  
    } 

    /* Creating leader group */
    int leader = 0;
    leader = shmem_group[0];


    /* Gives the mapping to any process's leader in comm */
    comm_ptr->ch.leader_map = MPIU_Malloc(sizeof(int) * size);
    if (NULL == comm_ptr->ch.leader_map){
        printf("Couldn't malloc group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }
    
    mpi_errno = MPIR_Allgather_impl (&leader, 1, MPI_INT , comm_ptr->ch.leader_map, 1, MPI_INT, comm_ptr, &errflag);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }


    int* leader_group = MPIU_Malloc(sizeof(int) * size);
    if (NULL == leader_group){
        printf("Couldn't malloc leader_group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    /* Gives the mapping from leader's rank in comm to 
     * leader's rank in leader_comm */
    comm_ptr->ch.leader_rank = MPIU_Malloc(sizeof(int) * size);
    if (NULL == comm_ptr->ch.leader_rank){
        printf("Couldn't malloc marker\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    for (i=0; i < size ; ++i){
         comm_ptr->ch.leader_rank[i] = -1;
    }
    int* group = comm_ptr->ch.leader_map;
    grp_index = 0;
    for (i=0; i < size ; ++i){
        if (comm_ptr->ch.leader_rank[(group[i])] == -1){
            comm_ptr->ch.leader_rank[(group[i])] = grp_index;
            leader_group[grp_index++] = group[i];
           
        }
    }
    leader_group_size = grp_index;
    comm_ptr->ch.leader_group_size = leader_group_size;

    mpi_errno = PMPI_Comm_group(comm, &comm_group);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    
    mpi_errno = PMPI_Group_incl(comm_group, leader_group_size, leader_group, &subgroup1);
     if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = PMPI_Comm_create(comm, subgroup1, &(comm_ptr->ch.leader_comm));
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    MPID_Comm *leader_ptr;
    MPID_Comm_get_ptr( comm_ptr->ch.leader_comm, leader_ptr );
       
    MPIU_Free(leader_group);
    MPID_Group_get_ptr( subgroup1, group_ptr );
    if(group_ptr != NULL) { 
       mpi_errno = PMPI_Group_free(&subgroup1);
       if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
       }
    } 

    mpi_errno = PMPI_Comm_split(comm, leader, local_rank, &(comm_ptr->ch.shmem_comm));
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    MPID_Comm *shmem_ptr;
    MPID_Comm_get_ptr(comm_ptr->ch.shmem_comm, shmem_ptr);


    mpi_errno = PMPI_Comm_rank(comm_ptr->ch.shmem_comm, &my_local_id);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    mpi_errno = PMPI_Comm_size(comm_ptr->ch.shmem_comm, &my_local_size);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    if(my_local_id == 0) { 
           int array_index=0;
           mpi_errno = PMPI_Comm_size(comm_ptr->ch.leader_comm, &leader_comm_size);
           if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
           }

           comm_ptr->ch.node_sizes = MPIU_Malloc(sizeof(int)*leader_comm_size);
           mpi_errno = PMPI_Allgather(&my_local_size, 1, MPI_INT,
				 comm_ptr->ch.node_sizes, 1, MPI_INT, comm_ptr->ch.leader_comm);
           if(mpi_errno) {
              MPIU_ERR_POP(mpi_errno);
           }
           comm_ptr->ch.is_uniform = 1; 
           for(array_index=0; array_index < leader_comm_size; array_index++) { 
                if(comm_ptr->ch.node_sizes[0] != comm_ptr->ch.node_sizes[array_index]) {
                     comm_ptr->ch.is_uniform = 0; 
                     break;
                }
           }
     } 
    
    comm_ptr->ch.is_global_block = 0; 
    /* We need to check to see if the ranks are block or not. Each node leader
     * gets the global ranks of all of its children processes. It scans through
     * this array to see if the ranks are in block order. The node-leaders then
     * do an allreduce to see if all the other nodes are also in block order.
     * This is followed by an intra-node bcast to let the children processes
     * know of the result of this step */ 
    if(my_local_id == 0) {
            int is_local_block = 1; 
            int index = 1; 
            
            while( index < my_local_size) { 
                    if( (shmem_group[index] - 1) != 
                         shmem_group[index - 1]) { 
                            is_local_block = 0; 
                            break; 
                    }
            index++;  
            }  

            comm_ptr->ch.shmem_coll_ok = 0;/* To prevent Allreduce taking shmem route*/
            mpi_errno = MPIR_Allreduce_impl(&(is_local_block), 
                                      &(comm_ptr->ch.is_global_block), 1, 
                                      MPI_INT, MPI_LAND, leader_ptr, &errflag);
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
            mpi_errno = MPIR_Bcast_impl(&(comm_ptr->ch.is_global_block),1, MPI_INT, 0,
                                   shmem_ptr, &errflag); 
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
    } else { 
            mpi_errno = MPIR_Bcast_impl(&(comm_ptr->ch.is_global_block),1, MPI_INT, 0,
                                   shmem_ptr, &errflag); 
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
    }      
                              

    if (my_local_id == 0){
        lock_shmem_region();
        increment_shmem_comm_count();
        shmem_comm_count = get_shmem_comm_count();
        unlock_shmem_region();
    }
    
    shmem_ptr->ch.shmem_coll_ok = 0; 
    /* To prevent Bcast taking the knomial_2level_bcast route */
    mpi_errno = MPIR_Bcast_impl (&shmem_comm_count, 1, MPI_INT, 0, shmem_ptr, &errflag);
     if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }


    if (shmem_comm_count <= g_shmem_coll_blocks){
        shmem_ptr->ch.shmem_comm_rank = shmem_comm_count-1;
        input_flag = 1;
    } else{
        input_flag = 0;
    }
    comm_ptr->ch.shmem_coll_ok = 0;/* To prevent Allreduce taking shmem route*/
    mpi_errno = MPIR_Allreduce_impl(&input_flag, &output_flag, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    comm_ptr->ch.allgather_comm_ok = 0;
    if (allgather_ranking){
        int is_contig =1, check_leader =1, check_size=1, is_local_ok=0,is_block=0;
        int PPN;
        int shmem_grp_size = my_local_size;
        int leader_rank; 
        MPI_Group allgather_group; 
        comm_ptr->ch.allgather_comm=MPI_COMM_NULL; 
        comm_ptr->ch.allgather_new_ranks=NULL;

        if(comm_ptr->ch.leader_comm != MPI_COMM_NULL) { 
            PMPI_Comm_rank(comm_ptr->ch.leader_comm, &leader_rank); 
        } 

        mpi_errno=MPIR_Bcast_impl(&leader_rank, 1, MPI_INT, 0, shmem_ptr, &errflag);
        if(mpi_errno) {
        	MPIU_ERR_POP(mpi_errno);
        } 

        for (i=1; i < shmem_grp_size; i++ ){
            if (shmem_group[i] != shmem_group[i-1]+1){
                is_contig =0; 
                break;
            }
        }

        if (leader != (shmem_grp_size*leader_rank)){
            check_leader=0;
        }

        if (shmem_grp_size != (size/leader_group_size)){
            check_size=0;
        }

        is_local_ok = is_contig && check_leader && check_size;

        mpi_errno = MPIR_Allreduce_impl(&is_local_ok, &is_block, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag);
        if(mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        if (is_block){
            int counter=0,j;
            comm_ptr->ch.allgather_new_ranks = MPIU_Malloc(sizeof(int)*size);
            if (NULL == comm_ptr->ch.allgather_new_ranks){
                    mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, 
                                                  FCNAME, __LINE__, MPI_ERR_OTHER, 
                                                  "**nomem", 0 );
                    return mpi_errno;
            }
   
            PPN = shmem_grp_size;
            
            for (j=0; j < PPN; j++){
                for (i=0; i < leader_group_size; i++){
                    comm_ptr->ch.allgather_new_ranks[counter] = j + i*PPN;
                    counter++;
                }
            }

            mpi_errno = PMPI_Group_incl(comm_group, size, comm_ptr->ch.allgather_new_ranks, &allgather_group);
            if(mpi_errno) {
                 MPIU_ERR_POP(mpi_errno);
            }  
            mpi_errno = PMPI_Comm_create(comm_ptr->handle, allgather_group, &(comm_ptr->ch.allgather_comm));
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            }
            comm_ptr->ch.allgather_comm_ok = 1;
            
            mpi_errno=PMPI_Group_free(&allgather_group);
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            }
        }
    }
    mpi_errno=PMPI_Group_free(&comm_group);
    if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
    }

    if (output_flag == 1){
        comm_ptr->ch.shmem_coll_ok = 1;
        comm_registry[comm_registered++] = comm_ptr->context_id;
    } else{
        comm_ptr->ch.shmem_coll_ok = 0;
        MPID_Group_get_ptr( subgroup1, group_ptr );
        if(group_ptr != NULL) { 
             mpi_errno = PMPI_Group_free(&subgroup1);
             if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
             }
        }
        MPID_Group_get_ptr( comm_group, group_ptr );
        if(group_ptr != NULL) { 
             mpi_errno = PMPI_Group_free(&comm_group);
             if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
             }
        }
        free_2level_comm(comm_ptr);
        comm_ptr->ch.shmem_comm = MPI_COMM_NULL; 
        comm_ptr->ch.leader_comm = MPI_COMM_NULL; 
    }
    ++comm_count;
    MPIU_Free(shmem_group);

    fn_fail: 
       MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
    
    return (mpi_errno);


}
Example #27
void vt_esync(MPI_Comm comm)
{
  uint64_t time, etime;
  Sync_TsPerRun* temp_ts;
  Sync_Map* temp_map;

  VT_MPI_INT myrank;
  VT_MPI_INT numnodes;
  VT_MPI_INT partnerid, numslots;
  VT_MPI_INT i;

  VT_SUSPEND_IO_TRACING();

  /* mark begin of clock synchronization */
  time = vt_pform_wtime();
  vt_enter(&time, vt_trc_regid[VT__TRC_SYNCTIME]);
  /* ... also as comment for vtunify */
  vt_comment(&time, "__ETIMESYNC__");

  /* barrier at entry */
  PMPI_Barrier(comm);

  temp_ts = (Sync_TsPerRun*) malloc(sizeof(Sync_TsPerRun));
  if (temp_ts == NULL) vt_error();

  temp_ts->sync_phase = NULL;
  temp_ts->next       = NULL;

  if (SyncTsPerRunFirst == NULL)
  {
    SyncTsPerRunFirst = temp_ts;
    SyncTsPerRunLast  = temp_ts;
  }
  else
  {
    SyncTsPerRunLast->next = temp_ts;    
    SyncTsPerRunLast = temp_ts;
  }

  /* measure time synchronization */

  PMPI_Comm_rank(comm, &myrank);
  PMPI_Comm_size(comm, &numnodes);

  numslots = (VT_MPI_INT)ceil(log((double)(numnodes)) / log(2.0));

  for(i = 0; i < numslots; i++)
  {
    partnerid = esync_commpartner(myrank, numnodes, i);
    if( partnerid < numnodes )
    {
      if( myrank < partnerid )
      {
	esync_master(partnerid, comm, myrank);
      }
      else
      {
	esync_slave(partnerid, comm);
      }
    }
  }
   
  /* add myrank to list of map ids */

  temp_map = (Sync_Map*)malloc(sizeof(Sync_Map));
  if (temp_map == NULL) vt_error();
  temp_map->id       = myrank;
  temp_map->time     = time;
  temp_map->duration = (uint32_t) 0;
  temp_map->next     = NULL;

  if (SyncMapIdFirst == NULL)
  {
    SyncMapIdFirst = temp_map;
    SyncMapIdLast = temp_map;
  }
  else
  {
    SyncMapIdLast->next = temp_map;
    SyncMapIdLast = temp_map;
  }

  /* barrier at exit */
  PMPI_Barrier(comm);

  /* mark end of clock synchronization */
  etime = vt_pform_wtime();
  vt_exit(&etime);

  /* increment number of sync. phases */
  SyncRound++;

  /* set timestamp of next synchronization if necessary */
  if (SyncNext != (uint64_t)-1)
    SyncNext = etime + SyncIntv;

  /* calculate sync. duration */
  SyncMapIdLast->duration = etime - time;

  VT_RESUME_IO_TRACING();
}
Example #28
void vt_sync(MPI_Comm comm, uint64_t* ltime, int64_t* offset)
{
  VT_MPI_INT myrank, myrank_host, myrank_sync;
  VT_MPI_INT numnodes;
  uint64_t time;

  MPI_Comm host_comm;
  MPI_Comm sync_comm;

  VT_SUSPEND_IO_TRACING(VT_CURRENT_THREAD);

  /* mark begin of clock synchronization */
  time = vt_pform_wtime();
  vt_enter(VT_CURRENT_THREAD, &time, vt_trc_regid[VT__TRC_SYNCTIME]);

  /* barrier at entry */
  PMPI_Barrier(comm);

  *offset = 0;
  *ltime = vt_pform_wtime();

  PMPI_Comm_rank(comm, &myrank);

  /* create communicator containing all processes on the same node */

  PMPI_Comm_split(comm, (vt_pform_node_id() & 0x7FFFFFFF), 0, &host_comm);
  PMPI_Comm_rank(host_comm, &myrank_host);

  /* create communicator containing all processes with rank zero in the
     previously created communicators */
  
  PMPI_Comm_split(comm, myrank_host, 0, &sync_comm);
  PMPI_Comm_rank(sync_comm, &myrank_sync);
  PMPI_Comm_size(sync_comm, &numnodes);

  /* measure offsets between all nodes and the root node (rank 0 in sync_comm) */

  if (myrank_host == 0)
  {
    VT_MPI_INT i;

    for (i = 1; i < numnodes; i++)
    {
      PMPI_Barrier(sync_comm);
      if (myrank_sync == i)
	*offset = sync_slave(ltime, 0, sync_comm);
      else if (myrank_sync == 0)
	*offset = sync_master(ltime, i, sync_comm);
    }
  }

  /* distribute offset and ltime across all processes on the same node */

  PMPI_Bcast(offset, 1, MPI_LONG_LONG_INT, 0, host_comm);
  PMPI_Bcast(ltime, 1, MPI_LONG_LONG_INT, 0, host_comm);

  PMPI_Comm_free(&host_comm);
  PMPI_Comm_free(&sync_comm);

  /* barrier at exit */
  PMPI_Barrier(comm);

  /* mark end of clock synchronization */
  time = vt_pform_wtime();
  vt_exit(VT_CURRENT_THREAD, &time);

  VT_RESUME_IO_TRACING(VT_CURRENT_THREAD);
}
Example #29
int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
{
    int mpi_errno = MPI_SUCCESS;
    int has_args;
    int has_env;
    int thread_provided;
    int exit_init_cs_on_failure = 0;

    /* For any code in the device that wants to check for runtime 
       decisions on the value of isThreaded, set a provisional
       value here. We could let the MPID_Init routine override this */
#ifdef HAVE_RUNTIME_THREADCHECK
    MPIR_ThreadInfo.isThreaded = required == MPI_THREAD_MULTIPLE;
#endif

    MPIU_THREAD_CS_INIT;

    /* FIXME: Move to os-dependent interface? */
#ifdef HAVE_WINDOWS_H
    /* prevent the process from bringing up an error message window if mpich 
       asserts */
    _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE );
    _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR );
    _CrtSetReportHook2(_CRT_RPTHOOK_INSTALL, assert_hook);
#ifdef _WIN64
    {
    /* FIXME: (Windows) This severely degrades performance but fixes alignment 
       issues with the datatype code. */
    /* Prevent misaligned faults on Win64 machines */
    UINT mode, old_mode;
    
    old_mode = SetErrorMode(SEM_NOALIGNMENTFAULTEXCEPT);
    mode = old_mode | SEM_NOALIGNMENTFAULTEXCEPT;
    SetErrorMode(mode);
    }
#endif
#endif

    /* We need this in order to implement IS_THREAD_MAIN */
#   if (MPICH_THREAD_LEVEL >= MPI_THREAD_SERIALIZED) && defined(MPICH_IS_THREADED)
    {
	MPID_Thread_self(&MPIR_ThreadInfo.master_thread);
    }
#   endif

#ifdef HAVE_ERROR_CHECKING
    /* Because the PARAM system has not been initialized, temporarily
       unconditionally enable error checks.  Once the PARAM system is
       initialized, this may be reset */
    MPIR_Process.do_error_checks = 1;
#else
    MPIR_Process.do_error_checks = 0;
#endif

    /* Initialize necessary subsystems and setup the predefined attribute
       values.  Subsystems may change these values. */
    MPIR_Process.attrs.appnum          = -1;
    MPIR_Process.attrs.host            = 0;
    MPIR_Process.attrs.io              = 0;
    MPIR_Process.attrs.lastusedcode    = MPI_ERR_LASTCODE;
    MPIR_Process.attrs.tag_ub          = 0;
    MPIR_Process.attrs.universe        = MPIR_UNIVERSE_SIZE_NOT_SET;
    MPIR_Process.attrs.wtime_is_global = 0;

    /* Set the functions used to duplicate attributes.  These are used
       when the first corresponding keyval is created */
    MPIR_Process.attr_dup  = 0;
    MPIR_Process.attr_free = 0;

#ifdef HAVE_CXX_BINDING
    /* Set the functions used to call functions in the C++ binding 
       for reductions and attribute operations.  These are null
       until a C++ operation is defined.  This allows the C code
       that implements these operations to not invoke a C++ code
       directly, which may force the inclusion of symbols known only
       to the C++ compiler (e.g., under some non-GNU compilers, including
       Solaris and IRIX). */
    MPIR_Process.cxx_call_op_fn = 0;

#endif
    /* This allows the device to select an alternative function for 
       dimsCreate */
    MPIR_Process.dimsCreate     = 0;

    /* "Allocate" from the reserved space for builtin communicators and
       (partially) initialize predefined communicators.  comm_parent is
       initially NULL and will be allocated by the device if the process group
       was started using one of the MPI_Comm_spawn functions. */
    MPIR_Process.comm_world		    = MPID_Comm_builtin + 0;
    MPIR_Comm_init(MPIR_Process.comm_world);
    MPIR_Process.comm_world->handle	    = MPI_COMM_WORLD;
    MPIR_Process.comm_world->context_id	    = 0 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_world->recvcontext_id = 0 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_world->comm_kind	    = MPID_INTRACOMM;
    /* This initialization of the comm name could be done only when 
       comm_get_name is called */
    MPIU_Strncpy(MPIR_Process.comm_world->name, "MPI_COMM_WORLD",
		 MPI_MAX_OBJECT_NAME);

    MPIR_Process.comm_self		    = MPID_Comm_builtin + 1;
    MPIR_Comm_init(MPIR_Process.comm_self);
    MPIR_Process.comm_self->handle	    = MPI_COMM_SELF;
    MPIR_Process.comm_self->context_id	    = 1 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_self->recvcontext_id  = 1 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_self->comm_kind	    = MPID_INTRACOMM;
    MPIU_Strncpy(MPIR_Process.comm_self->name, "MPI_COMM_SELF",
		 MPI_MAX_OBJECT_NAME);

#ifdef MPID_NEEDS_ICOMM_WORLD
    MPIR_Process.icomm_world		    = MPID_Comm_builtin + 2;
    MPIR_Comm_init(MPIR_Process.icomm_world);
    MPIR_Process.icomm_world->handle	    = MPIR_ICOMM_WORLD;
    MPIR_Process.icomm_world->context_id    = 2 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.icomm_world->recvcontext_id= 2 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.icomm_world->comm_kind	    = MPID_INTRACOMM;
    MPIU_Strncpy(MPIR_Process.icomm_world->name, "MPI_ICOMM_WORLD",
		 MPI_MAX_OBJECT_NAME);

    /* Note that these communicators are not ready for use - MPID_Init 
       will setup self and world, and icomm_world if it desires it. */
#endif

    MPIR_Process.comm_parent = NULL;

    /* Setup the initial communicator list in case we have 
       enabled the debugger message-queue interface */
    MPIR_COMML_REMEMBER( MPIR_Process.comm_world );
    MPIR_COMML_REMEMBER( MPIR_Process.comm_self );

    /* Call any and all MPID_Init type functions */
    MPIR_Err_init();
    MPIR_Datatype_init();
    MPIR_Group_init();

    /* MPIU_Timer_pre_init(); */

    mpi_errno = MPIR_Param_init_params();
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Wait for debugger to attach if requested. */
    if (MPIR_PARAM_DEBUG_HOLD) {
        volatile int hold = 1;
        while (hold)
#ifdef HAVE_USLEEP
            usleep(100);
#endif
            ;
    }


#if HAVE_ERROR_CHECKING == MPID_ERROR_LEVEL_RUNTIME
    MPIR_Process.do_error_checks = MPIR_PARAM_ERROR_CHECKING;
#endif

    /* define MPI as initialized so that we can use MPI functions within 
       MPID_Init if necessary */
    MPIR_Process.initialized = MPICH_WITHIN_MPI;

    /* We can't acquire any critical sections until this point.  Any
     * earlier the basic data structures haven't been initialized */
    MPIU_THREAD_CS_ENTER(INIT,required);
    exit_init_cs_on_failure = 1;

    mpi_errno = MPID_Init(argc, argv, required, &thread_provided, 
			  &has_args, &has_env);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Capture the level of thread support provided */
    MPIR_ThreadInfo.thread_provided = thread_provided;
    if (provided) *provided = thread_provided;
#ifdef HAVE_RUNTIME_THREADCHECK
    MPIR_ThreadInfo.isThreaded = (thread_provided == MPI_THREAD_MULTIPLE);
#endif

    /* FIXME: Define these in the interface.  Does Timer init belong here? */
    MPIU_dbg_init(MPIR_Process.comm_world->rank);
    MPIU_Timer_init(MPIR_Process.comm_world->rank,
		    MPIR_Process.comm_world->local_size);
#ifdef USE_MEMORY_TRACING
    MPIU_trinit( MPIR_Process.comm_world->rank );
    /* Indicate that we are near the end of the init step; memory 
       allocated already will have an id of zero; this helps 
       separate memory leaks in the initialization code from 
       leaks in the "active" code */
    /* Uncomment this code to leave out any of the MPID_Init/etc 
       memory allocations from the memory leak testing */
    /* MPIU_trid( 1 ); */
#endif
#ifdef USE_DBG_LOGGING
    MPIU_DBG_Init( argc, argv, has_args, has_env, 
		   MPIR_Process.comm_world->rank );
#endif

    /* Initialize the C versions of the Fortran link-time constants.
       
       We now initialize the Fortran symbols from within the Fortran 
       interface in the routine that first needs the symbols.
       This fixes a problem with symbols added by a Fortran compiler that 
       are not part of the C runtime environment (the Portland group
       compilers would do this) 
    */
#if defined(HAVE_FORTRAN_BINDING) && defined(HAVE_MPI_F_INIT_WORKS_WITH_C)
    mpirinitf_();
#endif

    /* FIXME: Does this need to come before the call to MPID_InitComplete?
       For some debugger support, MPIR_WaitForDebugger may want to use
       MPI communication routines to collect information for the debugger */
#ifdef HAVE_DEBUGGER_SUPPORT
    MPIR_WaitForDebugger();
#endif

    /* Let the device know that the rest of the init process is completed */
    if (mpi_errno == MPI_SUCCESS) 
	mpi_errno = MPID_InitCompleted();

#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    if (is_shmem_collectives_enabled()){
        if (check_split_comm(pthread_self())){
            int my_id, size;
            PMPI_Comm_rank(MPI_COMM_WORLD, &my_id);
            PMPI_Comm_size(MPI_COMM_WORLD, &size);
            disable_split_comm(pthread_self());
            create_2level_comm(MPI_COMM_WORLD, size, my_id);
            enable_split_comm(pthread_self());
        }
    }
#endif /* defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */


fn_exit:
    MPIU_THREAD_CS_EXIT(INIT,required);
    return mpi_errno;

fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    /* signal to error handling routines that core services are unavailable */
    MPIR_Process.initialized = MPICH_PRE_INIT;

    if (exit_init_cs_on_failure) {
        MPIU_THREAD_CS_EXIT(INIT,required);
    }
    MPIU_THREAD_CS_FINALIZE;
    return mpi_errno;
    /* --END ERROR HANDLING-- */
}
Example #30
/*@

MPI_Comm_dup - Duplicates an existing communicator with all its cached
               information

Input Parameter:
. comm - Communicator to be duplicated (handle) 

Output Parameter:
. newcomm - A new communicator over the same group as 'comm' but with a new
  context. See notes.  (handle) 

Notes:
  This routine is used to create a new communicator that has a new
  communication context but contains the same group of processes as
  the input communicator.  Since all MPI communication is performed
  within a communicator (specified as the group of processes `plus`
  the context), this routine provides an effective way to create a
  private communicator for use by a software module or library.  In
  particular, no library routine should use 'MPI_COMM_WORLD' as the
  communicator; instead, a duplicate of a user-specified communicator
  should always be used.  For more information, see Using MPI, 2nd
  edition. 

  Because this routine essentially produces a copy of a communicator,
  it also copies any attributes that have been defined on the input
  communicator, using the attribute copy function specified by the
  'copy_function' argument to 'MPI_Keyval_create'.  This is
  particularly useful for (a) attributes that describe some property
  of the group associated with the communicator, such as its
  interconnection topology and (b) communicators that are given back
  to the user; the attributes in this case can track subsequent
  'MPI_Comm_dup' operations on this communicator.

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM

.seealso: MPI_Comm_free, MPI_Keyval_create, MPI_Attr_put, MPI_Attr_delete,
 MPI_Comm_create_keyval, MPI_Comm_set_attr, MPI_Comm_delete_attr
@*/
int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm_ptr = NULL, *newcomm_ptr;
    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_DUP);

    MPIR_ERRTEST_INITIALIZED_ORDIE();
    
    MPIU_THREAD_CS_ENTER(ALLFUNC,);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_DUP);
    
    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_COMM(comm, mpi_errno);
            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
	}
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPID_Comm_get_ptr( comm, comm_ptr );
    
    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* Validate comm_ptr */
            MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
	    /* If comm_ptr is not valid, it will be reset to null */
            MPIR_ERRTEST_ARGNULL(newcomm, "newcomm", mpi_errno);
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */
    
    mpi_errno = MPIR_Comm_dup_impl(comm_ptr, &newcomm_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPIU_OBJ_PUBLISH_HANDLE(*newcomm, newcomm_ptr->handle);

#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    if (enable_shmem_collectives){
        if (check_split_comm(pthread_self())){
            if (*newcomm != MPI_COMM_NULL){

                int flag;
                PMPI_Comm_test_inter(*newcomm, &flag);

                if (flag == 0){
                    int my_id, size;
                    mpi_errno = PMPI_Comm_rank(*newcomm, &my_id);
                     if(mpi_errno) {
                        MPIU_ERR_POP(mpi_errno);
                    }
                    mpi_errno = PMPI_Comm_size(*newcomm, &size);
                     if(mpi_errno) {
                        MPIU_ERR_POP(mpi_errno);
                    }
                    disable_split_comm(pthread_self());
                    mpi_errno = create_2level_comm(*newcomm, size, my_id);
                     if(mpi_errno) {
                        MPIU_ERR_POP(mpi_errno);
                    }
                    enable_split_comm(pthread_self());
                }

            }
        }
    }
#endif /* defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */

    /* ... end of body of routine ... */

  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_DUP);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return mpi_errno;
    
  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_comm_dup",
	    "**mpi_comm_dup %C %p", comm, newcomm);
    }
#   endif
    *newcomm = MPI_COMM_NULL;
    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
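As the Notes above point out, library code should never communicate directly on a user communicator such as MPI_COMM_WORLD; it should duplicate it and keep the duplicate private. A minimal usage sketch, with hypothetical library names:

#include <mpi.h>

/* Hypothetical library context holding a private communicator. */
typedef struct {
    MPI_Comm comm;   /* private duplicate used for all internal traffic */
} mylib_t;

int mylib_init(mylib_t *lib, MPI_Comm user_comm)
{
    /* Duplicate the caller's communicator so that library messages can
     * never be matched by application sends and receives. */
    return MPI_Comm_dup(user_comm, &lib->comm);
}

int mylib_finalize(mylib_t *lib)
{
    return MPI_Comm_free(&lib->comm);
}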