Exemple #1
0
/* STUB */
int MPI_Allgather ( void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, int recvcount, MPI_Datatype recvtype, 
                   MPI_Comm comm )
{
  _MPI_COVERAGE();
  return PMPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); 
}
Exemple #2
0
int MPI_Allgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype,
                  void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
{
    int ret;
    cqueue_t* mycqueue = handle_get_cqueue(comm);

    if (mycqueue != NULL)
    {
        MPI_Request tmprequest;
        ret = cqueue_iallgather(mycqueue, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, &tmprequest);
        return MPI_Wait(&tmprequest, MPI_STATUS_IGNORE);
    }
    else
    {
        if (std_mpi_mode == STD_MPI_MODE_IMPLICIT && max_ep > 0)
        {
            int commsize;
            int typesize;
            MPI_Type_size(recvtype, &typesize);
            if (sendbuf == MPI_IN_PLACE)
            {
                PMPI_Comm_size(comm, &commsize);
                for (int rank = 0; rank < commsize; rank++)
                {
                    block_coll_request[rank] = MPI_REQUEST_NULL;
                    cqueue_ibcast(client_get_cqueue(rank % max_ep), recvbuf + rank * typesize * recvcount, recvcount,
                                  recvtype, rank, comm, &block_coll_request[rank]);
                }
                return MPI_Waitall(commsize, block_coll_request, MPI_STATUS_IGNORE);
            }
            else
              return PMPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
        }
        return PMPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
    }

    return ret;
}
Exemple #3
0
int MPI_Comm_create_endpoints(MPI_Comm parent_comm, int num_endpoints, MPI_Info info, MPI_Comm out_comm_hdls[])
{
    int* num_ep_list;
    int tot_endpoints = 0;
    int local_epid, global_epid = 0;
    int i;

    /* Return error if requested endpoints greater than servers */
    if (num_endpoints > max_ep) return MPI_ERR_UNKNOWN;

    /* Map endpoints to servers */
    /* Step 1/3: Gather total number of endpoints requested by all
       application tasks in the parent communicator parent_comm */
    num_ep_list = (int*)malloc(num_tasks*sizeof(int));
    ASSERT(num_ep_list != NULL);
    PMPI_Allgather(&num_endpoints, 1, MPI_INT, (void*)num_ep_list, 1, MPI_INT, parent_comm);

    /* Step 2/3: Create communicator handle objects */
    for (i = 0; i < num_tasks; i++)
    {
        tot_endpoints += num_ep_list[i];
        if (i < taskid)
            global_epid += num_ep_list[i];
    }

    for (local_epid = 0; local_epid < num_endpoints; local_epid++)
    {
	out_comm_hdls[local_epid] = (MPI_Comm) handle_register(COMM_EP, MPI_COMM_NULL, parent_comm,
                                                    num_endpoints, local_epid, tot_endpoints,
                                                    global_epid, NULL, NULL);
        global_epid++;
    }

    /* Step 3/3: */
    client_multiplex_endpoints(max_ep, num_tasks, num_endpoints, num_ep_list, out_comm_hdls);

    PMPI_Barrier(parent_comm);

    free(num_ep_list);

    return MPI_SUCCESS;
}
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
                  MPI_Comm comm)
{
    char sendtypename[MPI_MAX_OBJECT_NAME], recvtypename[MPI_MAX_OBJECT_NAME];
    char commname[MPI_MAX_OBJECT_NAME];
    int len;
    int rank;
    
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);    
    PMPI_Type_get_name(sendtype, sendtypename, &len);
    PMPI_Type_get_name(recvtype, recvtypename, &len);
    PMPI_Comm_get_name(comm, commname, &len);
    
    fprintf(stderr, "MPI_ALLGATHER[%d]: sendbuf %0" PRIxPTR " sendcount %d sendtype %s\n\trecvbuf %0" PRIxPTR " recvcount %d recvtype %s comm %s\n",
           rank, (uintptr_t) sendbuf, sendcount, sendtypename, (uintptr_t) recvbuf, recvcount, recvtypename, commname);
    fflush(stderr);
    
    return PMPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
}
int MPI_Allgather( void* sbuff, int scnt, MPI_Datatype stype,
                   void* rbuff, int rcnt, MPI_Datatype rtype,
                   MPI_Comm comm )
{
    int             g2g = 1;
    char            call[COLLCHK_SM_STRLEN];
    int             are2buffs;

    sprintf( call, "ALLGATHER" );

    /* Check if init has been called */
    g2g = CollChk_is_init();

    if( g2g ) {
        /* check for call consistency */
        CollChk_same_call( comm, call );
        /* check MPI_IN_PLACE consistency */
        CollChk_check_buff( comm, sbuff, call );
        
        /* check data signature consistency */
#if defined( HAVE_MPI_IN_PLACE )
        are2buffs = ( sbuff != MPI_IN_PLACE );
#else
        are2buffs = 1;
#endif
        CollChk_dtype_allgather(comm, stype, scnt, rtype, rcnt,
                                are2buffs, call);

        /* make the call */
        return PMPI_Allgather( sbuff, scnt, stype, rbuff, rcnt, rtype, comm );
    }
    else {
        /* init not called */
        return CollChk_err_han( "MPI_Init() has not been called!",
                                COLLCHK_ERR_NOT_INIT, call, comm );
    }
}
Exemple #6
0
int create_2level_comm (MPI_Comm comm, int size, int my_rank)
{
    static const char FCNAME[] = "create_2level_comm";
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm* comm_ptr;
    MPID_Comm* comm_world_ptr;
    MPI_Group subgroup1, comm_group;
    MPID_Group *group_ptr=NULL;
    int leader_comm_size, my_local_size, my_local_id, input_flag =0, output_flag=0;
    int errflag = FALSE;
    int leader_group_size=0;
  
    MPIU_THREADPRIV_DECL;
    MPIU_THREADPRIV_GET;
    MPID_Comm_get_ptr( comm, comm_ptr );
    MPID_Comm_get_ptr( MPI_COMM_WORLD, comm_world_ptr );

    int* shmem_group = MPIU_Malloc(sizeof(int) * size);
    if (NULL == shmem_group){
        printf("Couldn't malloc shmem_group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    /* Creating local shmem group */
    int i = 0;
    int local_rank = 0;
    int grp_index = 0;
    comm_ptr->ch.leader_comm=MPI_COMM_NULL;
    comm_ptr->ch.shmem_comm=MPI_COMM_NULL;

    MPIDI_VC_t* vc = NULL;
    for (; i < size ; ++i){
       MPIDI_Comm_get_vc(comm_ptr, i, &vc);
       if (my_rank == i || vc->smp.local_rank >= 0){
           shmem_group[grp_index] = i;
           if (my_rank == i){
               local_rank = grp_index;
           }
           ++grp_index;
       }  
    } 

    /* Creating leader group */
    int leader = 0;
    leader = shmem_group[0];


    /* Gives the mapping to any process's leader in comm */
    comm_ptr->ch.leader_map = MPIU_Malloc(sizeof(int) * size);
    if (NULL == comm_ptr->ch.leader_map){
        printf("Couldn't malloc group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }
    
    mpi_errno = MPIR_Allgather_impl (&leader, 1, MPI_INT , comm_ptr->ch.leader_map, 1, MPI_INT, comm_ptr, &errflag);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }


    int* leader_group = MPIU_Malloc(sizeof(int) * size);
    if (NULL == leader_group){
        printf("Couldn't malloc leader_group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    /* Gives the mapping from leader's rank in comm to 
     * leader's rank in leader_comm */
    comm_ptr->ch.leader_rank = MPIU_Malloc(sizeof(int) * size);
    if (NULL == comm_ptr->ch.leader_rank){
        printf("Couldn't malloc marker\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    for (i=0; i < size ; ++i){
         comm_ptr->ch.leader_rank[i] = -1;
    }
    int* group = comm_ptr->ch.leader_map;
    grp_index = 0;
    for (i=0; i < size ; ++i){
        if (comm_ptr->ch.leader_rank[(group[i])] == -1){
            comm_ptr->ch.leader_rank[(group[i])] = grp_index;
            leader_group[grp_index++] = group[i];
           
        }
    }
    leader_group_size = grp_index;
    comm_ptr->ch.leader_group_size = leader_group_size;

    mpi_errno = PMPI_Comm_group(comm, &comm_group);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    
    mpi_errno = PMPI_Group_incl(comm_group, leader_group_size, leader_group, &subgroup1);
     if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = PMPI_Comm_create(comm, subgroup1, &(comm_ptr->ch.leader_comm));
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    MPID_Comm *leader_ptr;
    MPID_Comm_get_ptr( comm_ptr->ch.leader_comm, leader_ptr );
       
    MPIU_Free(leader_group);
    MPID_Group_get_ptr( subgroup1, group_ptr );
    if(group_ptr != NULL) { 
       mpi_errno = PMPI_Group_free(&subgroup1);
       if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
       }
    } 

    mpi_errno = PMPI_Comm_split(comm, leader, local_rank, &(comm_ptr->ch.shmem_comm));
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    MPID_Comm *shmem_ptr;
    MPID_Comm_get_ptr(comm_ptr->ch.shmem_comm, shmem_ptr);


    mpi_errno = PMPI_Comm_rank(comm_ptr->ch.shmem_comm, &my_local_id);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    mpi_errno = PMPI_Comm_size(comm_ptr->ch.shmem_comm, &my_local_size);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    if(my_local_id == 0) { 
           int array_index=0;
           mpi_errno = PMPI_Comm_size(comm_ptr->ch.leader_comm, &leader_comm_size);
           if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
           }

           comm_ptr->ch.node_sizes = MPIU_Malloc(sizeof(int)*leader_comm_size);
           mpi_errno = PMPI_Allgather(&my_local_size, 1, MPI_INT,
				 comm_ptr->ch.node_sizes, 1, MPI_INT, comm_ptr->ch.leader_comm);
           if(mpi_errno) {
              MPIU_ERR_POP(mpi_errno);
           }
           comm_ptr->ch.is_uniform = 1; 
           for(array_index=0; array_index < leader_comm_size; array_index++) { 
                if(comm_ptr->ch.node_sizes[0] != comm_ptr->ch.node_sizes[array_index]) {
                     comm_ptr->ch.is_uniform = 0; 
                     break;
                }
           }
     } 
    
    comm_ptr->ch.is_global_block = 0; 
    /* We need to check to see if the ranks are block or not. Each node leader
     * gets the global ranks of all of its children processes. It scans through
     * this array to see if the ranks are in block order. The node-leaders then
     * do an allreduce to see if all the other nodes are also in block order.
     * This is followed by an intra-node bcast to let the children processes
     * know of the result of this step */ 
    if(my_local_id == 0) {
            int is_local_block = 1; 
            int index = 1; 
            
            while( index < my_local_size) { 
                    if( (shmem_group[index] - 1) != 
                         shmem_group[index - 1]) { 
                            is_local_block = 0; 
                            break; 
                    }
            index++;  
            }  

            comm_ptr->ch.shmem_coll_ok = 0;/* To prevent Allreduce taking shmem route*/
            mpi_errno = MPIR_Allreduce_impl(&(is_local_block), 
                                      &(comm_ptr->ch.is_global_block), 1, 
                                      MPI_INT, MPI_LAND, leader_ptr, &errflag);
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
            mpi_errno = MPIR_Bcast_impl(&(comm_ptr->ch.is_global_block),1, MPI_INT, 0,
                                   shmem_ptr, &errflag); 
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
    } else { 
            mpi_errno = MPIR_Bcast_impl(&(comm_ptr->ch.is_global_block),1, MPI_INT, 0,
                                   shmem_ptr, &errflag); 
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
    }      
                              

    if (my_local_id == 0){
        lock_shmem_region();
        increment_shmem_comm_count();
        shmem_comm_count = get_shmem_comm_count();
        unlock_shmem_region();
    }
    
    shmem_ptr->ch.shmem_coll_ok = 0; 
    /* To prevent Bcast taking the knomial_2level_bcast route */
    mpi_errno = MPIR_Bcast_impl (&shmem_comm_count, 1, MPI_INT, 0, shmem_ptr, &errflag);
     if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }


    if (shmem_comm_count <= g_shmem_coll_blocks){
        shmem_ptr->ch.shmem_comm_rank = shmem_comm_count-1;
        input_flag = 1;
    } else{
        input_flag = 0;
    }
    comm_ptr->ch.shmem_coll_ok = 0;/* To prevent Allreduce taking shmem route*/
    mpi_errno = MPIR_Allreduce_impl(&input_flag, &output_flag, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    comm_ptr->ch.allgather_comm_ok = 0;
    if (allgather_ranking){
        int is_contig =1, check_leader =1, check_size=1, is_local_ok=0,is_block=0;
        int PPN;
        int shmem_grp_size = my_local_size;
        int leader_rank; 
        MPI_Group allgather_group; 
        comm_ptr->ch.allgather_comm=MPI_COMM_NULL; 
        comm_ptr->ch.allgather_new_ranks=NULL;

        if(comm_ptr->ch.leader_comm != MPI_COMM_NULL) { 
            PMPI_Comm_rank(comm_ptr->ch.leader_comm, &leader_rank); 
        } 

        mpi_errno=MPIR_Bcast_impl(&leader_rank, 1, MPI_INT, 0, shmem_ptr, &errflag);
        if(mpi_errno) {
        	MPIU_ERR_POP(mpi_errno);
        } 

        for (i=1; i < shmem_grp_size; i++ ){
            if (shmem_group[i] != shmem_group[i-1]+1){
                is_contig =0; 
                break;
            }
        }

        if (leader != (shmem_grp_size*leader_rank)){
            check_leader=0;
        }

        if (shmem_grp_size != (size/leader_group_size)){
            check_size=0;
        }

        is_local_ok = is_contig && check_leader && check_size;

        mpi_errno = MPIR_Allreduce_impl(&is_local_ok, &is_block, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag);
        if(mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        if (is_block){
            int counter=0,j;
            comm_ptr->ch.allgather_new_ranks = MPIU_Malloc(sizeof(int)*size);
            if (NULL == comm_ptr->ch.allgather_new_ranks){
                    mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, 
                                                  FCNAME, __LINE__, MPI_ERR_OTHER, 
                                                  "**nomem", 0 );
                    return mpi_errno;
            }
   
            PPN = shmem_grp_size;
            
            for (j=0; j < PPN; j++){
                for (i=0; i < leader_group_size; i++){
                    comm_ptr->ch.allgather_new_ranks[counter] = j + i*PPN;
                    counter++;
                }
            }

            mpi_errno = PMPI_Group_incl(comm_group, size, comm_ptr->ch.allgather_new_ranks, &allgather_group);
            if(mpi_errno) {
                 MPIU_ERR_POP(mpi_errno);
            }  
            mpi_errno = PMPI_Comm_create(comm_ptr->handle, allgather_group, &(comm_ptr->ch.allgather_comm));
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            }
            comm_ptr->ch.allgather_comm_ok = 1;
            
            mpi_errno=PMPI_Group_free(&allgather_group);
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            }
        }
    }
    mpi_errno=PMPI_Group_free(&comm_group);
    if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
    }

    if (output_flag == 1){
        comm_ptr->ch.shmem_coll_ok = 1;
        comm_registry[comm_registered++] = comm_ptr->context_id;
    } else{
        comm_ptr->ch.shmem_coll_ok = 0;
        MPID_Group_get_ptr( subgroup1, group_ptr );
        if(group_ptr != NULL) { 
             mpi_errno = PMPI_Group_free(&subgroup1);
             if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
             }
        }
        MPID_Group_get_ptr( comm_group, group_ptr );
        if(group_ptr != NULL) { 
             mpi_errno = PMPI_Group_free(&comm_group);
             if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
             }
        }
        free_2level_comm(comm_ptr);
        comm_ptr->ch.shmem_comm = MPI_COMM_NULL; 
        comm_ptr->ch.leader_comm = MPI_COMM_NULL; 
    }
    ++comm_count;
    MPIU_Free(shmem_group);

    fn_fail: 
       MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
    
    return (mpi_errno);


}
Exemple #7
0
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
{
    int mpi_errno = MPI_SUCCESS;
    int i, j;
    int local_rank, local_nprocs, rank, nprocs, user_rank, user_nprocs;
    int local_user_rank = -1, local_user_nprocs = -1;
    int *tmp_gather_buf = NULL, node_id = 0;
    int tmp_bcast_buf[2];
    int *ranks_in_user_world = NULL, *ranks_in_world = NULL;

    MTCORE_DBG_PRINT_FCNAME();

    if (required == 0 && provided == NULL) {
        /* default init */
        mpi_errno = PMPI_Init(argc, argv);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }
    else {
        /* user init thread */
        mpi_errno = PMPI_Init_thread(argc, argv, required, provided);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    PMPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MTCORE_MY_RANK_IN_WORLD = rank;

    mpi_errno = MTCORE_Initialize_env();
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Get a communicator only containing processes with shared memory */
    mpi_errno = PMPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                                     MPI_INFO_NULL, &MTCORE_COMM_LOCAL);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Check number of helpers and number of processes */
    PMPI_Comm_rank(MTCORE_COMM_LOCAL, &local_rank);
    PMPI_Comm_size(MTCORE_COMM_LOCAL, &local_nprocs);

    if (local_nprocs < 2) {
        fprintf(stderr, "No user process found, please run with more than 2 process per node\n");
        mpi_errno = -1;
        goto fn_fail;
    }
    if (MTCORE_ENV.num_h < 1 || MTCORE_ENV.num_h >= local_nprocs) {
        fprintf(stderr, "Wrong value of number of helpers, %d. lt 1 or ge %d.\n",
                MTCORE_ENV.num_h, local_nprocs);
        mpi_errno = -1;
        goto fn_fail;
    }

    /* Specify the first N local processes to be Helper processes */
    MTCORE_H_RANKS_IN_LOCAL = calloc(MTCORE_ENV.num_h, sizeof(int));
    MTCORE_H_RANKS_IN_WORLD = calloc(MTCORE_ENV.num_h, sizeof(int));
    for (i = 0; i < MTCORE_ENV.num_h; i++) {
        MTCORE_H_RANKS_IN_LOCAL[i] = i;
    }
    mpi_errno = PMPI_Comm_group(MPI_COMM_WORLD, &MTCORE_GROUP_WORLD);
    mpi_errno = PMPI_Comm_group(MTCORE_COMM_LOCAL, &MTCORE_GROUP_LOCAL);

    mpi_errno = PMPI_Group_translate_ranks(MTCORE_GROUP_LOCAL, MTCORE_ENV.num_h,
                                           MTCORE_H_RANKS_IN_LOCAL, MTCORE_GROUP_WORLD,
                                           MTCORE_H_RANKS_IN_WORLD);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Create a user comm_world including all the users,
     * user will access it instead of comm_world */
    mpi_errno = PMPI_Comm_split(MPI_COMM_WORLD,
                                local_rank < MTCORE_ENV.num_h, 0, &MTCORE_COMM_USER_WORLD);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    PMPI_Comm_size(MTCORE_COMM_USER_WORLD, &user_nprocs);
    PMPI_Comm_rank(MTCORE_COMM_USER_WORLD, &user_rank);
    PMPI_Comm_group(MTCORE_COMM_USER_WORLD, &MTCORE_GROUP_USER_WORLD);

    /* Create a user comm_local */
    mpi_errno = PMPI_Comm_split(MTCORE_COMM_LOCAL,
                                local_rank < MTCORE_ENV.num_h, 0, &MTCORE_COMM_USER_LOCAL);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Create a helper comm_local */
    mpi_errno = PMPI_Comm_split(MTCORE_COMM_LOCAL,
                                local_rank < MTCORE_ENV.num_h, 1, &MTCORE_COMM_HELPER_LOCAL);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Exchange node id among local processes */
    /* -Only users create a user root communicator for exchanging local informations
     * between different nodes*/
    if (local_rank >= MTCORE_ENV.num_h) {
        PMPI_Comm_rank(MTCORE_COMM_USER_LOCAL, &local_user_rank);
        PMPI_Comm_size(MTCORE_COMM_USER_LOCAL, &local_user_nprocs);
        mpi_errno = PMPI_Comm_split(MTCORE_COMM_USER_WORLD,
                                    local_user_rank == 0, 1, &MTCORE_COMM_UR_WORLD);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;

        /* -Only user roots determine a node id for each USER processes */
        if (local_user_rank == 0) {
            PMPI_Comm_size(MTCORE_COMM_UR_WORLD, &MTCORE_NUM_NODES);
            PMPI_Comm_rank(MTCORE_COMM_UR_WORLD, &MTCORE_MY_NODE_ID);

            tmp_bcast_buf[0] = MTCORE_MY_NODE_ID;
            tmp_bcast_buf[1] = MTCORE_NUM_NODES;
        }
    }
    /* -User root broadcasts to other local processes */
    PMPI_Bcast(tmp_bcast_buf, 2, MPI_INT, MTCORE_ENV.num_h, MTCORE_COMM_LOCAL);
    MTCORE_MY_NODE_ID = tmp_bcast_buf[0];
    MTCORE_NUM_NODES = tmp_bcast_buf[1];

    /* Exchange node id and Helper ranks among world processes */
    ranks_in_world = calloc(nprocs, sizeof(int));
    ranks_in_user_world = calloc(nprocs, sizeof(int));
    for (i = 0; i < nprocs; i++) {
        ranks_in_world[i] = i;
    }
    mpi_errno = PMPI_Group_translate_ranks(MTCORE_GROUP_WORLD, nprocs,
                                           ranks_in_world, MTCORE_GROUP_USER_WORLD,
                                           ranks_in_user_world);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    MTCORE_ALL_NODE_IDS = calloc(nprocs, sizeof(int));
    MTCORE_ALL_H_RANKS_IN_WORLD = calloc(user_nprocs * MTCORE_ENV.num_h, sizeof(int));
    MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD = calloc(MTCORE_NUM_NODES * MTCORE_ENV.num_h, sizeof(int));
    tmp_gather_buf = calloc(nprocs * (1 + MTCORE_ENV.num_h), sizeof(int));

    tmp_gather_buf[rank * (1 + MTCORE_ENV.num_h)] = MTCORE_MY_NODE_ID;
    for (i = 0; i < MTCORE_ENV.num_h; i++) {
        tmp_gather_buf[rank * (1 + MTCORE_ENV.num_h) + i + 1] = MTCORE_H_RANKS_IN_WORLD[i];
    }
    mpi_errno = PMPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                               tmp_gather_buf, 1 + MTCORE_ENV.num_h, MPI_INT, MPI_COMM_WORLD);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    for (i = 0; i < nprocs; i++) {
        int i_user_rank = 0;
        node_id = tmp_gather_buf[i * (1 + MTCORE_ENV.num_h)];
        MTCORE_ALL_NODE_IDS[i] = node_id;

        /* Only copy helper ranks for user processes */
        i_user_rank = ranks_in_user_world[i];
        if (i_user_rank != MPI_UNDEFINED) {
            for (j = 0; j < MTCORE_ENV.num_h; j++) {
                MTCORE_ALL_H_RANKS_IN_WORLD[i_user_rank * MTCORE_ENV.num_h + j] =
                    tmp_gather_buf[i * (1 + MTCORE_ENV.num_h) + j + 1];
                MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD[node_id * MTCORE_ENV.num_h + j] =
                    tmp_gather_buf[i * (1 + MTCORE_ENV.num_h) + j + 1];
            }
        }
    }

#ifdef DEBUG
    MTCORE_DBG_PRINT("Debug gathered info ***** \n");
    for (i = 0; i < nprocs; i++) {
        MTCORE_DBG_PRINT("node_id[%d]: %d\n", i, MTCORE_ALL_NODE_IDS[i]);
    }
#endif

    /* USER processes */
    if (local_rank >= MTCORE_ENV.num_h) {
        /* Get user ranks in world */
        for (i = 0; i < user_nprocs; i++)
            ranks_in_user_world[i] = i;
        MTCORE_USER_RANKS_IN_WORLD = calloc(user_nprocs, sizeof(int));
        mpi_errno = PMPI_Group_translate_ranks(MTCORE_GROUP_USER_WORLD, user_nprocs,
                                               ranks_in_user_world, MTCORE_GROUP_WORLD,
                                               MTCORE_USER_RANKS_IN_WORLD);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;

#ifdef DEBUG
        for (i = 0; i < user_nprocs; i++) {
            MTCORE_DBG_PRINT("helper_rank_in_world[%d]:\n", i);
            for (j = 0; j < MTCORE_ENV.num_h; j++) {
                MTCORE_DBG_PRINT("    %d\n", MTCORE_ALL_H_RANKS_IN_WORLD[i * MTCORE_ENV.num_h + j]);
            }
        }
#endif
        MTCORE_DBG_PRINT("I am user, %d/%d in world, %d/%d in local, %d/%d in user world, "
                         "%d/%d in user local, node_id %d\n", rank, nprocs, local_rank,
                         local_nprocs, user_rank, user_nprocs, local_user_rank,
                         local_user_nprocs, MTCORE_MY_NODE_ID);

        MTCORE_Init_win_cache();
    }
    /* Helper processes */
    /* TODO: Helper process should not run user program */
    else {
        /* free local buffers before enter helper main function */
        if (tmp_gather_buf)
            free(tmp_gather_buf);
        if (ranks_in_user_world)
            free(ranks_in_user_world);
        if (ranks_in_world)
            free(ranks_in_world);

        MTCORE_DBG_PRINT("I am helper, %d/%d in world, %d/%d in local, node_id %d\n", rank,
                         nprocs, local_rank, local_nprocs, MTCORE_MY_NODE_ID);
        run_h_main();
        exit(0);
    }

  fn_exit:
    if (tmp_gather_buf)
        free(tmp_gather_buf);
    if (ranks_in_user_world)
        free(ranks_in_user_world);
    if (ranks_in_world)
        free(ranks_in_world);

    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    if (MTCORE_COMM_USER_WORLD != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_USER_WORLD\n");
        PMPI_Comm_free(&MTCORE_COMM_USER_WORLD);
    }
    if (MTCORE_COMM_LOCAL != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_LOCAL\n");
        PMPI_Comm_free(&MTCORE_COMM_LOCAL);
    }
    if (MTCORE_COMM_USER_LOCAL != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_USER_LOCAL\n");
        PMPI_Comm_free(&MTCORE_COMM_USER_LOCAL);
    }
    if (MTCORE_COMM_UR_WORLD != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_UR_WORLD\n");
        PMPI_Comm_free(&MTCORE_COMM_UR_WORLD);
    }
    if (MTCORE_COMM_HELPER_LOCAL != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_HELPER_LOCAL\n");
        PMPI_Comm_free(&MTCORE_COMM_HELPER_LOCAL);
    }

    if (MTCORE_GROUP_WORLD != MPI_GROUP_NULL)
        PMPI_Group_free(&MTCORE_GROUP_WORLD);
    if (MTCORE_GROUP_LOCAL != MPI_GROUP_NULL)
        PMPI_Group_free(&MTCORE_GROUP_LOCAL);
    if (MTCORE_GROUP_USER_WORLD != MPI_GROUP_NULL)
        PMPI_Group_free(&MTCORE_GROUP_USER_WORLD);

    if (MTCORE_H_RANKS_IN_WORLD)
        free(MTCORE_H_RANKS_IN_WORLD);
    if (MTCORE_H_RANKS_IN_LOCAL)
        free(MTCORE_H_RANKS_IN_LOCAL);
    if (MTCORE_ALL_H_RANKS_IN_WORLD)
        free(MTCORE_ALL_H_RANKS_IN_WORLD);
    if (MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD)
        free(MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD);
    if (MTCORE_ALL_NODE_IDS)
        free(MTCORE_ALL_NODE_IDS);
    if (MTCORE_USER_RANKS_IN_WORLD)
        free(MTCORE_USER_RANKS_IN_WORLD);

    MTCORE_Destroy_win_cache();

    /* Reset global variables */
    MTCORE_COMM_USER_WORLD = MPI_COMM_NULL;
    MTCORE_COMM_USER_LOCAL = MPI_COMM_NULL;
    MTCORE_COMM_LOCAL = MPI_COMM_NULL;

    MTCORE_ALL_H_RANKS_IN_WORLD = NULL;
    MTCORE_ALL_NODE_IDS = NULL;

    PMPI_Abort(MPI_COMM_WORLD, 0);

    goto fn_exit;
    /* --END ERROR HANDLING-- */
}