Example #1
0
int CollChk_check_size(MPI_Comm comm, int size, char* call)
{
    /* rank, size, counter, temp comm buffer, go flag, ok flag */
    int r, s, i, buff, go, ok;
    char err_str[COLLCHK_STD_STRLEN];
    MPI_Status st;int tag=0;   /* needed for communications */

    /* get the rank and size */
    MPI_Comm_rank(comm, &r);
    MPI_Comm_size(comm, &s);

    sprintf(err_str, COLLCHK_NO_ERROR_STR);

    if (r == 0) {
        /* send 0s size to all other processes */
        buff = size;
        PMPI_Bcast(&buff, 1, MPI_INT, 0, comm);

        /* check if all processes are ok to continue */
        go = 1; /* set the go flag */
        for (i=1; i<s; i++) {
            MPI_Recv(&ok, 1, MPI_INT, i, tag, comm, &st);
            /* if the process is not ok unset the go flag */
            if (!ok) go = 0;
        }

        /* broadcast the go flag to the other processes */
        PMPI_Bcast(&go, 1, MPI_INT, 0, comm);
    }
    else {
        /* get the size from 0 */
        PMPI_Bcast(&buff, 1, MPI_INT, 0, comm);

        /* check the size from the local size */
        if (buff != size) {
            /* at this point the size parameter is inconsistant */
            /* print an error message and send an unset ok flag to 0 */
            ok = 0;
            sprintf(err_str, "Data Size (%d) Does not match Rank 0s (%d).\n",
                    size, buff);
            MPI_Send(&ok, 1, MPI_INT, 0, tag, comm);
        }
        else {
            /* at this point the size parameter is consistant  */
            /* send a set ok flag to 0 */
            ok = 1;
            MPI_Send(&ok, 1, MPI_INT, 0, tag, comm);
        }

        /* recieve the go flag from 0 */
        PMPI_Bcast(&go, 1, MPI_INT, 0, comm);
    }

    /* if the go flag is not set exit else return */
    if (!go) {
        return CollChk_err_han(err_str, COLLCHK_ERR_ROOT, call, comm);
    }

    return MPI_SUCCESS;
}
Example #2
0
int CollChk_same_op(MPI_Comm comm, MPI_Op op, char* call)
{
    int r, s, i, go, ok;    /* rank, size, counter, go flag, ok flag */
    char buff[COLLCHK_SM_STRLEN];          /* temp communication buffer */
    char op_str[15];        /* the operation name in string format */
    char err_str[COLLCHK_STD_STRLEN];      /* error string */
    int tag=0;              /* needed for communication */
    MPI_Status st;

    /* get rank and size */
    MPI_Comm_rank(comm, &r);
    MPI_Comm_size(comm, &s);

    sprintf(err_str, COLLCHK_NO_ERROR_STR);
    sprintf(op_str, "%s", CollChk_get_op_string(op));

    if (r == 0) {
        /* send the name of the op to the other processes */
        strcpy(buff, op_str);
        PMPI_Bcast(buff, 15, MPI_CHAR, 0, comm);
        /* ask the other processes if they are ok to continue */
        go = 1;     /* sets the go flag */
        for(i=1; i<s; i++) {
            MPI_Recv(&ok, 1, MPI_INT, i, tag, comm, &st);
            /* if a process has made a bad call unset the go flag */
            if (ok != 1)
               go = 0;
        }

        /* broadcast to the go flag */
        PMPI_Bcast(&go, 1, MPI_INT, 0, comm);
    }
    else {
        /* recieve 0's op name */
        PMPI_Bcast(buff, 15, MPI_CHAR, 0, comm);
        /* check it against the local op name */
        if (strcmp(buff, op_str) != 0) {
            /* at this point the op is not consistant */
            /* print an error message and send a unset ok flag to 0 */
            ok = 0;
            sprintf(err_str, "Inconsistent operation (%s) to "
                             "Rank 0's operation(%s).", op_str, buff);
            MPI_Send(&ok, 1, MPI_INT, 0, tag, comm);
        }
        else {
            /* at this point the op is consistant */
            /* send an set ok flag to 0 */
            ok = 1;
            MPI_Send(&ok, 1, MPI_INT, 0, tag, comm);
        }
        /* get the go flag from 0 */
        PMPI_Bcast(&go, 1, MPI_INT, 0, comm);
    }
    /* if the go flag is not set exit else return */
    if (go != 1) {
        return CollChk_err_han(err_str, COLLCHK_ERR_OP, call, comm);
    }

    return MPI_SUCCESS;
}
Example #3
0
int MPI_Bcast(void* buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
{
    MPE_Log_event(START_BCAST,0,"bcast");
    int wynik=PMPI_Bcast(buffer,count,datatype,root,comm);
    MPE_Log_event(END_BCAST,0,"bcast");
    return wynik;

};
Example #4
0
/* MPI_Bcast: This function is called by MPIPerf. */
int MPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root,
		      MPI_Comm comm)
{
	if (mpiperf_is_measure_started) {
	    /* MPIPerf started measurements, so we can call our function. */
		return MPI_Bcast_user(buf, count, datatype, root, comm);
	}
	return PMPI_Bcast(buf, count, datatype, root, comm);
}
Example #5
0
void vt_sync(MPI_Comm comm, uint64_t* ltime, int64_t* offset)
{
  VT_MPI_INT myrank, myrank_host, myrank_sync;
  VT_MPI_INT numnodes;
  uint64_t time;

  MPI_Comm host_comm;
  MPI_Comm sync_comm;

  VT_SUSPEND_IO_TRACING(VT_CURRENT_THREAD);

  /* mark begin of clock synchronization */
  time = vt_pform_wtime();
  vt_enter(VT_CURRENT_THREAD, &time, vt_trc_regid[VT__TRC_SYNCTIME]);

  /* barrier at entry */
  PMPI_Barrier(comm);

  *offset = 0;
  *ltime = vt_pform_wtime();

  PMPI_Comm_rank(comm, &myrank);

  /* create communicator containing all processes on the same node */

  PMPI_Comm_split(comm, (vt_pform_node_id() & 0x7FFFFFFF), 0, &host_comm);
  PMPI_Comm_rank(host_comm, &myrank_host);

  /* create communicator containing all processes with rank zero in the
     previously created communicators */
  
  PMPI_Comm_split(comm, myrank_host, 0, &sync_comm);
  PMPI_Comm_rank(sync_comm, &myrank_sync);
  PMPI_Comm_size(sync_comm, &numnodes);

  /* measure offsets between all nodes and the root node (rank 0 in sync_comm) */

  if (myrank_host == 0)
  {
    VT_MPI_INT i;

    for (i = 1; i < numnodes; i++)
    {
      PMPI_Barrier(sync_comm);
      if (myrank_sync == i)
	*offset = sync_slave(ltime, 0, sync_comm);
      else if (myrank_sync == 0)
	*offset = sync_master(ltime, i, sync_comm);
    }
  }

  /* distribute offset and ltime across all processes on the same node */

  PMPI_Bcast(offset, 1, MPI_LONG_LONG_INT, 0, host_comm);
  PMPI_Bcast(ltime, 1, MPI_LONG_LONG_INT, 0, host_comm);

  PMPI_Comm_free(&host_comm);
  PMPI_Comm_free(&sync_comm);

  /* barrier at exit */
  PMPI_Barrier(comm);

  /* mark end of clock synchronization */
  time = vt_pform_wtime();
  vt_exit(VT_CURRENT_THREAD, &time);

  VT_RESUME_IO_TRACING(VT_CURRENT_THREAD);
}
/******************************************************************
*                                                                 *
*                   MPI Functions for Management                  *
*                                                                 *
******************************************************************/
double E_MPI_Init(int * argc, char*** argv)
{
    // assume all data files are existing
    // users may run IMB manually.
    // and copy datas to all machines manually
    parse_loggpo("paras/cmp_para", &log_cmp);
    parse_loggpo("paras/net_para", &log_net);
    parse_loggpo("paras/smp_para", &log_smp);
    parse_imb("paras/coll_para", &imb);
    // get self location HOSTNAME:CORE
    char proc_file_name[50];
    sprintf(proc_file_name, "/proc/%d/stat", getpid());
    FILE* proc_file = fopen(proc_file_name, "r");
    if (! proc_file) {
        printf("Proc File %s Open Failed!\n", proc_file_name);
    }
    int core;
    if (1 != fscanf(proc_file, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %d %*u %*u %*u",&core))
        printf("Read Core ID Failed!\n");
    char hostname[40];
    gethostname(hostname,40);
    // Send their info to rank:0
    int gsize;
    PMPI_Comm_size( MPI_COMM_WORLD, &gsize);
    location = (pLocation)malloc(gsize*sizeof(Location));
    int myrank;
    PMPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    if (myrank != 0) {
        char sendbuf[100];
        sprintf(sendbuf, "%s %d",hostname,core);
        PMPI_Send( sendbuf, 100, MPI_CHAR, 0, myrank, MPI_COMM_WORLD);
    }
    else {
        char rbuf[100];
        MPI_Status ms;
        char** ls = (char**)malloc(gsize*sizeof(char*));
        int cnt = 0;
        ls[0] = (char*)malloc(40);
        if (ls[0] == strcpy(ls[0],hostname)) 
            ++ cnt;
        location[0].node = 0;
        location[0].core = core;
        char r_hn[40];
        int r_core;
        for (int rank = 1; rank < gsize; ++ rank) {
            PMPI_Recv(rbuf, 100, MPI_CHAR, rank, rank, MPI_COMM_WORLD, &ms);
            sscanf(rbuf,"%s %d",r_hn,&r_core);
            int i;
            for (i = 0; i < cnt; ++ i) {
                if(strcmp(ls[i],r_hn) == 0) {
                    location[rank].node = i;
                    location[rank].core = r_core;
                    break;
                }
            }
            if (i == cnt) {
                ls[i] = (char*)malloc(40);
                if (ls[i] == strcpy(ls[i],r_hn)) 
                    ++ cnt;
                location[rank].node = i;
                location[rank].core = r_core;
            }
        }
        for (int i = 0; i < cnt; ++ i) 
            free(ls[i]);
        free(ls);
#if 0
        printf("from RANK 0\n");
        for (int i = 0; i < gsize; ++ i) {
            printf ("rank:%d, node:%d, core:%d\n", i, location[i].node, location[i].core );
        }
#endif 
    }
    // boardcast to all MPI ranks
    PMPI_Bcast(location, 2*gsize, MPI_INT, 0, MPI_COMM_WORLD);
#if 0
    if (myrank == 20) {
        printf("from RANK 20\n");
        for (int i = 0; i < gsize; ++ i) {
            printf ("rank:%d, node:%d, core:%d\n", i, location[i].node, location[i].core );
        }
    }
#endif
    req_list.len = 0;
    req_list.head = NULL;
	return 0;
}
Example #7
0
int main(int argc, char **argv)
{
    int *buf, i, rank, nints, len;
    char *filename, *tmp;
    int errs=0, toterrs;
    MPI_File fh;
    MPI_Status status;

    PMPI_Init(&argc,&argv);
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!rank) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    fprintf(stderr, "\n*#  Usage: simple -fname filename\n\n");
	    PMPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	filename = (char *) malloc(len+10);
	strcpy(filename, *argv);
	PMPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	PMPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
	PMPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	filename = (char *) malloc(len+10);
	PMPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    

    buf = (int *) malloc(SIZE);
    nints = SIZE/sizeof(int);
    for (i=0; i<nints; i++) buf[i] = rank*100000 + i;

    /* each process opens a separate file called filename.'myrank' */
    tmp = (char *) malloc(len+10);
    strcpy(tmp, filename);
    sprintf(filename, "%s.%d", tmp, rank);

    PMPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
		   MPI_INFO_NULL, &fh);
    PMPI_File_write(fh, buf, nints, MPI_INT, &status);
    PMPI_File_close(&fh);

    /* reopen the file and read the data back */

    for (i=0; i<nints; i++) buf[i] = 0;
    PMPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, 
                  MPI_INFO_NULL, &fh);
    PMPI_File_read(fh, buf, nints, MPI_INT, &status);
    PMPI_File_close(&fh);

    /* check if the data read is correct */
    for (i=0; i<nints; i++) {
	if (buf[i] != (rank*100000 + i)) {
	    errs++;
	    fprintf(stderr, "Process %d: error, read %d, should be %d\n", rank, buf[i], rank*100000+i);
	}
    }

    MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (rank == 0) {
	if( toterrs > 0) {
	    fprintf( stderr, "Found %d errors\n", toterrs );
	}
	else {
	    fprintf( stdout, " No Errors\n" );
	}
    }

    free(buf);
    free(filename);
    free(tmp);

    PMPI_Finalize();
    return 0; 
}
Example #8
0
int MPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
{
  return PMPI_Bcast(buf, count, datatype, root, comm);
}
Example #9
0
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
{
    int mpi_errno = MPI_SUCCESS;
    int i, j;
    int local_rank, local_nprocs, rank, nprocs, user_rank, user_nprocs;
    int local_user_rank = -1, local_user_nprocs = -1;
    int *tmp_gather_buf = NULL, node_id = 0;
    int tmp_bcast_buf[2];
    int *ranks_in_user_world = NULL, *ranks_in_world = NULL;

    MTCORE_DBG_PRINT_FCNAME();

    if (required == 0 && provided == NULL) {
        /* default init */
        mpi_errno = PMPI_Init(argc, argv);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }
    else {
        /* user init thread */
        mpi_errno = PMPI_Init_thread(argc, argv, required, provided);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    PMPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MTCORE_MY_RANK_IN_WORLD = rank;

    mpi_errno = MTCORE_Initialize_env();
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Get a communicator only containing processes with shared memory */
    mpi_errno = PMPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                                     MPI_INFO_NULL, &MTCORE_COMM_LOCAL);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Check number of helpers and number of processes */
    PMPI_Comm_rank(MTCORE_COMM_LOCAL, &local_rank);
    PMPI_Comm_size(MTCORE_COMM_LOCAL, &local_nprocs);

    if (local_nprocs < 2) {
        fprintf(stderr, "No user process found, please run with more than 2 process per node\n");
        mpi_errno = -1;
        goto fn_fail;
    }
    if (MTCORE_ENV.num_h < 1 || MTCORE_ENV.num_h >= local_nprocs) {
        fprintf(stderr, "Wrong value of number of helpers, %d. lt 1 or ge %d.\n",
                MTCORE_ENV.num_h, local_nprocs);
        mpi_errno = -1;
        goto fn_fail;
    }

    /* Specify the first N local processes to be Helper processes */
    MTCORE_H_RANKS_IN_LOCAL = calloc(MTCORE_ENV.num_h, sizeof(int));
    MTCORE_H_RANKS_IN_WORLD = calloc(MTCORE_ENV.num_h, sizeof(int));
    for (i = 0; i < MTCORE_ENV.num_h; i++) {
        MTCORE_H_RANKS_IN_LOCAL[i] = i;
    }
    mpi_errno = PMPI_Comm_group(MPI_COMM_WORLD, &MTCORE_GROUP_WORLD);
    mpi_errno = PMPI_Comm_group(MTCORE_COMM_LOCAL, &MTCORE_GROUP_LOCAL);

    mpi_errno = PMPI_Group_translate_ranks(MTCORE_GROUP_LOCAL, MTCORE_ENV.num_h,
                                           MTCORE_H_RANKS_IN_LOCAL, MTCORE_GROUP_WORLD,
                                           MTCORE_H_RANKS_IN_WORLD);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Create a user comm_world including all the users,
     * user will access it instead of comm_world */
    mpi_errno = PMPI_Comm_split(MPI_COMM_WORLD,
                                local_rank < MTCORE_ENV.num_h, 0, &MTCORE_COMM_USER_WORLD);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    PMPI_Comm_size(MTCORE_COMM_USER_WORLD, &user_nprocs);
    PMPI_Comm_rank(MTCORE_COMM_USER_WORLD, &user_rank);
    PMPI_Comm_group(MTCORE_COMM_USER_WORLD, &MTCORE_GROUP_USER_WORLD);

    /* Create a user comm_local */
    mpi_errno = PMPI_Comm_split(MTCORE_COMM_LOCAL,
                                local_rank < MTCORE_ENV.num_h, 0, &MTCORE_COMM_USER_LOCAL);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Create a helper comm_local */
    mpi_errno = PMPI_Comm_split(MTCORE_COMM_LOCAL,
                                local_rank < MTCORE_ENV.num_h, 1, &MTCORE_COMM_HELPER_LOCAL);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Exchange node id among local processes */
    /* -Only users create a user root communicator for exchanging local informations
     * between different nodes*/
    if (local_rank >= MTCORE_ENV.num_h) {
        PMPI_Comm_rank(MTCORE_COMM_USER_LOCAL, &local_user_rank);
        PMPI_Comm_size(MTCORE_COMM_USER_LOCAL, &local_user_nprocs);
        mpi_errno = PMPI_Comm_split(MTCORE_COMM_USER_WORLD,
                                    local_user_rank == 0, 1, &MTCORE_COMM_UR_WORLD);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;

        /* -Only user roots determine a node id for each USER processes */
        if (local_user_rank == 0) {
            PMPI_Comm_size(MTCORE_COMM_UR_WORLD, &MTCORE_NUM_NODES);
            PMPI_Comm_rank(MTCORE_COMM_UR_WORLD, &MTCORE_MY_NODE_ID);

            tmp_bcast_buf[0] = MTCORE_MY_NODE_ID;
            tmp_bcast_buf[1] = MTCORE_NUM_NODES;
        }
    }
    /* -User root broadcasts to other local processes */
    PMPI_Bcast(tmp_bcast_buf, 2, MPI_INT, MTCORE_ENV.num_h, MTCORE_COMM_LOCAL);
    MTCORE_MY_NODE_ID = tmp_bcast_buf[0];
    MTCORE_NUM_NODES = tmp_bcast_buf[1];

    /* Exchange node id and Helper ranks among world processes */
    ranks_in_world = calloc(nprocs, sizeof(int));
    ranks_in_user_world = calloc(nprocs, sizeof(int));
    for (i = 0; i < nprocs; i++) {
        ranks_in_world[i] = i;
    }
    mpi_errno = PMPI_Group_translate_ranks(MTCORE_GROUP_WORLD, nprocs,
                                           ranks_in_world, MTCORE_GROUP_USER_WORLD,
                                           ranks_in_user_world);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    MTCORE_ALL_NODE_IDS = calloc(nprocs, sizeof(int));
    MTCORE_ALL_H_RANKS_IN_WORLD = calloc(user_nprocs * MTCORE_ENV.num_h, sizeof(int));
    MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD = calloc(MTCORE_NUM_NODES * MTCORE_ENV.num_h, sizeof(int));
    tmp_gather_buf = calloc(nprocs * (1 + MTCORE_ENV.num_h), sizeof(int));

    tmp_gather_buf[rank * (1 + MTCORE_ENV.num_h)] = MTCORE_MY_NODE_ID;
    for (i = 0; i < MTCORE_ENV.num_h; i++) {
        tmp_gather_buf[rank * (1 + MTCORE_ENV.num_h) + i + 1] = MTCORE_H_RANKS_IN_WORLD[i];
    }
    mpi_errno = PMPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                               tmp_gather_buf, 1 + MTCORE_ENV.num_h, MPI_INT, MPI_COMM_WORLD);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    for (i = 0; i < nprocs; i++) {
        int i_user_rank = 0;
        node_id = tmp_gather_buf[i * (1 + MTCORE_ENV.num_h)];
        MTCORE_ALL_NODE_IDS[i] = node_id;

        /* Only copy helper ranks for user processes */
        i_user_rank = ranks_in_user_world[i];
        if (i_user_rank != MPI_UNDEFINED) {
            for (j = 0; j < MTCORE_ENV.num_h; j++) {
                MTCORE_ALL_H_RANKS_IN_WORLD[i_user_rank * MTCORE_ENV.num_h + j] =
                    tmp_gather_buf[i * (1 + MTCORE_ENV.num_h) + j + 1];
                MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD[node_id * MTCORE_ENV.num_h + j] =
                    tmp_gather_buf[i * (1 + MTCORE_ENV.num_h) + j + 1];
            }
        }
    }

#ifdef DEBUG
    MTCORE_DBG_PRINT("Debug gathered info ***** \n");
    for (i = 0; i < nprocs; i++) {
        MTCORE_DBG_PRINT("node_id[%d]: %d\n", i, MTCORE_ALL_NODE_IDS[i]);
    }
#endif

    /* USER processes */
    if (local_rank >= MTCORE_ENV.num_h) {
        /* Get user ranks in world */
        for (i = 0; i < user_nprocs; i++)
            ranks_in_user_world[i] = i;
        MTCORE_USER_RANKS_IN_WORLD = calloc(user_nprocs, sizeof(int));
        mpi_errno = PMPI_Group_translate_ranks(MTCORE_GROUP_USER_WORLD, user_nprocs,
                                               ranks_in_user_world, MTCORE_GROUP_WORLD,
                                               MTCORE_USER_RANKS_IN_WORLD);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;

#ifdef DEBUG
        for (i = 0; i < user_nprocs; i++) {
            MTCORE_DBG_PRINT("helper_rank_in_world[%d]:\n", i);
            for (j = 0; j < MTCORE_ENV.num_h; j++) {
                MTCORE_DBG_PRINT("    %d\n", MTCORE_ALL_H_RANKS_IN_WORLD[i * MTCORE_ENV.num_h + j]);
            }
        }
#endif
        MTCORE_DBG_PRINT("I am user, %d/%d in world, %d/%d in local, %d/%d in user world, "
                         "%d/%d in user local, node_id %d\n", rank, nprocs, local_rank,
                         local_nprocs, user_rank, user_nprocs, local_user_rank,
                         local_user_nprocs, MTCORE_MY_NODE_ID);

        MTCORE_Init_win_cache();
    }
    /* Helper processes */
    /* TODO: Helper process should not run user program */
    else {
        /* free local buffers before enter helper main function */
        if (tmp_gather_buf)
            free(tmp_gather_buf);
        if (ranks_in_user_world)
            free(ranks_in_user_world);
        if (ranks_in_world)
            free(ranks_in_world);

        MTCORE_DBG_PRINT("I am helper, %d/%d in world, %d/%d in local, node_id %d\n", rank,
                         nprocs, local_rank, local_nprocs, MTCORE_MY_NODE_ID);
        run_h_main();
        exit(0);
    }

  fn_exit:
    if (tmp_gather_buf)
        free(tmp_gather_buf);
    if (ranks_in_user_world)
        free(ranks_in_user_world);
    if (ranks_in_world)
        free(ranks_in_world);

    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    if (MTCORE_COMM_USER_WORLD != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_USER_WORLD\n");
        PMPI_Comm_free(&MTCORE_COMM_USER_WORLD);
    }
    if (MTCORE_COMM_LOCAL != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_LOCAL\n");
        PMPI_Comm_free(&MTCORE_COMM_LOCAL);
    }
    if (MTCORE_COMM_USER_LOCAL != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_USER_LOCAL\n");
        PMPI_Comm_free(&MTCORE_COMM_USER_LOCAL);
    }
    if (MTCORE_COMM_UR_WORLD != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_UR_WORLD\n");
        PMPI_Comm_free(&MTCORE_COMM_UR_WORLD);
    }
    if (MTCORE_COMM_HELPER_LOCAL != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("free MTCORE_COMM_HELPER_LOCAL\n");
        PMPI_Comm_free(&MTCORE_COMM_HELPER_LOCAL);
    }

    if (MTCORE_GROUP_WORLD != MPI_GROUP_NULL)
        PMPI_Group_free(&MTCORE_GROUP_WORLD);
    if (MTCORE_GROUP_LOCAL != MPI_GROUP_NULL)
        PMPI_Group_free(&MTCORE_GROUP_LOCAL);
    if (MTCORE_GROUP_USER_WORLD != MPI_GROUP_NULL)
        PMPI_Group_free(&MTCORE_GROUP_USER_WORLD);

    if (MTCORE_H_RANKS_IN_WORLD)
        free(MTCORE_H_RANKS_IN_WORLD);
    if (MTCORE_H_RANKS_IN_LOCAL)
        free(MTCORE_H_RANKS_IN_LOCAL);
    if (MTCORE_ALL_H_RANKS_IN_WORLD)
        free(MTCORE_ALL_H_RANKS_IN_WORLD);
    if (MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD)
        free(MTCORE_ALL_UNIQUE_H_RANKS_IN_WORLD);
    if (MTCORE_ALL_NODE_IDS)
        free(MTCORE_ALL_NODE_IDS);
    if (MTCORE_USER_RANKS_IN_WORLD)
        free(MTCORE_USER_RANKS_IN_WORLD);

    MTCORE_Destroy_win_cache();

    /* Reset global variables */
    MTCORE_COMM_USER_WORLD = MPI_COMM_NULL;
    MTCORE_COMM_USER_LOCAL = MPI_COMM_NULL;
    MTCORE_COMM_LOCAL = MPI_COMM_NULL;

    MTCORE_ALL_H_RANKS_IN_WORLD = NULL;
    MTCORE_ALL_NODE_IDS = NULL;

    PMPI_Abort(MPI_COMM_WORLD, 0);

    goto fn_exit;
    /* --END ERROR HANDLING-- */
}