Exemplo n.º 1
0
/*
 * Append one entry describing a memory segment to armci_memoffset_table.
 *
 * ptr      - local address of the segment; a NULL pointer is fatal
 * seg_size - size of the segment, stored unchanged in the table
 *
 * The master's address is broadcast to all processes, then
 * armci_cray_gettilesize() fills ptr_arr (one slot per process) and
 * tile_size.  The entry stores ptr_arr[armci_master] as the segment address
 * and the distance from it to the broadcast master address as mem_offset.
 * Calls armci_die() on a NULL ptr, a full table, or an allocation failure.
 */
void armci_memoffset_table_newentry(void *ptr, size_t seg_size) {

    void **ptr_arr;
    void *master_addr = NULL;
    size_t tile_size=0, offset=0;

    if(!ptr) armci_die("armci_memoffset_table_newentry : null ptr",0);

    /* NOTE(review): the message below names a different routine and a
     * hard-coded limit -- confirm against MAX_SEGS before rewording it */
    if(seg_count >= MAX_SEGS) /* CHECK: make it dynamic */
        armci_die("armci_cary_allocate: Increase MAX_SEGS > 512", armci_me);

    /* only the master knows its own address; share it with everybody */
    if(armci_me == armci_master) master_addr = ptr;
    armci_msg_brdcst(&master_addr, sizeof(void*), armci_master);

    ptr_arr = (void**)malloc(armci_nproc*sizeof(void*));
    if(!ptr_arr)
        armci_die("armci_memoffset_table_newentry: malloc failed", armci_nproc);

    armci_cray_gettilesize(ptr, ptr_arr, &tile_size);
    offset = (size_t)((char*)master_addr -  (char*)ptr_arr[armci_master]);

    /* enter in memoffset table */
    armci_memoffset_table[seg_count].seg_addr   = ptr_arr[armci_master];
    armci_memoffset_table[seg_count].seg_size   = seg_size;
    armci_memoffset_table[seg_count].tile_size  = tile_size;
    armci_memoffset_table[seg_count].mem_offset = offset;

#if DEBUG_
    /* size_t values are cast so the conversion specifier always matches */
    printf("%d: addr=%p seg_size=%lu tile_size=%lu offset=%lu\n", armci_me,
           ptr_arr[armci_master], (unsigned long)seg_size,
           (unsigned long)tile_size, (unsigned long)offset);
#endif

    ++seg_count;
    free(ptr_arr);
}
Exemplo n.º 2
0
/*
 * Broadcast test for one element type.
 *
 * datatype - ARMCI_INT, ARMCI_LONG, ARMCI_FLOAT or ARMCI_DOUBLE
 *
 * Six buffers of 1 .. 100000 elements are allocated.  Process 0 fills each
 * with the sequence 0,1,2,... and every other process zeroes its copy; after
 * armci_msg_brdcst() from process 0 all processes verify the sequence and
 * call ARMCI_Error() on the first mismatch.
 *
 * For any other datatype no buffer is allocated: the six broadcasts are still
 * issued, with a NULL buffer and a length of 0 bytes.
 */
void test_brdcst(int datatype)
{
    /* NULL-initialised so the broadcast/free loops are well defined even
       when datatype selects no allocation */
    void *a[6] = {NULL, NULL, NULL, NULL, NULL, NULL};
    int len[6] = {1, 10, 100, 1000, 10000, 100000};
    int datatype_size = 0;
    int i, j;

    switch(datatype)
    {
       case ARMCI_INT:
          datatype_size = sizeof(int);
          break;
       case ARMCI_LONG:
          datatype_size = sizeof(long);
          break;
       case ARMCI_FLOAT:
          datatype_size = sizeof(float);
          break;
       case ARMCI_DOUBLE:
          datatype_size = sizeof(double);
          break;
       default:
          break;
    }

    if(datatype_size > 0)
    {
       for(i = 0; i < 6; i++)
       {
          a[i] = malloc(len[i] * datatype_size);
          if(a[i] == NULL)
             ARMCI_Error("test_brdcst: malloc failed", len[i] * datatype_size);
       }

       for(i = 0; i < 6; i++)
       {
          if(me != 0)
          {
             /* receivers start from zeroes so stale data cannot pass */
             memset(a[i], 0x0, len[i] * datatype_size);
             continue;
          }

          switch(datatype)
          {
             case ARMCI_INT:
                for(j = 0; j < len[i]; j++)
                   ((int *) a[i])[j] = (int) j;
                break;
             case ARMCI_LONG:
                for(j = 0; j < len[i]; j++)
                   ((long *) a[i])[j] = (long) j;
                break;
             case ARMCI_FLOAT:
                for(j = 0; j < len[i]; j++)
                   ((float *) a[i])[j] = (float) j;
                break;
             case ARMCI_DOUBLE:
                for(j = 0; j < len[i]; j++)
                   ((double *) a[i])[j] = (double) j;
                break;
             default:
                break;
          }
       }
    }

    for(i = 0; i < 6; i++)
       armci_msg_brdcst(a[i], len[i] * datatype_size, 0);

    switch(datatype)
    {
       case ARMCI_INT:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((int *) a[i])[j] != (int) j) 
                {
                   printf("ERROR a[%d][%d] = %d != %d\n", i, j, ((int *) a[i])[j], (int) j);
                   ARMCI_Error("armci_brdcst failed (int)\n",0);
                }
          break;
       case ARMCI_LONG:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((long *) a[i])[j] != (long) j)
                {
                   printf("ERROR a[%d][%d] = %ld != %ld\n", i, j, ((long *) a[i])[j], (long) j);
                   ARMCI_Error("armci_brdcst failed (long)\n",0);
                }
          break;
       case ARMCI_FLOAT:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((float *) a[i])[j] != (float) j)
                {
                   printf("ERROR a[%d][%d] = %f != %f\n", i, j, ((float *) a[i])[j], (float) j);
                   ARMCI_Error("armci_brdcst failed (float)\n",0);
                }
          break;
       case ARMCI_DOUBLE:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((double *) a[i])[j] != (double) j)
                {
                   printf("ERROR a[%d][%d] = %f != %f\n", i, j, ((double *) a[i])[j], (double) j);
                   ARMCI_Error("armci_brdcst failed (double)\n",0);
                }
          break;
       default:
          break;
    }

    for(i = 0; i < 6; i++)
       free(a[i]);
}
Exemplo n.º 3
0
/*
 * Exercise broadcast and global sum for int, long and double buffers.
 *
 * For every power-of-two element count below MAXLENG the broadcast roots are
 * advanced, each root fills its buffer with 0,1,2,... while the others zero
 * theirs, the three buffers are broadcast and checked, and then a "+"
 * reduction over i*me is compared with i*nproc*(nproc-1)/2.  Any mismatch
 * aborts via armci_die2() or ARMCI_Error().  Timing routines run at the end.
 */
void TestGlobals()
{
#define MAXLENG 256*1024
  int *ibuf;
  long *lbuf;
  double *dbuf;
  int count;
  int ifrom=nproc-1,lfrom=1,dfrom=1;

  if (me == 0) {
    printf("Global test ... broadcast and reduction for int, long, double\n----------\n");
    fflush(stdout);
  }

  dbuf = (double *) malloc((unsigned) (MAXLENG*sizeof(double)));
  if (!dbuf)
    ARMCI_Error("TestGlobals: failed to allocated dtest", MAXLENG);
  lbuf = (long *) malloc((unsigned) (MAXLENG*sizeof(long)));
  if (!lbuf)
    ARMCI_Error("TestGlobals: failed to allocated ltest", MAXLENG);
  ibuf = (int *) malloc((unsigned) (MAXLENG*sizeof(int)));
  if (!ibuf)
    ARMCI_Error("TestGlobals: failed to allocated itest", MAXLENG);

  for (count=1; count<MAXLENG; count*=2) {
    int ibytes = count*sizeof(int);
    int dbytes = count*sizeof(double);
    int lbytes = count*sizeof(long);
    int k;

    /* rotate the roots; note that dfrom is derived from lfrom, so the
       double root always sits one rank past the long root */
    ifrom = (ifrom+1)%nproc;
    lfrom = (lfrom+1)%nproc;
    dfrom = (lfrom+1)%nproc;

#if 0
    printf("%d:ifrom=%d lfrom=%d dfrom=%d\n",me,ifrom,lfrom,dfrom);fflush(stdout);
#endif

    if (me == 0) {
      printf("Test length = %d ... ", count);
      fflush(stdout);
    }

    /* a root holds the reference sequence, everybody else holds zeroes */
    for (k=0; k<count; k++) {
      ibuf[k] = (me == ifrom) ? k : 0;
      lbuf[k] = (me == lfrom) ? (long)k : 0L;
      dbuf[k] = (me == dfrom) ? (double)k : 0.0;
    }

    /* Test broadcast */
    armci_msg_brdcst(ibuf, ibytes, ifrom);
    armci_msg_brdcst(lbuf, lbytes, lfrom);
    armci_msg_brdcst(dbuf, dbytes, dfrom);

    for (k=0; k<count; k++) {
      if (ibuf[k] != k) {
        armci_die2("int broadcast failed", k,ibuf[k]);
      }
      if (lbuf[k] != (long)k) {
        armci_die2("long broadcast failed", k,(int)lbuf[k]);
      }
      if (dbuf[k] != (double)k) {
        armci_die2("double broadcast failed", k,(int)dbuf[k]);
      }
    }

    if (me == 0) {
      printf("broadcast OK ...");
      fflush(stdout);
    }

    /* Test global sum */
    for (k=0; k<count; k++) {
      int contribution = k*me;
      ibuf[k] = contribution;
      lbuf[k] = (long) contribution;
      dbuf[k] = (double) contribution;
    }

    armci_msg_igop(ibuf, count, "+");
    armci_msg_lgop(lbuf, count, "+");
    armci_msg_dgop(dbuf, count, "+");

    for (k=0; k<count; k++) {
      int expected = k*nproc*(nproc-1)/2;
      int all_match = ibuf[k] == expected &&
                      lbuf[k] == (long)expected &&
                      dbuf[k] == (double) expected;
      if (!all_match)
        ARMCI_Error("TestGlobals: global sum failed", (int) k);
    }

    if (me == 0) {
      printf("global sums OK\n");
      fflush(stdout);
    }
  }

  /* now we get timing data */
  time_gop(dbuf,MAXLENG);
  time_reduce(dbuf,MAXLENG);

  free(ibuf);
  free(lbuf);
  free(dbuf);
}
Exemplo n.º 4
0
/*
 * Driver for the multi-threaded test.
 *
 * Options: -t threads per process, -i iterations, -s array elements,
 * -d delay in seconds before start, -h usage.
 *
 * After the message-passing layer and ARMCI are initialised, a seed is
 * broadcast from process 0 so every process draws the same random pairs,
 * random targets and "random one"; then tpp threads run thread_main() (or
 * it is called directly when NOTHREADS is defined) and everything is
 * finalised.  Returns 0.
 */
int main(int argc, char *argv[])
{
    int ch;
    extern char *optarg;
    int i, j, r;
    thread_t threads[MAX_TPP];

    /* init MP */
    MP_INIT(argc,argv);
    MP_PROCS(&size);
    MP_MYID(&rank);

    while ((ch = getopt(argc, argv, "t:s:i:d:h")) != -1) {
        switch(ch) {
            case 't': /* # of threads */
                tpp = atoi(optarg);
                if (tpp < 1 || tpp > MAX_TPP) {
                    PRINTF0("\"%s\" is improper value for -t, should be a "
                            "number between 1 and %d(MAX_TPP)\n",
                            optarg, MAX_TPP);
                    usage();
                }
                break;
            case 'i': /* # of iterations */
                iters = atoi(optarg);
                if (iters < 1) {
                    /* report the option that was actually rejected */
                    PRINTF0("\"%s\" is improper value for -i, should be a "
                            "number equal or larger than 1\n", optarg);
                    usage();
                }
                break;
            case 's': /* # of elements in the array */
                asize = atoi(optarg);
                /* validate the value just parsed, not the iteration count */
                if (asize < 1) {
                    PRINTF0("\"%s\" is improper value for -s, should be a "
                            "number equal or larger than 1\n", optarg);
                    usage();
                }
                break;
            case 'd': delay = atoi(optarg); break; /* delay before start */
            case 'h': usage(); break; /* print usage info */
        }
    }
#ifdef NOTHREADS
    tpp = 1;
    PRINTF0("Warning: NOTHREADS debug symbol is set -- running w/o threads\n");
#endif
    th_size = size * tpp;
    /* sizeof yields size_t; cast so the argument matches %d */
    PRINTF0("\nTest of multi-threaded capabilities:\n"
            "%d threads per process (%d threads total),\n"
            "%d array elements of size %d,\n"
            "%d iteration(s)\n\n", tpp, th_size, asize, (int)sizeof(atype_t),
            iters);
    if (delay) {
        /* print the pid and wait, e.g. so a debugger can be attached */
        printf("%d: %d\n", rank, getpid());
        fflush(stdout);
        sleep(delay);
        MP_BARRIER();
    }
    TH_INIT(size,tpp);
    for (i = 0; i < tpp; i++) th_rank[i] = rank * tpp + i;

#if defined(DEBUG) && defined(LOG2FILE)
    for (i = 0; i < tpp; i++) {
        /* patch the three-digit global thread rank into the log file name */
        fname[10] = '0' + th_rank[i] / 100;
        fname[11] = '0' + th_rank[i] % 100 / 10;
        fname[12] = '0' + th_rank[i] % 10;
        dbg[i] = fopen(fname, "w");
    }
#endif
    for (i = 0; i < tpp; i++)
        prndbg(i, "proc %d, thread %d(%d):\n", rank, i, th_rank[i]);

    /* init ARMCI */
    ARMCI_Init();

    /* set global seed (to ensure same random sequence across procs) */
    time_seed = (unsigned)time(NULL);
    armci_msg_brdcst(&time_seed, sizeof(time_seed), 0);
    srand(time_seed); rand();
    prndbg(0, "seed = %u\n", time_seed);
    /* random pairs */
    pairs = calloc(th_size, sizeof(int));
    assert(pairs != NULL);
    for (i = 0; i < th_size; i++) pairs[i] = -1;
    /* NOTE(review): each pass pairs two still-unpaired threads, so this loop
     * looks like it cannot finish when th_size is odd (assuming RND(0,n)
     * yields values in [0,n)) -- confirm callers always use an even total */
    for (i = 0; i < th_size; i++) {
        if (pairs[i] != -1) continue;
        r = RND(0, th_size);
        while (i == r || pairs[r] != -1 ) r = RND(0, th_size);
        pairs[i] = r; pairs[r] = i;
    }
    for (i = 0, cbufl = 0; i < th_size; i++)
        cbufl += sprintf(cbuf + cbufl, " %d->%d|%d->%d",
                         i, pairs[i], pairs[i], pairs[pairs[i]]);
    prndbg(0, "random pairs:%s\n", cbuf);
    /* random targets */
    rnd_tgts = calloc(th_size, sizeof(int));
    assert(rnd_tgts != NULL);
    for (i = 0, cbufl = 0; i < th_size; i++) {
        rnd_tgts[i] = RND(0, th_size);
        /* a thread must not target itself: redo this slot */
        if (rnd_tgts[i] == i) { i--; continue; }
        cbufl += sprintf(cbuf + cbufl, " %d", rnd_tgts[i]);
    }
    prndbg(0, "random targets:%s\n", cbuf);
    /* random one */
    rnd_one = RND(0, th_size);
    prndbg(0, "random one = %d\n", rnd_one);

    /* allocate outside assert() so the calls survive a build with NDEBUG */
    ptrs1 = calloc(th_size, sizeof(void *));
    assert(ptrs1 != NULL);
    ptrs2 = calloc(th_size, sizeof(void *));
    assert(ptrs2 != NULL);
#ifdef NOTHREADS
    thread_main((void *)(long)0);
#else
    for (i = 0; i < tpp; i++) THREAD_CREATE(threads + i, thread_main, (void *)(long)i);
    for (i = 0; i < tpp; i++) THREAD_JOIN(threads[i], NULL);
#endif

    MP_BARRIER();
    PRINTF0("Tests Completed\n");

    /* clean up */
#if defined(DEBUG) && defined(LOG2FILE)
    for (i = 0; i < tpp; i++) fclose(dbg[i]);
#endif
    ARMCI_Finalize();
    TH_FINALIZE();
    MP_FINALIZE();

	return 0;
}
Exemplo n.º 5
0
/*
 * Broadcast len bytes of buffer from root over comm.
 *
 * When comm compares as MPI_IDENT to MPI_COMM_WORLD the ARMCI broadcast
 * armci_msg_brdcst() is used; any other communicator goes through
 * MPI_Bcast() with MPI_BYTE elements.
 */
void ARMCI_Bcast_(void *buffer, int len, int root, ARMCI_Comm comm) {
    int relation;

    MPI_Comm_compare(comm, MPI_COMM_WORLD, &relation);

    if (relation != MPI_IDENT) {
        MPI_Bcast(buffer, len, MPI_BYTE, root, (MPI_Comm)comm);
        return;
    }

    armci_msg_brdcst(buffer, len, root);
}
Exemplo n.º 6
0
/*
 * Build the cluster description (armci_clus_info, armci_nclus, armci_clus_me,
 * armci_clus_first, armci_clus_last) from a list of host names.
 *
 * names - with CLUSTER defined: armci_nproc NUL-terminated host names stored
 *         back to back, one per process in rank order (only read on process
 *         0); without CLUSTER: a single host name.
 *
 * With CLUSTER defined, process 0 groups consecutive identical names into
 * cluster nodes, then the node count and the node table are broadcast to all
 * processes and each process looks up the node it belongs to.  Without
 * CLUSTER a single node covering all processes is created.
 * Calls armci_die() on allocation failure or inconsistent input.
 */
static void process_hostlist(char *names)
{
#ifdef CLUSTER

    int i, cluster=0;
    char *s,*master;
    int len, root=0;

    /******** inspect list of machine names to determine locality ********/
    if (armci_me==0){
     
      /* first find out how many cluster nodes we got */
      armci_nclus =1; s=master=names; 
      for(i=1; i < armci_nproc; i++){
        /* step to the next packed name */
        s += strlen(s)+1;
        if(strcmp(s,master)){
          /* we found a new machine name on the list */
          master = s;
          armci_nclus++;
          /*fprintf(stderr,"new name %s len =%d\n",master, strlen(master));*/

        }
      }

      /* allocate memory */ 
      armci_clus_info = (armci_clus_t*)malloc(armci_nclus*sizeof(armci_clus_t));
      if(!armci_clus_info)armci_die("malloc failed for clusinfo",armci_nclus);

      /* fill the data structure  -- go through the list again */ 
      s=names;
      master="*-"; /* impossible hostname */
      cluster =0;
      for(i=0; i < armci_nproc; i++){
        if(strcmp(s,master)){
          /* we found a new machine name on the list */
          master = s;
          armci_clus_info[cluster].nslave=1;
          armci_clus_info[cluster].master=i;
          /* NOTE(review): unbounded copy -- assumes the hostname field is
           * large enough for every name; verify against armci_clus_t */
          strcpy(armci_clus_info[cluster].hostname, master); 

#ifdef    CHECK_NODE_NAMES
          /* need consecutive task id allocated on the same node
           * the current test only compares hostnames against first cluster */
          if(cluster) if(!strcmp(master,armci_clus_info[0].hostname)){
               /* we have seen that hostname before */
               fprintf(stderr, "\nIt appears that tasks allocated on the same");
               fprintf(stderr, " host machine do not have\n");
               fprintf(stderr, "consecutive message-passing IDs/numbers. ");
               fprintf(stderr,"This is not acceptable \nto the ARMCI library ");
               fprintf(stderr,"as it prevents SMP optimizations and would\n");
               fprintf(stderr,"lead to poor resource utilization.\n\n");
               fprintf(stderr,"Please contact your System Administrator ");
               fprintf(stderr,"or, if you can, modify the ");
#              if defined(MPI)
                 fprintf(stderr,"MPI");
#              elif defined(TCGMSG)
                 fprintf(stderr,"TCGMSG");
#              elif defined(PVM)
                 fprintf(stderr,"PVM");
#              endif
               fprintf(stderr,"\nmessage-passing job startup configuration.\n\n");
#ifdef HITACHI
               fprintf(stderr,"On Hitachi it can be done by setting environment variable MPIR_RANK_NO_ROUND, for example\n  setenv MPIR_RANK_NO_ROUND yes\n\n");
#endif
               sleep(1);
               armci_die("Cannot run: improper task to host mapping!",0); 
          }
#endif
          cluster++;

        }else{
          /* the process is still on the same host */
          armci_clus_info[cluster-1].nslave++;
        }
        s += strlen(s)+1;
      }

      /* both passes over the list must agree on the number of nodes */
      if(armci_nclus != cluster)
         armci_die("inconsistency processing clusterinfo",armci_nclus);

    }
    /******** process 0 got all data                             ********/

   /* now broadcast locality info struct to all processes 
    * two steps are needed because of the unknown length of hostname list
    */
    len = sizeof(int);
    armci_msg_brdcst(&armci_nclus, len, root);

    if(armci_me){
      /* allocate memory */ 
      armci_clus_info = (armci_clus_t*)malloc(armci_nclus*sizeof(armci_clus_t));
      if(!armci_clus_info)armci_die("malloc failed for clusinfo",armci_nclus);
    }

    len = sizeof(armci_clus_t)*armci_nclus;
    armci_msg_brdcst(armci_clus_info, len, root);

    /******** all processes got all data                           ********/

    /* now determine current cluster node id by comparing me to master */
    /* default to the last node; an earlier node is chosen when this rank is
     * below the first rank of the node that follows it */
    armci_clus_me = armci_nclus-1;
    for(i =0; i< armci_nclus-1; i++)
           if(armci_me < armci_clus_info[i+1].master){
              armci_clus_me=i;
              break;
           }
#else

    /* no cluster support: one node that holds every process */
    armci_clus_me=0;
    armci_nclus=1;
    armci_clus_info = (armci_clus_t*)malloc(armci_nclus*sizeof(armci_clus_t));
    if(!armci_clus_info)armci_die("malloc failed for clusinfo",armci_nclus);
    strcpy(armci_clus_info[0].hostname, names); 
    armci_clus_info[0].master=0;
    armci_clus_info[0].nslave=armci_nproc;
#endif

    /* first and last process rank on this process's node */
    armci_clus_first = armci_clus_info[armci_clus_me].master;
    armci_clus_last = armci_clus_first +armci_clus_info[armci_clus_me].nslave-1;

}
/*
 * Read the sparse matrix on process 0 and distribute it to all processes.
 *
 * n        - out: order of the matrix (broadcast to every process)
 * non_zero - out: number of non-zero entries (broadcast to every process)
 * row_ind  - out: malloc'ed array of *n+1 row indices; it is not freed here
 * col_ind  - out: array created by create_array() holding column indices
 * values   - out: array created by create_array() holding non-zero values
 * vec      - out: x-vector created by create_array(), initialised here
 * svec     - out: solution vector created by create_array()
 *
 * Process 0 reads "Sparse-MPI/av41092.rua.data", converts the indices from
 * 1-based to 0-based, and pushes each process's share of column indices and
 * values with ARMCI_Put().  Any read, allocation or put failure ends in
 * ARMCI_Error().  Returns 0.
 */
static int sparse_initialize(int *n, int *non_zero, int **row_ind,
                             int **col_ind, double **values, double **vec,
                             double **svec) {

    int i, j, rc, max=-1, *row_ind_tmp=NULL, *tmp_indices=NULL;
    double *tmp_values=NULL;
    unsigned long len;
    FILE *fp=NULL;

    /* Broadcast order of matrix */
    if(me==0) {
        if((fp=fopen("Sparse-MPI/av41092.rua.data", "r")) == NULL)
            ARMCI_Error("Error: Input file not found", me);
        fortran_indexing = 1; /* This is 1 for Harwell-Boeing format matrices */
        if(fscanf(fp, "%d", n) != 1)
            ARMCI_Error("Error: cannot read order of matrix", me);
        /* a non-positive order would make the (*n+1) allocation below bogus */
        if(*n < 1)
            ARMCI_Error("Error: order of matrix must be positive", *n);
        if(*n%nproc)
            ARMCI_Error("# of rows is not divisible by # of processors", nproc);
        if(*n > ROW)
            ARMCI_Error("order is greater than defined variable ROW", ROW);
    }
    len = sizeof(int);
    armci_msg_brdcst(n, (int)len, 0);

    /* Broad cast number of non_zeros */
    if(me==0 && fscanf(fp, "%d", non_zero) != 1)
        ARMCI_Error("Error: cannot read number of non-zeros", me);
    armci_msg_brdcst(non_zero, (int)len, 0);

    /* Broadcast row indices */
    len = (*n+1)*sizeof(int);
    row_ind_tmp = (int *)malloc(len);
    if(row_ind_tmp == NULL)
        ARMCI_Error("Error: malloc failed for row indices", *n+1);
    if(me==0)for(i=0; i<*n+1; i++) {
            if(fscanf(fp, "%d", &row_ind_tmp[i]) != 1)
                ARMCI_Error("Error: cannot read row index", i);
            if(fortran_indexing) --row_ind_tmp[i];
        }
    armci_msg_brdcst(row_ind_tmp, (int)len, 0);

    load_balance(*n, *non_zero, row_ind_tmp);

    /* find how much temporary storage is needed at the maximum */
    if(me==0) {
        for(max=-1,j=0; j<nproc; j++) if(max<proc_nz_list[j]) max=proc_nz_list[j];
        if(max<0) ARMCI_Error(" max cannot be negative", max);
    }

    /* Broadcast the maximum number of elements */
    len = sizeof(int);
    armci_msg_brdcst(&max, (int)len, 0);

    /* create the column subscript array (int entries) */
    if(me==0) printf("  Creating Column Subscript Array ... \n\n");
    create_array((void**)col_ind, sizeof(int), 1, &max);

    /* create the array of non-zero values of the sparse matrix */
    if(me==0) printf("  Creating ValueArray (CompressedSparseMatrix) ...\n\n");
    create_array((void**)values, sizeof(double), 1, &max);

    /* create the x-vector and the solution vector */
    if(me==0) printf("  Creating Vectors ... \n\n");
    create_array((void**)vec,  sizeof(double),1, &max);
    create_array((void**)svec, sizeof(double),1, &max);
    armci_msg_barrier();


    /* Process 0 distributes the column indices and non_zero values to
       respective processors*/
    if(me == 0) {
        tmp_indices = (int *)malloc(max*sizeof(int));
        tmp_values  = (double *)malloc(max*sizeof(double));
        /* malloc(0) may legitimately return NULL, so only test when max > 0 */
        if(max > 0 && (tmp_indices == NULL || tmp_values == NULL))
            ARMCI_Error("Error: malloc failed for staging buffers", max);

        for(j=0; j<nproc; j++) {
            for(i=0; i<proc_nz_list[j]; i++) {
                if(fscanf(fp, "%d", &tmp_indices[i]) != 1)
                    ARMCI_Error("Error: cannot read column index", i);
                if(fortran_indexing) --tmp_indices[i];
            }
            /* rc = fread(tmp_indices, sizeof(int), proc_nz_list[j], fp); */
            if((rc=ARMCI_Put(tmp_indices, col_ind[j], proc_nz_list[j]*sizeof(int), j)))
                ARMCI_Error("armci_nbput failed\n",rc);
        }
        for(j=0; j<nproc; j++) {
            for(i=0; i<proc_nz_list[j]; i++)
                if(fscanf(fp, "%lf", &tmp_values[i]) != 1)
                    ARMCI_Error("Error: cannot read matrix value", i);
            if((rc=ARMCI_Put(tmp_values, values[j], proc_nz_list[j]*sizeof(double), j)))
                ARMCI_Error("armci_nbput failed\n",rc);
        }
    }
    ARMCI_AllFence();
    armci_msg_barrier();
    ARMCI_AllFence();

    /* initializing x-vector */
    if(me==0) for(i=0; i<proc_nz_list[me]; i++) vec[me][i] = (i+1);
    else for(i=0; i<proc_nz_list[me]; i++) vec[me][i]=me*proc_nz_list[me-1]+(i+1);

#if 0
    if(me==0) {
        printf("max = %d\n", max);
        for(i=0; i<max; i++)  printf("%.1f ", values[me][i]);
        printf("\n");
    }
#endif

    /* hand the row index array to the caller */
    *row_ind = row_ind_tmp;
    if(me==0) {
        free(tmp_indices);
        free(tmp_values);
        fclose(fp);
    }
    return 0;
}
Exemplo n.º 8
0
/*
 * Derive the cluster layout from the host name list.
 *
 * names - with CLUSTER defined: one NUL-terminated host name per process,
 *         stored back to back in rank order (only read on process 0);
 *         without CLUSTER: a single host name.
 *
 * Sets armci_nclus, armci_clus_info, armci_clus_me, armci_clus_first and
 * armci_clus_last.  Calls armci_die() when an allocation fails or the host
 * list is inconsistent.
 */
static void process_hostlist(char *names)
{
#ifdef CLUSTER

    int k, nfound=0;
    char *cur,*prev;
    int nbytes, src=0;

    /******** inspect list of machine names to determine locality ********/
    if (armci_me==0){

        /* pass 1: count the runs of identical consecutive names */
        armci_nclus = 1;
        prev = names;
        cur = names;
        for(k=1; k < armci_nproc; k++){
            cur += strlen(cur)+1;
            if(strcmp(cur,prev) != 0){
                /* a name that differs from its predecessor opens a new node */
                prev = cur;
                armci_nclus++;
            }
        }

        /* one table slot per node */
        armci_clus_info = (armci_clus_t*)malloc(armci_nclus*sizeof(armci_clus_t));
        if(!armci_clus_info){
            armci_die("malloc failed for clusinfo",armci_nclus);
        }

        /* pass 2: walk the list again and fill the table */
        prev = "*-"; /* impossible hostname */
        nfound = 0;
        for(k=0, cur=names; k < armci_nproc; k++, cur += strlen(cur)+1){
            if(strcmp(cur,prev) == 0){
                /* the process is still on the same host */
                armci_clus_info[nfound-1].nslave++;
                continue;
            }

            /* first process of a new node */
            prev = cur;
            armci_clus_info[nfound].nslave = 1;
            armci_clus_info[nfound].master = k;
            strcpy(armci_clus_info[nfound].hostname, prev);

#ifdef    CHECK_NODE_NAMES
            /* ranks on one node must be consecutive; this test only
             * compares the new name against the first node */
            if(nfound && !strcmp(prev,armci_clus_info[0].hostname)){
                /* we have seen that hostname before */
                fprintf(stderr, "ARMCI supports block process mapping only\n");
                armci_die("Cannot run: improper task to host mapping!",0);
            }
#endif
            nfound++;
        }

        if(armci_nclus != nfound){
            armci_die("inconsistency processing clusterinfo",armci_nclus);
        }
    }
    /******** process 0 got all data                             ********/

    /* share the table with everybody: the node count goes first because the
     * receivers need it to size their copy of the table
     */
    nbytes = sizeof(int);
    armci_msg_brdcst(&armci_nclus, nbytes, src);

    if(armci_me != 0){
        armci_clus_info = (armci_clus_t*)malloc(armci_nclus*sizeof(armci_clus_t));
        if(!armci_clus_info){
            armci_die("malloc failed for clusinfo",armci_nclus);
        }
    }

    nbytes = sizeof(armci_clus_t)*armci_nclus;
    armci_msg_brdcst(armci_clus_info, nbytes, src);

    /******** all processes got all data                           ********/

    /* my node is the one just before the first node whose master rank lies
     * above mine; if there is none, it is the last node */
    armci_clus_me = armci_nclus-1;
    for(k=1; k < armci_nclus; k++){
        if(armci_me < armci_clus_info[k].master){
            armci_clus_me = k-1;
            break;
        }
    }
#else

    /* without cluster support everything is one node */
    armci_nclus=1;
    armci_clus_me=0;
    armci_clus_info = (armci_clus_t*)malloc(armci_nclus*sizeof(armci_clus_t));
    if(!armci_clus_info){
        armci_die("malloc failed for clusinfo",armci_nclus);
    }
    strcpy(armci_clus_info[0].hostname, names);
    armci_clus_info[0].master=0;
    armci_clus_info[0].nslave=armci_nproc;
#endif

    /* Starting process ID on my node */
    armci_clus_first = armci_clus_info[armci_clus_me].master;

    /* Last process ID on my node */
    armci_clus_last = armci_clus_info[armci_clus_me].nslave - 1
                      + armci_clus_first;

}