void cp_transpose_bck_prim_dvr(double *c,int *icoef_form,double *c_temp,
                               int nstate,int nstate_max,int ncoef,
                               int nstate_proc,int nstate_proc_max,
                               int nstate_proc_min,
                               int nstate_ncoef_proc_max,
                               int nstate_ncoef_proc,
                               int nkf1,int nkf2,int nkf3,
                               int num_proc,int myid,MPI_Comm comm)

/*======================================================================*/
/* Back transpose of the coefficient array: takes c from transposed     */
/* form (*icoef_form == 1) to normal form (*icoef_form == 0) using one  */
/* Alltoall exchange. All arrays are addressed starting at index 1.     */
/*                                                                      */
/* c                     = coefficients; input and output               */
/* icoef_form            = must point to 1 on entry; set to 0           */
/* c_temp                = work array (staging + receive buffer)        */
/* nstate                = total number of states                       */
/* nstate_max            = documented as nstate_proc_max*num_proc       */
/* ncoef                 = stride between states in the output c        */
/* nstate_proc           = number of states on this processor           */
/* nstate_proc_max/min   = largest/smallest number of states on any     */
/*                         processor                                    */
/* nstate_ncoef_proc     = coefficients per state on this processor in  */
/*                         the transposed data                          */
/* nstate_ncoef_proc_max = largest such count on any processor          */
/* nkf1,nkf2,nkf3        = the nkf2*nkf3 units of nkf1 values each are  */
/*                         divided over the processors (presumably the  */
/*                         DVR grid dimensions -- confirm with caller)  */
/* num_proc,myid,comm    = communicator size, this rank, communicator   */
/*======================================================================*/
{   /*begin routine */

#include "../typ_defs/typ_mask.h"

  int ip,k,is;
  int state_rem,plane_rem,planes_each;
  int big_blk,small_blk;
  int dst_off,src_off,filled;
  int xfer,buf_len;
  int *blk_start,*blk_len;

/*----------------------------------------------------------------------*/
/* 0) The data must arrive in transposed form                           */

  if((*icoef_form) != 1){
    printf("@@@@@@@@@@@@@@@@@@@@_ERROR_@@@@@@@@@@@@@@@@@@@@\n");
    printf("The coefficients must be in transposed form\n");
    printf("on state processor %d in cp_transpose_bck_prim \n",myid);
    printf("@@@@@@@@@@@@@@@@@@@@_ERROR_@@@@@@@@@@@@@@@@@@@@\n");
    fflush(stdout);
    exit(1);
  }/*endif*/

  *icoef_form = 0;

/*----------------------------------------------------------------------*/
/* I) Pad every per-processor block of c out to the common block size   */
/*    nstate_proc_max*nstate_ncoef_proc_max (only needed when the       */
/*    states or the nkf2*nkf3 units do not divide evenly)               */

  state_rem = nstate % num_proc;
  plane_rem = (nkf2*nkf3) % num_proc;

  /* 1-based work vectors: shift the base pointer down by one element */
  blk_start = (int *) cmalloc(num_proc*sizeof(int))-1;
  blk_len   = (int *) cmalloc(num_proc*sizeof(int))-1;

  if(!((state_rem == 0)&&(plane_rem == 0))){

    big_blk   = nstate_proc_max*nstate_ncoef_proc;
    small_blk = nstate_proc_min*nstate_ncoef_proc;

    /* the first state_rem processors hold nstate_proc_max states, */
    /* the remaining ones nstate_proc_min                          */
    blk_start[1] = 0;
    for(ip=1; ip<=state_rem; ip++){
      blk_len[ip]     = big_blk;
      blk_start[ip+1] = blk_start[ip] + big_blk;
    }/*endfor*/
    for(ip=state_rem+1; ip<num_proc; ip++){
      blk_len[ip]     = small_blk;
      blk_start[ip+1] = blk_start[ip] + small_blk;
    }/*endfor*/
    blk_len[num_proc] = small_blk;

    /* a) zero the staging array */
    buf_len = num_proc*nstate_proc_max*nstate_ncoef_proc_max;
    for(k=1; k<=buf_len; k++){
      c_temp[k] = 0.00;
    }/*endfor*/

    /* b) scatter the packed blocks of c into padded slots of c_temp */
    for(ip=1; ip<=num_proc; ip++){
      dst_off = (ip-1)*nstate_proc_max*nstate_ncoef_proc_max;
      src_off = blk_start[ip];
      for(k=1; k<=blk_len[ip]; k++){
        c_temp[dst_off+k] = c[src_off+k];
      }/*endfor*/
    }/*endfor*/

    /* c) copy the padded layout back into c */
    buf_len = nstate_ncoef_proc_max*nstate_max;
    for(k=1; k<=buf_len; k++){
      c[k] = c_temp[k];
    }/*endfor*/

  }/*endif: remainder */

/*----------------------------------------------------------------------*/
/* II) Exchange the equal-sized blocks: c -> c_temp                     */

  xfer = nstate_ncoef_proc_max*nstate_proc_max;
  Alltoall(c+1,xfer,MPI_DOUBLE,c_temp+1,xfer,MPI_DOUBLE,comm);

/*----------------------------------------------------------------------*/
/* III) Unpack the received blocks into c, one state after another      */

  /* number of values contributed by each processor per state: the */
  /* first plane_rem processors carry one extra unit of nkf1 values */
  planes_each = (nkf2*nkf3)/num_proc;
  for(ip=1; ip<=num_proc; ip++){
    blk_len[ip] = (ip <= plane_rem ? (planes_each+1)*nkf1
                                   : planes_each*nkf1);
  }/*endfor*/

  buf_len = nstate_ncoef_proc_max*nstate_max;
  for(k=1; k<=buf_len; k++){
    c[k] = 0.0;
  }/*endfor*/

  for(is=1; is<=nstate_proc; is++){
    dst_off = (is-1)*ncoef;
    filled  = 0;                    /* values already placed for state is */
    for(ip=1; ip<=num_proc; ip++){
      src_off = (is-1)*blk_len[ip]
              + (ip-1)*nstate_proc_max*nstate_ncoef_proc_max;
      for(k=1; k<=blk_len[ip]; k++){
        filled++;
        c[dst_off+filled] = c_temp[src_off+k];
      }/*endfor*/
    }/*endfor*/
  }/*endfor*/

  cfree(&blk_start[1]);
  cfree(&blk_len[1]);

/*========================================================================*/
}/*end routine*/
Esempio n. 2
0
/*
 * Benchmark driver for two concurrently running communicators.
 *
 * World ranks [0, inner_ranks) form the "inner" communicator and the
 * remaining ranks form the "outer" one. The project's Alltoall() routine is
 * run on the inner communicator as a warm-up, then alone (run1), then
 * together with the outer communicator (run2). Rank 0 of the inner
 * communicator appends "run_index,run1,run2" to timings.out, where run1 and
 * run2 are the values returned by Alltoall().
 *
 * Options (every option defaults to 0 when it is not supplied):
 *   -s <int>  msg_size, forwarded to Alltoall()
 *   -r <int>  inner_ranks, size of the inner communicator
 *   -l <int>  loops, forwarded to Alltoall()
 *   -i <int>  run_index, tag written in front of the timings
 *
 * Option parsing stops at the first unrecognised option.
 * Exits with status 1 when inner_ranks is outside [0, num_ranks] or when the
 * rank map cannot be allocated.
 */
int main(int argc, char**argv){

  int num_ranks, rank, split_num_ranks, split_rank;
  int outer_ranks, inner_ranks = 0;
  int msg_size = 0, loops = 0;
  int run_index = 0;
  MPI_Comm split_comm;
  FILE * timings;

  //Parse options
  //getopt() returns int; storing it in a plain char makes the -1 test
  //fail on platforms where char is unsigned.
  int c;
  int parsing = 1;
  while (parsing && (c = getopt (argc, argv, "s:r:l:i:")) != -1){
    switch (c)
      {
      case 's':
        sscanf(optarg, "%d", &msg_size);
        break;
      case 'r':
        sscanf(optarg, "%d", &inner_ranks);
        break;
      case 'l':
        sscanf(optarg, "%d", &loops);
        break;
      case 'i':
        sscanf(optarg, "%d", &run_index);
        break;
      default:
        printf("Unrecognized option: %c\n", optopt);
        parsing = 0;
        break;
      }
  }
  printf("Successfully parsed options as: \n");
  printf("\tmsg_size: %d, inner_ranks: %d, loops: %d, run_index: %d\n", msg_size, inner_ranks, loops, run_index);

  //Open timings.out for writing; a failure is reported and the run goes on
  //without recording timings
  timings = fopen("timings.out", "a");
  if(timings == NULL){
    printf("Error: cannot open timings.out\n");
  }

  //Start MPI, get num_ranks
  MPI_Init(NULL, NULL);
  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
  if(num_ranks == 0){
    printf("MPI_Comm_size failure\n");
    exit(1);
  }

  //Get global rank (needed below so that only rank 0 reports errors)
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  //Calculate comm sizes; an out-of-range inner_ranks would index outside
  //the splitter array below, so it is fatal
  outer_ranks = num_ranks - inner_ranks;
  if(outer_ranks < 0 || inner_ranks < 0){
    if(rank == 0){
      printf("Error: bad comm sizes. They should be positive\n");
    }
    exit(1);
  }

  //splitter[r] is the communicator id (colour) of world rank r
  int * splitter = (int*)malloc(sizeof(int)*num_ranks);
  if(splitter == NULL){
    printf("Error: cannot allocate rank map\n");
    exit(1);
  }
  for(int i = inner_ranks; i < num_ranks; i++) splitter[i] = OUTER_COMM;
  for(int i = 0; i < inner_ranks; i++) splitter[i] = INNER_COMM;

  //split communicator
  MPI_Comm_split(MPI_COMM_WORLD, splitter[rank], 1, &split_comm);
  MPI_Comm_size(split_comm, &split_num_ranks);
  MPI_Comm_rank(split_comm, &split_rank);
  MPI_Barrier(MPI_COMM_WORLD);


  //run the inner communicator as a warm-up, seems to reduce variance
  if(splitter[rank] == INNER_COMM){
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //start network counters region 1
  MPI_Pcontrol(1);

  //run the inside alone, as a baseline
  float run1 = 0.0f;
  if(splitter[rank] == INNER_COMM){
    run1 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //start network counters region 2
  MPI_Pcontrol(2);

  //run both communicators
  float run2 = 0.0f;
  if(splitter[rank] == INNER_COMM){
    run2 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }else{
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }

  //stop network counters
  MPI_Pcontrol(0);

  //print timings (skipped when timings.out could not be opened)
  if(splitter[rank] == INNER_COMM && split_rank==0 && timings != NULL){
    fprintf(timings, "%d,%f,%f\n", run_index, run1, run2);
  }

  if(timings != NULL){
    fclose(timings);
  }
  free(splitter);
  MPI_Finalize();
  exit(0);
}
Esempio n. 3
0
void cp_state_gvec_trans_bck(double *c,double *c_temp,double *ct_temp,
                             int nstate,int nstate_max,int ncoef,
                             int nstate_proc,int nstate_proc_max,
                             int nstate_ncoef_proc_max,int nstate_ncoef_proc,
                             int num_proc,int myid,MPI_Comm comm)

/*======================================================================*/
/* Back transformation of the state coefficients: the transposed data   */
/* in c_temp is repacked into ct_temp, exchanged with Alltoall back     */
/* into c_temp, and finally unpacked state by state into c.             */
/* All arrays are addressed starting at index 1.                        */
/*                                                                      */
/* nstate            = Total number of states                           */
/* nstate_proc       = number of states on this processor               */
/* nstate_proc_max   = maximum number of states on any processor        */
/* ncoef             = Total number of coefficients in a state          */
/* nstate_ncoef_proc = Number of coefficients in a state on this        */
/*                     processor in the transposed data (not used here) */
/* nstate_ncoef_proc_max = Maximum number of coefficients in a state on */
/*                     any processor in the transposed data             */
/* nstate_max        = nstate_proc_max*num_proc                         */
/* c                 = output: nstate_proc x ncoef array of coefficients*/
/* c_temp            = input: transposed data; overwritten by the       */
/*                     exchange                                         */
/* ct_temp           = scratch space holding the send buffer            */
/* num_proc,myid,comm= communicator size, this rank, communicator       */
/*======================================================================*/
{   /*begin routine */

  int k,is,ip;
  int state_rem,coef_rem;
  int n_copy,short_blk;
  int rd,wr;
  int n_here;
  int xfer;
#ifdef DEBUG
  int pause_key;
#endif

/*------------------------------------------------------------------------*/
/* I) Repack c_temp into ct_temp so that every processor block has the    */
/*    full size nstate_proc_max*nstate_ncoef_proc_max                     */

  state_rem = (nstate % num_proc);

  if(state_rem == 0){

    /* every block is already full size: plain copy */
    n_copy = nstate_ncoef_proc_max*nstate;
    for(k=1;k<=n_copy;k++){
      ct_temp[k] = c_temp[k];
    }/*endfor*/

  }else{

    /* A) the first state_rem blocks are full size: plain copy */
    n_copy = nstate_proc_max*nstate_ncoef_proc_max*state_rem;
    for(k=1;k<=n_copy;k++){
      ct_temp[k] = c_temp[k];
    }/*endfor*/

    /* B) the remaining blocks hold one state less; each is written at  */
    /*    full-block spacing, leaving a gap of nstate_ncoef_proc_max    */
    short_blk = (nstate_proc_max-1)*nstate_ncoef_proc_max;
    rd = n_copy;
    wr = n_copy;
    for(ip=state_rem+1;ip<=num_proc;ip++){
      for(k=1;k<=short_blk;k++){
        ct_temp[wr+k] = c_temp[rd+k];
      }/*endfor*/
      rd += short_blk;
      wr += (short_blk+nstate_ncoef_proc_max);
    }/*endfor*/

  }/*endif: remainder */

#ifdef DEBUG
  if(myid==0){
    printf("1st Step: This is ct_temp\n");
  }/*endif*/
  for(is=1;is<=num_proc;is++){
    if(myid==is-1){
      for(k=1;k<=nstate_ncoef_proc_max*nstate;k++){
         printf("%d %d %g\n",k,is,ct_temp[k]);
      }/*endfor*/
    }/*endif*/
    scanf("%d",&pause_key);
    Barrier(comm);
  }/*endfor*/
#endif

/*------------------------------------------------------------------------*/
/* II) Exchange the equal-sized blocks: ct_temp -> c_temp                 */

  xfer = nstate_ncoef_proc_max*nstate_proc_max;
  Alltoall(ct_temp+1,xfer,MPI_DOUBLE,c_temp+1,xfer,MPI_DOUBLE,comm);

#ifdef DEBUG
  if(myid==0){
    printf("2nd step: This is c_temp\n");
  }/*endif*/
  for(is=1;is<=num_proc;is++){
    if(myid==is-1){
      for(k=1;k<=nstate_ncoef_proc_max*nstate_max;k++){
         printf("%d %d %g\n",k,is,c_temp[k]);
      }/*endfor*/
    }/*endif*/
    scanf("%d",&pause_key);
    Barrier(comm);
  }/*endfor*/
#endif

/*------------------------------------------------------------------------*/
/* III) Unpack the received blocks into c                                 */

  n_copy = nstate_ncoef_proc_max*nstate_max;
  for(k=1;k<=n_copy;k++){
    c[k] = 0.0;
  }/*endfor*/

  coef_rem = ncoef % num_proc;

  for(is=1;is<=nstate_proc;is++){
    wr = (is-1)*ncoef;                /* write position for state is in c */
    for(ip=1;ip<=num_proc;ip++){
      rd = (is-1)*nstate_ncoef_proc_max
         + (ip-1)*(nstate_proc_max*nstate_ncoef_proc_max);
      /* processors past the remainder contribute one coefficient less */
      n_here = nstate_ncoef_proc_max;
      if((coef_rem>0)&&(ip>coef_rem)){
        n_here--;
      }/*endif*/
      for(k=1;k<=n_here;k++){
        c[wr+k] = c_temp[rd+k];
      }/*endfor*/
      wr += n_here;
    }/*endfor*/
  }/*endfor*/

#ifdef DEBUG
  if(myid==0){
    printf("Last step: This is c\n");
  }/*endif*/
  for(ip=1;ip<=num_proc;ip++){
    if(myid==ip-1){
     for(is=1;is<=nstate_proc;is++){
       wr = (is-1)*ncoef;
       for(k=1;k<=ncoef;k++){
        printf("%d %d %d %g\n",ip,is,k,c[(k+wr)]);
       }/*endfor*/
     }/*endfor*/
    }/*endif*/
    scanf("%d",&pause_key);
    Barrier(comm);
  }/*endfor*/
#endif
/*========================================================================*/
   }/*end routine*/
Esempio n. 4
0
/* Returns nonzero when x equals p or q. */
static int coord_is(int x, int p, int q){
  return x == p || x == q;
}

/*
 * Decides whether a rank with coordinates dims[0..4] (as filled in by
 * get_dim()) belongs to the inner communicator under a coordinate-based
 * assignment scheme.
 *
 * Returns 1 for inner, 0 for outer, and -1 when `assignment` is not one of
 * the coordinate-based schemes handled here.
 */
static int assignment_selects_inner(int assignment, const int dims[5]){
  const int a = dims[0], b = dims[1], c = dims[2];

  if(assignment == APLANES)        return coord_is(a, 0, 2);
  if(assignment == APLANES_COARSE) return coord_is(a, 0, 1);
  if(assignment == BPLANES)        return coord_is(b, 0, 2);
  if(assignment == CPLANES)        return coord_is(c, 0, 2);
  if(assignment == DPLANES)        return coord_is(dims[3], 0, 2);
  if(assignment == EPLANES)        return dims[4] == 0;
  if(assignment == SQUAREAB1){
    return (coord_is(a, 0, 1) && coord_is(b, 0, 1)) ||
           (coord_is(a, 2, 3) && coord_is(b, 2, 3));
  }
  if(assignment == SQUAREAB2){
    return (coord_is(a, 0, 2) && coord_is(b, 0, 2)) ||
           (coord_is(a, 1, 3) && coord_is(b, 1, 3));
  }
  if(assignment == ALTERABC_NONE){
    return (coord_is(a, 0, 1) && coord_is(b, 0, 1) && coord_is(c, 0, 1)) ||
           (coord_is(a, 2, 3) && coord_is(b, 2, 3) && coord_is(c, 0, 1)) ||
           (coord_is(a, 0, 1) && coord_is(b, 2, 3) && coord_is(c, 2, 3)) ||
           (coord_is(a, 2, 3) && coord_is(b, 0, 1) && coord_is(c, 2, 3));
  }
  if(assignment == ALTERABC_ALL){
    return (coord_is(a, 0, 2) && coord_is(b, 0, 2) && coord_is(c, 0, 2)) ||
           (coord_is(a, 1, 3) && coord_is(b, 1, 3) && coord_is(c, 0, 2)) ||
           (coord_is(a, 0, 2) && coord_is(b, 1, 3) && coord_is(c, 1, 3)) ||
           (coord_is(a, 1, 3) && coord_is(b, 0, 2) && coord_is(c, 1, 3));
  }
  return -1;
}

/*
 * Benchmark driver for two concurrently running communicators with a
 * selectable rank-to-communicator assignment.
 *
 * Rank 0 gathers every rank's processor name, decides for each world rank
 * whether it joins the inner or the outer communicator, and broadcasts that
 * map. The project's Alltoall() routine is then run on the inner
 * communicator as a warm-up, alone (run1), and together with the outer
 * communicator (run2). Rank 0 writes the rank/communicator/node table to
 * config-<run_index>.out; rank 0 of the inner communicator appends
 * "run_index,run1,run2" to timings.out.
 *
 * Options (every option defaults to 0 when it is not supplied):
 *   -s <int>  msg_size, forwarded to Alltoall()
 *   -r <int>  inner_ranks, number of inner ranks for the RANDOM assignment
 *   -l <int>  loops, forwarded to Alltoall()
 *   -i <int>  run_index, tag for the output files
 *   -a        flag without argument; selects assignment 0
 *   -c <int>  nonzero: ignore the assignment and put the first
 *             num_ranks/2 world ranks in the inner communicator
 *
 * Option parsing stops at the first unrecognised option.
 */
int main(int argc, char**argv){

  int num_ranks, rank, split_num_ranks, split_rank;
  int inner_ranks = 0;
  int msg_size = 0, loops = 0;
  int run_index = 0;
  MPI_Comm split_comm;
  FILE * timings, * configs;
  int assignment = 0;
  int custom = 0;

  //getopt() returns int; storing it in a plain char makes the -1 test
  //fail on platforms where char is unsigned.
  int c;
  int parsing = 1;
  while (parsing && (c = getopt (argc, argv, "s:r:l:i:ac:")) != -1){
    switch (c)
      {
      case 's':
        sscanf(optarg, "%d", &msg_size);
        break;
      case 'r':
        sscanf(optarg, "%d", &inner_ranks);
        break;
      case 'l':
        sscanf(optarg, "%d", &loops);
        break;
      case 'i':
        sscanf(optarg, "%d", &run_index);
        break;
      case 'a':
        //'a' is listed without ':' in the option string, so optarg is
        //NULL here and must not be parsed; the option always selected 0.
        assignment = 0;
        break;
      case 'c':
        sscanf(optarg, "%d", &custom);
        break;
      default:
        printf("Unrecognized option: %c\n", optopt);
        parsing = 0;
        break;
      }
  }

  //Output files; a failure is reported and the run goes on without that file
  timings = fopen("timings.out", "a");
  if(timings == NULL){
    printf("Error: cannot open timings.out\n");
  }
  char configs_buf[128] = {0};
  snprintf(configs_buf, sizeof configs_buf, "config-%d.out", run_index);
  configs = fopen(configs_buf, "a");
  if(configs == NULL){
    printf("Error: cannot open %s\n", configs_buf);
  }

  MPI_Init(NULL, NULL);
  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
  if(num_ranks == 0){
    printf("MPI_Comm_size failure\n");
    exit(1);
  }
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  //get node names: one fixed-width slot per rank, gathered on rank 0
  char name[MPI_MAX_PROCESSOR_NAME] = {0};
  char * recv = (char*)calloc(MPI_MAX_PROCESSOR_NAME*num_ranks, sizeof(char));
  if(recv == NULL){
    printf("Error: cannot allocate node name buffer\n");
    exit(1);
  }
  int proc_len;
  MPI_Get_processor_name(name, &proc_len);
  name[proc_len] = 0;
  MPI_Gather(name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, recv, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, MPI_COMM_WORLD);

  //splitter[r] is the communicator id (colour) of world rank r
  int * splitter = (int*)malloc(sizeof(int)*num_ranks);
  if(splitter == NULL){
    printf("Error: cannot allocate rank map\n");
    exit(1);
  }
  for(int i = 0; i < num_ranks; i++) splitter[i] = OUTER_COMM;
  if(!custom){
    //only rank 0 holds the gathered names; its map is broadcast below
    if(rank == 0){
      const int probe[5] = {0};
      if(assignment == RANDOM){
        //at most num_ranks distinct ranks can be picked; without the cap
        //the loop below would never terminate
        int target = inner_ranks;
        if(target > num_ranks){
          printf("Warning: inner_ranks %d exceeds num_ranks %d, using %d\n",
                 inner_ranks, num_ranks, num_ranks);
          target = num_ranks;
        }
        //rand() is not seeded here, so the selection repeats between runs
        int num_assigned = 0;
        while(num_assigned < target){
          int val = rand() % num_ranks;
          if(splitter[val] != INNER_COMM){
            splitter[val] = INNER_COMM;
            num_assigned += 1;
          }
        }
      }else if(assignment_selects_inner(assignment, probe) >= 0){
        //coordinate-based scheme: classify every rank from its node name
        for(int i = 0; i < num_ranks; i++){
          int dims[5] = {0};
          get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
          if(assignment_selects_inner(assignment, dims) > 0){
            splitter[i] = INNER_COMM;
          }
        }
      }
    }
  }else{ //custom mode: the first half of the world ranks is the inner group
    for(int i = 0; i < num_ranks/2; i++){
      splitter[i] = INNER_COMM;
    }
  }

  MPI_Bcast(splitter, num_ranks, MPI_INT, 0, MPI_COMM_WORLD);

  //split communicator
  MPI_Comm_split(MPI_COMM_WORLD, splitter[rank], 1, &split_comm);
  MPI_Comm_size(split_comm, &split_num_ranks);
  MPI_Comm_rank(split_comm, &split_rank);
  MPI_Barrier(MPI_COMM_WORLD);


  //print names to file (skipped when the config file could not be opened)
  if(rank == 0 && configs != NULL){
    fprintf(configs,"rank,comm,node\n");
    for(int i = 0; i < num_ranks; i++){
      fprintf(configs,"%d,%d,%s\n", i, splitter[i], recv + i*MPI_MAX_PROCESSOR_NAME);
    }
  }

  //run the inner communicator as a warm-up, seems to reduce variance
  if(splitter[rank] == INNER_COMM){
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //run the inside alone, as a baseline

  //start network counters region 1
  MPI_Pcontrol(1);

  float run1 = 0.0f;
  if(splitter[rank] == INNER_COMM){
    run1 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //start network counters region 2
  MPI_Pcontrol(2);

  //run both communicators
  float run2 = 0.0f;
  if(splitter[rank] == INNER_COMM){
    run2 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }else{
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }

  //stop network counters
  MPI_Pcontrol(0);

  //print timings (skipped when timings.out could not be opened)
  if(splitter[rank] == INNER_COMM && split_rank==0 && timings != NULL){
    fprintf(timings, "%d,%f,%f\n", run_index, run1, run2);
  }

  if(timings != NULL){
    fclose(timings);
  }
  if(configs != NULL){
    fclose(configs);
  }
  free(recv);
  free(splitter);
  MPI_Finalize();
  exit(0);
}