Example #1
int main (int argc, char **argv)
{
  MPI_Init (&argc, &argv);
  MPI_Barrier (MPI_COMM_WORLD);	/*  profiling should initially be disabled  */
  MPI_Pcontrol (1);		/*  enable profiling  */
  MPI_Pcontrol (2);		/*  reset call site data  */
  MPI_Barrier (MPI_COMM_WORLD);
  MPI_Pcontrol (0);		/*  disable profiling  */
  MPI_Barrier (MPI_COMM_WORLD);
  MPI_Pcontrol (1);		/*  enable profiling  */
  MPI_Barrier (MPI_COMM_WORLD);
  MPI_Finalize ();
  return 0;
}
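Example #1 relies on the level convention suggested by the MPI standard: 0 disables profiling, 1 enables it, and 2 resets the tool's accumulated data. As a rough sketch (not taken from any of the projects above), a PMPI-based profiling layer might honor those levels as below; the profiling_enabled flag and barrier_calls counter are hypothetical tool-side state, not any particular tool's implementation.

#include <mpi.h>

/* Illustrative sketch only: how a PMPI-based profiling layer might honor
 * the levels used in Example #1 (0 = disable, 1 = enable, 2 = reset).    */
static int  profiling_enabled = 1;    /* hypothetical tool-side state      */
static long barrier_calls     = 0;    /* hypothetical per-call-site counter */

int MPI_Pcontrol(const int level, ...)
{
    if (level == 0)      profiling_enabled = 0;   /* stop collecting       */
    else if (level == 1) profiling_enabled = 1;   /* resume collecting     */
    else if (level == 2) barrier_calls = 0;       /* reset collected data  */
    return MPI_SUCCESS;
}

int MPI_Barrier(MPI_Comm comm)        /* intercepted via the PMPI interface */
{
    if (profiling_enabled)
        barrier_calls++;
    return PMPI_Barrier(comm);        /* forward to the real implementation */
}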
Example #2
int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int i, myrank, numranks, groupsize;
  int dims[3] = {0, 0, 0};          
  int temp[3] = {0, 0, 0};          
  int coord[3] = {0, 0, 0};          
  int periods[3] = {1, 1, 1};
  double startTime, stopTime;

  MPI_Comm cartcomm, subcomm;

  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &numranks);

  dims[MP_X] = atoi(argv[1]);
  dims[MP_Y] = atoi(argv[2]);
  dims[MP_Z] = atoi(argv[3]);
  MPI_Dims_create(numranks, 3, dims);
  MPI_Cart_create(MPI_COMM_WORLD, 3, dims, periods, 1, &cartcomm);
  MPI_Cart_get(cartcomm, 3, dims, periods, coord);
  temp[MP_X] = 0; temp[MP_Y] = 1; temp[MP_Z] = 0;
  MPI_Cart_sub(cartcomm, temp, &subcomm);

  MPI_Comm_size(subcomm,&groupsize);
  int perrank = atoi(argv[4]);
  char *sendbuf = (char*)malloc(perrank*groupsize);
  char *recvbuf = (char*)malloc(perrank*groupsize);

  MPI_Barrier(cartcomm);
  MPI_Pcontrol(1);
  startTime = MPI_Wtime();

  for (i=0; i<MAX_ITER; i++) {
    MPI_Alltoall(sendbuf, perrank, MPI_CHAR, recvbuf, perrank, MPI_CHAR, subcomm);
  }

  MPI_Barrier(cartcomm);
  stopTime = MPI_Wtime();
  MPI_Pcontrol(0);

  if(myrank == 0) {
    printf("Completed %d iterations for subcom size %d, perrank %d\n", i, groupsize, perrank);
    printf("Time elapsed: %f\n", stopTime - startTime);
  }

  MPI_Finalize();
  return 0;
}
Example #3
File: hello.c Project: pyrovski/Adagio
int main(int argc, char *argv[]) {
   int rank, size;
   char hostname[1000];
   int iters=20, i;

   gethostname(hostname, 1000);
   printf("Running %s on %s\n", argv[0], hostname);
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &size);

   MPI_Barrier(MPI_COMM_WORLD);
   for(i=0; i<iters; i++){
#ifdef BLR_USE_JITTER
	   MPI_Pcontrol(0);
#endif
	   sleep(rank%2);
	   MPI_Barrier(MPI_COMM_WORLD);
   }
  

   //printf("Hello world, I am %d of %d\n", rank, size);

   MPI_Finalize();
   printf("Finished %s on %s\n", argv[0], hostname);
   return 0;
}
Example #4
int main(int argc, char **argv)
{
    void *stat;
    pthread_attr_t attr;
    pthread_t thread[2];
    int provided = 0;

    //MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    fprintf(stderr, "test a: required: %d, provided: %d\n", MPI_THREAD_MULTIPLE, provided);
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    fprintf(stderr, "test b: required: %d, provided: %d\n", MPI_THREAD_MULTIPLE, provided);

    if (provided < MPI_THREAD_SERIALIZED) {
        fprintf(stderr, "multi-thread not supported: provided: %d (SERIALIZED: %d, MULTIPLE: %d)\n",
                provided, MPI_THREAD_SERIALIZED, MPI_THREAD_MULTIPLE);
        exit(0);
    }
    exit(0);

    //  MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);   // rank -> rank of this processor
    MPI_Comm_size(MPI_COMM_WORLD, &size);   // size -> total number of processors

    srand((unsigned)time(NULL));

    msg_num = atoi(argv[1]);


#if 1
    MPI_Pcontrol(0);
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    // thread 0 will be sending   messages
    pthread_create(&thread[0], &attr, Send_Func_For_Thread, (void *)0);
    // thread 1 will be receiving messages
    pthread_create(&thread[1], &attr, Recv_Func_For_Thread, (void *)1);
    pthread_attr_destroy(&attr);
    pthread_join(thread[0], &stat);
    pthread_join(thread[1], &stat);
#endif

    fprintf(stdout, "Fnished : rank: %d\n", rank);
    fflush(stdout);


    MPI_Finalize();

    pthread_exit((void *)NULL);
    return 0;
}
Example #5
FORT_DLL_SPEC void FORT_CALL mpi_pcontrol_ ( MPI_Fint *v1, MPI_Fint *ierr ){
    *ierr = MPI_Pcontrol( (int)*v1 );
}
Example #6
int main(int argc, char **argv) {
  int myRank, numPes;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numPes);
  MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
  MPI_Request sreq[2], rreq[2];

  int blockDimX, arrayDimX, arrayDimY;

  if (argc != 2 && argc != 3) {
    printf("%s [array_size] \n", argv[0]);
    printf("%s [array_size_X] [array_size_Y] \n", argv[0]);
    MPI_Abort(MPI_COMM_WORLD, -1);
  }

  if(argc == 2) {
    arrayDimY = arrayDimX = atoi(argv[1]);
  }
  else {
    arrayDimX = atoi(argv[1]);
    arrayDimY = atoi(argv[2]);
  }

  if (arrayDimX % numPes != 0) {
    printf("array_size_X % numPes != 0!\n");
    MPI_Abort(MPI_COMM_WORLD, -1);
  }

  blockDimX = arrayDimX / numPes;

  int iterations = 0, i, j;
  double error = 1.0, max_error = 0.0;

  if(myRank == 0) {
    printf("Running Jacobi on %d processors\n", numPes);
    printf("Array Dimensions: %d %d\n", arrayDimX, arrayDimY);
    printf("Block Dimensions: %d\n", blockDimX);
  }

  double **temperature;
  double **new_temperature;

  /* allocate two dimensional arrays */
  temperature = new double*[blockDimX+2];
  new_temperature = new double*[blockDimX+2];
  for (i=0; i<blockDimX+2; i++) {
    temperature[i] = new double[arrayDimY];
    new_temperature[i] = new double[arrayDimY];
  }
  for(i=0; i<blockDimX+2; i++) {
    for(j=0; j<arrayDimY; j++) {
      temperature[i][j] = 0.5;
      new_temperature[i][j] = 0.5;
    }
  }

  // boundary conditions
  if(myRank < numPes/2) {
    for(i=1; i<=blockDimX; i++)
      temperature[i][0] = 1.0;
  }

  if(myRank == numPes-1) {
    for(j=arrayDimY/2; j<arrayDimY; j++)
      temperature[blockDimX][j] = 0.0;
  }

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Pcontrol(1);
  startTime = MPI_Wtime();

  while(/*error > 0.001 &&*/ iterations < MAX_ITER) {
    iterations++;

    /* Receive my bottom and top edge */
    MPI_Irecv(&temperature[blockDimX+1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), BOTTOM, MPI_COMM_WORLD, &rreq[BOTTOM-1]);
    MPI_Irecv(&temperature[0][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), TOP, MPI_COMM_WORLD, &rreq[TOP-1]);

    /* Send my top and bottom edge */
    MPI_Isend(&temperature[1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), BOTTOM, MPI_COMM_WORLD, &sreq[BOTTOM-1]);
    MPI_Isend(&temperature[blockDimX][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), TOP, MPI_COMM_WORLD, &sreq[TOP-1]);

    MPI_Waitall(2, rreq, MPI_STATUSES_IGNORE);
    MPI_Waitall(2, sreq, MPI_STATUSES_IGNORE);

    for(i=1; i<blockDimX+1; i++) {
      for(j=0; j<arrayDimY; j++) {
        /* update my value based on the surrounding values */
        new_temperature[i][j] = (temperature[i-1][j]+temperature[i+1][j]+temperature[i][wrap_y(j-1)]+temperature[i][wrap_y(j+1)]+temperature[i][j]) * 0.2;
      }
    }

    max_error = error = 0.0;
    for(i=1; i<blockDimX+1; i++) {
      for(j=0; j<arrayDimY; j++) {
	error = fabs(new_temperature[i][j] - temperature[i][j]);
	if(error > max_error)
	  max_error = error;
      }
    }
 
    double **tmp;
    tmp = temperature;
    temperature = new_temperature;
    new_temperature = tmp;

    // boundary conditions
    if(myRank < numPes/2) {
      for(i=1; i<=blockDimX; i++)
	temperature[i][0] = 1.0;
    }

    if(myRank == numPes-1) {
      for(j=arrayDimY/2; j<arrayDimY; j++)
	temperature[blockDimX][j] = 0.0;
    }

    //if(myRank == 0) printf("Iteration %d %f %f %f\n", iterations, max_error, temperature[1][0], temperature[1][1]);

    MPI_Allreduce(&max_error, &error, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
  } /* end of while loop */

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Pcontrol(0);

  if(myRank == 0) {
    endTime = MPI_Wtime();
    printf("Completed %d iterations\n", iterations);
    printf("Time elapsed: %f\n", endTime - startTime);
  }

  MPI_Finalize();
  return 0;
} /* end function main */
Example #7
/**
 * Main function which selects the process to be a master or a worker
 * based on MPI myid.
 *
 * @param argc Number of arguments.
 * @param argv Pointer to the argument pointers.
 * @return     0 on success.
 */
int main(int argc, char **argv)
{
    int myid, numprocs, i;
    int err = -1;
    struct test_params_s test_params;
    struct mpe_events_s mpe_events;
    struct frag_preresult_s **query_frag_preresult_matrix = NULL;

    memset(&test_params, 0, sizeof(struct test_params_s));
    MPI_Init(&argc, &argv);
#ifdef HAVE_MPE    
    MPI_Pcontrol(0);
#endif

    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    init_mpe_events(&mpe_events);
#ifdef HAVE_MPE
    if (myid == MASTER_NODE)
    {
	init_mpe_describe_state(&mpe_events);
    }
#endif
    if (myid == MASTER_NODE)
    {
	err = parse_args(argc, argv, &test_params);
	if (err >= 0)
	    print_settings(&test_params, numprocs);
	if (test_params.output_file != NULL)
	    MPI_File_delete(test_params.output_file, MPI_INFO_NULL);
	if (numprocs < 2)
	{
	    fprintf(stderr, "Must use at least 2  processes.\n");
	    err = -1;
	}
    }

    MPI_Bcast(&err, 1, MPI_INT, MASTER_NODE, MPI_COMM_WORLD);
    /* Quit if the parse_args failed */
    if (err != 0)
    {
	MPI_Finalize();
	return 0;
    }

    /* Master precalculates all the results for the queries and
     * reads in the database histogram parameters */
    if (myid == MASTER_NODE)
    {
	if ((query_frag_preresult_matrix = (struct frag_preresult_s **) 
	     malloc(test_params.query_count * 
		    sizeof(struct frag_preresult_s *))) == NULL)
	{
	    custom_debug(
		MASTER_ERR, 
		"M:malloc query_frag_preresult_matrix of size %d failed\n",
		test_params.query_count * sizeof(struct frag_preresult_s *));
	    return -1;
	}
	for (i = 0; i < test_params.query_count; i++)
	{
	    if ((query_frag_preresult_matrix[i] = (struct frag_preresult_s *)
		 malloc(test_params.total_frags * 
			sizeof(struct frag_preresult_s))) == NULL)
	    {
		custom_debug(
		    MASTER_ERR,
		    "M:malloc query_frag_preresult_matrix[%d] "
		    "of size %d failed\n", i,
		    test_params.total_frags *
		    sizeof(struct frag_preresult_s));
		return -1;
	    }
	    memset(query_frag_preresult_matrix[i], 0, 
		   test_params.total_frags * sizeof(struct frag_preresult_s));
	}
	precalculate_results(&test_params, query_frag_preresult_matrix);
	test_params.query_frag_preresult_matrix = query_frag_preresult_matrix;
	if (test_params.query_params_file != NULL)
	{
	    read_hist_params(&test_params, QUERY);
#if 0
	    print_hist_params(&test_params);
#endif
	}
	if (test_params.db_params_file != NULL)
	{
	    read_hist_params(&test_params, DATABASE);
#if 0
	    print_hist_params(&test_params);
#endif
	}
    }

    MPI_Barrier(MPI_COMM_WORLD);
#ifdef HAVE_MPE
    MPI_Pcontrol(1);
#endif

    /* Divide up into either a Master or Worker */
    mpe_events.total_time = MPI_Wtime();
    if (myid == 0)
    {
	err = master(myid,
		     numprocs,
		     &mpe_events,
		     &test_params);
	if (err != 0)
	    custom_debug(MASTER_ERR, "master failed\n");
	else
	    custom_debug(MASTER, "master (proc %d) reached last barrier\n",
			 myid);
    }
    else
    {
	err = worker(myid,
		     numprocs,
		     &mpe_events,
		     &test_params);
	if (err != 0)
	    custom_debug(WORKER_ERR, "worker failed\n");
	else
	    custom_debug(WORKER, "worker (proc %d) reached last barrier\n",
			 myid);
    }

    custom_MPE_Log_event(mpe_events.sync_start,
			 0, NULL, &mpe_events);
    MPI_Barrier(MPI_COMM_WORLD);
    custom_MPE_Log_event(mpe_events.sync_end,
			 0, NULL, &mpe_events);
    MPI_Pcontrol(0);    
    mpe_events.total_time = MPI_Wtime() - mpe_events.total_time;
#if 0
    print_timing(myid, &mpe_events);
#endif
    MPI_Barrier(MPI_COMM_WORLD);
    timing_reduce(myid, numprocs, &mpe_events);
    
    /* Clean up precomputed results and file */
    if (myid == MASTER_NODE)
    {
	if (test_params.query_params_file != NULL)
	{
	    free(test_params.query_params_file);
	    free(test_params.query_hist_list);
	}
	if (test_params.db_params_file != NULL)
	{
	    free(test_params.db_params_file);
	    free(test_params.db_hist_list);
	}

	MPI_File_delete(test_params.output_file, MPI_INFO_NULL);
	for (i = 0; i < test_params.query_count; i++)
	{
	    free(query_frag_preresult_matrix[i]);
	}
	free(query_frag_preresult_matrix);
    }

    MPI_Info_free(test_params.info_p);
    free(test_params.info_p);
    free(test_params.output_file);
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}
Example #8
int main(int argc, char **argv) {

  int    	size=-1,rank=-1, left=-1, right=-1, you=-1;
  int           ndata=127,ndata_max=127,seed;
  int           rv, nsec=0, count, cmpl;
  long long int i,j,k;
  unsigned long long int  nflop=0,nmem=1,nsleep=0,nrep=1, myflops;
  char 		*env_ptr, cbuf[4096];
  double 	*sbuf, *rbuf,*x;
  MPI_Status    *s;
  MPI_Request   *r;
  time_t	ts;


   seed = time(&ts);

   flags |= DOMPI;
   while(--argc && argv++) {
  if(!strcmp("-v",*argv)) {
    flags |= DOVERBOSE;
  } else if(!strcmp("-n",*argv)) {
    --argc; argv++;
    nflop = atol(*argv);
  } else if(!strcmp("-N",*argv)) {
    --argc; argv++;
    nrep = atol(*argv);
  } else if(!strcmp("-d",*argv)) {
    --argc; argv++;
    ndata_max = ndata = atol(*argv);
  } else if(!strcmp("-m",*argv)) {
    --argc; argv++;
    nmem = atol(*argv);
  } else if(!strcmp("-w",*argv)) {
    --argc; argv++;
    nsec = atoi(*argv);
  } else if(!strcmp("-s",*argv)) {
    --argc; argv++;
    nsleep = atol(*argv);
  } else if(!strcmp("-spray",*argv)) {
    flags |= DOSPRAY;
  } else if(!strcmp("-c",*argv)) {
    flags |= CORE;
  } else if(!strcmp("-r",*argv)) {
    flags |= REGION;
  } else if(!strcmp("-stair",*argv)) {
    flags |= STAIR_RANK;
  } else if(!strcmp("-stair_region",*argv)) {
    flags |= STAIR_REGION;
  } else if(!strcmp("-nompi",*argv)) {
    flags &= ~DOMPI;
  }
 }
 
 if(flags & DOMPI) {
  MPI_Init(&argc,&argv);

/* test double init 
  MPI_Init(&argc,&argv);
*/
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 }
 
 if(nsec > 0) {
  sleep(nsec);
 }

 if(nmem) {
 nmem = (nmem*1024*1024/sizeof(double));
 x = (double *)malloc((size_t)(nmem*sizeof(double)));
 for(j=0;j<nrep;j++) {
 for(i=0;i<nmem;i++) {
  x[i] = i;
 }
 for(i=0;i<nmem;i++) {
  x[i] = i*x[i];
 }
 if(x[nmem-1]*x[nmem-1] < 0) {
  printf("assumption about flop-test or optimization failed\n");
 }
 }
 if(0) free((char *)x);
}
 
/*
#define LONG_REGNAME rshouldbethelastchar
*/
#define LONG_REGNAME abcdefghijklmnopqrst

 if(flags & REGION) {
  MPI_Pcontrol(0,"enter_region(abcdefghijklmnopqrst)");
  sprintf(cbuf,"");
  MPI_Pcontrol(0,"get_region()",cbuf);
  if(strcmp(cbuf,"abcdefghijklmnopqrst")) {
   printf("%d in region = \"%s\" not \"%s\"\n",
	 rank,cbuf,"abcdefghijklmnopqrst");
   fflush(stdout);
  }
  MPI_Pcontrol(0,"exit_region(abcdefghijklmnopqrst)");
  MPI_Pcontrol(0,"get_region()",cbuf);
  if(strcmp(cbuf,"ipm_noregion")) {
   printf("%d out region = \"%s\" not \"%s\"\n",
	 rank,cbuf,"ipm_noregion");
   fflush(stdout);
  }
 }
  
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); 
 if(nflop) {
  x = (double *)malloc((size_t)(10*sizeof(double)));
  j = k = 0;
  for(i=0;i<10;i++) {
   x[i] = 1.0;
  }
if(flags & STAIR_RANK) { 
 myflops = (rank*nflop)/size;
} else {
 myflops = nflop;
}
  for(i=0;i<nflop;i++) {
   x[j] = x[j]*x[k];
   j = ((i%9)?(j+1):(0));
   k = ((i%8)?(k+1):(0));
  }
  free((char *)x);
 }

 if(nsleep) {
  sleep(nsleep);
 }
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); 
 
 if(nmem<nflop) nmem=nflop;
 
 if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem);
 
 fflush(stdout);
 
 if(flags & CORE) {
  for(i=0;;i++) {
   x[i] = x[i*i-1000];
  }
 }

 if(flags & DOMPI) {
  s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size));
  r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size));


  sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; }

  MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD);
  srand48(seed);

  for(i=0;i<nrep;i++) {
   MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD);
  }

  if(size>1) {
  if(!rank) {left=size-1;} else { left = rank-1;}
  if(rank == size-1) { right=0;} else {right=rank+1;}
  you =  (rank < size/2)?(rank+size/2):(rank-size/2);
  } else  {
   you = left = right = rank;
  }
 


  for(i=0;i<nrep;i++) {
   if(flags & DOSPRAY) {
    ndata = (long int)(drand48()*ndata_max)+1;
   }
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s);
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
  if(flags & REGION) MPI_Pcontrol(1,"region_a"); 
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
  MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,1,MPI_COMM_WORLD,s);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);

  MPI_Isend(sbuf,ndata/2,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r);
  MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &cmpl, s);
  MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, s);
  MPI_Get_count(s,MPI_DOUBLE,&count);
  MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s);
  if(count != ndata/2) {
  printf("error: MPI_Get_count(s,MPI_DOUBLE,&count) --> count = %d\n",count);
  }
  MPI_Wait(r,s);
/* FIXME - the following case may need to be addressed
  MPI_Test(r,&cmpl,s);
  printf("spam1 %d %d\n", s->MPI_SOURCE, cmpl);
  if(r != MPI_REQUEST_NULL) {
   MPI_Wait(r,s);
   printf("spam2 %d\n", s->MPI_SOURCE);
  }
*/

  MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r);
  MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD);
  MPI_Wait(r,s);

  for(j=0;j<size;j++) {
   MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j);
   MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j);
  }
  MPI_Waitall(2*size,r,s);
/*
  for(j=0;j<size;j++) {
   printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE);
  }
*/

  if(flags & REGION) MPI_Pcontrol(-1,"region_a"); 

  if(flags & REGION) MPI_Pcontrol(1,"region_b"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_b"); 

 if(1) {
  if(flags & REGION) MPI_Pcontrol(1,"region_c"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_c"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_d"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_d"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_e"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_e"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_f"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_f"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_g"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_g"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_h"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_h"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_i"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_i"); 
 }


  }


  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Finalize();
  }

  free((char *)rbuf);
  free((char *)sbuf);
  free((char *)r);
  free((char *)s);

  free((char *)x);

  return 0;   
}
Example #9
void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack)
{
  int i;
  int *scount,*rcount;
  int tag,irnum;
  MPI_Request *request;
  MPI_Status *status;
  //
  scount=(int *)malloc(2*sizeof(int)*nsend);
  rcount=(int *) malloc(2*sizeof(int)*nrecv);
  request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv));
  status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv));
  //
  for(i=0;i<nsend;i++){
    scount[2*i]=sndPack[i].nints;			
    scount[2*i+1]=sndPack[i].nreals;
  }
  //
  irnum=0;
  tag=10;
  //
  for(i=0;i<nrecv;i++)
    MPI_Irecv(&(rcount[2*i]),2,MPI_INT,rcvMap[i],tag,scomm,&request[irnum++]);
  //
  for(i=0;i<nsend;i++)
    MPI_Isend(&(scount[2*i]),2,MPI_INT,sndMap[i],tag,scomm,&request[irnum++]);
  //
  MPI_Waitall(irnum,request,status);
  for(i=0;i<nrecv;i++)
    {
      rcvPack[i].nints=rcount[2*i];
      rcvPack[i].nreals=rcount[2*i+1];
    }
  //
  irnum=0;
  for(i=0;i<nrecv;i++)
    {
      if (rcvPack[i].nints > 0) {
	tag=10;
	rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints);
	MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints,
		  MPI_INT,rcvMap[i],
		  tag,scomm,&request[irnum++]);
      }
      if (rcvPack[i].nreals > 0) {
	tag=20;
	rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals);
	MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals,
		  MPI_DOUBLE,rcvMap[i],
		  tag,scomm,&request[irnum++]);
      }
    }
  //
  for(i=0;i<nsend;i++)
    {
      if (sndPack[i].nints > 0){
	tag=10;
	MPI_Isend(sndPack[i].intData,sndPack[i].nints,
		  MPI_INT,sndMap[i],
		  tag,scomm,&request[irnum++]);
      }
      if (sndPack[i].nreals > 0){
	tag=20;
	MPI_Isend(sndPack[i].realData,sndPack[i].nreals,
		  MPI_DOUBLE,sndMap[i],
		  tag,scomm,&request[irnum++]);
      }
    }
  MPI_Pcontrol(1, "tioga_pc_waitall");
  MPI_Waitall(irnum,request,status);
  MPI_Pcontrol(-1, "tioga_pc_waitall");
  //
  free(scount);
  free(rcount);
  free(request);
  free(status);
}
Example #10
void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack)
{
  int i;
  int *sint,*sreal,*rint,*rreal;
  int tag,irnum;
  MPI_Request *request;
  MPI_Status *status;
  //
  sint=(int *)malloc(sizeof(int)*numprocs);
  sreal=(int *) malloc(sizeof(int)*numprocs);
  rint=(int *)malloc(sizeof(int)*numprocs);
  rreal=(int *) malloc(sizeof(int)*numprocs);
  request=(MPI_Request *) malloc(sizeof(MPI_Request)*4*numprocs);
  status=(MPI_Status *) malloc(sizeof(MPI_Status)*4*numprocs);
  //
  for(i=0;i<numprocs;i++){
    sint[i]=sndPack[i].nints;			
    sreal[i]=sndPack[i].nreals;
  }
  //
  MPI_Alltoall(sint,1,MPI_INT,rint,1,MPI_INT,scomm);
  MPI_Alltoall(sreal,1,MPI_INT,rreal,1,MPI_INT,scomm);
  //
  for(i=0;i<numprocs;i++) {
    rcvPack[i].nints=rint[i];
    rcvPack[i].nreals=rreal[i];
  }
  //
  irnum=0;
  for(i=0;i<numprocs;i++)
    {
      if (rcvPack[i].nints > 0) {
	tag=1;
	rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints);
	MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints,
		  MPI_INT,i,
		  tag,scomm,&request[irnum++]);
      }
      if (rcvPack[i].nreals > 0) {
	tag=2;
	rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals);
	MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals,
		  MPI_DOUBLE,i,
		  tag,scomm,&request[irnum++]);
      }
    }
  for(i=0;i<numprocs;i++)
    {
      if (sndPack[i].nints > 0){
	tag=1;
	MPI_Isend(sndPack[i].intData,sndPack[i].nints,
		  MPI_INT,i,
		  tag,scomm,&request[irnum++]);
      }
      if (sndPack[i].nreals > 0){
	tag=2;
	MPI_Isend(sndPack[i].realData,sndPack[i].nreals,
		  MPI_DOUBLE,i,
		  tag,scomm,&request[irnum++]);
      }
    }
  MPI_Pcontrol(1, "tioga_pc_waitall");
  MPI_Waitall(irnum,request,status);
  MPI_Pcontrol(-1, "tioga_pc_waitall");

  free(sint);
  free(sreal);
  free(rint);
  free(rreal);
  free(request);
  free(status);
}
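Examples #9 and #10 use the named-region form of MPI_Pcontrol (level 1 with a label to enter a region, -1 to leave it, the convention understood by IPM-style profilers) around the MPI_Waitall. As a minimal sketch, not part of the tioga sources, the same enter/exit pair could be wrapped in a helper macro so the two calls always stay paired:

#include <mpi.h>

/* Hypothetical convenience macro (not in tioga): brackets a statement with
 * IPM-style named-region calls, 1 = enter the region, -1 = exit it.       */
#define PROFILE_REGION(name, stmt)    \
    do {                              \
        MPI_Pcontrol(1, (name));      \
        stmt;                         \
        MPI_Pcontrol(-1, (name));     \
    } while (0)

/* e.g. the wait in sendRecvPacketsAll() could then be written as:
 *   PROFILE_REGION("tioga_pc_waitall",
 *                  MPI_Waitall(irnum, request, status));
 */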
Example #11
File: cpilog.c Project: grondo/mvapich-cce
int main( int argc, char *argv[])
{
    int  n, myid, numprocs, i, j;
    double PI25DT = 3.141592653589793238462643;
    double mypi, pi, h, sum, x;
    double startwtime = 0.0, endwtime;
    int namelen; 
    int event1a, event1b, event2a, event2b,
        event3a, event3b, event4a, event4b;
    char processor_name[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc,&argv);

        MPI_Pcontrol( 0 );

    MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&myid);

    MPI_Get_processor_name(processor_name,&namelen);
    fprintf(stderr,"Process %d running on %s\n", myid, processor_name);

    /*
        MPE_Init_log() & MPE_Finish_log() are NOT needed when
        liblmpe.a is linked with this program.  In that case,
        MPI_Init() would have called MPE_Init_log() already.
    */
/*
    MPE_Init_log();
*/

    /*  Get event ID from MPE, user should NOT assign event ID  */
    event1a = MPE_Log_get_event_number(); 
    event1b = MPE_Log_get_event_number(); 
    event2a = MPE_Log_get_event_number(); 
    event2b = MPE_Log_get_event_number(); 
    event3a = MPE_Log_get_event_number(); 
    event3b = MPE_Log_get_event_number(); 
    event4a = MPE_Log_get_event_number(); 
    event4b = MPE_Log_get_event_number(); 

    if (myid == 0) {
        MPE_Describe_state(event1a, event1b, "Broadcast", "red");
        MPE_Describe_state(event2a, event2b, "Compute",   "blue");
        MPE_Describe_state(event3a, event3b, "Reduce",    "green");
        MPE_Describe_state(event4a, event4b, "Sync",      "orange");
    }

    if (myid == 0) {
        n = 1000000;
        startwtime = MPI_Wtime();
    }
    MPI_Barrier(MPI_COMM_WORLD);

        MPI_Pcontrol( 1 );
        /*
    MPE_Start_log();
        */

    for (j = 0; j < 5; j++) {
        MPE_Log_event(event1a, 0, NULL);
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPE_Log_event(event1b, 0, NULL);

        MPE_Log_event(event4a, 0, NULL);
        MPI_Barrier(MPI_COMM_WORLD);
        MPE_Log_event(event4b, 0, NULL);

        MPE_Log_event(event2a, 0, NULL);
        h   = 1.0 / (double) n;
        sum = 0.0;
        for (i = myid + 1; i <= n; i += numprocs) {
            x = h * ((double)i - 0.5);
            sum += f(x);
        }
        mypi = h * sum;
        MPE_Log_event(event2b, 0, NULL);

        MPE_Log_event(event3a, 0, NULL);
        MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
        MPE_Log_event(event3b, 0, NULL);
    }
/*
    MPE_Finish_log("cpilog");
*/

    if (myid == 0) {
        endwtime = MPI_Wtime();
        printf("pi is approximately %.16f, Error is %.16f\n",
               pi, fabs(pi - PI25DT));
        printf("wall clock time = %f\n", endwtime-startwtime);
    }
    MPI_Finalize();
    return(0);
}
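The comments in Example #11 note that MPE_Init_log()/MPE_Finish_log() are only needed when liblmpe.a is not linked in. A condensed sketch of that standalone flow, reusing the calls that appear (mostly commented out) above; the single "Compute" state and the "cpilog" log name are simply taken from this example, and the output file name/format depend on the MPE version.

#include <mpi.h>
#include <mpe.h>

/* Sketch of standalone MPE logging, only needed when liblmpe.a is NOT
 * linked (with liblmpe.a, MPI_Init already calls MPE_Init_log).        */
int main(int argc, char *argv[])
{
    int myid;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    MPE_Init_log();
    int ev_start = MPE_Log_get_event_number();   /* let MPE assign IDs   */
    int ev_end   = MPE_Log_get_event_number();
    if (myid == 0)
        MPE_Describe_state(ev_start, ev_end, "Compute", "blue");

    MPE_Start_log();                             /* begin recording      */
    MPE_Log_event(ev_start, 0, NULL);
    /* ... work to be logged ... */
    MPE_Log_event(ev_end, 0, NULL);
    MPE_Finish_log("cpilog");                    /* write the log file   */

    MPI_Finalize();
    return 0;
}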
Example #12
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;
    int rc = 0;
    nssi_service xfer_svc;

    int server_index=0;
    int rank_in_server=0;

    int transport_index=-1;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct xfer_args args;

    const int num_io_methods = 8;
    const int io_method_vals[] = {
            XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC,
            XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC,
            XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC,
            XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC};
    const char * io_method_names[] = {
            "write-encode-sync", "write-encode-async",
            "write-rdma-sync", "write-rdma-async",
            "read-encode-sync", "read-encode-async",
            "read-rdma-sync", "read-rdma-async"};

    const int nssi_transport_list[] = {
            NSSI_RPC_PTL,
            NSSI_RPC_PTL,
            NSSI_RPC_IB,
            NSSI_RPC_IB,
            NSSI_RPC_GEMINI,
            NSSI_RPC_GEMINI,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_MPI};

    const int num_nssi_transports = 11;
    const int nssi_transport_vals[] = {
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7,
            8,
            9,
            10
            };
    const char * nssi_transport_names[] = {
            "portals",
            "ptl",
            "infiniband",
            "ib",
            "gemini",
            "gni",
            "bgpdcmf",
            "dcmf",
            "bgqpami",
            "pami",
            "mpi"
    };


    // Initialize arguments
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.len = 1;
    args.delay = 1;
    args.io_method = XFER_WRITE_RDMA_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.num_servers = 1;
    args.num_threads = 0;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.kill_server_flag = true;
    args.block_distribution = true;


    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;
        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set an option, it must be given a name and default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a user's comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");
        parser.setOption("num-servers", &args.num_servers, "Number of server processes");
        parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process");
        parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment");
        parser.setOption("block-distribution", "rr-distribution", &args.block_distribution,
                "Use a block distribution scheme to assign clients to servers");

        // Set an enumeration command line option for the io_method
        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n"
                "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n"
                "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n"
                "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n"
                "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n"
                "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n"
                "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n"
                "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous");


        // Set an enumeration command line option for the NNTI transport
        parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals|ptl    : Cray or Schutt\n"
                "\t\t\tinfiniband|ib  : libibverbs\n"
                "\t\t\tgemini|gni     : Cray libugni (Gemini or Aries)\n"
                "\t\t\tbgpdcmf|dcmf   : IBM BG/P DCMF\n"
                "\t\t\tbgqpami|pami   : IBM BG/Q PAMI\n"
                "\t\t\tmpi            : isend/irecv implementation\n"
                );



        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw an
           std::exception.  If you want prevent a command line processor from
           throwing an std::exception (which is important in this program since
           we don't have an try/catch around this) when it encounters a
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!

        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(args.debug_level, "transport_index=%d", transport_index);
    if (transport_index > -1) {
        args.transport     =nssi_transport_list[transport_index];
        args.transport_name=std::string(nssi_transport_names[transport_index]);
    }
    args.io_method_name=std::string(io_method_names[args.io_method]);

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);


    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting xfer-service test", rank);

#ifdef TRIOS_ENABLE_COMMSPLITTER
    if (args.transport == NSSI_RPC_MPI) {
        MPI_Pcontrol(0);
    }
#endif

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks its
     * running by itself.
     */
    int color = 0;  // color=0-->server, color=1-->client
    if (args.client_flag && args.server_flag) {
        if (np < 2) {
            log_error(debug_level, "Must use at least 2 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Put all the servers as the first ranks.
        if (rank < args.num_servers) {
            color = 0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            color = 1;  // all others are clients
            log_debug(debug_level, "rank=%d is a client", rank);
        }

        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }
    else {
        if (args.client_flag) {
            color=1;
            log_debug(debug_level, "rank=%d is a client", rank);
        }
        else if (args.server_flag) {
            color=0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            log_error(debug_level, "Must be either a client or a server");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }

    MPI_Comm_rank(comm, &splitrank);
    MPI_Comm_size(comm, &splitsize);

    log_debug(debug_level, "%d: Finished splitting communicators", rank);

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server, or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // If running as both client and server, gather and distribute
    // the server URLs to all the clients.
    if (args.server_flag && args.client_flag) {

        std::string all_urls;

        // This needs to be a vector of chars, not a string
        all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0');

        // Have servers gather their URLs
        if (color == 0) {
            assert(args.num_servers == splitsize);  // these should be equal

            log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str());

            // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD)
            MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR,
                    &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm);
        }

        // broadcast the full set of server urls to all processes
        MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD);

        log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size());

        if (color == 1) {

            // For block distribution scheme use the utility function (in xfer_util.cpp)
            if (args.block_distribution) {
                // Use this utility function to calculate the server_index
                xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
            }

            // Use a simple round robin distribution scheme
            else {
                server_index   = splitrank % args.num_servers;
                rank_in_server = splitrank / args.num_servers;
            }

            // Copy the server url out of the list of urls
            int offset = server_index * NSSI_URL_LEN;

            args.server_url = all_urls.substr(offset, NSSI_URL_LEN);

            log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
        }


        log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str());
    }

    // If running as a client only, have to get the list of servers from the urlfile.
    else if (!args.server_flag && args.client_flag){

        sleep(args.delay);  // give server time to get started

        std::vector< std::string > urlbuf;
        xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm);
        args.num_servers = urlbuf.size();

        // For block distribution scheme use the utility function (in xfer_util.cpp)
        if (args.block_distribution) {
            // Use this utility function to calculate the server_index
            xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
        }

        // Use a simple round robin distribution scheme
        else {
            server_index   = splitrank % args.num_servers;
            rank_in_server = splitrank / args.num_servers;
        }

        args.server_url = urlbuf[server_index];
        log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
    }

    else if (args.server_flag && !args.client_flag) {
        args.server_url = my_url;

        if (args.url_file.empty()) {
            log_error(debug_level, "Must set --url-file");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm);
    }

    // Set the debug level for the xfer service.
    xfer_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str());

    print_args(out, args, "%");

    log_debug(debug_level, "server_url=%s", args.server_url.c_str());

    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on node 0.
     *  In this example, the server is a single process.
     */
    if (color == 0) {
        rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
     /**  The parallel client will execute this branch.  The root node, node 0, of the client
      *   connects with the server, using the \ref nssi_get_service function.  Then the root
      *   broadcasts the service description to the other clients before starting the main
      *   loop of the client code by calling \ref xfer_client_main.
      */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {


            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str());
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(xfer_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
        }

        // wait for all the clients to connect
        MPI_Barrier(comm);

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(xfer_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            xfer_client_main(args, xfer_svc, comm);


            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if ((args.kill_server_flag) && (rank_in_server == 0)) {
                log_debug(debug_level, "%d: Halting xfer service", rank);
                rc = nssi_kill(&xfer_svc, 0, 5000);
            }
            rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc);
            if (rc != NSSI_OK) {
                log_error(xfer_debug_level, "could not free svc description: %s",
                        nssi_err_str(rc));
            }
        }

        else {
            if (client_rank == 0)
                log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    if(success && (rc == NSSI_OK))
    	out << "\nEnd Result: TEST PASSED" << std::endl;
    else
    	out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
Example #13
int
main(int argc, char* argv[])
{
    MPI_Datatype type;      /* MPI data type for communicating particle data  */
    int num_processors;     /* Number of processors being used                */
    int processor;          /* My processor number                            */
    int* num;               /* Number of particles on each processor          */
    int* offset;            /* Offset to start of each processor's particles  */
    int buffer_size;        /* Number of particles in pipeline data buffers   */
    particle_t* local;      /* Array containing our local particles           */

        
    /* Initialize MPI */
    MPI_Init(&argc, &argv);

        
    /* Create the MPI data type for communicating particle data */
    MPI_Type_contiguous(4, MPI_DOUBLE, &type);
    MPI_Type_commit(&type);
    
    
    /* Determine the number of processors being used and our processor number */
    MPI_Comm_size(MPI_COMM_WORLD, &num_processors);
    MPI_Comm_rank(MPI_COMM_WORLD, &processor);


    /* Determine how the particles are allocated to the processors */
    {
	int p;
	
	num = (int*)malloc(num_processors * sizeof(int));
	offset = (int*)malloc(num_processors * sizeof(int));
	
	for(p = 0; p < num_processors; p++)
	    num[p] = (NumParticles / num_processors) +
		((p < (NumParticles % num_processors)) ? 1 : 0);	
	buffer_size = num[0];
	
	offset[0] = 0;
	for(p = 1; p < num_processors; p++)
	    offset[p] = offset[p - 1] + num[p - 1];
    }
    

    /* Distribute the initial particle state */
    {
	int i;
	particle_t* particles = NULL;
	
	if(processor == MasterProcessor) {
	    particles = (particle_t*)malloc(NumParticles * sizeof(particle_t));
	    
	    /*
	      CODE FOR READING INITIAL PARTICLE STATE DATA FROM
	      A FILE COULD BE PLACED HERE INSTEAD OF RANDOMIZATION
	    */

	    /* Randomize the particles */
	    for(i = 0; i < NumParticles; i++) {
		particles[i].mass = 1.0;
		particles[i].x = drand48();
		particles[i].y = drand48();
		particles[i].z = drand48();
	    }
	    
	}
	
	local = (particle_t*)malloc(num[processor] * sizeof(particle_t));
	
	MPI_Scatterv(particles, num, offset, type,
		     local, num[processor], type,
		     MasterProcessor, MPI_COMM_WORLD);
	
	if(processor == MasterProcessor)
	    free(particles);
    }
    
    
    /* Actual Simulation */
    {
	int iteration, stage, i, j;
	particle_t* buf_send;
	particle_t* buf_recv;
	double* tfx;
	double* tfy;
	double* tfz;
	double* ox;
	double* oy;
	double* oz;
	
        MPI_Pcontrol ( 1 );

	buf_send = (particle_t*)malloc(buffer_size * sizeof(particle_t));
	buf_recv = (particle_t*)malloc(buffer_size * sizeof(particle_t));	

	tfx = (double*)malloc(num[processor] * sizeof(double));
	tfy = (double*)malloc(num[processor] * sizeof(double));
	tfz = (double*)malloc(num[processor] * sizeof(double));
	ox = (double*)malloc(num[processor] * sizeof(double));
	oy = (double*)malloc(num[processor] * sizeof(double));
	oz = (double*)malloc(num[processor] * sizeof(double));
	
	/* Set the "old" position for each particle to its current position */
	for(i = 0; i < num[processor]; i++) {
	    ox[i] = local[i].x;
	    oy[i] = local[i].y;
	    oz[i] = local[i].z;
	}
	
	/* Time steps */
	for(iteration = 0; iteration < NumIterations; iteration++) {	    
	    
	    double f_max = -Infinity;	    

	    /* Show current iteration number */
	    if(processor == 1) {
		fprintf(stdout, "Iteration %d of %d...\n", 
			iteration + 1, NumIterations);
		fflush(stdout);
	    }

	    /* Zero the total force for each particle */
	    for(i = 0; i < num[processor]; i++) {
		tfx[i] = 0.0;
		tfy[i] = 0.0;
		tfz[i] = 0.0;
	    }
	    
	    /* Force computation pipeline */
	    for(stage = 0; stage < num_processors; stage++) {
		
		MPI_Request request[2];
		MPI_Status status[2];
		
		/* Prime the pipeline with our local data for stage zero */
		if(stage == 0)
		    memcpy(buf_send, local,
			   num[processor] * sizeof(particle_t));
		
		/* Issue the send/receive pair for this pipeline stage */
		if(stage < (num_processors - 1)) {
		    MPI_Isend(buf_send, buffer_size, type,
			      (processor - 1 + num_processors) % num_processors,
			      0, MPI_COMM_WORLD, &request[0]);
		    MPI_Irecv(buf_recv, buffer_size, type,
			      (processor + 1 + num_processors) % num_processors,
			      0, MPI_COMM_WORLD, &request[1]);
		}
		
		/* Compute forces */ 		
		for(i = 0; i < num[processor]; i++) {
		    
		    double r_min = +Infinity;
		    double fx = 0.0;
		    double fy = 0.0;			
		    double fz = 0.0;
		    double f = 0.0;
		    
		    for(j = 0; 
			j < num[(processor + stage) % num_processors];
			j++) {
			
			double rx = local[i].x - buf_send[j].x;
			double ry = local[i].y - buf_send[j].y;
			double rz = local[i].z - buf_send[j].z;
			double r = (rx * rx) + (ry * ry) + (rz * rz);
			
			if(r > 0.0) {
			    if(r < r_min)
				r_min = r;
			    fx -= buf_send[j].mass * (rx / r);
			    fy -= buf_send[j].mass * (ry / r);
			    fz -= buf_send[j].mass * (rz / r);
			}
			
		    }
		    
		    tfx[i] += fx;
		    tfy[i] += fy;
		    tfz[i] += fz;

		    /* Rough estimate of 1/m|df/dx| */
		    f = sqrt((fx * fx) + (fy * fy) + (fz * fz)) / r_min;
		    if(f > f_max)
			f_max = f;
		    
		}
		
		/* Complete the send/receive pair for this pipeline stage */
		if(stage < (num_processors - 1)) {
		    MPI_Waitall(2, request, status);
		    memcpy(buf_send, buf_recv,
			   buffer_size * sizeof(particle_t));
		}
		
	    }
	    
	    /*
	     * Compute new positions using a simple leapfrog time integration.
	     * Use a variable step version to simplify time-step control.
	     *
	     * Integration is (a0 * x^+) + (a1 * x) + (a2 * x^-) = f / m
	     *
	     * Stability criteria is roughly 2.0 / sqrt(1/m|df/dx|) >= dt
	     */
	    {
		static double dt_old = 0.001;
		static double dt_now = 0.001;
		double dt_est;
		double dt_new;
		
		double a0 = +2.0 / (dt_now * (dt_old + dt_now));
		double a1 = -2.0 / (dt_old * dt_now);
		double a2 = +2.0 / (dt_old * (dt_old + dt_now));
		
		for(i = 0; i < num[processor]; i++) {
		    
		    double x = local[i].x;
		    double y = local[i].y;
		    double z = local[i].z;
		    
		    local[i].x = (tfx[i] - (a1 * x) - (a2 * ox[i])) / a0;
		    local[i].y = (tfy[i] - (a1 * y) - (a2 * oy[i])) / a0;
		    local[i].z = (tfz[i] - (a1 * z) - (a2 * oz[i])) / a0;
		    
		    ox[i] = x;
		    oy[i] = y;
		    oz[i] = z;
		    
		}
		
		dt_est = 1.0 / sqrt(f_max);
		if(dt_est < MinTimeStep)
		    dt_est = MinTimeStep;
		
		MPI_Allreduce(&dt_est, &dt_new, 1, MPI_DOUBLE,
			      MPI_MIN, MPI_COMM_WORLD);
		
		if(dt_new < dt_now) {
		    dt_old = dt_now;
		    dt_now = dt_new;
		}
		else if(dt_new > (4.0 * dt_now)) {
		    dt_old = dt_now;
		    dt_now *= 2.0;
		}
		
	    }
	    
	}
	
	free(buf_send);
	free(buf_recv);
	free(tfx);
	free(tfy);
	free(tfz);
	free(ox);
	free(oy);
	free(oz);

        MPI_Pcontrol ( 0 );
	
    }
    

    /* Gather the final particle state */
    {
	particle_t* particles = NULL;
	
	if(processor == MasterProcessor)
	    particles = (particle_t*)malloc(NumParticles * sizeof(particle_t));
	
	MPI_Gatherv(local, num[processor], type,
		    particles, num, offset, type,
		    MasterProcessor, MPI_COMM_WORLD);
	
	free(local);
	
	if(processor == MasterProcessor) {

	    /*
	      CODE FOR WRITING FINAL PARTICLE STATE
	      DATA TO A FILE COULD BE PLACED HERE
	    */
	    
	    free(particles);	
	}
    }
    
    
    /* Free the particle distribution arrays */
    free(num);
    free(offset);
    
    
    /* Finalize MPI */
    MPI_Finalize();
    
    
    /* All Done */
    return 0;
}
Example #14
int main(int argc, char **argv) {

  int    	size,rank, left, right, you, ndata=127,ndata_max=127,seed;
  int           rv;
  long long int i,j,k;
  unsigned long long int  nflop=0,nmem=1,nsleep=0,nrep=1, myflops;
  char 		*env_ptr;
  double 	*sbuf, *rbuf,*x;
  MPI_Status    *s;
  MPI_Request   *r;
  time_t	ts;


#ifdef HPM

   if((rv = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT )
   {
      fprintf(stderr, "Error: %d %s\n",rv, errstring);
      exit(1);
   }

   if ((num_hwcntrs = PAPI_num_counters()) < PAPI_OK)
   {
      printf("There are no counters available. \n");
      exit(1);
   }

  if ( (rv = PAPI_start_counters(events, 2)) != PAPI_OK) {
    fprintf(stdout, "ERROR PAPI_start_counters rv=%d\n", rv);
    exit(rv);
   }

#endif
   seed = time(&ts);

   flags |= DOMPI;
   while(--argc && argv++) {
  if(!strcmp("-v",*argv)) {
    flags |= DOVERBOSE;
  } else if(!strcmp("-n",*argv)) {
    --argc; argv++;
    nflop = atol(*argv);
  } else if(!strcmp("-N",*argv)) {
    --argc; argv++;
    nrep = atol(*argv);
  } else if(!strcmp("-d",*argv)) {
    --argc; argv++;
    ndata_max = ndata = atol(*argv);
  } else if(!strcmp("-m",*argv)) {
    --argc; argv++;
    nmem = atol(*argv);
  } else if(!strcmp("-s",*argv)) {
    --argc; argv++;
    nsleep = atol(*argv);
  } else if(!strcmp("-spray",*argv)) {
    flags |= DOSPRAY;
  } else if(!strcmp("-c",*argv)) {
    flags |= CORE;
  } else if(!strcmp("-r",*argv)) {
    flags |= REGION;
  } else if(!strcmp("-stair",*argv)) {
    flags |= STAIR_RANK;
  } else if(!strcmp("-stair_region",*argv)) {
    flags |= STAIR_REGION;
  } else if(!strcmp("-nompi",*argv)) {
    flags &= ~DOMPI;
  }
 }
 
 if(flags & DOMPI) {
  MPI_Init(&argc,&argv);

/*
  MPI_Init(&argc,&argv);
*/
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 }
 

 if(nmem) {
   nmem = (nmem*1024*1024/sizeof(double));
   x = (double *)malloc((size_t)(nmem*sizeof(double)));
   for(j=0;j<nrep;j++) {
     for(i=0;i<nmem;i++) {
       x[i] = i;
     }
     for(i=0;i<nmem;i++) {
       x[i] = i*x[i];
     }
     if(x[nmem-1]*x[nmem-1] < 0) {
       printf("trickster\n");
     }
   }
   if(0) free((char *)x);
 }
 
#ifdef IPM
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); 
#endif
 if(nflop) {
  x = (double *)malloc((size_t)(10*sizeof(double)));
  j = k = 0;
  for(i=0;i<10;i++) {
   x[i] = 1.0;
  }
if(flags & STAIR_RANK) { 
 myflops = (rank*nflop)/size;
} else {
 myflops = nflop;
}
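  /*
   * Synthetic floating-point kernel: j and k cycle over a 10-element array
   * so the multiplies cannot be optimized away; the trip count is myflops,
   * which honors the -stair per-rank scaling computed above.
   */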
  for(i=0;i<myflops;i++) {
   x[j] = x[j]*x[k];
   j = ((i%9)?(j+1):(0));
   k = ((i%8)?(k+1):(0));
  }
  free((char *)x);
 }

 if(nsleep) {
  sleep(nsleep);
 }
#ifdef IPM
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); 
#endif
 
 if(nmem<nflop) nmem=nflop;
 
 if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem);
 
 fflush(stdout);
 
 if(flags & CORE) {
  for(i=0;;i++) {
   x[i] = x[i*i-1000];
  }
 }



  env_ptr = getenv("IPM_SOCKET");
  if(env_ptr) {
   printf("IPM: %d IPM_SOCKET in app %s\n", rank, env_ptr);
  }
  
 if(flags & DOMPI) {
  s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size));
  r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size));


  sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; }

  MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD);
  srand48(seed);

  for(i=0;i<nrep;i++) {
   MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD);
  }

  if(size>1) {
  if(!rank) {left=size-1;} else { left = rank-1;}
  if(rank == size-1) { right=0;} else {right=rank+1;}
  you =  (rank < size/2)?(rank+size/2):(rank-size/2);
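  /* left/right are the ring neighbors; 'you' is the partner half a ring away,
     used for the point-to-point exchanges below. */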


  for(i=0;i<nrep;i++) {
   if(flags & DOSPRAY) {
    ndata = (long int)(drand48()*ndata_max)+1;
   }
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s);
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_a"); 
#endif
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);

  MPI_Isend(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r);
  MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s);
  MPI_Wait(r,s);

  MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r);
  MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD);
  MPI_Wait(r,s);

  
  for(j=0;j<size;j++) {
   MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j);
   MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j);
  }
  MPI_Waitall(2*size,r,s);
/*
  for(j=0;j<size;j++) {
   printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE);
  }
*/

#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_a"); 
#endif

#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_b"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_b"); 
#endif

 if(1) {
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_c"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_c"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_d"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_d"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_e"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_e"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_f"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_f"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_g"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_g"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_h"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_h"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_i"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_i"); 
#endif
 }

  }
  }


  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Finalize();
  }

#ifdef HPM
   if ((rv=PAPI_stop_counters(values, 2)) != PAPI_OK) {
    fprintf(stdout, "ERROR PAPI_stop_counters rv=%d\n", rv);
    exit(rv);
   }
   printf("PAPI: total instruction/cycles  %lld/%lld %.3e \n", values[0], values[1], values[0]/(values[1]*1.0) );
#endif 

  return 0;   
}
Example #15
int main(int argc, char *argv[]) {
 char region_name[4096];

 MPI_Pcontrol(1,"get_region();",(void *)region_name); 
 return 0;
}
Example #16
File: bench.c Project: plavin/bgq_synth_2
int main(int argc, char**argv){

  int num_ranks, rank, split_num_ranks, split_rank;
  int outer_ranks, inner_ranks;
  int new_comm_id;
  int msg_size, loops;
  int slurm_id, run_index;
  MPI_Comm split_comm;
  FILE * timings;

  //Parse options
  int c;
  while ((c = getopt (argc, argv, "s:r:l:i:")) != -1){
    switch (c)
      {
      case 's':
	sscanf(optarg, "%d", &msg_size);
	break;
      case 'r':
	sscanf(optarg, "%d", &inner_ranks);
	break;
      case 'l':
	sscanf(optarg, "%d", &loops);
	break;
      case 'i':
	sscanf(optarg, "%d", &run_index);
	break;
      default:
	printf("Unrecognized option: %c\n", optopt);
	break;
      }
    if(c != 's' && c != 'i' && c != 'l' && c != 'r' ){break;}
  }
  printf("Successfully parsed options as: \n");
  printf("\tmsg_size: %d, inner_ranks: %d, loops: %d, run_index: %d\n", msg_size, inner_ranks, loops, run_index);

  //Open timings.out for writing
  timings = fopen("timings.out", "a");
  if(timings == NULL){
    printf("Error: cannot open timings.out\n");
  }

  //Start MPI, get num_ranks
  MPI_Init(NULL, NULL);
  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
  if(num_ranks == 0){
    printf("MPI_Comm_size failure\n");
    exit(1);
  }

  //Get global rank
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  //Calculate comm sizes
  outer_ranks = num_ranks - inner_ranks;
  if( (outer_ranks < 0 || inner_ranks < 0) && (rank == 0) ){
    printf("Error: bad comm sizes. They should be non-negative\n");
  }

  int * splitter = (int*)malloc(sizeof(int)*num_ranks);
  for(int i = inner_ranks; i < num_ranks; i++) splitter[i] = OUTER_COMM;
  for(int i = 0; i < inner_ranks; i++) splitter[i] = INNER_COMM;
  
  //split communicator
  MPI_Comm_split(MPI_COMM_WORLD, splitter[rank], 1, &split_comm);
  MPI_Comm_size(split_comm, &split_num_ranks);
  MPI_Comm_rank(split_comm, &split_rank);
  MPI_Barrier(MPI_COMM_WORLD);
  
  
  //run the inner communicator as a warm-up, seems to reduce variance
  if(splitter[rank] == INNER_COMM){
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //start network counters region 1
  MPI_Pcontrol(1);

  //run the inside alone, as a baseline
  float run1;
  if(splitter[rank] == INNER_COMM){
    run1 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //start network counters region 2
  MPI_Pcontrol(2);

  //run both communicators
  float run2;
  if(splitter[rank] == INNER_COMM){
    run2 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); 
  }else{
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }

  //stop network counters
  MPI_Pcontrol(0);

  //print timings
  if(splitter[rank] == INNER_COMM && split_rank==0) fprintf(timings, "%d,%f,%f\n", run_index, run1, run2);

  //free(recv);
  free(splitter);
  MPI_Finalize();
  exit(0);
}
Example #17
void M3_profile( int sectionID, const char *sectionName, int operationFlag )
{
  static char *staticTitleString = NULL;
  static char **staticProfileName = NULL;
  static int64_t *staticNumCalls = NULL;
  static double *staticTotalTime = NULL;
  static double *staticStartTime = NULL;
#ifdef USE_PAPI
  static int64_t *staticFlopCount = NULL;
  static int64_t *staticFlipCount = NULL;
  static int64_t *staticFlopCounter = NULL;
  static int64_t *staticFlipCounter = NULL;
#endif
  static double staticInitTime = 0;
  static char staticInitDate[256]={0};
  static int staticProfileLevel = -1;

#ifdef USE_PAPI
#define M3_NUM_PAPI_EVENTS 2
  int papiEvents[M3_NUM_PAPI_EVENTS] = {PAPI_FP_OPS, PAPI_FP_INS};
  static long long int papiCounters[M3_NUM_PAPI_EVENTS] = {0};
#endif
  double finalTime;
  int64_t *agInt64 = NULL;
  double *agDouble = NULL;
  int64_t i, j;
  long int k;
  int myRank = -1;
  int numProc = 1;
  FILE *outFile;  
  char *tempPtr, fileName[256], tempString[256];
  char myHostname[256] = {0};
  double mpiTic;
  double mpiToc;
  struct timeval tic;
  struct timezone tz;
  time_t tt;
  long int pid;
  char pcontrolID[16] = {0};

  if( staticProfileLevel == -1 )
  {
    /* Look for environment variable.  */
    tempPtr = getenv("M3_PROFILE_LEVEL");
    if( tempPtr )
      staticProfileLevel = atoi( tempPtr );
    else
      staticProfileLevel = M3_PROFILE_LEVEL;
  }

  if( staticProfileLevel == 0 )
    return;



#ifdef USE_MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &myRank );
  MPI_Comm_size(MPI_COMM_WORLD, &numProc );
#endif 

  sprintf(fileName, "M3_Profile():  profile ID out of range, must be between 0 and %i", M3_PROFILE_MAX_SECTIONS - 1);
  assert(sectionID >= 0 && sectionID < M3_PROFILE_MAX_SECTIONS);

  switch( operationFlag )
  {
    case M3_PROFILE_INIT:
      assert(staticProfileName == NULL &&
	     staticNumCalls == NULL && 
	     staticTotalTime == NULL && 
	     staticStartTime == NULL);
      if( sectionName && strlen(sectionName) )
      {
        staticTitleString = (char *)calloc( 4*(strlen(sectionName)/4 +1 ), sizeof(char) );
        assert(staticTitleString != NULL);
        strcpy(staticTitleString, sectionName );
      }

      staticProfileName = (char **)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(char*) );
      staticNumCalls = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t) );
      staticTotalTime = (double *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(double) );
      staticStartTime = (double *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(double) );
      assert(staticProfileName && staticNumCalls && staticTotalTime && staticStartTime);
#ifdef USE_PAPI
      staticFlopCount = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t));
      staticFlipCount = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t));
      staticFlopCounter = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t));
      staticFlipCounter = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t));
      assert(staticFlopCount && staticFlipCount);
      assert(staticFlopCounter && staticFlipCounter);
#endif

      gettimeofday(&tic, &tz);
#ifdef USE_MPI
      staticInitTime = MPI_Wtime( );
#else
      staticInitTime = tic.tv_sec + tic.tv_usec*1e-6;
#endif
      tt = tic.tv_sec;
      ctime_r(&tt, staticInitDate );

#ifdef USE_PAPI      
      PAPI_start_counters(papiEvents, M3_NUM_PAPI_EVENTS);
#endif

#ifdef USE_MPI
      if (myRank == 0) {
	mkdir( "m3_profile", S_IRWXU );
      }
#else
      mkdir( "m3_profile", S_IRWXU );
#endif

      break;
    case M3_PROFILE_FINALIZE:
      /* Check to see if it was initialized */
      if( staticProfileName == NULL ||
          staticNumCalls == NULL || 
          staticTotalTime == NULL )
      {
	/*        fprintf(stderr, "WARNING:  M3_Profile, finalized without initializing\n");  */
        break;
      }

      myHostname[255] = 0;
      gethostname(myHostname, 255);
      pid = (long int)getpid();  

      for( j = 0; j < 2; j++ )
      {
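        /*
         * Pass j==0 writes this rank's own profile (".<rank>" suffix);
         * pass j==1 reduces the counters across ranks and rank 0 writes
         * the aggregate ".all" file.
         */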
#ifdef USE_MPI
        if( j == 1 )
	{
          /* Get aggregate statistics */
          if( myRank == 0 )
	  {
            agInt64 = (int64_t*)calloc(M3_PROFILE_MAX_SECTIONS, sizeof(int64_t));
            agDouble = (double*)calloc(M3_PROFILE_MAX_SECTIONS, sizeof(double));
            assert( agInt64 && agDouble );
          }
          MPI_Reduce( staticNumCalls, agInt64, M3_PROFILE_MAX_SECTIONS, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD );
          if( myRank == 0 )
            memcpy( staticNumCalls, agInt64 , sizeof(int64_t)*M3_PROFILE_MAX_SECTIONS );
          
          MPI_Reduce( staticTotalTime, agDouble, M3_PROFILE_MAX_SECTIONS, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD );
          if( myRank == 0 )
            memcpy( staticTotalTime, agDouble , sizeof(double)*M3_PROFILE_MAX_SECTIONS );
#ifdef USE_PAPI
          MPI_Reduce( staticFlopCount, agInt64, M3_PROFILE_MAX_SECTIONS, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD );
          if( myRank == 0 )
            memcpy( staticFlopCount, agInt64, sizeof(int64_t)*M3_PROFILE_MAX_SECTIONS );
          MPI_Reduce( staticFlipCount, agInt64, M3_PROFILE_MAX_SECTIONS, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD );
          if( myRank == 0 )
            memcpy( staticFlipCount, agInt64, sizeof(int64_t)*M3_PROFILE_MAX_SECTIONS );
#endif
          if( myRank == 0 )
	  {
            free(agInt64);
            free(agDouble);
          }
          else
            break;
        }
#else
        /* If not using mpi, don't need to collect aggregate statistics */
        if( j == 1 )
          break;
#endif

        k = 60*lrint(staticInitTime/60);

        /* m3_profile_title_date.proc */
        if( staticTitleString )
        {
          tempPtr = strchr( staticTitleString, ' ');
          if(tempPtr)
            *tempPtr = '\0';

          sprintf( fileName, "m3_profile/m3_profile_%s_%li_%s_%li", staticTitleString, k, myHostname, pid);

          if(tempPtr)
            *tempPtr = ' ';
        }
        else
	{
          sprintf( fileName, "m3_profile/m3_profile_%li", k );          
        }

#ifdef USE_MPI
        if( j == 0 ) 
          sprintf( tempString, ".%i", myRank );
        else
          strcpy( tempString, ".all"); 
        
        strcat( fileName, tempString );
#endif

        
        if( ( staticProfileLevel == 2 ) ||
            ( staticProfileLevel == 1 && j == 0 && numProc == 1 ) || 
            ( staticProfileLevel == 1 && j == 1 ) )
	{
          /* Open the output file.  */ 
          outFile = fopen( fileName, "w");
          assert(outFile != NULL);

          /* Write a title */
          if( staticTitleString )
            fprintf(outFile, "M3_Profile:  %s\n\n", staticTitleString );
          else
            fprintf(outFile, "M3_Profile\n\n" );
          

          /* Write the init date, and the run time.  */
#ifdef USE_MPI
          fprintf(outFile, "Number of processors:  %i\n", numProc );
          finalTime = MPI_Wtime();
#else
          gettimeofday(&tic, &tz );
          finalTime = tic.tv_sec + tic.tv_usec*1e-6;
#endif
          fprintf( outFile, "Start date %s\n", staticInitDate );
          fprintf( outFile, "Run time in seconds:  %e\n\n", finalTime - staticInitTime );

          if( j == 1 )
            fprintf(outFile, "Aggregate statistics\n\n");

          for( i = 0; i < M3_PROFILE_MAX_SECTIONS; i++ )
          {
            if( staticNumCalls[i] )
            {
              fprintf(outFile, "-----------------------------\n");
              fprintf(outFile, "    Profile ID number:  %lli\n", i);
              if( staticProfileName[i] )
                fprintf(outFile, "    %s\n", staticProfileName[i] );
              fprintf(outFile, "      Total number of calls:          %lli\n", staticNumCalls[i]);
              fprintf(outFile, "      Total time (seconds):           %e\n", staticTotalTime[i]);
              fprintf(outFile, "      Mean time per call (seconds):   %e\n", staticTotalTime[i]/staticNumCalls[i]);
              fprintf(outFile, "      Mean time per task (seconds):   %e\n", staticTotalTime[i]/numProc );
              fprintf(outFile, "      Percent of wall clock           %.6f %%\n", staticTotalTime[i]/numProc/(finalTime - staticInitTime)*100 );
#ifdef USE_PAPI
              fprintf(outFile, "      Flop count:                     %lli\n", staticFlopCount[i]);
              fprintf(outFile, "      Flop rate:                      %.6e\n", staticFlopCount[i]/staticTotalTime[i]);
              fprintf(outFile, "      Flip count:                     %lli\n", staticFlipCount[i]);
              fprintf(outFile, "      Flip rate:                      %.6e\n", staticFlipCount[i]/staticTotalTime[i]);
#endif
              fprintf(outFile, "\n\n");
            }
          }

          fclose(outFile);
        }
      }


      /* Free up static memory */
      if( staticTitleString )
      {
        free(staticTitleString);
        staticTitleString = NULL;
      }

      if( staticProfileName )
      {
        for( i = 0; i < M3_PROFILE_MAX_SECTIONS; i++ )
          if( staticProfileName[i] )
            free( staticProfileName[i] );
        free(staticProfileName);
        staticProfileName = NULL;
      }

      if( staticNumCalls )
      {
        free( staticNumCalls );
        staticNumCalls = NULL;
      }
      
      if( staticTotalTime )
      {
        free( staticTotalTime );
        staticTotalTime = NULL;
      }

      if( staticStartTime )
      {
        free(staticStartTime );
        staticStartTime = NULL;
      }
#ifdef USE_PAPI
      if( staticFlopCount )
      {
        free(staticFlopCount);
        staticFlopCount = NULL;
      }
      if( staticFlipCount )
      {
        free(staticFlipCount);
        staticFlipCount = NULL;
      }
#endif

      break;
    case M3_PROFILE_START:
      if( staticProfileName == NULL ||
          staticNumCalls == NULL || 
          staticTotalTime == NULL )
      {
	/*        fprintf(stderr, "WARNING:  M3_Profile, called without initializing\n");  */
        break;
      }
      if( staticProfileName[sectionID] == NULL )
      {
        staticProfileName[sectionID] = (char*)calloc(4*(strlen(sectionName)/4 + 1), sizeof(char));
        assert(staticProfileName[sectionID] != NULL);
        strcpy(staticProfileName[sectionID], sectionName);
      }
#ifdef USE_MPI
#ifndef USE_PAPI
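      /* Two-argument MPI_Pcontrol: profiling layers such as IPM treat the
         extra string as a named region, entered here with level 1 and left
         with level -1 in the M3_PROFILE_STOP case below. */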
      sprintf( pcontrolID, "%i", sectionID);
      MPI_Pcontrol( 1, pcontrolID );
#endif
#endif
#ifdef USE_MPI
      staticStartTime[sectionID] = MPI_Wtime();
#else
      gettimeofday(&tic, &tz);
      staticStartTime[sectionID] = tic.tv_sec + tic.tv_usec*1e-6;
#endif
#ifdef USE_PAPI
      PAPI_accum_counters(papiCounters, M3_NUM_PAPI_EVENTS );
      staticFlopCounter[sectionID] = papiCounters[0];
      staticFlipCounter[sectionID] = papiCounters[1];
#endif
      break;
    case M3_PROFILE_STOP:
      if( staticProfileName == NULL ||
          staticNumCalls == NULL || 
          staticTotalTime == NULL )
      {
	/*        fprintf(stderr, "WARNING:  M3_Profile, called without initializing\n");  */
        break;
      }
#ifdef USE_MPI
#ifndef USE_PAPI
      sprintf( pcontrolID, "%i", sectionID);
      MPI_Pcontrol( -1, pcontrolID );
#endif
#endif
      staticNumCalls[sectionID]++;
#ifdef USE_MPI
      staticTotalTime[sectionID] += MPI_Wtime() - staticStartTime[sectionID];
#else
      gettimeofday(&tic, &tz);
      staticTotalTime[sectionID] += (tic.tv_sec + tic.tv_usec*1e-6) - staticStartTime[sectionID];
#endif
#ifdef USE_PAPI
      PAPI_accum_counters(papiCounters, M3_NUM_PAPI_EVENTS );
      staticFlopCount[sectionID] += papiCounters[0] - staticFlopCounter[sectionID];
      staticFlipCount[sectionID] += papiCounters[1] - staticFlipCounter[sectionID];
#endif
      break;
  }
}
Example #18
File: cpilog.c Project: FelixPascal/BCILAB
int main( int argc, char *argv[] )
{
    int  n, myid, numprocs, ii, jj;
    double PI25DT = 3.141592653589793238462643;
    double mypi, pi, h, sum, x;
    double startwtime = 0.0, endwtime;
    int namelen; 
    int event1a, event1b, event2a, event2b,
        event3a, event3b, event4a, event4b;
    int event1, event2, event3;
    char processor_name[ MPI_MAX_PROCESSOR_NAME ];

    MPI_Init( &argc, &argv );
        
        MPI_Pcontrol( 0 );

    MPI_Comm_size( MPI_COMM_WORLD, &numprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &myid );

    MPI_Get_processor_name( processor_name, &namelen );
    fprintf( stderr, "Process %d running on %s\n", myid, processor_name );

    /*
        MPE_Init_log() & MPE_Finish_log() are NOT needed when
        liblmpe.a is linked with this program.  In that case,
        MPI_Init() would have called MPE_Init_log() already.
    */
#if defined( NO_MPI_LOGGING )
    MPE_Init_log();
#endif

    /*
        user should NOT assign eventIDs directly in MPE_Describe_state()
        Get the eventIDs for user-defined STATES(rectangles) from
        MPE_Log_get_state_eventIDs() instead of the deprecated function
        MPE_Log_get_event_number().
    */
    MPE_Log_get_state_eventIDs( &event1a, &event1b );
    MPE_Log_get_state_eventIDs( &event2a, &event2b );
    MPE_Log_get_state_eventIDs( &event3a, &event3b );
    MPE_Log_get_state_eventIDs( &event4a, &event4b );

    if ( myid == 0 ) {
        MPE_Describe_state( event1a, event1b, "Broadcast", "red" );
        MPE_Describe_state( event2a, event2b, "Sync", "orange" );
        MPE_Describe_state( event3a, event3b, "Compute", "blue" );
        MPE_Describe_state( event4a, event4b, "Reduce", "green" );
    }

    /* Get event ID for Solo-Event(single timestamp object) from MPE */
    MPE_Log_get_solo_eventID( &event1 );
    MPE_Log_get_solo_eventID( &event2 );
    MPE_Log_get_solo_eventID( &event3 );

    if ( myid == 0 ) {
       MPE_Describe_event( event1, "Broadcast Post", "white" );
       MPE_Describe_event( event2, "Compute Start", "purple" );
       MPE_Describe_event( event3, "Compute End", "navy" );
    }

    if ( myid == 0 ) {
        n = 1000000;
        startwtime = MPI_Wtime();
    }
    MPI_Barrier( MPI_COMM_WORLD );

    MPI_Pcontrol( 1 );
    /*
    MPE_Start_log();
    */

    for ( jj = 0; jj < 5; jj++ ) {
        MPE_Log_event( event1a, 0, NULL );
        MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD );
        MPE_Log_event( event1b, 0, NULL );

        MPE_Log_event( event1, 0, NULL );
    
        MPE_Log_event( event2a, 0, NULL );
        MPI_Barrier( MPI_COMM_WORLD );
        MPE_Log_event( event2b, 0, NULL );

        MPE_Log_event( event2, 0, NULL );
        MPE_Log_event( event3a, 0, NULL );
        h   = 1.0 / (double) n;
        sum = 0.0;
        for ( ii = myid + 1; ii <= n; ii += numprocs ) {
            x = h * ((double)ii - 0.5);
            sum += f(x);
        }
        mypi = h * sum;
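        /* mypi is this rank's portion of the midpoint-rule sum; in the
           classic cpi example f() (defined elsewhere) is 4/(1+x*x), so the
           MPI_Reduce below accumulates an approximation of pi. */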
        MPE_Log_event( event3b, 0, NULL );
        MPE_Log_event( event3, 0, NULL );

        pi = 0.0;
        MPE_Log_event( event4a, 0, NULL );
        MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD );
        MPE_Log_event( event4b, 0, NULL );

        MPE_Log_sync_clocks();
    }
#if defined( NO_MPI_LOGGING )
    if ( argv != NULL )
        MPE_Finish_log( argv[0] );
    else
        MPE_Finish_log( "cpilog" );
#endif

    if ( myid == 0 ) {
        endwtime = MPI_Wtime();
        printf( "pi is approximately %.16f, Error is %.16f\n",
                pi, fabs(pi - PI25DT) );
        printf( "wall clock time = %f\n", endwtime-startwtime );
    }
    MPI_Finalize();
    return( 0 );
}
Example #19
File: bench.c Project: plavin/bgq_synth
int main(int argc, char**argv){

  int num_ranks, rank, split_num_ranks, split_rank;
  int outer_ranks, inner_ranks;
  int new_comm_id;
  int msg_size, loops;
  int slurm_id, run_index;
  MPI_Comm split_comm;
  FILE * timings, * configs;
  int assignment;
  int custom;

  int c;
  while ((c = getopt (argc, argv, "s:r:l:i:ac:")) != -1){
    switch (c)
      {
      case 's':
	sscanf(optarg, "%d", &msg_size);
	break;
      case 'r':
	sscanf(optarg, "%d", &inner_ranks);
	break;
      case 'l':
	sscanf(optarg, "%d", &loops);
	break;
      case 'i':
	sscanf(optarg, "%d", &run_index);
	break;
      case 'a':
	sscanf(optarg, "%d", &assignment);
	assignment = 0;
	break;
      case 'c':
	sscanf(optarg, "%d", &custom);
	break;
      default:
	printf("Unrecognized option: %c\n", optopt);
	break;
      }
    if(c != 's' && c != 'i' && c != 'l' && c != 'r' && c != 'a' && c != 'c' ){break;}
  }

  timings = fopen("timings.out", "a");
  char configs_buf[128] = {0};
  sprintf(configs_buf, "config-%d.out", run_index);
  configs = fopen(configs_buf, "a");

  MPI_Init(NULL, NULL);
  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
  if(num_ranks == 0){
    printf("MPI_Comm_size failure\n");
    exit(1);
  }
  outer_ranks = num_ranks - inner_ranks;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  //get node names
  char name[MPI_MAX_PROCESSOR_NAME] = {0};
  char * recv = (char*)calloc(MPI_MAX_PROCESSOR_NAME*num_ranks, sizeof(char));
  int proc_len;
  MPI_Get_processor_name(name, &proc_len);
  name[proc_len] = 0;
  MPI_Gather(name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, recv, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, MPI_COMM_WORLD);

  int * splitter = (int*)malloc(sizeof(int)*num_ranks);
  for(int i = 0; i < num_ranks; i++) splitter[i] = OUTER_COMM;
  if(!custom){
    if(rank == 0){
      if(assignment == RANDOM){
	int num_assigned = 0;
	while(num_assigned < inner_ranks){
	  int val = rand() % num_ranks;
	  if(splitter[val] == INNER_COMM){
	    continue;
	  }else{
	    splitter[val] = INNER_COMM;
	    num_assigned += 1;
	  }
	}
      }else if(assignment == APLANES){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (dims[0] == 0 || dims[0] == 2){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == APLANES_COARSE){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (dims[0] == 0 || dims[0] == 1){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == BPLANES){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (dims[1] == 0 || dims[1] == 2){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == CPLANES){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (dims[2] == 0 || dims[2] == 2){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == DPLANES){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (dims[3] == 0 || dims[3] == 2){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == EPLANES){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (dims[4] == 0){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == SQUAREAB1){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (((dims[0] == 0 || dims[0] == 1) && (dims[1] == 0 || dims[1] == 1)) || 
	      ((dims[0] == 2 || dims[0] == 3) && (dims[1] == 2 || dims[1] == 3))){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == SQUAREAB2){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (((dims[0] == 0 || dims[0] == 2) && (dims[1] == 0 || dims[1] == 2)) || 
	      ((dims[0] == 1 || dims[0] == 3) && (dims[1] == 1 || dims[1] == 3))){
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == ALTERABC_NONE){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (((dims[0] == 0 || dims[0] == 1) && (dims[1] == 0 || dims[1] == 1) && (dims[2] == 0 || dims[2] == 1)) || 
	      ((dims[0] == 2 || dims[0] == 3) && (dims[1] == 2 || dims[1] == 3) && (dims[2] == 0 || dims[2] == 1)) || 
	      ((dims[0] == 0 || dims[0] == 1) && (dims[1] == 2 || dims[1] == 3) && (dims[2] == 2 || dims[2] == 3)) || 
	      ((dims[0] == 2 || dims[0] == 3) && (dims[1] == 0 || dims[1] == 1) && (dims[2] == 2 || dims[2] == 3))) {
	    splitter[i] = INNER_COMM;
	  }
	}
      }else if(assignment == ALTERABC_ALL){
	for(int i = 0; i < num_ranks; i++){
	  int dims[5] = {0};
	  get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims);
	  if (((dims[0] == 0 || dims[0] == 2) && (dims[1] == 0 || dims[1] == 2) && (dims[2] == 0 || dims[2] == 2)) || 
	      ((dims[0] == 1 || dims[0] == 3) && (dims[1] == 1 || dims[1] == 3) && (dims[2] == 0 || dims[2] == 2)) || 
	      ((dims[0] == 0 || dims[0] == 2) && (dims[1] == 1 || dims[1] == 3) && (dims[2] == 1 || dims[2] == 3)) || 
	      ((dims[0] == 1 || dims[0] == 3) && (dims[1] == 0 || dims[1] == 2) && (dims[2] == 1 || dims[2] == 3))) {
	    splitter[i] = INNER_COMM;
	  }
	}
      }
    }
  }else{ //using custom mapping in map.out
    for(int i = 0; i < num_ranks/2; i++){
      splitter[i] = INNER_COMM;
    }
  }
    
  MPI_Bcast(splitter, num_ranks, MPI_INT, 0, MPI_COMM_WORLD);
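  /* Rank 0 decided the mapping above; broadcast it so every rank passes the
     same color to MPI_Comm_split below. */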
  
  //split communicator
  MPI_Comm_split(MPI_COMM_WORLD, splitter[rank], 1, &split_comm);
  MPI_Comm_size(split_comm, &split_num_ranks);
  MPI_Comm_rank(split_comm, &split_rank);
  MPI_Barrier(MPI_COMM_WORLD);
    
  
  //print names to file
  if(rank == 0){
    fprintf(configs,"rank,comm,node\n");
    for(int i = 0; i < num_ranks; i++){
      fprintf(configs,"%d,%d,%s\n", i, splitter[i], recv + i*MPI_MAX_PROCESSOR_NAME);
    }   
  }
  
  //run the inner communicator as a warm-up, seems to reduce variance
  if(splitter[rank] == INNER_COMM){
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //run the inside alone, as a baseline

  //start network counters region 1
  MPI_Pcontrol(1);

  float run1;
  if(splitter[rank] == INNER_COMM){
    run1 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  //start network counters region 2
  MPI_Pcontrol(2);

  //run both communicators
  float run2;
  if(splitter[rank] == INNER_COMM){
    run2 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); 
  }else{
    Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops);
  }

  //stop network counters
  MPI_Pcontrol(0);

  //print timings
  if(splitter[rank] == INNER_COMM && split_rank==0) fprintf(timings, "%d,%f,%f\n", run_index, run1, run2);

  //free(recv);
  free(splitter);
  MPI_Finalize();
  exit(0);
}
Example #20
File: red.c Project: arkuzmin/ppp
int main (int argc,char **argv)
{
	MPI_Status status;

	int rank, size;
	struct
	{
		int value;
		int rank;
	} num, max, rcvd;

	MPI_Init(&argc,&argv);
	MPI_Comm_rank (MPI_COMM_WORLD,&rank);
	MPI_Comm_size (MPI_COMM_WORLD,&size);
	
	char *tracefile = getenv("TVTRACE");

	if( tracefile != NULL ){
		printf( "tv tracefile=%s\n", tracefile );
		MPI_Pcontrol(TRACEFILES, NULL, tracefile, 0);      
	}
	else{
		MPI_Pcontrol(TRACEFILES, NULL, "trace", 0);
	}
	MPI_Pcontrol(TRACELEVEL, 1, 1, 1);
	MPI_Pcontrol(TRACENODE, 1000000, 1, 1);
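	/* TRACEFILES, TRACELEVEL, TRACENODE and TRACEEVENT are commands defined
	   by the tracing library linked with this program; the MPI standard
	   leaves the meaning of MPI_Pcontrol levels other than 0, 1 and 2 to
	   the profiling implementation. */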

	num.value = my_random(rank);
	num.rank = rank;
	printf("Node %d: value = %d\n", num.rank, num.value);

	double sTime, eTime;
	sTime = MPI_Wtime();
	MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, "");
	
	MPI_Reduce(&num, &max, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);
	
	MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, "");
	eTime = MPI_Wtime();
	
	MPI_Barrier( MPI_COMM_WORLD );
	
	MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, "");
	if (rank == 0)
	{
		print_result("MPI_Reduce", max.rank, max.value, eTime - sTime);
		sTime = MPI_Wtime();
		max.value = num.value;
		max.rank = num.rank;
		int i;
		for(i = 1; i < size; i++)
		{
			MPI_Recv(&rcvd, 1, MPI_2INT, i, TAG, MPI_COMM_WORLD, &status);
			if (rcvd.value > max.value)
			{
				max.value = rcvd.value;
				max.rank = rcvd.rank;
			} 
		}
		eTime = MPI_Wtime();
		print_result("Send-receive", max.rank, max.value, eTime - sTime);
	}
	else
	{
		MPI_Ssend(&num, 1, MPI_2INT, 0, TAG, MPI_COMM_WORLD);
	}
	MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, "");

#if 0


	if( !rank ){
		double *a,*b,*c, *c0;
		int i,i1,j,k;
		int ann;
		MPI_Status *st;
		MPI_Request *rq,rq1;
		rq = (MPI_Request*) malloc( (size-1)*sizeof(MPI_Request) );
		st = (MPI_Status*) malloc( (size-1)*sizeof(MPI_Status) );


		ann=an/size+((an%size)?1:0);
		//      printf("[%d]ann=%d\n", rank, ann );

		a=(double*) malloc(am*an*sizeof(double));
		b=(double*) malloc(am*bm*sizeof(double));
		c=(double*) malloc(an*bm*sizeof(double));
		for(i=0;i<am*an;i++)
		a[i]=rand()%301;
		for(i=0;i<am*bm;i++)
		b[i]=rand()%251;
		printf( "Data ready [%d]\n", rank );
		
		c0 = (double*)malloc(an*bm*sizeof(double));

		
		time = MPI_Wtime();  
		for (i=0; i<an; i++)
		for (j=0; j<bm; j++)
		{
			double s = 0.0;
			for (k=0; k<am; k++)
			s+= a[i*am+k]*b[k*bm+j];
			c0[i*bm+j] = s;
		} 
		time = MPI_Wtime() - time;
		printf("Time seq[%d] = %lf\n", rank, time );
		time_seq = time;

		MPI_Barrier( MPI_COMM_WORLD );
		time=MPI_Wtime();

		MPI_Bcast( b, am*bm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
		printf( "Data Bcast [%d]\n", rank );

		for( i1=0, j=1; j<size; j++, i1+=ann*am ){
			printf( "Data to Send [%d] %016x[%4d] =>> %d\n", rank, a+i1, i1, j );
			MPI_Isend( a+i1, ann*am, MPI_DOUBLE, j, 101, MPI_COMM_WORLD, &rq1 );
			MPI_Request_free( &rq1 ); 
			printf( "Data Send [%d] =>> %d\n", rank, j );
		}
		printf( "Data Send [%d]\n", rank );
		
		MPI_Isend( a+i1, 1, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &rq1 );
		MPI_Request_free( &rq1 ); 
		
		printf( "Data Send [%d] =>> %d\n", rank, j );


		for(i=(i1/am);i<an;i++)
		for(j=0;j<bm;j++){
			double s=0.0;
			for(k=0;k<am;k++)
			s+=a[i*am+k]*b[k*bm+j];
			c[i*bm+j]=s;
		}

		printf( "Job done  [%d]\n", rank );
		for( i1=0, j=1; j<size; j++, i1+=(ann*bm) ){
			printf( "Data to Recv [%d] %016x[%4d] =>> %d\n", rank, c+i1, i1/bm, j );
			MPI_Irecv( c+i1, ann*am, MPI_DOUBLE, j, 102, MPI_COMM_WORLD, rq+(j-1) );
		}         
		MPI_Waitall( size-1, rq, st );
		
		time=MPI_Wtime()-time;
		printf("time [%d]=%12.8lf\n",rank,time);
		time_par = time;

		printf( "Data collected [%d]\n", rank );
		
		time=MPI_Wtime();
		int ok = 1;
		for(i=0;i<an*bm;i++)
		if( c[i] != c0[i] ){
			ok = 0;
			printf( "Fail [%d %d] %lf != %lf\n", i/bm, i%bm, c[i], c0[i] );
			break;
		}
		time=MPI_Wtime()-time;
		if( ok ){
			printf( "Data verifeid [%d] time = %lf\n", rank, time );
			printf( "SpeedUp S(%d) = %14.10lf\n", size, time_seq/time_par );
			printf( "Efitncy E(%d) = %14.10lf\n", size, time_seq/(time_par*size) );
		}
		
	}
	else
	{
		int ann;
		double *a,*b,*c;
		MPI_Status st;
		int i,j,k;

		MPI_Pcontrol(TRACEEVENT, "entry", 0, 0, "");

		ann= an/size + ((an%size)?1:0);
		//      if(rank==1)
		//        printf("[%d]ann=%d = %d / %d \n", rank, ann, an, size );
		
		a=(double*)malloc(ann*am*sizeof(double));
		b=(double*)malloc(bm*am*sizeof(double));
		c=(double*)malloc(ann*bm*sizeof(double));
		printf( "Mem allocated [%d]\n", rank );

		
		MPI_Barrier( MPI_COMM_WORLD );
		MPI_Pcontrol(TRACEEVENT, "exit", 0, 0, "");
		time = MPI_Wtime();


		MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, "");
		
		MPI_Bcast(b,am*bm,MPI_DOUBLE,0,MPI_COMM_WORLD);
		printf( "Data Bcast [%d]\n", rank );
		
		MPI_Recv( a, ann*am, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &st);
		printf( "Data Recv [%d]\n", rank );
		
		MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, "");
		
		MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, "");
		for( i=0; i<ann; i++ )
		for(j=0;j<bm;j++){
			double s=0.0;
			
			for( k=0; k<am; k++ ){
				s+=a[i*am+k]*b[k*bm+j];
			}
			/*    
			if(1==rank){
			if(0==j){
				printf( "c[%d<%d %d] = %lf\n", i,ann,j, s );
			}
			}
*/
			c[i*bm+j]=s;
		}
		printf( "Job done  [%d]\n", rank );
		MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, "");

		MPI_Pcontrol(TRACEEVENT, "entry", 3, 0, "");
		MPI_Send( c, ann*bm,  MPI_DOUBLE, 0, 102, MPI_COMM_WORLD);
		printf( "Data returned [%d]\n", rank );
		MPI_Pcontrol(TRACEEVENT, "exit", 3, 0, "");

		time=MPI_Wtime()-time;
		printf("time [%d]=%12.8lf\n",rank,time);
	}

#endif

	MPI_Finalize();
	return 0;
}
Example #21
int main( int argc, char *argv[] )
{
    int  n, myid, numprocs, ii, jj;
    double PI25DT = 3.141592653589793238462643;
    double mypi, pi, h, sum, x;
    double startwtime = 0.0, endwtime;
    int namelen; 
    int event1a, event1b, event2a, event2b,
        event3a, event3b, event4a, event4b;
    char processor_name[ MPI_MAX_PROCESSOR_NAME ];

    MPE_LOG_BYTES  bytebuf;
    int            bytebuf_pos;


    MPI_Init( &argc, &argv );
        
        MPI_Pcontrol( 0 );

    MPI_Comm_size( MPI_COMM_WORLD, &numprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &myid );

    MPI_Get_processor_name( processor_name, &namelen );
    fprintf( stderr, "Process %d running on %s\n", myid, processor_name );

    /*
        MPE_Init_log() & MPE_Finish_log() are NOT needed when
        liblmpe.a is linked with this program.  In that case,
        MPI_Init() would have called MPE_Init_log() already.
    */
#if defined( NO_MPI_LOGGING )
    MPE_Init_log();
#endif

    /*  Get event ID from MPE, user should NOT assign event ID directly */
    event1a = MPE_Log_get_event_number(); 
    event1b = MPE_Log_get_event_number(); 
    event2a = MPE_Log_get_event_number(); 
    event2b = MPE_Log_get_event_number(); 
    event3a = MPE_Log_get_event_number(); 
    event3b = MPE_Log_get_event_number(); 
    event4a = MPE_Log_get_event_number(); 
    event4b = MPE_Log_get_event_number(); 

    if ( myid == 0 ) {
        MPE_Describe_state( event1a, event1b, "Broadcast", "red" );
        MPE_Describe_info_state( event2a, event2b, "Sync", "orange",
                                 "source = %s()'s line %d." );
        MPE_Describe_info_state( event3a, event3b, "Compute", "blue",
                                 "mypi = %E computed at iteration %d." );
        MPE_Describe_info_state( event4a, event4b, "Reduce", "green",
                                 "final pi = %E at iteration %d." );
    }

    if ( myid == 0 ) {
        n = 1000000;
        startwtime = MPI_Wtime();
    }
    MPI_Barrier( MPI_COMM_WORLD );

    MPI_Pcontrol( 1 );
    /*
    MPE_Start_log();
    */

    for ( jj = 0; jj < ITER_COUNT; jj++ ) {
        MPE_Log_event( event1a, 0, NULL );
        MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD );
        MPE_Log_event( event1b, 0, NULL );
    
        MPE_Log_event( event2a, 0, NULL );
        MPI_Barrier( MPI_COMM_WORLD );
            int line_num;
            bytebuf_pos = 0;
            MPE_Log_pack( bytebuf, &bytebuf_pos, 's',
                          sizeof(__func__)-1, __func__ );
            line_num = __LINE__;
            MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &line_num );
        MPE_Log_event( event2b, 0, bytebuf );

        MPE_Log_event( event3a, 0, NULL );
        h   = 1.0 / (double) n;
        sum = 0.0;
        for ( ii = myid + 1; ii <= n; ii += numprocs ) {
            x = h * ((double)ii - 0.5);
            sum += f(x);
        }
        mypi = h * sum;
            bytebuf_pos = 0;
            MPE_Log_pack( bytebuf, &bytebuf_pos, 'E', 1, &mypi );
            MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &jj );
        MPE_Log_event( event3b, 0, bytebuf );

        pi = 0.0;
        MPE_Log_event( event4a, 0, NULL );
        MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD );
            bytebuf_pos = 0;
            MPE_Log_pack( bytebuf, &bytebuf_pos, 'E', 1, &pi );
            MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &jj );
        MPE_Log_event( event4b, 0, bytebuf );
    }
#if defined( NO_MPI_LOGGING )
    if ( argv != NULL )
        MPE_Finish_log( argv[0] );
    else
        MPE_Finish_log( "cpilog" );
#endif

    if ( myid == 0 ) {
        endwtime = MPI_Wtime();
        printf( "pi is approximately %.16f, Error is %.16f\n",
                pi, fabs(pi - PI25DT) );
        printf( "wall clock time = %f\n", endwtime-startwtime );
    }

    MPI_Finalize();
    return( 0 );
}