int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    MPI_Barrier(MPI_COMM_WORLD);   /* profiling should initially be disabled */

    MPI_Pcontrol(1);               /* enable profiling */
    MPI_Pcontrol(2);               /* reset call site data */
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Pcontrol(0);               /* disable profiling */
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Pcontrol(1);               /* enable profiling */
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}
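/*
 * A minimal sketch (not part of any of the programs collected here) of how a
 * profiling layer can act on the MPI_Pcontrol levels exercised above.  The MPI
 * standard defines MPI_Pcontrol as doing nothing inside the MPI library
 * itself, so a tool supplies its own definition and interprets the level
 * argument; the 0 = disable, 1 = enable, 2 = reset convention follows the
 * example above.  The state variable is hypothetical.
 */
#include <mpi.h>

static int profiling_enabled = 0;   /* tool-private state (hypothetical) */

int MPI_Pcontrol(const int level, ...)
{
    switch (level) {
    case 0: profiling_enabled = 0; break;   /* disable profiling */
    case 1: profiling_enabled = 1; break;   /* enable profiling  */
    case 2: /* reset call-site data here */ break;
    default: break;                         /* other levels are tool-defined */
    }
    return MPI_SUCCESS;
}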
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int i, myrank, numranks, groupsize;
    int dims[3]    = {0, 0, 0};
    int temp[3]    = {0, 0, 0};
    int coord[3]   = {0, 0, 0};
    int periods[3] = {1, 1, 1};
    double startTime, stopTime;
    MPI_Comm cartcomm, subcomm;

    /* MP_X, MP_Y, MP_Z and MAX_ITER are assumed to be defined elsewhere in this benchmark */
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &numranks);

    dims[MP_X] = atoi(argv[1]);
    dims[MP_Y] = atoi(argv[2]);
    dims[MP_Z] = atoi(argv[3]);

    MPI_Dims_create(numranks, 3, dims);
    MPI_Cart_create(MPI_COMM_WORLD, 3, dims, periods, 1, &cartcomm);
    MPI_Cart_get(cartcomm, 3, dims, periods, coord);

    /* sub-communicator spanning only the Y dimension of the Cartesian grid */
    temp[MP_X] = 0;  temp[MP_Y] = 1;  temp[MP_Z] = 0;
    MPI_Cart_sub(cartcomm, temp, &subcomm);
    MPI_Comm_size(subcomm, &groupsize);

    int perrank = atoi(argv[4]);
    char *sendbuf = (char *)malloc(perrank * groupsize);
    char *recvbuf = (char *)malloc(perrank * groupsize);

    MPI_Barrier(cartcomm);
    MPI_Pcontrol(1);
    startTime = MPI_Wtime();

    for (i = 0; i < MAX_ITER; i++) {
        MPI_Alltoall(sendbuf, perrank, MPI_CHAR, recvbuf, perrank, MPI_CHAR, subcomm);
    }

    MPI_Barrier(cartcomm);
    stopTime = MPI_Wtime();
    MPI_Pcontrol(0);

    if (myrank == 0) {
        printf("Completed %d iterations for subcomm size %d, perrank %d\n",
               i, groupsize, perrank);
        printf("Time elapsed: %f\n", stopTime - startTime);
    }

    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int rank, size;
    char hostname[1000];
    int iters = 20, i;

    gethostname(hostname, 1000);
    printf("Running %s on %s\n", argv[0], hostname);

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    MPI_Barrier(MPI_COMM_WORLD);
    for (i = 0; i < iters; i++) {
#ifdef BLR_USE_JITTER
        MPI_Pcontrol(0);
#endif
        sleep(rank % 2);
        MPI_Barrier(MPI_COMM_WORLD);
    }
    //printf("Hello world, I am %d of %d\n", rank, size);

    MPI_Finalize();
    printf("Finished %s on %s\n", argv[0], hostname);
    return 0;
}
int main(int argc, char **argv)
{
    void *stat;
    pthread_attr_t attr;
    pthread_t thread[2];
    int provided = 0;

    //MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    fprintf(stderr, "test a: required: %d, provided: %d\n", MPI_THREAD_MULTIPLE, provided);

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    fprintf(stderr, "test b: required: %d, provided: %d\n", MPI_THREAD_MULTIPLE, provided);

    if (provided < MPI_THREAD_SERIALIZED) {
        fprintf(stderr, "multi-thread not supported: provided: %d (SERIALIZED: %d, MULTIPLE: %d)\n",
                provided, MPI_THREAD_SERIALIZED, MPI_THREAD_MULTIPLE);
        exit(0);
    }
    exit(0);   /* note: this unconditional exit short-circuits the threaded test below */

    // MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);   // rank -> rank of this processor
    MPI_Comm_size(MPI_COMM_WORLD, &size);   // size -> total number of processors

    srand((unsigned)time(NULL));
    msg_num = atoi(argv[1]);   /* rank, size, msg_num: presumably file-scope globals shared with the thread functions */

#if 1
    MPI_Pcontrol(0);

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    // thread 0 will be sending messages
    pthread_create(&thread[0], &attr, Send_Func_For_Thread, (void *)0);
    // thread 1 will be receiving messages
    pthread_create(&thread[1], &attr, Recv_Func_For_Thread, (void *)1);

    pthread_attr_destroy(&attr);
    pthread_join(thread[0], &stat);
    pthread_join(thread[1], &stat);
#endif

    fprintf(stdout, "Finished : rank: %d\n", rank);
    fflush(stdout);

    MPI_Finalize();
    pthread_exit((void *)NULL);
    return 0;
}
FORT_DLL_SPEC void FORT_CALL mpi_pcontrol_( MPI_Fint *v1, MPI_Fint *ierr )
{
    *ierr = MPI_Pcontrol( (int)*v1 );
}
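/*
 * The wrapper above is the machine-generated Fortran binding for
 * MPI_Pcontrol: it converts the Fortran integer level to a C int, forwards it
 * to the C routine, and stores the return code in ierr.
 */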
int main(int argc, char **argv)
{
    int myRank, numPes;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numPes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

    MPI_Request sreq[2], rreq[2];
    int blockDimX, arrayDimX, arrayDimY;
    double startTime = 0.0, endTime = 0.0;
    /* wrap_x(), wrap_y(), TOP, BOTTOM and MAX_ITER are assumed to be defined elsewhere in the full source */

    if (argc != 2 && argc != 3) {
        printf("%s [array_size] \n", argv[0]);
        printf("%s [array_size_X] [array_size_Y] \n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    if (argc == 2) {
        arrayDimY = arrayDimX = atoi(argv[1]);
    }
    else {
        arrayDimX = atoi(argv[1]);
        arrayDimY = atoi(argv[2]);
    }

    if (arrayDimX % numPes != 0) {
        printf("array_size_X %% numPes != 0!\n");
        MPI_Abort(MPI_COMM_WORLD, -1);
    }
    blockDimX = arrayDimX / numPes;

    int iterations = 0, i, j;
    double error = 1.0, max_error = 0.0;

    if (myRank == 0) {
        printf("Running Jacobi on %d processors\n", numPes);
        printf("Array Dimensions: %d %d\n", arrayDimX, arrayDimY);
        printf("Block Dimensions: %d\n", blockDimX);
    }

    double **temperature;
    double **new_temperature;

    /* allocate two dimensional arrays */
    temperature     = new double*[blockDimX+2];
    new_temperature = new double*[blockDimX+2];
    for (i=0; i<blockDimX+2; i++) {
        temperature[i]     = new double[arrayDimY];
        new_temperature[i] = new double[arrayDimY];
    }
    for (i=0; i<blockDimX+2; i++) {
        for (j=0; j<arrayDimY; j++) {
            temperature[i][j]     = 0.5;
            new_temperature[i][j] = 0.5;
        }
    }

    // boundary conditions
    if (myRank < numPes/2) {
        for (i=1; i<=blockDimX; i++) temperature[i][0] = 1.0;
    }
    if (myRank == numPes-1) {
        for (j=arrayDimY/2; j<arrayDimY; j++) temperature[blockDimX][j] = 0.0;
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Pcontrol(1);
    startTime = MPI_Wtime();

    while (/*error > 0.001 &&*/ iterations < MAX_ITER) {
        iterations++;

        /* Receive my bottom and top edge */
        MPI_Irecv(&temperature[blockDimX+1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1),
                  BOTTOM, MPI_COMM_WORLD, &rreq[BOTTOM-1]);
        MPI_Irecv(&temperature[0][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1),
                  TOP, MPI_COMM_WORLD, &rreq[TOP-1]);

        /* Send my top and bottom edge */
        MPI_Isend(&temperature[1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1),
                  BOTTOM, MPI_COMM_WORLD, &sreq[BOTTOM-1]);
        MPI_Isend(&temperature[blockDimX][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1),
                  TOP, MPI_COMM_WORLD, &sreq[TOP-1]);

        MPI_Waitall(2, rreq, MPI_STATUSES_IGNORE);
        MPI_Waitall(2, sreq, MPI_STATUSES_IGNORE);

        for (i=1; i<blockDimX+1; i++) {
            for (j=0; j<arrayDimY; j++) {
                /* update my value based on the surrounding values */
                new_temperature[i][j] = (temperature[i-1][j] + temperature[i+1][j]
                                         + temperature[i][wrap_y(j-1)] + temperature[i][wrap_y(j+1)]
                                         + temperature[i][j]) * 0.2;
            }
        }

        max_error = error = 0.0;
        for (i=1; i<blockDimX+1; i++) {
            for (j=0; j<arrayDimY; j++) {
                error = fabs(new_temperature[i][j] - temperature[i][j]);
                if (error > max_error) max_error = error;
            }
        }

        double **tmp;
        tmp = temperature;
        temperature = new_temperature;
        new_temperature = tmp;

        // boundary conditions
        if (myRank < numPes/2) {
            for (i=1; i<=blockDimX; i++) temperature[i][0] = 1.0;
        }
        if (myRank == numPes-1) {
            for (j=arrayDimY/2; j<arrayDimY; j++) temperature[blockDimX][j] = 0.0;
        }

        //if(myRank == 0) printf("Iteration %d %f %f %f\n", iterations, max_error, temperature[1][0], temperature[1][1]);
        MPI_Allreduce(&max_error, &error, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    } /* end of while loop */

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Pcontrol(0);

    if (myRank == 0) {
        endTime = MPI_Wtime();
        printf("Completed %d iterations\n", iterations);
        printf("Time elapsed: %f\n", endTime - startTime);
    }

    MPI_Finalize();
    return 0;
} /* end function main */
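/*
 * Note the instrumentation pattern in the Jacobi code above: a barrier
 * followed by MPI_Pcontrol(1) just before the timed iteration loop, and
 * another barrier followed by MPI_Pcontrol(0) just after it, so that an
 * attached profiler (and the wall-clock timer) covers only the steady-state
 * iterations rather than setup and allocation.
 */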
/** * Main function which selects the process to be a master or a worker * based on MPI myid. * * @param argc Number of arguments. * @param argv Pointer to the argument pointers. * @return 0 on success. */ int main(int argc, char **argv) { int myid, numprocs, i; int err = -1; struct test_params_s test_params; struct mpe_events_s mpe_events; struct frag_preresult_s **query_frag_preresult_matrix = NULL; memset(&test_params, 0, sizeof(struct test_params_s)); MPI_Init(&argc, &argv); #ifdef HAVE_MPE MPI_Pcontrol(0); #endif MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); init_mpe_events(&mpe_events); #ifdef HAVE_MPE if (myid == MASTER_NODE) { init_mpe_describe_state(&mpe_events); } #endif if (myid == MASTER_NODE) { err = parse_args(argc, argv, &test_params); if (err >= 0) print_settings(&test_params, numprocs); if (test_params.output_file != NULL) MPI_File_delete(test_params.output_file, MPI_INFO_NULL); if (numprocs < 2) { fprintf(stderr, "Must use at least 2 processes.\n"); err = -1; } } MPI_Bcast(&err, 1, MPI_INT, MASTER_NODE, MPI_COMM_WORLD); /* Quit if the parse_args failed */ if (err != 0) { MPI_Finalize(); return 0; } /* Master precalculates all the results for the queries and * reads in the database histogram parameters */ if (myid == MASTER_NODE) { if ((query_frag_preresult_matrix = (struct frag_preresult_s **) malloc(test_params.query_count * sizeof(struct frag_preresult_s *))) == NULL) { custom_debug( MASTER_ERR, "M:malloc query_frag_preresult_matrix of size %d failed\n", test_params.query_count * sizeof(struct frag_preresult_s *)); return -1; } for (i = 0; i < test_params.query_count; i++) { if ((query_frag_preresult_matrix[i] = (struct frag_preresult_s *) malloc(test_params.total_frags * sizeof(struct frag_preresult_s))) == NULL) { custom_debug( MASTER_ERR, "M:malloc query_frag_preresult_matrix[%d] " "of size %d failed\n", i, test_params.total_frags * sizeof(struct frag_preresult_s)); return -1; } memset(query_frag_preresult_matrix[i], 0, test_params.total_frags * sizeof(struct frag_preresult_s)); } precalculate_results(&test_params, query_frag_preresult_matrix); test_params.query_frag_preresult_matrix = query_frag_preresult_matrix; if (test_params.query_params_file != NULL) { read_hist_params(&test_params, QUERY); #if 0 print_hist_params(&test_params); #endif } if (test_params.db_params_file != NULL) { read_hist_params(&test_params, DATABASE); #if 0 print_hist_params(&test_params); #endif } } MPI_Barrier(MPI_COMM_WORLD); #ifdef HAVE_MPE MPI_Pcontrol(1); #endif /* Divide up into either a Master or Worker */ mpe_events.total_time = MPI_Wtime(); if (myid == 0) { err = master(myid, numprocs, &mpe_events, &test_params); if (err != 0) custom_debug(MASTER_ERR, "master failed\n"); else custom_debug(MASTER, "master (proc %d) reached last barrier\n", myid); } else { err = worker(myid, numprocs, &mpe_events, &test_params); if (err != 0) custom_debug(WORKER_ERR, "worker failed\n"); else custom_debug(WORKER, "worker (proc %d) reached last barrier\n", myid); } custom_MPE_Log_event(mpe_events.sync_start, 0, NULL, &mpe_events); MPI_Barrier(MPI_COMM_WORLD); custom_MPE_Log_event(mpe_events.sync_end, 0, NULL, &mpe_events); MPI_Pcontrol(0); mpe_events.total_time = MPI_Wtime() - mpe_events.total_time; #if 0 print_timing(myid, &mpe_events); #endif MPI_Barrier(MPI_COMM_WORLD); timing_reduce(myid, numprocs, &mpe_events); /* Clean up precomputed results and file */ if (myid == MASTER_NODE) { if (test_params.query_params_file != NULL) { free(test_params.query_params_file); 
free(test_params.query_hist_list); } if (test_params.db_params_file != NULL) { free(test_params.db_params_file); free(test_params.db_hist_list); } MPI_File_delete(test_params.output_file, MPI_INFO_NULL); for (i = 0; i < test_params.query_count; i++) { free(query_frag_preresult_matrix[i]); } free(query_frag_preresult_matrix); } MPI_Info_free(test_params.info_p); free(test_params.info_p); free(test_params.output_file); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int size=-1,rank=-1, left=-1, right=-1, you=-1; int ndata=127,ndata_max=127,seed; int rv, nsec=0, count, cmpl; long long int i,j,k; unsigned long long int nflop=0,nmem=1,nsleep=0,nrep=1, myflops; char *env_ptr, cbuf[4096]; double *sbuf, *rbuf,*x; MPI_Status *s; MPI_Request *r; time_t ts; seed = time(&ts); flags |= DOMPI; while(--argc && argv++) { if(!strcmp("-v",*argv)) { flags |= DOVERBOSE; } else if(!strcmp("-n",*argv)) { --argc; argv++; nflop = atol(*argv); } else if(!strcmp("-N",*argv)) { --argc; argv++; nrep = atol(*argv); } else if(!strcmp("-d",*argv)) { --argc; argv++; ndata_max = ndata = atol(*argv); } else if(!strcmp("-m",*argv)) { --argc; argv++; nmem = atol(*argv); } else if(!strcmp("-w",*argv)) { --argc; argv++; nsec = atoi(*argv); } else if(!strcmp("-s",*argv)) { --argc; argv++; nsleep = atol(*argv); } else if(!strcmp("-spray",*argv)) { flags |= DOSPRAY; } else if(!strcmp("-c",*argv)) { flags |= CORE; } else if(!strcmp("-r",*argv)) { flags |= REGION; } else if(!strcmp("-stair",*argv)) { flags |= STAIR_RANK; } else if(!strcmp("-stair_region",*argv)) { flags |= STAIR_REGION; } else if(!strcmp("-nompi",*argv)) { flags &= ~DOMPI; } } if(flags & DOMPI) { MPI_Init(&argc,&argv); /* test double init MPI_Init(&argc,&argv); */ MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); } if(nsec > 0) { sleep(nsec); } if(nmem) { nmem = (nmem*1024*1024/sizeof(double)); x = (double *)malloc((size_t)(nmem*sizeof(double))); for(j=0;j<nrep;j++) { for(i=0;i<nmem;i++) { x[i] = i; } for(i=0;i<nmem;i++) { x[i] = i*x[i]; } if(x[nmem-1]*x[nmem-1] < 0) { printf("assumption about flop-test or optimization failed\n"); } } if(0) free((char *)x); } /* #define LONG_REGNAME rshouldbethelastchar */ #define LONG_REGNAME abcdefghijklmnopqrst if(flags & REGION) { MPI_Pcontrol(0,"enter_region(abcdefghijklmnopqrst)"); sprintf(cbuf,""); MPI_Pcontrol(0,"get_region()",cbuf); if(strcmp(cbuf,"abcdefghijklmnopqrst")) { printf("%d in region = \"%s\" not \"%s\"\n", rank,cbuf,"abcdefghijklmnopqrst"); fflush(stdout); } MPI_Pcontrol(0,"exit_region(abcdefghijklmnopqrst)"); MPI_Pcontrol(0,"get_region()",cbuf); if(strcmp(cbuf,"ipm_noregion")) { printf("%d out region = \"%s\" not \"%s\"\n", rank,cbuf,"ipm_noregion"); fflush(stdout); } } if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); if(nflop) { x = (double *)malloc((size_t)(10*sizeof(double))); j = k = 0; for(i=0;i<10;i++) { x[i] = 1.0; } if(flags & STAIR_RANK) { myflops = (rank*nflop)/size; } else { myflops = nflop; } for(i=0;i<nflop;i++) { x[j] = x[j]*x[k]; j = ((i%9)?(j+1):(0)); k = ((i%8)?(k+1):(0)); } free((char *)x); } if(nsleep) { sleep(nsleep); } if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); if(nmem<nflop) nmem=nflop; if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem); fflush(stdout); if(flags & CORE) { for(i=0;;i++) { x[i] = x[i*i-1000]; } } if(flags & DOMPI) { s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size)); r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size)); sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; } MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD); srand48(seed); for(i=0;i<nrep;i++) { MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD); } if(size>1) { if(!rank) {left=size-1;} else { left = rank-1;} if(rank == size-1) { right=0;} else {right=rank+1;} you = (rank < 
size/2)?(rank+size/2):(rank-size/2); } else { you = left = right = rank; } for(i=0;i<nrep;i++) { if(flags & DOSPRAY) { ndata = (long int)(drand48()*ndata_max)+1; } MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); if(flags & REGION) MPI_Pcontrol(1,"region_a"); MPI_Barrier(MPI_COMM_WORLD); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,1,MPI_COMM_WORLD,s); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Isend(sbuf,ndata/2,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r); MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &cmpl, s); MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, s); MPI_Get_count(s,MPI_DOUBLE,&count); MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s); if(count != ndata/2) { printf("error: MPI_Get_count(s,MPI_DOUBLE,&count) --> count = %d\n",count); } MPI_Wait(r,s); /* FIXME - the following case may need to be addressed MPI_Test(r,&cmpl,s); printf("spam1 %d %d\n", s->MPI_SOURCE, cmpl); if(r != MPI_REQUEST_NULL) { MPI_Wait(r,s); printf("spam2 %d\n", s->MPI_SOURCE); } */ MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r); MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD); MPI_Wait(r,s); for(j=0;j<size;j++) { MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j); MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j); } MPI_Waitall(2*size,r,s); /* for(j=0;j<size;j++) { printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE); } */ if(flags & REGION) MPI_Pcontrol(-1,"region_a"); if(flags & REGION) MPI_Pcontrol(1,"region_b"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_b"); if(1) { if(flags & REGION) MPI_Pcontrol(1,"region_c"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_c"); if(flags & REGION) MPI_Pcontrol(1,"region_d"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_d"); if(flags & REGION) MPI_Pcontrol(1,"region_e"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_e"); if(flags & REGION) MPI_Pcontrol(1,"region_f"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_f"); if(flags & REGION) MPI_Pcontrol(1,"region_g"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & 
REGION) MPI_Pcontrol(-1,"region_g"); if(flags & REGION) MPI_Pcontrol(1,"region_h"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_h"); if(flags & REGION) MPI_Pcontrol(1,"region_i"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_i"); } } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); } free((char *)rbuf); free((char *)sbuf); free((char *)r); free((char *)s); free((char *)x); return 0; }
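/*
 * Condensed, self-contained sketch of the IPM-style region idiom exercised by
 * the test above (a sketch only; the region name here is made up).  Calling
 * MPI_Pcontrol with level 1 and a name enters a region and level -1 exits it,
 * so the profiler can attribute the bracketed work to that region.
 */
#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    MPI_Pcontrol( 1, "solver");      /* enter region "solver" */
    MPI_Barrier(MPI_COMM_WORLD);     /* ... work attributed to "solver" ... */
    MPI_Pcontrol(-1, "solver");      /* exit region "solver"  */

    MPI_Finalize();
    return 0;
}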
void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) { int i; int *scount,*rcount; int tag,irnum; MPI_Request *request; MPI_Status *status; // scount=(int *)malloc(2*sizeof(int)*nsend); rcount=(int *) malloc(2*sizeof(int)*nrecv); request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); // for(i=0;i<nsend;i++){ scount[2*i]=sndPack[i].nints; scount[2*i+1]=sndPack[i].nreals; } // irnum=0; tag=10; // for(i=0;i<nrecv;i++) MPI_Irecv(&(rcount[2*i]),2,MPI_INT,rcvMap[i],tag,scomm,&request[irnum++]); // for(i=0;i<nsend;i++) MPI_Isend(&(scount[2*i]),2,MPI_INT,sndMap[i],tag,scomm,&request[irnum++]); // MPI_Waitall(irnum,request,status); for(i=0;i<nrecv;i++) { rcvPack[i].nints=rcount[2*i]; rcvPack[i].nreals=rcount[2*i+1]; } // irnum=0; for(i=0;i<nrecv;i++) { if (rcvPack[i].nints > 0) { tag=10; rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints, MPI_INT,rcvMap[i], tag,scomm,&request[irnum++]); } if (rcvPack[i].nreals > 0) { tag=20; rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals, MPI_DOUBLE,rcvMap[i], tag,scomm,&request[irnum++]); } } // for(i=0;i<nsend;i++) { if (sndPack[i].nints > 0){ tag=10; MPI_Isend(sndPack[i].intData,sndPack[i].nints, MPI_INT,sndMap[i], tag,scomm,&request[irnum++]); } if (sndPack[i].nreals > 0){ tag=20; MPI_Isend(sndPack[i].realData,sndPack[i].nreals, MPI_DOUBLE,sndMap[i], tag,scomm,&request[irnum++]); } } MPI_Pcontrol(1, "tioga_pc_waitall"); MPI_Waitall(irnum,request,status); MPI_Pcontrol(-1, "tioga_pc_waitall"); // free(scount); free(rcount); free(request); free(status); }
void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack) { int i; int *sint,*sreal,*rint,*rreal; int tag,irnum; MPI_Request *request; MPI_Status *status; // sint=(int *)malloc(sizeof(int)*numprocs); sreal=(int *) malloc(sizeof(int)*numprocs); rint=(int *)malloc(sizeof(int)*numprocs); rreal=(int *) malloc(sizeof(int)*numprocs); request=(MPI_Request *) malloc(sizeof(MPI_Request)*4*numprocs); status=(MPI_Status *) malloc(sizeof(MPI_Status)*4*numprocs); // for(i=0;i<numprocs;i++){ sint[i]=sndPack[i].nints; sreal[i]=sndPack[i].nreals; } // MPI_Alltoall(sint,1,MPI_INT,rint,1,MPI_INT,scomm); MPI_Alltoall(sreal,1,MPI_INT,rreal,1,MPI_INT,scomm); // for(i=0;i<numprocs;i++) { rcvPack[i].nints=rint[i]; rcvPack[i].nreals=rreal[i]; } // irnum=0; for(i=0;i<numprocs;i++) { if (rcvPack[i].nints > 0) { tag=1; rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints, MPI_INT,i, tag,scomm,&request[irnum++]); } if (rcvPack[i].nreals > 0) { tag=2; rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals, MPI_DOUBLE,i, tag,scomm,&request[irnum++]); } } for(i=0;i<numprocs;i++) { if (sndPack[i].nints > 0){ tag=1; MPI_Isend(sndPack[i].intData,sndPack[i].nints, MPI_INT,i, tag,scomm,&request[irnum++]); } if (sndPack[i].nreals > 0){ tag=2; MPI_Isend(sndPack[i].realData,sndPack[i].nreals, MPI_DOUBLE,i, tag,scomm,&request[irnum++]); } } MPI_Pcontrol(1, "tioga_pc_waitall"); MPI_Waitall(irnum,request,status); MPI_Pcontrol(-1, "tioga_pc_waitall"); free(sint); free(sreal); free(rint); free(rreal); free(request); free(status); }
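/*
 * In both tioga routines above, the MPI_Waitall is bracketed by
 * MPI_Pcontrol(1, "tioga_pc_waitall") and MPI_Pcontrol(-1, "tioga_pc_waitall"),
 * the same enter/exit region convention as in the earlier examples, so a
 * profiler can attribute the time spent waiting on the packet exchange to a
 * named region.
 */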
int main( int argc, char *argv[]) { int n, myid, numprocs, i, j; double PI25DT = 3.141592653589793238462643; double mypi, pi, h, sum, x; double startwtime = 0.0, endwtime; int namelen; int event1a, event1b, event2a, event2b, event3a, event3b, event4a, event4b; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc,&argv); MPI_Pcontrol( 0 ); MPI_Comm_size(MPI_COMM_WORLD,&numprocs); MPI_Comm_rank(MPI_COMM_WORLD,&myid); MPI_Get_processor_name(processor_name,&namelen); fprintf(stderr,"Process %d running on %s\n", myid, processor_name); /* MPE_Init_log() & MPE_Finish_log() are NOT needed when liblmpe.a is linked with this program. In that case, MPI_Init() would have called MPE_Init_log() already. */ /* MPE_Init_log(); */ /* Get event ID from MPE, user should NOT assign event ID */ event1a = MPE_Log_get_event_number(); event1b = MPE_Log_get_event_number(); event2a = MPE_Log_get_event_number(); event2b = MPE_Log_get_event_number(); event3a = MPE_Log_get_event_number(); event3b = MPE_Log_get_event_number(); event4a = MPE_Log_get_event_number(); event4b = MPE_Log_get_event_number(); if (myid == 0) { MPE_Describe_state(event1a, event1b, "Broadcast", "red"); MPE_Describe_state(event2a, event2b, "Compute", "blue"); MPE_Describe_state(event3a, event3b, "Reduce", "green"); MPE_Describe_state(event4a, event4b, "Sync", "orange"); } if (myid == 0) { n = 1000000; startwtime = MPI_Wtime(); } MPI_Barrier(MPI_COMM_WORLD); MPI_Pcontrol( 1 ); /* MPE_Start_log(); */ for (j = 0; j < 5; j++) { MPE_Log_event(event1a, 0, NULL); MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); MPE_Log_event(event1b, 0, NULL); MPE_Log_event(event4a, 0, NULL); MPI_Barrier(MPI_COMM_WORLD); MPE_Log_event(event4b, 0, NULL); MPE_Log_event(event2a, 0, NULL); h = 1.0 / (double) n; sum = 0.0; for (i = myid + 1; i <= n; i += numprocs) { x = h * ((double)i - 0.5); sum += f(x); } mypi = h * sum; MPE_Log_event(event2b, 0, NULL); MPE_Log_event(event3a, 0, NULL); MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPE_Log_event(event3b, 0, NULL); } /* MPE_Finish_log("cpilog"); */ if (myid == 0) { endwtime = MPI_Wtime(); printf("pi is approximately %.16f, Error is %.16f\n", pi, fabs(pi - PI25DT)); printf("wall clock time = %f\n", endwtime-startwtime); } MPI_Finalize(); return(0); }
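/*
 * In the cpi-style MPE example above, MPI_Pcontrol(0) immediately after
 * MPI_Init suspends MPE logging during setup, and MPI_Pcontrol(1) after the
 * barrier re-enables it just before the timed loop.  As the in-code comments
 * note, MPE_Init_log/MPE_Finish_log are invoked by MPI_Init/MPI_Finalize when
 * liblmpe.a is linked with the program.
 */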
int main(int argc, char *argv[]) { int np=1, rank=0; int splitrank, splitsize; int rc = 0; nssi_service xfer_svc; int server_index=0; int rank_in_server=0; int transport_index=-1; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Barrier(MPI_COMM_WORLD); Teuchos::oblackholestream blackhole; std::ostream &out = ( rank == 0 ? std::cout : blackhole ); struct xfer_args args; const int num_io_methods = 8; const int io_method_vals[] = { XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC, XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC, XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC, XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC}; const char * io_method_names[] = { "write-encode-sync", "write-encode-async", "write-rdma-sync", "write-rdma-async", "read-encode-sync", "read-encode-async", "read-rdma-sync", "read-rdma-async"}; const int nssi_transport_list[] = { NSSI_RPC_PTL, NSSI_RPC_PTL, NSSI_RPC_IB, NSSI_RPC_IB, NSSI_RPC_GEMINI, NSSI_RPC_GEMINI, NSSI_RPC_BGPDCMF, NSSI_RPC_BGPDCMF, NSSI_RPC_BGQPAMI, NSSI_RPC_BGQPAMI, NSSI_RPC_MPI}; const int num_nssi_transports = 11; const int nssi_transport_vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; const char * nssi_transport_names[] = { "portals", "ptl", "infiniband", "ib", "gemini", "gni", "bgpdcmf", "dcmf", "bgqpami", "pami", "mpi" }; // Initialize arguments args.transport=NSSI_DEFAULT_TRANSPORT; args.len = 1; args.delay = 1; args.io_method = XFER_WRITE_RDMA_SYNC; args.debug_level = LOG_WARN; args.num_trials = 1; args.num_reqs = 1; args.result_file_mode = "a"; args.result_file = ""; args.url_file = ""; args.logfile = ""; args.client_flag = true; args.server_flag = true; args.num_servers = 1; args.num_threads = 0; args.timeout = 500; args.num_retries = 5; args.validate_flag = true; args.kill_server_flag = true; args.block_distribution = true; bool success = true; /** * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line * options to control the behavior of the test code. To evaluate performance, * the "num-trials", "num-reqs", and "len" options control the amount of data transferred * between client and server. The "io-method" selects the type of data transfer. The * server-url specifies the URL of the server. If running as a server, the server-url * provides a recommended URL when initializing the network transport. */ try { //out << Teuchos::Teuchos_Version() << std::endl << std::endl; // Creating an empty command line processor looks like: Teuchos::CommandLineProcessor parser; parser.setDocString( "This example program demonstrates a simple data-transfer service " "built using the NEtwork Scalable Service Interface (Nessie)." ); /* To set and option, it must be given a name and default value. Additionally, each option can be given a help std::string. Although it is not necessary, a help std::string aids a users comprehension of the acceptable command line arguments. 
Some examples of setting command line options are: */ parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" ); parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" ); parser.setOption("server", "no-server", &args.server_flag, "Run the server" ); parser.setOption("client", "no-client", &args.client_flag, "Run the client"); parser.setOption("len", &args.len, "The number of structures in an input buffer"); parser.setOption("debug",(int*)(&args.debug_level), "Debug level"); parser.setOption("logfile", &args.logfile, "log file"); parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)"); parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial"); parser.setOption("result-file", &args.result_file, "Where to store results"); parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result"); parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server"); parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data"); parser.setOption("num-servers", &args.num_servers, "Number of server processes"); parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process"); parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment"); parser.setOption("block-distribution", "rr-distribution", &args.block_distribution, "Use a block distribution scheme to assign clients to servers"); // Set an enumeration command line option for the io_method parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names, "I/O Methods for the example: \n" "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n" "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n" "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n" "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n" "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n" "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n" "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n" "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous"); // Set an enumeration command line option for the NNTI transport parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names, "NSSI transports (not all are available on every platform): \n" "\t\t\tportals|ptl : Cray or Schutt\n" "\t\t\tinfiniband|ib : libibverbs\n" "\t\t\tgemini|gni : Cray libugni (Gemini or Aries)\n" "\t\t\tbgpdcmf|dcmf : IBM BG/P DCMF\n" "\t\t\tbgqpami|pami : IBM BG/Q PAMI\n" "\t\t\tmpi : isend/irecv implementation\n" ); /* There are also two methods that control the behavior of the command line processor. First, for the command line processor to allow an unrecognized a command line option to be ignored (and only have a warning printed), use: */ parser.recogniseAllOptions(true); /* Second, by default, if the parser finds a command line option it doesn't recognize or finds the --help option, it will throw an std::exception. 
If you want prevent a command line processor from throwing an std::exception (which is important in this program since we don't have an try/catch around this) when it encounters a unrecognized option or help is printed, use: */ parser.throwExceptions(false); /* We now parse the command line where argc and argv are passed to the parse method. Note that since we have turned off std::exception throwing above we had better grab the return argument so that we can see what happened and act accordingly. */ Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } // Here is where you would use these command line arguments but for this example program // we will just print the help message with the new values of the command-line arguments. //if (rank == 0) // out << "\nPrinting help message with new values of command-line arguments ...\n\n"; //parser.printHelpMessage(argv[0],out); } TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success); log_debug(args.debug_level, "transport_index=%d", transport_index); if (transport_index > -1) { args.transport =nssi_transport_list[transport_index]; args.transport_name=std::string(nssi_transport_names[transport_index]); } args.io_method_name=std::string(io_method_names[args.io_method]); log_debug(args.debug_level, "%d: Finished processing arguments", rank); if (!success) { MPI_Abort(MPI_COMM_WORLD, 1); } if (!args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && !args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } log_level debug_level = args.debug_level; // Communicator used for both client and server (may split if using client and server) MPI_Comm comm; log_debug(debug_level, "%d: Starting xfer-service test", rank); #ifdef TRIOS_ENABLE_COMMSPLITTER if (args.transport == NSSI_RPC_MPI) { MPI_Pcontrol(0); } #endif /** * Since this test can be run as a server, client, or both, we need to play some fancy * MPI games to get the communicators working correctly. If we're executing as both * a client and a server, we split the communicator so that the client thinks its * running by itself. */ int color = 0; // color=0-->server, color=1-->client if (args.client_flag && args.server_flag) { if (np < 2) { log_error(debug_level, "Must use at least 2 MPI processes for client and server mode"); MPI_Abort(MPI_COMM_WORLD, -1); } // Split the communicators. Put all the servers as the first ranks. 
if (rank < args.num_servers) { color = 0; log_debug(debug_level, "rank=%d is a server", rank); } else { color = 1; // all others are clients log_debug(debug_level, "rank=%d is a client", rank); } MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); } else { if (args.client_flag) { color=1; log_debug(debug_level, "rank=%d is a client", rank); } else if (args.server_flag) { color=0; log_debug(debug_level, "rank=%d is a server", rank); } else { log_error(debug_level, "Must be either a client or a server"); MPI_Abort(MPI_COMM_WORLD, -1); } MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); } MPI_Comm_rank(comm, &splitrank); MPI_Comm_size(comm, &splitsize); log_debug(debug_level, "%d: Finished splitting communicators", rank); /** * Initialize the Nessie interface by specifying a transport, encoding scheme, and a * recommended URL. \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it * is often the case that only one type of transport exists on a particular platform. * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and * \ref NSSI_RPC_IB. We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE * should always be used for the second argument. The URL can be specified (as we did for * the server, or NULL (as we did for the client). This is a recommended value. Use the * \ref nssi_get_url function to find the actual value. */ nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL); // Get the Server URL std::string my_url(NSSI_URL_LEN, '\0'); nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN); // If running as both client and server, gather and distribute // the server URLs to all the clients. if (args.server_flag && args.client_flag) { std::string all_urls; // This needs to be a vector of chars, not a string all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0'); // Have servers gather their URLs if (color == 0) { assert(args.num_servers == splitsize); // these should be equal log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str()); // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD) MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR, &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm); } // broadcast the full set of server urls to all processes MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD); log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size()); if (color == 1) { // For block distribution scheme use the utility function (in xfer_util.cpp) if (args.block_distribution) { // Use this utility function to calculate the server_index xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server); } // Use a simple round robin distribution scheme else { server_index = splitrank % args.num_servers; rank_in_server = splitrank / args.num_servers; } // Copy the server url out of the list of urls int offset = server_index * NSSI_URL_LEN; args.server_url = all_urls.substr(offset, NSSI_URL_LEN); log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str()); } log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str()); } // If running as a client only, have to get the list of servers from the urlfile. 
else if (!args.server_flag && args.client_flag){ sleep(args.delay); // give server time to get started std::vector< std::string > urlbuf; xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm); args.num_servers = urlbuf.size(); // For block distribution scheme use the utility function (in xfer_util.cpp) if (args.block_distribution) { // Use this utility function to calculate the server_index xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server); } // Use a simple round robin distribution scheme else { server_index = splitrank % args.num_servers; rank_in_server = splitrank / args.num_servers; } args.server_url = urlbuf[server_index]; log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str()); } else if (args.server_flag && !args.client_flag) { args.server_url = my_url; if (args.url_file.empty()) { log_error(debug_level, "Must set --url-file"); MPI_Abort(MPI_COMM_WORLD, -1); } xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm); } // Set the debug level for the xfer service. xfer_debug_level = args.debug_level; // Print the arguments after they've all been set. log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str()); print_args(out, args, "%"); log_debug(debug_level, "server_url=%s", args.server_url.c_str()); //------------------------------------------------------------------------------ /** If we're running this job with a server, the server always executes on node 0. * In this example, the server is a single process. */ if (color == 0) { rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm); log_debug(debug_level, "Server is finished"); } // ------------------------------------------------------------------------------ /** The parallel client will execute this branch. The root node, node 0, of the client connects * connects with the server, using the \ref nssi_get_service function. Then the root * broadcasts the service description to the other clients before starting the main * loop of the client code by calling \ref xfer_client_main. 
*/ else { int i; int client_rank; // get rank within the client communicator MPI_Comm_rank(comm, &client_rank); nssi_init((nssi_rpc_transport)args.transport); // Only one process needs to connect to the service // TODO: Make get_service a collective call (some transports do not need a connection) //if (client_rank == 0) { { // connect to remote server for (i=0; i < args.num_retries; i++) { log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str()); rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc); if (rc == NSSI_OK) break; else if (rc != NSSI_ETIMEDOUT) { log_error(xfer_debug_level, "could not get svc description: %s", nssi_err_str(rc)); break; } } } // wait for all the clients to connect MPI_Barrier(comm); //MPI_Bcast(&rc, 1, MPI_INT, 0, comm); if (rc == NSSI_OK) { if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i); // Broadcast the service description to the other clients //log_debug(xfer_debug_level, "Bcasting svc to other clients"); //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm); log_debug(debug_level, "Starting client main"); // Start the client code xfer_client_main(args, xfer_svc, comm); MPI_Barrier(comm); // Tell one of the clients to kill the server if ((args.kill_server_flag) && (rank_in_server == 0)) { log_debug(debug_level, "%d: Halting xfer service", rank); rc = nssi_kill(&xfer_svc, 0, 5000); } rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc); if (rc != NSSI_OK) { log_error(xfer_debug_level, "could not free svc description: %s", nssi_err_str(rc)); } } else { if (client_rank == 0) log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i); success = false; //MPI_Abort(MPI_COMM_WORLD, -1); } nssi_fini((nssi_rpc_transport)args.transport); } log_debug(debug_level, "%d: clean up nssi", rank); MPI_Barrier(MPI_COMM_WORLD); // Clean up nssi_rpc rc = nssi_rpc_fini((nssi_rpc_transport)args.transport); if (rc != NSSI_OK) log_error(debug_level, "Error in nssi_rpc_fini"); log_debug(debug_level, "%d: MPI_Finalize()", rank); MPI_Finalize(); logger_fini(); if(success && (rc == NSSI_OK)) out << "\nEnd Result: TEST PASSED" << std::endl; else out << "\nEnd Result: TEST FAILED" << std::endl; return ((success && (rc==NSSI_OK)) ? 0 : 1 ); }
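/*
 * In the Nessie xfer-service test above, MPI_Pcontrol(0) is issued only when
 * TRIOS_ENABLE_COMMSPLITTER is defined and the selected NSSI transport is
 * itself MPI, presumably to switch off the commsplitter interposition layer
 * for that case rather than to control a performance profiler.
 */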
int main(int argc, char* argv[]) { MPI_Datatype type; /* MPI data type for communicating particle data */ int num_processors; /* Number of processors being used */ int processor; /* My processor number */ int* num; /* Number of particles on each processor */ int* offset; /* Offset to start of each processor's particles */ int buffer_size; /* Number of particles in pipeline data buffers */ particle_t* local; /* Array containing our local particles */ /* Initialize MPI */ MPI_Init(&argc, &argv); /* Create the MPI data type for communicating particle data */ MPI_Type_contiguous(4, MPI_DOUBLE, &type); MPI_Type_commit(&type); /* Determine the number of procesors being used and our processor number */ MPI_Comm_size(MPI_COMM_WORLD, &num_processors); MPI_Comm_rank(MPI_COMM_WORLD, &processor); /* Determine how the particles are allocated to the processors */ { int p; num = (int*)malloc(num_processors * sizeof(int)); offset = (int*)malloc(num_processors * sizeof(int)); for(p = 0; p < num_processors; p++) num[p] = (NumParticles / num_processors) + ((p < (NumParticles % num_processors)) ? 1 : 0); buffer_size = num[0]; offset[0] = 0; for(p = 1; p < num_processors; p++) offset[p] = offset[p - 1] + num[p - 1]; } /* Distribute the initial particle state */ { int i; particle_t* particles = NULL; if(processor == MasterProcessor) { particles = (particle_t*)malloc(NumParticles * sizeof(particle_t)); /* CODE FOR READING INITIAL PARTICLE STATE DATA FROM A FILE COULD BE PLACED HERE INSTEAD OF RANDOMIZATION */ /* Randomize the particles */ for(i = 0; i < NumParticles; i++) { particles[i].mass = 1.0; particles[i].x = drand48(); particles[i].y = drand48(); particles[i].z = drand48(); } } local = (particle_t*)malloc(num[processor] * sizeof(particle_t)); MPI_Scatterv(particles, num, offset, type, local, num[processor], type, MasterProcessor, MPI_COMM_WORLD); if(processor == MasterProcessor) free(particles); } /* Actual Simulation */ { int iteration, stage, i, j; particle_t* buf_send; particle_t* buf_recv; double* tfx; double* tfy; double* tfz; double* ox; double* oy; double* oz; MPI_Pcontrol ( 1 ); buf_send = (particle_t*)malloc(buffer_size * sizeof(particle_t)); buf_recv = (particle_t*)malloc(buffer_size * sizeof(particle_t)); tfx = (double*)malloc(num[processor] * sizeof(double)); tfy = (double*)malloc(num[processor] * sizeof(double)); tfz = (double*)malloc(num[processor] * sizeof(double)); ox = (double*)malloc(num[processor] * sizeof(double)); oy = (double*)malloc(num[processor] * sizeof(double)); oz = (double*)malloc(num[processor] * sizeof(double)); /* Set the "old" position for each particle to its current position */ for(i = 0; i < num[processor]; i++) { ox[i] = local[i].x; oy[i] = local[i].y; oz[i] = local[i].z; } /* Time steps */ for(iteration = 0; iteration < NumIterations; iteration++) { double f_max = -Infinity; /* Show current iteration number */ if(processor == 1) { fprintf(stdout, "Iteration %d of %d...\n", iteration + 1, NumIterations); fflush(stdout); } /* Zero the total force for each particle */ for(i = 0; i < num[processor]; i++) { tfx[i] = 0.0; tfy[i] = 0.0; tfz[i] = 0.0; } /* Force computation pipeline */ for(stage = 0; stage < num_processors; stage++) { MPI_Request request[2]; MPI_Status status[2]; /* Prime the pipeline with our local data for stage zero */ if(stage == 0) memcpy(buf_send, local, num[processor] * sizeof(particle_t)); /* Issue the send/receive pair for this pipeline stage */ if(stage < (num_processors - 1)) { MPI_Isend(buf_send, buffer_size, type, (processor - 1 + num_processors) 
% num_processors, 0, MPI_COMM_WORLD, &request[0]); MPI_Irecv(buf_recv, buffer_size, type, (processor + 1 + num_processors) % num_processors, 0, MPI_COMM_WORLD, &request[1]); } /* Compute forces */ for(i = 0; i < num[processor]; i++) { double r_min = +Infinity; double fx = 0.0; double fy = 0.0; double fz = 0.0; double f = 0.0; for(j = 0; j < num[(processor + stage) % num_processors]; j++) { double rx = local[i].x - buf_send[j].x; double ry = local[i].y - buf_send[j].y; double rz = local[i].z - buf_send[j].z; double r = (rx * rx) + (ry * ry) + (rz * rz); if(r > 0.0) { if(r < r_min) r_min = r; fx -= buf_send[j].mass * (rx / r); fy -= buf_send[j].mass * (ry / r); fz -= buf_send[j].mass * (rz / r); } } tfx[i] += fx; tfy[i] += fy; tfz[i] += fz; /* Rough estimate of 1/m|df/dx| */ f = sqrt((fx * fx) + (fy * fy) + (fz * fz)) / r_min; if(f > f_max) f_max = f; } /* Complete the send/receive pair for this pipeline stage */ if(stage < (num_processors - 1)) { MPI_Waitall(2, request, status); memcpy(buf_send, buf_recv, buffer_size * sizeof(particle_t)); } } /* * Compute new positions using a simple leapfrog time integration. * Use a variable step version to simplify time-step control. * * Integration is (a0 * x^+) + (a1 * x) + (a2 * x^-) = f / m * * Stability criteria is roughly 2.0 / sqrt(1/m|df/dx|) >= dt */ { static double dt_old = 0.001; static double dt_now = 0.001; double dt_est; double dt_new; double a0 = +2.0 / (dt_now * (dt_old + dt_now)); double a1 = -2.0 / (dt_old * dt_now); double a2 = +2.0 / (dt_old * (dt_old + dt_now)); for(i = 0; i < num[processor]; i++) { double x = local[i].x; double y = local[i].y; double z = local[i].z; local[i].x = (tfx[i] - (a1 * x) - (a2 * ox[i])) / a0; local[i].y = (tfy[i] - (a1 * y) - (a2 * oy[i])) / a0; local[i].z = (tfz[i] - (a1 * z) - (a2 * oz[i])) / a0; ox[i] = x; oy[i] = y; oz[i] = z; } dt_est = 1.0 / sqrt(f_max); if(dt_est < MinTimeStep) dt_est = MinTimeStep; MPI_Allreduce(&dt_est, &dt_new, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); if(dt_new < dt_now) { dt_old = dt_now; dt_now = dt_new; } else if(dt_new > (4.0 * dt_now)) { dt_old = dt_now; dt_now *= 2.0; } } } free(buf_send); free(buf_recv); free(tfx); free(tfy); free(tfz); free(ox); free(oy); free(oz); MPI_Pcontrol ( 0 ); } /* Gather the final particle state */ { particle_t* particles = NULL; if(processor == MasterProcessor) particles = (particle_t*)malloc(NumParticles * sizeof(particle_t)); MPI_Gatherv(local, num[processor], type, particles, num, offset, type, MasterProcessor, MPI_COMM_WORLD); free(local); if(processor == MasterProcessor) { /* CODE FOR WRITING FINAL PARTICLE STATE DATA TO A FILE COULD BE PLACED HERE */ free(particles); } } /* Free the particle distribution arrays */ free(num); free(offset); /* Finalize MPI */ MPI_Finalize(); /* All Done */ return 0; }
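/*
 * In the N-body pipeline code above, MPI_Pcontrol(1) is called after the
 * initial MPI_Scatterv of particle state and MPI_Pcontrol(0) before the final
 * MPI_Gatherv, so profiling is confined to the force-computation and
 * time-integration phase rather than the distribution and collection steps.
 */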
int main(int argc, char **argv) { int size,rank, left, right, you, ndata=127,ndata_max=127,seed; int rv; long long int i,j,k; unsigned long long int nflop=0,nmem=1,nsleep=0,nrep=1, myflops; char *env_ptr; double *sbuf, *rbuf,*x; MPI_Status *s; MPI_Request *r; time_t ts; #ifdef HPM if((rv = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) { fprintf(stderr, "Error: %d %s\n",rv, errstring); exit(1); } if ((num_hwcntrs = PAPI_num_counters()) < PAPI_OK) { printf("There are no counters available. \n"); exit(1); } if ( (rv = PAPI_start_counters(events, 2)) != PAPI_OK) { fprintf(stdout, "ERROR PAPI_start_counters rv=%d\n", rv); exit(rv); } #endif seed = time(&ts); flags |= DOMPI; while(--argc && argv++) { if(!strcmp("-v",*argv)) { flags |= DOVERBOSE; } else if(!strcmp("-n",*argv)) { --argc; argv++; nflop = atol(*argv); } else if(!strcmp("-N",*argv)) { --argc; argv++; nrep = atol(*argv); } else if(!strcmp("-d",*argv)) { --argc; argv++; ndata_max = ndata = atol(*argv); } else if(!strcmp("-m",*argv)) { --argc; argv++; nmem = atol(*argv); } else if(!strcmp("-s",*argv)) { --argc; argv++; nsleep = atol(*argv); } else if(!strcmp("-spray",*argv)) { flags |= DOSPRAY; } else if(!strcmp("-c",*argv)) { flags |= CORE; } else if(!strcmp("-r",*argv)) { flags |= REGION; } else if(!strcmp("-stair",*argv)) { flags |= STAIR_RANK; } else if(!strcmp("-stair_region",*argv)) { flags |= STAIR_REGION; } else if(!strcmp("-nompi",*argv)) { flags &= ~DOMPI; } } if(flags & DOMPI) { MPI_Init(&argc,&argv); /* MPI_Init(&argc,&argv); */ MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); } if(nmem) { nmem = (nmem*1024*1024/sizeof(double)); x = (double *)malloc((size_t)(nmem*sizeof(double))); for(j=0;j<nrep;j++) { for(i=0;i<nmem;i++) { x[i] = i; } for(i=0;i<nmem;i++) { x[i] = i*x[i]; } if(x[nmem-1]*x[nmem-1] < 0) { printf("trickster\n"); } } if(0) free((char *)x); } #ifdef IPM if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); #endif if(nflop) { x = (double *)malloc((size_t)(10*sizeof(double))); j = k = 0; for(i=0;i<10;i++) { x[i] = 1.0; } if(flags & STAIR_RANK) { myflops = (rank*nflop)/size; } else { myflops = nflop; } for(i=0;i<nflop;i++) { x[j] = x[j]*x[k]; j = ((i%9)?(j+1):(0)); k = ((i%8)?(k+1):(0)); } free((char *)x); } if(nsleep) { sleep(nsleep); } #ifdef IPM if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); #endif if(nmem<nflop) nmem=nflop; if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem); fflush(stdout); if(flags & CORE) { for(i=0;;i++) { x[i] = x[i*i-1000]; } } env_ptr = getenv("IPM_SOCKET"); if(env_ptr) { printf("IPM: %d IPM_SOCKET in app %s\n", rank, env_ptr); } if(flags & DOMPI) { s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size)); r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size)); sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; } MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD); srand48(seed); for(i=0;i<nrep;i++) { MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD); } if(size>1) { if(!rank) {left=size-1;} else { left = rank-1;} if(rank == size-1) { right=0;} else {right=rank+1;} you = (rank < size/2)?(rank+size/2):(rank-size/2); for(i=0;i<nrep;i++) { if(flags & DOSPRAY) { ndata = (long int)(drand48()*ndata_max)+1; } MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s); 
MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_a"); #endif MPI_Barrier(MPI_COMM_WORLD); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Isend(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r); MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s); MPI_Wait(r,s); MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r); MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD); MPI_Wait(r,s); for(j=0;j<size;j++) { MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j); MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j); } MPI_Waitall(2*size,r,s); /* for(j=0;j<size;j++) { printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE); } */ #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_a"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_b"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_b"); #endif if(1) { #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_c"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_c"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_d"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_d"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_e"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_e"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_f"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_f"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_g"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 
MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_g"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_h"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_h"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_i"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_i"); #endif } } } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); } #ifdef HPM if ((rv=PAPI_stop_counters(values, 2)) != PAPI_OK) { fprintf(stdout, "ERROR PAPI_stop_counters rv=%d\n", rv); exit(rv); } printf("PAPI: total instruction/cycles %lld/%lld %.3e \n", values[0], values[1], values[0]/(values[1]*1.0) ); #endif return 0; }
/* Minimal test of a profiling layer's Pcontrol extension: the arguments after
 * the level are passed through to the PMPI wrapper, and a layer that
 * understands the "get_region();" command (an IPM-style extension) is expected
 * to copy the current region name into the supplied buffer. */
int main(int argc, char *argv[])
{
    char region_name[4096];
    MPI_Pcontrol(1, "get_region();", (void *)region_name);
    return 0;
}
int main(int argc, char**argv){ int num_ranks, rank, split_num_ranks, split_rank; int outer_ranks, inner_ranks; int new_comm_id; int msg_size, loops; int slurm_id, run_index; MPI_Comm split_comm; FILE * timings; //Parse options char c; while ((c = getopt (argc, argv, "s:r:l:i:")) != -1){ switch (c) { case 's': sscanf(optarg, "%d", &msg_size); break; case 'r': sscanf(optarg, "%d", &inner_ranks); break; case 'l': sscanf(optarg, "%d", &loops); break; case 'i': sscanf(optarg, "%d", &run_index); break; default: printf("Unrecognized option: %c\n", optopt); break; } if(c != 's' && c != 'i' && c != 'l' && c != 'r' ){break;} } printf("Successfully parsed options as: \n"); printf("\tmsg_size: %d, inner_ranks: %d, loops: %d, run_index: %d\n", msg_size, inner_ranks, loops, run_index); //Open timings.out for writing timings = fopen("timings.out", "a"); if(timings == NULL){ printf("Error: cannot open timings.out\n"); } //Start MPI, get num_ranks MPI_Init(NULL, NULL); MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); if(num_ranks == 0){ printf("MPI_Comm_size failure\n"); exit(1); } //Calculate comm sizes outer_ranks = num_ranks - inner_ranks; if( (outer_ranks < 0 || inner_ranks < 0) && (rank == 0) ){ printf("Error: bad comm sizes. They should be positive\n"); } //Get global rank MPI_Comm_rank(MPI_COMM_WORLD, &rank); int * splitter = (int*)malloc(sizeof(int)*num_ranks); for(int i = inner_ranks; i < num_ranks; i++) splitter[i] = OUTER_COMM; for(int i = 0; i < inner_ranks; i++) splitter[i] = INNER_COMM; //split communicator MPI_Comm_split(MPI_COMM_WORLD, splitter[rank], 1, &split_comm); MPI_Comm_size(split_comm, &split_num_ranks); MPI_Comm_rank(split_comm, &split_rank); MPI_Barrier(MPI_COMM_WORLD); //run the inner communicator as a warm-up, seems to reduce variance if(splitter[rank] == INNER_COMM){ Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); } MPI_Barrier(MPI_COMM_WORLD); //start network counters region 1 MPI_Pcontrol(1); //run the inside alone, as a baseline float run1; if(splitter[rank] == INNER_COMM){ run1 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); } MPI_Barrier(MPI_COMM_WORLD); //start network counters region 2 MPI_Pcontrol(2); //run both communicators float run2; if(splitter[rank] == INNER_COMM){ run2 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); }else{ Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); } //stop network counters MPI_Pcontrol(0); //print timings if(splitter[rank] == INNER_COMM && split_rank==0) fprintf(timings, "%d,%f,%f\n", run_index, run1, run2); //free(recv); free(splitter); MPI_Finalize(); exit(0); }
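/* Hedged sketch: the Alltoall() helper called above is not defined in this
 * excerpt.  Assuming it times `loops` repetitions of MPI_Alltoall with
 * msg_size bytes per peer on the given communicator and returns the elapsed
 * seconds, a minimal version could look like this. */
#include <stdlib.h>
#include <mpi.h>

static float Alltoall(MPI_Comm comm, int comm_size, int comm_rank,
                      int msg_size, int loops)
{
    char *sendbuf = (char *)malloc((size_t)msg_size * comm_size);
    char *recvbuf = (char *)malloc((size_t)msg_size * comm_size);
    double start, stop;
    int i;

    (void)comm_rank;                 /* rank is not needed for the timing itself */
    MPI_Barrier(comm);
    start = MPI_Wtime();
    for (i = 0; i < loops; i++)
        MPI_Alltoall(sendbuf, msg_size, MPI_CHAR,
                     recvbuf, msg_size, MPI_CHAR, comm);
    MPI_Barrier(comm);
    stop = MPI_Wtime();

    free(sendbuf);
    free(recvbuf);
    return (float)(stop - start);
}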
void M3_profile( int sectionID, const char *sectionName, int operationFlag ) { static char *staticTitleString = NULL; static char **staticProfileName = NULL; static int64_t *staticNumCalls = NULL; static double *staticTotalTime = NULL; static double *staticStartTime = NULL; #ifdef USE_PAPI static int64_t *staticFlopCount = NULL; static int64_t *staticFlipCount = NULL; static int64_t *staticFlopCounter = NULL; static int64_t *staticFlipCounter = NULL; #endif static double staticInitTime = 0; static char staticInitDate[256]={0}; static int staticProfileLevel = -1; #ifdef USE_PAPI #define M3_NUM_PAPI_EVENTS 2 int papiEvents[M3_NUM_PAPI_EVENTS] = {PAPI_FP_OPS, PAPI_FP_INS}; static long long int papiCounters[M3_NUM_PAPI_EVENTS] = {0}; #endif double finalTime; int64_t *agInt64 = NULL; double *agDouble = NULL; int64_t i, j; long int k; int myRank = -1; int numProc = 1; FILE *outFile; char *tempPtr, fileName[256], tempString[256]; char myHostname[256] = {0}; double mpiTic; double mpiToc; struct timeval tic; struct timezone tz; time_t tt; long int pid; char pcontrolID[16] = {0}; if( staticProfileLevel == -1 ) { /* Look for environment variable. */ tempPtr = getenv("M3_PROFILE_LEVEL"); if( tempPtr ) staticProfileLevel = atoi( tempPtr ); else staticProfileLevel = M3_PROFILE_LEVEL; } if( staticProfileLevel == 0 ) return; #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &myRank ); MPI_Comm_size(MPI_COMM_WORLD, &numProc ); #endif sprintf(fileName, "M3_Profile(): profile ID out of range, must be between 0 and %i", M3_PROFILE_MAX_SECTIONS - 1); assert(sectionID >= 0 && sectionID < M3_PROFILE_MAX_SECTIONS); switch( operationFlag ) { case M3_PROFILE_INIT: assert(staticProfileName == NULL && staticNumCalls == NULL && staticTotalTime == NULL && staticStartTime == NULL); if( sectionName && strlen(sectionName) ) { staticTitleString = (char *)calloc( 4*(strlen(sectionName)/4 +1 ), sizeof(char) ); assert(staticTitleString != NULL); strcpy(staticTitleString, sectionName ); } staticProfileName = (char **)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(char*) ); staticNumCalls = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t) ); staticTotalTime = (double *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(double) ); staticStartTime = (double *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(double) ); assert(staticProfileName && staticNumCalls && staticTotalTime && staticStartTime); #ifdef USE_PAPI staticFlopCount = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t)); staticFlipCount = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t)); staticFlopCounter = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t)); staticFlipCounter = (int64_t *)calloc( M3_PROFILE_MAX_SECTIONS, sizeof(int64_t)); assert(staticFlopCount && staticFlipCount); assert(staticFlopCounter && staticFlipCounter); #endif gettimeofday(&tic, &tz); #ifdef USE_MPI staticInitTime = MPI_Wtime( ); #else staticInitTime = tic.tv_sec + tic.tv_usec*1e-6; #endif tt = tic.tv_sec; ctime_r(&tt, staticInitDate ); #ifdef USE_PAPI PAPI_start_counters(papiEvents, M3_NUM_PAPI_EVENTS); #endif #ifdef USE_MPI if (myRank == 0) { mkdir( "m3_profile", S_IRWXU ); } #else mkdir( "m3_profile", S_IRWXU ); #endif break; case M3_PROFILE_FINALIZE: /* Check to see if it was initialized */ if( staticProfileName == NULL || staticNumCalls == NULL || staticTotalTime == NULL ) { /* fprintf(stderr, "WARNING: M3_Profile, finalized without initializing\n"); */ break; } myHostname[255] = 0; gethostname(myHostname, 255); pid = (long int)getpid(); for( j = 0; j < 2; j++ ) { #ifdef 
USE_MPI if( j == 1 ) { /* Get aggregate statistics */ if( myRank == 0 ) { agInt64 = (int64_t*)calloc(M3_PROFILE_MAX_SECTIONS, sizeof(int64_t)); agDouble = (double*)calloc(M3_PROFILE_MAX_SECTIONS, sizeof(double)); assert( agInt64 && agDouble ); } MPI_Reduce( staticNumCalls, agInt64, M3_PROFILE_MAX_SECTIONS, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD ); if( myRank == 0 ) memcpy( staticNumCalls, agInt64 , sizeof(int64_t)*M3_PROFILE_MAX_SECTIONS ); MPI_Reduce( staticTotalTime, agDouble, M3_PROFILE_MAX_SECTIONS, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD ); if( myRank == 0 ) memcpy( staticTotalTime, agDouble , sizeof(double)*M3_PROFILE_MAX_SECTIONS ); #ifdef USE_PAPI MPI_Reduce( staticFlopCount, agInt64, M3_PROFILE_MAX_SECTIONS, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD ); if( myRank == 0 ) memcpy( staticFlopCount, agInt64, sizeof(int64_t)*M3_PROFILE_MAX_SECTIONS ); MPI_Reduce( staticFlipCount, agInt64, M3_PROFILE_MAX_SECTIONS, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD ); if( myRank == 0 ) memcpy( staticFlipCount, agInt64, sizeof(int64_t)*M3_PROFILE_MAX_SECTIONS ); #endif if( myRank == 0 ) { free(agInt64); free(agDouble); } else break; } #else /* If not using mpi, don't need to collect aggregate statistics */ if( j == 1 ) break; #endif k = 60*lrint(staticInitTime/60); /* m3_profile_title_date.proc */ if( staticTitleString ) { tempPtr = strchr( staticTitleString, ' '); if(tempPtr) *tempPtr = '\0'; sprintf( fileName, "m3_profile/m3_profile_%s_%li_%s_%li", staticTitleString, k, myHostname, pid); if(tempPtr) *tempPtr = ' '; } else { sprintf( fileName, "m3_profile/m3_profile_%li", k ); } #ifdef USE_MPI if( j == 0 ) sprintf( tempString, ".%i", myRank ); else strcpy( tempString, ".all"); strcat( fileName, tempString ); #endif if( ( staticProfileLevel == 2 ) || ( staticProfileLevel == 1 && j == 0 && numProc == 1 ) || ( staticProfileLevel == 1 && j == 1 ) ) { /* Open the output file. */ outFile = fopen( fileName, "w"); assert(outFile != NULL); /* Write a title */ if( staticTitleString ) fprintf(outFile, "M3_Profile: %s\n\n", staticTitleString ); else fprintf(outFile, "M3_Profile\n\n" ); /* Write the init date, and the run time. 
*/ #ifdef USE_MPI fprintf(outFile, "Number of processors: %i\n", numProc ); finalTime = MPI_Wtime(); #else gettimeofday(&tic, &tz ); finalTime = tic.tv_sec + tic.tv_usec*1e-6; #endif fprintf( outFile, "Start date %s\n", staticInitDate ); fprintf( outFile, "Run time in seconds: %e\n\n", finalTime - staticInitTime ); if( j == 1 ) fprintf(outFile, "Aggregate statistics\n\n"); for( i = 0; i < M3_PROFILE_MAX_SECTIONS; i++ ) { if( staticNumCalls[i] ) { fprintf(outFile, "-----------------------------\n"); fprintf(outFile, " Profile ID number: %lli\n", i); if( staticProfileName[i] ) fprintf(outFile, " %s\n", staticProfileName[i] ); fprintf(outFile, " Total number of calls: %lli\n", staticNumCalls[i]); fprintf(outFile, " Total time (seconds): %e\n", staticTotalTime[i]); fprintf(outFile, " Mean time per call (seconds): %e\n", staticTotalTime[i]/staticNumCalls[i]); fprintf(outFile, " Mean time per task (seconds): %e\n", staticTotalTime[i]/numProc ); fprintf(outFile, " Percent of wall clock %.6f %%\n", staticTotalTime[i]/numProc/(finalTime - staticInitTime)*100 ); #ifdef USE_PAPI fprintf(outFile, " Flop count: %lli\n", staticFlopCount[i]); fprintf(outFile, " Flop rate: %.6e\n", staticFlopCount[i]/staticTotalTime[i]); fprintf(outFile, " Flip count: %lli\n", staticFlipCount[i]); fprintf(outFile, " Flip rate: %.6e\n", staticFlipCount[i]/staticTotalTime[i]); #endif fprintf(outFile, "\n\n"); } } fclose(outFile); } } /* Free up static memory */ if( staticTitleString ) { free(staticTitleString); staticTitleString = NULL; } if( staticProfileName ) { for( i = 0; i < M3_PROFILE_MAX_SECTIONS; i++ ) if( staticProfileName[i] ) free( staticProfileName[i] ); free(staticProfileName); staticProfileName = NULL; } if( staticNumCalls ) { free( staticNumCalls ); staticNumCalls = NULL; } if( staticTotalTime ) { free( staticTotalTime ); staticTotalTime = NULL; } if( staticStartTime ) { free(staticStartTime ); staticStartTime = NULL; } #ifdef USE_PAPI if( staticFlopCount ); { free(staticFlopCount); staticFlopCount = NULL; } if( staticFlipCount ); { free(staticFlipCount); staticFlipCount = NULL; } #endif break; case M3_PROFILE_START: if( staticProfileName == NULL || staticNumCalls == NULL || staticTotalTime == NULL ) { /* fprintf(stderr, "WARNING: M3_Profile, called without initializing\n"); */ break; } if( staticProfileName[sectionID] == NULL ) { staticProfileName[sectionID] = (char*)calloc(4*(strlen(sectionName)/4 + 1), sizeof(char)); assert(staticProfileName[sectionID] != NULL); strcpy(staticProfileName[sectionID], sectionName); } #ifdef USE_MPI #ifndef USE_PAPI sprintf( pcontrolID, "%i", sectionID); MPI_Pcontrol( 1, pcontrolID ); #endif #endif #ifdef USE_MPI staticStartTime[sectionID] = MPI_Wtime(); #else gettimeofday(&tic, &tz); staticStartTime[sectionID] = tic.tv_sec + tic.tv_usec*1e-6; #endif #ifdef USE_PAPI PAPI_accum_counters(papiCounters, M3_NUM_PAPI_EVENTS ); staticFlopCounter[sectionID] = papiCounters[0]; staticFlipCounter[sectionID] = papiCounters[1]; #endif break; case M3_PROFILE_STOP: if( staticProfileName == NULL || staticNumCalls == NULL || staticTotalTime == NULL ) { /* fprintf(stderr, "WARNING: M3_Profile, called without initializing\n"); */ break; } #ifdef USE_MPI #ifndef USE_PAPI sprintf( pcontrolID, "%i", sectionID); MPI_Pcontrol( -1, pcontrolID ); #endif #endif staticNumCalls[sectionID]++; #ifdef USE_MPI staticTotalTime[sectionID] += MPI_Wtime() - staticStartTime[sectionID]; #else gettimeofday(&tic, &tz); staticTotalTime[sectionID] += (tic.tv_sec + tic.tv_usec*1e-6) - staticStartTime[sectionID]; #endif 
#ifdef USE_PAPI PAPI_accum_counters(papiCounters, M3_NUM_PAPI_EVENTS ); staticFlopCount[sectionID] += papiCounters[0] - staticFlopCounter[sectionID]; staticFlipCount[sectionID] += papiCounters[1] - staticFlipCounter[sectionID]; #endif break; } }
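/* Usage sketch for M3_profile(), assuming the M3_PROFILE_* operation flags and
 * the function prototype come from the accompanying header, and that section
 * IDs are small integers below M3_PROFILE_MAX_SECTIONS.  SECTION_SOLVE is a
 * hypothetical ID chosen for illustration. */
#include <mpi.h>

#define SECTION_SOLVE 0

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    M3_profile(0, "my_app", M3_PROFILE_INIT);               /* allocate tables, record start time */

    M3_profile(SECTION_SOLVE, "solve", M3_PROFILE_START);   /* start timing this section */
    MPI_Barrier(MPI_COMM_WORLD);                             /* the work being timed */
    M3_profile(SECTION_SOLVE, "solve", M3_PROFILE_STOP);     /* accumulate elapsed time and call count */

    M3_profile(0, "my_app", M3_PROFILE_FINALIZE);            /* write the m3_profile/ report files */

    MPI_Finalize();
    return 0;
}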
int main( int argc, char *argv[] ) { int n, myid, numprocs, ii, jj; double PI25DT = 3.141592653589793238462643; double mypi, pi, h, sum, x; double startwtime = 0.0, endwtime; int namelen; int event1a, event1b, event2a, event2b, event3a, event3b, event4a, event4b; int event1, event2, event3; char processor_name[ MPI_MAX_PROCESSOR_NAME ]; MPI_Init( &argc, &argv ); MPI_Pcontrol( 0 ); MPI_Comm_size( MPI_COMM_WORLD, &numprocs ); MPI_Comm_rank( MPI_COMM_WORLD, &myid ); MPI_Get_processor_name( processor_name, &namelen ); fprintf( stderr, "Process %d running on %s\n", myid, processor_name ); /* MPE_Init_log() & MPE_Finish_log() are NOT needed when liblmpe.a is linked with this program. In that case, MPI_Init() would have called MPE_Init_log() already. */ #if defined( NO_MPI_LOGGING ) MPE_Init_log(); #endif /* user should NOT assign eventIDs directly in MPE_Describe_state() Get the eventIDs for user-defined STATES(rectangles) from MPE_Log_get_state_eventIDs() instead of the deprecated function MPE_Log_get_event_number(). */ MPE_Log_get_state_eventIDs( &event1a, &event1b ); MPE_Log_get_state_eventIDs( &event2a, &event2b ); MPE_Log_get_state_eventIDs( &event3a, &event3b ); MPE_Log_get_state_eventIDs( &event4a, &event4b ); if ( myid == 0 ) { MPE_Describe_state( event1a, event1b, "Broadcast", "red" ); MPE_Describe_state( event2a, event2b, "Sync", "orange" ); MPE_Describe_state( event3a, event3b, "Compute", "blue" ); MPE_Describe_state( event4a, event4b, "Reduce", "green" ); } /* Get event ID for Solo-Event(single timestamp object) from MPE */ MPE_Log_get_solo_eventID( &event1 ); MPE_Log_get_solo_eventID( &event2 ); MPE_Log_get_solo_eventID( &event3 ); if ( myid == 0 ) { MPE_Describe_event( event1, "Broadcast Post", "white" ); MPE_Describe_event( event2, "Compute Start", "purple" ); MPE_Describe_event( event3, "Compute End", "navy" ); } if ( myid == 0 ) { n = 1000000; startwtime = MPI_Wtime(); } MPI_Barrier( MPI_COMM_WORLD ); MPI_Pcontrol( 1 ); /* MPE_Start_log(); */ for ( jj = 0; jj < 5; jj++ ) { MPE_Log_event( event1a, 0, NULL ); MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD ); MPE_Log_event( event1b, 0, NULL ); MPE_Log_event( event1, 0, NULL ); MPE_Log_event( event2a, 0, NULL ); MPI_Barrier( MPI_COMM_WORLD ); MPE_Log_event( event2b, 0, NULL ); MPE_Log_event( event2, 0, NULL ); MPE_Log_event( event3a, 0, NULL ); h = 1.0 / (double) n; sum = 0.0; for ( ii = myid + 1; ii <= n; ii += numprocs ) { x = h * ((double)ii - 0.5); sum += f(x); } mypi = h * sum; MPE_Log_event( event3b, 0, NULL ); MPE_Log_event( event3, 0, NULL ); pi = 0.0; MPE_Log_event( event4a, 0, NULL ); MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD ); MPE_Log_event( event4b, 0, NULL ); MPE_Log_sync_clocks(); } #if defined( NO_MPI_LOGGING ) if ( argv != NULL ) MPE_Finish_log( argv[0] ); else MPE_Finish_log( "cpilog" ); #endif if ( myid == 0 ) { endwtime = MPI_Wtime(); printf( "pi is approximately %.16f, Error is %.16f\n", pi, fabs(pi - PI25DT) ); printf( "wall clock time = %f\n", endwtime-startwtime ); } MPI_Finalize(); return( 0 ); }
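/* The integrand f() used in the loop above is not shown in this listing.  In
 * the standard MPE cpi/cpilog examples it is the midpoint-rule integrand whose
 * integral over [0,1] is pi, so a matching definition would be: */
static double f(double a)
{
    return 4.0 / (1.0 + a * a);
}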
int main(int argc, char**argv){ int num_ranks, rank, split_num_ranks, split_rank; int outer_ranks, inner_ranks; int new_comm_id; int msg_size, loops; int slurm_id, run_index; MPI_Comm split_comm; FILE * timings, * configs; int assignment; int custom; char c; while ((c = getopt (argc, argv, "s:r:l:i:ac:")) != -1){ switch (c) { case 's': sscanf(optarg, "%d", &msg_size); break; case 'r': sscanf(optarg, "%d", &inner_ranks); break; case 'l': sscanf(optarg, "%d", &loops); break; case 'i': sscanf(optarg, "%d", &run_index); break; case 'a': sscanf(optarg, "%d", &assignment); assignment = 0; break; case 'c': sscanf(optarg, "%d", &custom); break; default: printf("Unrecognized option: %c\n", optopt); break; } if(c != 's' && c != 'i' && c != 'l' && c != 'r' ){break;} } timings = fopen("timings.out", "a"); char configs_buf[128] = {0}; sprintf(configs_buf, "config-%d.out", run_index); configs = fopen(configs_buf, "a"); MPI_Init(NULL, NULL); MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); if(num_ranks == 0){ printf("MPI_Comm_size failure\n"); exit(1); } outer_ranks = num_ranks - inner_ranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get node names char name[MPI_MAX_PROCESSOR_NAME] = {0}; char * recv = (char*)calloc(MPI_MAX_PROCESSOR_NAME*num_ranks, sizeof(char)); int proc_len; MPI_Get_processor_name(name, &proc_len); name[proc_len] = 0; MPI_Gather(name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, recv, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, MPI_COMM_WORLD); int * splitter = (int*)malloc(sizeof(int)*num_ranks); for(int i = 0; i < num_ranks; i++) splitter[i] = OUTER_COMM; if(!custom){ if(rank == 0){ if(assignment == RANDOM){ int num_assigned = 0; while(num_assigned < inner_ranks){ int val = rand() % num_ranks; if(splitter[val] == INNER_COMM){ continue; }else{ splitter[val] = INNER_COMM; num_assigned += 1; } } }else if(assignment == APLANES){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (dims[0] == 0 || dims[0] == 2){ splitter[i] = INNER_COMM; } } }else if(assignment == APLANES_COARSE){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (dims[0] == 0 || dims[0] == 1){ splitter[i] = INNER_COMM; } } }else if(assignment == BPLANES){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (dims[1] == 0 || dims[1] == 2){ splitter[i] = INNER_COMM; } } }else if(assignment == CPLANES){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (dims[2] == 0 || dims[2] == 2){ splitter[i] = INNER_COMM; } } }else if(assignment == DPLANES){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (dims[3] == 0 || dims[3] == 2){ splitter[i] = INNER_COMM; } } }else if(assignment == EPLANES){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (dims[4] == 0){ splitter[i] = INNER_COMM; } } }else if(assignment == SQUAREAB1){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (((dims[0] == 0 || dims[0] == 1) && (dims[1] == 0 || dims[1] == 1)) || ((dims[0] == 2 || dims[0] == 3) && (dims[1] == 2 || dims[1] == 3))){ splitter[i] = INNER_COMM; } } }else if(assignment == SQUAREAB2){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (((dims[0] == 0 || dims[0] == 2) && (dims[1] == 0 || dims[1] == 2)) || ((dims[0] == 1 || dims[0] == 
3) && (dims[1] == 1 || dims[1] == 3))){ splitter[i] = INNER_COMM; } } }else if(assignment == ALTERABC_NONE){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (((dims[0] == 0 || dims[0] == 1) && (dims[1] == 0 || dims[1] == 1) && (dims[2] == 0 || dims[2] == 1)) || ((dims[0] == 2 || dims[0] == 3) && (dims[1] == 2 || dims[1] == 3) && (dims[2] == 0 || dims[2] == 1)) || ((dims[0] == 0 || dims[0] == 1) && (dims[1] == 2 || dims[1] == 3) && (dims[2] == 2 || dims[2] == 3)) || ((dims[0] == 2 || dims[0] == 3) && (dims[1] == 0 || dims[1] == 1) && (dims[2] == 2 || dims[2] == 3))) { splitter[i] = INNER_COMM; } } }else if(assignment == ALTERABC_ALL){ for(int i = 0; i < num_ranks; i++){ int dims[5] = {0}; get_dim(recv + i*MPI_MAX_PROCESSOR_NAME, dims); if (((dims[0] == 0 || dims[0] == 2) && (dims[1] == 0 || dims[1] == 2) && (dims[2] == 0 || dims[2] == 2)) || ((dims[0] == 1 || dims[0] == 3) && (dims[1] == 1 || dims[1] == 3) && (dims[2] == 0 || dims[2] == 2)) || ((dims[0] == 0 || dims[0] == 2) && (dims[1] == 1 || dims[1] == 3) && (dims[2] == 1 || dims[2] == 3)) || ((dims[0] == 1 || dims[0] == 3) && (dims[1] == 0 || dims[1] == 2) && (dims[2] == 1 || dims[2] == 3))) { splitter[i] = INNER_COMM; } } } } }else{ //using custon mapping in map.out for(int i = 0; i < num_ranks/2; i++){ splitter[i] = INNER_COMM; } } MPI_Bcast(splitter, num_ranks, MPI_INT, 0, MPI_COMM_WORLD); //split communicator MPI_Comm_split(MPI_COMM_WORLD, splitter[rank], 1, &split_comm); MPI_Comm_size(split_comm, &split_num_ranks); MPI_Comm_rank(split_comm, &split_rank); MPI_Barrier(MPI_COMM_WORLD); //print names to file if(rank == 0){ fprintf(configs,"rank,comm,node\n"); for(int i = 0; i < num_ranks; i++){ fprintf(configs,"%d,%d,%s\n", i, splitter[i], recv + i*MPI_MAX_PROCESSOR_NAME); } } //run the inner communicator as a warm-up, seems to reduce variance if(splitter[rank] == INNER_COMM){ Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); } MPI_Barrier(MPI_COMM_WORLD); //run the inside alone, as a baseline //start network counters region 1 MPI_Pcontrol(1); float run1; if(splitter[rank] == INNER_COMM){ run1 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); } MPI_Barrier(MPI_COMM_WORLD); //start network counters region 2 MPI_Pcontrol(2); //run both communicators float run2; if(splitter[rank] == INNER_COMM){ run2 = Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); }else{ Alltoall(split_comm, split_num_ranks, split_rank, msg_size, loops); } //stop network counters MPI_Pcontrol(0); //print timings if(splitter[rank] == INNER_COMM && split_rank==0) fprintf(timings, "%d,%f,%f\n", run_index, run1, run2); //free(recv); free(splitter); MPI_Finalize(); exit(0); }
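/* Hedged sketch: get_dim() is used above but not defined in this excerpt.  The
 * assignment names (APLANES ... EPLANES) suggest it extracts five torus
 * coordinates from a node name.  Assuming the coordinates appear as the first
 * five runs of digits in the name, a generic extractor could be: */
#include <ctype.h>
#include <stdlib.h>

static void get_dim(const char *node_name, int dims[5])
{
    int n = 0;
    const char *p = node_name;

    while (*p && n < 5) {
        if (isdigit((unsigned char)*p)) {
            char *end;
            dims[n++] = (int)strtol(p, &end, 10);   /* consume one digit run */
            p = end;
        } else {
            p++;
        }
    }
}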
int main (int argc,char **argv) { MPI_Status status; int rank, size; struct { int value; int rank; } num, max, rcvd; MPI_Init(&argc,&argv); MPI_Comm_rank (MPI_COMM_WORLD,&rank); MPI_Comm_size (MPI_COMM_WORLD,&size); char *tracefile = getenv("TVTRACE"); if( tracefile != NULL ){ printf( "tv tracefile=%s\n", tracefile ); MPI_Pcontrol(TRACEFILES, NULL, tracefile, 0); } else{ MPI_Pcontrol(TRACEFILES, NULL, "trace", 0); } MPI_Pcontrol(TRACELEVEL, 1, 1, 1); MPI_Pcontrol(TRACENODE, 1000000, 1, 1); num.value = my_random(rank); num.rank = rank; printf("Node %d: value = %d\n", num.rank, num.value); double sTime, eTime; sTime = MPI_Wtime(); MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, ""); MPI_Reduce(&num, &max, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD); MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, ""); eTime = MPI_Wtime(); MPI_Barrier( MPI_COMM_WORLD ); MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, ""); if (rank == 0) { print_result("MPI_Reduce", max.rank, max.value, eTime - sTime); sTime = MPI_Wtime(); max.value = num.value; max.rank = num.rank; int i; for(i = 1; i < size; i++) { MPI_Recv(&rcvd, 1, MPI_2INT, i, TAG, MPI_COMM_WORLD, &status); if (rcvd.value > max.value) { max.value = rcvd.value; max.rank = rcvd.rank; } } eTime = MPI_Wtime(); print_result("Send-receive", max.rank, max.value, eTime - sTime); } else { MPI_Ssend(&num, 1, MPI_2INT, 0, TAG, MPI_COMM_WORLD); } MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, ""); #if 0 if( !rank ){ double *a,*b,*c, *c0; int i,i1,j,k; int ann; MPI_Status *st; MPI_Request *rq,rq1; rq = (MPI_Request*) malloc( (size-1)*sizeof(MPI_Request) ); st = (MPI_Status*) malloc( (size-1)*sizeof(MPI_Status) ); ann=an/size+((an%size)?1:0); // printf("[%d]ann=%d\n", rank, ann ); a=(double*) malloc(am*an*sizeof(double)); b=(double*) malloc(am*bm*sizeof(double)); c=(double*) malloc(an*bm*sizeof(double)); for(i=0;i<am*an;i++) a[i]=rand()%301; for(i=0;i<am*bm;i++) b[i]=rand()%251; printf( "Data ready [%d]\n", rank ); c0 = (double*)malloc(an*bm*sizeof(double)); time = MPI_Wtime(); for (i=0; i<an; i++) for (j=0; j<bm; j++) { double s = 0.0; for (k=0; k<am; k++) s+= a[i*am+k]*b[k*bm+j]; c0[i*bm+j] = s; } time = MPI_Wtime() - time; printf("Time seq[%d] = %lf\n", rank, time ); time_seq = time; MPI_Barrier( MPI_COMM_WORLD ); time=MPI_Wtime(); MPI_Bcast( b, am*bm, MPI_DOUBLE, 0, MPI_COMM_WORLD); printf( "Data Bcast [%d]\n", rank ); for( i1=0, j=1; j<size; j++, i1+=ann*am ){ printf( "Data to Send [%d] %016x[%4d] =>> %d\n", rank, a+i1, i1, j ); MPI_Isend( a+i1, ann*am, MPI_DOUBLE, j, 101, MPI_COMM_WORLD, &rq1 ); MPI_Request_free( &rq1 ); printf( "Data Send [%d] =>> %d\n", rank, j ); } printf( "Data Send [%d]\n", rank ); MPI_Isend( a+i1, 1, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &rq1 ); MPI_Request_free( &rq1 ); printf( "Data Send [%d] =>> %d\n", rank, j ); for(i=(i1/am);i<an;i++) for(j=0;j<bm;j++){ double s=0.0; for(k=0;k<am;k++) s+=a[i*am+k]*b[k*bm+j]; c[i*bm+j]=s; } printf( "Job done [%d]\n", rank ); for( i1=0, j=1; j<size; j++, i1+=(ann*bm) ){ printf( "Data to Recv [%d] %016x[%4d] =>> %d\n", rank, c+i1, i1/bm, j ); MPI_Irecv( c+i1, ann*am, MPI_DOUBLE, j, 102, MPI_COMM_WORLD, rq+(j-1) ); } MPI_Waitall( size-1, rq, st ); time=MPI_Wtime()-time; printf("time [%d]=%12.8lf\n",rank,time); time_par = time; printf( "Data collected [%d]\n", rank ); time=MPI_Wtime(); int ok = 1; for(i=0;i<an*bm;i++) if( c[i] != c0[i] ){ ok = 0; printf( "Fail [%d %d] %lf != %lf\n", i/bm, i%bm, c[i], c0[i] ); break; } time=MPI_Wtime()-time; if( ok ){ printf( "Data verifeid [%d] time = %lf\n", rank, time ); printf( "SpeedUp S(%d) = 
%14.10lf\n", size, time_seq/time_par ); printf( "Efitncy E(%d) = %14.10lf\n", size, time_seq/(time_par*size) ); } } else { int ann; double *a,*b,*c; MPI_Status st; int i,j,k; MPI_Pcontrol(TRACEEVENT, "entry", 0, 0, ""); ann= an/size + ((an%size)?1:0); // if(rank==1) // printf("[%d]ann=%d = %d / %d \n", rank, ann, an, size ); a=(double*)malloc(ann*am*sizeof(double)); b=(double*)malloc(bm*am*sizeof(double)); c=(double*)malloc(ann*bm*sizeof(double)); printf( "Mem allocated [%d]\n", rank ); MPI_Barrier( MPI_COMM_WORLD ); MPI_Pcontrol(TRACEEVENT, "exit", 0, 0, ""); time = MPI_Wtime(); MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, ""); MPI_Bcast(b,am*bm,MPI_DOUBLE,0,MPI_COMM_WORLD); printf( "Data Bcast [%d]\n", rank ); MPI_Recv( a, ann*am, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &st); printf( "Data Recv [%d]\n", rank ); MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, ""); MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, ""); for( i=0; i<ann; i++ ) for(j=0;j<bm;j++){ double s=0.0; for( k=0; k<am; k++ ){ s+=a[i*am+k]*b[k*bm+j]; } /* if(1==rank){ if(0==j){ printf( "c[%d<%d %d] = %lf\n", i,ann,j, s ); } } */ c[i*bm+j]=s; } printf( "Job done [%d]\n", rank ); MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, ""); MPI_Pcontrol(TRACEEVENT, "entry", 3, 0, ""); MPI_Send( c, ann*bm, MPI_DOUBLE, 0, 102, MPI_COMM_WORLD); printf( "Data returned [%d]\n", rank ); MPI_Pcontrol(TRACEEVENT, "exit", 3, 0, ""); time=MPI_Wtime()-time; printf("time [%d]=%12.8lf\n",rank,time); } #endif MPI_Finalize(); return 0; }
int main( int argc, char *argv[] ) { int n, myid, numprocs, ii, jj; double PI25DT = 3.141592653589793238462643; double mypi, pi, h, sum, x; double startwtime = 0.0, endwtime; int namelen; int event1a, event1b, event2a, event2b, event3a, event3b, event4a, event4b; char processor_name[ MPI_MAX_PROCESSOR_NAME ]; MPE_LOG_BYTES bytebuf; int bytebuf_pos; MPI_Init( &argc, &argv ); MPI_Pcontrol( 0 ); MPI_Comm_size( MPI_COMM_WORLD, &numprocs ); MPI_Comm_rank( MPI_COMM_WORLD, &myid ); MPI_Get_processor_name( processor_name, &namelen ); fprintf( stderr, "Process %d running on %s\n", myid, processor_name ); /* MPE_Init_log() & MPE_Finish_log() are NOT needed when liblmpe.a is linked with this program. In that case, MPI_Init() would have called MPE_Init_log() already. */ #if defined( NO_MPI_LOGGING ) MPE_Init_log(); #endif /* Get event ID from MPE, user should NOT assign event ID directly */ event1a = MPE_Log_get_event_number(); event1b = MPE_Log_get_event_number(); event2a = MPE_Log_get_event_number(); event2b = MPE_Log_get_event_number(); event3a = MPE_Log_get_event_number(); event3b = MPE_Log_get_event_number(); event4a = MPE_Log_get_event_number(); event4b = MPE_Log_get_event_number(); if ( myid == 0 ) { MPE_Describe_state( event1a, event1b, "Broadcast", "red" ); MPE_Describe_info_state( event2a, event2b, "Sync", "orange", "source = %s()'s line %d." ); MPE_Describe_info_state( event3a, event3b, "Compute", "blue", "mypi = %E computed at iteration %d." ); MPE_Describe_info_state( event4a, event4b, "Reduce", "green", "final pi = %E at iteration %d." ); } if ( myid == 0 ) { n = 1000000; startwtime = MPI_Wtime(); } MPI_Barrier( MPI_COMM_WORLD ); MPI_Pcontrol( 1 ); /* MPE_Start_log(); */ for ( jj = 0; jj < ITER_COUNT; jj++ ) { MPE_Log_event( event1a, 0, NULL ); MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD ); MPE_Log_event( event1b, 0, NULL ); MPE_Log_event( event2a, 0, NULL ); MPI_Barrier( MPI_COMM_WORLD ); int line_num; bytebuf_pos = 0; MPE_Log_pack( bytebuf, &bytebuf_pos, 's', sizeof(__func__)-1, __func__ ); line_num = __LINE__; MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &line_num ); MPE_Log_event( event2b, 0, bytebuf ); MPE_Log_event( event3a, 0, NULL ); h = 1.0 / (double) n; sum = 0.0; for ( ii = myid + 1; ii <= n; ii += numprocs ) { x = h * ((double)ii - 0.5); sum += f(x); } mypi = h * sum; bytebuf_pos = 0; MPE_Log_pack( bytebuf, &bytebuf_pos, 'E', 1, &mypi ); MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &jj ); MPE_Log_event( event3b, 0, bytebuf ); pi = 0.0; MPE_Log_event( event4a, 0, NULL ); MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD ); bytebuf_pos = 0; MPE_Log_pack( bytebuf, &bytebuf_pos, 'E', 1, &pi ); MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &jj ); MPE_Log_event( event4b, 0, bytebuf ); } #if defined( NO_MPI_LOGGING ) if ( argv != NULL ) MPE_Finish_log( argv[0] ); else MPE_Finish_log( "cpilog" ); #endif if ( myid == 0 ) { endwtime = MPI_Wtime(); printf( "pi is approximately %.16f, Error is %.16f\n", pi, fabs(pi - PI25DT) ); printf( "wall clock time = %f\n", endwtime-startwtime ); } MPI_Finalize(); return( 0 ); }