int main (int argc, char **argv) { /* arrays used to contain each PE's rows - specify cols, no need to spec rows */ float **U_Curr; float **U_Next; /* helper variables */ /* available iterator */ int i, j, k, m, n; int per_proc, remainder, my_start_row, my_end_row, my_num_rows; int verbose = 0; int show_time = 0; double time; double t, tv[2]; /*OpenSHMEM initilization*/ start_pes (0); p = _num_pes (); my_rank = _my_pe (); if (p > 8) { fprintf(stderr, "Ignoring test when run with more than 8 pes\n"); return 77; } /* argument processing done by everyone */ int c, errflg; extern char *optarg; extern int optind, optopt; while ((c = getopt (argc, argv, "e:h:m:tw:v")) != -1) { switch (c) { case 'e': EPSILON = atof (optarg); break; case 'h': HEIGHT = atoi (optarg); break; case 'm': /* selects the numerical methods */ switch (atoi (optarg)) { case 1: /* jacobi */ meth = 1; break; case 2: /* gauss-seidel */ meth = 2; break; case 3: /* sor */ meth = 3; break; } break; case 't': show_time++; /* overridden by -v (verbose) */ break; case 'w': WIDTH = atoi (optarg); break; case 'v': verbose++; break; /* handle bad arguments */ case ':': /* -h or -w without operand */ if (ROOT == my_rank) fprintf (stderr, "Option -%c requires an operand\n", optopt); errflg++; break; case '?': if (ROOT == my_rank) fprintf (stderr, "Unrecognized option: -%c\n", optopt); errflg++; break; } } if (ROOT == my_rank && argc < 2) { printf ("Using defaults: -h 20 -w 20 -m 2\n"); } // if (0 < errflg) // exit(EXIT_FAILURE); /* wait for user to input runtime params */ for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) pSync[i] = _SHMEM_SYNC_VALUE; shmem_barrier_all (); /* broadcast method to use */ shmem_broadcast32 (&meth, &meth, 1, 0, 0, 0, p, pSync); switch (meth) { case 1: method = &jacobi; break; case 2: method = &gauss_seidel; break; case 3: method = &sor; break; } /* let each processor decide what rows(s) it owns */ my_start_row = get_start (my_rank); my_end_row = get_end (my_rank); my_num_rows = get_num_rows (my_rank); if (0 < verbose) printf ("proc %d contains (%d) rows %d to %d\n", my_rank, my_num_rows, my_start_row, my_end_row); fflush (stdout); /* allocate 2d array */ U_Curr = (float **) malloc (sizeof (float *) * my_num_rows); U_Curr[0] = (float *) malloc (sizeof (float) * my_num_rows * (int) floor (WIDTH / H)); for (i = 1; i < my_num_rows; i++) { U_Curr[i] = U_Curr[i - 1] + (int) floor (WIDTH / H); } /* allocate 2d array */ U_Next = (float **) malloc (sizeof (float *) * my_num_rows); U_Next[0] = (float *) malloc (sizeof (float) * my_num_rows * (int) floor (WIDTH / H)); for (i = 1; i < my_num_rows; i++) { U_Next[i] = U_Next[i - 1] + (int) floor (WIDTH / H); } /* initialize global grid */ init_domain (U_Curr, my_rank); init_domain (U_Next, my_rank); /* iterate for solution */ if (my_rank == ROOT) { tv[0] = gettime (); } k = 1; while (1) { method (U_Curr, U_Next); local_convergence_sqd = get_convergence_sqd (U_Curr, U_Next, my_rank); shmem_barrier_all (); shmem_float_sum_to_all (&convergence_sqd, &local_convergence_sqd, 1, 0, 0, p, pWrk, pSync); if (my_rank == ROOT) { convergence = sqrt (convergence_sqd); if (verbose == 1) { printf ("L2 = %f\n", convergence); } } /* broadcast method to use */ shmem_barrier_all (); shmem_broadcast32 (&convergence, &convergence, 1, 0, 0, 0, p, pSync); if (convergence <= EPSILON) { break; } /* copy U_Next to U_Curr */ for (j = my_start_row; j <= my_end_row; j++) { for (i = 0; i < (int) floor (WIDTH / H); i++) { U_Curr[j - my_start_row][i] = U_Next[j - my_start_row][i]; } } k++; //MPI_Barrier(MPI_COMM_WORLD); shmem_barrier_all (); } /* say something at the end */ if (my_rank == ROOT) { //time = MPI_Wtime() - time; tv[1] = gettime (); t = dt (&tv[1], &tv[0]); printf ("Estimated time to convergence in %d iterations using %d processors on a %dx%d grid is %f seconds\n", k, p, (int) floor (WIDTH / H), (int) floor (HEIGHT / H), t / 1000000.0); } //MPI_Finalize(); exit (EXIT_SUCCESS); return 0; }
int main(int argc, char** argv) { int p,my_rank; /* arrays used to contain each PE's rows - specify cols, no need to spec rows */ float **U_Curr; float **U_Next; /* helper variables */ float convergence,convergence_sqd,local_convergence_sqd; /* available iterator */ int i,j,k,m,n; int per_proc,remainder,my_start_row,my_end_row,my_num_rows; int verbose = 0; int show_time = 0; double time; /* initialize mpi stuff */ MPI_Init(&argc, &argv); /* get number of procs */ MPI_Comm_size(MPI_COMM_WORLD,&p); /* get rank of current process */ MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); /* argument processing done by everyone */ int c,errflg; extern char *optarg; extern int optind, optopt; while ((c = getopt(argc, argv, "e:h:m:tw:v")) != -1) { switch(c) { case 'e': EPSILON = atof(optarg); break; case 'h': HEIGHT = atoi(optarg); break; case 'm': /* selects the numerical methods */ switch(atoi(optarg)) { case 1: /* jacobi */ meth = 1; break; case 2: /* gauss-seidel */ meth = 2; break; case 3: /* sor */ meth = 3; break; } break; case 't': show_time++; /* overridden by -v (verbose) */ break; case 'w': WIDTH = atoi(optarg); break; case 'v': verbose++; break; /* handle bad arguments */ case ':': /* -h or -w without operand */ if (ROOT == my_rank) fprintf(stderr,"Option -%c requires an operand\n", optopt); errflg++; break; case '?': if (ROOT == my_rank) fprintf(stderr,"Unrecognized option: -%c\n", optopt); errflg++; break; } } /* if (0 < errflg) exit(EXIT_FAILURE); */ /* wait for user to input runtime params */ MPI_Barrier(MPI_COMM_WORLD); /* broadcast method to use */ (void) MPI_Bcast(&meth,1,MPI_INT,0,MPI_COMM_WORLD); switch (meth) { case 1: method = &jacobi; break; case 2: method = &gauss_seidel; break; case 3: method = &sor; break; } /* let each processor decide what rows(s) it owns */ my_start_row = get_start(my_rank); my_end_row = get_end(my_rank); my_num_rows = get_num_rows(my_rank); if ( 0 < verbose ) printf("proc %d contains (%d) rows %d to %d\n",my_rank,my_num_rows,my_start_row,my_end_row); fflush(stdout); /* allocate 2d array */ U_Curr = (float**)malloc(sizeof(float*)*my_num_rows); U_Curr[0] = (float*)malloc(sizeof(float)*my_num_rows*(int)floor(WIDTH/H)); for (i=1;i<my_num_rows;i++) { U_Curr[i] = U_Curr[i-1]+(int)floor(WIDTH/H); } /* allocate 2d array */ U_Next = (float**)malloc(sizeof(float*)*my_num_rows); U_Next[0] = (float*)malloc(sizeof(float)*my_num_rows*(int)floor(WIDTH/H)); for (i=1;i<my_num_rows;i++) { U_Next[i] = U_Next[i-1]+(int)floor(WIDTH/H); } /* initialize global grid */ init_domain(U_Curr,my_rank); init_domain(U_Next,my_rank); /* iterate for solution */ if (my_rank == ROOT) { time = MPI_Wtime(); } k = 1; while (1) { method(U_Curr,U_Next); local_convergence_sqd = get_convergence_sqd(U_Curr,U_Next,my_rank); MPI_Reduce(&local_convergence_sqd,&convergence_sqd,1,MPI_FLOAT,MPI_SUM,ROOT,MPI_COMM_WORLD); if (my_rank == ROOT) { convergence = sqrt(convergence_sqd); if (verbose == 1) { printf("L2 = %f\n",convergence); } } /* broadcast method to use */ (void) MPI_Bcast(&convergence,1,MPI_INT,0,MPI_COMM_WORLD); if (convergence <= EPSILON) { break; } /* copy U_Next to U_Curr */ for (j=my_start_row;j<=my_end_row;j++) { for (i=0;i<(int)floor(WIDTH/H);i++) { U_Curr[j-my_start_row][i] = U_Next[j-my_start_row][i]; } } k++; MPI_Barrier(MPI_COMM_WORLD); } /* say something at the end */ if (my_rank == ROOT) { time = MPI_Wtime() - time; if (0 < verbose) { printf("Estimated time to convergence in %d iterations using %d processors on a %dx%d grid is %f seconds\n",k,p,(int)floor(WIDTH/H),(int)floor(HEIGHT/H),time); } else if (show_time) { printf("%f\n",time); } /* else show nothing */ } MPI_Finalize(); exit(EXIT_SUCCESS); return 0; }