int main(int argc, char **argv) { long int j, k; /* dummies */ double scalar; /* constant used in Triad operation */ int iterations; /* number of times vector loop gets repeated */ long int length, /* total vector length */ offset; /* offset between vectors a and b, and b and c */ double bytes; /* memory IO size */ size_t space; /* memory used for a single vector */ double nstream_time, /* timing parameters */ avgtime = 0.0, maxtime = 0.0, mintime = 366.0*24.0*3600.0; /* set the minimum time to a large value; one leap year should be enough */ /********************************************************************************** * process and test input parameters ***********************************************************************************/ if (argc != 4){ printf("Usage: %s <# iterations> <vector length> <offset>\n", *argv); exit(EXIT_FAILURE); } iterations = atoi(*++argv); length = atol(*++argv); offset = atol(*++argv); if ((iterations < 1)) { printf("ERROR: Invalid number of iterations: %d\n", iterations); exit(EXIT_FAILURE); } if (length < 0) { printf("ERROR: Invalid vector length: %ld\n", length); exit(EXIT_FAILURE); } if (offset < 0) { printf("ERROR: Incvalid array offset: %ld\n", offset); exit(EXIT_FAILURE); } #ifdef STATIC_ALLOCATION if ((3*length + 2*offset) > N) { printf("ERROR: vector length/offset %ld/%ld too ", length, offset); printf("large; increase MAXLENGTH in Makefile or decrease vector length\n"); exit(EXIT_FAILURE); } #endif #ifndef STATIC_ALLOCATION space = (3*length + 2*offset)*sizeof(double); a = (double *) malloc(space); if (!a) { printf("ERROR: Could not allocate %ld words for vectors\n", 3*length+2*offset); exit(EXIT_FAILURE); } #endif b = a + length + offset; c = b + length + offset; printf("Serial stream triad: A = B + scalar*C\n"); printf("Vector length = %ld\n", length); printf("Offset = %ld\n", offset); printf("Number of iterations = %d\n", iterations); #pragma vector always for (j=0; j<length; j++) { a[j] = 0.0; b[j] = 2.0; c[j] = 2.0; } /* --- MAIN LOOP --- repeat Triad iterations times --- */ scalar = SCALAR; for (k=0; k<iterations; k++) { nstream_time = wtime(); #pragma vector always for (j=0; j<length; j++) a[j] = b[j]+scalar*c[j]; if (k>0 || iterations==1) { /* skip the first iteration */ nstream_time = wtime() - nstream_time; avgtime = avgtime + nstream_time; mintime = MIN(mintime, nstream_time); maxtime = MAX(maxtime, nstream_time); } /* insert a dependency between iterations to avoid dead-code elimination */ #pragma vector always for (j=0; j<length; j++) b[j] = a[j]; } /********************************************************************* ** Analyze and output results. *********************************************************************/ bytes = 3.0 * sizeof(double) * length; if (checkTRIADresults(iterations, length)) { avgtime = avgtime/(double)(MAX(iterations-1,1)); printf("Rate (MB/s): %lf, Avg time (s): %lf, Min time (s): %lf", 1.0E-06 * bytes/mintime, avgtime, mintime); printf(", Max time (s): %lf\n", maxtime); } else exit(EXIT_FAILURE); return 0; }
int main(int argc, char **argv) { long int j, iter; /* dummies */ double scalar; /* constant used in Triad operation */ int iterations; /* number of times vector loop gets repeated */ long int length, /* vector length per processor */ total_length, /* total vector length */ offset; /* offset between vectors a and b, and b and c */ double bytes; /* memory IO size */ size_t space; /* memory used for a single vector */ double nstream_time, /* timing parameters */ avgtime = 0.0, maxtime = 0.0, mintime = 366.0*8760.0*3600.0; /* set the minimum time to a large value; one leap year should be enough */ int Num_procs, /* process parameters */ my_ID, /* rank of calling process */ root=0; /* ID of master process */ int error=0; /* error flag for individual process */ /********************************************************************************** * process and test input parameters ***********************************************************************************/ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&Num_procs); MPI_Comm_rank(MPI_COMM_WORLD,&my_ID); if (my_ID == root) { printf("MPI stream triad: A = B + scalar*C\n"); if (argc != 4) { printf("Usage: %s <# iterations> <vector length> <offset>\n", *argv); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); if (iterations < 1) { printf("ERROR: Invalid number of iterations: %d\n", iterations); error = 1; goto ENDOFTESTS; } total_length = atol(*++argv); if (total_length < Num_procs) { printf("ERROR: Invalid vector length: %ld\n", total_length); error = 1; goto ENDOFTESTS; } else length = total_length/Num_procs; offset = atol(*++argv); if (offset < 0) { printf("ERROR: Invalid array offset: %ld\n", offset); error = 1; goto ENDOFTESTS; } #ifdef STATIC_ALLOCATION if ((3*length + 2*offset) > N) { printf("ERROR: vector length/offset %ld/%ld too ", total_length, offset); printf("large; increase MAXLENGTH in Makefile or decrease vector length\n"); error = 1; goto ENDOFTESTS; } #endif ENDOFTESTS: ; } bail_out(error); /* broadcast initialization data */ MPI_Bcast(&length,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&offset,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&iterations,1, MPI_INT, root, MPI_COMM_WORLD); #ifndef STATIC_ALLOCATION space = (3*length + 2*offset)*sizeof(double); a = (double *) malloc(space); if (!a && my_ID == root) { printf("ERROR: Could not allocate %ld bytes for vectors\n", (long int)space); error = 1; } bail_out(error); #endif b = a + length + offset; c = b + length + offset; bytes = 3.0 * sizeof(double) * length * Num_procs; if (my_ID == root) { printf("Number of processes = %d\n", Num_procs); printf("Vector length = %ld\n", total_length); printf("Offset = %ld\n", offset); printf("Number of iterations = %d\n", iterations); } #pragma vector always for (j=0; j<length; j++) { a[j] = 0.0; b[j] = 2.0; c[j] = 2.0; } /* --- MAIN LOOP --- repeat Triad iterations times --- */ scalar = SCALAR; for (iter=0; iter<iterations; iter++) { MPI_Barrier(MPI_COMM_WORLD); if (my_ID == root) { nstream_time = wtime(); } #pragma vector always for (j=0; j<length; j++) a[j] = b[j]+scalar*c[j]; if (my_ID == root) { if (iter>0 || iterations==1) { /* skip the first iteration */ nstream_time = wtime() - nstream_time; avgtime = avgtime + nstream_time; mintime = MIN(mintime, nstream_time); maxtime = MAX(maxtime, nstream_time); } } /* insert a dependency between iterations to avoid dead-code elimination */ #pragma vector always for (j=0; j<length; j++) b[j] = a[j]; } /********************************************************************* ** Analyze and output results. *********************************************************************/ if (my_ID == root) { if (checkTRIADresults(iterations, length)) { avgtime = avgtime/(double)(MAX(iterations-1,1)); printf("Rate (MB/s): %lf, Avg time (s): %lf, Min time (s): %lf", 1.0E-06 * bytes/mintime, avgtime, mintime); printf(", Max time (s): %lf\n", maxtime); } else error = 1; } bail_out(error); MPI_Finalize(); }
int main(int argc, char **argv) { long int j, iter; /* dummies */ double scalar; /* constant used in Triad operation */ int iterations; /* number of times vector loop gets repeated */ long int length, /* vector length per rank */ total_length, /* total vector length */ offset; /* offset between vectors a and b, and b and c */ double bytes; /* memory IO size */ size_t space; /* memory used for a single vector */ double local_nstream_time,/* timing parameters */ nstream_time, avgtime; int Num_procs, /* number of ranks */ my_ID, /* rank */ root=0; /* ID of master rank */ int error=0; /* error flag for individual rank */ double * RESTRICT a; /* main vector */ double * RESTRICT b; /* main vector */ double * RESTRICT c; /* main vector */ /********************************************************************************** * process and test input parameters ***********************************************************************************/ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&Num_procs); MPI_Comm_rank(MPI_COMM_WORLD,&my_ID); if (my_ID == root) { if (argc != 4){ printf("Usage: %s <# iterations> <vector length> <offset>\n", *argv); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); if (iterations < 1) { printf("ERROR: Invalid number of iterations: %d\n", iterations); error = 1; goto ENDOFTESTS; } total_length = atol(*++argv); if (total_length < Num_procs) { printf("ERROR: Invalid vector length: %ld\n", total_length); error = 1; goto ENDOFTESTS; } else length = total_length/Num_procs; offset = atol(*++argv); if (offset < 0) { printf("ERROR: Invalid array offset: %ld\n", offset); error = 1; goto ENDOFTESTS; } ENDOFTESTS:; } bail_out(error); /* broadcast initialization data */ MPI_Bcast(&length,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&offset,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&iterations,1, MPI_INT, root, MPI_COMM_WORLD); space = (3*length + 2*offset)*sizeof(double); a = (double *) malloc(space); if (!a && my_ID == root) { printf("ERROR: Could not allocate %ld bytes for vectors\n", (long int)space); error = 1; } bail_out(error); b = a + length + offset; c = b + length + offset; bytes = 4.0 * sizeof(double) * length * Num_procs; if (my_ID == root) { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("MPI stream triad: A = B + scalar*C\n"); printf("Number of ranks = %d\n", Num_procs); printf("Vector length = %ld\n", total_length); printf("Offset = %ld\n", offset); printf("Number of iterations = %d\n", iterations); } #pragma vector always for (j=0; j<length; j++) { a[j] = 0.0; b[j] = 2.0; c[j] = 2.0; } /* --- MAIN LOOP --- repeat Triad iterations times --- */ scalar = SCALAR; for (iter=0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_nstream_time = wtime(); } #pragma vector always for (j=0; j<length; j++) a[j] += b[j]+scalar*c[j]; } /* end iterations */ /********************************************************************* ** Analyze and output results. *********************************************************************/ local_nstream_time = wtime() - local_nstream_time; MPI_Reduce(&local_nstream_time, &nstream_time, 1, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD); if (my_ID == root) { if (checkTRIADresults(iterations, length, a)) { avgtime = nstream_time/iterations; printf("Rate (MB/s): %lf Avg time (s): %lf\n", 1.0E-06 * bytes/avgtime, avgtime); } else error = 1; } bail_out(error); MPI_Finalize(); }