void report_mflops(const bench_problem *p, double *t, int st) { struct stats s; mkstat(t, st, &s); ovtpvt("(%g %g %g %g)\n", mflops(p, s.max), mflops(p, s.avg), mflops(p, s.min), mflops(p, s.median)); }
void report_verbose(const bench_problem *p, double *t, int st) { struct stats s; char bmin[64], bmax[64], bavg[64], bmedian[64], btmin[64]; char bsetup[64]; int copyp = tensor_sz(p->sz) == 1; mkstat(t, st, &s); sprintf_time(s.min, bmin, 64); sprintf_time(s.max, bmax, 64); sprintf_time(s.avg, bavg, 64); sprintf_time(s.median, bmedian, 64); sprintf_time(time_min, btmin, 64); sprintf_time(p->setup_time, bsetup, 64); ovtpvt("Problem: %s, setup: %s, time: %s, %s: %.5g\n", p->pstring, bsetup, bmin, copyp ? "fp-move/us" : "``mflops''", mflops(p, s.min)); if (verbose) { ovtpvt("Took %d measurements for at least %s each.\n", st, btmin); ovtpvt("Time: min %s, max %s, avg %s, median %s\n", bmin, bmax, bavg, bmedian); } }
/*
 * Plan, optionally validate, and time a 1D transform of size n, printing
 * the results according to the verbosity level.
 *
 * n        - transform size
 * dir      - transform direction
 * flags    - extra planner flags, OR-ed with the global speed/wisdom/
 *            no-vector flags
 * specific - non-zero: plan with fftw_create_plan_specific() on the
 *            benchmark buffers; zero: plan with fftw_create_plan()
 *
 * Both buffers hold n * howmany_fields complex values and are released
 * before returning.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     fftw_complex *in, *out;
     fftw_plan plan;
     fftw_time begin, end;
     double t;
     int plan_flags;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex));

     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* Time the planner, whichever entry point is used. */
     begin = fftw_get_time();
     if (specific)
          plan = fftw_create_plan_specific(n, dir, plan_flags,
                                           in, howmany_fields,
                                           out, howmany_fields);
     else
          plan = fftw_create_plan(n, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* The extra validation pass is skipped for in-place plans. */
     if (paranoid && !(flags & FFTW_IN_PLACE)) {
          begin = fftw_get_time();
          test_ergun(n, dir, plan);
          end = fftw_get_time();

          t = fftw_time_to_sec(fftw_time_diff(end, begin));
          WHEN_VERBOSE(2, printf("time for validation: %f s\n", t));
     }

     FFTW_TIME_FFT(fftw(plan, howmany_fields,
                        in, howmany_fields, 1,
                        out, howmany_fields, 1),
                   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     /* Kept as separate printf calls, one smart_sprint_time() result each. */
     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                            " = %f\n", howmany_fields * mflops(t, n)));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
/*
 * Plan and time a multi-dimensional transform, printing the results
 * according to the verbosity level.  Only in-place transforms are
 * benchmarked: FFTW_IN_PLACE is always added to the planner flags.
 *
 * sz       - rank and per-dimension sizes of the transform
 * dir      - transform direction
 * flags    - extra planner flags, OR-ed with the global speed/wisdom/
 *            no-vector flags
 * specific - non-zero: plan with fftwnd_create_plan_specific() on the
 *            benchmark buffer; zero: plan with fftwnd_create_plan()
 */
void test_speed_nd_aux(struct size sz, fftw_direction dir,
                       int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     fftw_time begin, end;
     double t;
     int plan_flags;
     int dim, N = 1;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* Total number of points is the product of all dimensions. */
     for (dim = 0; dim < sz.rank; ++dim)
          N *= (sz.narray[dim]);

     in = (fftw_complex *) fftw_malloc(N * howmany_fields * sizeof(fftw_complex));

     begin = fftw_get_time();
     if (specific)
          plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
                                             plan_flags,
                                             in, howmany_fields, 0, 1);
     else
          plan = fftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));
     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
                          in, howmany_fields, 1, 0, 0, 0),
                   in, N * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     /* Kept as separate printf calls, one smart_sprint_time() result each. */
     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
                            " = %f\n", howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
int main(int argc,char *argv[]) { int i,j,k,nn; int mx,my,mz,it; float gosa; double cpu,cpu0,cpu1,flop,target; target= 60.0; omega= 0.8; mx= MX0-1; my= MY0-1; mz= MZ0-1; ndx= NDX0; ndy= NDY0; ndz= NDZ0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &npe); MPI_Comm_rank(MPI_COMM_WORLD, &id); int namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Get_processor_name(processor_name,&namelen); fprintf(stderr, "[%d] %s\n", id, processor_name); initcomm(ndx,ndy,ndz); it= initmax(mx,my,mz); /* * Initializing matrixes */ initmt(mx,it); float *sendp2_buf = (float*)malloc(MIMAX*MKMAX*sizeof(float)*4); sendp2_lo_sendbuf = &sendp2_buf[MIMAX*MKMAX*0]; sendp2_lo_recvbuf = &sendp2_buf[MIMAX*MKMAX*1]; sendp2_hi_sendbuf = &sendp2_buf[MIMAX*MKMAX*2]; sendp2_hi_recvbuf = &sendp2_buf[MIMAX*MKMAX*3]; #pragma acc enter data create(sendp2_buf[0:MIMAX*MKMAX*4]) if(id==0){ printf("Sequential version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0); printf("Parallel version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz); } nn= 3; if(id==0){ printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } #pragma acc data copyin(p, bnd, wrk1, wrk2, a, b, c) present(sendp2_buf[0:MIMAX*MKMAX*4]) { MPI_Barrier(MPI_COMM_WORLD); cpu0= gettime(); gosa= jacobi(nn); cpu1= gettime(); cpu = cpu1 - cpu0; MPI_Allreduce(MPI_IN_PLACE, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); flop= fflop(mz,my,mx); if(id == 0){ printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); nn= LOOP_TIMES; halo_time = 0.0; if(id == 0){ printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start 
measuring */ MPI_Barrier(MPI_COMM_WORLD); cpu0= gettime(); gosa= jacobi(nn); cpu1= gettime(); cpu = cpu1 - cpu0; MPI_Allreduce(MPI_IN_PLACE, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&halo_time, &max_halo_time, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&halo_time, &ave_halo_time, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); ave_halo_time /= npe; }//end of acc data if(id == 0){ printf("cpu : %f sec. halo(AVE.) %f sec. halo(MAX) %f sec.\n", cpu, ave_halo_time, max_halo_time); printf("Loop executed for %d times\n",nn); printf("Gosa : %e \n",gosa); printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop)); printf("Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } free(sendp2_buf); MPI_Finalize(); return (0); }
void report_benchmark(const bench_problem *p, double *t, int st) { struct stats s; mkstat(t, st, &s); ovtpvt("%.5g %.8g %g\n", mflops(p, s.min), s.min, p->setup_time); }
int main(int argc,char *argv[]) { int i,j,k,nn; int mx,my,mz,it; float gosa; double cpu,cpu0,cpu1,flop,target; target= 60.0; omega= 0.8; mx= MX0-1; my= MY0-1; mz= MZ0-1; ndx= NDX0; ndy= NDY0; ndz= NDZ0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &npe); MPI_Comm_rank(MPI_COMM_WORLD, &id); hime_err_init(id); if (argc != 3) { if (id == 0) { printf("./bmt <Restart #> <Checkpoint interval (steps)>\n"); printf("\n"); printf(" Restart #:\n"); printf(" Checkpiont id at which bmt starts\n"); printf(" Checkpoint interval (steps):\n"); printf(" # of Steps to skip checkpointing\n"); printf(""); } MPI_Finalize(); exit(0); } restart_id = atoi(argv[1]); interval = atoi(argv[2]); hime_dbgi(0, "Checkpoint directory: %s", CHECKPOINT_DIR); hime_dbgi(0, "Checkpoint interval: %d", interval); if (restart_id > 0) { hime_dbgi(0, "Restart ID: %d", restart_id); restart(restart_id); } initcomm(ndx,ndy,ndz); it= initmax(mx,my,mz); /* * Initializing matrixes */ initmt(mx,it); if(id==0){ printf("Sequential version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0); printf("Parallel version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz); } nn= 3; if(id==0){ printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } MPI_Barrier(MPI_COMM_WORLD); cpu0= MPI_Wtime(); gosa= jacobi(nn); cpu1= MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); flop= fflop(mz,my,mx); if(id == 0){ printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); if(id == 0){ printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ 
MPI_Barrier(MPI_COMM_WORLD); cpu0 = MPI_Wtime(); gosa = jacobi(nn); cpu1 = MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if(id == 0){ printf("cpu : %f sec.\n", cpu); printf("Loop executed for %d times\n",nn); printf("Gosa : %e \n",gosa); printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop)); printf("Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } MPI_Finalize(); return (0); }
/*
 * Time the MPI 1D transform of size n, with and without a work array,
 * and print the results on the process for which io_okay is set.
 *
 * Nothing is benchmarked when `specific` is set or when FFTW_IN_PLACE is
 * absent from `flags`.  When io_okay is set and only_parallel is not, the
 * uniprocessor transform is timed as well so a speedup can be printed.
 *
 * n        - transform size
 * dir      - transform direction
 * flags    - extra planner flags, OR-ed with the global speed/wisdom/
 *            no-vector flags
 * specific - non-zero: return immediately
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     int local_n, local_start;
     int local_n_after_transform, local_start_after_transform;
     int total_local_size, nalloc;
     int plan_flags, uniproc;
     fftw_complex *in, *work;
     fftw_plan plan = 0;
     fftw_mpi_plan mpi_plan;
     double t, t0 = 0.0;

     if (specific || !(flags & FFTW_IN_PLACE))
          return;

     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;
     /* True when the uniprocessor reference transform is run as well. */
     uniproc = io_okay && !only_parallel;

     if (uniproc)
          plan = fftw_create_plan(n, dir, plan_flags);

     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, plan_flags);
     CHECK(mpi_plan, "failed to create plan!");

     fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,
                          &local_n_after_transform,
                          &local_start_after_transform,
                          &total_local_size);

     /* The uniprocessor run needs room for all n points. */
     nalloc = uniproc ? n : total_local_size;

     in = (fftw_complex *) fftw_malloc(nalloc * howmany_fields * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(nalloc * howmany_fields * sizeof(fftw_complex));

     if (io_okay) {
          WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));
     }

     if (uniproc) {
          FFTW_TIME_FFT(fftw(plan, howmany_fields,
                             in, howmany_fields, 1, work, 1, 0),
                        in, n * howmany_fields, t0);

          fftw_destroy_plan(plan);

          WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
                                 smart_sprint_time(t0)));
     }

     /* First pass: no work array. */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus,
                                 smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
          WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, n)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));
          }
     }

     /* Second pass: same transform, with the work array supplied. */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus,
                                 smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
          WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, n)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n", t0 / t));
          }
     }

     fftw_free(in);
     fftw_free(work);
     fftw_mpi_destroy_plan(mpi_plan);

     WHEN_VERBOSE(1, my_printf("\n"));
}
/*
 * Time the MPI multi-dimensional transform in four configurations
 * (normal / transposed output order, each without and with a work array)
 * and print the results on the process for which io_okay is set.
 *
 * Nothing is benchmarked when sz.rank < 2 or when `specific` is set.
 * Only in-place transforms are benchmarked: FFTW_IN_PLACE is always added
 * to the planner flags.  When io_okay is set and only_parallel is not, the
 * uniprocessor transform is timed as well so a speedup can be printed.
 *
 * sz       - rank and per-dimension sizes of the transform
 * dir      - transform direction
 * flags    - extra planner flags, OR-ed with the global speed/wisdom/
 *            no-vector flags
 * specific - non-zero: return immediately
 */
void test_speed_nd_aux(struct size sz, fftw_direction dir,
                       int flags, int specific)
{
     int local_nx, local_x_start;
     int local_ny_after_transpose, local_y_start_after_transpose;
     int total_local_size;
     int plan_flags, uniproc;
     fftw_complex *in, *work;
     fftwnd_plan plan = 0;
     fftwnd_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int dim, N = 1;

     if (sz.rank < 2 || specific)
          return;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* Total number of points is the product of all dimensions. */
     for (dim = 0; dim < sz.rank; ++dim)
          N *= (sz.narray[dim]);

     /* True when the uniprocessor reference transform is run as well. */
     uniproc = io_okay && !only_parallel;

     if (uniproc)
          plan = fftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray,
                                       dir, plan_flags);

     CHECK(mpi_plan != NULL, "can't create plan");

     fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,
                            &local_ny_after_transpose,
                            &local_y_start_after_transpose,
                            &total_local_size);

     /* The uniprocessor run needs room for all N points. */
     if (uniproc)
          in = (fftw_complex *) fftw_malloc(N * howmany_fields * sizeof(fftw_complex));
     else
          in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields * sizeof(fftw_complex));

     if (uniproc) {
          FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
                               in, howmany_fields, 1, 0, 0, 0),
                        in, N * howmany_fields, t0);

          fftwnd_destroy_plan(plan);

          WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
                                 smart_sprint_time(t0)));
     }

     /* 1/4: normal order, no work array. */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, NULL,
                             FFTW_NORMAL_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
          WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, N)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));
          }
     }

     /* 2/4: transposed order, no work array. */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, NULL,
                             FFTW_TRANSPOSED_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
          WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, N)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));
          }
     }

     /* 3/4: normal order, with work array. */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, work,
                             FFTW_NORMAL_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
          WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, N)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));
          }
     }

     /* 4/4: transposed order, with work array. */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, work,
                             FFTW_TRANSPOSED_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
          WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, N)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));
          }
     }

     fftwnd_mpi_destroy_plan(mpi_plan);
     fftw_free(in);
     fftw_free(work);

     WHEN_VERBOSE(1, my_printf("\n"));
}
int main(int argc,char *argv[]) { int i,j,k,nn; int mx,my,mz,it; float gosa; double cpu,cpu0,cpu1,flop,target; target= 60.0; omega= 0.8; mx= MX0-1; my= MY0-1; mz= MZ0-1; ndx= NDX0; ndy= NDY0; ndz= NDZ0; MPI_Init(&argc, &argv); #ifdef SCR_ENABLE SCR_Init(); #endif MPI_Comm_size(MPI_COMM_WORLD, &npe); MPI_Comm_rank(MPI_COMM_WORLD, &id); initcomm(ndx,ndy,ndz); it= initmax(mx,my,mz); /* * Initializing matrixes */ initmt(mx,it); if(id==0){ printf("Sequential version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0); printf("Parallel version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz); } nn= 3; if(id==0){ printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } MPI_Barrier(MPI_COMM_WORLD); cpu0= MPI_Wtime(); gosa= jacobi(nn); cpu1= MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); flop= fflop(mz,my,mx); if(id == 0){ printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); if(id == 0){ printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ MPI_Barrier(MPI_COMM_WORLD); cpu0 = MPI_Wtime(); // nn = 10000000; gosa = jacobi(nn); cpu1 = MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if(id == 0){ fprintf(stderr, "cpu : %f sec.\n", cpu); fprintf(stderr, "Loop executed for %d times\n",nn); fprintf(stderr, "Gosa : %e \n",gosa); fprintf(stderr, "GFLOPS measured : %f\n",mflops(nn,cpu,flop)/1000.0); fprintf(stderr, "Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } #ifdef SCR_ENABLE SCR_Finalize(); #endif MPI_Finalize(); return (0); }
int main() { int i, j, k, nn; float gosa; double cpu, cpu0, cpu1, flop, target; int myrank = xmp_node_num() - 1; target = 60.0; omega = 0.8; imax = MIMAX; jmax = MJMAX; kmax = MKMAX; /* * Initializing matrixes */ initmt(); if (myrank == 0) { printf("mimax = %d mjmax = %d mkmax = %d\n",MIMAX, MJMAX, MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); } nn= 3; if (myrank == 0) { printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } cpu0= xmp_wtime(); gosa= jacobi(nn); cpu1= xmp_wtime(); cpu= cpu1 - cpu0; flop= fflop(imax,jmax,kmax); if (myrank == 0) { printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); #pragma xmp reduction (max:nn) if (myrank == 0) { printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ cpu0 = xmp_wtime(); gosa = jacobi(nn); cpu1 = xmp_wtime(); cpu= cpu1 - cpu0; if (myrank == 0) { printf(" Loop executed for %d times\n",nn); printf(" Gosa : %e \n",gosa); printf(" MFLOPS measured : %f\tcpu : %f\n",mflops(nn,cpu,flop),cpu); printf(" Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } return (0); }