示例#1
0
文件: report.c 项目: bambang/vsipl
void report_mflops(const bench_problem *p, double *t, int st)
{
     struct stats s;
     mkstat(t, st, &s);
     ovtpvt("(%g %g %g %g)\n", 
	    mflops(p, s.max), mflops(p, s.avg), 
	    mflops(p, s.min), mflops(p, s.median));
}
示例#2
0
文件: report.c 项目: bambang/vsipl
void report_verbose(const bench_problem *p, double *t, int st)
{
     struct stats s;
     char bmin[64], bmax[64], bavg[64], bmedian[64], btmin[64];
     char bsetup[64];
     int copyp = tensor_sz(p->sz) == 1;

     mkstat(t, st, &s);

     sprintf_time(s.min, bmin, 64);
     sprintf_time(s.max, bmax, 64);
     sprintf_time(s.avg, bavg, 64);
     sprintf_time(s.median, bmedian, 64);
     sprintf_time(time_min, btmin, 64);
     sprintf_time(p->setup_time, bsetup, 64);

     ovtpvt("Problem: %s, setup: %s, time: %s, %s: %.5g\n",
	    p->pstring, bsetup, bmin, 
	    copyp ? "fp-move/us" : "``mflops''",
	    mflops(p, s.min));

     if (verbose) {
	  ovtpvt("Took %d measurements for at least %s each.\n", st, btmin);
	  ovtpvt("Time: min %s, max %s, avg %s, median %s\n",
		 bmin, bmax, bavg, bmedian);
     }
}
示例#3
0
文件: fftw_test.c 项目: Pinkii-/PCA
/*
 * Benchmark one 1-D transform of size n in direction dir.
 *
 * A plan is created (the "specific" variant when `specific` is non-zero)
 * and the planner time is reported at verbosity 2.  When the global
 * `paranoid` is set and the transform is not in place, the plan is also
 * validated with test_ergun().  The transform is then timed through
 * FFTW_TIME_FFT and the results are printed at verbosity 1.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     fftw_complex *in, *out;
     fftw_plan plan;
     fftw_time begin, end;
     double t;
     int plan_flags;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
                                       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
                                        * sizeof(fftw_complex));

     /* both planner entry points receive the same combined flag word */
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     begin = fftw_get_time();
     if (specific)
          plan = fftw_create_plan_specific(n, dir, plan_flags,
                                           in, howmany_fields,
                                           out, howmany_fields);
     else
          plan = fftw_create_plan(n, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* optional validation pass, skipped for in-place transforms */
     if (paranoid && !(flags & FFTW_IN_PLACE)) {
          begin = fftw_get_time();
          test_ergun(n, dir, plan);
          end = fftw_get_time();
          t = fftw_time_to_sec(fftw_time_diff(end, begin));
          WHEN_VERBOSE(2, printf("time for validation: %f s\n", t));
     }

     /* t is reused: from here on it holds the per-transform time */
     FFTW_TIME_FFT(fftw(plan, howmany_fields,
                        in, howmany_fields, 1, out, howmany_fields, 1),
                   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                            " = %f\n", howmany_fields * mflops(t, n)));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
示例#4
0
文件: fftw_test.c 项目: Pinkii-/PCA
/*
 * Benchmark one multi-dimensional transform whose shape is given by sz.
 *
 * FFTW_IN_PLACE is always added to flags, so only in-place transforms
 * are timed.  A plan is created (the "specific" variant when `specific`
 * is non-zero), the planner time and the plan are printed at verbosity 2,
 * and the transform is timed through FFTW_TIME_FFT with the results
 * printed at verbosity 1.
 */
void test_speed_nd_aux(struct size sz,
                       fftw_direction dir, int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     fftw_time begin, end;
     double t;
     int dim, npoints;
     int plan_flags;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* total number of points = product of all dimension sizes */
     npoints = 1;
     for (dim = 0; dim < sz.rank; ++dim)
          npoints *= sz.narray[dim];

     in = (fftw_complex *) fftw_malloc(npoints * howmany_fields *
                                       sizeof(fftw_complex));

     begin = fftw_get_time();
     if (specific)
          plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
                                             plan_flags,
                                             in, howmany_fields, 0, 1);
     else
          plan = fftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     /* t is reused: from here on it holds the per-transform time */
     FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
                          in, howmany_fields, 1, 0, 0, 0),
                   in, npoints * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / npoints)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
                            " = %f\n", howmany_fields * mflops(t, npoints)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
示例#5
0
int
main(int argc,char *argv[])
{
  int    i,j,k,nn;
  int    mx,my,mz,it;
  float  gosa;
  double cpu,cpu0,cpu1,flop,target;

  target= 60.0;
  omega= 0.8;
  mx= MX0-1;
  my= MY0-1;
  mz= MZ0-1;
  ndx= NDX0;
  ndy= NDY0;
  ndz= NDZ0;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &npe);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  int    namelen;
  char   processor_name[MPI_MAX_PROCESSOR_NAME];
  MPI_Get_processor_name(processor_name,&namelen);
  fprintf(stderr, "[%d] %s\n", id, processor_name);

  initcomm(ndx,ndy,ndz);
  it= initmax(mx,my,mz);

  /*
   *    Initializing matrixes
   */
  initmt(mx,it);

  float *sendp2_buf = (float*)malloc(MIMAX*MKMAX*sizeof(float)*4);
  sendp2_lo_sendbuf = &sendp2_buf[MIMAX*MKMAX*0];
  sendp2_lo_recvbuf = &sendp2_buf[MIMAX*MKMAX*1];
  sendp2_hi_sendbuf = &sendp2_buf[MIMAX*MKMAX*2];
  sendp2_hi_recvbuf = &sendp2_buf[MIMAX*MKMAX*3];
#pragma acc enter data create(sendp2_buf[0:MIMAX*MKMAX*4])

  if(id==0){
    printf("Sequential version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0);
    printf("Parallel version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
    printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz);
  }

  nn= 3;
  if(id==0){
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

#pragma acc data copyin(p, bnd, wrk1, wrk2, a, b, c) present(sendp2_buf[0:MIMAX*MKMAX*4])
  {
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= gettime();
  gosa= jacobi(nn);
  cpu1= gettime();
  cpu = cpu1 - cpu0;

  MPI_Allreduce(MPI_IN_PLACE,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  flop= fflop(mz,my,mx);

  if(id == 0){
    printf(" MFLOPS: %f time(s): %f %e\n\n",
           mflops(nn,cpu,flop),cpu,gosa);
  }

  nn= (int)(target/(cpu/3.0));
  nn= LOOP_TIMES;
  halo_time = 0.0;
  if(id == 0){
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /*
   *    Start measuring
   */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= gettime();
  gosa= jacobi(nn);
  cpu1= gettime();
  cpu = cpu1 - cpu0;

  MPI_Allreduce(MPI_IN_PLACE,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  MPI_Allreduce(&halo_time,
                &max_halo_time,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  MPI_Allreduce(&halo_time,
                &ave_halo_time,
                1,
                MPI_DOUBLE,
                MPI_SUM,
                MPI_COMM_WORLD);
  ave_halo_time /= npe;
  }//end of acc data

  if(id == 0){
    printf("cpu : %f sec. halo(AVE.) %f sec. halo(MAX) %f sec.\n", cpu, ave_halo_time, max_halo_time);
    printf("Loop executed for %d times\n",nn);
    printf("Gosa : %e \n",gosa);
    printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop));
    printf("Score based on Pentium III 600MHz : %f\n",
           mflops(nn,cpu,flop)/82.84);
  }

  free(sendp2_buf);

  MPI_Finalize();

  return (0);
}
示例#6
0
文件: report.c 项目: bambang/vsipl
void report_benchmark(const bench_problem *p, double *t, int st)
{
     struct stats s;
     mkstat(t, st, &s);
     ovtpvt("%.5g %.8g %g\n", mflops(p, s.min), s.min, p->setup_time);
}
示例#7
0
/*
 * Benchmark driver (MPI variant with checkpoint/restart arguments).
 *
 * Usage: ./bmt <Restart #> <Checkpoint interval (steps)>
 *
 * With any other argument count, rank 0 prints the usage text and every
 * rank exits with a non-zero status.  Otherwise the two arguments are
 * stored in restart_id and interval, restart() is called when restart_id
 * is positive, and the benchmark runs a 3-iteration rehearsal of jacobi()
 * followed by a measured run sized from the rehearsal time.  Rank 0
 * prints the results.
 *
 * Returns 0 on normal completion.
 */
int
main(int argc,char *argv[])
{
  int    nn;
  int    mx,my,mz,it;
  float  gosa;
  double cpu,cpu0,cpu1,flop,target;

  target= 60.0;
  omega= 0.8;
  mx= MX0-1;
  my= MY0-1;
  mz= MZ0-1;
  ndx= NDX0;
  ndy= NDY0;
  ndz= NDZ0;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &npe);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  hime_err_init(id);
  if (argc != 3) {
    if (id == 0) {
      printf("./bmt <Restart #> <Checkpoint interval (steps)>\n");
      printf("\n");
      printf("   Restart #:\n");
      printf("      Checkpiont id at which bmt starts\n");
      printf("   Checkpoint interval (steps):\n");
      printf("      # of Steps to skip checkpointing\n");
    }
    MPI_Finalize();
    /* a usage error is a failure: report it through the exit status */
    exit(1);
  }

  /* NOTE(review): atoi() yields 0 for non-numeric text and the values are
     not range-checked here -- confirm that restart() and the
     checkpointing code tolerate 0 or negative input */
  restart_id = atoi(argv[1]);
  interval   = atoi(argv[2]);

  hime_dbgi(0, "Checkpoint directory: %s", CHECKPOINT_DIR);
  hime_dbgi(0, "Checkpoint interval:  %d", interval);

  if (restart_id > 0) {
    hime_dbgi(0, "Restart ID:  %d", restart_id);
    restart(restart_id);
  }

  initcomm(ndx,ndy,ndz);
  it= initmax(mx,my,mz);

  /*
   *    Initializing matrixes
   */
  initmt(mx,it);

  if(id==0){
    printf("Sequential version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0);
    printf("Parallel version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
    printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz);
  }

  nn= 3;
  if(id==0){
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

  /* rehearsal run: nn (= 3) iterations */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= MPI_Wtime();
  gosa= jacobi(nn);
  cpu1= MPI_Wtime() - cpu0;

  /* cpu receives the slowest rank's elapsed time on every rank */
  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  flop= fflop(mz,my,mx);
  if(id == 0){
    printf(" MFLOPS: %f time(s): %f %e\n\n",
           mflops(nn,cpu,flop),cpu,gosa);
  }
  /* iteration count so that the measured run lasts about `target`
     seconds, based on the time per iteration seen in the rehearsal */
  nn= (int)(target/(cpu/3.0));

  if(id == 0){
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /*
   *    Start measuring
   */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0 = MPI_Wtime();
  gosa = jacobi(nn);
  cpu1 = MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  if(id == 0){
    printf("cpu : %f sec.\n", cpu);
    printf("Loop executed for %d times\n",nn);
    printf("Gosa : %e \n",gosa);
    printf("MFLOPS measured : %f\n",mflops(nn,cpu,flop));
    printf("Score based on Pentium III 600MHz : %f\n",
           mflops(nn,cpu,flop)/82.84);
  }


  MPI_Finalize();

  return (0);
}
示例#8
0
/*
 * Benchmark an in-place 1-D transform of size n with an MPI FFTW plan.
 *
 * Nothing is done for "specific" plans or when FFTW_IN_PLACE is not set
 * in flags.  When io_okay is set and only_parallel is not, a
 * uniprocessor plan is also created and timed first, so that a speedup
 * can be reported.  The MPI transform is timed twice: once with a NULL
 * work array and once with a separately allocated one.  Output is only
 * produced where io_okay is set.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     int local_n, local_start, local_n_after_transform,
         local_start_after_transform, total_local_size, nalloc;
     fftw_complex *in, *work;
     fftw_plan plan = 0;
     fftw_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int plan_flags;

     if (specific)
          return;
     if (!(flags & FFTW_IN_PLACE))
          return;

     /* the serial and the MPI planner receive the same flag word */
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     if (io_okay && !only_parallel)
          plan = fftw_create_plan(n, dir, plan_flags);

     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, plan_flags);

     CHECK(mpi_plan, "failed to create plan!");

     fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,
                          &local_n_after_transform,
                          &local_start_after_transform,
                          &total_local_size);

     /* the process that also runs the serial transform needs room for
        all n points; the others only need their local share */
     nalloc = (io_okay && !only_parallel) ? n : total_local_size;

     in = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
                                       * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
                                         * sizeof(fftw_complex));

     if (io_okay) {
          WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));
     }

     /* uniprocessor reference timing, kept in t0 */
     if (io_okay && !only_parallel) {
          FFTW_TIME_FFT(fftw(plan, howmany_fields,
                             in, howmany_fields, 1, work, 1, 0),
                        in, n * howmany_fields, t0);

          fftw_destroy_plan(plan);

          WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     }

     /* first parallel timing: no work array */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
          WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, n)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));
          }
     }

     /* second parallel timing: with the work array */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
          WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, n)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n", t0 / t));
          }
     }

     fftw_free(in);
     fftw_free(work);
     fftw_mpi_destroy_plan(mpi_plan);

     WHEN_VERBOSE(1, my_printf("\n"));
}
示例#9
0
/*
 * Benchmark an in-place multi-dimensional transform of shape sz with an
 * MPI FFTW plan.
 *
 * Nothing is done for rank < 2 or for "specific" plans.  When io_okay is
 * set and only_parallel is not, a uniprocessor plan is also created and
 * timed first, so that a speedup can be reported.  The MPI transform is
 * then timed four times: normal and transposed output order, each
 * without and with a work array.  Output is only produced where io_okay
 * is set.
 */
void test_speed_nd_aux(struct size sz,
                       fftw_direction dir, int flags, int specific)
{
     int local_nx, local_x_start, local_ny_after_transpose,
         local_y_start_after_transpose, total_local_size;
     fftw_complex *in, *work;
     fftwnd_plan plan = 0;
     fftwnd_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int dim, npoints;
     int plan_flags;

     /* neither 1-D problems nor "specific" plans are benchmarked here */
     if (sz.rank < 2 || specific)
          return;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* total number of points = product of all dimension sizes */
     npoints = 1;
     for (dim = 0; dim < sz.rank; ++dim)
          npoints *= sz.narray[dim];

     if (io_okay && !only_parallel)
          plan = fftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray,
                                       dir, plan_flags);
     CHECK(mpi_plan != NULL, "can't create plan");

     fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,
                            &local_ny_after_transpose,
                            &local_y_start_after_transpose,
                            &total_local_size);

     /* the process that also runs the serial transform needs room for
        every point; the others only need their local share */
     if (io_okay && !only_parallel)
          in = (fftw_complex *) fftw_malloc(npoints * howmany_fields *
                                            sizeof(fftw_complex));
     else
          in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
                                            sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
                                         sizeof(fftw_complex));

     /* uniprocessor reference timing, kept in t0 */
     if (io_okay && !only_parallel) {
          FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
                               in, howmany_fields, 1, 0, 0, 0),
                        in, npoints * howmany_fields, t0);

          fftwnd_destroy_plan(plan);

          WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
                                 smart_sprint_time(t0)));
     }

     /* 1/4: normal order, no work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
                             in, NULL, FFTW_NORMAL_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / npoints)));
          WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, npoints)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));
          }
     }

     /* 2/4: transposed order, no work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
                             in, NULL, FFTW_TRANSPOSED_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / npoints)));
          WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, npoints)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));
          }
     }

     /* 3/4: normal order, with work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
                             in, work, FFTW_NORMAL_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / npoints)));
          WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, npoints)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));
          }
     }

     /* 4/4: transposed order, with work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
                             in, work, FFTW_TRANSPOSED_ORDER),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / npoints)));
          WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / "
                                 "(t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, npoints)));
          if (!only_parallel) {
               WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));
          }
     }

     fftwnd_mpi_destroy_plan(mpi_plan);

     fftw_free(in);
     fftw_free(work);

     WHEN_VERBOSE(1, my_printf("\n"));
}
示例#10
0
int
main(int argc,char *argv[])
{
  int    i,j,k,nn;
  int    mx,my,mz,it;
  float  gosa;
  double cpu,cpu0,cpu1,flop,target;

  target= 60.0;
  omega= 0.8;
  mx= MX0-1;
  my= MY0-1;
  mz= MZ0-1;
  ndx= NDX0;
  ndy= NDY0;
  ndz= NDZ0;

  MPI_Init(&argc, &argv);
#ifdef SCR_ENABLE
  SCR_Init();
#endif
  
  MPI_Comm_size(MPI_COMM_WORLD, &npe);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  initcomm(ndx,ndy,ndz);
  it= initmax(mx,my,mz);

  /*
   *    Initializing matrixes
   */
  initmt(mx,it);

  if(id==0){
    printf("Sequential version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0);
    printf("Parallel version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
    printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz);
  }

  nn= 3;
  if(id==0){
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= MPI_Wtime();
  gosa= jacobi(nn);
  cpu1= MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  flop= fflop(mz,my,mx);
  if(id == 0){
    printf(" MFLOPS: %f time(s): %f %e\n\n",
           mflops(nn,cpu,flop),cpu,gosa);
  }
 nn= (int)(target/(cpu/3.0));

  if(id == 0){
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /*
   *    Start measuring
   */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0 = MPI_Wtime();
  //  nn = 10000000;
  gosa = jacobi(nn);
  cpu1 = MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  if(id == 0){
    fprintf(stderr, "cpu : %f sec.\n", cpu);
    fprintf(stderr, "Loop executed for %d times\n",nn);
    fprintf(stderr, "Gosa : %e \n",gosa);
    fprintf(stderr, "GFLOPS measured : %f\n",mflops(nn,cpu,flop)/1000.0);
    fprintf(stderr, "Score based on Pentium III 600MHz : %f\n",
           mflops(nn,cpu,flop)/82.84);
  }

#ifdef SCR_ENABLE  
  SCR_Finalize();
#endif
  MPI_Finalize();
  
  return (0);
}
示例#11
0
文件: himeno.c 项目: expajp/xmp
/*
 * Benchmark driver (XcalableMP variant).
 *
 * Sets the problem size from MIMAX/MJMAX/MKMAX, initialises the
 * matrices, runs a 3-iteration rehearsal of jacobi() and then a measured
 * run whose iteration count is derived from the rehearsal time and
 * max-reduced across nodes.  The node with xmp_node_num() == 1 prints
 * the results.  Returns 0.
 */
int
main()
{
  int    nn;
  float  gosa;
  double t_start, t_stop, elapsed, flop, rate;
  double target = 60.0;

  /* xmp_node_num() is 1-based here: the printing node gets rank 0 */
  int    myrank = xmp_node_num() - 1;

  omega = 0.8;
  imax = MIMAX;
  jmax = MJMAX;
  kmax = MKMAX;

  /* set up the matrices */
  initmt();

  if (myrank == 0) {
    printf("mimax = %d mjmax = %d mkmax = %d\n",MIMAX, MJMAX, MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
  }

  nn = 3;

  if (myrank == 0) {
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

  /* rehearsal: time nn (= 3) iterations */
  t_start = xmp_wtime();
  gosa = jacobi(nn);
  t_stop = xmp_wtime();
  elapsed = t_stop - t_start;

  flop = fflop(imax,jmax,kmax);

  if (myrank == 0) {
    printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,elapsed,flop),elapsed,gosa);
  }

  /* scale the iteration count from the rehearsal's time per iteration,
     then take the maximum over all nodes so every node uses one value */
  nn = (int)(target/(elapsed/3.0));
#pragma xmp reduction (max:nn)

  if (myrank == 0) {
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /* measured run */
  t_start = xmp_wtime();
  gosa = jacobi(nn);
  t_stop = xmp_wtime();

  elapsed = t_stop - t_start;

  if (myrank == 0) {
    rate = mflops(nn,elapsed,flop);

    printf(" Loop executed for %d times\n",nn);
    printf(" Gosa : %e \n",gosa);
    printf(" MFLOPS measured : %f\tcpu : %f\n",rate,elapsed);
    printf(" Score based on Pentium III 600MHz : %f\n",
           rate/82.84);
  }

  return (0);
}