// Byte estimate for the combined run: the sum of what bytes_local() and
// bytes_bndneigh() report for the same number of timesteps.
double bytes_all(unsigned int i_timesteps)
{
  // Sum in a single expression so the arithmetic type of the addition is
  // whatever the two callees return; only the result is converted to double.
  const double d_combined = bytes_local(i_timesteps) + bytes_bndneigh(i_timesteps);
  return d_combined;
}
// Example #2
// 0
int main(int argc, char* argv[]) {
  if (argc != 4) {
    printf("Wrong parameters!\n");
    printf(" #cells #timesteps kernel\n");
    printf("   kernel-values: all, local, neigh, ader, vol, bndlocal\n");
    return -1;
  }

  unsigned int i_cells = atoi(argv[1]);
  unsigned int i_timesteps = atoi(argv[2]);
  std::string s_part;
  s_part.assign(argv[3]);

  // double-check if the selected kernel exists
  if ( (s_part.compare("all") != 0) &&
       (s_part.compare("local") != 0) &&
       (s_part.compare("neigh") != 0) &&
       (s_part.compare("ader") != 0) &&
       (s_part.compare("vol") != 0) &&
       (s_part.compare("bndlocal") != 0) )
  {
    printf("Wrong parameters!\n");
    printf(" #cells #timesteps kernel\n");
    printf("   kernel-values: all, local, neigh, ader, vol, bndlocal\n");
    return -1;
  }

  printf("Allocating fake data...\n");
  i_cells = init_data_structures(i_cells);
  printf("...done\n\n");

  struct timeval start_time, end_time;
  size_t cycles_start, cycles_end;
  double total = 0.0;
  double total_cycles = 0.0;

  // init OpenMP and LLC
  if (s_part.compare("all") == 0) {
    computeLocalIntegration();
    computeNeighboringIntegration();
  } else if (s_part.compare("local") == 0) {
    computeLocalIntegration();
  } else if (s_part.compare("neigh") == 0) {
    computeNeighboringIntegration();
  } else if (s_part.compare("ader") == 0) {
    computeAderIntegration();
  } else if (s_part.compare("vol") == 0) {
    computeVolumeIntegration();
  } else {
    computeLocalBoundaryIntegration();
  }

  gettimeofday(&start_time, NULL);
#ifdef __USE_RDTSC
  cycles_start = __rdtsc();
#endif

  if (s_part.compare("all") == 0) {
    for (unsigned int t = 0; t < i_timesteps; t++) {
      computeLocalIntegration();
      computeNeighboringIntegration();
    }
  } else if (s_part.compare("local") == 0) {
    for (unsigned int t = 0; t < i_timesteps; t++) {
      computeLocalIntegration();
    }
  } else if (s_part.compare("neigh") == 0) {
    for (unsigned int t = 0; t < i_timesteps; t++) {
      computeNeighboringIntegration();
    }
  } else if (s_part.compare("ader") == 0) {
    for (unsigned int t = 0; t < i_timesteps; t++) {
      computeAderIntegration();
    }
  } else if (s_part.compare("vol") == 0) {
    for (unsigned int t = 0; t < i_timesteps; t++) {
      computeVolumeIntegration();
    }
  } else {
    for (unsigned int t = 0; t < i_timesteps; t++) {
      computeLocalBoundaryIntegration();
    }
  }
#ifdef __USE_RDTSC  
  cycles_end = __rdtsc();
#endif
  gettimeofday(&end_time, NULL);
  total = sec(start_time, end_time);
#ifdef __USE_RDTSC
  printf("Cycles via __rdtsc()!\n");
  total_cycles = (double)(cycles_end-cycles_start);
#else
  total_cycles = derive_cycles_from_time(total);
#endif

  printf("=================================================\n");
  printf("===            PERFORMANCE SUMMARY            ===\n");
  printf("=================================================\n");
  printf("seissol proxy mode                  : %s\n", s_part.c_str());
  printf("time for seissol proxy              : %f\n", total);
  printf("cycles                              : %f\n\n", total_cycles);
  seissol_flops actual_flops;
  if (s_part.compare("all") == 0) {
    actual_flops = flops_all_actual(i_timesteps);
    printf("GFLOP (non-zero) for seissol proxy  : %f\n", actual_flops.d_nonZeroFlops/(1e9));
    printf("GFLOP (hardware) for seissol proxy  : %f\n", actual_flops.d_hardwareFlops/(1e9));
    //printf("GFLOP (estimate) for seissol proxy  : %f\n", flops_all(i_timesteps)/(1e9));
    printf("GiB (estimate) for seissol proxy    : %f\n\n", bytes_all(i_timesteps)/(1024.0*1024.0*1024.0));
    printf("FLOPS/cycle (non-zero)              : %f\n", actual_flops.d_nonZeroFlops/total_cycles);
    printf("FLOPS/cycle (hardware)              : %f\n", actual_flops.d_hardwareFlops/total_cycles);
    printf("Bytes/cycle (estimate)              : %f\n\n", bytes_all(i_timesteps)/total_cycles);
    printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total);
    printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total);
    printf("GiB/s (estimate) for seissol proxy  : %f\n", (bytes_all(i_timesteps)/(1024.0*1024.0*1024.0))/total);
  } else if (s_part.compare("local") == 0) {
    actual_flops = flops_local_actual(i_timesteps);
    printf("GFLOP (non-zero) for seissol proxy  : %f\n", actual_flops.d_nonZeroFlops/(1e9));
    printf("GFLOP (hardware) for seissol proxy  : %f\n", actual_flops.d_hardwareFlops/(1e9));
    //printf("GFLOP (estimate) for seissol proxy  : %f\n", flops_local(i_timesteps)/(1e9));
    printf("GiB (estimate) for seissol proxy    : %f\n\n", bytes_local(i_timesteps)/(1024.0*1024.0*1024.0));
    printf("FLOPS/cycle (non-zero)              : %f\n", actual_flops.d_nonZeroFlops/total_cycles);
    printf("FLOPS/cycle (hardware)              : %f\n", actual_flops.d_hardwareFlops/total_cycles);
    printf("Bytes/cycle (estimate)              : %f\n\n", bytes_local(i_timesteps)/total_cycles);
    printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total);
    printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total);
    printf("GiB/s (estimate) for seissol proxy  : %f\n", (bytes_local(i_timesteps)/(1024.0*1024.0*1024.0))/total);
  } else if (s_part.compare("neigh") == 0) {
    actual_flops = flops_bndneigh_actual(i_timesteps);
    printf("GFLOP (non-zero) for seissol proxy  : %f\n", actual_flops.d_nonZeroFlops/(1e9));
    printf("GFLOP (hardware) for seissol proxy  : %f\n", actual_flops.d_hardwareFlops/(1e9));
    //printf("GFLOP (estimate) for seissol proxy  : %f\n", flops_bndneigh(i_timesteps)/(1e9));
    printf("GiB (estimate) for seissol proxy    : %f\n\n", bytes_bndneigh(i_timesteps)/(1024.0*1024.0*1024.0));
    printf("FLOPS/cycle (non-zero)              : %f\n", actual_flops.d_nonZeroFlops/total_cycles);
    printf("FLOPS/cycle (hardware)              : %f\n", actual_flops.d_hardwareFlops/total_cycles);
    printf("Bytes/cycle (estimate)              : %f\n\n", bytes_bndneigh(i_timesteps)/total_cycles);
    printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total);
    printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total);
    printf("GiB/s (estimate) for seissol proxy  : %f\n", (bytes_bndneigh(i_timesteps)/(1024.0*1024.0*1024.0))/total);
  } else if (s_part.compare("ader") == 0) {
    actual_flops = flops_ader_actual(i_timesteps);
    printf("GFLOP (non-zero) for seissol proxy  : %f\n", actual_flops.d_nonZeroFlops/(1e9));
    printf("GFLOP (hardware) for seissol proxy  : %f\n", actual_flops.d_hardwareFlops/(1e9));
    //printf("GFLOP (estimate) for seissol proxy  : %f\n", flops_ader(i_timesteps)/(1e9));
    printf("GiB (estimate) for seissol proxy    : %f\n\n", bytes_ader(i_timesteps)/(1024.0*1024.0*1024.0));
    printf("FLOPS/cycle (non-zero)              : %f\n", actual_flops.d_nonZeroFlops/total_cycles);
    printf("FLOPS/cycle (hardware)              : %f\n", actual_flops.d_hardwareFlops/total_cycles);
    printf("Bytes/cycle (estimate)              : %f\n\n", bytes_ader(i_timesteps)/total_cycles);
    printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total);
    printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total);
    printf("GiB/s (estimate) for seissol proxy  : %f\n", (bytes_ader(i_timesteps)/(1024.0*1024.0*1024.0))/total);
  } else if (s_part.compare("vol") == 0) {
    actual_flops = flops_vol_actual(i_timesteps);
    printf("GFLOP (non-zero) for seissol proxy  : %f\n", actual_flops.d_nonZeroFlops/(1e9));
    printf("GFLOP (hardware) for seissol proxy  : %f\n", actual_flops.d_hardwareFlops/(1e9));
    //printf("GFLOP (estimate) for seissol proxy  : %f\n", flops_vol(i_timesteps)/(1e9));
    printf("GiB (estimate) for seissol proxy    : %f\n\n", bytes_vol(i_timesteps)/(1024.0*1024.0*1024.0));
    printf("FLOPS/cycle (non-zero)              : %f\n", actual_flops.d_nonZeroFlops/total_cycles);
    printf("FLOPS/cycle (hardware)              : %f\n", actual_flops.d_hardwareFlops/total_cycles);
    printf("Bytes/cycle (estimate)              : %f\n\n", bytes_vol(i_timesteps)/total_cycles);
    printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total);
    printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total);
    printf("GiB/s (estimate) for seissol proxy  : %f\n", (bytes_vol(i_timesteps)/(1024.0*1024.0*1024.0))/total);
  } else {
    actual_flops = flops_bndlocal_actual(i_timesteps);
    printf("GFLOP (non-zero) for seissol proxy  : %f\n", actual_flops.d_nonZeroFlops/(1e9));
    printf("GFLOP (hardware) for seissol proxy  : %f\n", actual_flops.d_hardwareFlops/(1e9));
    //printf("GFLOP (estimate) for seissol proxy  : %f\n", flops_bndlocal(i_timesteps)/(1e9));
    printf("GiB (estimate) for seissol proxy    : %f\n\n", bytes_bndlocal(i_timesteps)/(1024.0*1024.0*1024.0));
    printf("FLOPS/cycle (non-zero)              : %f\n", actual_flops.d_nonZeroFlops/total_cycles);
    printf("FLOPS/cycle (hardware)              : %f\n", actual_flops.d_hardwareFlops/total_cycles);
    printf("Bytes/cycle (estimate)              : %f\n\n", bytes_bndlocal(i_timesteps)/total_cycles);
    printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total);
    printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total);
    printf("GiB/s (estimate) for seissol proxy  : %f\n", (bytes_bndlocal(i_timesteps)/(1024.0*1024.0*1024.0))/total);
  }
  printf("=================================================\n");
  printf("\n");

  free_data_structures();
  
  return 0;
}