/**
 * Byte estimate for the "all" proxy mode.
 *
 * @param i_timesteps number of timesteps the estimate is computed for.
 * @return sum of bytes_local(i_timesteps) and bytes_bndneigh(i_timesteps).
 */
double bytes_all(unsigned int i_timesteps) {
  // "all" runs the local and the neighboring integration per timestep,
  // so its estimate is the sum of the two individual estimates.
  const double d_total_bytes = bytes_local(i_timesteps) + bytes_bndneigh(i_timesteps);
  return d_total_bytes;
}
int main(int argc, char* argv[]) { if (argc != 4) { printf("Wrong parameters!\n"); printf(" #cells #timesteps kernel\n"); printf(" kernel-values: all, local, neigh, ader, vol, bndlocal\n"); return -1; } unsigned int i_cells = atoi(argv[1]); unsigned int i_timesteps = atoi(argv[2]); std::string s_part; s_part.assign(argv[3]); // double-check if the selected kernel exists if ( (s_part.compare("all") != 0) && (s_part.compare("local") != 0) && (s_part.compare("neigh") != 0) && (s_part.compare("ader") != 0) && (s_part.compare("vol") != 0) && (s_part.compare("bndlocal") != 0) ) { printf("Wrong parameters!\n"); printf(" #cells #timesteps kernel\n"); printf(" kernel-values: all, local, neigh, ader, vol, bndlocal\n"); return -1; } printf("Allocating fake data...\n"); i_cells = init_data_structures(i_cells); printf("...done\n\n"); struct timeval start_time, end_time; size_t cycles_start, cycles_end; double total = 0.0; double total_cycles = 0.0; // init OpenMP and LLC if (s_part.compare("all") == 0) { computeLocalIntegration(); computeNeighboringIntegration(); } else if (s_part.compare("local") == 0) { computeLocalIntegration(); } else if (s_part.compare("neigh") == 0) { computeNeighboringIntegration(); } else if (s_part.compare("ader") == 0) { computeAderIntegration(); } else if (s_part.compare("vol") == 0) { computeVolumeIntegration(); } else { computeLocalBoundaryIntegration(); } gettimeofday(&start_time, NULL); #ifdef __USE_RDTSC cycles_start = __rdtsc(); #endif if (s_part.compare("all") == 0) { for (unsigned int t = 0; t < i_timesteps; t++) { computeLocalIntegration(); computeNeighboringIntegration(); } } else if (s_part.compare("local") == 0) { for (unsigned int t = 0; t < i_timesteps; t++) { computeLocalIntegration(); } } else if (s_part.compare("neigh") == 0) { for (unsigned int t = 0; t < i_timesteps; t++) { computeNeighboringIntegration(); } } else if (s_part.compare("ader") == 0) { for (unsigned int t = 0; t < i_timesteps; t++) { computeAderIntegration(); } } else 
if (s_part.compare("vol") == 0) { for (unsigned int t = 0; t < i_timesteps; t++) { computeVolumeIntegration(); } } else { for (unsigned int t = 0; t < i_timesteps; t++) { computeLocalBoundaryIntegration(); } } #ifdef __USE_RDTSC cycles_end = __rdtsc(); #endif gettimeofday(&end_time, NULL); total = sec(start_time, end_time); #ifdef __USE_RDTSC printf("Cycles via __rdtsc()!\n"); total_cycles = (double)(cycles_end-cycles_start); #else total_cycles = derive_cycles_from_time(total); #endif printf("=================================================\n"); printf("=== PERFORMANCE SUMMARY ===\n"); printf("=================================================\n"); printf("seissol proxy mode : %s\n", s_part.c_str()); printf("time for seissol proxy : %f\n", total); printf("cycles : %f\n\n", total_cycles); seissol_flops actual_flops; if (s_part.compare("all") == 0) { actual_flops = flops_all_actual(i_timesteps); printf("GFLOP (non-zero) for seissol proxy : %f\n", actual_flops.d_nonZeroFlops/(1e9)); printf("GFLOP (hardware) for seissol proxy : %f\n", actual_flops.d_hardwareFlops/(1e9)); //printf("GFLOP (estimate) for seissol proxy : %f\n", flops_all(i_timesteps)/(1e9)); printf("GiB (estimate) for seissol proxy : %f\n\n", bytes_all(i_timesteps)/(1024.0*1024.0*1024.0)); printf("FLOPS/cycle (non-zero) : %f\n", actual_flops.d_nonZeroFlops/total_cycles); printf("FLOPS/cycle (hardware) : %f\n", actual_flops.d_hardwareFlops/total_cycles); printf("Bytes/cycle (estimate) : %f\n\n", bytes_all(i_timesteps)/total_cycles); printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total); printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total); printf("GiB/s (estimate) for seissol proxy : %f\n", (bytes_all(i_timesteps)/(1024.0*1024.0*1024.0))/total); } else if (s_part.compare("local") == 0) { actual_flops = flops_local_actual(i_timesteps); printf("GFLOP (non-zero) for seissol proxy : %f\n", actual_flops.d_nonZeroFlops/(1e9)); 
printf("GFLOP (hardware) for seissol proxy : %f\n", actual_flops.d_hardwareFlops/(1e9)); //printf("GFLOP (estimate) for seissol proxy : %f\n", flops_local(i_timesteps)/(1e9)); printf("GiB (estimate) for seissol proxy : %f\n\n", bytes_local(i_timesteps)/(1024.0*1024.0*1024.0)); printf("FLOPS/cycle (non-zero) : %f\n", actual_flops.d_nonZeroFlops/total_cycles); printf("FLOPS/cycle (hardware) : %f\n", actual_flops.d_hardwareFlops/total_cycles); printf("Bytes/cycle (estimate) : %f\n\n", bytes_local(i_timesteps)/total_cycles); printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total); printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total); printf("GiB/s (estimate) for seissol proxy : %f\n", (bytes_local(i_timesteps)/(1024.0*1024.0*1024.0))/total); } else if (s_part.compare("neigh") == 0) { actual_flops = flops_bndneigh_actual(i_timesteps); printf("GFLOP (non-zero) for seissol proxy : %f\n", actual_flops.d_nonZeroFlops/(1e9)); printf("GFLOP (hardware) for seissol proxy : %f\n", actual_flops.d_hardwareFlops/(1e9)); //printf("GFLOP (estimate) for seissol proxy : %f\n", flops_bndneigh(i_timesteps)/(1e9)); printf("GiB (estimate) for seissol proxy : %f\n\n", bytes_bndneigh(i_timesteps)/(1024.0*1024.0*1024.0)); printf("FLOPS/cycle (non-zero) : %f\n", actual_flops.d_nonZeroFlops/total_cycles); printf("FLOPS/cycle (hardware) : %f\n", actual_flops.d_hardwareFlops/total_cycles); printf("Bytes/cycle (estimate) : %f\n\n", bytes_bndneigh(i_timesteps)/total_cycles); printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total); printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total); printf("GiB/s (estimate) for seissol proxy : %f\n", (bytes_bndneigh(i_timesteps)/(1024.0*1024.0*1024.0))/total); } else if (s_part.compare("ader") == 0) { actual_flops = flops_ader_actual(i_timesteps); printf("GFLOP (non-zero) for seissol proxy : %f\n", 
actual_flops.d_nonZeroFlops/(1e9)); printf("GFLOP (hardware) for seissol proxy : %f\n", actual_flops.d_hardwareFlops/(1e9)); //printf("GFLOP (estimate) for seissol proxy : %f\n", flops_ader(i_timesteps)/(1e9)); printf("GiB (estimate) for seissol proxy : %f\n\n", bytes_ader(i_timesteps)/(1024.0*1024.0*1024.0)); printf("FLOPS/cycle (non-zero) : %f\n", actual_flops.d_nonZeroFlops/total_cycles); printf("FLOPS/cycle (hardware) : %f\n", actual_flops.d_hardwareFlops/total_cycles); printf("Bytes/cycle (estimate) : %f\n\n", bytes_ader(i_timesteps)/total_cycles); printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total); printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total); printf("GiB/s (estimate) for seissol proxy : %f\n", (bytes_ader(i_timesteps)/(1024.0*1024.0*1024.0))/total); } else if (s_part.compare("vol") == 0) { actual_flops = flops_vol_actual(i_timesteps); printf("GFLOP (non-zero) for seissol proxy : %f\n", actual_flops.d_nonZeroFlops/(1e9)); printf("GFLOP (hardware) for seissol proxy : %f\n", actual_flops.d_hardwareFlops/(1e9)); //printf("GFLOP (estimate) for seissol proxy : %f\n", flops_vol(i_timesteps)/(1e9)); printf("GiB (estimate) for seissol proxy : %f\n\n", bytes_vol(i_timesteps)/(1024.0*1024.0*1024.0)); printf("FLOPS/cycle (non-zero) : %f\n", actual_flops.d_nonZeroFlops/total_cycles); printf("FLOPS/cycle (hardware) : %f\n", actual_flops.d_hardwareFlops/total_cycles); printf("Bytes/cycle (estimate) : %f\n\n", bytes_vol(i_timesteps)/total_cycles); printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total); printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total); printf("GiB/s (estimate) for seissol proxy : %f\n", (bytes_vol(i_timesteps)/(1024.0*1024.0*1024.0))/total); } else { actual_flops = flops_bndlocal_actual(i_timesteps); printf("GFLOP (non-zero) for seissol proxy : %f\n", 
actual_flops.d_nonZeroFlops/(1e9)); printf("GFLOP (hardware) for seissol proxy : %f\n", actual_flops.d_hardwareFlops/(1e9)); //printf("GFLOP (estimate) for seissol proxy : %f\n", flops_bndlocal(i_timesteps)/(1e9)); printf("GiB (estimate) for seissol proxy : %f\n\n", bytes_bndlocal(i_timesteps)/(1024.0*1024.0*1024.0)); printf("FLOPS/cycle (non-zero) : %f\n", actual_flops.d_nonZeroFlops/total_cycles); printf("FLOPS/cycle (hardware) : %f\n", actual_flops.d_hardwareFlops/total_cycles); printf("Bytes/cycle (estimate) : %f\n\n", bytes_bndlocal(i_timesteps)/total_cycles); printf("GFLOPS (non-zero) for seissol proxy : %f\n", (actual_flops.d_nonZeroFlops/(1e9))/total); printf("GFLOPS (hardware) for seissol proxy : %f\n", (actual_flops.d_hardwareFlops/(1e9))/total); printf("GiB/s (estimate) for seissol proxy : %f\n", (bytes_bndlocal(i_timesteps)/(1024.0*1024.0*1024.0))/total); } printf("=================================================\n"); printf("\n"); free_data_structures(); return 0; }