int main(int argc, char *argv[]) { double results[NUM_TRIALS]; int i, papi_setnum; // initialize papi int desired_events[] = {PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L2_DCA, PAPI_L2_DCM, PAPI_L3_DCA, PAPI_L3_DCM, PAPI_TLB_DM, PAPI_LD_INS, PAPI_SR_INS}; int num_desired = 9; PAPI_event_set_wrapper_t* event_sets; int num_sets; papi_init(desired_events, num_desired, &event_sets, &num_sets); // input parameters int log2_stanzaLength = atoi(argv[1]); int log2_numIterations = atoi(argv[2]); // compute actual values from base 2 logs stanzaLength = 1; for (i=0; i<log2_stanzaLength; i++) { stanzaLength *= 2; } numIterations = 1; for (i=0; i<log2_numIterations; i++) { numIterations *= 2; } int arrayLength = stanzaLength; printf("\nstanzaLength = %d\n", stanzaLength); printf("arrayLength = %d\n", arrayLength); printf("numIterations = %d\n", numIterations); printf("num_sets = %d\n\n", num_sets); // allocate working arrays A = (double *) malloc(arrayLength * sizeof(double)); B = (double *) malloc(arrayLength * sizeof(double)); if (A==NULL) { printf("Error on array A malloc.\n"); exit(EXIT_FAILURE); } if (B==NULL) { printf("Error on array B malloc.\n"); exit(EXIT_FAILURE); } // initialize arrays init_flush_cache_array(); initArrays(); for (papi_setnum=0; papi_setnum < num_sets; papi_setnum++) { PAPI_MAKE_MEASUREMENTS(event_sets[papi_setnum].set, cacheBenchmark(), NUM_TRIALS, results); print_measurements(&(event_sets[papi_setnum]), results, NUM_TRIALS); } papi_cleanup(event_sets, num_sets); return 0; }
int main(int argc, char *argv[]) { double results[NUM_TRIALS]; #if !defined(DEBUG) #if defined(PAPI_ENABLED) int papi_setnum, num_desired, num_sets; #else double median_counts_per_sec; #endif #endif int i; printf("7-point stencil, no add, naive C code with non-periodic boundary conditions\n"); #if !defined(DEBUG) #if defined(PAPI_ENABLED) // initialize papi int desired_events[] = {PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L2_DCA, PAPI_L2_DCM, PAPI_L3_DCM, PAPI_TLB_DM, PAPI_LD_INS, PAPI_SR_INS}; num_desired = 9; PAPI_event_set_wrapper_t* event_sets; papi_init(desired_events, num_desired, &event_sets, &num_sets); #else // calculate clock rate GET_CLOCK_RATE(results, NUM_TRIALS); median_counts_per_sec = find_median(results, NUM_TRIALS); #endif #endif // initialize arrays init_flush_cache_array(); malloc_grids(argv); printf("\n"); #if defined(DEBUG) init_grids(); printf("SINGLY NESTED LOOP:\n"); printf("\nGRID A BEFORE:"); print_grid(A); printf("\nGRID B BEFORE:"); print_grid(B); naive_singly_nested_loop(); printf("\nGRID A AFTER:"); print_grid(A); printf("\nGRID B AFTER:"); print_grid(B); init_grids(); printf("TRIPLY NESTED LOOPS:\n"); printf("\nGRID A BEFORE:"); print_grid(A); printf("\nGRID B BEFORE:"); print_grid(B); naive_triply_nested_loops(); printf("\nGRID A AFTER:"); print_grid(A); printf("\nGRID B AFTER:"); print_grid(B); #else #if defined(PAPI_ENABLED) printf("SINGLY NESTED LOOP:\n"); for (papi_setnum=0; papi_setnum < num_sets; papi_setnum++) { PAPI_MAKE_MEASUREMENTS(event_sets[papi_setnum].set, naive_singly_nested_loop(), NUM_TRIALS, results); print_papi_measurements(&(event_sets[papi_setnum]), results, NUM_TRIALS); } printf("\n"); printf("TRIPLY NESTED LOOPS:\n"); for (papi_setnum=0; papi_setnum < num_sets; papi_setnum++) { PAPI_MAKE_MEASUREMENTS(event_sets[papi_setnum].set, naive_triply_nested_loops(), NUM_TRIALS, results); print_papi_measurements(&(event_sets[papi_setnum]), results, NUM_TRIALS); } printf("\n"); papi_cleanup(event_sets, num_sets); #else printf("SINGLY NESTED LOOP:\n"); TIMER_MAKE_MEASUREMENTS(naive_singly_nested_loop(), results, NUM_TRIALS); print_timer_measurements(results, NUM_TRIALS, median_counts_per_sec); printf("\n"); printf("TRIPLY NESTED LOOPS:\n"); TIMER_MAKE_MEASUREMENTS(naive_triply_nested_loops(), results, NUM_TRIALS); print_timer_measurements(results, NUM_TRIALS, median_counts_per_sec); printf("\n"); printf("\n"); #endif #endif printf("\nFinal interior values: A[%lu, %lu, %lu] = %4.2e, B[%lu, %lu, %lu] = %4.2e\n", nx/2, ny/2, nz/2, A[Index3D(nx/2, ny/2, nz/2)], nx/2, ny/2, nz/2, B[Index3D(nx/2, ny/2, nz/2)]); fc_checksum(); free(A); free(B); return EXIT_SUCCESS; }