/** * \brief A bit-twiddling load which will run within the given bytes of memory. * \param [in] plan The struct that holds the plan's data values. * \return int Error flag value * \sa parseCBAPlan * \sa makeCBAPlan * \sa initCBAPlan * \sa perfCBAPlan * \sa killCBAPlan */ int execCBAPlan(void *plan){ #ifdef HAVE_PAPI int k; long long start, end; #endif //HAVE_PAPI int i, j; int niters; ORB_t t1, t2; Plan *p; CBA_data *ci; p = (Plan *)plan; ci = (CBA_data *)p->vptr; /* update execution count */ p->exec_count++; for(i = 0; i < ci->niter; i += NITERS){ niters = ci->niter - i; if(niters > NITERS){ niters = NITERS; } for(j = 0; j < niters; j++){ /* pick NITERS random rows in the range 1..(nrow-1) */ ci->out[j] = 1 + (brand(&(ci->br)) % (ci->nrows - 1)); ci->out[j] <<= 48; /* store index in high 16 bits */ } if(DO_PERF){ #ifdef HAVE_PAPI /* Start PAPI counters and time */ TEST_PAPI(PAPI_reset(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); start = PAPI_get_real_usec(); #endif //HAVE_PAPI ORB_read(t1); } //DO_PERF cnt_bit_arr (ci->work, ci->nrows, ci->ncols, ci->out, niters); if(DO_PERF){ ORB_read(t2); #ifdef HAVE_PAPI end = PAPI_get_real_usec(); //PAPI time /* Collect PAPI counters and store time elapsed */ TEST_PAPI(PAPI_accum(p->PAPI_EventSet, p->PAPI_Results), PAPI_OK, MyRank, 9999, PRINT_SOME); for(k = 0; k < p->PAPI_Num_Events && k < TOTAL_PAPI_EVENTS; k++){ p->PAPI_Times[k] += (end - start); } #endif //HAVE_PAPI perftimer_accumulate(&p->timers, TIMER0, ORB_cycles_a(t2, t1)); } //DO_PERF } return ERR_CLEAN; } /* execCBAPlan */
/** * \brief <DESCRIPTION of your plan goes here..> * \param plan The Plan struct that holds the plan's data values. * \return int Error flag value */ int execDOPENACCGEMMPlan(void *plan){ // <- Replace YOUR_NAME with the name of your module. #ifdef HAVE_PAPI int k; long long start, end; #endif //HAVE_PAPI ORB_t t1, t2; // Storage for timestamps, used to accurately find the runtime of the plan execution. Plan *p; p = (Plan *)plan; p->exec_count++; // Update the execution counter stored in the plan. DOPENACCGEMM_DATA *local_data = (DOPENACCGEMM_DATA *)p->vptr; int error; #ifdef HAVE_PAPI /* Start PAPI counters and time */ TEST_PAPI(PAPI_reset(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); start = PAPI_get_real_usec(); #endif //HAVE_PAPI ORB_read(t1); // Store the timestamp for the beginning of the execution. int jdx; for(jdx=0;jdx < local_data->loop_count; jdx++) { systemburn_openaccblas_dgemm(local_data); } // -------------------------------------------- // Plan is executed here... // -------------------------------------------- ORB_read(t2); // Store timestamp for the end of execution. #ifdef HAVE_PAPI end = PAPI_get_real_usec(); //PAPI time /* Collect PAPI counters and store time elapsed */ TEST_PAPI(PAPI_accum(p->PAPI_EventSet, p->PAPI_Results), PAPI_OK, MyRank, 9999, PRINT_SOME); for(k = 0; k < p->PAPI_Num_Events && k < TOTAL_PAPI_EVENTS; k++){ p->PAPI_Times[k] += (end - start); } #endif //HAVE_PAPI perftimer_accumulate(&p->timers, TIMER0, ORB_cycles_a(t2, t1)); // Store the difference between the timestamps in the plan's timers. if(CHECK_CALC){ // Evaluates to true if the '-t' option is passed on the commandline. ORB_read(t1); // ---------------------------------------------------------------- // Optional: Check calculations performed in execution above. 
// ---------------------------------------------------------------- ORB_read(t2); perftimer_accumulate(&p->timers, TIMER1, ORB_cycles_a(t2, t1)); } return ERR_CLEAN; // <- This inicates a clean run with no errors. Does not need to be changed. } /* execDOPENACCGEMMPlan */
/*
 * Resets the PAPI counters of the event set associated with thread `tid`
 * (looked up through HWCEVTSET).
 *
 * Returns 1 on success; on failure prints a diagnostic to stderr and
 * returns 0.
 */
int HWCBE_PAPI_Reset (unsigned int tid)
{
	if (PAPI_reset(HWCEVTSET(tid)) != PAPI_OK)
	{
		/* tid is unsigned, so it must be printed with %u rather than %d */
		fprintf (stderr, PACKAGE_NAME": PAPI_reset failed for thread %u evtset %d (%s:%d)\n",
			tid, HWCEVTSET(tid), __FILE__, __LINE__);
		return 0;
	}
	return 1;
}
void papi_init() { int max; /* Check PAPI sanity */ if (PAPI_VER_CURRENT != PAPI_library_init(PAPI_VER_CURRENT)) papi_eprintf("PAPI_library_init error.\n"); max = PAPI_num_counters(); PAPI_reset(max); }
/*
 * Reads the current counters of EventSet and folds them into running sums:
 *   values[0][i] += c[i]        (sum of samples)
 *   values[1][i] += c[i] * c[i] (sum of squared samples)
 * and then resets the counters.
 *
 * Returns the PAPI_read() error code if the read fails (in which case
 * `values` is left untouched and the counters are not reset), otherwise the
 * result of PAPI_reset().
 */
int PAPI_accum_var (int EventSet, long_long values[2][NEVENTS])
{
  long_long sample[NEVENTS];
  long_long a;
  int i;
  int rc;

  /* Do not accumulate an uninitialized buffer when the read fails */
  rc = PAPI_read (EventSet, sample);
  if (rc != PAPI_OK)
    return (rc);

  for (i = 0; i < NEVENTS; i++)
    {
      a = sample[i];
      values[0][i] += a;
      values[1][i] += a * a;
    }

  return (PAPI_reset(EventSet));
}
/*
 * Reads the current counters of EventSet and keeps, per event, the smaller of
 * the new sample and the value already stored in values[i]; then resets the
 * counters.
 *
 * Returns the PAPI_read() error code if the read fails (in which case
 * `values` is left untouched and the counters are not reset), otherwise the
 * result of PAPI_reset().
 */
int PAPI_accum_min (int EventSet, long_long *values)
{
  long_long sample[NEVENTS];
  int i;
  int rc;

  /* Do not compare against an uninitialized buffer when the read fails */
  rc = PAPI_read (EventSet, sample);
  if (rc != PAPI_OK)
    return (rc);

  for (i = 0; i < NEVENTS; i++)
    {
      if (sample[i] < values[i])
        values[i] = sample[i];
    }

  return (PAPI_reset(EventSet));
}
int _internal_hl_read_cnts(long long * values, int array_len, int flag) { int retval; HighLevelInfo *state = NULL; if ((retval = _internal_check_state(&state)) != PAPI_OK) return (retval); if (state->running != HL_START_COUNTERS || array_len < state->num_evts) return (PAPI_EINVAL); if (flag == PAPI_HL_ACCUM) return (PAPI_accum(state->EventSet, values)); else if (flag == PAPI_HL_READ) { if ((retval = PAPI_read(state->EventSet, values)) != PAPI_OK) return (retval); return (PAPI_reset(state->EventSet)); } /* Invalid flag passed in */ return (PAPI_EINVAL); }
/*
 * Starts an energy measurement on the given context.
 *
 * If ctx is NULL the function returns immediately (there is no context in
 * which an error code could be stored). If the energy module is not
 * initialized, ctx->error is set to PWR_UNINITIALIZED. Otherwise ctx->error
 * is set to PWR_OK, a measurement that is already running is stopped first,
 * the PAPI event set is reset, the start timestamp is stored in
 * ctx->emeas->duration and counting is started.
 */
void pwr_start_energy_count(pwr_ctx_t *ctx)
{
    if (ctx == NULL) {
        /* Cannot record PWR_INIT_ERR: writing through a NULL ctx would crash */
        return;
    }

    if (!pwr_is_initialized(ctx, PWR_MODULE_ENERGY)) {
        ctx->error = PWR_UNINITIALIZED;
        return;
    }

    ctx->error = PWR_OK;

    /* Restart cleanly if a measurement is already in progress */
    if (ctx->emeas_running) {
        pwr_stop_energy_count(ctx);
    }

    PAPI_reset(ctx->event_set);
    ctx->emeas->duration = PAPI_get_real_nsec();
    ctx->emeas_running = true;
    PAPI_start(ctx->event_set);
}
void papi_set_events(char *metric) { const size_t n = 1; int max; long_long *papi_tmp; int papi_events[1]; int code; max = PAPI_num_counters(); if (n > max) papi_eprintf("Too many counters requested.\n"); papi_tmp = malloc(sizeof(*papi_tmp) * n); PAPI_reset(max); PAPI_stop_counters(papi_tmp, n); if (PAPI_event_name_to_code(metric, &code) != PAPI_OK) papi_eprintf("Unknown PAPI event %s.\n", metric); if (code == 0) papi_eprintf("Unknown PAPI event %s.\n", metric); papi_events[0] = code; PAPI_start_counters(papi_events, n); if (PAPI_read_counters(papi_tmp, n) != PAPI_OK) papi_eprintf("Problem reading counters %s:%d.\n", __FILE__, __LINE__); free(papi_tmp); }
main() { int retval, EventSet=PAPI_NULL; long_long values[1]; int i, guard=0;; int num=0; char ab; char t0[1000000]; char *t1 = (char*)malloc(sizeof(char)*100000000); char t4[1000000]; /* Initialize the PAPI library */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) { fprintf(stderr, "PAPI library init error!\n"); exit(1); } /* Create the Event Set */ if (PAPI_create_eventset(&EventSet) != PAPI_OK) printf("error\n"); /* Add Total Instructions Executed to our Event Set */ if (PAPI_add_event(EventSet, PAPI_TOT_CYC) != PAPI_OK) printf("error\n"); /* Start counting events in the Event Set */ if (PAPI_start(EventSet) != PAPI_OK) printf("error\n"); /* Reset the counting events in the Event Set */ if (PAPI_reset(EventSet) != PAPI_OK) printf("error\n"); /**********************************/ //la primera vez nada para que las instrucciones se guarden en la cache? /* Read the counting events in the Event Set */ if (PAPI_read(EventSet, values) != PAPI_OK) printf("error\n"); for(i=0; i<10000000; i++){ if (PAPI_reset(EventSet) != PAPI_OK) printf("error\n"); ab = t1[i]; if (PAPI_read(EventSet, values) != PAPI_OK) printf("error\n"); // printf("%d\n",values[0]); if(values[0]>1000){ // printf(" tiempo: %lld, i=%d, desde ultimo: %d\n",values[0], i, i-guard); printf("%d\n",i-guard); //printf("%d\n",values[0]); guard = i; } } free(t1); }
int main(int argc, char** argv) { // Parse and validate command-line arguments. if (argc != 2) usage("Invalid number of arguments"); std::string arg(argv[1]); const test_vector_t* test_vec = nullptr; if (arg == "objc") test_vec = &objc_tests; else if (arg == "calls") test_vec = &call_tests; else if (arg == "classes") test_vec = &classes_tests; else if (arg == "hierarchy") test_vec = &hierarchy_tests; else usage("Invalid argument"); // Initialize PAPI library. int rc = PAPI_library_init(PAPI_VER_CURRENT); if (rc != PAPI_VER_CURRENT && rc > 0) fail("PAPI library version mismatch"); else if (rc < 0) fail("failed to init PAPI libarary"); rc = PAPI_is_initialized(); if (rc != PAPI_LOW_LEVEL_INITED) fail("failed to init PAPI libarary"); // Create and fill event set with 2 events: total cycles and total insns. // TODO: implement RAII wrappers for PAPI. int event_set = PAPI_NULL; rc = PAPI_create_eventset(&event_set); if (rc != PAPI_OK) papi_fail(rc, "failed to create event set"); rc = PAPI_add_event(event_set, PAPI_TOT_CYC); if (rc != PAPI_OK) papi_fail(rc, "failed to add total cycles counter"); rc = PAPI_add_event(event_set, PAPI_TOT_INS); if (rc != PAPI_OK) papi_fail(rc, "failed to add total insns counter"); // Warm up. for (const test_t& test : *test_vec) test.second(); // Output CSV header. std::cout << "test,tot_cyc,tot_insn,iter\n"; enum counter_t { CNT_TOT_CYC = 0, CNT_TOT_INS = 1, CNT_NUM }; long long counters[CNT_NUM]; // Benchmark. for (const test_t& test : *test_vec) { // Start counters. rc = PAPI_start(event_set); if (rc != PAPI_OK) papi_fail(rc, "failed to start counters"); // Run tests. for (size_t i = 0; i < num_iter; i++) test.second(); // Stop counters and output one CSV row. 
rc = PAPI_stop(event_set, counters); if (rc != PAPI_OK) papi_fail(rc, "failed to stop counters"); rc = PAPI_reset(event_set); if (rc != PAPI_OK) papi_fail(rc, "failed to reset counters"); std::cout << test.first << ',' << counters[CNT_TOT_CYC] << ',' << counters[CNT_TOT_INS] << ',' << num_iter << '\n'; } // Destroy PAPI event set. rc = PAPI_cleanup_eventset(event_set); if (rc != PAPI_OK) papi_fail(rc, "failed to cleanup event set"); rc = PAPI_destroy_eventset(&event_set); if (rc != PAPI_OK) papi_fail(rc, "failed to destroy event set"); return 0; }
int main(int argc, char **argv) { PAPI_event_info_t info; char name2[PAPI_MAX_STR_LEN]; int i, j, retval, idx, repeats; int iters = NUM_FLOPS; double x = 1.1, y, dtmp; long long t1, t2; long long values[MAXEVENTS], refvals[MAXEVENTS]; int nsamples[MAXEVENTS], truelist[MAXEVENTS], ntrue; #ifdef STARTSTOP long long dummies[MAXEVENTS]; #endif int sleep_time = SLEEPTIME; double valsample[MAXEVENTS][REPEATS]; double valsum[MAXEVENTS]; double avg[MAXEVENTS]; double spread[MAXEVENTS]; int nevents = MAXEVENTS, nev1; int eventset = PAPI_NULL; int events[MAXEVENTS]; int eventidx[MAXEVENTS]; int eventmap[MAXEVENTS]; int fails; events[0] = PAPI_FP_INS; events[1] = PAPI_TOT_CYC; events[2] = PAPI_TOT_INS; events[3] = PAPI_TOT_IIS; events[4] = PAPI_INT_INS; events[5] = PAPI_STL_CCY; events[6] = PAPI_BR_INS; events[7] = PAPI_SR_INS; events[8] = PAPI_LD_INS; for (i = 0; i < MAXEVENTS; i++) { values[i] = 0; valsum[i] = 0; nsamples[i] = 0; } if (argc > 1) { if (!strcmp(argv[1], "TESTS_QUIET")) tests_quiet(argc, argv); else { sleep_time = atoi(argv[1]); if (sleep_time <= 0) sleep_time = SLEEPTIME; } } if (!TESTS_QUIET) { printf("\nFunctional check of multiplexing routines.\n"); printf("Adding and removing events from an event set.\n\n"); } if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) test_fail(__FILE__, __LINE__, "PAPI_library_init", retval); #ifdef MPX init_multiplex(); #endif if ((retval = PAPI_create_eventset(&eventset))) test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval); #ifdef MPX /* In Component PAPI, EventSets must be assigned a component index before you can fiddle with their internals. 
0 is always the cpu component */ retval = PAPI_assign_eventset_component(eventset, 0); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_assign_eventset_component", retval); if ((retval = PAPI_set_multiplex(eventset))) test_fail(__FILE__, __LINE__, "PAPI_set_multiplex", retval); #endif nevents = MAXEVENTS; for (i = 0; i < nevents; i++) { if ((retval = PAPI_add_event(eventset, events[i]))) { for (j = i; j < MAXEVENTS; j++) events[j] = events[j + 1]; nevents--; i--; } } if (nevents < 3) test_skip(__FILE__, __LINE__, "Not enough events left...", 0); /* Find a reasonable number of iterations (each * event active 20 times) during the measurement */ t2 = 10000 * 20 * nevents; /* Target: 10000 usec/multiplex, 20 repeats */ if (t2 > 30e6) test_skip(__FILE__, __LINE__, "This test takes too much time", retval); /* Measure one run */ t1 = PAPI_get_real_usec(); y = dummy3(x, iters); t1 = PAPI_get_real_usec() - t1; if (t2 > t1) /* Scale up execution time to match t2 */ iters = iters * (int)(t2 / t1); else if (t1 > 30e6) /* Make sure execution time is < 30s per repeated test */ test_skip(__FILE__, __LINE__, "This test takes too much time", retval); j = nevents; for (i = 1; i < nevents; i = i + 2) eventidx[--j] = i; for (i = 0; i < nevents; i = i + 2) eventidx[--j] = i; assert(j == 0); for (i = 0; i < nevents; i++) eventmap[i] = i; x = 1.0; if (!TESTS_QUIET) printf("\nReference run:\n"); t1 = PAPI_get_real_usec(); if ((retval = PAPI_start(eventset))) test_fail(__FILE__, __LINE__, "PAPI_start", retval); y = dummy3(x, iters); PAPI_read(eventset, refvals); t2 = PAPI_get_real_usec(); ntrue = nevents; PAPI_list_events(eventset, truelist, &ntrue); if (!TESTS_QUIET) { printf("\tOperations= %.1f Mflop", y * 1e-6); printf("\t(%g Mflop/s)\n\n", ((float) y / (t2 - t1))); printf("%20s %16s %-15s %-15s\n", "PAPI measurement:", "Acquired count", "Expected event", "PAPI_list_events"); } if (!TESTS_QUIET) { for (j = 0; j < nevents; j++) { PAPI_get_event_info(events[j], &info); 
PAPI_event_code_to_name(truelist[j], name2); if (!TESTS_QUIET) printf("%20s = %16lld %-15s %-15s %s\n", info.short_descr, refvals[j], info.symbol, name2, strcmp(info.symbol, name2) ? "*** MISMATCH ***" : ""); } printf("\n"); } nev1 = nevents; repeats = nevents * 4; for (i = 0; i < repeats; i++) { if ((i % nevents) + 1 == nevents) continue; if (!TESTS_QUIET) printf("\nTest %d (of %d):\n", i + 1 - i / nevents, repeats - 4); if ((retval = PAPI_stop(eventset, values))) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); j = eventidx[i % nevents]; if ((i / nevents) % 2 == 0) { PAPI_get_event_info(events[j], &info); if (!TESTS_QUIET) printf("Removing event[%d]: %s\n", j, info.short_descr); if ((retval = PAPI_remove_event(eventset, events[j]))) test_fail(__FILE__, __LINE__, "PAPI_remove_event", retval); nev1--; for (idx = 0; eventmap[idx] != j; idx++); for (j = idx; j < nev1; j++) eventmap[j] = eventmap[j + 1]; } else { PAPI_get_event_info(events[j], &info); if (!TESTS_QUIET) printf("Adding event[%d]: %s\n", j, info.short_descr); if ((retval = PAPI_add_event(eventset, events[j]))) test_fail(__FILE__, __LINE__, "PAPI_add_event", retval); eventmap[nev1] = j; nev1++; } if ((retval = PAPI_start(eventset))) test_fail(__FILE__, __LINE__, "PAPI_start", retval); x = 1.0; #ifndef STARTSTOP if ((retval = PAPI_reset(eventset))) test_fail(__FILE__, __LINE__, "PAPI_reset", retval); #else if ((retval = PAPI_stop(eventset, dummies))) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); if ((retval = PAPI_start(eventset))) test_fail(__FILE__, __LINE__, "PAPI_start", retval); #endif t1 = PAPI_get_real_usec(); y = dummy3(x, iters); PAPI_read(eventset, values); t2 = PAPI_get_real_usec(); if (!TESTS_QUIET) { printf("\n(calculated independent of PAPI)\n"); printf("\tOperations= %.1f Mflop", y * 1e-6); printf("\t(%g Mflop/s)\n\n", ((float) y / (t2 - t1))); printf("%20s %16s %-15s %-15s\n", "PAPI measurement:", "Acquired count", "Expected event", "PAPI_list_events"); } ntrue = nev1; 
PAPI_list_events(eventset, truelist, &ntrue); for (j = 0; j < nev1; j++) { idx = eventmap[j]; /* printf("Mapping: Counter %d -> slot %d.\n",j,idx); */ PAPI_get_event_info(events[idx], &info); PAPI_event_code_to_name(truelist[j], name2); if (!TESTS_QUIET) printf("%20s = %16lld %-15s %-15s %s\n", info.short_descr, values[j], info.symbol, name2, strcmp(info.symbol, name2) ? "*** MISMATCH ***" : ""); dtmp = (double) values[j]; valsum[idx] += dtmp; valsample[idx][nsamples[idx]] = dtmp; nsamples[idx]++; } if (!TESTS_QUIET) printf("\n"); } if (!TESTS_QUIET) { printf("\n\nEstimated variance relative to average counts:\n"); for (j = 0; j < nev1; j++) printf(" Event %.2d", j); printf("\n"); } fails = nevents; /* Due to limited precision of floating point cannot really use typical standard deviation compuation for large numbers with very small variations. Instead compute the std devation problems with precision. */ for (j = 0; j < nev1; j++) { avg[j] = valsum[j] / nsamples[j]; spread[j] = 0; for (i=0; i < nsamples[j]; ++i) { double diff = (valsample[j][i] - avg[j]); spread[j] += diff * diff; } spread[j] = sqrt(spread[j] / nsamples[j]) / avg[j]; if (!TESTS_QUIET) printf("%9.2g ", spread[j]); /* Make sure that NaN get counted as errors */ if (spread[j] < MPX_TOLERANCE) fails--; else if (values[j] < MINCOUNTS) /* Neglect inprecise results with low counts */ fails--; } if (!TESTS_QUIET) { printf("\n\n"); for (j = 0; j < nev1; j++) { PAPI_get_event_info(events[j], &info); printf("Event %.2d: mean=%10.0f, sdev/mean=%7.2g nrpt=%2d -- %s\n", j, avg[j], spread[j], nsamples[j], info.short_descr); } printf("\n\n"); } if (fails) test_fail(__FILE__, __LINE__, "Values differ from reference", fails); else test_pass(__FILE__, NULL, 0); return 0; }
int main(int argc, char **argv) { int retval, num_tests = 5, num_events, tmp; long long **values; int EventSet=PAPI_NULL; int PAPI_event, mask; char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; const PAPI_hw_info_t *hw_info; tests_quiet(argc, argv); /* Set TESTS_QUIET variable */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) test_fail(__FILE__, __LINE__, "PAPI_library_init", retval); hw_info = PAPI_get_hardware_info(); if (hw_info == NULL) test_fail(__FILE__, __LINE__, "PAPI_get_hardware_info", 2); /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depending on the availability of the event on the platform */ EventSet = add_two_events(&num_events, &PAPI_event, hw_info, &mask); retval = PAPI_event_code_to_name(PAPI_event, event_name); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_event_code_to_name", retval); sprintf(add_event_str, "PAPI_add_event[%s]", event_name); values = allocate_test_space(num_tests, num_events); retval = PAPI_start(EventSet); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); do_flops(NUM_FLOPS); retval = PAPI_read(EventSet, values[0]); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_read", retval); retval = PAPI_reset(EventSet); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_reset", retval); do_flops(NUM_FLOPS); retval = PAPI_read(EventSet, values[1]); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_read", retval); do_flops(NUM_FLOPS); retval = PAPI_read(EventSet, values[2]); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_read", retval); do_flops(NUM_FLOPS); retval = PAPI_stop(EventSet, values[3]); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); retval = PAPI_read(EventSet, values[4]); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_read", retval); remove_test_events(&EventSet, mask); if (!TESTS_QUIET) { printf("Test case 1: 
Non-overlapping start, stop, read.\n"); printf("-----------------------------------------------\n"); tmp = PAPI_get_opt(PAPI_DEFDOM, NULL); printf("Default domain is: %d (%s)\n", tmp, stringify_all_domains(tmp)); tmp = PAPI_get_opt(PAPI_DEFGRN, NULL); printf("Default granularity is: %d (%s)\n", tmp, stringify_granularity(tmp)); printf("Using %d iterations of c += a*b\n", NUM_FLOPS); printf ("-------------------------------------------------------------------------\n"); printf("Test type : 1 2 3 4 5\n"); sprintf(add_event_str, "%s:", event_name); printf(TAB5, add_event_str, (values[0])[0], (values[1])[0], (values[2])[0], (values[3])[0], (values[4])[0]); printf(TAB5, "PAPI_TOT_CYC:", (values[0])[1], (values[1])[1], (values[2])[1], (values[3])[1], (values[4])[1]); printf ("-------------------------------------------------------------------------\n"); printf("Verification:\n"); printf("Row 1 Column 1 at least %d\n", NUM_FLOPS); printf("%% difference between %s 1 & 2: %.2f\n",add_event_str,100.0*(float)(values[0])[0]/(float)(values[1])[0]); printf("%% difference between %s 1 & 2: %.2f\n","PAPI_TOT_CYC",100.0*(float)(values[0])[1]/(float)(values[1])[1]); printf("Column 1 approximately equals column 2\n"); printf("Column 3 approximately equals 2 * column 2\n"); printf("Column 4 approximately equals 3 * column 2\n"); printf("Column 4 exactly equals column 5\n"); } { long long min, max; min = (long long) (values[1][0] * .9); max = (long long) (values[1][0] * 1.1); if (values[0][0] > max || values[0][0] < min || values[2][0] > (2 * max) || values[2][0] < (2 * min) || values[3][0] > (3 * max) || values[3][0] < (3 * min) || values[3][0] != values[4][0] || values[0][0] < (long long)NUM_FLOPS) { /* printf("min: "); printf(LLDFMT, min); printf("max: "); printf(LLDFMT, max); printf("1st: "); printf(LLDFMT, values[0][0]); printf("2nd: "); printf(LLDFMT, values[1][0]); printf("3rd: "); printf(LLDFMT, values[2][0]); printf("4th: "); printf(LLDFMT, values[3][0]); printf("5th: "); 
printf(LLDFMT, values[4][0]); printf("\n"); */ test_fail(__FILE__, __LINE__, event_name, 1); } min = (long long) (values[1][1] * .8); max = (long long) (values[1][1] * 1.2); if (values[0][1] > max || values[0][1] < min || values[2][1] > (2 * max) || values[2][1] < (2 * min) || values[3][1] > (3 * max) || values[3][1] < (3 * min) || values[3][1] != values[4][1]) { test_fail(__FILE__, __LINE__, "PAPI_TOT_CYC", 1); } } test_pass(__FILE__, values, num_tests); exit(1); }
int main(int argc, char **argv) { int i, retval, EventSet = PAPI_NULL; int bins = 100; int show_dist = 0, show_std_dev = 0; long long totcyc, values[2]; long long *array; tests_quiet(argc, argv); /* Set TESTS_QUIET variable */ for (i = 0; i < argc; i++) { if (argv[i]) { if (strstr(argv[i], "-b")) { bins = atoi(argv[i+1]); if (bins) i++; else { printf ("-b requires a bin count!\n"); exit(1); } } if (strstr(argv[i], "-d")) show_dist = 1; if (strstr(argv[i], "-h")) { print_help(); exit(1); } if (strstr(argv[i], "-s")) show_std_dev = 1; if (strstr(argv[i], "-t")) { num_iters = atol(argv[i+1]); if (num_iters) i++; else { printf ("-t requires a threshold value!\n"); exit(1); } } } } printf("Cost of execution for PAPI start/stop, read and accum.\n"); printf("This test takes a while. Please be patient...\n"); if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) test_fail(__FILE__, __LINE__, "PAPI_library_init", retval); if ((retval = PAPI_set_debug(PAPI_VERB_ECONT)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_set_debug", retval); if ((retval = PAPI_query_event(PAPI_TOT_CYC)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_query_event", retval); if ((retval = PAPI_query_event(PAPI_TOT_INS)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_query_event", retval); if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval); if ((retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_add_event", retval); if ((retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) if ((retval = PAPI_add_event(EventSet, PAPI_TOT_IIS)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_add_event", retval); /* Make sure no errors and warm up */ totcyc = PAPI_get_real_cyc(); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); if ((retval = PAPI_stop(EventSet, NULL)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", 
retval); array = (long long *)malloc(num_iters*sizeof(long long)); if (array == NULL ) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); /* Determine clock latency */ printf("\nPerforming loop latency test...\n"); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } do_output(0, array, bins, show_std_dev, show_dist); /* Start the start/stop eval */ printf("\nPerforming start/stop test...\n"); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_start(EventSet); PAPI_stop(EventSet, values); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } do_output(1, array, bins, show_std_dev, show_dist); /* Start the read eval */ printf("\nPerforming read test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); PAPI_read(EventSet, values); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_read(EventSet, values); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); do_output(2, array, bins, show_std_dev, show_dist); /* Start the read with timestamp eval */ printf("\nPerforming read with timestamp test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); PAPI_read_ts(EventSet, values, &totcyc); for (i = 0; i < num_iters; i++) { PAPI_read_ts(EventSet, values, &array[i]); } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); /* post-process the timing array */ for (i = num_iters - 1; i > 0 ; i--) { array[i] -= array[i-1]; } array[0] -= totcyc; do_output(3, array, bins, show_std_dev, show_dist); /* Start the accum eval */ printf("\nPerforming accum test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); PAPI_accum(EventSet, values); 
for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_accum(EventSet, values); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); do_output(4, array, bins, show_std_dev, show_dist); /* Start the reset eval */ printf("\nPerforming reset test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_reset(EventSet); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); do_output(5, array, bins, show_std_dev, show_dist); free(array); test_pass(__FILE__, NULL, 0); exit(1); }
int main(){ /************************************/ long_long checksum = 0; int i,j,k; for (i = 0; i < N; ++i) for (j = 0; j < N; ++j){ mul1[i][j]= (i+j) % 8 + 1; mul2[i][j]= (N-i+j) % 8 + 1; res[i][j] = 0; } /************************************/ int retval, EventSet=PAPI_NULL; long_long values[3]; long_long start_cycles, end_cycles, start_usec, end_usec; /* Initialize the PAPI library */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) { fprintf(stderr, "PAPI library init error!\n"); exit(1); } /* Create the Event Set */ if (PAPI_create_eventset(&EventSet) != PAPI_OK) handle_error(1, "create_eventset"); /* Add L1 data cache misses to the Event Set */ if (PAPI_add_event(EventSet,PAPI_L1_DCM) != PAPI_OK) handle_error(1,"add_event - L1_DCM"); /* Add load instructions completed to the Event Set */ if (PAPI_add_event(EventSet,PAPI_LD_INS) != PAPI_OK) handle_error(1,"add_event - LD_INS"); /* Add store instructions completed to the Event Set */ if (PAPI_add_event(EventSet,PAPI_SR_INS) != PAPI_OK) handle_error(1,"add_event - SR_INS"); /* Reset the counting events in the Event Set */ if (PAPI_reset(EventSet) != PAPI_OK) handle_error(1,"reset"); /* Read the counting of events in the Event Set */ if (PAPI_read(EventSet, values) != PAPI_OK) handle_error(1,"read"); printf("After resetting counter 'PAPI_L1_DCM' [x10^6]: %f\n", \ (double)(values[0])/1000000); printf("After resetting counter 'PAPI_LD_INS' [x10^6]: %f\n", \ (double)(values[1])/1000000); printf("After resetting counter 'PAPI_SR_INS' [x10^6]: %f\n", \ (double)(values[2])/1000000); /* Start counting events in the Event Set */ if (PAPI_start(EventSet) != PAPI_OK) handle_error(1,"start"); /* Gets the starting time in clock cycles */ start_cycles = PAPI_get_real_cyc(); /* Gets the starting time in microseconds */ start_usec = PAPI_get_real_usec(); /************************************/ /* MATRIX MULTIPLICATION */ /************************************/ for (i = 0; i < N; ++i) for (j = 0; j < 
N; ++j) for (k = 0; k < N; ++k) res[i][j] += mul1[i][k] * mul2[k][j]; /************************************/ /* Gets the ending time in clock cycles */ end_cycles = PAPI_get_real_cyc(); /* Gets the ending time in microseconds */ end_usec = PAPI_get_real_usec(); /* Stop the counting of events in the Event Set */ if (PAPI_stop(EventSet, values) != PAPI_OK) handle_error(1,"stop"); printf("After stopping counter 'PAPI_L1_DCM' [x10^6]: %f\n", \ (double)(values[0])/1000000); printf("After stopping counter 'PAPI_LD_INS' [x10^6]: %f\n", \ (double)(values[1])/1000000); printf("After stopping counter 'PAPI_SR_INS' [x10^6]: %f\n", \ (double)(values[2])/1000000); printf("Wall clock cycles [x10^6]: %f\n", \ (double)(end_cycles - start_cycles)/1000000); printf("Wall clock time [seconds]: %f\n", \ (double)(end_usec - start_usec)/1000000); for (i = 0; i < N; ++i) for (j = 0; j < N; ++j) checksum+=res[i][j]; printf("Matrix checksum: %lld\n", checksum); return(0); }
int main(int argc, char **argv) { PAPI_event_info_t info; int i, j, retval; int iters = NUM_FLOPS; double x = 1.1, y, dtmp; long long t1, t2; long long values[MAXEVENTS]; int sleep_time = SLEEPTIME; #ifdef STARTSTOP long long dummies[MAXEVENTS]; #endif double valsample[MAXEVENTS][REPEATS]; double valsum[MAXEVENTS]; double avg[MAXEVENTS]; double spread[MAXEVENTS]; int nevents = MAXEVENTS; int eventset = PAPI_NULL; int events[MAXEVENTS]; int fails; events[0] = PAPI_FP_INS; events[1] = PAPI_TOT_INS; events[2] = PAPI_INT_INS; events[3] = PAPI_TOT_CYC; events[4] = PAPI_STL_CCY; events[5] = PAPI_BR_INS; events[6] = PAPI_SR_INS; events[7] = PAPI_LD_INS; events[8] = PAPI_TOT_IIS; for (i = 0; i < MAXEVENTS; i++) { values[i] = 0; valsum[i] = 0; } if (argc > 1) { if (!strcmp(argv[1], "TESTS_QUIET")) tests_quiet(argc, argv); else { sleep_time = atoi(argv[1]); if (sleep_time <= 0) sleep_time = SLEEPTIME; } } if (!TESTS_QUIET) { printf("\nAccuracy check of multiplexing routines.\n"); printf("Investigating the variance of multiplexed measurements.\n\n"); } if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) test_fail(__FILE__, __LINE__, "PAPI_library_init", retval); #ifdef MPX init_multiplex(); #endif if ((retval = PAPI_create_eventset(&eventset))) test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval); #ifdef MPX /* In Component PAPI, EventSets must be assigned a component index before you can fiddle with their internals. 
0 is always the cpu component */ retval = PAPI_assign_eventset_component(eventset, 0); if (retval != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_assign_eventset_component", retval); if ((retval = PAPI_set_multiplex(eventset))) test_fail(__FILE__, __LINE__, "PAPI_set_multiplex", retval); #endif nevents = MAXEVENTS; for (i = 0; i < nevents; i++) { if ((retval = PAPI_add_event(eventset, events[i]))) { for (j = i; j < MAXEVENTS; j++) events[j] = events[j + 1]; nevents--; i--; } } if (nevents < 2) test_skip(__FILE__, __LINE__, "Not enough events left...", 0); /* Find a reasonable number of iterations (each * event active 20 times) during the measurement */ t2 = 10000 * 20 * nevents; /* Target: 10000 usec/multiplex, 20 repeats */ if (t2 > 30e6) test_skip(__FILE__, __LINE__, "This test takes too much time", retval); /* Measure one run */ t1 = PAPI_get_real_usec(); y = dummy3(x, iters); t1 = PAPI_get_real_usec() - t1; if (t2 > t1) /* Scale up execution time to match t2 */ iters = iters * (int)(t2 / t1); else if (t1 > 30e6) /* Make sure execution time is < 30s per repeated test */ test_skip(__FILE__, __LINE__, "This test takes too much time", retval); if ((retval = PAPI_start(eventset))) test_fail(__FILE__, __LINE__, "PAPI_start", retval); for (i = 1; i <= REPEATS; i++) { x = 1.0; #ifndef STARTSTOP if ((retval = PAPI_reset(eventset))) test_fail(__FILE__, __LINE__, "PAPI_reset", retval); #else if ((retval = PAPI_stop(eventset, dummies))) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); if ((retval = PAPI_start(eventset))) test_fail(__FILE__, __LINE__, "PAPI_start", retval); #endif if (!TESTS_QUIET) printf("\nTest %d (of %d):\n", i, REPEATS); t1 = PAPI_get_real_usec(); y = dummy3(x, iters); PAPI_read(eventset, values); t2 = PAPI_get_real_usec(); if (!TESTS_QUIET) { printf("\n(calculated independent of PAPI)\n"); printf("\tOperations= %.1f Mflop", y * 1e-6); printf("\t(%g Mflop/s)\n\n", ((float) y / (t2 - t1))); printf("PAPI measurements:\n"); } for (j = 0; j < nevents; 
j++) { PAPI_get_event_info(events[j], &info); if (!TESTS_QUIET) { printf("%20s = ", info.short_descr); printf(LLDFMT, values[j]); printf("\n"); } dtmp = (double) values[j]; valsum[j] += dtmp; valsample[j][i-1] = dtmp; } if (!TESTS_QUIET) printf("\n"); } if (!TESTS_QUIET) { printf("\n\nEstimated variance relative to average counts:\n"); for (j = 0; j < nevents; j++) printf(" Event %.2d", j); printf("\n"); } fails = nevents; /* Due to limited precision of floating point cannot really use typical standard deviation compuation for large numbers with very small variations. Instead compute the std devation problems with precision. */ for (j = 0; j < nevents; j++) { avg[j] = valsum[j] / REPEATS; spread[j] = 0; for (i=0; i < REPEATS; ++i) { double diff = (valsample[j][i] - avg[j]); spread[j] += diff * diff; } spread[j] = sqrt(spread[j] / REPEATS) / avg[j]; if (!TESTS_QUIET) printf("%9.2g ", spread[j]); /* Make sure that NaN get counted as errors */ if (spread[j] < MPX_TOLERANCE) --fails; else if (valsum[j] < MINCOUNTS) /* Neglect inprecise results with low counts */ --fails; } if (!TESTS_QUIET) { printf("\n\n"); for (j = 0; j < nevents; j++) { PAPI_get_event_info(events[j], &info); printf("Event %.2d: mean=%10.0f, sdev/mean=%7.2g nrpt=%2d -- %s\n", j, avg[j], spread[j], REPEATS, info.short_descr); } printf("\n\n"); } if (fails) test_fail(__FILE__, __LINE__, "Values outside threshold", fails); else test_pass(__FILE__, NULL, 0); return 0; }
/** * \brief A 2 dimensional complex fast Fourier transform in a memory footprint of "size" bytes. * \param [in] plan Holds the data and memory for the plan. * \return int Error flag value * \sa parseFFT2Plan * \sa makeFFT2Plan * \sa initFFT2Plan * \sa perfFFT2Plan * \sa killFFT2Plan */ int execFFT2Plan(void *plan){ #ifdef HAVE_PAPI int k; long long start, end; #endif //HAVE_PAPI int i; ORB_t t1, t2; Plan *p; FFTdata *d; p = (Plan *)plan; d = (FFTdata *)p->vptr; assert(d); assert(d->forward); assert(d->backward); /* update execution count */ p->exec_count++; // for(i=0;i<d->M;i++) { if(d->forward){ if(DO_PERF){ #ifdef HAVE_PAPI /* Start PAPI counters and time */ TEST_PAPI(PAPI_reset(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); start = PAPI_get_real_usec(); #endif //HAVE_PAPI ORB_read(t1); } //DO_PERF fftw_execute(d->forward); if(DO_PERF){ ORB_read(t2); #ifdef HAVE_PAPI end = PAPI_get_real_usec(); //PAPI time /* Collect PAPI counters and store time elapsed */ TEST_PAPI(PAPI_accum(p->PAPI_EventSet, p->PAPI_Results), PAPI_OK, MyRank, 9999, PRINT_SOME); for(k = 0; k < p->PAPI_Num_Events && k < TOTAL_PAPI_EVENTS; k++){ p->PAPI_Times[k] += (end - start); } #endif //HAVE_PAPI perftimer_accumulate(&p->timers, TIMER0, ORB_cycles_a(t2, t1)); } //DO_PERF } if(d->backward){ if(DO_PERF){ #ifdef HAVE_PAPI /* Start PAPI counters and time */ TEST_PAPI(PAPI_reset(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); start = PAPI_get_real_usec(); #endif //HAVE_PAPI ORB_read(t1); } //DO_PERF fftw_execute(d->backward); if(DO_PERF){ ORB_read(t2); #ifdef HAVE_PAPI end = PAPI_get_real_usec(); //PAPI time /* Collect PAPI counters and store time elapsed */ TEST_PAPI(PAPI_accum(p->PAPI_EventSet, p->PAPI_Results), PAPI_OK, MyRank, 9999, PRINT_SOME); for(k = 0; k < p->PAPI_Num_Events && k < TOTAL_PAPI_EVENTS; k++){ p->PAPI_Times[k] += (end - start); } #endif //HAVE_PAPI perftimer_accumulate(&p->timers, TIMER1, ORB_cycles_a(t2, t1)); } //DO_PERF } // } return ERR_CLEAN; } /* 
execFFT2Plan */
int main(int argc, char* argv[]) { double v1[N], v2[N], v3[N], r1[N], r2[N]; double a=1.01,b=1.02,c=1.03,t=0.0,t2=0.0; int i, rank; int retval, perr, ev_set = PAPI_NULL; int encoding; long long counts[NCOUNTS]; #include "bglpersonality.h" #include "rts.h" if(PAPI_VER_CURRENT!=(perr=PAPI_library_init(PAPI_VER_CURRENT))) printf("\nPAPI_library_init failed. %s\n",PAPI_strerror(perr)); { BGLPersonality me; rts_get_personality(&me,sizeof(me)); if(me.xCoord != 0 ) goto fine; if(me.yCoord != 0 ) goto fine; if(me.zCoord != 0 ) goto fine; } for(i=0;i<N;i++) { v1[i]=1.01+0.01*i; v2[i]=2.01+0.01*i; v3[i]=3.01+0.01*i; r1[i]=v1[i]*v2[i]+v3[i]; } if((perr=PAPI_create_eventset(&ev_set))) printf("\nPAPI_create_eventset failed. %s\n",PAPI_strerror(perr)); /* encoding=( BGL_FPU_ARITH_MULT_DIV & 0x3FF ); encoding=( BGL_FPU_ARITH_ADD_SUBTRACT & 0x3FF ); encoding=( BGL_FPU_ARITH_TRINARY_OP & 0x3FF ); */ if((perr=PAPI_add_event(ev_set,PAPI_TOT_CYC))) printf("PAPI_add_event failed. %s\n",PAPI_strerror(perr)); retval = PAPI_event_name_to_code("BGL_FPU_ARITH_OEDIPUS_OP", &encoding); if (retval != PAPI_OK) printf("%s:%d PAPI_event_name_to_code %d\n", __FILE__,__LINE__, retval); if((perr=PAPI_add_event(ev_set,encoding))) printf("\nPAPI_add_event failed. %s\n",PAPI_strerror(perr)); retval = PAPI_event_name_to_code("BGL_2NDFPU_ARITH_OEDIPUS_OP", &encoding); if (retval != PAPI_OK) printf("%s:%d PAPI_event_name_to_code %d\n", __FILE__,__LINE__, retval); if((perr=PAPI_add_event(ev_set,encoding))) printf("\nPAPI_add_event failed. %s\n",PAPI_strerror(perr)); retval = PAPI_event_name_to_code("BGL_FPU_LDST_QUAD_LD", &encoding); if (retval != PAPI_OK) printf("%s:%d PAPI_event_name_to_code %d\n", __FILE__,__LINE__, retval); if((perr=PAPI_add_event(ev_set,encoding))) printf("\nPAPI_add_event failed. 
%s\n",PAPI_strerror(perr)); retval = PAPI_event_name_to_code("BGL_2NDFPU_LDST_QUAD_LD", &encoding); if (retval != PAPI_OK) printf("%s:%d PAPI_event_name_to_code %d\n", __FILE__,__LINE__, retval); if((perr=PAPI_add_event(ev_set,encoding))) printf("\nPAPI_add_event failed. %s\n",PAPI_strerror(perr)); printf("\nAssigning a vector of length %1d and computing " "A()=B()*C()+D().\n",N); if((perr=PAPI_start(ev_set))) printf("\nPAPI_start_event failed. %s\n",PAPI_strerror(perr)); for(i=0;i<N;i++) r2[i]=-1.001; fpmaddv(N,v1,v2,v3,r2); if((perr=PAPI_read(ev_set,counts))) printf("PAPI_read failed. %s\n",PAPI_strerror(perr)); printf("Counts registered: "); for(i=0;i<NCOUNTS;i++) printf(" %12llu",counts[i]); printf("\n"); for(i=0;i<N;i++) { printf(" %g * %g + % g = %g (%g)\n", v1[i],v2[i],v3[i],r2[i],r1[i]); } for(i=0;i<N;i++) r2[i]=-1.001; printf("\nResetting the running counter and computing " "A(1:%1d)=B()*C()+D().\n",N); if((perr=PAPI_reset(ev_set))) printf("\nPAPI_reset failed. %s\n",PAPI_strerror(perr)); fpmaddv(N,v1,v2,v3,r2); if((perr=PAPI_stop(ev_set,counts))) printf("PAPI_stop failed. %s\n",PAPI_strerror(perr)); for(i=0;i<N;i++) { printf(" %g * %g + % g = %g (%g)\n", v1[i],v2[i],v3[i],r2[i],v1[i]*v2[i]+v3[i]); } printf("Testing to read stopped counters\n"); if((perr=PAPI_read(ev_set,counts))) printf("PAPI_read failed. %s\n",PAPI_strerror(perr)); printf("Counts registered: "); for(i=0;i<NCOUNTS;i++) printf(" %12llu",counts[i]); printf("\n"); fine: PAPI_shutdown(); return 0; }
int main (int argc, char **argv) { int retval,i; int EventSet = PAPI_NULL; long long values[NUM_EVENTS]; const PAPI_component_info_t *cmpinfo = NULL; int numcmp,cid,example_cid=-1; int code,maximum_code=0; char event_name[PAPI_MAX_STR_LEN]; PAPI_event_info_t event_info; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); } if (!TESTS_QUIET) { printf( "Testing example component with PAPI %d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ) ); } /* Find our component */ numcmp = PAPI_num_components(); for( cid=0; cid<numcmp; cid++) { if ( (cmpinfo = PAPI_get_component_info(cid)) == NULL) { test_fail(__FILE__, __LINE__, "PAPI_get_component_info failed\n", 0); } if (!TESTS_QUIET) { printf("\tComponent %d - %d events - %s\n", cid, cmpinfo->num_native_events, cmpinfo->name); } if (strstr(cmpinfo->name,"example.c")) { /* FOUND! 
*/ example_cid=cid; } } if (example_cid<0) { test_skip(__FILE__, __LINE__, "Example component not found\n", 0); } if (!TESTS_QUIET) { printf("\nFound Example Component at id %d\n",example_cid); printf("\nListing all events in this component:\n"); } /**************************************************/ /* Listing all available events in this component */ /* Along with descriptions */ /**************************************************/ code = PAPI_NATIVE_MASK | PAPI_COMPONENT_MASK(example_cid); retval = PAPI_enum_event( &code, PAPI_ENUM_FIRST ); while ( retval == PAPI_OK ) { if (PAPI_event_code_to_name( code, event_name )!=PAPI_OK) { printf("Error translating %x\n",code); test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } if (PAPI_get_event_info( code, &event_info)!=PAPI_OK) { printf("Error getting info for event %x\n",code); test_fail( __FILE__, __LINE__, "PAPI_get_event_info()", retval ); } if (!TESTS_QUIET) { printf("\tEvent 0x%x: %s -- %s\n", code,event_name,event_info.long_descr); } maximum_code=code; retval = PAPI_enum_event( &code, PAPI_ENUM_EVENTS ); } if (!TESTS_QUIET) printf("\n"); /**********************************/ /* Try accessing an invalid event */ /**********************************/ retval=PAPI_event_code_to_name( maximum_code+10, event_name ); if (retval!=PAPI_ENOEVNT) { test_fail( __FILE__, __LINE__, "Failed to return PAPI_ENOEVNT on invalid event", retval ); } /***********************************/ /* Test the EXAMPLE_ZERO event */ /***********************************/ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset() failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_ZERO", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_ZERO not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } retval = PAPI_start( 
EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start failed\n",retval ); } retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); } if (!TESTS_QUIET) printf("Testing EXAMPLE_ZERO: %lld\n",values[0]); if (values[0]!=0) { test_fail( __FILE__, __LINE__, "Result should be 0!\n", 0); } retval = PAPI_cleanup_eventset(EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); } retval = PAPI_destroy_eventset(&EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); } EventSet=PAPI_NULL; /***********************************/ /* Test the EXAMPLE_CONSTANT event */ /***********************************/ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset() failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_CONSTANT", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_CONSTANT not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start failed\n",retval ); } retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); } if (!TESTS_QUIET) printf("Testing EXAMPLE_CONSTANT: %lld\n",values[0]); if (values[0]!=42) { test_fail( __FILE__, __LINE__, "Result should be 42!\n", 0); } retval = PAPI_cleanup_eventset(EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); } retval = PAPI_destroy_eventset(&EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); } EventSet=PAPI_NULL; /***********************************/ /* Test 
the EXAMPLE_AUTOINC event */ /***********************************/ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset() failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_AUTOINC", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_AUTOINC not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } if (!TESTS_QUIET) printf("Testing EXAMPLE_AUTOINC: "); for(i=0;i<10;i++) { retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start failed\n",retval ); } retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); } if (!TESTS_QUIET) printf("%lld ",values[0]); if (values[0]!=i) { test_fail( __FILE__, __LINE__, "Result wrong!\n", 0); } } if (!TESTS_QUIET) printf("\n"); /***********************************/ /* Test multiple reads */ /***********************************/ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start failed\n",retval ); } for(i=0;i<10;i++) { retval=PAPI_read( EventSet, values); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_read failed\n", retval); } if (!TESTS_QUIET) printf("%lld ",values[0]); } retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); } if (!TESTS_QUIET) printf("%lld\n",values[0]); // if (values[0]!=i) { // test_fail( __FILE__, __LINE__, "Result wrong!\n", 0); //} /***********************************/ /* Test PAPI_reset() */ /***********************************/ retval = PAPI_reset( EventSet); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_reset() failed\n",retval ); } retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, 
__LINE__, "PAPI_start failed\n",retval ); } retval = PAPI_reset( EventSet); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_reset() failed\n",retval ); } retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); } if (!TESTS_QUIET) printf("Testing EXAMPLE_AUTOINC after PAPI_reset(): %lld\n", values[0]); if (values[0]!=0) { test_fail( __FILE__, __LINE__, "Result not zero!\n", 0); } retval = PAPI_cleanup_eventset(EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); } retval = PAPI_destroy_eventset(&EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); } EventSet=PAPI_NULL; /***********************************/ /* Test multiple events */ /***********************************/ if (!TESTS_QUIET) printf("Testing Multiple Events: "); retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset() failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_CONSTANT", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_CONSTANT not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_GLOBAL_AUTOINC", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_GLOBAL_AUTOINC not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_ZERO", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_ZERO not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } retval = 
PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start failed\n",retval ); } retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); } if (!TESTS_QUIET) { for(i=0;i<3;i++) { printf("%lld ",values[i]); } printf("\n"); } if (values[0]!=42) { test_fail( __FILE__, __LINE__, "Result should be 42!\n", 0); } if (values[2]!=0) { test_fail( __FILE__, __LINE__, "Result should be 0!\n", 0); } retval = PAPI_cleanup_eventset(EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); } retval = PAPI_destroy_eventset(&EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); } EventSet=PAPI_NULL; /***********************************/ /* Test writing to an event */ /***********************************/ if (!TESTS_QUIET) printf("Testing Write\n"); retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset() failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_CONSTANT", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_CONSTANT not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_GLOBAL_AUTOINC", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_GLOBAL_AUTOINC not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval ); } retval = PAPI_event_name_to_code("EXAMPLE_ZERO", &code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "EXAMPLE_ZERO not found\n",retval ); } retval = PAPI_add_event( EventSet, code); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events failed\n", retval 
); } retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start failed\n",retval ); } retval = PAPI_read ( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_read failed\n",retval ); } if (!TESTS_QUIET) { printf("Before values: "); for(i=0;i<3;i++) { printf("%lld ",values[i]); } printf("\n"); } values[0]=100; values[1]=200; values[2]=300; retval = PAPI_write ( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_write failed\n",retval ); } retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); } if (!TESTS_QUIET) { printf("After values: "); for(i=0;i<3;i++) { printf("%lld ",values[i]); } printf("\n"); } if (values[0]!=42) { test_fail( __FILE__, __LINE__, "Result should be 42!\n", 0); } if (values[1]!=200) { test_fail( __FILE__, __LINE__, "Result should be 200!\n", 0); } if (values[2]!=0) { test_fail( __FILE__, __LINE__, "Result should be 0!\n", 0); } retval = PAPI_cleanup_eventset(EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); } retval = PAPI_destroy_eventset(&EventSet); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); } EventSet=PAPI_NULL; /************/ /* All Done */ /************/ if (!TESTS_QUIET) printf("\n"); test_pass( __FILE__, NULL, 0 ); return 0; }
/** * \brief <DESCRIPTION of your plan goes here..> * \param plan The Plan struct that holds the plan's data values. * \return int Error flag value */ int execOPENCL_MEMPlan(void *plan){ // <- Replace YOUR_NAME with the name of your module. #ifdef HAVE_PAPI int k; long long start, end; #endif //HAVE_PAPI ORB_t t1, t2; // Storage for timestamps, used to accurately find the runtime of the plan execution. Plan *p; p = (Plan *)plan; p->exec_count++; // Update the execution counter stored in the plan. OPENCL_MEM_DATA *local_data = (OPENCL_MEM_DATA *)p->vptr; cl_int error; #ifdef HAVE_PAPI /* Start PAPI counters and time */ TEST_PAPI(PAPI_reset(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); start = PAPI_get_real_usec(); #endif //HAVE_PAPI ORB_read(t1); // Store the timestamp for the beginning of the execution. size_t work_size[1]; int idx,jdx; cl_mem buffer; for(jdx = 0; jdx < local_data->loop_count; jdx++){ for(idx = 0; idx < NUM_PATTERNS; idx++){ error = clSetKernelArg(local_data->kernel,0,sizeof(cl_mem),&(local_data->buffer)); assert(error == CL_SUCCESS); error = clSetKernelArg(local_data->kernel,1,sizeof(cl_ulong),(void *)&patterns[idx]); assert(error == CL_SUCCESS); work_size[0] = local_data->device_memory / sizeof(unsigned int); error = clEnqueueNDRangeKernel(local_data->opencl_queue, local_data->kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); assert(error == CL_SUCCESS); clEnqueueReadBuffer(local_data->opencl_queue, local_data->buffer, CL_TRUE, 0, local_data->device_memory, local_data->return_buffer, 0, NULL, NULL); } } // -------------------------------------------- // Plan is executed here... // -------------------------------------------- ORB_read(t2); // Store timestamp for the end of execution. 
#ifdef HAVE_PAPI end = PAPI_get_real_usec(); //PAPI time /* Collect PAPI counters and store time elapsed */ TEST_PAPI(PAPI_accum(p->PAPI_EventSet, p->PAPI_Results), PAPI_OK, MyRank, 9999, PRINT_SOME); for(k = 0; k < p->PAPI_Num_Events && k < TOTAL_PAPI_EVENTS; k++){ p->PAPI_Times[k] += (end - start); } #endif //HAVE_PAPI perftimer_accumulate(&p->timers, TIMER0, ORB_cycles_a(t2, t1)); // Store the difference between the timestamps in the plan's timers. if(CHECK_CALC){ // Evaluates to true if the '-t' option is passed on the commandline. ORB_read(t1); // ---------------------------------------------------------------- // Optional: Check calculations performed in execution above. // ---------------------------------------------------------------- ORB_read(t2); perftimer_accumulate(&p->timers, TIMER1, ORB_cycles_a(t2, t1)); } return ERR_CLEAN; // <- This inicates a clean run with no errors. Does not need to be changed. } /* execOPENCL_MEMPlan */
int main( int argc, char *argv[] ) { if ( argc < 4 ) { printf( "Usage: %s format_file input_file output_file\n", argv[0] ); return EXIT_FAILURE; } // For checking the library initialisation int retval; // EventSet for L2 & L3 cache misses and accesses int EventSet = PAPI_NULL; int EventSet1 = PAPI_NULL; // Data pointer for getting the cpu info const PAPI_hw_info_t * hwinfo = NULL; PAPI_mh_info_t mem_hrch; // Initialising the library retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { printf( "Initialisation of Papi failed \n" ); exit( 1 ); } if ( ( hwinfo = PAPI_get_hardware_info() ) == NULL ) { printf( "Unable to access hw info \n" ); return 1; } /* Accessing the cpus per node, threads per core, memory, frequency */ printf( "No. of cpus in one node : %d \n", hwinfo->ncpu ); printf( "Threads per core : %d \n", hwinfo->threads ); printf( "No. of cores per socket : %d \n", hwinfo->cores ); printf( "No. of sockets : %d \n", hwinfo->sockets ); printf( "Total CPUS in the entire system : %d \n", hwinfo->totalcpus ); /* Variables for reading counters of EventSet*/ long long eventValues[NUMEVENTS] = { 0 }; // long long eventFpValue[ NUM_FPEVENTS ] = {0}; char *format = argv[1]; char *file_in = argv[2]; char *file_out = argv[3]; char delim[] = "."; char *cp = (char *) malloc( sizeof(char) * 10 ); cp = strcpy( cp, file_in ); char *token = malloc( sizeof(char) * 10 ); token = strtok( cp, delim ); char * res_file = malloc( sizeof(char) * 30 ); res_file = strcpy( res_file, file_out ); res_file = strcat( res_file, token ); free( cp ); // free( token ); char *csv_file = malloc( sizeof(char) * 30 ); csv_file = strcpy( csv_file, res_file ); FILE *csv_fp = fopen( strcat( csv_file, OPTI ), "w" ); FILE *res_fp = fopen( strcat( res_file, "_psdats.dat" ), "w" ); int status = 0; free( res_file ); free( csv_file ); /** internal cells start and end index*/ int nintci, nintcf; /** external cells start and end index. The external cells are only ghost cells. 
* They are accessed only through internal cells*/ int nextci, nextcf; /** link cell-to-cell array. Stores topology information*/ int **lcc; /** red-black colouring of the cells*/ int *nboard; /** boundary coefficients for each volume cell */ double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su; // Parameters for measuring the time long long startusec, endusec; /*the total number of points (after conversion to unstructured mesh topology)*/ int nodeCnt; /* the array containing the coordinate of the points * (after conversion to unstructured mesh topology) */ int **points; /* the array containing the mesh elements (after conversion to unstructured mesh topology) */ int **elems; // Creating the eventSets if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) { printf( "Problem in create eventset \n" ); exit( 1 ); } // Create the Flops eventSet /*if ( PAPI_create_eventset( &EventSet1 ) != PAPI_OK ) { printf( "Problem in creating the flops eventset \n" ); exit(1); }*/ int EventCode[NUMEVENTS] = { PAPI_L2_TCM, PAPI_L2_TCA, PAPI_L3_TCM, PAPI_L3_TCA }; // int EventFpCode[ NUM_FPEVENTS ] = { PAPI_FP_OPS }; // Adding events to the eventset if ( PAPI_add_events( EventSet, EventCode, NUMEVENTS ) != PAPI_OK ) { printf( "Problem in adding events \n" ); exit( 1 ); } /*if( PAPI_add_events( EventSet1, EventFpCode, 1 ) != PAPI_OK ){ printf( "Problem in adding the flops event \n" ); exit( 1 ); }*/ printf( "Success in adding events \n" ); // Start the eventset counters PAPI_start( EventSet ); // PAPI_start( EventSet1 ); startusec = PAPI_get_real_usec(); /* initialization */ // read-in the input file int f_status; if ( !strcmp( format, "bin" ) ) { f_status = read_bin_formatted( file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } else if ( !strcmp( format, "txt" ) ) { f_status = read_formatted( file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } if ( f_status != 0 ) { printf( "failed to 
initialize data! \n" ); return EXIT_FAILURE; } // allocate arrays used in gccg int nomax = 3; /** the reference residual*/ double resref = 0.0; /** the ratio between the reference and the current residual*/ double ratio; /** array storing residuals */ double* resvec = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); /** the variation vector -> keeps the result in the end */ double* var = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); /** the computation vectors */ double* direc1 = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); double* direc2 = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); /** additional vectors */ double* cgup = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); double* oc = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* cnorm = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* adxor1 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* adxor2 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* dxor1 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* dxor2 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); // initialize the reference residual for ( int nc = nintci; nc <= nintcf; nc++ ) { resvec[nc] = su[nc]; resref = resref + resvec[nc] * resvec[nc]; } resref = sqrt( resref ); if ( resref < 1.0e-15 ) { printf( "i/o - error: residue sum less than 1.e-15 - %lf\n", resref ); return EXIT_FAILURE; } // initialize the arrays for ( int nc = 0; nc <= 10; nc++ ) { oc[nc] = 0.0; cnorm[nc] = 1.0; } for ( int nc = nintci; nc <= nintcf; nc++ ) { cgup[nc] = 0.0; var[nc] = 0.0; } for ( int nc = nextci; nc <= nextcf; nc++ ) { var[nc] = 0.0; cgup[nc] = 0.0; direc1[nc] = 0.0; bs[nc] = 0.0; be[nc] = 0.0; bn[nc] = 0.0; bw[nc] = 0.0; bl[nc] = 0.0; bh[nc] = 0.0; } for ( int nc = nintci; nc <= nintcf; nc++ ) cgup[nc] = 1.0 / bp[nc]; int if1 = 0; int if2 = 0; int iter = 1; int nor = 1; int nor1 = nor - 1; /* finished initalization */ endusec = PAPI_get_real_usec(); // Read the eventSet 
counters PAPI_read( EventSet, eventValues ); // PAPI_read( EventSet1, eventFpValue ); fprintf( res_fp, "Execution time in microseconds for the initialisation: %lld \n", endusec - startusec ); fprintf( res_fp, "Initialisation.... \n" ); fprintf( res_fp, "INPUT \t PAPI_L2_TCM \t %lld \n", eventValues[0] ); fprintf( res_fp, "INPUT \t PAPI_L2_TCA \t %lld \n", eventValues[1] ); fprintf( res_fp, "INPUT \t PAPI_L3_TCM \t %lld \n", eventValues[2] ); fprintf( res_fp, "INPUT \t PAPI_L3_TCA \t %lld \n", eventValues[3] ); // fprintf( res_fp, "INPUT \t PAPI_FP_OPS \t %lld \n", eventFpValue[0] ); // Cache miss rate calculations float L2_cache_miss_rate, L3_cache_miss_rate; L2_cache_miss_rate = ( (float) eventValues[0] / eventValues[1] ) * 100; L3_cache_miss_rate = ( (float) eventValues[2] / eventValues[3] ) * 100; fprintf( res_fp, "INPUT \t L2MissRate \t %f% \n", L2_cache_miss_rate ); fprintf( res_fp, "INPUT \t L3MissRate \t %f% \n", L3_cache_miss_rate ); fprintf( csv_fp, "Results for the INPUT phase \n" ); fprintf( csv_fp, "%s, %lld, %lld, %lld, %lld, %f, %f \n", OPTI, eventValues[0], eventValues[1], eventValues[2], eventValues[3], L2_cache_miss_rate, L3_cache_miss_rate ); // Resetting the event counters PAPI_reset( EventSet ); // PAPI_reset( EventSet1 ); fprintf( res_fp, "Starting with the computation part \n" ); startusec = PAPI_get_real_usec(); /* start computation loop */ while ( iter < 10000 ) { /* start phase 1 */ // update the old values of direc for ( int nc = nintci; nc <= nintcf; nc++ ) { direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc]; } // compute new guess (approximation) for direc for ( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]] - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]] - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]] - bh[nc] * direc1[lcc[5][nc]]; } /* end phase 1 */ /* start phase 2 */ // execute normalization steps double oc1, oc2, occ; if ( nor1 == 1 ) { oc1 = 0; occ = 0; 
for ( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; for ( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc]; } if1++; } else if ( nor1 == 2 ) { oc1 = 0; occ = 0; for ( int nc = nintci; nc <= nintcf; nc++ ) occ = occ + adxor1[nc] * direc2[nc]; oc1 = occ / cnorm[1]; oc2 = 0; occ = 0; for ( int nc = nintci; nc <= nintcf; nc++ ) occ = occ + adxor2[nc] * direc2[nc]; oc2 = occ / cnorm[2]; for ( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc]; } if2++; } cnorm[nor] = 0; double omega = 0; // compute the new residual for ( int nc = nintci; nc <= nintcf; nc++ ) { cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc]; omega = omega + resvec[nc] * direc2[nc]; } omega = omega / cnorm[nor]; double resnew = 0.0; for ( int nc = nintci; nc <= nintcf; nc++ ) { var[nc] = var[nc] + omega * direc1[nc]; resvec[nc] = resvec[nc] - omega * direc2[nc]; resnew = resnew + resvec[nc] * resvec[nc]; } resnew = sqrt( resnew ); ratio = resnew / resref; // exit on no improvements of residual if ( ratio <= 1.0e-10 ) break; iter++; // prepare additional arrays for the next iteration step if ( nor == nomax ) nor = 1; else { if ( nor == 1 ) { for ( int nc = nintci; nc <= nintcf; nc++ ) { dxor1[nc] = direc1[nc]; adxor1[nc] = direc2[nc]; } } else if ( nor == 2 ) { for ( int nc = nintci; nc <= nintcf; nc++ ) { dxor2[nc] = direc1[nc]; adxor2[nc] = direc2[nc]; } } nor++; } nor1 = nor - 1; }/* end phase 2 */ /* finished computation loop */ endusec = PAPI_get_real_usec(); // Read the eventSet counters PAPI_read( EventSet, eventValues ); // PAPI_read( EventSet1, eventFpValue ); fprintf( res_fp, "Execution time in microseconds for the computation : %lld \n", endusec - startusec ); fprintf( res_fp, "CALC \t PAPI_L2_TCM \t %lld \n", eventValues[0] ); fprintf( res_fp, "CALC \t 
PAPI_L2_TCA \t %lld \n", eventValues[1] ); fprintf( res_fp, "CALC \t PAPI_L3_TCM \t %lld \n", eventValues[2] ); fprintf( res_fp, "CALC \t PAPI_L3_TCA \t %lld \n", eventValues[3] ); // fprintf( res_fp, "CALC \t PAPI_FP_OPS \t %lld \n", eventFpValue[0] ); L2_cache_miss_rate = ( (float) eventValues[0] / eventValues[1] ) * 100; L3_cache_miss_rate = ( (float) eventValues[2] / eventValues[3] ) * 100; fprintf( res_fp, "CALC \t L2MissRate \t %f%\n", L2_cache_miss_rate ); fprintf( res_fp, "CALC \t L3MissRate \t %f%\n", L3_cache_miss_rate ); fprintf( csv_fp, "Results for the CALC phase \n" ); fprintf( csv_fp, "%s, %lld, %lld, %lld, %lld, %f, %f \n", OPTI, eventValues[0], eventValues[1], eventValues[2], eventValues[3], L2_cache_miss_rate, L3_cache_miss_rate ); // Resetting the event counters PAPI_reset( EventSet ); // PAPI_reset( EventSet1 ); char *vtk_file = malloc( sizeof(char) * 30 ); fprintf( res_fp, "Starting with the output vtk part \n" ); startusec = PAPI_get_real_usec(); /* write output file */ vol2mesh( nintci, nintcf, lcc, &nodeCnt, &points, &elems ); if( write_result( file_in, file_out, nintci, nintcf, var, iter, ratio ) != 0 ) { printf( "error when trying to write to file %s\n", file_out ); } if( write_result_vtk( strcat( strcpy( vtk_file, file_out ), "SU.vtk" ), nintci, nintcf, nodeCnt, points, elems, su ) != 0 ) { printf( "error when trying to write to vtk file %s\n", "SU.vtk" ); } if( write_result_vtk( strcat( strcpy( vtk_file, file_out ), "CGUP.vtk" ), nintci, nintcf, nodeCnt, points, elems, cgup ) != 0 ) { printf( "error when trying to write to vtk file %s\n", "CGUP.vtk" ); } if( write_result_vtk( strcat( strcpy( vtk_file, file_out ), "VAR.vtk" ), nintci, nintcf, nodeCnt, points, elems, var ) != 0 ) { printf( "error when trying to write to vtk file %s\n", "VAR.vtk" ); } free( vtk_file ); /* finished computation loop */ endusec = PAPI_get_real_usec(); // Read the eventSet counters PAPI_stop( EventSet, eventValues ); // PAPI_stop( EventSet1, eventFpValue ); 
fprintf( res_fp, "Execution time in microseconds for the output vtk part : %lld \n", endusec - startusec ); fprintf( res_fp, "OUTPUT \t PAPI_L2_TCM \t %lld \n", eventValues[0] ); fprintf( res_fp, "OUTPUT \t PAPI_L2_TCA \t %lld \n", eventValues[1] ); fprintf( res_fp, "OUTPUT \t PAPI_L3_TCM \t %lld \n", eventValues[2] ); fprintf( res_fp, "OUTPUT \t PAPI_L3_TCA \t %lld \n", eventValues[3] ); // fprintf( res_fp, "CALC \t PAPI_FP_OPS \t %lld \n", eventFpValue[0] ); L2_cache_miss_rate = ( (float) eventValues[0] / eventValues[1] ) * 100; L3_cache_miss_rate = ( (float) eventValues[2] / eventValues[3] ) * 100; fprintf( res_fp, "OUTPUT \t L2MissRate \t %f%\n", L2_cache_miss_rate ); fprintf( res_fp, "OUTPUT \t L3MissRate \t %f%\n", L3_cache_miss_rate ); fprintf( csv_fp, "Results for the OUTPUT phase \n" ); fprintf( csv_fp, "%s, %lld, %lld, %lld, %lld, %f, %f \n", OPTI, eventValues[0], eventValues[1], eventValues[2], eventValues[3], L2_cache_miss_rate, L3_cache_miss_rate ); /* Free all the dynamically allocated memory */ free( direc2 ); free( direc1 ); free( dxor2 ); free( dxor1 ); free( adxor2 ); free( adxor1 ); free( cnorm ); free( oc ); free( var ); free( cgup ); free( resvec ); free( su ); free( bp ); free( bh ); free( bl ); free( bw ); free( bn ); free( be ); free( bs ); printf( "Simulation completed successfully!\n" ); fclose( res_fp ); fclose( csv_fp ); return EXIT_SUCCESS; }