static inline void perform_local_work(void) { # ifdef TIME_WORKLOAD qtimer_t work_timer = qtimer_create(); qtimer_start(work_timer); # endif // TIME_WORKLOAD volatile unsigned long work = workload; long rand_per = (long)qtimer_fastrand(); long rand_var = (long)qtimer_fastrand(); rand_per = (rand_per<0) ? (-rand_per)%100 : rand_per%100; if (rand_per < workload_per) { rand_var = (rand_var<0) ? (-rand_var)%100 : rand_var%100; work += (workload * (workload_var * 0.01)) * (rand_var * 0.01); } for (int i = 0; i < work; i++) { work = work % 1000000000; } work++; # ifdef TIME_WORKLOAD qtimer_stop(work_timer); fprintf(stdout, "Worked for %f\n", qtimer_secs(work_timer)); qtimer_destroy(work_timer); # endif // TIME_WORKLOAD }
/*
 * Benchmark: time how long it takes to create and join NUM_THREADS pthreads
 * that each run qincr() on a shared counter.  The measurement is repeated
 * ten times and the mean wall-clock time is printed.
 *
 * Returns 0 on success, 1 if a thread could not be created.
 */
int main(int argc, char *argv[])
{
    pthread_t rets[NUM_THREADS];
    qtimer_t  timer           = qtimer_create();
    double    cumulative_time = 0.0;
    /* Handed to every thread; must start from a defined value. */
    size_t    counter         = 0;

    CHECK_VERBOSE();

    for (int iteration = 0; iteration < 10; iteration++) {
        qtimer_start(timer);
        for (int i = 0; i < NUM_THREADS; i++) {
            /* Joining a pthread_t that was never created is undefined, so
             * stop as soon as creation fails. */
            if (pthread_create(&(rets[i]), NULL, qincr, &counter) != 0) {
                fprintf(stderr, "pthread_create failed for thread %i\n", i);
                qtimer_destroy(timer);
                return 1;
            }
        }
        for (int i = 0; i < NUM_THREADS; i++) {
            pthread_join(rets[i], NULL);
        }
        qtimer_stop(timer);
        iprintf("\ttest iteration %i: %f secs\n", iteration,
                qtimer_secs(timer));
        cumulative_time += qtimer_secs(timer);
    }
    qtimer_destroy(timer);

    printf("pthread time: %f\n", cumulative_time / 10.0);

    return 0;
}
int main(int argc, char *argv[]) { aligned_t rets[NUM_THREADS]; qtimer_t timer = qtimer_create(); double cumulative_time = 0.0; if (qthread_initialize() != QTHREAD_SUCCESS) { fprintf(stderr, "qthread library could not be initialized!\n"); exit(EXIT_FAILURE); } CHECK_VERBOSE(); for (int iteration = 0; iteration < 10; iteration++) { qtimer_start(timer); for (int i = 0; i < NUM_THREADS; i++) { qthread_fork(qincr, NULL, &(rets[i])); } for (int i = 0; i < NUM_THREADS; i++) { qthread_readFF(NULL, &(rets[i])); } qtimer_stop(timer); iprintf("\ttest iteration %i: %f secs\n", iteration, qtimer_secs(timer)); cumulative_time += qtimer_secs(timer); } printf("qthread time: %f\n", cumulative_time / 10.0); return 0; }
/*
 * Repeatedly yield the calling qthread until the timer reports that at
 * least one second has elapsed.  At least one yield is always performed.
 *
 * The parameter `i` is not used by the body.
 */
void test_print_qthread(size_t i)
{
    qtimer_t stopwatch = qtimer_create();

    qtimer_start(stopwatch);
    for (;;) {
        qthread_yield();
        /* Stop the timer after each yield so qtimer_secs() reflects the
         * time elapsed since the start. */
        qtimer_stop(stopwatch);
        if (!(qtimer_secs(stopwatch) < 1)) {
            break;
        }
    }
    qtimer_destroy(stopwatch);
}
/*
 * OpenMP task-spawn benchmark.
 *
 * Spawns `count` untied tasks that each call null_task(NULL) and prints
 * "<threads> <count> <seconds>".  `count` and `par_fork` are passed through
 * NUMARG with the names MT_COUNT and MT_PAR_FORK (presumably environment
 * overrides -- confirm against the NUMARG macro).
 *
 * par_fork != 0: tasks are created from inside a nested `parallel for`.
 * par_fork == 0: one outer task runs the loop that creates all the tasks.
 */
int main(int argc, char *argv[])
{
    uint64_t      count     = 1048576;
    int           par_fork  = 0;
    unsigned long team_size = 1;
    qtimer_t      stopwatch;
    double        elapsed   = 0.0;

    CHECK_VERBOSE();

    NUMARG(count, "MT_COUNT");
    NUMARG(par_fork, "MT_PAR_FORK");
    assert(0 != count);

#pragma omp parallel
#pragma omp single
    {
        stopwatch = qtimer_create();
        team_size = omp_get_num_threads();

        /* Timing starts immediately before either spawning strategy. */
        qtimer_start(stopwatch);
        if (!par_fork) {
            /* Serial spawn: a single task generates every child task. */
#pragma omp task untied
            for (uint64_t i = 0; i < count; i++) {
#pragma omp task untied
                null_task(NULL);
            }
        } else {
            /* Parallel spawn: loop iterations are divided among threads. */
#pragma omp parallel for
            for (uint64_t i = 0; i < count; i++) {
#pragma omp task untied
                null_task(NULL);
            }
        }
#pragma omp taskwait
        qtimer_stop(stopwatch);
    }

    elapsed = qtimer_secs(stopwatch);
    qtimer_destroy(stopwatch);

    printf("%lu %lu %f\n", team_size, (unsigned long)count, elapsed);

    return 0;
}
/*
 * Sanity test for the qtimer API, followed by the fastrand statistical
 * checks (ks_test, runs, autocorrelation).
 *
 * The timer is started and stopped twice back-to-back; a zero or negative
 * duration is reported, and the second measurement must be non-negative.
 */
int main(int argc, char *argv[])
{
    qtimer_t t;

    /* Initialize outside of assert(): under NDEBUG the assert expression is
     * not evaluated, which would silently skip library initialization. */
    const int init_status = qthread_initialize();
    assert(init_status == QTHREAD_SUCCESS);
    (void)init_status;

    CHECK_VERBOSE();

    t = qtimer_create();
    assert(t);
    qtimer_start(t);
    qtimer_stop(t);
    if (qtimer_secs(t) == 0) {
        fprintf(stderr, "qtimer_secs(t) reported zero length time.\n");
    } else if (qtimer_secs(t) < 0) {
        fprintf(stderr, "qtimer_secs(t) thinks time went backwards (%g).\n",
                qtimer_secs(t));
    }
    iprintf("time to find self and assert it: %g secs\n", qtimer_secs(t));

    qtimer_start(t);
    qtimer_stop(t);
    assert(qtimer_secs(t) >= 0.0);
    if (qtimer_secs(t) == 0.0) {
        iprintf("inlining reduces calltime to zero (apparently)\n");
    } else {
        iprintf("smallest measurable time: %g secs\n", qtimer_secs(t));
    }

    qtimer_destroy(t);

    // Now to test fastrand
    ks_test();
    runs();
    autocorrelation();

    qthread_finalize();

    return 0;
}
int main(int argc, char *argv[]) { int n = 10; int m = 10; num_timesteps = 10; workload = 0; workload_per = 0; workload_var = 0; int print_final = 0; int alltime = 0; CHECK_VERBOSE(); NUMARG(n, "N"); NUMARG(m, "M"); NUMARG(num_timesteps, "TIMESTEPS"); NUMARG(workload, "WORKLOAD"); NUMARG(workload_per, "WORKLOAD_PER"); NUMARG(workload_var, "WORKLOAD_VAR"); NUMARG(print_final, "PRINT_FINAL"); NUMARG(alltime, "ALL_TIME"); assert (n > 0 && m > 0); // Initialize Qthreads assert(qthread_initialize() == 0); qtimer_t alloc_timer = qtimer_create(); qtimer_t init_timer = qtimer_create(); qtimer_t exec_timer = qtimer_create(); // Allocate memory for 3-stage stencil (with boundary padding) qtimer_start(alloc_timer); stencil_t points; points.N = n + 2; points.M = m + 2; for (int s = 0; s < NUM_STAGES; s++) { points.stage[s] = malloc(points.N*sizeof(aligned_t *)); assert(NULL != points.stage[s]); for (int i = 0; i < points.N; i++) { points.stage[s][i] = calloc(points.M, sizeof(aligned_t)); assert(NULL != points.stage[s][i]); } } qtimer_stop(alloc_timer); // Initialize first stage and set boundary conditions qtimer_start(init_timer); for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { qthread_writeF_const(&points.stage[0][i][j], 0); for (int s = 1; s < NUM_STAGES; s++) qthread_empty(&points.stage[s][i][j]); } } for (int i = 0; i < points.N; i++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][i][0], BOUNDARY); qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY); #else points.stage[s][i][0] = BOUNDARY; points.stage[s][i][points.M-1] = BOUNDARY; #endif } } for (int j = 0; j < points.M; j++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][0][j], BOUNDARY); qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY); #else points.stage[s][0][j] = BOUNDARY; points.stage[s][points.N-1][j] = BOUNDARY; #endif } } qtimer_stop(init_timer); // 
Create barrier to synchronize on completion of calculations qtimer_start(exec_timer); points.barrier = qt_feb_barrier_create(n*m+1); // Spawn tasks to start calculating updates at each point update_args_t args = {&points, -1, -1, 1, 1}; for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { args.i = i; args.j = j; qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL); } } // Wait for calculations to finish qt_feb_barrier_enter(points.barrier); qtimer_stop(exec_timer); // Print timing info if (alltime) { fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer)); fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer)); fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer)); } else { fprintf(stdout, "%f\n", qtimer_secs(exec_timer)); } // Print stencils if (print_final) { size_t final = (num_timesteps % NUM_STAGES); iprintf("Stage %lu:\n", prev_stage(prev_stage(final))); print_stage(&points, prev_stage(prev_stage(final))); iprintf("\nStage %lu:\n", prev_stage(final)); print_stage(&points, prev_stage(final)); iprintf("\nStage %lu:\n", final); print_stage(&points, final); } qt_feb_barrier_destroy(points.barrier); qtimer_destroy(alloc_timer); qtimer_destroy(init_timer); qtimer_destroy(exec_timer); // Free allocated memory for (int i = 0; i < points.N; i++) { free(points.stage[0][i]); free(points.stage[1][i]); free(points.stage[2][i]); } free(points.stage[0]); free(points.stage[1]); free(points.stage[2]); return 0; }
int main(int argc, char * argv[]) { RT_MODEL * S; const char * status; int_T count; int exit_code = exit_success; boolean_T parseError = FALSE; real_T final_time = -2; /* Let model select final time */ int scheduling_priority; struct qsched_param scheduling; t_period timeout; t_timer_notify notify; t_error result; /* * Make controller threads higher priority than external mode threads: * ext_priority = priority of lowest priority external mode thread * min_priority = minimum allowable priority of lowest priority model task * max_priority = maximum allowable priority of lowest priority model task */ int ext_priority = qsched_get_priority_min(QSCHED_FIFO); int min_priority = ext_priority + 2; int max_priority = qsched_get_priority_max(QSCHED_FIFO) - 0; qsigset_t signal_set; qsigaction_t action; int_T stack_size = 0; /* default stack size */ (void) ssPrintf("Entered main(argc=%d, argv=%p)\n", argc, argv); for (count = 0; count < argc; count++) { (void) ssPrintf(" argv[%d] = %s\n", count, argv[count]); } scheduling_priority = 2; /* default priority */ if (scheduling_priority < min_priority) scheduling_priority = min_priority; else if (scheduling_priority > max_priority) scheduling_priority = max_priority; /* * Parse the standard RTW parameters. Let all unrecognized parameters * pass through to external mode for parsing. NULL out all args handled * so that the external mode parsing can ignore them. 
*/ for (count = 1; count < argc; ) { const char *option = argv[count++]; char extraneous_characters[2]; if ((strcmp(option, "-tf") == 0) && (count != argc)) {/* final time */ const char * tf_argument = argv[count++]; double time_value; /* use a double for the sscanf since real_T may be a float or a double depending on the platform */ if (strcmp(tf_argument, "inf") == 0) { time_value = RUN_FOREVER; } else { int items = sscanf(tf_argument, "%lf%1s", &time_value, extraneous_characters); if ((items != 1) || (time_value < 0.0) ) { (void) fprintf(stderr, "final_time must be a positive, real value or inf.\n"); parseError = true; break; } } final_time = (real_T) time_value; argv[count-2] = NULL; argv[count-1] = NULL; } else if ((strcmp(option, "-pri") == 0) && (count != argc)) {/* base priority */ const char * tf_argument = argv[count++]; int priority; /* use an int for the sscanf since int_T may be the wrong size depending on the platform */ int items = sscanf(tf_argument, "%d%1s", &priority, extraneous_characters); if ((items != 1) || (priority < min_priority) ) { (void) fprintf(stderr, "priority must be a greater than or equal to %d.\n", min_priority); parseError = true; break; } if (priority > max_priority) { (void) fprintf(stderr, "priority must be less than or equal to %d.\n", max_priority); parseError = true; break; } scheduling_priority = priority; argv[count-2] = NULL; argv[count-1] = NULL; } else if ((strcmp(option, "-ss") == 0) && (count != argc)) {/* stack size */ const char * stack_argument = argv[count++]; int stack; /* use an int for the sscanf since int_T may be the wrong size depending on the platform */ int items = sscanf(stack_argument, "%d%1s", &stack, extraneous_characters); if ((items != 1) || (stack < QTHREAD_STACK_MIN) ) { (void) fprintf(stderr, "stack size must be a integral value greater than or equal to %d.\n", QTHREAD_STACK_MIN); parseError = true; break; } stack_size = (int_T)stack; argv[count-2] = NULL; argv[count-1] = NULL; } else if 
((strcmp(option, "-d") == 0) && (count != argc)) {/* current directory */ const char * path_name = argv[count++]; _chdir(path_name); argv[count-2] = NULL; argv[count-1] = NULL; } } rtExtModeQuarcParseArgs(argc, (const char **) argv, "shmem://Crane:1"); /* * Check for unprocessed ("unhandled") args. */ for (count = 1; count < argc; count++) { if (argv[count] != NULL) { (void) fprintf(stderr, "Unexpected command line argument: \"%s\".\n", argv[count]); parseError = TRUE; } } if (parseError) { (void) fprintf(stderr, "\nUsage: Crane -option1 val1 -option2 val2 -option3 ...\n\n"); (void) fprintf(stderr, "\t-tf 20 - sets final time to 20 seconds\n"); (void) fprintf(stderr, "\t-d C:\\data - sets current directory to C:\\data\n"); (void) fprintf(stderr, "\t-pri 5 - sets the minimum thread priority\n"); (void) fprintf(stderr, "\t-ss 65536 - sets the stack size for model threads\n"); (void) fprintf(stderr, "\t-w - wait for host to connect before starting\n"); (void) fprintf(stderr, "\t-uri shmem://mymodel - set external mode URL to \"shmem://mymodel\"\n"); (void) fprintf(stderr, "\n"); return (exit_failure); } /**************************** * Initialize global memory * ****************************/ (void)memset(&GBLbuf, 0, sizeof(GBLbuf)); /************************ * Initialize the model * ************************/ rt_InitInfAndNaN(sizeof(real_T)); S = Crane(); if (rtmGetErrorStatus(S) != NULL) { (void) fprintf(stderr, "Error during model registration: %s\n", rtmGetErrorStatus(S)); return (exit_failure); } if (final_time >= 0.0 || final_time == RUN_FOREVER) { rtmSetTFinal(S, final_time); } else { rtmSetTFinal(S, rtInf); } action.sa_handler = control_c_handler; action.sa_flags = 0; qsigemptyset(&action.sa_mask); qsigaction(SIGINT, &action, NULL); qsigaction(SIGBREAK, &action, NULL); qsigemptyset(&signal_set); qsigaddset(&signal_set, SIGINT); qsigaddset(&signal_set, SIGBREAK); qthread_sigmask(QSIG_UNBLOCK, &signal_set, NULL); initialize_sizes(S); initialize_sample_times(S); 
status = rt_SimInitTimingEngine(rtmGetNumSampleTimes(S), rtmGetStepSize(S), rtmGetSampleTimePtr(S), rtmGetOffsetTimePtr(S), rtmGetSampleHitPtr(S), rtmGetSampleTimeTaskIDPtr(S), rtmGetTStart(S), &rtmGetSimTimeStep(S), &rtmGetTimingData(S)); if (status != NULL) { (void) fprintf(stderr, "Failed to initialize sample time engine: %s\n", status); return (exit_failure); } rt_CreateIntegrationData(S); fflush(stdout); if (rtExtModeQuarcStartup(rtmGetRTWExtModeInfo(S), rtmGetNumSampleTimes(S), &rtmGetStopRequested(S), ext_priority, /* external mode thread priority */ stack_size, SS_HAVESTDIO)) { (void) ssPrintf("\n** starting the model **\n"); start(S); if (rtmGetErrorStatus(S) == NULL) { /************************************************************************* * Execute the model. *************************************************************************/ if (rtmGetTFinal(S) == RUN_FOREVER) { (void) ssPrintf("\n**May run forever. Model stop time set to infinity.**\n"); } timeout.seconds = (t_long) (rtmGetStepSize(S)); timeout.nanoseconds = (t_int) ((rtmGetStepSize(S) - timeout.seconds) * 1000000000L); result = qtimer_event_create(¬ify.notify_value.event); if (result == 0) { t_timer timer; scheduling.sched_priority = scheduling_priority; qthread_setschedparam(qthread_self(), QSCHED_FIFO, &scheduling); notify.notify_type = TIMER_NOTIFY_EVENT; result = qtimer_create(¬ify, &timer); if (result == 0) { result = qtimer_begin_resolution(timer, &timeout); if (result == 0) { t_period actual_timeout; (void) ssPrintf("Creating main thread with priority %d and period %g...\n", scheduling_priority, rtmGetStepSize(S)); result = qtimer_get_actual_period(timer, &timeout, &actual_timeout); if (result == 0 && (timeout.nanoseconds != actual_timeout.nanoseconds || timeout.seconds != actual_timeout.seconds)) (void) ssPrintf("*** Actual period will be %g ***\n", actual_timeout.seconds + 1e-9 * actual_timeout.nanoseconds); fflush(stdout); result = qtimer_set_time(timer, &timeout, true); if (result 
== 0) { /* Enter the periodic loop */ while (result == 0) { if (GBLbuf.stopExecutionFlag || rtmGetStopRequested(S)) { break; } if (rtmGetTFinal(S) != RUN_FOREVER && rtmGetTFinal(S) - rtmGetT (S) <= rtmGetT(S)*DBL_EPSILON) { break; } if (qtimer_get_overrun(timer) > 0) { (void) fprintf(stderr, "Sampling rate is too fast for base rate\n"); fflush(stderr); } rt_OneStep(S); result = qtimer_event_wait(notify.notify_value.event); } /* disarm the timer */ qtimer_cancel(timer); if (rtmGetStopRequested(S) == false && rtmGetErrorStatus(S) == NULL) { /* Execute model last time step if final time expired */ rt_OneStep(S); } (void) ssPrintf("Main thread exited\n"); } else { msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof (GBLbuf.submessage)); string_format(GBLbuf.message, sizeof(GBLbuf.message), "Unable to set base rate. %s", GBLbuf.submessage); rtmSetErrorStatus(S, GBLbuf.message); } qtimer_end_resolution(timer); } else { msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof (GBLbuf.submessage)); string_format(GBLbuf.message, sizeof(GBLbuf.message), "Sampling period of %lg is too fast for the system clock. %s", rtmGetStepSize(S), GBLbuf.submessage); rtmSetErrorStatus(S, GBLbuf.message); } qtimer_delete(timer); } else { msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof (GBLbuf.submessage)); string_format(GBLbuf.message, sizeof(GBLbuf.message), "Unable to create timer for base rate. %s", GBLbuf.submessage); rtmSetErrorStatus(S, GBLbuf.message); } } else { msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof (GBLbuf.submessage)); string_format(GBLbuf.message, sizeof(GBLbuf.message), "Unable to create timer event for base rate. 
%s", GBLbuf.submessage); rtmSetErrorStatus(S, GBLbuf.message); } GBLbuf.stopExecutionFlag = 1; } } else { rtmSetErrorStatus(S, "Unable to initialize external mode."); } rtExtSetReturnStatus(rtmGetErrorStatus(S)); rtExtModeQuarcCleanup(rtmGetNumSampleTimes(S)); /******************** * Cleanup and exit * ********************/ if (rtmGetErrorStatus(S) != NULL) { (void) fprintf(stderr, "%s\n", rtmGetErrorStatus(S)); exit_code = exit_failure; } (void) ssPrintf("Invoking model termination function...\n"); terminate(S); (void) ssPrintf("Exiting real-time code\n"); return (exit_code); }
int main(int argc, char *argv[]) { aligned_t *ui_array, *ui_array2; double *d_array, *d_array2; size_t len = 1000000; qtimer_t timer = qtimer_create(); double cumulative_time_qutil = 0.0; double cumulative_time_libc = 0.0; int using_doubles = 0; unsigned long iterations = 10; qthread_initialize(); CHECK_VERBOSE(); printf("%i threads\n", (int)qthread_num_workers()); NUMARG(len, "TEST_LEN"); NUMARG(iterations, "TEST_ITERATIONS"); NUMARG(using_doubles, "TEST_USING_DOUBLES"); printf("using %s\n", using_doubles ? "doubles" : "aligned_ts"); if (using_doubles) { d_array = calloc(len, sizeof(double)); printf("array is %s\n", human_readable(len * sizeof(double))); assert(d_array); // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL); for (unsigned int i = 0; i < len; i++) { d_array[i] = ((double)random()) / ((double)RAND_MAX) + random(); } d_array2 = calloc(len, sizeof(double)); assert(d_array2); // madvise(d_array2,len*sizeof(double), MADV_RANDOM); iprintf("double array generated...\n"); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qutil_qsort(d_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu doubles with qutil took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_qutil); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qsort(d_array2, len, sizeof(double), dcmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_libc /= (double)iterations; printf("sorting %lu doubles with libc took: %f seconds\n", (unsigned long)len, cumulative_time_libc); free(d_array); free(d_array2); } else 
{ ui_array = calloc(len, sizeof(aligned_t)); printf("array is %s\n", human_readable(len * sizeof(aligned_t))); for (unsigned int i = 0; i < len; i++) { ui_array[i] = random(); } ui_array2 = calloc(len, sizeof(aligned_t)); iprintf("ui_array generated...\n"); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qutil_aligned_qsort(ui_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu aligned_ts with qutil took: %f seconds\n", (unsigned long)len, cumulative_time_qutil); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qsort(ui_array2, len, sizeof(double), acmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); } cumulative_time_libc /= (double)iterations; printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_libc); free(ui_array); free(ui_array2); } if (cumulative_time_qutil < cumulative_time_libc) { printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } else { printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } qtimer_destroy(timer); return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); #pragma omp parallel #pragma omp single #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); nodecount = 1; long retval; #pragma omp parallel #pragma omp single nowait #pragma omp task untied retval = visit(&root, root.num_children); total_num_nodes = retval; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / omp_get_num_threads()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, 
total_num_nodes / total_time / omp_get_num_threads()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { size_t threads = 1000, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(threads, "THREADS"); NUMARG(iterations, "ITERATIONS"); initme = (aligned_t *)calloc(threads, sizeof(aligned_t)); assert(initme); rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t)); assert(rets); iprintf("creating the barrier for %zu threads\n", threads + 1); wait_on_me = qt_feb_barrier_create(threads + 1); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 0; i < threads; i++) { qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_feb_barrier_enter(wait_on_me); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); initme_idx = 0; for (i = 0; i < threads; i++) { if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Destroying barrier...\n"); qt_feb_barrier_destroy(wait_on_me); iprintf("Success!\n"); /* this loop shouldn't be necessary... but seems to avoid crashes in rare * cases (in other words there must a race condition in qthread_finalize() * if there are outstanding threads out there) */ for (i = 0; i < threads * 2; i++) { aligned_t tmp = 1; qthread_readFF(&tmp, rets + i); assert(tmp == 0); } return 0; }
int main(int argc, char *argv[]) { size_t threads, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; double tot = 0.0; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); threads = qthread_num_workers(); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf("%i threads...\n", (int)threads); initme = calloc(threads, sizeof(aligned_t)); assert(initme); rets = malloc(threads * sizeof(aligned_t)); assert(rets); iprintf("Creating a barrier to block %i threads\n", threads); wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 1; i < threads; i++) { void *arg[2] = {wait_on_me, (void*)(intptr_t)i}; qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_barrier_enter(wait_on_me, 0); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); tot += qtimer_secs(t); // reset initme_idx = 1; // check retvals for (i = 1; i < threads; i++) { qthread_readFF(NULL, rets + i); if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Average barrier time = %f\n", tot / iterations); iprintf("Destroying the barrier...\n"); qt_barrier_destroy(wait_on_me); iprintf("Success!\n"); return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned long tmp = 0; NUMARG(tmp, "UTS_TREE_TYPE"); tree_type = (tree_t)tmp; } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); { unsigned long tmp = 0; NUMARG(tmp, "UTS_SHAPE_FN"); shape_fn = (shape_t)tmp; } NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); nodecount = 1; long retval; { retval = _Cilk_spawn visit(root); _Cilk_sync; } total_num_nodes = retval; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS LOG_UTS_RESULTS_YAML(total_num_nodes, total_time) LOG_ENV_CILK_YAML() #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / __cilkrts_get_nworkers()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); // If the operator did not attempt to set a stack size, force // a reasonable lower bound if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE")) setenv("QT_STACK_SIZE", "32768", 0); assert(qthread_initialize() == 0); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); aligned_t donecount = 0; root.dc = &donecount; qthread_empty(&donecount); aligned_t tot = 0; root.acc = &tot; root.expect = 1; qthread_fork_syncvar(visit, &root, NULL); qthread_readFF(NULL, root.dc); total_num_nodes = tot; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned 
long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #endif /* ifdef PRINT_STATS */ return 0; }