int main(int argc, char *argv[]) { aligned_t ret; qthread_init(2); CHECK_VERBOSE(); assert(qthread_num_shepherds() == 2); iprintf("now to fork to shepherd 0...\n"); qthread_fork_to(checkres, (void *)0, &ret, 0); qthread_readFF(&ret, &ret); iprintf("success in forking to shepherd 0!\n"); iprintf("now to fork to shepherd 1...\n"); qthread_fork_to(checkres, (void *)1, &ret, 1); qthread_readFF(&ret, &ret); iprintf("success in forking to shepherd 1!\n"); iprintf("now to fork the migrant...\n"); qthread_fork(migrant, NULL, &ret); iprintf("success in forking migrant!\n"); qthread_readFF(&ret, &ret); iprintf("migrant returned successfully!\n"); return 0; }
int main(int argc, char *argv[]) { aligned_t rets[NUM_THREADS]; qtimer_t timer = qtimer_create(); double cumulative_time = 0.0; if (qthread_initialize() != QTHREAD_SUCCESS) { fprintf(stderr, "qthread library could not be initialized!\n"); exit(EXIT_FAILURE); } CHECK_VERBOSE(); for (int iteration = 0; iteration < 10; iteration++) { qtimer_start(timer); for (int i = 0; i < NUM_THREADS; i++) { qthread_fork(qincr, NULL, &(rets[i])); } for (int i = 0; i < NUM_THREADS; i++) { qthread_readFF(NULL, &(rets[i])); } qtimer_stop(timer); iprintf("\ttest iteration %i: %f secs\n", iteration, qtimer_secs(timer)); cumulative_time += qtimer_secs(timer); } printf("qthread time: %f\n", cumulative_time / 10.0); return 0; }
void logputs (enum log_options o, const char *s) { FILE *fp; FILE *warcfp; check_redirect_output (); if (o == LOG_PROGRESS) fp = get_progress_fp (); else fp = get_log_fp (); if (fp == NULL) return; warcfp = get_warc_log_fp (); CHECK_VERBOSE (o); FPUTS (s, fp); if (warcfp != NULL) FPUTS (s, warcfp); if (save_context_p) saved_append (s); if (flush_log_p) logflush (); else needs_flushing = true; }
/*
 * Benchmark: time how long it takes to create NUM_THREADS pthreads running
 * qincr on a shared counter and join all of them, averaged over ten rounds.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    pthread_t rets[NUM_THREADS];
    qtimer_t  timer           = qtimer_create();
    double    cumulative_time = 0.0;
    /* Handed to every thread by address; must start from a defined value
     * (it was previously left uninitialized). */
    size_t    counter         = 0;

    CHECK_VERBOSE();

    for (int iteration = 0; iteration < 10; iteration++) {
        qtimer_start(timer);
        for (int i = 0; i < NUM_THREADS; i++) {
            pthread_create(&(rets[i]), NULL, qincr, &counter);
        }
        for (int i = 0; i < NUM_THREADS; i++) {
            pthread_join(rets[i], NULL);
        }
        qtimer_stop(timer);

        iprintf("\ttest iteration %i: %f secs\n", iteration,
                qtimer_secs(timer));
        cumulative_time += qtimer_secs(timer);
    }
    printf("pthread time: %f\n", cumulative_time / 10.0);

    /* Release the timer; the original never destroyed it. */
    qtimer_destroy(timer);

    return 0;
}
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { int count = 0; aligned_t max = 0; aligned_t tmp = 0; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(count, "COUNT"); iprintf("Main executing in team %lu (w/ parent %lu)\n", (unsigned long)qt_team_id(), (unsigned long)qt_team_parent_id()); assert(qt_team_id() == default_team_id); assert(qt_team_parent_id() == non_team_id); aligned_t hello_in_team_ret; qthread_fork(hello_in_team, NULL, &hello_in_team_ret); qthread_readFF(&tmp, &hello_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_rets[count]; for (int i = 0; i < count; i++) { qthread_fork_new_team(hello_new_team, NULL, &hello_new_team_rets[i]); } for (int i = 0; i < count; i++) { qthread_readFF(&tmp, &hello_new_team_rets[i]); max = MAX(max, tmp); } aligned_t hello_new_team_in_team_ret; qthread_fork_new_team( hello_new_team_in_team, NULL, &hello_new_team_in_team_ret); qthread_readFF(&tmp, &hello_new_team_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_new_team_ret; qthread_fork_new_team( hello_new_team_new_team, NULL, &hello_new_team_new_team_ret); qthread_readFF(&tmp, &hello_new_team_new_team_ret); max = MAX(max, tmp); iprintf("max is %lu\n", (unsigned long)max); if (count + 4 == max) { iprintf("SUCCEEDED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 0; } else { iprintf("FAILED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 1; } }
/*
 * Initialize the runtime with one shepherd, report the shepherd and worker
 * counts, and run testWriteFFWaits().
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    CHECK_VERBOSE();

    /* Keep the call outside assert() so it still runs under NDEBUG. */
    int const init_status = qthread_init(1);
    assert(init_status == 0);
    (void)init_status;

    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf(" %i threads total\n", qthread_num_workers());

    testWriteFFWaits();

    return 0;
}
/*
 * OpenMP task-spawn benchmark.  Spawns MT_COUNT empty tasks, either from a
 * nested parallel-for (MT_PAR_FORK non-zero) or from one generating task,
 * and prints "<threads> <count> <seconds>".
 */
int main(int argc, char *argv[])
{
    uint64_t      count      = 1048576;
    int           par_fork   = 0;
    unsigned long threads    = 1;
    double        total_time = 0.0;
    qtimer_t      timer;

    CHECK_VERBOSE();
    NUMARG(count, "MT_COUNT");
    NUMARG(par_fork, "MT_PAR_FORK");
    assert(0 != count);

#pragma omp parallel
#pragma omp single
    {
        timer   = qtimer_create();
        threads = omp_get_num_threads();

        /* Both spawn strategies start the clock immediately before forking. */
        qtimer_start(timer);
        if (!par_fork) {
            /* One generator task creates every null task. */
#pragma omp task untied
            for (uint64_t k = 0; k < count; k++) {
#pragma omp task untied
                null_task(NULL);
            }
        } else {
            /* Task creation is itself spread over a parallel loop. */
#pragma omp parallel for
            for (uint64_t k = 0; k < count; k++) {
#pragma omp task untied
                null_task(NULL);
            }
        }
#pragma omp taskwait
        qtimer_stop(timer);
    }

    total_time = qtimer_secs(timer);
    qtimer_destroy(timer);

    printf("%lu %lu %f\n", threads, (unsigned long)count, total_time);

    return 0;
}
int main(int argc, char *argv[]) { aligned_t tmp, ret = 0; int retval; long foobar = 1234567890; qthread_f funcs[2] = { returner, NULL }; CHECK_VERBOSE(); retval = spr_init(SPR_SPMD, funcs); if (retval != SPR_OK) { fprintf(stderr, "(%03d) spr_init returned %d\n", 0, retval); return -1; } my_id = spr_locale_id(); world_size = spr_num_locales(); iprintf("(%03d) Rank %d of %d is alive\n", my_id, my_id, world_size); spr_unify(); if (my_id != 0) { return -2; } int target = (world_size > 1) ? 1 : 0; retval = qthread_fork_remote(returner, &foobar, &ret, target, sizeof(long)); if (retval != 0) { fprintf(stderr, "(%03d) fork_remote returned %d\n", my_id, retval); return -3; } retval = qthread_readFE(&tmp, &ret); iprintf("(%03d) returner returned %ld\n", my_id, (long)tmp); if (retval != 0) { fprintf(stderr, "(%03d) readFE returned %d (%d)\n", my_id, retval, (int)tmp); return -4; } spr_fini(); return (tmp == foobar) ? 0 : 5; }
int main(int argc, char *argv[]) { CHECK_VERBOSE(); aligned_t tmp, ret = 0; int retval; long foobar = 1234567890; setenv("QT_MULTINODE","yes",1); qthread_initialize(); my_id = qthread_multinode_rank(); world_size = qthread_multinode_size(); iprintf("(%03d) Rank %d of %d is alive\n", my_id, my_id, world_size); retval = qthread_multinode_register(2, returner); if (retval != 0){ fprintf(stderr, "(%03d) multinode_register returned %d\n", my_id, retval); return 1; } qthread_multinode_run(); if (my_id != 0) return 2; int target = (world_size > 1) ? 1 : 0; retval = qthread_fork_remote(returner, &foobar, &ret, target, sizeof(long)); if (retval != 0) { fprintf(stderr, "(%03d) fork_remote returned %d\n", my_id, retval); return 3; } retval = qthread_readFE(&tmp, &ret); iprintf("(%03d) returner returned %ld\n", my_id, (long) tmp); if (retval != 0) { fprintf(stderr, "(%03d) readFE returned %d (%d)\n", my_id, retval, (int) tmp); return 4; } qthread_finalize(); return (tmp == foobar) ? 0 : 5; }
/* Write the string S to the log stream, subject to CHECK_VERBOSE (o).
   Nothing is written when no log stream is available.  */
void
logputs (enum log_options o, const char *s)
{
  FILE *out;

  check_redirect_output ();

  out = get_log_fp ();
  if (!out)
    return;

  CHECK_VERBOSE (o);

  FPUTS (s, out);
  if (save_context_p)
    saved_append (s);

  /* Either flush right away or remember that a flush is pending.  */
  if (!flush_log_p)
    needs_flushing = true;
  else
    logflush ();
}
int main(int argc, char *argv[]) { assert(qthread_initialize() == QTHREAD_SUCCESS); CHECK_VERBOSE(); NUMARG(numincrs, "NUM_INCRS"); // future_init(128); iprintf("%i shepherds\n", qthread_num_shepherds()); iprintf("%i threads\n", qthread_num_workers()); qt_loop_balance_sinc(0, numincrs, sum, NULL); if (threads != numincrs) { iprintf("threads == %lu, not %lu\n", (unsigned long)threads, (unsigned long)numincrs); } assert(threads == numincrs); return 0; }
/*
 * Sanity-check the qtimer API (two back-to-back start/stop measurements must
 * not be negative), then run the fastrand checks ks_test(), runs() and
 * autocorrelation().
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    qtimer_t t;

    /* Keep the call outside assert() so it still runs under NDEBUG. */
    int const init_status = qthread_initialize();
    assert(init_status == QTHREAD_SUCCESS);
    (void)init_status;

    CHECK_VERBOSE();

    t = qtimer_create();
    assert(t);

    /* First measurement: an empty interval. */
    qtimer_start(t);
    qtimer_stop(t);
    if (qtimer_secs(t) == 0) {
        fprintf(stderr, "qtimer_secs(t) reported zero length time.\n");
    } else if (qtimer_secs(t) < 0) {
        fprintf(stderr, "qtimer_secs(t) thinks time went backwards (%g).\n",
                qtimer_secs(t));
    }
    iprintf("time to find self and assert it: %g secs\n", qtimer_secs(t));

    /* Second measurement: this one must not be negative. */
    qtimer_start(t);
    qtimer_stop(t);
    assert(qtimer_secs(t) >= 0.0);
    if (qtimer_secs(t) == 0.0) {
        iprintf("inlining reduces calltime to zero (apparently)\n");
    } else {
        iprintf("smallest measurable time: %g secs\n", qtimer_secs(t));
    }

    qtimer_destroy(t);

    // Now to test fastrand
    ks_test();
    runs();
    autocorrelation();

    qthread_finalize();

    return 0;
}
void logprintf (enum log_options o, const char *fmt, ...) { va_list args; struct logvprintf_state lpstate; bool done; check_redirect_output (); if (inhibit_logging) return; CHECK_VERBOSE (o); xzero (lpstate); do { va_start (args, fmt); done = log_vprintf_internal (&lpstate, fmt, args); va_end (args); } while (!done); }
/* * The main procedure simply creates a producer and a consumer task to run in * parallel */ int main(int argc, char *argv[]) { aligned_t t[2]; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(bufferSize, "BUFFERSIZE"); numItems = 8 * bufferSize; NUMARG(numItems, "NUMITEMS"); iprintf("%i threads...\n", qthread_num_shepherds()); buff = malloc(sizeof(aligned_t) * bufferSize); for (unsigned int i = 0; i < bufferSize; ++i) { buff[i] = 0; } qthread_fork(consumer, NULL, &t[0]); qthread_fork(producer, NULL, &t[1]); qthread_readFF(NULL, &t[0]); qthread_readFF(NULL, &t[1]); /* cleanup... unnecessary in general, but for the moment I'm tracking down * errors in the FEB system, so let's clean up */ for (unsigned int i = 0; i < bufferSize; ++i) { qthread_fill(buff + i); } free(buff); iprintf("Success!\n"); return 0; }
int main(int argc, char *argv[]) { int n = 10; int m = 10; num_timesteps = 10; workload = 0; workload_per = 0; workload_var = 0; int print_final = 0; int alltime = 0; CHECK_VERBOSE(); NUMARG(n, "N"); NUMARG(m, "M"); NUMARG(num_timesteps, "TIMESTEPS"); NUMARG(workload, "WORKLOAD"); NUMARG(workload_per, "WORKLOAD_PER"); NUMARG(workload_var, "WORKLOAD_VAR"); NUMARG(print_final, "PRINT_FINAL"); NUMARG(alltime, "ALL_TIME"); assert (n > 0 && m > 0); // Initialize Qthreads assert(qthread_initialize() == 0); qtimer_t alloc_timer = qtimer_create(); qtimer_t init_timer = qtimer_create(); qtimer_t exec_timer = qtimer_create(); // Allocate memory for 3-stage stencil (with boundary padding) qtimer_start(alloc_timer); stencil_t points; points.N = n + 2; points.M = m + 2; for (int s = 0; s < NUM_STAGES; s++) { points.stage[s] = malloc(points.N*sizeof(aligned_t *)); assert(NULL != points.stage[s]); for (int i = 0; i < points.N; i++) { points.stage[s][i] = calloc(points.M, sizeof(aligned_t)); assert(NULL != points.stage[s][i]); } } qtimer_stop(alloc_timer); // Initialize first stage and set boundary conditions qtimer_start(init_timer); for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { qthread_writeF_const(&points.stage[0][i][j], 0); for (int s = 1; s < NUM_STAGES; s++) qthread_empty(&points.stage[s][i][j]); } } for (int i = 0; i < points.N; i++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][i][0], BOUNDARY); qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY); #else points.stage[s][i][0] = BOUNDARY; points.stage[s][i][points.M-1] = BOUNDARY; #endif } } for (int j = 0; j < points.M; j++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][0][j], BOUNDARY); qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY); #else points.stage[s][0][j] = BOUNDARY; points.stage[s][points.N-1][j] = BOUNDARY; #endif } } qtimer_stop(init_timer); // 
Create barrier to synchronize on completion of calculations qtimer_start(exec_timer); points.barrier = qt_feb_barrier_create(n*m+1); // Spawn tasks to start calculating updates at each point update_args_t args = {&points, -1, -1, 1, 1}; for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { args.i = i; args.j = j; qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL); } } // Wait for calculations to finish qt_feb_barrier_enter(points.barrier); qtimer_stop(exec_timer); // Print timing info if (alltime) { fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer)); fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer)); fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer)); } else { fprintf(stdout, "%f\n", qtimer_secs(exec_timer)); } // Print stencils if (print_final) { size_t final = (num_timesteps % NUM_STAGES); iprintf("Stage %lu:\n", prev_stage(prev_stage(final))); print_stage(&points, prev_stage(prev_stage(final))); iprintf("\nStage %lu:\n", prev_stage(final)); print_stage(&points, prev_stage(final)); iprintf("\nStage %lu:\n", final); print_stage(&points, final); } qt_feb_barrier_destroy(points.barrier); qtimer_destroy(alloc_timer); qtimer_destroy(init_timer); qtimer_destroy(exec_timer); // Free allocated memory for (int i = 0; i < points.N; i++) { free(points.stage[0][i]); free(points.stage[1][i]); free(points.stage[2][i]); } free(points.stage[0]); free(points.stage[1]); free(points.stage[2]); return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); // If the operator did not attempt to set a stack size, force // a reasonable lower bound if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE")) setenv("QT_STACK_SIZE", "32768", 0); assert(qthread_initialize() == 0); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); aligned_t donecount = 0; root.dc = &donecount; qthread_empty(&donecount); aligned_t tot = 0; root.acc = &tot; root.expect = 1; qthread_fork_syncvar(visit, &root, NULL); qthread_readFF(NULL, root.dc); total_num_nodes = tot; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned 
long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { size_t threads, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; double tot = 0.0; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); threads = qthread_num_workers(); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf("%i threads...\n", (int)threads); initme = calloc(threads, sizeof(aligned_t)); assert(initme); rets = malloc(threads * sizeof(aligned_t)); assert(rets); iprintf("Creating a barrier to block %i threads\n", threads); wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 1; i < threads; i++) { void *arg[2] = {wait_on_me, (void*)(intptr_t)i}; qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_barrier_enter(wait_on_me, 0); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); tot += qtimer_secs(t); // reset initme_idx = 1; // check retvals for (i = 1; i < threads; i++) { qthread_readFF(NULL, rets + i); if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Average barrier time = %f\n", tot / iterations); iprintf("Destroying the barrier...\n"); qt_barrier_destroy(wait_on_me); iprintf("Success!\n"); return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned long tmp = 0; NUMARG(tmp, "UTS_TREE_TYPE"); tree_type = (tree_t)tmp; } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); { unsigned long tmp = 0; NUMARG(tmp, "UTS_SHAPE_FN"); shape_fn = (shape_t)tmp; } NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); nodecount = 1; long retval; { retval = _Cilk_spawn visit(root); _Cilk_sync; } total_num_nodes = retval; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS LOG_UTS_RESULTS_YAML(total_num_nodes, total_time) LOG_ENV_CILK_YAML() #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / __cilkrts_get_nworkers()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { size_t threads = 1000, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(threads, "THREADS"); NUMARG(iterations, "ITERATIONS"); initme = (aligned_t *)calloc(threads, sizeof(aligned_t)); assert(initme); rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t)); assert(rets); iprintf("creating the barrier for %zu threads\n", threads + 1); wait_on_me = qt_feb_barrier_create(threads + 1); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 0; i < threads; i++) { qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_feb_barrier_enter(wait_on_me); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); initme_idx = 0; for (i = 0; i < threads; i++) { if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Destroying barrier...\n"); qt_feb_barrier_destroy(wait_on_me); iprintf("Success!\n"); /* this loop shouldn't be necessary... but seems to avoid crashes in rare * cases (in other words there must a race condition in qthread_finalize() * if there are outstanding threads out there) */ for (i = 0; i < threads * 2; i++) { aligned_t tmp = 1; qthread_readFF(&tmp, rets + i); assert(tmp == 0); } return 0; }
int main(int argc, char *argv[]) { aligned_t *t[2]; uint64_t x_value; uint64_t pairs; assert(qthread_initialize() == 0); pairs = qthread_num_shepherds() * 6; CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); NUMARG(pairs, "PAIRS"); t[0] = calloc(pairs, sizeof(aligned_t)); t[1] = calloc(pairs, sizeof(aligned_t)); iprintf("%i threads...\n", qthread_num_shepherds()); iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w); qthread_syncvar_empty(&id); qthread_syncvar_writeF_const(&id, 1); iprintf("id = 0x%lx\n", (unsigned long)id.u.w); { uint64_t tmp = 0; qthread_syncvar_readFF(&tmp, &id); assert(tmp == 1); } iprintf("x's status is: %s (want full (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 1); qthread_syncvar_readFE(NULL, &x); iprintf("x's status became: %s (want empty (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 0); for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_readFF(NULL, &(t[0][i])); qthread_readFF(NULL, &(t[1][i])); } iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_fill(&x); iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_readFF(&x_value, &x); assert(qthread_syncvar_status(&x) == 1); free(t[0]); free(t[1]); if (x_value == iterations - 1) { iprintf("Success! x==%lu\n", (unsigned long)x_value); return 0; } else { fprintf(stderr, "Final value of x=%lu, expected %lu\n", (unsigned long)x_value, (unsigned long)(iterations - 1)); return -1; } }
int main(int argc, char *argv[]) { qarray *a; distribution_t disttypes[] = { FIXED_HASH, FIXED_FIELDS, ALL_LOCAL, ALL_RAND, ALL_LEAST, DIST_RAND, DIST_STRIPES, DIST_FIELDS, DIST_LEAST }; const char *distnames[] = { "FIXED_HASH", "FIXED_FIELDS", "ALL_LOCAL", "ALL_RAND", "ALL_LEAST", "DIST_RAND", "DIST_STRIPES", "DIST_FIELDS", "DIST_LEAST" }; unsigned int dt_index; unsigned int num_dists = sizeof(disttypes) / sizeof(distribution_t); unsigned int dists = (1 << num_dists) - 1; qthread_initialize(); CHECK_VERBOSE(); NUMARG(dists, "TEST_DISTS"); NUMARG(ELEMENT_COUNT, "ELEMENT_COUNT"); /* iterate over all the different distribution types */ for (dt_index = 0; dt_index < num_dists; dt_index++) { if ((dists & (1 << dt_index)) == 0) { continue; } /* test a basic array of doubles */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(double), disttypes[dt_index], 0, 0); assert(a); iprintf("%s: created basic array of doubles\n", distnames[dt_index]); qarray_iter(a, 0, ELEMENT_COUNT, assign1); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); assert(count == ELEMENT_COUNT); } { size_t i; for (i = 0; i < ELEMENT_COUNT; i++) { double elem = *(double *)qarray_elem_nomigrate(a, i); if (elem != 1.0) { printf ("element %lu is %f instead of 1.0, disttype = %s\n", (unsigned long)i, elem, distnames[dt_index]); assert(elem == 1.0); } } } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); /* now test an array of giant things */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(bigobj), disttypes[dt_index], 0, 0); iprintf("%s: created array of big objects\n", distnames[dt_index]); qarray_iter(a, 0, ELEMENT_COUNT, assignall1); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); // assert(count == 
ELEMENT_COUNT); } { size_t i; char fail = 0; for (i = 0; i < ELEMENT_COUNT; i++) { char *elem = (char *)qarray_elem_nomigrate(a, i); size_t j; for (j = 0; j < sizeof(bigobj); j++) { if (elem[j] != 1) { printf ( "byte %lu of element %lu is %i instead of 1, dt_index = %u\n", (unsigned long)j, (unsigned long)i, elem[j], dt_index); fail = 1; break; } } } assert(fail == 0); } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); /* now test an array of weird-sized things */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(offsize), disttypes[dt_index], 0, 0); iprintf("%s: created array of odd-sized objects\n", distnames[dt_index]); qarray_iter_loop(a, 0, ELEMENT_COUNT, assignoff1, NULL); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); assert(count == ELEMENT_COUNT); } { size_t i; for (i = 0; i < ELEMENT_COUNT; i++) { char *elem = (char *)qarray_elem_nomigrate(a, i); size_t j; for (j = 0; j < sizeof(offsize); j++) { if (elem[j] != 1) { printf ( "byte %lu of element %lu is %i instead of 1, dt_index = %u\n", (unsigned long)j, (unsigned long)i, elem[j], dt_index); assert(elem[j] == 1); } } } } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); } return 0; }
int main(int argc, char *argv[]) { aligned_t return_value = 0; int status, ret; CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED"); status = qthread_initialize(); assert(status == QTHREAD_SUCCESS); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf(" %i threads total\n", qthread_num_workers()); iprintf("Creating the queue...\n"); the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0); assert(the_queue); iprintf("---------------------------------------------------------\n"); iprintf("\tSINGLE THREAD TEST\n\n"); iprintf("1/4 Spawning thread to be queued...\n"); status = qthread_fork(tobequeued, NULL, &return_value); assert(status == QTHREAD_SUCCESS); iprintf("2/4 Waiting for thread to queue itself...\n"); while(qthread_queue_length(the_queue) != 1) qthread_yield(); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/4 Releasing the queue...\n"); qthread_queue_release_all(the_queue); ret = qthread_readFF(NULL, &return_value); assert(ret == QTHREAD_SUCCESS); assert(threads_in == 1); assert(awoke == 1); assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("4/4 Test passed!\n"); iprintf("---------------------------------------------------------\n"); iprintf("\tMULTI THREAD TEST\n\n"); threads_in = 0; awoke = 0; aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED); iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED); for (int i=0; i<THREADS_ENQUEUED; i++) { status = qthread_fork(tobequeued, NULL, retvals + i); assert(status == QTHREAD_SUCCESS); } iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED); while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield(); assert(threads_in == THREADS_ENQUEUED); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/6 Releasing a single thread...\n"); qthread_queue_release_one(the_queue); iprintf("4/6 Waiting for 
that thread to exit\n"); while (awoke == 0) qthread_yield(); assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1)); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("5/6 Releasing the rest of the threads...\n"); qthread_queue_release_all(the_queue); for (int i=0; i<THREADS_ENQUEUED; i++) { ret = qthread_readFF(NULL, retvals + i); assert(ret == QTHREAD_SUCCESS); } assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("6/6 Test passed!\n"); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); #pragma omp parallel #pragma omp single #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); nodecount = 1; long retval; #pragma omp parallel #pragma omp single nowait #pragma omp task untied retval = visit(&root, root.num_children); total_num_nodes = retval; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / omp_get_num_threads()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, 
total_num_nodes / total_time / omp_get_num_threads()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { aligned_t *ui_array, *ui_array2; double *d_array, *d_array2; size_t len = 1000000; qtimer_t timer = qtimer_create(); double cumulative_time_qutil = 0.0; double cumulative_time_libc = 0.0; int using_doubles = 0; unsigned long iterations = 10; qthread_initialize(); CHECK_VERBOSE(); printf("%i threads\n", (int)qthread_num_workers()); NUMARG(len, "TEST_LEN"); NUMARG(iterations, "TEST_ITERATIONS"); NUMARG(using_doubles, "TEST_USING_DOUBLES"); printf("using %s\n", using_doubles ? "doubles" : "aligned_ts"); if (using_doubles) { d_array = calloc(len, sizeof(double)); printf("array is %s\n", human_readable(len * sizeof(double))); assert(d_array); // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL); for (unsigned int i = 0; i < len; i++) { d_array[i] = ((double)random()) / ((double)RAND_MAX) + random(); } d_array2 = calloc(len, sizeof(double)); assert(d_array2); // madvise(d_array2,len*sizeof(double), MADV_RANDOM); iprintf("double array generated...\n"); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qutil_qsort(d_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu doubles with qutil took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_qutil); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qsort(d_array2, len, sizeof(double), dcmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_libc /= (double)iterations; printf("sorting %lu doubles with libc took: %f seconds\n", (unsigned long)len, cumulative_time_libc); free(d_array); free(d_array2); } else 
{ ui_array = calloc(len, sizeof(aligned_t)); printf("array is %s\n", human_readable(len * sizeof(aligned_t))); for (unsigned int i = 0; i < len; i++) { ui_array[i] = random(); } ui_array2 = calloc(len, sizeof(aligned_t)); iprintf("ui_array generated...\n"); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qutil_aligned_qsort(ui_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu aligned_ts with qutil took: %f seconds\n", (unsigned long)len, cumulative_time_qutil); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qsort(ui_array2, len, sizeof(double), acmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); } cumulative_time_libc /= (double)iterations; printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_libc); free(ui_array); free(ui_array2); } if (cumulative_time_qutil < cumulative_time_libc) { printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } else { printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } qtimer_destroy(timer); return 0; }
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { size_t depth = 3; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(depth, "TEST_DEPTH"); // Test creating an empty sinc { qt_sinc_t zero_sinc; qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0); qt_sinc_wait(&zero_sinc, NULL); qt_sinc_fini(&zero_sinc); qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0); qt_sinc_expect(three_sinc, 3); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qt_sinc_wait(three_sinc, NULL); qt_sinc_destroy(three_sinc); } qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2); // Spawn additional waits aligned_t rets[3]; { qthread_fork(wait_on_sinc, sinc, &rets[0]); qthread_fork(wait_on_sinc, sinc, &rets[1]); qthread_fork(wait_on_sinc, sinc, &rets[2]); } { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_create(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); for (int i = 0; i < 3; i++) qthread_readFF(NULL, &rets[i]); // Reset the sinc qt_sinc_reset(sinc, 2); // Second use { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_reset(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); qt_sinc_destroy(sinc); return 0; }