int main(int argc, char *argv[]) { aligned_t rets[NUM_THREADS]; qtimer_t timer = qtimer_create(); double cumulative_time = 0.0; if (qthread_initialize() != QTHREAD_SUCCESS) { fprintf(stderr, "qthread library could not be initialized!\n"); exit(EXIT_FAILURE); } CHECK_VERBOSE(); for (int iteration = 0; iteration < 10; iteration++) { qtimer_start(timer); for (int i = 0; i < NUM_THREADS; i++) { qthread_fork(qincr, NULL, &(rets[i])); } for (int i = 0; i < NUM_THREADS; i++) { qthread_readFF(NULL, &(rets[i])); } qtimer_stop(timer); iprintf("\ttest iteration %i: %f secs\n", iteration, qtimer_secs(timer)); cumulative_time += qtimer_secs(timer); } printf("qthread time: %f\n", cumulative_time / 10.0); return 0; }
void Qthread::initialize( int thread_count ) { // Environment variable: QTHREAD_NUM_SHEPHERDS // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP // Environment variable: QTHREAD_HWPAR { char buffer[256]; snprintf(buffer,sizeof(buffer),"QTHREAD_HWPAR=%d",thread_count); putenv(buffer); } const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) && ( thread_count == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) && ( thread_count == qthread_num_workers() ); bool ok_symmetry = true ; if ( ok_init ) { Impl::s_number_shepherds = qthread_num_shepherds(); Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD); Impl::s_number_workers = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ; for ( int i = 0 ; ok_symmetry && i < Impl::s_number_shepherds ; ++i ) { ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) ); } } if ( ! ok_init || ! ok_symmetry ) { std::ostringstream msg ; msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ; msg << " : qthread_num_shepherds = " << qthread_num_shepherds(); msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD); msg << " : qthread_num_workers = " << qthread_num_workers(); if ( ! ok_symmetry ) { msg << " : qthread_num_workers_local = {" ; for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) { msg << " " << qthread_num_workers_local(i) ; } msg << " }" ; } Impl::s_number_workers = 0 ; Impl::s_number_shepherds = 0 ; Impl::s_number_workers_per_shepherd = 0 ; if ( ok_init ) { qthread_finalize(); } Kokkos::Impl::throw_runtime_exception( msg.str() ); } Impl::QthreadExec::resize_worker_scratch( 256 , 256 ); // Init the array for used for arbitrarily sized atomics Impl::init_lock_array_host_space(); }
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { int count = 0; aligned_t max = 0; aligned_t tmp = 0; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(count, "COUNT"); iprintf("Main executing in team %lu (w/ parent %lu)\n", (unsigned long)qt_team_id(), (unsigned long)qt_team_parent_id()); assert(qt_team_id() == default_team_id); assert(qt_team_parent_id() == non_team_id); aligned_t hello_in_team_ret; qthread_fork(hello_in_team, NULL, &hello_in_team_ret); qthread_readFF(&tmp, &hello_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_rets[count]; for (int i = 0; i < count; i++) { qthread_fork_new_team(hello_new_team, NULL, &hello_new_team_rets[i]); } for (int i = 0; i < count; i++) { qthread_readFF(&tmp, &hello_new_team_rets[i]); max = MAX(max, tmp); } aligned_t hello_new_team_in_team_ret; qthread_fork_new_team( hello_new_team_in_team, NULL, &hello_new_team_in_team_ret); qthread_readFF(&tmp, &hello_new_team_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_new_team_ret; qthread_fork_new_team( hello_new_team_new_team, NULL, &hello_new_team_new_team_ret); qthread_readFF(&tmp, &hello_new_team_new_team_ret); max = MAX(max, tmp); iprintf("max is %lu\n", (unsigned long)max); if (count + 4 == max) { iprintf("SUCCEEDED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 0; } else { iprintf("FAILED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 1; } }
/*
 * Computes fib(30) in a forked task and prints the result.
 *
 * A sinc expecting one submission is used as the completion signal: main
 * forks `fib` with a copy of the argument record and waits on the sinc
 * before reading `ret`.
 */
int main()
{
    /* Keep the call outside assert() so it still runs when NDEBUG is set. */
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    qt_sinc_t sinc;
    qt_sinc_init(&sinc, 0, NULL, NULL, 1);

    /* Initialised so the printf below never reads an indeterminate value. */
    int    ret  = 0;
    args_t args = { 30, &sinc, &ret };

    qthread_fork_copyargs(fib, &args, sizeof(args_t), NULL);
    qt_sinc_wait(&sinc, NULL);

    printf("%d\n", ret);
    return 0;
}
/*
 * Thread entry routine that brackets the lifetime of the qthreads runtime.
 *
 * Sequence:
 *   1. initialize qthreads and publish chpl_qthread_done_initializing = 1;
 *   2. call qthread_syncvar_readFF() on `canexit` (presumably this blocks
 *      until some other thread fills `canexit` -- confirm against the
 *      qthreads documentation);
 *   3. finalize qthreads and publish done_finalizing = 1.
 *
 * `junk` is unused. Always returns NULL.
 * NOTE(review): the void*(void*) signature suggests this is a pthread start
 * routine; the caller is not visible here.
 */
static void *initializer(void *junk)
{
    qthread_initialize();
    /* Fence before raising the flag, so the flag store is ordered after
     * the work done by qthread_initialize(). */
    MACHINE_FENCE;
    chpl_qthread_done_initializing = 1;

    qthread_syncvar_readFF(NULL, &canexit);

    qthread_finalize();
    /* Same ordering for the shutdown flag. */
    MACHINE_FENCE;
    done_finalizing = 1;
    return NULL;
}
void run() { setenv("QT_NUM_SHEPHERDS", boost::lexical_cast<std::string>(this->osthreads_).c_str(), 1); setenv("QT_NUM_WORKERS_PER_SHEPHERD", "1", 1); qthread_initialize(); // Cold run //kernel(); // Hot run results_type results = kernel(); print_results(results); }
/*
 * Thread entry routine that brackets the lifetime of the qthreads runtime.
 *
 * Sequence:
 *   1. initialize qthreads, then set chpl_qthread_done_initializing = 1
 *      while holding done_init_final_mux;
 *   2. call qthread_syncvar_readFF() on `canexit` (presumably this blocks
 *      until some other thread fills `canexit` -- confirm against the
 *      qthreads documentation);
 *   3. finalize qthreads, then set done_finalizing = 1 while holding
 *      done_init_final_mux.
 *
 * The return values of the pthread mutex calls are deliberately discarded.
 * `junk` is unused. Always returns NULL.
 */
static void *initializer(void *junk)
{
    qthread_initialize();

    /* Publish "initialized" under the mutex. */
    (void) pthread_mutex_lock(&done_init_final_mux);  // implicit memory fence
    chpl_qthread_done_initializing = 1;
    (void) pthread_mutex_unlock(&done_init_final_mux);

    qthread_syncvar_readFF(NULL, &canexit);

    qthread_finalize();

    /* Publish "finalized" under the same mutex. */
    (void) pthread_mutex_lock(&done_init_final_mux);  // implicit memory fence
    done_finalizing = 1;
    (void) pthread_mutex_unlock(&done_init_final_mux);

    return NULL;
}
int main(int argc, char *argv[]) { CHECK_VERBOSE(); aligned_t tmp, ret = 0; int retval; long foobar = 1234567890; setenv("QT_MULTINODE","yes",1); qthread_initialize(); my_id = qthread_multinode_rank(); world_size = qthread_multinode_size(); iprintf("(%03d) Rank %d of %d is alive\n", my_id, my_id, world_size); retval = qthread_multinode_register(2, returner); if (retval != 0){ fprintf(stderr, "(%03d) multinode_register returned %d\n", my_id, retval); return 1; } qthread_multinode_run(); if (my_id != 0) return 2; int target = (world_size > 1) ? 1 : 0; retval = qthread_fork_remote(returner, &foobar, &ret, target, sizeof(long)); if (retval != 0) { fprintf(stderr, "(%03d) fork_remote returned %d\n", my_id, retval); return 3; } retval = qthread_readFE(&tmp, &ret); iprintf("(%03d) returner returned %ld\n", my_id, (long) tmp); if (retval != 0) { fprintf(stderr, "(%03d) readFE returned %d (%d)\n", my_id, retval, (int) tmp); return 4; } qthread_finalize(); return (tmp == foobar) ? 0 : 5; }
int main(int argc, char *argv[]) { assert(qthread_initialize() == QTHREAD_SUCCESS); CHECK_VERBOSE(); NUMARG(numincrs, "NUM_INCRS"); // future_init(128); iprintf("%i shepherds\n", qthread_num_shepherds()); iprintf("%i threads\n", qthread_num_workers()); qt_loop_balance_sinc(0, numincrs, sum, NULL); if (threads != numincrs) { iprintf("threads == %lu, not %lu\n", (unsigned long)threads, (unsigned long)numincrs); } assert(threads == numincrs); return 0; }
/*
 * qtimer sanity test followed by the fastrand statistical tests.
 *
 * Measures two back-to-back start/stop intervals, reports them, and asserts
 * that the second one is not negative. Then runs ks_test(), runs() and
 * autocorrelation() before finalizing the runtime. Returns 0.
 */
int main(int   argc,
         char *argv[])
{
    qtimer_t t;

    /* Keep the call outside assert() so it still runs when NDEBUG is set. */
    int const init_status = qthread_initialize();
    assert(init_status == QTHREAD_SUCCESS);
    (void)init_status;

    CHECK_VERBOSE();

    t = qtimer_create();
    assert(t);

    qtimer_start(t);
    qtimer_stop(t);
    if (qtimer_secs(t) == 0) {
        fprintf(stderr, "qtimer_secs(t) reported zero length time.\n");
    } else if (qtimer_secs(t) < 0) {
        fprintf(stderr, "qtimer_secs(t) thinks time went backwards (%g).\n",
                qtimer_secs(t));
    }
    iprintf("time to find self and assert it: %g secs\n", qtimer_secs(t));

    qtimer_start(t);
    qtimer_stop(t);
    assert(qtimer_secs(t) >= 0.0);
    if (qtimer_secs(t) == 0.0) {
        iprintf("inlining reduces calltime to zero (apparently)\n");
    } else {
        iprintf("smallest measurable time: %g secs\n", qtimer_secs(t));
    }

    qtimer_destroy(t);

    // Now to test fastrand
    ks_test();
    runs();
    autocorrelation();

    qthread_finalize();

    return 0;
}
/* * The main procedure simply creates a producer and a consumer task to run in * parallel */ int main(int argc, char *argv[]) { aligned_t t[2]; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(bufferSize, "BUFFERSIZE"); numItems = 8 * bufferSize; NUMARG(numItems, "NUMITEMS"); iprintf("%i threads...\n", qthread_num_shepherds()); buff = malloc(sizeof(aligned_t) * bufferSize); for (unsigned int i = 0; i < bufferSize; ++i) { buff[i] = 0; } qthread_fork(consumer, NULL, &t[0]); qthread_fork(producer, NULL, &t[1]); qthread_readFF(NULL, &t[0]); qthread_readFF(NULL, &t[1]); /* cleanup... unnecessary in general, but for the moment I'm tracking down * errors in the FEB system, so let's clean up */ for (unsigned int i = 0; i < bufferSize; ++i) { qthread_fill(buff + i); } free(buff); iprintf("Success!\n"); return 0; }
int main(int argc, char *argv[]) { int n = 10; int m = 10; num_timesteps = 10; workload = 0; workload_per = 0; workload_var = 0; int print_final = 0; int alltime = 0; CHECK_VERBOSE(); NUMARG(n, "N"); NUMARG(m, "M"); NUMARG(num_timesteps, "TIMESTEPS"); NUMARG(workload, "WORKLOAD"); NUMARG(workload_per, "WORKLOAD_PER"); NUMARG(workload_var, "WORKLOAD_VAR"); NUMARG(print_final, "PRINT_FINAL"); NUMARG(alltime, "ALL_TIME"); assert (n > 0 && m > 0); // Initialize Qthreads assert(qthread_initialize() == 0); qtimer_t alloc_timer = qtimer_create(); qtimer_t init_timer = qtimer_create(); qtimer_t exec_timer = qtimer_create(); // Allocate memory for 3-stage stencil (with boundary padding) qtimer_start(alloc_timer); stencil_t points; points.N = n + 2; points.M = m + 2; for (int s = 0; s < NUM_STAGES; s++) { points.stage[s] = malloc(points.N*sizeof(aligned_t *)); assert(NULL != points.stage[s]); for (int i = 0; i < points.N; i++) { points.stage[s][i] = calloc(points.M, sizeof(aligned_t)); assert(NULL != points.stage[s][i]); } } qtimer_stop(alloc_timer); // Initialize first stage and set boundary conditions qtimer_start(init_timer); for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { qthread_writeF_const(&points.stage[0][i][j], 0); for (int s = 1; s < NUM_STAGES; s++) qthread_empty(&points.stage[s][i][j]); } } for (int i = 0; i < points.N; i++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][i][0], BOUNDARY); qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY); #else points.stage[s][i][0] = BOUNDARY; points.stage[s][i][points.M-1] = BOUNDARY; #endif } } for (int j = 0; j < points.M; j++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][0][j], BOUNDARY); qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY); #else points.stage[s][0][j] = BOUNDARY; points.stage[s][points.N-1][j] = BOUNDARY; #endif } } qtimer_stop(init_timer); // 
Create barrier to synchronize on completion of calculations qtimer_start(exec_timer); points.barrier = qt_feb_barrier_create(n*m+1); // Spawn tasks to start calculating updates at each point update_args_t args = {&points, -1, -1, 1, 1}; for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { args.i = i; args.j = j; qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL); } } // Wait for calculations to finish qt_feb_barrier_enter(points.barrier); qtimer_stop(exec_timer); // Print timing info if (alltime) { fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer)); fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer)); fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer)); } else { fprintf(stdout, "%f\n", qtimer_secs(exec_timer)); } // Print stencils if (print_final) { size_t final = (num_timesteps % NUM_STAGES); iprintf("Stage %lu:\n", prev_stage(prev_stage(final))); print_stage(&points, prev_stage(prev_stage(final))); iprintf("\nStage %lu:\n", prev_stage(final)); print_stage(&points, prev_stage(final)); iprintf("\nStage %lu:\n", final); print_stage(&points, final); } qt_feb_barrier_destroy(points.barrier); qtimer_destroy(alloc_timer); qtimer_destroy(init_timer); qtimer_destroy(exec_timer); // Free allocated memory for (int i = 0; i < points.N; i++) { free(points.stage[0][i]); free(points.stage[1][i]); free(points.stage[2][i]); } free(points.stage[0]); free(points.stage[1]); free(points.stage[2]); return 0; }
int main(int argc, char *argv[]) { aligned_t *t[2]; uint64_t x_value; uint64_t pairs; assert(qthread_initialize() == 0); pairs = qthread_num_shepherds() * 6; CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); NUMARG(pairs, "PAIRS"); t[0] = calloc(pairs, sizeof(aligned_t)); t[1] = calloc(pairs, sizeof(aligned_t)); iprintf("%i threads...\n", qthread_num_shepherds()); iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w); qthread_syncvar_empty(&id); qthread_syncvar_writeF_const(&id, 1); iprintf("id = 0x%lx\n", (unsigned long)id.u.w); { uint64_t tmp = 0; qthread_syncvar_readFF(&tmp, &id); assert(tmp == 1); } iprintf("x's status is: %s (want full (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 1); qthread_syncvar_readFE(NULL, &x); iprintf("x's status became: %s (want empty (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 0); for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_readFF(NULL, &(t[0][i])); qthread_readFF(NULL, &(t[1][i])); } iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_fill(&x); iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_readFF(&x_value, &x); assert(qthread_syncvar_status(&x) == 1); free(t[0]); free(t[1]); if (x_value == iterations - 1) { iprintf("Success! x==%lu\n", (unsigned long)x_value); return 0; } else { fprintf(stderr, "Final value of x=%lu, expected %lu\n", (unsigned long)x_value, (unsigned long)(iterations - 1)); return -1; } }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); // If the operator did not attempt to set a stack size, force // a reasonable lower bound if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE")) setenv("QT_STACK_SIZE", "32768", 0); assert(qthread_initialize() == 0); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); aligned_t donecount = 0; root.dc = &donecount; qthread_empty(&donecount); aligned_t tot = 0; root.acc = &tot; root.expect = 1; qthread_fork_syncvar(visit, &root, NULL); qthread_readFF(NULL, root.dc); total_num_nodes = tot; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned 
long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { qarray *a; distribution_t disttypes[] = { FIXED_HASH, FIXED_FIELDS, ALL_LOCAL, ALL_RAND, ALL_LEAST, DIST_RAND, DIST_STRIPES, DIST_FIELDS, DIST_LEAST }; const char *distnames[] = { "FIXED_HASH", "FIXED_FIELDS", "ALL_LOCAL", "ALL_RAND", "ALL_LEAST", "DIST_RAND", "DIST_STRIPES", "DIST_FIELDS", "DIST_LEAST" }; unsigned int dt_index; unsigned int num_dists = sizeof(disttypes) / sizeof(distribution_t); unsigned int dists = (1 << num_dists) - 1; qthread_initialize(); CHECK_VERBOSE(); NUMARG(dists, "TEST_DISTS"); NUMARG(ELEMENT_COUNT, "ELEMENT_COUNT"); /* iterate over all the different distribution types */ for (dt_index = 0; dt_index < num_dists; dt_index++) { if ((dists & (1 << dt_index)) == 0) { continue; } /* test a basic array of doubles */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(double), disttypes[dt_index], 0, 0); assert(a); iprintf("%s: created basic array of doubles\n", distnames[dt_index]); qarray_iter(a, 0, ELEMENT_COUNT, assign1); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); assert(count == ELEMENT_COUNT); } { size_t i; for (i = 0; i < ELEMENT_COUNT; i++) { double elem = *(double *)qarray_elem_nomigrate(a, i); if (elem != 1.0) { printf ("element %lu is %f instead of 1.0, disttype = %s\n", (unsigned long)i, elem, distnames[dt_index]); assert(elem == 1.0); } } } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); /* now test an array of giant things */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(bigobj), disttypes[dt_index], 0, 0); iprintf("%s: created array of big objects\n", distnames[dt_index]); qarray_iter(a, 0, ELEMENT_COUNT, assignall1); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); // assert(count == 
ELEMENT_COUNT); } { size_t i; char fail = 0; for (i = 0; i < ELEMENT_COUNT; i++) { char *elem = (char *)qarray_elem_nomigrate(a, i); size_t j; for (j = 0; j < sizeof(bigobj); j++) { if (elem[j] != 1) { printf ( "byte %lu of element %lu is %i instead of 1, dt_index = %u\n", (unsigned long)j, (unsigned long)i, elem[j], dt_index); fail = 1; break; } } } assert(fail == 0); } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); /* now test an array of weird-sized things */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(offsize), disttypes[dt_index], 0, 0); iprintf("%s: created array of odd-sized objects\n", distnames[dt_index]); qarray_iter_loop(a, 0, ELEMENT_COUNT, assignoff1, NULL); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); assert(count == ELEMENT_COUNT); } { size_t i; for (i = 0; i < ELEMENT_COUNT; i++) { char *elem = (char *)qarray_elem_nomigrate(a, i); size_t j; for (j = 0; j < sizeof(offsize); j++) { if (elem[j] != 1) { printf ( "byte %lu of element %lu is %i instead of 1, dt_index = %u\n", (unsigned long)j, (unsigned long)i, elem[j], dt_index); assert(elem[j] == 1); } } } } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); } return 0; }
int main(int argc, char *argv[]) { size_t threads, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; double tot = 0.0; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); threads = qthread_num_workers(); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf("%i threads...\n", (int)threads); initme = calloc(threads, sizeof(aligned_t)); assert(initme); rets = malloc(threads * sizeof(aligned_t)); assert(rets); iprintf("Creating a barrier to block %i threads\n", threads); wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 1; i < threads; i++) { void *arg[2] = {wait_on_me, (void*)(intptr_t)i}; qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_barrier_enter(wait_on_me, 0); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); tot += qtimer_secs(t); // reset initme_idx = 1; // check retvals for (i = 1; i < threads; i++) { qthread_readFF(NULL, rets + i); if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Average barrier time = %f\n", tot / iterations); iprintf("Destroying the barrier...\n"); qt_barrier_destroy(wait_on_me); iprintf("Success!\n"); return 0; }
int main(int argc, char *argv[]) { aligned_t return_value = 0; int status, ret; CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED"); status = qthread_initialize(); assert(status == QTHREAD_SUCCESS); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf(" %i threads total\n", qthread_num_workers()); iprintf("Creating the queue...\n"); the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0); assert(the_queue); iprintf("---------------------------------------------------------\n"); iprintf("\tSINGLE THREAD TEST\n\n"); iprintf("1/4 Spawning thread to be queued...\n"); status = qthread_fork(tobequeued, NULL, &return_value); assert(status == QTHREAD_SUCCESS); iprintf("2/4 Waiting for thread to queue itself...\n"); while(qthread_queue_length(the_queue) != 1) qthread_yield(); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/4 Releasing the queue...\n"); qthread_queue_release_all(the_queue); ret = qthread_readFF(NULL, &return_value); assert(ret == QTHREAD_SUCCESS); assert(threads_in == 1); assert(awoke == 1); assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("4/4 Test passed!\n"); iprintf("---------------------------------------------------------\n"); iprintf("\tMULTI THREAD TEST\n\n"); threads_in = 0; awoke = 0; aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED); iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED); for (int i=0; i<THREADS_ENQUEUED; i++) { status = qthread_fork(tobequeued, NULL, retvals + i); assert(status == QTHREAD_SUCCESS); } iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED); while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield(); assert(threads_in == THREADS_ENQUEUED); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/6 Releasing a single thread...\n"); qthread_queue_release_one(the_queue); iprintf("4/6 Waiting for 
that thread to exit\n"); while (awoke == 0) qthread_yield(); assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1)); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("5/6 Releasing the rest of the threads...\n"); qthread_queue_release_all(the_queue); for (int i=0; i<THREADS_ENQUEUED; i++) { ret = qthread_readFF(NULL, retvals + i); assert(ret == QTHREAD_SUCCESS); } assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("6/6 Test passed!\n"); return EXIT_SUCCESS; }
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { size_t depth = 3; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(depth, "TEST_DEPTH"); // Test creating an empty sinc { qt_sinc_t zero_sinc; qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0); qt_sinc_wait(&zero_sinc, NULL); qt_sinc_fini(&zero_sinc); qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0); qt_sinc_expect(three_sinc, 3); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qt_sinc_wait(three_sinc, NULL); qt_sinc_destroy(three_sinc); } qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2); // Spawn additional waits aligned_t rets[3]; { qthread_fork(wait_on_sinc, sinc, &rets[0]); qthread_fork(wait_on_sinc, sinc, &rets[1]); qthread_fork(wait_on_sinc, sinc, &rets[2]); } { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_create(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); for (int i = 0; i < 3; i++) qthread_readFF(NULL, &rets[i]); // Reset the sinc qt_sinc_reset(sinc, 2); // Second use { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_reset(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); qt_sinc_destroy(sinc); return 0; }
int main(int argc, char *argv[]) { size_t threads = 1000, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(threads, "THREADS"); NUMARG(iterations, "ITERATIONS"); initme = (aligned_t *)calloc(threads, sizeof(aligned_t)); assert(initme); rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t)); assert(rets); iprintf("creating the barrier for %zu threads\n", threads + 1); wait_on_me = qt_feb_barrier_create(threads + 1); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 0; i < threads; i++) { qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_feb_barrier_enter(wait_on_me); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); initme_idx = 0; for (i = 0; i < threads; i++) { if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Destroying barrier...\n"); qt_feb_barrier_destroy(wait_on_me); iprintf("Success!\n"); /* this loop shouldn't be necessary... but seems to avoid crashes in rare * cases (in other words there must a race condition in qthread_finalize() * if there are outstanding threads out there) */ for (i = 0; i < threads * 2; i++) { aligned_t tmp = 1; qthread_readFF(&tmp, rets + i); assert(tmp == 0); } return 0; }
/* Returns the value of environment variable `name` parsed as an integer, or
 * `fallback` when the variable is unset or does not parse to a value > 0. */
static int accalt_env_positive_int(const char *name, int fallback)
{
    const char *text = getenv(name);

    if (text != NULL) {
        const int parsed = atoi(text);
        if (parsed > 0) {
            return parsed;
        }
    }
    return fallback;
}

/* Sets environment variable `name` to the decimal representation of `value`. */
static void accalt_export_int(const char *name, int value)
{
    char text[32];  /* large enough for any int, including sign and NUL */

    snprintf(text, sizeof(text), "%d", value);
    setenv(name, text, 1);
}

/* malloc() wrapper that reports the failure and exits instead of returning NULL. */
static void *accalt_checked_malloc(size_t bytes)
{
    void *mem = malloc(bytes);

    if (mem == NULL) {
        fprintf(stderr, "accalt_init: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * Initializes the threading backend selected at compile time (ARGOBOTS,
 * MASSIVETHREADS or QTHREADS) and fills in the global `main_team`.
 *
 * Environment variables:
 *   ACCALT_NUM_THREADS             number of execution streams / workers /
 *                                  shepherds (takes precedence over the
 *                                  backend's own variable)
 *   ACCALT_NUM_POOLS               number of pools (Argobots only)
 *   ACCALT_NUM_WORKERS_PER_THREAD  workers per shepherd (Qthreads only)
 *
 * When an ACCALT_* variable is unset, the backend's own variable
 * (MYTH_WORKER_NUM, QTHREAD_NUM_SHEPHERDS, QTHREAD_NUM_WORKERS_PER_SHEPHERD)
 * is consulted; when that is unset or not a positive number as well, 1 is
 * used. The value finally chosen is always exported to the backend's
 * variable so the backend and `main_team` agree.
 *
 * argc/argv are forwarded to ABT_init() and otherwise unused.
 */
void accalt_init(int argc, char * argv[])
{
    int num_threads = 1;

    (void)argc;
    (void)argv;

    main_team = (accalt_team_t *) accalt_checked_malloc(sizeof (accalt_team_t));

#ifdef ARGOBOTS
    ABT_init(argc, argv);

    /* At least one stream and one pool are required: team[0] and pools[0]
     * are used unconditionally below. */
    num_threads = accalt_env_positive_int("ACCALT_NUM_THREADS", 1);
    int num_pools = accalt_env_positive_int("ACCALT_NUM_POOLS", 1);

    main_team->num_xstreams = num_threads;
    main_team->num_pools = num_pools;
    //printf("Argobots %d ES, %d Pools\n", num_threads, num_pools);
    ABT_xstream_self(&main_team->master);

    main_team->team = (ABT_xstream *) accalt_checked_malloc(sizeof (ABT_xstream) * num_threads);
    main_team->pools = (ABT_pool *) accalt_checked_malloc(sizeof (ABT_pool) * num_pools);

    for (int i = 0; i < num_pools; i++) {
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
                &main_team->pools[i]);
    }

    /* The calling stream becomes team[0] and is attached to pools[0]. */
    ABT_xstream_self(&main_team->team[0]);
    ABT_xstream_set_main_sched_basic(main_team->team[0], ABT_SCHED_DEFAULT,
            1, &main_team->pools[0]);

    /* Remaining streams are assigned to the pools round-robin. */
    for (int i = 1; i < num_threads; i++) {
        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1,
                &main_team->pools[i % main_team->num_pools],
                ABT_SCHED_CONFIG_NULL, &main_team->team[i]);
        ABT_xstream_start(main_team->team[i]);
    }
#endif

#ifdef MASSIVETHREADS
    num_threads = accalt_env_positive_int("ACCALT_NUM_THREADS",
            accalt_env_positive_int("MYTH_WORKER_NUM", 1));
    accalt_export_int("MYTH_WORKER_NUM", num_threads);

    setenv("MYTH_BIND_WORKERS", "1", 1);

    //printf("Massive %d Workers\n", num_threads);
    main_team->num_workers = num_threads;
    myth_init(); //MassiveThreads
#endif

#ifdef QTHREADS
    num_threads = accalt_env_positive_int("ACCALT_NUM_THREADS",
            accalt_env_positive_int("QTHREAD_NUM_SHEPHERDS", 1));
    accalt_export_int("QTHREAD_NUM_SHEPHERDS", num_threads);

    int num_workers_per_thread =
            accalt_env_positive_int("ACCALT_NUM_WORKERS_PER_THREAD",
                    accalt_env_positive_int("QTHREAD_NUM_WORKERS_PER_SHEPHERD", 1));
    accalt_export_int("QTHREAD_NUM_WORKERS_PER_SHEPHERD", num_workers_per_thread);

    if (num_threads == 1 && num_workers_per_thread > 1) {
        setenv("QTHREAD_SHEPHERDS_BOUNDARY", "node", 1);
        setenv("QTHREAD_WORKER_UNIT", "core", 1);
    }
    if (num_threads > 1) {
        setenv("QTHREAD_SHEPHERDS_BOUNDARY", "core", 1);
        setenv("QTHREAD_WORKER_UNIT", "core", 1);
    }
    setenv("QTHREAD_AFFINITY", "yes", 1);

    //printf("Qthreads %d Shepherds, %d Workers_per_shepherd\n", num_threads, num_workers_per_thread);
    main_team->num_shepherds = num_threads;
    main_team->num_workers_per_shepherd = num_workers_per_thread;
    qthread_initialize(); //qthreads
#endif
}
int main(int argc, char *argv[]) { aligned_t *ui_array, *ui_array2; double *d_array, *d_array2; size_t len = 1000000; qtimer_t timer = qtimer_create(); double cumulative_time_qutil = 0.0; double cumulative_time_libc = 0.0; int using_doubles = 0; unsigned long iterations = 10; qthread_initialize(); CHECK_VERBOSE(); printf("%i threads\n", (int)qthread_num_workers()); NUMARG(len, "TEST_LEN"); NUMARG(iterations, "TEST_ITERATIONS"); NUMARG(using_doubles, "TEST_USING_DOUBLES"); printf("using %s\n", using_doubles ? "doubles" : "aligned_ts"); if (using_doubles) { d_array = calloc(len, sizeof(double)); printf("array is %s\n", human_readable(len * sizeof(double))); assert(d_array); // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL); for (unsigned int i = 0; i < len; i++) { d_array[i] = ((double)random()) / ((double)RAND_MAX) + random(); } d_array2 = calloc(len, sizeof(double)); assert(d_array2); // madvise(d_array2,len*sizeof(double), MADV_RANDOM); iprintf("double array generated...\n"); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qutil_qsort(d_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu doubles with qutil took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_qutil); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qsort(d_array2, len, sizeof(double), dcmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_libc /= (double)iterations; printf("sorting %lu doubles with libc took: %f seconds\n", (unsigned long)len, cumulative_time_libc); free(d_array); free(d_array2); } else 
{ ui_array = calloc(len, sizeof(aligned_t)); printf("array is %s\n", human_readable(len * sizeof(aligned_t))); for (unsigned int i = 0; i < len; i++) { ui_array[i] = random(); } ui_array2 = calloc(len, sizeof(aligned_t)); iprintf("ui_array generated...\n"); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qutil_aligned_qsort(ui_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu aligned_ts with qutil took: %f seconds\n", (unsigned long)len, cumulative_time_qutil); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qsort(ui_array2, len, sizeof(double), acmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); } cumulative_time_libc /= (double)iterations; printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_libc); free(ui_array); free(ui_array2); } if (cumulative_time_qutil < cumulative_time_libc) { printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } else { printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } qtimer_destroy(timer); return 0; }