// allow barrer initization from C void INTERNAL qt_global_barrier_init(size_t size, int debug) { /*{{{ */ if (MBar == NULL) { MBar = qt_barrier_create(size, REGION_BARRIER); assert(MBar); } } /*}}} */
int main(int argc, char *argv[]) { int n = 10; int m = 10; num_timesteps = 10; workload = 0; workload_per = 0; workload_var = 0; int print_final = 0; int alltime = 0; CHECK_VERBOSE(); NUMARG(n, "N"); NUMARG(m, "M"); NUMARG(num_timesteps, "TIMESTEPS"); NUMARG(workload, "WORKLOAD"); NUMARG(workload_per, "WORKLOAD_PER"); NUMARG(workload_var, "WORKLOAD_VAR"); NUMARG(print_final, "PRINT_FINAL"); NUMARG(alltime, "ALL_TIME"); assert (n > 0 && m > 0); // Initialize Qthreads assert(qthread_initialize() == 0); qtimer_t alloc_timer = qtimer_create(); qtimer_t init_timer = qtimer_create(); qtimer_t exec_timer = qtimer_create(); // Allocate memory for 3-stage stencil (with boundary padding) qtimer_start(alloc_timer); stencil_t points; points.N = n + 2; points.M = m + 2; for (int s = 0; s < NUM_STAGES; s++) { points.stage[s] = malloc(points.N*sizeof(aligned_t *)); assert(NULL != points.stage[s]); for (int i = 0; i < points.N; i++) { points.stage[s][i] = calloc(points.M, sizeof(aligned_t)); assert(NULL != points.stage[s][i]); } } qtimer_stop(alloc_timer); // Initialize first stage and set boundary conditions qtimer_start(init_timer); for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { qthread_writeF_const(&points.stage[0][i][j], 0); for (int s = 1; s < NUM_STAGES; s++) qthread_empty(&points.stage[s][i][j]); } } for (int i = 0; i < points.N; i++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][i][0], BOUNDARY); qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY); #else points.stage[s][i][0] = BOUNDARY; points.stage[s][i][points.M-1] = BOUNDARY; #endif } } for (int j = 0; j < points.M; j++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][0][j], BOUNDARY); qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY); #else points.stage[s][0][j] = BOUNDARY; points.stage[s][points.N-1][j] = BOUNDARY; #endif } } qtimer_stop(init_timer); // Create barrier to synchronize on completion of calculations qtimer_start(exec_timer); points.barrier = qt_barrier_create(n*m+1, REGION_BARRIER); // Spawn tasks to start calculating updates at each point update_args_t args = {&points, -1, -1, 1, 1}; for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { args.i = i; args.j = j; qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL); } } // Wait for calculations to finish qt_barrier_enter(points.barrier); qtimer_stop(exec_timer); // Print timing info if (alltime) { fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer)); fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer)); fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer)); } else { fprintf(stdout, "%f\n", qtimer_secs(exec_timer)); } // Print stencils if (print_final) { size_t final = (num_timesteps % NUM_STAGES); iprintf("Stage %lu:\n", prev_stage(prev_stage(final))); print_stage(&points, prev_stage(prev_stage(final))); iprintf("\nStage %lu:\n", prev_stage(final)); print_stage(&points, prev_stage(final)); iprintf("\nStage %lu:\n", final); print_stage(&points, final); } qt_barrier_destroy(points.barrier); qtimer_destroy(alloc_timer); qtimer_destroy(init_timer); qtimer_destroy(exec_timer); // Free allocated memory for (int i = 0; i < points.N; i++) { free(points.stage[0][i]); free(points.stage[1][i]); free(points.stage[2][i]); } free(points.stage[0]); free(points.stage[1]); free(points.stage[2]); return 0; }
int main(int argc, char *argv[]) { size_t threads, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; double tot = 0.0; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); threads = qthread_num_workers(); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf("%i threads...\n", (int)threads); initme = calloc(threads, sizeof(aligned_t)); assert(initme); rets = malloc(threads * sizeof(aligned_t)); assert(rets); iprintf("Creating a barrier to block %i threads\n", threads); wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 1; i < threads; i++) { void *arg[2] = {wait_on_me, (void*)(intptr_t)i}; qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_barrier_enter(wait_on_me, 0); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); tot += qtimer_secs(t); // reset initme_idx = 1; // check retvals for (i = 1; i < threads; i++) { qthread_readFF(NULL, rets + i); if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Average barrier time = %f\n", tot / iterations); iprintf("Destroying the barrier...\n"); qt_barrier_destroy(wait_on_me); iprintf("Success!\n"); return 0; }