// Start the main task. // // Warning: this method is not called within a Qthread task context. Do // not use methods that require task context (e.g., task-local storage). void chpl_task_callMain(void (*chpl_main)(void)) { const chpl_bool initial_serial_state = false; const c_localeid_t initial_locale_id = default_locale_id; const chapel_wrapper_args_t wrapper_args = {chpl_main, NULL, NULL, 0, {initial_serial_state, initial_locale_id, NULL, chpl_malloc, chpl_calloc, chpl_realloc, chpl_free}}; qthread_debug(CHAPEL_CALLS, "[%d] begin chpl_task_callMain()\n", chpl_localeID); default_serial_state = initial_serial_state; #ifdef QTHREAD_MULTINODE qthread_debug(CHAPEL_BEHAVIOR, "[%d] calling spr_unify\n", chpl_localeID); int const rc = spr_unify(); assert(SPR_OK == rc); #endif /* QTHREAD_MULTINODE */ qthread_fork_syncvar(chapel_wrapper, &wrapper_args, &exit_ret); qthread_syncvar_readFF(NULL, &exit_ret); qthread_debug(CHAPEL_BEHAVIOR, "[%d] main task finished\n", chpl_localeID); qthread_debug(CHAPEL_CALLS, "[%d] end chpl_task_callMain()\n", chpl_localeID); }
// Start the main task. // // Warning: this method is not called within a Qthread task context. Do // not use methods that require task context (e.g., task-local storage). void chpl_task_callMain(void (*chpl_main)(void)) { const chpl_qthread_wrapper_args_t wrapper_args = {chpl_main, NULL, NULL, 0, false, {c_sublocid_any_val, false}}; qthread_debug(CHAPEL_CALLS, "[%d] begin chpl_task_callMain()\n", chpl_localeID); #ifdef QTHREAD_MULTINODE qthread_debug(CHAPEL_BEHAVIOR, "[%d] calling spr_unify\n", chpl_localeID); int const rc = spr_unify(); assert(SPR_OK == rc); #endif /* QTHREAD_MULTINODE */ qthread_fork_syncvar(chapel_wrapper, &wrapper_args, &exit_ret); qthread_syncvar_readFF(NULL, &exit_ret); qthread_debug(CHAPEL_BEHAVIOR, "[%d] main task finished\n", chpl_localeID); qthread_debug(CHAPEL_CALLS, "[%d] end chpl_task_callMain()\n", chpl_localeID); }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); // If the operator did not attempt to set a stack size, force // a reasonable lower bound if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE")) setenv("QT_STACK_SIZE", "32768", 0); assert(qthread_initialize() == 0); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); aligned_t donecount = 0; root.dc = &donecount; qthread_empty(&donecount); aligned_t tot = 0; root.acc = &tot; root.expect = 1; qthread_fork_syncvar(visit, &root, NULL); qthread_readFF(NULL, root.dc); total_num_nodes = tot; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #endif /* ifdef PRINT_STATS */ return 0; }