/*
 * Portable "read when full" helper.
 *
 * On the MTA the native readff() primitive is applied to the address of
 * `target`; when USING_QTHREADS is set the value is fetched through
 * qthread_readFF() into a temporary; otherwise `target` is returned by a
 * plain read with no synchronization.
 */
T mt_readff(T& target)
{
#if defined(__MTA__)
    return readff(&target);
#elif USING_QTHREADS
    T value;

    qthread_readFF(&value, &target);
    return value;
#else
    return target;
#endif
}
int main(int argc, char *argv[]) { aligned_t ret; qthread_init(2); CHECK_VERBOSE(); assert(qthread_num_shepherds() == 2); iprintf("now to fork to shepherd 0...\n"); qthread_fork_to(checkres, (void *)0, &ret, 0); qthread_readFF(&ret, &ret); iprintf("success in forking to shepherd 0!\n"); iprintf("now to fork to shepherd 1...\n"); qthread_fork_to(checkres, (void *)1, &ret, 1); qthread_readFF(&ret, &ret); iprintf("success in forking to shepherd 1!\n"); iprintf("now to fork the migrant...\n"); qthread_fork(migrant, NULL, &ret); iprintf("success in forking migrant!\n"); qthread_readFF(&ret, &ret); iprintf("migrant returned successfully!\n"); return 0; }
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { int count = 0; aligned_t max = 0; aligned_t tmp = 0; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(count, "COUNT"); iprintf("Main executing in team %lu (w/ parent %lu)\n", (unsigned long)qt_team_id(), (unsigned long)qt_team_parent_id()); assert(qt_team_id() == default_team_id); assert(qt_team_parent_id() == non_team_id); aligned_t hello_in_team_ret; qthread_fork(hello_in_team, NULL, &hello_in_team_ret); qthread_readFF(&tmp, &hello_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_rets[count]; for (int i = 0; i < count; i++) { qthread_fork_new_team(hello_new_team, NULL, &hello_new_team_rets[i]); } for (int i = 0; i < count; i++) { qthread_readFF(&tmp, &hello_new_team_rets[i]); max = MAX(max, tmp); } aligned_t hello_new_team_in_team_ret; qthread_fork_new_team( hello_new_team_in_team, NULL, &hello_new_team_in_team_ret); qthread_readFF(&tmp, &hello_new_team_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_new_team_ret; qthread_fork_new_team( hello_new_team_new_team, NULL, &hello_new_team_new_team_ret); qthread_readFF(&tmp, &hello_new_team_new_team_ret); max = MAX(max, tmp); iprintf("max is %lu\n", (unsigned long)max); if (count + 4 == max) { iprintf("SUCCEEDED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 0; } else { iprintf("FAILED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 1; } }
// Test that writeFF waits for empty var to be filled, writes, and leaves full. // Requires that only one worker is running. Basically does: // 1: empty var // 1: fork(writeFF) // 1: yields // 2: starts runnning // 2: hits writeFF, and yields since var is empty // 1: writeEF // 1: hits readFF on forked task and yield // 2: running again, finishes writeFF, task returns // 1: readFF competes, finishes static void testWriteFFWaits(void) { aligned_t ret; concurrent_t=45; qthread_empty(&concurrent_t); assert(qthread_num_workers() == 1); iprintf("1: Forking writeFF wrapper\n"); qthread_fork_to(writeFF_wrapper, NULL, &ret, qthread_shep()); iprintf("1: Forked, now yielding to 2\n"); qthread_yield(); iprintf("1: Back from yield\n"); // verify that writeFF has not completed assert(qthread_feb_status(&concurrent_t) == 0); assert(concurrent_t != 55); iprintf("1: Writing EF\n"); qthread_writeEF_const(&concurrent_t, 35); // wait for writeFF wrapper to complete qthread_readFF(NULL, &ret); // veify that writeFF completed and that FEB is full iprintf("1: concurrent_t=%d\n", concurrent_t); assert(qthread_feb_status(&concurrent_t) == 1); assert(concurrent_t == 55); }
int qthread_multinode_run(void) { aligned_t val; if (0 == initialized) { return 1; } qthread_debug(MULTINODE_CALLS, "[%d] begin qthread_multinode_run\n", my_rank); qthread_internal_net_driver_barrier(); if (0 != my_rank) { struct die_msg_t msg; qthread_readFF(&val, &time_to_die); qthread_debug(MULTINODE_DETAILS, "[%d] time to die\n", my_rank); msg.my_rank = my_rank; qthread_internal_net_driver_send(0, DIE_MSG_TAG, &msg, sizeof(msg)); qthread_finalize(); exit(0); } qthread_debug(MULTINODE_CALLS, "[%d] end qthread_multinode_run\n", my_rank); return QTHREAD_SUCCESS; }
// // Broadcast the value of 'id'th entry in chpl_private_broadcast_table // on the calling locale onto every other locale. This is done to set // up global constants of simple scalar types (primarily). // void chpl_comm_broadcast_private(int id, int32_t size, int32_t tid) { int i; bcast_private_args_t *payload; PROFILE_INCR(profile_comm_broadcast_private,1); qthread_debug(CHAPEL_CALLS, "[%d] begin id=%d, size=%d, tid=%d\n", chpl_localeID, id, size, tid); payload = chpl_mem_allocMany(1, sizeof(bcast_private_args_t) + size, CHPL_RT_MD_COMM_PRIVATE_BROADCAST_DATA, 0, 0); payload->id = id; payload->size = size; memcpy(payload->data, chpl_private_broadcast_table[id], size); qthread_debug(CHAPEL_DETAILS, "[%d] payload={.id=%d; .size=%d; .data=?}\n", chpl_localeID, payload->id, payload->size); aligned_t rets[chpl_numLocales]; for (i = 0; i < chpl_numLocales; i++) { if (i != chpl_localeID) { qthread_fork_remote(bcast_private, payload, &rets[i], i, sizeof(bcast_private_args_t) + size); } } for (i = 0; i < chpl_numLocales; i++) { if (i != chpl_localeID) { qthread_readFF(&rets[i], &rets[i]); } } chpl_mem_free(payload,0,0); qthread_debug(CHAPEL_CALLS, "[%d] end id=%d, size=%d, tid=%d\n", chpl_localeID, id, size, tid); }
/*
 * Typed convenience overload of qthread_readFF().
 *
 * QTHREAD_CHECKSIZE(T) is applied to the element type first; both pointers
 * are then reinterpreted as aligned_t pointers and forwarded to the
 * aligned_t version, whose status code is returned unchanged.
 */
inline int qthread_readFF(T *const dest, const T *const src)
{
    QTHREAD_CHECKSIZE(T);

    aligned_t *const dest_word = (aligned_t *)dest;
    aligned_t *const src_word  = (aligned_t *)src;

    return qthread_readFF(dest_word, src_word);
}
int main(int argc, char *argv[]) { aligned_t rets[NUM_THREADS]; qtimer_t timer = qtimer_create(); double cumulative_time = 0.0; if (qthread_initialize() != QTHREAD_SUCCESS) { fprintf(stderr, "qthread library could not be initialized!\n"); exit(EXIT_FAILURE); } CHECK_VERBOSE(); for (int iteration = 0; iteration < 10; iteration++) { qtimer_start(timer); for (int i = 0; i < NUM_THREADS; i++) { qthread_fork(qincr, NULL, &(rets[i])); } for (int i = 0; i < NUM_THREADS; i++) { qthread_readFF(NULL, &(rets[i])); } qtimer_stop(timer); iprintf("\ttest iteration %i: %f secs\n", iteration, qtimer_secs(timer)); cumulative_time += qtimer_secs(timer); } printf("qthread time: %f\n", cumulative_time / 10.0); return 0; }
/*
 * Complete a ULT using whichever threading backend was selected at build
 * time.  Each backend section is compiled independently:
 *   - ARGOBOTS:       ABT_thread_free(ult)
 *   - MASSIVETHREADS: myth_join(*ult, NULL)
 *   - QTHREADS:       qthread_readFF(NULL, ult), i.e. wait on the ULT's word
 *
 * NOTE(review): the Argobots path frees the handle instead of calling an
 * explicit join; presumably the free waits for termination -- confirm.
 */
void accalt_ult_join(ACCALT_ult *ult)
{
#if defined(ARGOBOTS)
    ABT_thread_free(ult);
#endif

#if defined(MASSIVETHREADS)
    myth_join(*ult, NULL);
#endif

#if defined(QTHREADS)
    qthread_readFF(NULL, ult);
#endif
}
/*
 * Complete a tasklet using whichever threading backend was selected at
 * build time.  Each backend section is compiled independently:
 *   - ARGOBOTS:       ABT_task_free(tasklet)
 *   - MASSIVETHREADS: myth_join(*tasklet, NULL)
 *   - QTHREADS:       qthread_readFF(NULL, tasklet), i.e. wait on its word
 *
 * NOTE(review): the Argobots path frees the handle instead of calling an
 * explicit join; presumably the free waits for termination -- confirm.
 */
void accalt_tasklet_join(ACCALT_tasklet *tasklet)
{
#if defined(ARGOBOTS)
    ABT_task_free(tasklet);
#endif

#if defined(MASSIVETHREADS)
    myth_join(*tasklet, NULL);
#endif

#if defined(QTHREADS)
    qthread_readFF(NULL, tasklet);
#endif
}
static void Run (ObjT *obj, const RetV & ret, FptrT fptr, const Arg1V &arg1, const Arg2V &arg2, const Arg3V &arg3, const Arg4V &arg4, const Arg5V &arg5, int start, int stop, int step = 1) { bool join = true; int total, steptd, tdc, tdc_pow2, round_total, base_count; if (step == 1) { total = (stop - start); } else { total = (stop - start) / step; if (((stop - start) % step) != 0) { total++; } } SCALE_TD_POW2(total, tdc_pow2); tdc = 1 << tdc_pow2; steptd = step << tdc_pow2; base_count = total >> tdc_pow2; round_total = base_count << tdc_pow2; switch (TypeC) { case mt_loop_traits::ParNoJoin: join = false; case mt_loop_traits::Par: { aligned_t *thr; if (join) { thr = new aligned_t[tdc]; } for (int i = 0; i < tdc; i++) { int count = base_count + (((round_total + i) < total) ? 1 : 0); qthread_fork(run_qtd<Iter>, ITER(start, steptd, count), join ? (thr + i) : NULL); start += step; } if (join) { for (int i = 0; i < tdc; i++) qthread_readFF(thr + i, thr + i); delete[] thr; } } break; } }
/*
 * Task body: report the current team and its parent, check that the parent
 * is `non_team_id`, then fork `hello_new_team` in a further new team and
 * wait for it.
 *
 * Returns the larger of this task's team id and the value returned by the
 * nested task.  `arg_` is unused.
 */
static aligned_t hello_new_team_new_team(void *arg_)
{
    unsigned int id        = qt_team_id();
    unsigned int parent_id = qt_team_parent_id();
    aligned_t    nested_ret;

    iprintf("`hello_new_team_new_team` executing in team %lu (w/ parent %lu)\n",
            (unsigned long)id, (unsigned long)parent_id);
    assert(parent_id == non_team_id);

    /* spawn the nested team and block until its task has returned */
    qthread_fork_new_team(hello_new_team, NULL, &nested_ret);
    qthread_readFF(&nested_ret, &nested_ret);

    return MAX(id, nested_ret);
}
static inline qutil_qsort_iprets_t qutil_qsort_inner_partitioner(double *array, const size_t length, const double pivot) { /*{{{*/ /* choose the number of threads to use */ const size_t numthreads = length / MT_LOOP_CHUNK + ((length % MT_LOOP_CHUNK) ? 1 : 0); /* calculate the megachunk information for determining the array lengths * each thread will be fed. */ const size_t megachunk_size = MT_CHUNKSIZE * numthreads; /* just used as a boolean test */ const size_t extra_chunks = length % megachunk_size; size_t megachunks = length / (MT_CHUNKSIZE * numthreads); qutil_qsort_iprets_t retval = { ((aligned_t)-1), 0 }; aligned_t *rets; struct qutil_qsort_args *args; size_t i; rets = MALLOC(sizeof(aligned_t) * numthreads); args = MALLOC(sizeof(struct qutil_qsort_args) * numthreads); /* spawn threads to do the partitioning */ for (i = 0; i < numthreads; i++) { args[i].array = array + (i * MT_CHUNKSIZE); args[i].offset = i * MT_CHUNKSIZE; args[i].pivot = pivot; args[i].jump = (numthreads - 1) * MT_CHUNKSIZE + 1; args[i].furthest_leftwall = &retval.leftwall; args[i].furthest_rightwall = &retval.rightwall; if (extra_chunks != 0) { args[i].length = megachunks * (megachunk_size) + MT_CHUNKSIZE; if (args[i].length + args[i].offset >= length) { args[i].length = length - args[i].offset; megachunks--; } } else { args[i].length = length - megachunk_size + MT_CHUNKSIZE; } /* qutil_qsort_partition(args+i); */ qthread_fork((qthread_f)qutil_qsort_partition, args + i, rets + i); } for (i = 0; i < numthreads; i++) { qthread_readFF(NULL, rets + i); } FREE(args, sizeof(struct qutil_qsort_args) * numthreads); FREE(rets, sizeof(aligned_t) * numthreads); return retval; } /*}}}*/
/* * The main procedure simply creates a producer and a consumer task to run in * parallel */ int main(int argc, char *argv[]) { aligned_t t[2]; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(bufferSize, "BUFFERSIZE"); numItems = 8 * bufferSize; NUMARG(numItems, "NUMITEMS"); iprintf("%i threads...\n", qthread_num_shepherds()); buff = malloc(sizeof(aligned_t) * bufferSize); for (unsigned int i = 0; i < bufferSize; ++i) { buff[i] = 0; } qthread_fork(consumer, NULL, &t[0]); qthread_fork(producer, NULL, &t[1]); qthread_readFF(NULL, &t[0]); qthread_readFF(NULL, &t[1]); /* cleanup... unnecessary in general, but for the moment I'm tracking down * errors in the FEB system, so let's clean up */ for (unsigned int i = 0; i < bufferSize; ++i) { qthread_fill(buff + i); } free(buff); iprintf("Success!\n"); return 0; }
// Notes: // - Each task receives distinct copy of parent // - Copy of child is shallow, be careful with `state` member static aligned_t visit(void *args_) { node_t *parent = (node_t *)args_; int parent_height = parent->height; int num_children = parent->num_children; aligned_t expect = parent->expect; aligned_t num_descendants[num_children]; aligned_t sum_descendants = 1; if (num_children != 0) { node_t child __attribute__((aligned(8))); aligned_t donec = 0; // Spawn children, if any child.height = parent_height + 1; child.dc = &donec; child.expect = num_children; qthread_empty(&donec); for (int i = 0; i < num_children; i++) { child.acc = &num_descendants[i]; for (int j = 0; j < num_samples; j++) { rng_spawn(parent->state.state, child.state.state, i); } child.num_children = calc_num_children(&child); qthread_fork_syncvar_copyargs(visit, &child, sizeof(node_t), NULL); } // Wait for children to finish up, accumulate descendants counts if (donec != expect) qthread_readFF(NULL, &donec); for (int i = 0; i < num_children; i++) { sum_descendants += num_descendants[i]; } } *parent->acc = sum_descendants; if (qthread_incr(parent->dc, 1) + 1 == expect) { qthread_fill(parent->dc); } return 0; }
// // remote fork should launch a thread on locale that runs function f // passing it arg where the size of arg is stored in arg_size // notes: // multiple forks to the same locale should be handled concurrently // void chpl_comm_fork(int locale, chpl_fn_int_t fid, void *arg, int32_t arg_size, int32_t arg_tid) { aligned_t ret; PROFILE_INCR(profile_comm_fork,1); PROFILE_BIN_INCR(profile_comm_fork_size,arg_size); qthread_debug(CHAPEL_CALLS, "[%d] begin locale=%d, fid=%d, arg_size=%d\n", chpl_localeID, locale, fid, arg_size); qthread_debug(CHAPEL_BEHAVIOR, "[%d] (blocking) forking fn %d with arg-size %d\n", chpl_localeID, fid, arg_size); qthread_empty(&ret); spawn(locale, fid, arg, arg_size, arg_tid, &ret); qthread_readFF(NULL, &ret); qthread_debug(CHAPEL_CALLS, "[%d] end locale=%d, fid=%d, arg_size=%d\n", chpl_localeID, locale, fid, arg_size); }
int qthread_multinode_multistop(void) { aligned_t val; qthread_debug(MULTINODE_CALLS, "[%d] begin qthread_multinode_multistop\n", my_rank); if (0 != my_rank) { struct die_msg_t msg; qthread_readFF(&val, &time_to_die); qthread_debug(MULTINODE_DETAILS, "[%d] time to die\n", my_rank); msg.my_rank = my_rank; qthread_internal_net_driver_send(0, DIE_MSG_TAG, &msg, sizeof(msg)); exit(0); // triggers atexit(net_cleanup) } qthread_debug(MULTINODE_CALLS, "[%d] end qthread_multinode_multistop\n", my_rank); return QTHREAD_SUCCESS; }
// // initializes the communications package // set chpl_localeID and chpl_numLocales // notes: // * Called with the argc/argv pair passed to main() // void chpl_comm_init(int *argc_p, char ***argv_p) { qthread_debug(CHAPEL_CALLS, "[%d] begin\n", chpl_localeID); // Set stack size >= 8 pages (lower bound derived from experience) unsigned long const default_stack_size = 32768; unsigned long const stack_size = qt_internal_get_env_num("STACK_SIZE", default_stack_size, default_stack_size); char stack_size_str[100] = {0}; if (default_stack_size > stack_size) { snprintf(stack_size_str, 99, "%lu", default_stack_size); } else { snprintf(stack_size_str, 99, "%lu", stack_size); } setenv("QT_STACK_SIZE", stack_size_str, 1); /* Initialize SPR: * * - All locales participate in initialization. */ int const rc = spr_init(SPR_SPMD, chapel_remote_functions); assert(SPR_OK == rc); /* Record locale info */ chpl_localeID = spr_locale_id(); chpl_numLocales = spr_num_locales(); qthread_debug(CHAPEL_BEHAVIOR, "[%d] initialized SPR with %d locales\n", chpl_localeID, chpl_numLocales); /* Set up segment information table */ #undef malloc seginfo_table = malloc(chpl_numLocales * sizeof(seginfo_t)); #define malloc dont_use_malloc_use_chpl_mem_allocMany_instead if (0 == chpl_localeID) { int i; int global_table_size = chpl_numGlobalsOnHeap * sizeof(void *) + getpagesize(); #undef malloc void * global_table = malloc(global_table_size); #define malloc dont_use_malloc_use_chpl_mem_allocMany_instead // Make sure segment is page-aligned. seginfo_table[0].addr = ((void *)(((uint8_t *)global_table) + (((((uintptr_t)global_table) % getpagesize()) == 0) ? 
0 : (getpagesize() - (((uintptr_t)global_table) % getpagesize()))))); seginfo_table[0].size = global_table_size; for (i = 1; i < chpl_numLocales; i++) { seginfo_table[i].addr = NULL; seginfo_table[i].size = 0; } } chpl_comm_barrier("waiting for seginfo table setup at root"); // Broadcast segment info if (0 == chpl_localeID) { int i; aligned_t rets[chpl_numLocales]; for (i = 1; i < chpl_numLocales; i++) { qthread_fork_remote(bcast_seginfo, seginfo_table, &rets[i], i, chpl_numLocales * sizeof(seginfo_t)); } for (i = 1; i < chpl_numLocales; i++) { qthread_readFF(&rets[i], &rets[i]); } } chpl_comm_barrier("waiting for seginfo table bcast"); qthread_debug(CHAPEL_CALLS, "[%d] end\n", chpl_localeID); }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); // If the operator did not attempt to set a stack size, force // a reasonable lower bound if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE")) setenv("QT_STACK_SIZE", "32768", 0); assert(qthread_initialize() == 0); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); aligned_t donecount = 0; root.dc = &donecount; qthread_empty(&donecount); aligned_t tot = 0; root.acc = &tot; root.expect = 1; qthread_fork_syncvar(visit, &root, NULL); qthread_readFF(NULL, root.dc); total_num_nodes = tot; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned 
long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { aligned_t *t[2]; uint64_t x_value; uint64_t pairs; assert(qthread_initialize() == 0); pairs = qthread_num_shepherds() * 6; CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); NUMARG(pairs, "PAIRS"); t[0] = calloc(pairs, sizeof(aligned_t)); t[1] = calloc(pairs, sizeof(aligned_t)); iprintf("%i threads...\n", qthread_num_shepherds()); iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w); qthread_syncvar_empty(&id); qthread_syncvar_writeF_const(&id, 1); iprintf("id = 0x%lx\n", (unsigned long)id.u.w); { uint64_t tmp = 0; qthread_syncvar_readFF(&tmp, &id); assert(tmp == 1); } iprintf("x's status is: %s (want full (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 1); qthread_syncvar_readFE(NULL, &x); iprintf("x's status became: %s (want empty (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 0); for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_readFF(NULL, &(t[0][i])); qthread_readFF(NULL, &(t[1][i])); } iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_fill(&x); iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_readFF(&x_value, &x); assert(qthread_syncvar_status(&x) == 1); free(t[0]); free(t[1]); if (x_value == iterations - 1) { iprintf("Success! x==%lu\n", (unsigned long)x_value); return 0; } else { fprintf(stderr, "Final value of x=%lu, expected %lu\n", (unsigned long)x_value, (unsigned long)(iterations - 1)); return -1; } }
/*
 * Parallel merge sort of `array` (`length` doubles), in place.
 *
 * Phase 1 forks one qutil_mergesort_presort task per 10-element chunk and
 * joins them.  Phase 2 repeatedly forks qutil_mergesort_inner on adjacent
 * chunk pairs, joining after every pass and doubling the chunk size until it
 * exceeds `length`.
 *
 * The scratch arrays for phase 2 are sized for the first (largest) pass and
 * their element count is remembered separately, so they are released exactly
 * once and with the size they were allocated with.  Previously the phase-1
 * pointers were left dangling and freed a second time whenever
 * `length` was smaller than one chunk.
 */
void API_FUNC qutil_mergesort(double *array, size_t length)
{   /*{{{*/
    /* first, decide how much of the array each thread gets */
    size_t chunksize = 10;
    /* second, decide how many threads to use... */
    size_t                       numthreads;
    size_t                       scratch_count = 0; /* capacity of the phase-2 arrays */
    aligned_t                   *rets          = NULL;
    struct qutil_mergesort_args *args          = NULL;
    size_t                       i;

    assert(qthread_library_initialized);

    /* nothing to sort; also avoids zero-sized allocations below */
    if (length == 0) {
        return;
    }

    /* third, an initial qsort() */
    numthreads = length / chunksize;
    if (length - (numthreads * chunksize)) {
        numthreads++;
    }
    rets = MALLOC(sizeof(aligned_t) * numthreads);
    assert(rets);
    args = MALLOC(sizeof(struct qutil_mergesort_args) * numthreads);
    assert(args);
    for (i = 0; i < numthreads; i++) {
        args[i].array       = array;
        args[i].first_start = i * chunksize;
        args[i].first_stop  = (i + 1) * chunksize - 1;
        if (args[i].first_stop >= length) {
            args[i].first_stop = length - 1;
        }

        qthread_fork((qthread_f)qutil_mergesort_presort, args + i, rets + i);
    }
    for (i = 0; i < numthreads; i++) {
        qthread_readFF(NULL, rets + i);
    }
    FREE(rets, sizeof(aligned_t) * numthreads);
    FREE(args, sizeof(struct qutil_mergesort_args) * numthreads);
    /* never leave freed pointers behind for the cleanup at the end */
    rets = NULL;
    args = NULL;

    /* prepare scratch memory: one slot per merge of the first pass, which
     * issues ceil((length - chunksize) / (2 * chunksize)) merges */
    if (chunksize <= length) {
        scratch_count = (length - chunksize) / (2 * chunksize);
        if ((length - chunksize) - (2 * chunksize * scratch_count)) {
            scratch_count++;
        }
    }
    if (scratch_count > 0) {
        rets = MALLOC(sizeof(aligned_t) * scratch_count);
        assert(rets);
        args = MALLOC(sizeof(struct qutil_mergesort_args) * scratch_count);
        assert(args);
    }

    /* now, commence with the merging */
    while (chunksize <= length) {
        i          = 0;
        numthreads = 0;
        while (i < length - chunksize) {
            args[numthreads].array        = array;
            args[numthreads].first_start  = i;
            args[numthreads].first_stop   = i + chunksize - 1;
            args[numthreads].second_start = i + chunksize;
            args[numthreads].second_stop  =
                ((i + 2 * chunksize - 1) < (length - 1)) ? (i + 2 * chunksize - 1) : (length - 1);
            qthread_fork((qthread_f)qutil_mergesort_inner, args + numthreads,
                         rets + numthreads);
            i += 2 * chunksize;
            numthreads++;
        }
        for (i = 0; i < numthreads; i++) {
            qthread_readFF(NULL, rets + i);
        }
        chunksize *= 2;
    }

    if (scratch_count > 0) {
        FREE(rets, sizeof(aligned_t) * scratch_count);
        FREE(args, sizeof(struct qutil_mergesort_args) * scratch_count);
    }
} /*}}}*/
int main(int argc, char *argv[]) { aligned_t return_value = 0; int status, ret; CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED"); status = qthread_initialize(); assert(status == QTHREAD_SUCCESS); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf(" %i threads total\n", qthread_num_workers()); iprintf("Creating the queue...\n"); the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0); assert(the_queue); iprintf("---------------------------------------------------------\n"); iprintf("\tSINGLE THREAD TEST\n\n"); iprintf("1/4 Spawning thread to be queued...\n"); status = qthread_fork(tobequeued, NULL, &return_value); assert(status == QTHREAD_SUCCESS); iprintf("2/4 Waiting for thread to queue itself...\n"); while(qthread_queue_length(the_queue) != 1) qthread_yield(); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/4 Releasing the queue...\n"); qthread_queue_release_all(the_queue); ret = qthread_readFF(NULL, &return_value); assert(ret == QTHREAD_SUCCESS); assert(threads_in == 1); assert(awoke == 1); assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("4/4 Test passed!\n"); iprintf("---------------------------------------------------------\n"); iprintf("\tMULTI THREAD TEST\n\n"); threads_in = 0; awoke = 0; aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED); iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED); for (int i=0; i<THREADS_ENQUEUED; i++) { status = qthread_fork(tobequeued, NULL, retvals + i); assert(status == QTHREAD_SUCCESS); } iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED); while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield(); assert(threads_in == THREADS_ENQUEUED); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/6 Releasing a single thread...\n"); qthread_queue_release_one(the_queue); iprintf("4/6 Waiting for 
that thread to exit\n"); while (awoke == 0) qthread_yield(); assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1)); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("5/6 Releasing the rest of the threads...\n"); qthread_queue_release_all(the_queue); for (int i=0; i<THREADS_ENQUEUED; i++) { ret = qthread_readFF(NULL, retvals + i); assert(ret == QTHREAD_SUCCESS); } assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("6/6 Test passed!\n"); return EXIT_SUCCESS; }
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { size_t depth = 3; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(depth, "TEST_DEPTH"); // Test creating an empty sinc { qt_sinc_t zero_sinc; qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0); qt_sinc_wait(&zero_sinc, NULL); qt_sinc_fini(&zero_sinc); qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0); qt_sinc_expect(three_sinc, 3); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qt_sinc_wait(three_sinc, NULL); qt_sinc_destroy(three_sinc); } qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2); // Spawn additional waits aligned_t rets[3]; { qthread_fork(wait_on_sinc, sinc, &rets[0]); qthread_fork(wait_on_sinc, sinc, &rets[1]); qthread_fork(wait_on_sinc, sinc, &rets[2]); } { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_create(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); for (int i = 0; i < 3; i++) qthread_readFF(NULL, &rets[i]); // Reset the sinc qt_sinc_reset(sinc, 2); // Second use { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_reset(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); qt_sinc_destroy(sinc); return 0; }
int main(int argc, char *argv[]) { size_t num_edges; aligned_t * rets; vertex_t * edges; for (int i = 0; i < NUM_VERTICES; i++) { in_degrees[i] = 0; } /* Initialize SPR in SPMD mode */ qthread_f actions[2] = {incr_in_degree, NULL}; spr_init(SPR_SPMD, actions); here = spr_locale_id(); num_locales = spr_num_locales(); if (0 == here) { printf("Running with %d locales\n", num_locales); } rng_init(rng_state.state, time(NULL) * here); /* Create local portion of the graph */ indices[0] = 0; for (int i = 1; i < NUM_VERTICES + 1; i++) { indices[i] = indices[i-1] + random_vertex(); } for (int i = 0; i < NUM_VERTICES + 1; i++) { printf("[%03d] indices[%d]: %lu\n", here, i, indices[i]); } num_edges = indices[NUM_VERTICES]; edges = malloc(num_edges * sizeof(vertex_t)); for (int i = 0; i < num_edges; i++) { edges[i].lid = random_locale(); edges[i].vid = random_vertex(); } for (int i = 0; i < num_edges; i++) { printf("[%03d] edges[%d]: (%lu,%lu)\n", here, i, edges[i].lid, edges[i].vid); } /* TODO: barrier */ /* Fill in-degrees property map */ rets = malloc(num_edges * sizeof(aligned_t)); for (int i = 0; i < NUM_VERTICES; i++) { for (int j = indices[i]; j < indices[i+1]; j++) { printf("[%03d] spawning incr of edge[%d] = (%lu,%lu)\n", here, j, edges[j].lid, edges[j].vid); qthread_fork_remote(incr_in_degree, /* action */ &(edges[j].vid), /* local vertex id */ &rets[j], /* feb */ edges[j].lid, /* locale */ sizeof(vertex_id_t)); } } for (int i = 0; i < num_edges; i++) { qthread_readFF(&rets[i], &rets[i]); } /* Print in-degrees */ for (int i = 0; i < NUM_VERTICES; i++) { printf("[%03d] in-degree(%lu) = %lu\n", here, i, in_degrees[i]); } /* Free up allocated resources */ free(rets); free(edges); return 0; }
int main(int argc, char *argv[]) { size_t threads, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; double tot = 0.0; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); threads = qthread_num_workers(); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf("%i threads...\n", (int)threads); initme = calloc(threads, sizeof(aligned_t)); assert(initme); rets = malloc(threads * sizeof(aligned_t)); assert(rets); iprintf("Creating a barrier to block %i threads\n", threads); wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 1; i < threads; i++) { void *arg[2] = {wait_on_me, (void*)(intptr_t)i}; qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_barrier_enter(wait_on_me, 0); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); tot += qtimer_secs(t); // reset initme_idx = 1; // check retvals for (i = 1; i < threads; i++) { qthread_readFF(NULL, rets + i); if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Average barrier time = %f\n", tot / iterations); iprintf("Destroying the barrier...\n"); qt_barrier_destroy(wait_on_me); iprintf("Success!\n"); return 0; }
int main(int argc, char *argv[]) { size_t threads = 1000, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(threads, "THREADS"); NUMARG(iterations, "ITERATIONS"); initme = (aligned_t *)calloc(threads, sizeof(aligned_t)); assert(initme); rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t)); assert(rets); iprintf("creating the barrier for %zu threads\n", threads + 1); wait_on_me = qt_feb_barrier_create(threads + 1); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 0; i < threads; i++) { qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_feb_barrier_enter(wait_on_me); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); initme_idx = 0; for (i = 0; i < threads; i++) { if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Destroying barrier...\n"); qt_feb_barrier_destroy(wait_on_me); iprintf("Success!\n"); /* this loop shouldn't be necessary... but seems to avoid crashes in rare * cases (in other words there must a race condition in qthread_finalize() * if there are outstanding threads out there) */ for (i = 0; i < threads * 2; i++) { aligned_t tmp = 1; qthread_readFF(&tmp, rets + i); assert(tmp == 0); } return 0; }