ffwritea(int fd, char *buf, int nbytes, struct ffsw *stat, ...) { struct fdinfo *fio; int ret; bitptr bufptr; int locubc, *pubc, na; /* need a place to put result */ int locfulp; va_list ap; fio = GETIOB(fd); SET_BPTR(bufptr, CPTR2BP(buf)); /* adjust number of bits requested if ubc passed in */ NUMARG(na); locubc = 0; pubc = &locubc; locfulp = FULL; if (na < 4 || na > 6) { errno = FDC_ERR_NOPARM; return(ERR); } va_start(ap, stat); if (na > 4) locfulp = va_arg(ap, int); if (na > 5) pubc = va_arg(ap, int *); CHECK_FIOPTR(fio, stat); ret = XRCALL(fio, writeartn) fio, bufptr, nbytes, stat, locfulp, pubc); return (ret); }
ffweof(int fd, ...) #endif { struct fdinfo *fio; int ret, na; struct ffsw locstat, *pstat; #if !defined(__mips) && !defined(_LITTLE_ENDIAN) va_list ap; #endif fio = GETIOB(fd); #if defined(__mips) || defined(_LITTLE_ENDIAN) na = 1; pstat = &locstat; #else NUMARG(na); if (na < 2) pstat = &locstat; else { va_start(ap, fd); pstat = va_arg(ap, struct ffsw *); } #endif CHECK_FIOPTR(fio, pstat); ret = XRCALL(fio, weofrtn) fio, pstat); #if !defined(__mips) && !defined(_LITTLE_ENDIAN) if (na < 2) errno = locstat.sw_error; #endif return (ret); }
ffbksp(int fd, ...) #endif { struct fdinfo *fio; int ret, na; struct ffsw locstat, *pstat; #ifdef _CRAY va_list ap; #endif fio = GETIOB(fd); #ifdef _CRAY NUMARG(na); if (na < 2) pstat = &locstat; else { va_start(ap, fd); pstat = va_arg(ap, struct ffsw *); } #else pstat = &locstat; na = 1; #endif CHECK_FIOPTR(fio, pstat); ret = XRCALL(fio, backrtn) fio, pstat); /* set errno only if stat was not passed */ if (na < 2) errno = locstat.sw_error; return (ret); }
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { int count = 0; aligned_t max = 0; aligned_t tmp = 0; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(count, "COUNT"); iprintf("Main executing in team %lu (w/ parent %lu)\n", (unsigned long)qt_team_id(), (unsigned long)qt_team_parent_id()); assert(qt_team_id() == default_team_id); assert(qt_team_parent_id() == non_team_id); aligned_t hello_in_team_ret; qthread_fork(hello_in_team, NULL, &hello_in_team_ret); qthread_readFF(&tmp, &hello_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_rets[count]; for (int i = 0; i < count; i++) { qthread_fork_new_team(hello_new_team, NULL, &hello_new_team_rets[i]); } for (int i = 0; i < count; i++) { qthread_readFF(&tmp, &hello_new_team_rets[i]); max = MAX(max, tmp); } aligned_t hello_new_team_in_team_ret; qthread_fork_new_team( hello_new_team_in_team, NULL, &hello_new_team_in_team_ret); qthread_readFF(&tmp, &hello_new_team_in_team_ret); max = MAX(max, tmp); aligned_t hello_new_team_new_team_ret; qthread_fork_new_team( hello_new_team_new_team, NULL, &hello_new_team_new_team_ret); qthread_readFF(&tmp, &hello_new_team_new_team_ret); max = MAX(max, tmp); iprintf("max is %lu\n", (unsigned long)max); if (count + 4 == max) { iprintf("SUCCEEDED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 0; } else { iprintf("FAILED with count %lu and max team id %lu\n", (unsigned long)count, (unsigned long)max); return 1; } }
/* * The main procedure simply creates a producer and a consumer task to run in * parallel */ int main(int argc, char *argv[]) { aligned_t t[2]; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(bufferSize, "BUFFERSIZE"); numItems = 8 * bufferSize; NUMARG(numItems, "NUMITEMS"); iprintf("%i threads...\n", qthread_num_shepherds()); buff = malloc(sizeof(aligned_t) * bufferSize); for (unsigned int i = 0; i < bufferSize; ++i) { buff[i] = 0; } qthread_fork(consumer, NULL, &t[0]); qthread_fork(producer, NULL, &t[1]); qthread_readFF(NULL, &t[0]); qthread_readFF(NULL, &t[1]); /* cleanup... unnecessary in general, but for the moment I'm tracking down * errors in the FEB system, so let's clean up */ for (unsigned int i = 0; i < bufferSize; ++i) { qthread_fill(buff + i); } free(buff); iprintf("Success!\n"); return 0; }
/*
 * _ff_err
 *
 * Placeholder for layer entry points that are not supported.  When invoked
 * with five or six arguments (the read/write[ca] calling sequences) it
 * records FDC_ERR_NOSUP in *stat and returns ERR; any other argument
 * count aborts the process.  Off _CRAY the argument count cannot be
 * queried and is taken to be six.
 */
int
_ff_err(struct fdinfo *fio, bitptr bufptr, size_t nbytes, struct ffsw *stat, int fulp, int *ubc)
{
	int na;

#ifdef	_CRAY
	NUMARG(na);
#else
	na = 6;
#endif

	/* anything other than the read/write[ca] shapes is a caller bug */
	if (na != 6 && na != 5)
		abort();

	_SETERROR(stat, FDC_ERR_NOSUP, 0);
	return(ERR);
}
int ffread(int fd, char *buf, int nbytes, ...) #endif { struct fdinfo *fio; ssize_t ret; int locfulp; bitptr bufptr; int locubc, *pubc, na; /* need a place to put result */ struct ffsw locstat, *pstat; /* need a place to put result */ #if !defined(__mips) && !defined(_LITTLE_ENDIAN) va_list ap; #endif fio = GETIOB(fd); SET_BPTR(bufptr, CPTR2BP(buf)); /* adjust number of bits requested if ubc passed in */ #ifdef _CRAY NUMARG(na); #elif defined(__mips) || defined(_LITTLE_ENDIAN) na = 3; #endif locubc = 0; pubc = &locubc; locfulp = FULL; pstat = &locstat; #if !defined(__mips) && !defined(_LITTLE_ENDIAN) va_start(ap, nbytes); if (na < 3 || na > 6) { errno = FDC_ERR_NOPARM; return(ERR); } if (na > 3) pstat = va_arg(ap, struct ffsw *); if (na > 4) locfulp = va_arg(ap, int); if (na > 5) pubc = va_arg(ap, int *); #endif CHECK_FIOPTR(fio, pstat); ret = XRCALL(fio, readrtn) fio, bufptr, nbytes, pstat, locfulp, pubc); if (na < 4) errno = locstat.sw_error; return (ret); }
/* * _ff_err2 is used when the stat parameter is param #2 */ int _ff_err2( struct fdinfo *fio, struct ffsw *stat) { int na; #ifdef _UNICOS NUMARG(na); #else na = 2; #endif if (na == 2) _SETERROR(stat, FDC_ERR_NOSUP, 0) else abort(); return(ERR); }
int main(int argc, char *argv[]) { assert(qthread_initialize() == QTHREAD_SUCCESS); CHECK_VERBOSE(); NUMARG(numincrs, "NUM_INCRS"); // future_init(128); iprintf("%i shepherds\n", qthread_num_shepherds()); iprintf("%i threads\n", qthread_num_workers()); qt_loop_balance_sinc(0, numincrs, sum, NULL); if (threads != numincrs) { iprintf("threads == %lu, not %lu\n", (unsigned long)threads, (unsigned long)numincrs); } assert(threads == numincrs); return 0; }
/*
 * Task-spawn timing driver.
 *
 * Inside a single-threaded section of an OpenMP parallel region, one
 * untied task runs a loop that creates MT_COUNT untied tasks, each of
 * which calls null_task(NULL).  The interval from just before the outer
 * task is created until after the taskwait is timed.  One line is
 * printed: "<threads> <count> <seconds>".
 */
int main(int   argc,
         char *argv[])
{
    qtimer_t      timer;
    double        total_time = 0.0;
    unsigned long threads    = 1;
    uint64_t      count      = 1048576;

    CHECK_VERBOSE();

    NUMARG(count, "MT_COUNT");
    assert(0 != count);

#pragma omp parallel
    {
#pragma omp single
        {
            timer   = qtimer_create();
            threads = omp_get_num_threads();
            qtimer_start(timer);

#pragma omp task untied
            {
                for (uint64_t i = 0; i < count; i++) {
#pragma omp task untied
                    null_task(NULL);
                }
            }

#pragma omp taskwait
            qtimer_stop(timer);
        }
    }

    total_time = qtimer_secs(timer);
    qtimer_destroy(timer);

    printf("%lu %lu %f\n", threads, (unsigned long)count, total_time);

    return 0;
}
ffsetsp(int fd, ...) { struct fdinfo *fio; struct ffsw locstat, *pstat; int ret, na; va_list ap; fio = GETIOB(fd); NUMARG(na); if (na < 2) pstat = &locstat; else { va_start(ap, fd); pstat = va_arg(ap, struct ffsw *); } CHECK_FIOPTR(fio, pstat); _eov_load(1); /* Call a routine that provides hard references */ /* to eov routines. */ ret = XRCALL(fio, fcntlrtn) fio, FC_SETSP, 1, pstat); if (na < 2) errno = locstat.sw_error; return (ret); }
int main(int argc, char *argv[]) { int n = 10; int m = 10; num_timesteps = 10; workload = 0; workload_per = 0; workload_var = 0; int print_final = 0; int alltime = 0; CHECK_VERBOSE(); NUMARG(n, "N"); NUMARG(m, "M"); NUMARG(num_timesteps, "TIMESTEPS"); NUMARG(workload, "WORKLOAD"); NUMARG(workload_per, "WORKLOAD_PER"); NUMARG(workload_var, "WORKLOAD_VAR"); NUMARG(print_final, "PRINT_FINAL"); NUMARG(alltime, "ALL_TIME"); assert (n > 0 && m > 0); // Initialize Qthreads assert(qthread_initialize() == 0); qtimer_t alloc_timer = qtimer_create(); qtimer_t init_timer = qtimer_create(); qtimer_t exec_timer = qtimer_create(); // Allocate memory for 3-stage stencil (with boundary padding) qtimer_start(alloc_timer); stencil_t points; points.N = n + 2; points.M = m + 2; for (int s = 0; s < NUM_STAGES; s++) { points.stage[s] = malloc(points.N*sizeof(aligned_t *)); assert(NULL != points.stage[s]); for (int i = 0; i < points.N; i++) { points.stage[s][i] = calloc(points.M, sizeof(aligned_t)); assert(NULL != points.stage[s][i]); } } qtimer_stop(alloc_timer); // Initialize first stage and set boundary conditions qtimer_start(init_timer); for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { qthread_writeF_const(&points.stage[0][i][j], 0); for (int s = 1; s < NUM_STAGES; s++) qthread_empty(&points.stage[s][i][j]); } } for (int i = 0; i < points.N; i++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][i][0], BOUNDARY); qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY); #else points.stage[s][i][0] = BOUNDARY; points.stage[s][i][points.M-1] = BOUNDARY; #endif } } for (int j = 0; j < points.M; j++) { for (int s = 0; s < NUM_STAGES; s++) { #ifdef BOUNDARY_SYNC qthread_writeF_const(&points.stage[s][0][j], BOUNDARY); qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY); #else points.stage[s][0][j] = BOUNDARY; points.stage[s][points.N-1][j] = BOUNDARY; #endif } } qtimer_stop(init_timer); // 
Create barrier to synchronize on completion of calculations qtimer_start(exec_timer); points.barrier = qt_feb_barrier_create(n*m+1); // Spawn tasks to start calculating updates at each point update_args_t args = {&points, -1, -1, 1, 1}; for (int i = 1; i < points.N-1; i++) { for (int j = 1; j < points.M-1; j++) { args.i = i; args.j = j; qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL); } } // Wait for calculations to finish qt_feb_barrier_enter(points.barrier); qtimer_stop(exec_timer); // Print timing info if (alltime) { fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer)); fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer)); fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer)); } else { fprintf(stdout, "%f\n", qtimer_secs(exec_timer)); } // Print stencils if (print_final) { size_t final = (num_timesteps % NUM_STAGES); iprintf("Stage %lu:\n", prev_stage(prev_stage(final))); print_stage(&points, prev_stage(prev_stage(final))); iprintf("\nStage %lu:\n", prev_stage(final)); print_stage(&points, prev_stage(final)); iprintf("\nStage %lu:\n", final); print_stage(&points, final); } qt_feb_barrier_destroy(points.barrier); qtimer_destroy(alloc_timer); qtimer_destroy(init_timer); qtimer_destroy(exec_timer); // Free allocated memory for (int i = 0; i < points.N; i++) { free(points.stage[0][i]); free(points.stage[1][i]); free(points.stage[2][i]); } free(points.stage[0]); free(points.stage[1]); free(points.stage[2]); return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); // If the operator did not attempt to set a stack size, force // a reasonable lower bound if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE")) setenv("QT_STACK_SIZE", "32768", 0); assert(qthread_initialize() == 0); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); aligned_t donecount = 0; root.dc = &donecount; qthread_empty(&donecount); aligned_t tot = 0; root.acc = &tot; root.expect = 1; qthread_fork_syncvar(visit, &root, NULL); qthread_readFF(NULL, root.dc); total_num_nodes = tot; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned 
long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / qthread_num_workers()); #endif /* ifdef PRINT_STATS */ return 0; }
int main(int argc, char *argv[]) { aligned_t *t[2]; uint64_t x_value; uint64_t pairs; assert(qthread_initialize() == 0); pairs = qthread_num_shepherds() * 6; CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); NUMARG(pairs, "PAIRS"); t[0] = calloc(pairs, sizeof(aligned_t)); t[1] = calloc(pairs, sizeof(aligned_t)); iprintf("%i threads...\n", qthread_num_shepherds()); iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w); qthread_syncvar_empty(&id); qthread_syncvar_writeF_const(&id, 1); iprintf("id = 0x%lx\n", (unsigned long)id.u.w); { uint64_t tmp = 0; qthread_syncvar_readFF(&tmp, &id); assert(tmp == 1); } iprintf("x's status is: %s (want full (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 1); qthread_syncvar_readFE(NULL, &x); iprintf("x's status became: %s (want empty (and nowait))\n", qthread_syncvar_status(&x) ? "full" : "empty"); assert(qthread_syncvar_status(&x) == 0); for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i])); } for (unsigned int i = 0; i < pairs; ++i) { qthread_readFF(NULL, &(t[0][i])); qthread_readFF(NULL, &(t[1][i])); } iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_fill(&x); iprintf("shouldn't be blocking on x (current status: %s)\n", qthread_syncvar_status(&x) ? "full" : "empty"); qthread_syncvar_readFF(&x_value, &x); assert(qthread_syncvar_status(&x) == 1); free(t[0]); free(t[1]); if (x_value == iterations - 1) { iprintf("Success! x==%lu\n", (unsigned long)x_value); return 0; } else { fprintf(stderr, "Final value of x=%lu, expected %lu\n", (unsigned long)x_value, (unsigned long)(iterations - 1)); return -1; } }
int main(int argc, char *argv[]) { aligned_t return_value = 0; int status, ret; CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED"); status = qthread_initialize(); assert(status == QTHREAD_SUCCESS); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf(" %i threads total\n", qthread_num_workers()); iprintf("Creating the queue...\n"); the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0); assert(the_queue); iprintf("---------------------------------------------------------\n"); iprintf("\tSINGLE THREAD TEST\n\n"); iprintf("1/4 Spawning thread to be queued...\n"); status = qthread_fork(tobequeued, NULL, &return_value); assert(status == QTHREAD_SUCCESS); iprintf("2/4 Waiting for thread to queue itself...\n"); while(qthread_queue_length(the_queue) != 1) qthread_yield(); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/4 Releasing the queue...\n"); qthread_queue_release_all(the_queue); ret = qthread_readFF(NULL, &return_value); assert(ret == QTHREAD_SUCCESS); assert(threads_in == 1); assert(awoke == 1); assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("4/4 Test passed!\n"); iprintf("---------------------------------------------------------\n"); iprintf("\tMULTI THREAD TEST\n\n"); threads_in = 0; awoke = 0; aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED); iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED); for (int i=0; i<THREADS_ENQUEUED; i++) { status = qthread_fork(tobequeued, NULL, retvals + i); assert(status == QTHREAD_SUCCESS); } iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED); while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield(); assert(threads_in == THREADS_ENQUEUED); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/6 Releasing a single thread...\n"); qthread_queue_release_one(the_queue); iprintf("4/6 Waiting for 
that thread to exit\n"); while (awoke == 0) qthread_yield(); assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1)); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("5/6 Releasing the rest of the threads...\n"); qthread_queue_release_all(the_queue); for (int i=0; i<THREADS_ENQUEUED; i++) { ret = qthread_readFF(NULL, retvals + i); assert(ret == QTHREAD_SUCCESS); } assert(qthread_queue_length(the_queue) == 0); assert(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("6/6 Test passed!\n"); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { qarray *a; distribution_t disttypes[] = { FIXED_HASH, FIXED_FIELDS, ALL_LOCAL, ALL_RAND, ALL_LEAST, DIST_RAND, DIST_STRIPES, DIST_FIELDS, DIST_LEAST }; const char *distnames[] = { "FIXED_HASH", "FIXED_FIELDS", "ALL_LOCAL", "ALL_RAND", "ALL_LEAST", "DIST_RAND", "DIST_STRIPES", "DIST_FIELDS", "DIST_LEAST" }; unsigned int dt_index; unsigned int num_dists = sizeof(disttypes) / sizeof(distribution_t); unsigned int dists = (1 << num_dists) - 1; qthread_initialize(); CHECK_VERBOSE(); NUMARG(dists, "TEST_DISTS"); NUMARG(ELEMENT_COUNT, "ELEMENT_COUNT"); /* iterate over all the different distribution types */ for (dt_index = 0; dt_index < num_dists; dt_index++) { if ((dists & (1 << dt_index)) == 0) { continue; } /* test a basic array of doubles */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(double), disttypes[dt_index], 0, 0); assert(a); iprintf("%s: created basic array of doubles\n", distnames[dt_index]); qarray_iter(a, 0, ELEMENT_COUNT, assign1); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); assert(count == ELEMENT_COUNT); } { size_t i; for (i = 0; i < ELEMENT_COUNT; i++) { double elem = *(double *)qarray_elem_nomigrate(a, i); if (elem != 1.0) { printf ("element %lu is %f instead of 1.0, disttype = %s\n", (unsigned long)i, elem, distnames[dt_index]); assert(elem == 1.0); } } } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); /* now test an array of giant things */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(bigobj), disttypes[dt_index], 0, 0); iprintf("%s: created array of big objects\n", distnames[dt_index]); qarray_iter(a, 0, ELEMENT_COUNT, assignall1); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); // assert(count == 
ELEMENT_COUNT); } { size_t i; char fail = 0; for (i = 0; i < ELEMENT_COUNT; i++) { char *elem = (char *)qarray_elem_nomigrate(a, i); size_t j; for (j = 0; j < sizeof(bigobj); j++) { if (elem[j] != 1) { printf ( "byte %lu of element %lu is %i instead of 1, dt_index = %u\n", (unsigned long)j, (unsigned long)i, elem[j], dt_index); fail = 1; break; } } } assert(fail == 0); } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); /* now test an array of weird-sized things */ count = 0; a = qarray_create_configured(ELEMENT_COUNT, sizeof(offsize), disttypes[dt_index], 0, 0); iprintf("%s: created array of odd-sized objects\n", distnames[dt_index]); qarray_iter_loop(a, 0, ELEMENT_COUNT, assignoff1, NULL); iprintf("%s: iterated; now checking work...\n", distnames[dt_index]); if (count != ELEMENT_COUNT) { printf("count = %lu, dt_index = %u\n", (unsigned long)count, dt_index); assert(count == ELEMENT_COUNT); } { size_t i; for (i = 0; i < ELEMENT_COUNT; i++) { char *elem = (char *)qarray_elem_nomigrate(a, i); size_t j; for (j = 0; j < sizeof(offsize); j++) { if (elem[j] != 1) { printf ( "byte %lu of element %lu is %i instead of 1, dt_index = %u\n", (unsigned long)j, (unsigned long)i, elem[j], dt_index); assert(elem[j] == 1); } } } } iprintf("%s: correct result!\n", distnames[dt_index]); qarray_destroy(a); } return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned long tmp = 0; NUMARG(tmp, "UTS_TREE_TYPE"); tree_type = (tree_t)tmp; } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); { unsigned long tmp = 0; NUMARG(tmp, "UTS_SHAPE_FN"); shape_fn = (shape_t)tmp; } NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); nodecount = 1; long retval; { retval = _Cilk_spawn visit(root); _Cilk_sync; } total_num_nodes = retval; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS LOG_UTS_RESULTS_YAML(total_num_nodes, total_time) LOG_ENV_CILK_YAML() #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / __cilkrts_get_nworkers()); #endif /* ifdef PRINT_STATS */ return 0; }
ffopen(const char *name, int flags, ...) { int narg; int cblks; _ffopen_t fd; int retfd; int aifound; mode_t mode; long cbits; va_list ap; union spec_u *fdspec; struct gl_o_inf gloinf; assign_info ai; struct fdinfo *nfio; extern union spec_u *_g_fdc_spec(); struct ffsw *pstat, locstat; #ifdef _CRAY NUMARG(narg); #elif defined(__mips) || defined(_LITTLE_ENDIAN) /* mode is passed only when O_CREAT is set */ if (flags & O_CREAT) narg = 3; else narg = 2; #else narg = 6; #endif mode = 0; cbits = 0; cblks = 0; pstat = &locstat; /* * New usage only allows 5 params. (what does this mean ???) */ va_start(ap, flags); if (narg >= 3) #if defined(BUILD_OS_DARWIN) mode = (mode_t) va_arg(ap, int); #else /* defined(BUILD_OS_DARWIN) */ mode = va_arg(ap, mode_t); #endif /* defined(BUILD_OS_DARWIN) */ if (narg >= 4) cbits = va_arg(ap, long); if (narg >= 5) pstat = va_arg(ap, struct ffsw *); if (narg >= 6) cblks = va_arg(ap, int); va_end(ap); aifound = _assign_asgcmd_info(name, -1, ASN_G_FF | ASN_G_ALL, &ai, NULL, 1); if (aifound == -1) { ERETURN(pstat, errno, 0); } if (aifound == 1 && ai.F_filter_flg) fdspec = &ai.F_filter[0]; else fdspec = NULL; (void) memset(&gloinf, 0, sizeof(gloinf)); gloinf.aip = aifound ? &ai : NULL; fd = _ffopen(name, flags, mode, fdspec, pstat, cbits, cblks, NULL, &gloinf); #if defined(_CRAY1) || defined(__mips) if (fd != _FFOPEN_ERR && MULTI_ON) { nfio = NULL; if (_ff_top_lock(fd, &nfio, pstat) < 0) fd = _FFOPEN_ERR; if (nfio != NULL) fd = (_ffopen_t)nfio; } #endif /* * ffopen returns an int. Call a routine which associates an * int with what is returned by _ffopen */ #if defined(__mips) || defined(_LITTLE_ENDIAN) retfd = _ff_fdinfo_to_int(fd, pstat); #else retfd = (int)fd; #endif /* should check chain of layers here for sanity */ if (narg < 4) errno = locstat.sw_error; return(retfd); }
// ////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { size_t depth = 3; assert(qthread_initialize() == 0); CHECK_VERBOSE(); NUMARG(depth, "TEST_DEPTH"); // Test creating an empty sinc { qt_sinc_t zero_sinc; qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0); qt_sinc_wait(&zero_sinc, NULL); qt_sinc_fini(&zero_sinc); qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0); qt_sinc_expect(three_sinc, 3); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qthread_fork(submit_to_sinc, three_sinc, NULL); qt_sinc_wait(three_sinc, NULL); qt_sinc_destroy(three_sinc); } qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2); // Spawn additional waits aligned_t rets[3]; { qthread_fork(wait_on_sinc, sinc, &rets[0]); qthread_fork(wait_on_sinc, sinc, &rets[1]); qthread_fork(wait_on_sinc, sinc, &rets[2]); } { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_create(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); for (int i = 0; i < 3; i++) qthread_readFF(NULL, &rets[i]); // Reset the sinc qt_sinc_reset(sinc, 2); // Second use { v_args_t args = { depth, sinc }; // These two spawns covered by qt_sinc_reset(...,2) qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL); } qt_sinc_wait(sinc, NULL); qt_sinc_destroy(sinc); return 0; }
int main(int argc, char *argv[]) { size_t threads, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; double tot = 0.0; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); threads = qthread_num_workers(); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf("%i threads...\n", (int)threads); initme = calloc(threads, sizeof(aligned_t)); assert(initme); rets = malloc(threads * sizeof(aligned_t)); assert(rets); iprintf("Creating a barrier to block %i threads\n", threads); wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 1; i < threads; i++) { void *arg[2] = {wait_on_me, (void*)(intptr_t)i}; qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_barrier_enter(wait_on_me, 0); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); tot += qtimer_secs(t); // reset initme_idx = 1; // check retvals for (i = 1; i < threads; i++) { qthread_readFF(NULL, rets + i); if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Average barrier time = %f\n", tot / iterations); iprintf("Destroying the barrier...\n"); qt_barrier_destroy(wait_on_me); iprintf("Success!\n"); return 0; }
int main(int argc, char *argv[]) { aligned_t *ui_array, *ui_array2; double *d_array, *d_array2; size_t len = 1000000; qtimer_t timer = qtimer_create(); double cumulative_time_qutil = 0.0; double cumulative_time_libc = 0.0; int using_doubles = 0; unsigned long iterations = 10; qthread_initialize(); CHECK_VERBOSE(); printf("%i threads\n", (int)qthread_num_workers()); NUMARG(len, "TEST_LEN"); NUMARG(iterations, "TEST_ITERATIONS"); NUMARG(using_doubles, "TEST_USING_DOUBLES"); printf("using %s\n", using_doubles ? "doubles" : "aligned_ts"); if (using_doubles) { d_array = calloc(len, sizeof(double)); printf("array is %s\n", human_readable(len * sizeof(double))); assert(d_array); // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL); for (unsigned int i = 0; i < len; i++) { d_array[i] = ((double)random()) / ((double)RAND_MAX) + random(); } d_array2 = calloc(len, sizeof(double)); assert(d_array2); // madvise(d_array2,len*sizeof(double), MADV_RANDOM); iprintf("double array generated...\n"); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qutil_qsort(d_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu doubles with qutil took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_qutil); for (unsigned int i = 0; i < iterations; i++) { memcpy(d_array2, d_array, len * sizeof(double)); qtimer_start(timer); qsort(d_array2, len, sizeof(double), dcmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n", i, (unsigned long)len, qtimer_secs(timer)); } cumulative_time_libc /= (double)iterations; printf("sorting %lu doubles with libc took: %f seconds\n", (unsigned long)len, cumulative_time_libc); free(d_array); free(d_array2); } else 
{ ui_array = calloc(len, sizeof(aligned_t)); printf("array is %s\n", human_readable(len * sizeof(aligned_t))); for (unsigned int i = 0; i < len; i++) { ui_array[i] = random(); } ui_array2 = calloc(len, sizeof(aligned_t)); iprintf("ui_array generated...\n"); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qutil_aligned_qsort(ui_array2, len); qtimer_stop(timer); cumulative_time_qutil += qtimer_secs(timer); } cumulative_time_qutil /= (double)iterations; printf("sorting %lu aligned_ts with qutil took: %f seconds\n", (unsigned long)len, cumulative_time_qutil); for (int i = 0; i < iterations; i++) { memcpy(ui_array2, ui_array, len * sizeof(aligned_t)); qtimer_start(timer); qsort(ui_array2, len, sizeof(double), acmp); qtimer_stop(timer); cumulative_time_libc += qtimer_secs(timer); } cumulative_time_libc /= (double)iterations; printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n", (unsigned long)len, cumulative_time_libc); free(ui_array); free(ui_array2); } if (cumulative_time_qutil < cumulative_time_libc) { printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } else { printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil); } qtimer_destroy(timer); return 0; }
int main(int argc, char *argv[]) { size_t threads = 1000, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(threads, "THREADS"); NUMARG(iterations, "ITERATIONS"); initme = (aligned_t *)calloc(threads, sizeof(aligned_t)); assert(initme); rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t)); assert(rets); iprintf("creating the barrier for %zu threads\n", threads + 1); wait_on_me = qt_feb_barrier_create(threads + 1); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 0; i < threads; i++) { qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_feb_barrier_enter(wait_on_me); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); initme_idx = 0; for (i = 0; i < threads; i++) { if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Destroying barrier...\n"); qt_feb_barrier_destroy(wait_on_me); iprintf("Success!\n"); /* this loop shouldn't be necessary... but seems to avoid crashes in rare * cases (in other words there must a race condition in qthread_finalize() * if there are outstanding threads out there) */ for (i = 0; i < threads * 2; i++) { aligned_t tmp = 1; qthread_readFF(&tmp, rets + i); assert(tmp == 0); } return 0; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned int tmp = (unsigned int)tree_type; NUMARG(tmp, "UTS_TREE_TYPE"); if (tmp <= BALANCED) { tree_type = (tree_t)tmp; } else { fprintf(stderr, "invalid tree type\n"); return EXIT_FAILURE; } tmp = (unsigned int)shape_fn; NUMARG(tmp, "UTS_SHAPE_FN"); if (tmp <= FIXED) { shape_fn = (shape_t)tmp; } else { fprintf(stderr, "invalid shape function\n"); return EXIT_FAILURE; } } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); #pragma omp parallel #pragma omp single #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); nodecount = 1; long retval; #pragma omp parallel #pragma omp single nowait #pragma omp task untied retval = visit(&root, root.num_children); total_num_nodes = retval; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / omp_get_num_threads()); #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, 
total_num_nodes / total_time / omp_get_num_threads()); #endif /* ifdef PRINT_STATS */ return 0; }