Example #1
0
ffwritea(int fd, char *buf, int nbytes, struct ffsw *stat, ...)
        { 
        struct fdinfo *fio;
        int ret; 
        bitptr bufptr;   
        int locubc, *pubc, na;        /* need a place to put result */ 
	int locfulp;
	va_list ap;

        fio = GETIOB(fd);
        SET_BPTR(bufptr, CPTR2BP(buf));
        /* adjust number of bits requested if ubc passed in */ 
        NUMARG(na);
	locubc = 0;
        pubc = &locubc;
        locfulp = FULL;
	if (na < 4 || na > 6)
		{
		errno = FDC_ERR_NOPARM;
		return(ERR);
                }
	va_start(ap, stat);
	if (na > 4)
		locfulp = va_arg(ap, int);
	if (na > 5)
		pubc = va_arg(ap, int *);
	CHECK_FIOPTR(fio, stat);
        ret = XRCALL(fio, writeartn) fio, bufptr, nbytes,
						stat, locfulp, pubc);
        return (ret);
        }
Example #2
0
ffweof(int fd, ...)
#endif
	{
	struct fdinfo *fio;
	int ret, na;
	struct ffsw locstat, *pstat;
#if	!defined(__mips) && !defined(_LITTLE_ENDIAN)
	va_list ap;
#endif

	fio = GETIOB(fd);
#if	defined(__mips) || defined(_LITTLE_ENDIAN)
	na = 1;
	pstat = &locstat;
#else
	NUMARG(na);
	if (na < 2)
		pstat = &locstat;
	else
		{
		va_start(ap, fd);
		pstat = va_arg(ap, struct ffsw *);
		}
#endif
	CHECK_FIOPTR(fio, pstat);
	ret = XRCALL(fio, weofrtn) fio, pstat);
#if	!defined(__mips) && !defined(_LITTLE_ENDIAN)
	if (na < 2)
		errno = locstat.sw_error;
#endif
	return (ret);
	}
Example #3
0
ffbksp(int fd, ...)
#endif
	{
	struct fdinfo *fio;
	int ret, na;
	struct ffsw locstat, *pstat;
#ifdef _CRAY
	va_list ap;
#endif

	fio = GETIOB(fd);
#ifdef _CRAY
	NUMARG(na);
	if (na < 2)
		pstat = &locstat;
	else
		{
		va_start(ap, fd);
		pstat = va_arg(ap, struct ffsw *);
		}
#else
	pstat = &locstat;
	na = 1;
#endif
	CHECK_FIOPTR(fio, pstat);
	ret = XRCALL(fio, backrtn) fio, pstat);
	/* set errno only if stat was not passed */
	if (na < 2)
		errno = locstat.sw_error;
	return (ret);
	}
Example #4
0
// //////////////////////////////////////////////////////////////////////////////
int main(int   argc,
         char *argv[])
{
    int count = 0;

    aligned_t max = 0;
    aligned_t tmp = 0;

    assert(qthread_initialize() == 0);

    CHECK_VERBOSE();
    NUMARG(count, "COUNT");

    iprintf("Main executing in team %lu (w/ parent %lu)\n", 
        (unsigned long)qt_team_id(), (unsigned long)qt_team_parent_id());
    assert(qt_team_id() == default_team_id);
    assert(qt_team_parent_id() == non_team_id);

    aligned_t hello_in_team_ret;
    qthread_fork(hello_in_team, NULL, &hello_in_team_ret);
    qthread_readFF(&tmp, &hello_in_team_ret);
    max = MAX(max, tmp);

    aligned_t hello_new_team_rets[count];
    for (int i = 0; i < count; i++) {
        qthread_fork_new_team(hello_new_team, NULL, &hello_new_team_rets[i]);
    }
    for (int i = 0; i < count; i++) {
        qthread_readFF(&tmp, &hello_new_team_rets[i]);
        max = MAX(max, tmp);
    }

    aligned_t hello_new_team_in_team_ret;
    qthread_fork_new_team(
        hello_new_team_in_team, NULL, &hello_new_team_in_team_ret);
    qthread_readFF(&tmp, &hello_new_team_in_team_ret);
    max = MAX(max, tmp);

    aligned_t hello_new_team_new_team_ret;
    qthread_fork_new_team(
        hello_new_team_new_team, NULL, &hello_new_team_new_team_ret);
    qthread_readFF(&tmp, &hello_new_team_new_team_ret);
    max = MAX(max, tmp);

    iprintf("max is %lu\n", (unsigned long)max);

    if (count + 4 == max) {
        iprintf("SUCCEEDED with count %lu and max team id %lu\n",
            (unsigned long)count,
            (unsigned long)max);
        return 0;
    } else {
        iprintf("FAILED with count %lu and max team id %lu\n",
            (unsigned long)count,
            (unsigned long)max);
        return 1;
    }
}
Example #5
0
/*
 * The main procedure simply creates a producer and a consumer task to run in
 * parallel
 */
int main(int argc,
         char *argv[])
{
    aligned_t t[2];

    assert(qthread_initialize() == 0);

    CHECK_VERBOSE();
    NUMARG(bufferSize, "BUFFERSIZE");
    numItems = 8 * bufferSize;
    NUMARG(numItems, "NUMITEMS");

    iprintf("%i threads...\n", qthread_num_shepherds());

    buff = malloc(sizeof(aligned_t) * bufferSize);
    for (unsigned int i = 0; i < bufferSize; ++i) {
        buff[i] = 0;
    }

    qthread_fork(consumer, NULL, &t[0]);
    qthread_fork(producer, NULL, &t[1]);
    qthread_readFF(NULL, &t[0]);
    qthread_readFF(NULL, &t[1]);

    /* cleanup... unnecessary in general, but for the moment I'm tracking down
     * errors in the FEB system, so let's clean up */
    for (unsigned int i = 0; i < bufferSize; ++i) {
	qthread_fill(buff + i);
    }

    free(buff);

    iprintf("Success!\n");

    return 0;
}
Example #6
0
int
_ff_err(struct fdinfo *fio, bitptr bufptr, size_t nbytes, struct ffsw *stat, int fulp, int *ubc)
{
	int na;

#ifdef	_CRAY
	NUMARG(na);
#else
	na = 6;
#endif
	if (na == 6 || na == 5) /* if read/write[ca] */
		_SETERROR(stat, FDC_ERR_NOSUP, 0)
	else
		abort();
	return(ERR);
}
Example #7
0
int
ffread(int fd, char *buf, int nbytes, ...)
#endif
	{
	struct fdinfo *fio;
	ssize_t ret;
	int  locfulp;
	bitptr bufptr;
	int locubc, *pubc, na;	/* need a place to put result */
	struct ffsw locstat, *pstat;	/* need a place to put result */
#if	!defined(__mips) && !defined(_LITTLE_ENDIAN)
	va_list ap;
#endif

	fio = GETIOB(fd);
	SET_BPTR(bufptr, CPTR2BP(buf));
	/* adjust number of bits requested if ubc passed in */
#ifdef _CRAY
	NUMARG(na);
#elif	defined(__mips) || defined(_LITTLE_ENDIAN)
	na = 3;
#endif
	locubc = 0;
	pubc = &locubc;
	locfulp = FULL;
	pstat = &locstat;
#if	!defined(__mips) && !defined(_LITTLE_ENDIAN)
	va_start(ap, nbytes);
	if (na < 3 || na > 6)
		{
		errno = FDC_ERR_NOPARM;
		return(ERR);
		}
	if (na > 3)
		pstat = va_arg(ap, struct ffsw *);
	if (na > 4)
		locfulp = va_arg(ap, int);
	if (na > 5)
		pubc = va_arg(ap, int *);
#endif
	CHECK_FIOPTR(fio, pstat);
	ret = XRCALL(fio, readrtn) fio, bufptr, nbytes,
						pstat, locfulp, pubc);
	if (na < 4)
		errno = locstat.sw_error;
	return (ret);
	}
Example #8
0
/*
 * _ff_err2 is used when the stat parameter is param #2
 */
int
_ff_err2(
struct fdinfo	*fio,
struct ffsw	*stat)
{
	int na;

#ifdef	_UNICOS
	NUMARG(na);
#else
	na = 2;
#endif
	if (na == 2)
		_SETERROR(stat, FDC_ERR_NOSUP, 0)
	else
		abort();
	return(ERR);
}
Example #9
0
int main(int   argc,
         char *argv[])
{
    assert(qthread_initialize() == QTHREAD_SUCCESS);
    CHECK_VERBOSE();
    NUMARG(numincrs, "NUM_INCRS");
    // future_init(128);
    iprintf("%i shepherds\n", qthread_num_shepherds());
    iprintf("%i threads\n", qthread_num_workers());

    qt_loop_balance_sinc(0, numincrs, sum, NULL);

    if (threads != numincrs) {
        iprintf("threads == %lu, not %lu\n", (unsigned long)threads, (unsigned long)numincrs);
    }
    assert(threads == numincrs);

    return 0;
}
Example #10
0
int main(int   argc,
         char *argv[])
{
    uint64_t      count    = 1048576;
    unsigned long threads  = 1;

    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    NUMARG(count, "MT_COUNT");
    assert(0 != count);

#pragma omp parallel
#pragma omp single
    {
        timer   = qtimer_create();
        threads = omp_get_num_threads();

        qtimer_start(timer);
#pragma omp task untied
        for (uint64_t i = 0; i < count; i++) {
#pragma omp task untied
            null_task(NULL);
        }
#pragma omp taskwait
        qtimer_stop(timer);
    }

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

    printf("%lu %lu %f\n",
           threads,
           (unsigned long)count,
           total_time);

    return 0;
}
Example #11
0
ffsetsp(int fd, ...)
	{
	struct fdinfo *fio;
	struct ffsw locstat, *pstat;
	int ret, na;
	va_list ap;

	fio = GETIOB(fd);
	NUMARG(na);
	if (na < 2)
		pstat = &locstat;
	else
		{
		va_start(ap, fd);
		pstat = va_arg(ap, struct ffsw *);
		}
	CHECK_FIOPTR(fio, pstat);
	_eov_load(1);	/* Call a routine that provides hard references */
			/* to eov routines. */
	ret = XRCALL(fio, fcntlrtn) fio, FC_SETSP, 1, pstat);
	if (na < 2)
		errno = locstat.sw_error;
	return (ret);
	}
Example #12
0
int main(int argc, char *argv[])
{
    int n = 10;
    int m = 10;
    num_timesteps = 10;
    workload = 0;
    workload_per = 0;
    workload_var = 0;
    int print_final = 0;
    int alltime = 0;

    CHECK_VERBOSE();
    NUMARG(n, "N");
    NUMARG(m, "M");
    NUMARG(num_timesteps, "TIMESTEPS");
    NUMARG(workload, "WORKLOAD");
    NUMARG(workload_per, "WORKLOAD_PER");
    NUMARG(workload_var, "WORKLOAD_VAR");
    NUMARG(print_final, "PRINT_FINAL");
    NUMARG(alltime, "ALL_TIME");

    assert (n > 0 && m > 0);

    // Initialize Qthreads
    assert(qthread_initialize() == 0);

    qtimer_t alloc_timer = qtimer_create();
    qtimer_t init_timer = qtimer_create();
    qtimer_t exec_timer = qtimer_create();

    // Allocate memory for 3-stage stencil (with boundary padding)
    qtimer_start(alloc_timer);
    stencil_t points;
    points.N = n + 2;
    points.M = m + 2;

    for (int s = 0; s < NUM_STAGES; s++) {
        points.stage[s] = malloc(points.N*sizeof(aligned_t *));
        assert(NULL != points.stage[s]);
        for (int i = 0; i < points.N; i++) {
            points.stage[s][i] = calloc(points.M, sizeof(aligned_t));
            assert(NULL != points.stage[s][i]);
        }
    }
    qtimer_stop(alloc_timer);

    // Initialize first stage and set boundary conditions
    qtimer_start(init_timer);
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            qthread_writeF_const(&points.stage[0][i][j], 0);
            for (int s = 1; s < NUM_STAGES; s++)
                qthread_empty(&points.stage[s][i][j]);
        }
    }
    for (int i = 0; i < points.N; i++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][i][0], BOUNDARY);
            qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY);
#else
            points.stage[s][i][0] = BOUNDARY;
            points.stage[s][i][points.M-1] = BOUNDARY;
#endif
        }
    }
    for (int j = 0; j < points.M; j++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][0][j], BOUNDARY);
            qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY);
#else
            points.stage[s][0][j] = BOUNDARY;
            points.stage[s][points.N-1][j] = BOUNDARY;
#endif
        }
    }
    qtimer_stop(init_timer);

    // Create barrier to synchronize on completion of calculations
    qtimer_start(exec_timer);
    points.barrier = qt_feb_barrier_create(n*m+1);

    // Spawn tasks to start calculating updates at each point
    update_args_t args = {&points, -1, -1, 1, 1};
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            args.i = i;
            args.j = j;
            qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL);
        }
    }

    // Wait for calculations to finish
    qt_feb_barrier_enter(points.barrier);
    qtimer_stop(exec_timer);

    // Print timing info
    if (alltime) {
        fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer));
        fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer));
        fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer));
    } else {
        fprintf(stdout, "%f\n", qtimer_secs(exec_timer));
    }

    // Print stencils
    if (print_final) {
        size_t final = (num_timesteps % NUM_STAGES);
        iprintf("Stage %lu:\n", prev_stage(prev_stage(final)));
        print_stage(&points, prev_stage(prev_stage(final)));
        iprintf("\nStage %lu:\n", prev_stage(final));
        print_stage(&points, prev_stage(final));
        iprintf("\nStage %lu:\n", final);
        print_stage(&points, final);
    }

    qt_feb_barrier_destroy(points.barrier);
    qtimer_destroy(alloc_timer);
    qtimer_destroy(init_timer);
    qtimer_destroy(exec_timer);

    // Free allocated memory
    for (int i = 0; i < points.N; i++) {
        free(points.stage[0][i]);
        free(points.stage[1][i]);
        free(points.stage[2][i]);
    }
    free(points.stage[0]);
    free(points.stage[1]);
    free(points.stage[2]);

    return 0;
}
Example #13
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

    // If the operator did not attempt to set a stack size, force
    // a reasonable lower bound
    if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE"))
        setenv("QT_STACK_SIZE", "32768", 0);

    assert(qthread_initialize() == 0);

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);
    aligned_t donecount = 0;
    root.dc = &donecount;
    qthread_empty(&donecount);
    aligned_t tot = 0;
    root.acc = &tot;
    root.expect = 1;

    qthread_fork_syncvar(visit, &root, NULL);
    qthread_readFF(NULL, root.dc);
    total_num_nodes = tot;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#endif /* ifdef PRINT_STATS */

    return 0;
}
int main(int argc,
         char *argv[])
{
    aligned_t *t[2];
    uint64_t x_value;

    uint64_t pairs;

    assert(qthread_initialize() == 0);
    pairs = qthread_num_shepherds() * 6;

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");
    NUMARG(pairs, "PAIRS");

    t[0] = calloc(pairs, sizeof(aligned_t));
    t[1] = calloc(pairs, sizeof(aligned_t));

    iprintf("%i threads...\n", qthread_num_shepherds());
    iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w);

    qthread_syncvar_empty(&id);
    qthread_syncvar_writeF_const(&id, 1);
    iprintf("id = 0x%lx\n", (unsigned long)id.u.w);
    {
        uint64_t tmp = 0;
        qthread_syncvar_readFF(&tmp, &id);
        assert(tmp == 1);
    }
    iprintf("x's status is: %s (want full (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 1);
    qthread_syncvar_readFE(NULL, &x);
    iprintf("x's status became: %s (want empty (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 0);
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i]));
    }
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i]));
    }
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_readFF(NULL, &(t[0][i]));
        qthread_readFF(NULL, &(t[1][i]));
    }
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_fill(&x);
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_readFF(&x_value, &x);
    assert(qthread_syncvar_status(&x) == 1);

    free(t[0]);
    free(t[1]);

    if (x_value == iterations - 1) {
        iprintf("Success! x==%lu\n", (unsigned long)x_value);
        return 0;
    } else {
        fprintf(stderr, "Final value of x=%lu, expected %lu\n",
                (unsigned long)x_value, (unsigned long)(iterations - 1));
        return -1;
    }
}
Example #15
0
int main(int   argc,
         char *argv[])
{
    aligned_t return_value = 0;
    int status, ret;

    CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output
    NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED");

    status = qthread_initialize();
    assert(status == QTHREAD_SUCCESS);

    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("  %i threads total\n", qthread_num_workers());

    iprintf("Creating the queue...\n");
    the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0);
    assert(the_queue);

    iprintf("---------------------------------------------------------\n");
    iprintf("\tSINGLE THREAD TEST\n\n");

    iprintf("1/4 Spawning thread to be queued...\n");
    status = qthread_fork(tobequeued, NULL, &return_value);
    assert(status == QTHREAD_SUCCESS);

    iprintf("2/4 Waiting for thread to queue itself...\n");
    while(qthread_queue_length(the_queue) != 1) qthread_yield();
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/4 Releasing the queue...\n");
    qthread_queue_release_all(the_queue);

    ret = qthread_readFF(NULL, &return_value);
    assert(ret == QTHREAD_SUCCESS);

    assert(threads_in == 1);
    assert(awoke == 1);
    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);
    iprintf("4/4 Test passed!\n");

    iprintf("---------------------------------------------------------\n");
    iprintf("\tMULTI THREAD TEST\n\n");

    threads_in = 0;
    awoke = 0;
    aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED);
    iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED);
    for (int i=0; i<THREADS_ENQUEUED; i++) {
        status = qthread_fork(tobequeued, NULL, retvals + i);
        assert(status == QTHREAD_SUCCESS);
    }

    iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED);
    while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield();
    assert(threads_in == THREADS_ENQUEUED);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/6 Releasing a single thread...\n");
    qthread_queue_release_one(the_queue);

    iprintf("4/6 Waiting for that thread to exit\n");
    while (awoke == 0) qthread_yield();

    assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1));
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("5/6 Releasing the rest of the threads...\n");
    qthread_queue_release_all(the_queue);

    for (int i=0; i<THREADS_ENQUEUED; i++) {
        ret = qthread_readFF(NULL, retvals + i);
        assert(ret == QTHREAD_SUCCESS);
    }

    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("6/6 Test passed!\n");

    return EXIT_SUCCESS;
}
Example #16
0
int main(int argc,
         char *argv[])
{
    qarray *a;
    distribution_t disttypes[] = {
        FIXED_HASH, FIXED_FIELDS,
        ALL_LOCAL, ALL_RAND, ALL_LEAST,
        DIST_RAND, DIST_STRIPES, DIST_FIELDS, DIST_LEAST
    };
    const char *distnames[] = {
        "FIXED_HASH", "FIXED_FIELDS",
        "ALL_LOCAL", "ALL_RAND", "ALL_LEAST",
        "DIST_RAND", "DIST_STRIPES", "DIST_FIELDS", "DIST_LEAST"
    };
    unsigned int dt_index;
    unsigned int num_dists = sizeof(disttypes) / sizeof(distribution_t);
    unsigned int dists = (1 << num_dists) - 1;

    qthread_initialize();
    CHECK_VERBOSE();
    NUMARG(dists, "TEST_DISTS");
    NUMARG(ELEMENT_COUNT, "ELEMENT_COUNT");

    /* iterate over all the different distribution types */
    for (dt_index = 0; dt_index < num_dists; dt_index++) {
        if ((dists & (1 << dt_index)) == 0) {
            continue;
        }
        /* test a basic array of doubles */
        count = 0;
        a = qarray_create_configured(ELEMENT_COUNT, sizeof(double),
                                     disttypes[dt_index], 0, 0);
        assert(a);
        iprintf("%s: created basic array of doubles\n", distnames[dt_index]);
        qarray_iter(a, 0, ELEMENT_COUNT, assign1);
        iprintf("%s: iterated; now checking work...\n", distnames[dt_index]);
        if (count != ELEMENT_COUNT) {
            printf("count = %lu, dt_index = %u\n", (unsigned long)count,
                   dt_index);
            assert(count == ELEMENT_COUNT);
        }
        {
            size_t i;

            for (i = 0; i < ELEMENT_COUNT; i++) {
                double elem = *(double *)qarray_elem_nomigrate(a, i);

                if (elem != 1.0) {
                    printf
                        ("element %lu is %f instead of 1.0, disttype = %s\n",
                        (unsigned long)i, elem, distnames[dt_index]);
                    assert(elem == 1.0);
                }
            }
        }
        iprintf("%s: correct result!\n", distnames[dt_index]);
        qarray_destroy(a);

        /* now test an array of giant things */
        count = 0;
        a = qarray_create_configured(ELEMENT_COUNT, sizeof(bigobj),
                                     disttypes[dt_index], 0, 0);
        iprintf("%s: created array of big objects\n", distnames[dt_index]);
        qarray_iter(a, 0, ELEMENT_COUNT, assignall1);
        iprintf("%s: iterated; now checking work...\n", distnames[dt_index]);
        if (count != ELEMENT_COUNT) {
            printf("count = %lu, dt_index = %u\n", (unsigned long)count,
                   dt_index);
            // assert(count == ELEMENT_COUNT);
        }
        {
            size_t i;
            char fail = 0;

            for (i = 0; i < ELEMENT_COUNT; i++) {
                char *elem = (char *)qarray_elem_nomigrate(a, i);
                size_t j;

                for (j = 0; j < sizeof(bigobj); j++) {
                    if (elem[j] != 1) {
                        printf
                        (
                         "byte %lu of element %lu is %i instead of 1, dt_index = %u\n",
                         (unsigned long)j, (unsigned long)i, elem[j],
                         dt_index);
                        fail = 1;
                        break;
                    }
                }
            }
            assert(fail == 0);
        }
        iprintf("%s: correct result!\n", distnames[dt_index]);
        qarray_destroy(a);

        /* now test an array of weird-sized things */
        count = 0;
        a = qarray_create_configured(ELEMENT_COUNT, sizeof(offsize),
                                     disttypes[dt_index], 0, 0);
        iprintf("%s: created array of odd-sized objects\n",
                distnames[dt_index]);
        qarray_iter_loop(a, 0, ELEMENT_COUNT, assignoff1, NULL);
        iprintf("%s: iterated; now checking work...\n", distnames[dt_index]);
        if (count != ELEMENT_COUNT) {
            printf("count = %lu, dt_index = %u\n", (unsigned long)count,
                   dt_index);
            assert(count == ELEMENT_COUNT);
        }
        {
            size_t i;

            for (i = 0; i < ELEMENT_COUNT; i++) {
                char *elem = (char *)qarray_elem_nomigrate(a, i);
                size_t j;

                for (j = 0; j < sizeof(offsize); j++) {
                    if (elem[j] != 1) {
                        printf
                        (
                         "byte %lu of element %lu is %i instead of 1, dt_index = %u\n",
                         (unsigned long)j, (unsigned long)i, elem[j],
                         dt_index);
                        assert(elem[j] == 1);
                    }
                }
            }
        }
        iprintf("%s: correct result!\n", distnames[dt_index]);
        qarray_destroy(a);
    }

    return 0;
}
Example #17
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned long tmp = 0;
        NUMARG(tmp, "UTS_TREE_TYPE");
        tree_type = (tree_t)tmp;
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    {
        unsigned long tmp = 0;
        NUMARG(tmp, "UTS_SHAPE_FN");
        shape_fn = (shape_t)tmp;
    }
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);

    nodecount = 1;
    long retval;
    {
        retval = _Cilk_spawn visit(root);

        _Cilk_sync;
    }

    total_num_nodes = retval;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    LOG_UTS_RESULTS_YAML(total_num_nodes, total_time)
    LOG_ENV_CILK_YAML()
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / __cilkrts_get_nworkers());
#endif /* ifdef PRINT_STATS */

    return 0;
}
Example #18
0
ffopen(const char *name, int flags, ...)
{
	int		narg;
	int		cblks;
	_ffopen_t	fd;
	int		retfd;
	int		aifound;
	mode_t		mode;
	long		cbits;
	va_list		ap;
	union spec_u	*fdspec;
	struct gl_o_inf	gloinf;
	assign_info	ai;
	struct fdinfo	*nfio;


	extern union spec_u *_g_fdc_spec();
	struct ffsw *pstat, locstat;

#ifdef	_CRAY
	NUMARG(narg);
#elif   defined(__mips) || defined(_LITTLE_ENDIAN)
	/* mode is passed only when O_CREAT is set */
	if (flags & O_CREAT)
		narg = 3;
	else
		narg = 2;
#else
	narg = 6;
#endif
	mode	= 0;
	cbits 	= 0;
	cblks 	= 0;
	pstat	= &locstat;
/*
 *	New usage only allows 5 params.	     (what does this mean ???)
 */
	va_start(ap, flags);
	if (narg >= 3)
#if defined(BUILD_OS_DARWIN)
		mode	= (mode_t) va_arg(ap, int);
#else /* defined(BUILD_OS_DARWIN) */
		mode	= va_arg(ap, mode_t);
#endif /* defined(BUILD_OS_DARWIN) */
	if (narg >= 4)
		cbits	= va_arg(ap, long);
	if (narg >= 5)
		pstat	= va_arg(ap, struct ffsw *);
	if (narg >= 6)
		cblks	= va_arg(ap, int);

	va_end(ap);

 	aifound = _assign_asgcmd_info(name, -1, ASN_G_FF | ASN_G_ALL, &ai,
			NULL, 1);
	if (aifound == -1) {
		ERETURN(pstat, errno, 0);
	}

	if (aifound == 1 && ai.F_filter_flg)
		fdspec = &ai.F_filter[0];
	else
		fdspec = NULL;

	(void) memset(&gloinf, 0, sizeof(gloinf));
        gloinf.aip	= aifound ? &ai : NULL;

	fd = _ffopen(name, flags, mode, fdspec, pstat, cbits, cblks, NULL,
		&gloinf);

#if defined(_CRAY1) || defined(__mips)
	if (fd != _FFOPEN_ERR && MULTI_ON) {
		nfio = NULL;
		if (_ff_top_lock(fd, &nfio, pstat) < 0)
			fd = _FFOPEN_ERR;	
		if (nfio != NULL)
			fd = (_ffopen_t)nfio;
	}
#endif
	/*
	 * ffopen returns an int. Call a routine which associates an
	 * int with what is returned by _ffopen 
	 */
#if	defined(__mips) || defined(_LITTLE_ENDIAN)
	retfd = _ff_fdinfo_to_int(fd, pstat);
#else
	retfd = (int)fd;
#endif
	/* should check chain of layers here for sanity */
	if (narg < 4)
		errno = locstat.sw_error;

	return(retfd);
}
Example #19
0
// //////////////////////////////////////////////////////////////////////////////
int main(int   argc,
         char *argv[])
{
    size_t depth = 3;

    assert(qthread_initialize() == 0);

    CHECK_VERBOSE();
    NUMARG(depth, "TEST_DEPTH");

    // Test creating an empty sinc
    {
        qt_sinc_t zero_sinc;
	qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0);
        qt_sinc_wait(&zero_sinc, NULL);
        qt_sinc_fini(&zero_sinc);

        qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0);
        qt_sinc_expect(three_sinc, 3);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qt_sinc_wait(three_sinc, NULL);
        qt_sinc_destroy(three_sinc);
    }

    qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2);

    // Spawn additional waits
    aligned_t rets[3];
    {
        qthread_fork(wait_on_sinc, sinc, &rets[0]);
        qthread_fork(wait_on_sinc, sinc, &rets[1]);
        qthread_fork(wait_on_sinc, sinc, &rets[2]);
    }

    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_create(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    qt_sinc_wait(sinc, NULL);
    for (int i = 0; i < 3; i++)
        qthread_readFF(NULL, &rets[i]);

    // Reset the sinc
    qt_sinc_reset(sinc, 2);

    // Second use
    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_reset(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    qt_sinc_wait(sinc, NULL);
    qt_sinc_destroy(sinc);

    return 0;
}
Example #20
0
int main(int   argc,
         char *argv[])
{
    size_t     threads, i;
    aligned_t *rets;
    qtimer_t   t;
    unsigned int iter, iterations = 10;
    double tot = 0.0;

    assert(qthread_initialize() == 0);
    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");

    threads = qthread_num_workers();
    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("%i threads...\n", (int)threads);

    initme = calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = malloc(threads * sizeof(aligned_t));
    assert(rets);

    iprintf("Creating a barrier to block %i threads\n", threads);
    wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0);     // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 1; i < threads; i++) {
            void *arg[2] = {wait_on_me, (void*)(intptr_t)i};
            qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_barrier_enter(wait_on_me, 0);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));
        tot += qtimer_secs(t);

        // reset
        initme_idx = 1;

        // check retvals
        for (i = 1; i < threads; i++) {
            qthread_readFF(NULL, rets + i);
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Average barrier time = %f\n", tot / iterations);

    iprintf("Destroying the barrier...\n");
    qt_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    return 0;
}
Example #21
0
int main(int argc, char *argv[])
{
    aligned_t *ui_array, *ui_array2;
    double *d_array, *d_array2;
    size_t len = 1000000;
    qtimer_t timer = qtimer_create();
    double cumulative_time_qutil = 0.0;
    double cumulative_time_libc = 0.0;
    int using_doubles = 0;
    unsigned long iterations = 10;

    qthread_initialize();

    CHECK_VERBOSE();
    printf("%i threads\n", (int)qthread_num_workers());
    NUMARG(len, "TEST_LEN");
    NUMARG(iterations, "TEST_ITERATIONS");
    NUMARG(using_doubles, "TEST_USING_DOUBLES");
    printf("using %s\n", using_doubles ? "doubles" : "aligned_ts");

    if (using_doubles) {
        d_array = calloc(len, sizeof(double));
	printf("array is %s\n", human_readable(len * sizeof(double)));
        assert(d_array);
        // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL);
        for (unsigned int i = 0; i < len; i++) {
            d_array[i] = ((double)random()) / ((double)RAND_MAX) + random();
        }
        d_array2 = calloc(len, sizeof(double));
        assert(d_array2);
        // madvise(d_array2,len*sizeof(double), MADV_RANDOM);
        iprintf("double array generated...\n");
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qutil_qsort(d_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
        cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu doubles with qutil took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_qutil);
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qsort(d_array2, len, sizeof(double), dcmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu doubles with libc took: %f seconds\n",
               (unsigned long)len, cumulative_time_libc);
        free(d_array);
        free(d_array2);
    } else {
        ui_array = calloc(len, sizeof(aligned_t));
	printf("array is %s\n", human_readable(len * sizeof(aligned_t)));
        for (unsigned int i = 0; i < len; i++) {
            ui_array[i] = random();
        }
        ui_array2 = calloc(len, sizeof(aligned_t));
        iprintf("ui_array generated...\n");
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qutil_aligned_qsort(ui_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
        }
	cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu aligned_ts with qutil took: %f seconds\n",
               (unsigned long)len, cumulative_time_qutil);
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qsort(ui_array2, len, sizeof(double), acmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_libc);
        free(ui_array);
        free(ui_array2);
    }
    if (cumulative_time_qutil < cumulative_time_libc) {
	printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    } else {
	printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    }

    qtimer_destroy(timer);

    return 0;
}
Example #22
0
int main(int   argc,
         char *argv[])
{
    size_t       threads = 1000, i;
    aligned_t   *rets;
    qtimer_t     t;
    unsigned int iter, iterations = 10;

    assert(qthread_initialize() == 0);
    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(threads, "THREADS");
    NUMARG(iterations, "ITERATIONS");

    initme = (aligned_t *)calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t));
    assert(rets);

    iprintf("creating the barrier for %zu threads\n", threads + 1);
    wait_on_me = qt_feb_barrier_create(threads + 1);    // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 0; i < threads; i++) {
            qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_feb_barrier_enter(wait_on_me);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));

        initme_idx = 0;

        for (i = 0; i < threads; i++) {
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Destroying barrier...\n");
    qt_feb_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    /* this loop shouldn't be necessary... but seems to avoid crashes in rare
     * cases (in other words there must a race condition in qthread_finalize()
     * if there are outstanding threads out there) */
    for (i = 0; i < threads * 2; i++) {
        aligned_t tmp = 1;
        qthread_readFF(&tmp, rets + i);
        assert(tmp == 0);
    }
    return 0;
}
Example #23
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

#pragma omp parallel
#pragma omp single
#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);

    nodecount = 1;
    long retval;
#pragma omp parallel
#pragma omp single nowait
#pragma omp task untied
    retval = visit(&root, root.num_children);

    total_num_nodes = retval;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / omp_get_num_threads());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / omp_get_num_threads());
#endif /* ifdef PRINT_STATS */

    return 0;
}