Esempio n. 1
0
/*
 * Benchmark driver: repeatedly forks NUM_THREADS qincr tasks, waits for every
 * one of them to fill its return word, and reports the average wall-clock
 * time of a fork/join round.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if the qthread library cannot
 * be initialized.
 */
int main(int argc, char *argv[])
{
    /* Number of timed rounds; also the divisor for the reported average. */
    const int num_iterations = 10;
    aligned_t rets[NUM_THREADS];
    qtimer_t timer;
    double cumulative_time = 0.0;

    if (qthread_initialize() != QTHREAD_SUCCESS) {
        fprintf(stderr, "qthread library could not be initialized!\n");
        exit(EXIT_FAILURE);
    }
    CHECK_VERBOSE();

    /* Created only after the init check so the failure path leaks nothing. */
    timer = qtimer_create();

    for (int iteration = 0; iteration < num_iterations; iteration++) {
        qtimer_start(timer);
        for (int i = 0; i < NUM_THREADS; i++) {
            qthread_fork(qincr, NULL, &(rets[i]));
        }
        /* Join: block until each task has filled its return word. */
        for (int i = 0; i < NUM_THREADS; i++) {
            qthread_readFF(NULL, &(rets[i]));
        }
        qtimer_stop(timer);
        iprintf("\ttest iteration %i: %f secs\n", iteration,
                qtimer_secs(timer));
        cumulative_time += qtimer_secs(timer);
    }
    printf("qthread time: %f\n", cumulative_time / num_iterations);

    qtimer_destroy(timer);

    return 0;
}
Esempio n. 2
0
/// Starts the qthread runtime configured for `thread_count` hardware threads
/// and records the resulting shepherd/worker layout in the Impl:: counters.
///
/// The runtime must come up with exactly `thread_count` workers, distributed
/// evenly across shepherds. Otherwise the Impl:: counters are reset to zero,
/// the runtime is finalized again (if it had started), and the failure is
/// reported through Kokkos::Impl::throw_runtime_exception().
///
/// @param thread_count  requested total number of workers; exported to the
///                      runtime through the QTHREAD_HWPAR environment variable.
void Qthread::initialize( int thread_count )
{
  // Environment variable: QTHREAD_NUM_SHEPHERDS
  // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP
  // Environment variable: QTHREAD_HWPAR

  {
    // setenv() copies its arguments. putenv() would instead place a pointer
    // to this stack buffer directly into the environment, which dangles as
    // soon as this scope ends.
    char buffer[32];
    snprintf(buffer,sizeof(buffer),"%d",thread_count);
    setenv("QTHREAD_HWPAR",buffer,1);
  }

  const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) &&
                       ( thread_count    == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) &&
                       ( thread_count    == qthread_num_workers() );

  bool ok_symmetry = true ;

  if ( ok_init ) {
    Impl::s_number_shepherds            = qthread_num_shepherds();
    Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD);
    Impl::s_number_workers              = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ;

    // Every shepherd must own the same number of workers.
    for ( int i = 0 ; ok_symmetry && i < Impl::s_number_shepherds ; ++i ) {
      ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) );
    }
  }

  if ( ! ok_init || ! ok_symmetry ) {
    std::ostringstream msg ;

    msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ;
    msg << " : qthread_num_shepherds = " << qthread_num_shepherds();
    msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD);
    msg << " : qthread_num_workers = " << qthread_num_workers();

    if ( ! ok_symmetry ) {
      // ok_symmetry can only be false after ok_init succeeded, so
      // s_number_shepherds is valid here.
      msg << " : qthread_num_workers_local = {" ;
      for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) {
        msg << " " << qthread_num_workers_local(i) ;
      }
      msg << " }" ;
    }

    Impl::s_number_workers   = 0 ;
    Impl::s_number_shepherds = 0 ;
    Impl::s_number_workers_per_shepherd = 0 ;

    // Only tear down a runtime that actually started.
    if ( ok_init ) { qthread_finalize(); }

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  Impl::QthreadExec::resize_worker_scratch( 256 , 256 );

  // Init the array used for arbitrarily sized atomics
  Impl::init_lock_array_host_space();

}
Esempio n. 3
0
// //////////////////////////////////////////////////////////////////////////////
int main(int   argc,
         char *argv[])
{
    int count = 0;

    aligned_t max = 0;
    aligned_t tmp = 0;

    assert(qthread_initialize() == 0);

    CHECK_VERBOSE();
    NUMARG(count, "COUNT");

    iprintf("Main executing in team %lu (w/ parent %lu)\n", 
        (unsigned long)qt_team_id(), (unsigned long)qt_team_parent_id());
    assert(qt_team_id() == default_team_id);
    assert(qt_team_parent_id() == non_team_id);

    aligned_t hello_in_team_ret;
    qthread_fork(hello_in_team, NULL, &hello_in_team_ret);
    qthread_readFF(&tmp, &hello_in_team_ret);
    max = MAX(max, tmp);

    aligned_t hello_new_team_rets[count];
    for (int i = 0; i < count; i++) {
        qthread_fork_new_team(hello_new_team, NULL, &hello_new_team_rets[i]);
    }
    for (int i = 0; i < count; i++) {
        qthread_readFF(&tmp, &hello_new_team_rets[i]);
        max = MAX(max, tmp);
    }

    aligned_t hello_new_team_in_team_ret;
    qthread_fork_new_team(
        hello_new_team_in_team, NULL, &hello_new_team_in_team_ret);
    qthread_readFF(&tmp, &hello_new_team_in_team_ret);
    max = MAX(max, tmp);

    aligned_t hello_new_team_new_team_ret;
    qthread_fork_new_team(
        hello_new_team_new_team, NULL, &hello_new_team_new_team_ret);
    qthread_readFF(&tmp, &hello_new_team_new_team_ret);
    max = MAX(max, tmp);

    iprintf("max is %lu\n", (unsigned long)max);

    if (count + 4 == max) {
        iprintf("SUCCEEDED with count %lu and max team id %lu\n",
            (unsigned long)count,
            (unsigned long)max);
        return 0;
    } else {
        iprintf("FAILED with count %lu and max team id %lu\n",
            (unsigned long)count,
            (unsigned long)max);
        return 1;
    }
}
Esempio n. 4
0
/*
 * Forks a single `fib` task for input 30, waits on a sinc for it to finish,
 * and prints the value the task stored in `ret`.
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized.
 */
int main(){
  /* Initialize outside of assert(): with NDEBUG defined the assert argument
   * is never evaluated and the runtime would never be started. */
  const int init_status = qthread_initialize();
  assert(init_status == 0);
  if (init_status != 0) {
    return 1;
  }

  qt_sinc_t sinc;
  qt_sinc_init(&sinc, 0, NULL, NULL, 1);
  int ret = 0;  /* defined value even if the task never writes it */
  args_t args = { 30, &sinc, &ret };
  /* args is copied by the fork, so passing a stack object is fine. */
  qthread_fork_copyargs(fib, &args, sizeof(args_t), NULL);
  qt_sinc_wait(&sinc, NULL);
  qt_sinc_fini(&sinc);  /* pair with qt_sinc_init() */
  printf("%d\n", ret);
  return 0;
}
Esempio n. 5
0
// Thread entry point that owns the qthread runtime's lifetime: it initializes
// the runtime, publishes chpl_qthread_done_initializing, waits on the `canexit`
// sync variable, then finalizes the runtime and publishes done_finalizing.
//
// junk: unused.
// Returns NULL unconditionally.
static void *initializer(void *junk)
{
    // NOTE(review): the return value of qthread_initialize() is ignored, so a
    // failed start-up is still reported as "done initializing".
    qthread_initialize();
    // Fence sits between initialization and the flag store; presumably it
    // orders the runtime's writes before the flag becomes visible — confirm
    // against the MACHINE_FENCE definition.
    MACHINE_FENCE;
    chpl_qthread_done_initializing = 1;

    // Full/full read of `canexit`; presumably blocks until another thread
    // fills it to request shutdown — verify against the code that writes it.
    qthread_syncvar_readFF(NULL, &canexit);

    qthread_finalize();
    MACHINE_FENCE;
    done_finalizing = 1;
    return NULL;
}
Esempio n. 6
0
    void run()
    {
        setenv("QT_NUM_SHEPHERDS",
            boost::lexical_cast<std::string>(this->osthreads_).c_str(), 1);
        setenv("QT_NUM_WORKERS_PER_SHEPHERD", "1", 1);

        qthread_initialize(); 

        // Cold run
        //kernel();

        // Hot run
        results_type results = kernel();
        print_results(results);
    }
Esempio n. 7
0
// Thread entry point that owns the qthread runtime's lifetime: it initializes
// the runtime, sets chpl_qthread_done_initializing, waits on the `canexit`
// sync variable, then finalizes the runtime and sets done_finalizing.
// Both flags are written while holding done_init_final_mux.
//
// junk: unused.
// Returns NULL unconditionally.
static void *initializer(void *junk)
{
    // NOTE(review): the return value of qthread_initialize() is ignored, so a
    // failed start-up is still reported as "done initializing".
    qthread_initialize();
    (void) pthread_mutex_lock(&done_init_final_mux);  // implicit memory fence
    chpl_qthread_done_initializing = 1;
    (void) pthread_mutex_unlock(&done_init_final_mux);

    // Full/full read of `canexit`; presumably blocks until another thread
    // fills it to request shutdown — verify against the code that writes it.
    qthread_syncvar_readFF(NULL, &canexit);

    qthread_finalize();
    (void) pthread_mutex_lock(&done_init_final_mux);  // implicit memory fence
    done_finalizing = 1;
    (void) pthread_mutex_unlock(&done_init_final_mux);

    return NULL;
}
Esempio n. 8
0
/*
 * Multinode smoke test: registers `returner` as a remote-callable function,
 * forks it on another rank (or on rank 0 when running alone) with `foobar`
 * as its argument, and checks that the value that comes back equals `foobar`.
 *
 * Exit codes: 0 success, 1 register failed, 2 non-zero rank returned from
 * qthread_multinode_run(), 3 fork_remote failed, 4 readFE failed, 5 wrong
 * value returned, 6 runtime initialization failed.
 */
int main(int argc,
         char *argv[])
{
    CHECK_VERBOSE();

    /* tmp is initialized so the error path below never prints garbage. */
    aligned_t tmp = 0, ret = 0;
    int retval;
    long foobar = 1234567890;

    setenv("QT_MULTINODE","yes",1);

    retval = qthread_initialize();
    if (retval != 0) {
        fprintf(stderr, "qthread_initialize returned %d\n", retval);
        return 6;
    }

    my_id = qthread_multinode_rank();
    world_size = qthread_multinode_size();

    iprintf("(%03d) Rank %d of %d is alive\n", my_id, my_id, world_size);

    retval = qthread_multinode_register(2, returner);
    if (retval != 0){
        fprintf(stderr, "(%03d) multinode_register returned %d\n", my_id, retval);
        return 1;
    }

    qthread_multinode_run();
    if (my_id != 0) return 2;

    /* Prefer a genuinely remote rank when more than one is available. */
    int target = (world_size > 1) ? 1 : 0;
    retval = qthread_fork_remote(returner, &foobar, &ret, target, sizeof(long));
    if (retval != 0) {
        fprintf(stderr, "(%03d) fork_remote returned %d\n", my_id, retval);
        return 3;
    }

    retval = qthread_readFE(&tmp, &ret);
    /* Check the status before reporting tmp as the task's result. */
    if (retval != 0) {
        fprintf(stderr, "(%03d) readFE returned %d (%d)\n", my_id, retval, (int) tmp);
        return 4;
    }
    iprintf("(%03d) returner returned %ld\n", my_id, (long) tmp);

    qthread_finalize();

    return (tmp == foobar) ? 0 : 5;
}
Esempio n. 9
0
/*
 * Runs `sum` over the range [0, numincrs) with qt_loop_balance_sinc() and
 * verifies that the global `threads` counter ends up equal to numincrs.
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized.
 */
int main(int   argc,
         char *argv[])
{
    /* Initialize outside of assert(): with NDEBUG defined the assert argument
     * is never evaluated and the runtime would never be started. */
    const int init_status = qthread_initialize();
    assert(init_status == QTHREAD_SUCCESS);
    if (init_status != QTHREAD_SUCCESS) {
        return 1;
    }

    CHECK_VERBOSE();
    NUMARG(numincrs, "NUM_INCRS");
    // future_init(128);
    iprintf("%i shepherds\n", qthread_num_shepherds());
    iprintf("%i threads\n", qthread_num_workers());

    qt_loop_balance_sinc(0, numincrs, sum, NULL);

    /* Report the mismatch before the assert aborts the test. */
    if (threads != numincrs) {
        iprintf("threads == %lu, not %lu\n", (unsigned long)threads, (unsigned long)numincrs);
    }
    assert(threads == numincrs);

    return 0;
}
Esempio n. 10
0
/*
 * Sanity test for qtimer followed by the fastrand statistical checks
 * (ks_test, runs, autocorrelation).
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized.
 */
int main(int   argc,
         char *argv[])
{
    qtimer_t t;

    /* Initialize outside of assert(): with NDEBUG defined the assert argument
     * is never evaluated and the runtime would never be started. */
    const int init_status = qthread_initialize();
    assert(init_status == QTHREAD_SUCCESS);
    if (init_status != QTHREAD_SUCCESS) {
        return 1;
    }

    CHECK_VERBOSE();

    t = qtimer_create();
    assert(t);
    /* First back-to-back start/stop: only diagnostics, nothing is fatal. */
    qtimer_start(t);
    qtimer_stop(t);
    if (qtimer_secs(t) == 0) {
        fprintf(stderr, "qtimer_secs(t) reported zero length time.\n");
    } else if (qtimer_secs(t) < 0) {
        fprintf(stderr, "qtimer_secs(t) thinks time went backwards (%g).\n",
                qtimer_secs(t));
    }
    iprintf("time to find self and assert it: %g secs\n", qtimer_secs(t));

    /* Second back-to-back start/stop: a negative interval is a hard failure. */
    qtimer_start(t);
    qtimer_stop(t);
    assert(qtimer_secs(t) >= 0.0);
    if (qtimer_secs(t) == 0.0) {
        iprintf("inlining reduces calltime to zero (apparently)\n");
    } else {
        iprintf("smallest measurable time: %g secs\n", qtimer_secs(t));
    }

    qtimer_destroy(t);

    // Now to test fastrand
    ks_test();
    runs();
    autocorrelation();

    qthread_finalize();

    return 0;
}
Esempio n. 11
0
/*
 * The main procedure simply creates a producer and a consumer task to run in
 * parallel over a shared buffer of BUFFERSIZE words, then waits for both.
 *
 * Returns 0 on success, 1 if the runtime could not be initialized or the
 * buffer could not be allocated.
 */
int main(int argc,
         char *argv[])
{
    aligned_t t[2];

    /* Initialize outside of assert(): with NDEBUG defined the assert argument
     * is never evaluated and the runtime would never be started. */
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return 1;
    }

    CHECK_VERBOSE();
    NUMARG(bufferSize, "BUFFERSIZE");
    /* Default item count derives from the buffer size; NUMITEMS overrides. */
    numItems = 8 * bufferSize;
    NUMARG(numItems, "NUMITEMS");

    iprintf("%i threads...\n", qthread_num_shepherds());

    buff = malloc(sizeof(aligned_t) * bufferSize);
    if (buff == NULL) {
        /* Do not dereference a failed allocation below. */
        return 1;
    }
    for (unsigned int i = 0; i < bufferSize; ++i) {
        buff[i] = 0;
    }

    qthread_fork(consumer, NULL, &t[0]);
    qthread_fork(producer, NULL, &t[1]);
    qthread_readFF(NULL, &t[0]);
    qthread_readFF(NULL, &t[1]);

    /* cleanup... unnecessary in general, but for the moment I'm tracking down
     * errors in the FEB system, so let's clean up */
    for (unsigned int i = 0; i < bufferSize; ++i) {
        qthread_fill(buff + i);
    }

    free(buff);

    iprintf("Success!\n");

    return 0;
}
Esempio n. 12
0
/*
 * Stencil benchmark driver.
 *
 * Allocates NUM_STAGES grids of (N+2) x (M+2) words (the extra rows/columns
 * are boundary padding), initializes them, spawns one `update` task per
 * interior point, waits on a FEB barrier for all of them, and prints timing
 * information (and optionally the final stages).
 *
 * Tunables are read via NUMARG: N, M, TIMESTEPS, WORKLOAD, WORKLOAD_PER,
 * WORKLOAD_VAR, PRINT_FINAL, ALL_TIME.
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized.
 */
int main(int argc, char *argv[])
{
    int n = 10;
    int m = 10;
    num_timesteps = 10;
    workload = 0;
    workload_per = 0;
    workload_var = 0;
    int print_final = 0;
    int alltime = 0;

    CHECK_VERBOSE();
    NUMARG(n, "N");
    NUMARG(m, "M");
    NUMARG(num_timesteps, "TIMESTEPS");
    NUMARG(workload, "WORKLOAD");
    NUMARG(workload_per, "WORKLOAD_PER");
    NUMARG(workload_var, "WORKLOAD_VAR");
    NUMARG(print_final, "PRINT_FINAL");
    NUMARG(alltime, "ALL_TIME");

    assert (n > 0 && m > 0);

    // Initialize Qthreads. Done outside of assert(): with NDEBUG defined the
    // assert argument is never evaluated and the runtime would never start.
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return 1;
    }

    qtimer_t alloc_timer = qtimer_create();
    qtimer_t init_timer = qtimer_create();
    qtimer_t exec_timer = qtimer_create();

    // Allocate memory for the multi-stage stencil (with boundary padding)
    qtimer_start(alloc_timer);
    stencil_t points;
    points.N = n + 2;
    points.M = m + 2;

    for (int s = 0; s < NUM_STAGES; s++) {
        points.stage[s] = malloc(points.N*sizeof(aligned_t *));
        assert(NULL != points.stage[s]);
        for (int i = 0; i < points.N; i++) {
            points.stage[s][i] = calloc(points.M, sizeof(aligned_t));
            assert(NULL != points.stage[s][i]);
        }
    }
    qtimer_stop(alloc_timer);

    // Initialize first stage and set boundary conditions
    qtimer_start(init_timer);
    // Interior points: stage 0 starts full (value 0), later stages start empty
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            qthread_writeF_const(&points.stage[0][i][j], 0);
            for (int s = 1; s < NUM_STAGES; s++)
                qthread_empty(&points.stage[s][i][j]);
        }
    }
    // Left and right boundary columns
    for (int i = 0; i < points.N; i++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][i][0], BOUNDARY);
            qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY);
#else
            points.stage[s][i][0] = BOUNDARY;
            points.stage[s][i][points.M-1] = BOUNDARY;
#endif
        }
    }
    // Top and bottom boundary rows
    for (int j = 0; j < points.M; j++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][0][j], BOUNDARY);
            qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY);
#else
            points.stage[s][0][j] = BOUNDARY;
            points.stage[s][points.N-1][j] = BOUNDARY;
#endif
        }
    }
    qtimer_stop(init_timer);

    // Create barrier to synchronize on completion of calculations:
    // one participant per interior point plus this thread
    qtimer_start(exec_timer);
    points.barrier = qt_feb_barrier_create(n*m+1);

    // Spawn tasks to start calculating updates at each point
    // (args is copied per fork, so it can be reused across iterations)
    update_args_t args = {&points, -1, -1, 1, 1};
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            args.i = i;
            args.j = j;
            qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL);
        }
    }

    // Wait for calculations to finish
    qt_feb_barrier_enter(points.barrier);
    qtimer_stop(exec_timer);

    // Print timing info
    if (alltime) {
        fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer));
        fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer));
        fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer));
    } else {
        fprintf(stdout, "%f\n", qtimer_secs(exec_timer));
    }

    // Print stencils
    if (print_final) {
        size_t final = (num_timesteps % NUM_STAGES);
        iprintf("Stage %lu:\n", prev_stage(prev_stage(final)));
        print_stage(&points, prev_stage(prev_stage(final)));
        iprintf("\nStage %lu:\n", prev_stage(final));
        print_stage(&points, prev_stage(final));
        iprintf("\nStage %lu:\n", final);
        print_stage(&points, final);
    }

    qt_feb_barrier_destroy(points.barrier);
    qtimer_destroy(alloc_timer);
    qtimer_destroy(init_timer);
    qtimer_destroy(exec_timer);

    // Free allocated memory. Iterate over NUM_STAGES, mirroring the
    // allocation loop, instead of assuming exactly three stages.
    for (int s = 0; s < NUM_STAGES; s++) {
        for (int i = 0; i < points.N; i++) {
            free(points.stage[s][i]);
        }
        free(points.stage[s]);
    }

    return 0;
}
Esempio n. 13
0
/*
 * Syncvar producer/consumer test: exercises empty/fill/readFF/readFE on the
 * global syncvars `id` and `x`, then runs PAIRS consumer and PAIRS producer
 * tasks against `x` and checks its final value against ITERATIONS - 1.
 *
 * Returns 0 on success and -1 on failure (wrong final value, failed
 * initialization, or failed allocation).
 */
int main(int argc,
         char *argv[])
{
    aligned_t *t[2];
    uint64_t x_value;

    uint64_t pairs;

    /* Initialize outside of assert(): with NDEBUG defined the assert argument
     * is never evaluated and the runtime would never be started. */
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return -1;
    }
    /* Default scales with the shepherd count; PAIRS overrides it below. */
    pairs = qthread_num_shepherds() * 6;

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");
    NUMARG(pairs, "PAIRS");

    /* Return words: t[0][i] for consumer i, t[1][i] for producer i. */
    t[0] = calloc(pairs, sizeof(aligned_t));
    t[1] = calloc(pairs, sizeof(aligned_t));
    if ((t[0] == NULL) || (t[1] == NULL)) {
        free(t[0]);
        free(t[1]);
        return -1;
    }

    iprintf("%i threads...\n", qthread_num_shepherds());
    iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w);

    qthread_syncvar_empty(&id);
    qthread_syncvar_writeF_const(&id, 1);
    iprintf("id = 0x%lx\n", (unsigned long)id.u.w);
    {
        uint64_t tmp = 0;
        qthread_syncvar_readFF(&tmp, &id);
        assert(tmp == 1);
    }
    iprintf("x's status is: %s (want full (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 1);
    qthread_syncvar_readFE(NULL, &x);
    iprintf("x's status became: %s (want empty (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 0);
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i]));
    }
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i]));
    }
    /* Join every consumer/producer pair. */
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_readFF(NULL, &(t[0][i]));
        qthread_readFF(NULL, &(t[1][i]));
    }
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_fill(&x);
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_readFF(&x_value, &x);
    assert(qthread_syncvar_status(&x) == 1);

    free(t[0]);
    free(t[1]);

    if (x_value == iterations - 1) {
        iprintf("Success! x==%lu\n", (unsigned long)x_value);
        return 0;
    } else {
        fprintf(stderr, "Final value of x=%lu, expected %lu\n",
                (unsigned long)x_value, (unsigned long)(iterations - 1));
        return -1;
    }
}
Esempio n. 14
0
/*
 * UTS (unbalanced tree search) driver: reads the tree parameters from the
 * UTS_* settings, starts the qthread runtime, visits the tree starting from
 * the root and prints size/depth/leaf statistics together with timing.
 *
 * Returns 0 on success, EXIT_FAILURE for an invalid tree type or shape
 * function, or if the qthread runtime could not be initialized.
 */
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        /* Enum-typed settings are read through an unsigned temporary so they
         * can be range-checked before being cast back. */
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

    // If the operator did not attempt to set a stack size, force
    // a reasonable lower bound
    if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE"))
        setenv("QT_STACK_SIZE", "32768", 0);

    // Initialize outside of assert(): with NDEBUG defined the assert argument
    // is never evaluated and the runtime would never be started.
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return EXIT_FAILURE;
    }

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);
    // donecount is emptied here and read with readFF below, so main blocks
    // until the traversal fills it.
    aligned_t donecount = 0;
    root.dc = &donecount;
    qthread_empty(&donecount);
    aligned_t tot = 0;
    root.acc = &tot;
    root.expect = 1;

    qthread_fork_syncvar(visit, &root, NULL);
    qthread_readFF(NULL, root.dc);
    total_num_nodes = tot;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#endif /* ifdef PRINT_STATS */

    return 0;
}
Esempio n. 15
0
/*
 * qarray iteration test. For every distribution type selected by the
 * TEST_DISTS bitmask it creates three arrays of ELEMENT_COUNT elements
 * (doubles, `bigobj`, `offsize`), runs an assignment callback over each, and
 * verifies both the global `count` and the contents of every element.
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized;
 * verification failures abort through assert().
 */
int main(int argc,
         char *argv[])
{
    qarray *a;
    distribution_t disttypes[] = {
        FIXED_HASH, FIXED_FIELDS,
        ALL_LOCAL, ALL_RAND, ALL_LEAST,
        DIST_RAND, DIST_STRIPES, DIST_FIELDS, DIST_LEAST
    };
    /* Must stay index-aligned with disttypes[]. */
    const char *distnames[] = {
        "FIXED_HASH", "FIXED_FIELDS",
        "ALL_LOCAL", "ALL_RAND", "ALL_LEAST",
        "DIST_RAND", "DIST_STRIPES", "DIST_FIELDS", "DIST_LEAST"
    };
    unsigned int dt_index;
    unsigned int num_dists = sizeof(disttypes) / sizeof(distribution_t);
    /* Default: one bit set per distribution type, i.e. test all of them. */
    unsigned int dists = (1 << num_dists) - 1;

    /* Nothing below can work without a running runtime. */
    if (qthread_initialize() != 0) {
        return 1;
    }
    CHECK_VERBOSE();
    NUMARG(dists, "TEST_DISTS");
    NUMARG(ELEMENT_COUNT, "ELEMENT_COUNT");

    /* iterate over all the different distribution types */
    for (dt_index = 0; dt_index < num_dists; dt_index++) {
        if ((dists & (1 << dt_index)) == 0) {
            continue;
        }
        /* test a basic array of doubles */
        count = 0;
        a = qarray_create_configured(ELEMENT_COUNT, sizeof(double),
                                     disttypes[dt_index], 0, 0);
        assert(a);
        iprintf("%s: created basic array of doubles\n", distnames[dt_index]);
        qarray_iter(a, 0, ELEMENT_COUNT, assign1);
        iprintf("%s: iterated; now checking work...\n", distnames[dt_index]);
        if (count != ELEMENT_COUNT) {
            printf("count = %lu, dt_index = %u\n", (unsigned long)count,
                   dt_index);
            assert(count == ELEMENT_COUNT);
        }
        {
            size_t i;

            for (i = 0; i < ELEMENT_COUNT; i++) {
                double elem = *(double *)qarray_elem_nomigrate(a, i);

                if (elem != 1.0) {
                    printf
                        ("element %lu is %f instead of 1.0, disttype = %s\n",
                        (unsigned long)i, elem, distnames[dt_index]);
                    assert(elem == 1.0);
                }
            }
        }
        iprintf("%s: correct result!\n", distnames[dt_index]);
        qarray_destroy(a);

        /* now test an array of giant things */
        count = 0;
        a = qarray_create_configured(ELEMENT_COUNT, sizeof(bigobj),
                                     disttypes[dt_index], 0, 0);
        assert(a); /* same creation check as the array of doubles */
        iprintf("%s: created array of big objects\n", distnames[dt_index]);
        qarray_iter(a, 0, ELEMENT_COUNT, assignall1);
        iprintf("%s: iterated; now checking work...\n", distnames[dt_index]);
        if (count != ELEMENT_COUNT) {
            /* A count mismatch is only reported here; the byte check below
             * decides pass/fail. */
            printf("count = %lu, dt_index = %u\n", (unsigned long)count,
                   dt_index);
            // assert(count == ELEMENT_COUNT);
        }
        {
            size_t i;
            char fail = 0;

            for (i = 0; i < ELEMENT_COUNT; i++) {
                char *elem = (char *)qarray_elem_nomigrate(a, i);
                size_t j;

                for (j = 0; j < sizeof(bigobj); j++) {
                    if (elem[j] != 1) {
                        printf
                        (
                         "byte %lu of element %lu is %i instead of 1, dt_index = %u\n",
                         (unsigned long)j, (unsigned long)i, elem[j],
                         dt_index);
                        fail = 1;
                        /* Report only the first bad byte of each element. */
                        break;
                    }
                }
            }
            assert(fail == 0);
        }
        iprintf("%s: correct result!\n", distnames[dt_index]);
        qarray_destroy(a);

        /* now test an array of weird-sized things */
        count = 0;
        a = qarray_create_configured(ELEMENT_COUNT, sizeof(offsize),
                                     disttypes[dt_index], 0, 0);
        assert(a); /* same creation check as the array of doubles */
        iprintf("%s: created array of odd-sized objects\n",
                distnames[dt_index]);
        qarray_iter_loop(a, 0, ELEMENT_COUNT, assignoff1, NULL);
        iprintf("%s: iterated; now checking work...\n", distnames[dt_index]);
        if (count != ELEMENT_COUNT) {
            printf("count = %lu, dt_index = %u\n", (unsigned long)count,
                   dt_index);
            assert(count == ELEMENT_COUNT);
        }
        {
            size_t i;

            for (i = 0; i < ELEMENT_COUNT; i++) {
                char *elem = (char *)qarray_elem_nomigrate(a, i);
                size_t j;

                for (j = 0; j < sizeof(offsize); j++) {
                    if (elem[j] != 1) {
                        printf
                        (
                         "byte %lu of element %lu is %i instead of 1, dt_index = %u\n",
                         (unsigned long)j, (unsigned long)i, elem[j],
                         dt_index);
                        assert(elem[j] == 1);
                    }
                }
            }
        }
        iprintf("%s: correct result!\n", distnames[dt_index]);
        qarray_destroy(a);
    }

    return 0;
}
Esempio n. 16
0
/*
 * Region-barrier test: for ITERATIONS rounds, spawns one `barrier_thread`
 * task per worker (ids 1..threads-1), enters the barrier from main as the
 * remaining participant, times how long main spends in the barrier, and
 * checks that every task advanced its `initme` slot to the round number.
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized.
 */
int main(int   argc,
         char *argv[])
{
    size_t     threads, i;
    aligned_t *rets;
    qtimer_t   t;
    unsigned int iter, iterations = 10;
    double tot = 0.0;

    /* Initialize outside of assert(): with NDEBUG defined the assert argument
     * is never evaluated and the runtime would never be started. */
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return 1;
    }
    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");

    threads = qthread_num_workers();
    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("%i threads...\n", (int)threads);

    initme = calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = malloc(threads * sizeof(aligned_t));
    assert(rets);

    /* threads is a size_t; cast it to match the %i conversion. */
    iprintf("Creating a barrier to block %i threads\n", (int)threads);
    wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0);     // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        /* Slot 0 belongs to main, so spawned tasks start at index 1. */
        for (i = 1; i < threads; i++) {
            void *arg[2] = {wait_on_me, (void*)(intptr_t)i};
            qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_barrier_enter(wait_on_me, 0);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));
        tot += qtimer_secs(t);

        // reset
        initme_idx = 1;

        // check retvals
        for (i = 1; i < threads; i++) {
            qthread_readFF(NULL, rets + i);
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Average barrier time = %f\n", tot / iterations);

    iprintf("Destroying the barrier...\n");
    qt_barrier_destroy(wait_on_me);

    /* Every spawned task has been joined above, so nothing still touches
     * these buffers. */
    free(rets);
    free(initme);
    qtimer_destroy(t);

    iprintf("Success!\n");

    return 0;
}
Esempio n. 17
0
/*
 * qthread queue test in two phases: a single task that enqueues itself and is
 * released, then THREADS_ENQUEUED tasks that enqueue themselves and are
 * released first one at a time and then all together. Queue length, the
 * `threads_in`/`awoke` counters and NODE_BUSYNESS are asserted along the way.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the return-value array
 * cannot be allocated; verification failures abort through assert().
 */
int main(int   argc,
         char *argv[])
{
    aligned_t return_value = 0;
    int status, ret;

    CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output
    NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED");

    status = qthread_initialize();
    assert(status == QTHREAD_SUCCESS);

    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("  %i threads total\n", qthread_num_workers());

    iprintf("Creating the queue...\n");
    the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0);
    assert(the_queue);

    iprintf("---------------------------------------------------------\n");
    iprintf("\tSINGLE THREAD TEST\n\n");

    iprintf("1/4 Spawning thread to be queued...\n");
    status = qthread_fork(tobequeued, NULL, &return_value);
    assert(status == QTHREAD_SUCCESS);

    iprintf("2/4 Waiting for thread to queue itself...\n");
    while(qthread_queue_length(the_queue) != 1) qthread_yield();
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/4 Releasing the queue...\n");
    qthread_queue_release_all(the_queue);

    ret = qthread_readFF(NULL, &return_value);
    assert(ret == QTHREAD_SUCCESS);

    assert(threads_in == 1);
    assert(awoke == 1);
    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);
    iprintf("4/4 Test passed!\n");

    iprintf("---------------------------------------------------------\n");
    iprintf("\tMULTI THREAD TEST\n\n");

    // Reset the counters shared with the tasks before the second phase.
    threads_in = 0;
    awoke = 0;
    aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED);
    if (retvals == NULL) {
        // Never hand NULL-based return addresses to qthread_fork().
        return EXIT_FAILURE;
    }
    iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED);
    for (int i=0; i<THREADS_ENQUEUED; i++) {
        status = qthread_fork(tobequeued, NULL, retvals + i);
        assert(status == QTHREAD_SUCCESS);
    }

    iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED);
    while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield();
    assert(threads_in == THREADS_ENQUEUED);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/6 Releasing a single thread...\n");
    qthread_queue_release_one(the_queue);

    iprintf("4/6 Waiting for that thread to exit\n");
    while (awoke == 0) qthread_yield();

    assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1));
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("5/6 Releasing the rest of the threads...\n");
    qthread_queue_release_all(the_queue);

    // Join every task before its return slot is released below.
    for (int i=0; i<THREADS_ENQUEUED; i++) {
        ret = qthread_readFF(NULL, retvals + i);
        assert(ret == QTHREAD_SUCCESS);
    }

    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    free(retvals);

    iprintf("6/6 Test passed!\n");

    return EXIT_SUCCESS;
}
Esempio n. 18
0
// //////////////////////////////////////////////////////////////////////////////
/*
 * Sinc test: exercises an empty sinc, a sinc whose expected count is raised
 * after creation, and a sinc that is waited on by several tasks, reset, and
 * reused for a second round of `visit` spawns of depth TEST_DEPTH.
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized.
 */
int main(int   argc,
         char *argv[])
{
    size_t depth = 3;

    /* Initialize outside of assert(): with NDEBUG defined the assert argument
     * is never evaluated and the runtime would never be started. */
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return 1;
    }

    CHECK_VERBOSE();
    NUMARG(depth, "TEST_DEPTH");

    // Test creating an empty sinc
    {
        qt_sinc_t zero_sinc;
        qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0);
        qt_sinc_wait(&zero_sinc, NULL);
        qt_sinc_fini(&zero_sinc);

        // Created expecting nothing, then told to expect three submissions
        qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0);
        qt_sinc_expect(three_sinc, 3);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qt_sinc_wait(three_sinc, NULL);
        qt_sinc_destroy(three_sinc);
    }

    qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2);

    // Spawn additional waits
    aligned_t rets[3];
    {
        qthread_fork(wait_on_sinc, sinc, &rets[0]);
        qthread_fork(wait_on_sinc, sinc, &rets[1]);
        qthread_fork(wait_on_sinc, sinc, &rets[2]);
    }

    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_create(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    qt_sinc_wait(sinc, NULL);
    // Join the extra waiters before the sinc is reset underneath them
    for (int i = 0; i < 3; i++)
        qthread_readFF(NULL, &rets[i]);

    // Reset the sinc
    qt_sinc_reset(sinc, 2);

    // Second use
    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_reset(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    qt_sinc_wait(sinc, NULL);
    qt_sinc_destroy(sinc);

    return 0;
}
Esempio n. 19
0
/*
 * FEB-barrier test: for ITERATIONS rounds, forks THREADS `barrier_thread`
 * tasks, enters the barrier from main as the extra participant, and checks
 * that every task advanced its `initme` slot to the round number. Each fork
 * gets its own return word in `rets` (iterations * threads of them), which
 * are all drained before the program exits.
 *
 * Returns 0 on success, 1 if the qthread runtime could not be initialized.
 */
int main(int   argc,
         char *argv[])
{
    size_t       threads = 1000, i;
    aligned_t   *rets;
    qtimer_t     t;
    unsigned int iter, iterations = 10;

    /* Initialize outside of assert(): with NDEBUG defined the assert argument
     * is never evaluated and the runtime would never be started. */
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return 1;
    }
    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(threads, "THREADS");
    NUMARG(iterations, "ITERATIONS");

    initme = (aligned_t *)calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t));
    assert(rets);

    iprintf("creating the barrier for %zu threads\n", threads + 1);
    wait_on_me = qt_feb_barrier_create(threads + 1);    // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 0; i < threads; i++) {
            qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_feb_barrier_enter(wait_on_me);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));

        initme_idx = 0;

        for (i = 0; i < threads; i++) {
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Destroying barrier...\n");
    qt_feb_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    /* this loop shouldn't be necessary... but seems to avoid crashes in rare
     * cases (in other words there must a race condition in qthread_finalize()
     * if there are outstanding threads out there).
     *
     * The bound is the number of return words actually allocated and forked
     * into (iterations * threads); a fixed "threads * 2" reads past the end
     * of rets when ITERATIONS < 2 and leaves tasks undrained when it is > 2. */
    for (i = 0; i < (size_t)iterations * threads; i++) {
        aligned_t tmp = 1;
        qthread_readFF(&tmp, rets + i);
        assert(tmp == 0);
    }

    /* Every task has been joined above, so nothing still touches these. */
    free(rets);
    free(initme);
    qtimer_destroy(t);

    return 0;
}
Esempio n. 20
0
#if defined(ARGOBOTS) || defined(MASSIVETHREADS) || defined(QTHREADS)
/*
 * Resolve an integer setting from the environment.
 *
 * Lookup order:
 *   1. `accalt_var`  -- if set, its value wins and (when `backend_var` is
 *                       non-NULL) is exported to `backend_var`, overwriting
 *                       any existing value;
 *   2. `backend_var` -- if set, its value is used as-is and left untouched;
 *   3. `fallback`    -- used when neither is set; it is also exported to
 *                       `backend_var` so the backend is told the same value
 *                       that gets recorded in the team structure.
 *
 * Values are parsed with atoi(), so non-numeric text yields 0.
 */
static int accalt_env_int(const char *accalt_var, const char *backend_var,
        int fallback) {
    char text[32]; /* large enough for any int rendered with "%d" */
    const char *raw = getenv(accalt_var);

    if (raw != NULL) {
        int value = atoi(raw);
        if (backend_var != NULL) {
            snprintf(text, sizeof (text), "%d", value);
            setenv(backend_var, text, 1);
        }
        return value;
    }

    if (backend_var != NULL) {
        raw = getenv(backend_var);
        if (raw != NULL) {
            return atoi(raw);
        }
        snprintf(text, sizeof (text), "%d", fallback);
        setenv(backend_var, text, 1);
    }
    return fallback;
}
#endif

/*
 * Initialize the accalt runtime on top of the backend selected at compile
 * time (ARGOBOTS, MASSIVETHREADS or QTHREADS).
 *
 * Allocates the global `main_team`, reads the ACCALT_* environment variables
 * to size the team, forwards those sizes to the backend's own environment
 * variables where the backend is configured that way, and starts the backend.
 *
 * argc/argv are only used by the Argobots backend (passed to ABT_init).
 * On allocation failure a message is printed to stderr and the process exits.
 */
void accalt_init(int argc, char * argv[]) {

    int num_threads = 1;
    main_team = (accalt_team_t *) malloc(sizeof (accalt_team_t));
    if (main_team == NULL) {
        fprintf(stderr, "accalt_init: out of memory\n");
        exit(EXIT_FAILURE);
    }

#ifdef ARGOBOTS


    ABT_init(argc, argv);
    int num_pools = accalt_env_int("ACCALT_NUM_POOLS", NULL, 1);
    num_threads = accalt_env_int("ACCALT_NUM_THREADS", NULL, 1);
    /* team[0] and pools[0] are always used below and pools are selected with
     * `i % num_pools`, so both counts must be at least 1. */
    if (num_threads < 1) {
        num_threads = 1;
    }
    if (num_pools < 1) {
        num_pools = 1;
    }
    main_team->num_xstreams = num_threads;
    main_team->num_pools = num_pools;
    //printf("Argobots %d ES, %d Pools\n", num_threads, num_pools);
    ABT_xstream_self(&main_team->master);

    main_team->team = (ABT_xstream *) malloc(sizeof (ABT_xstream) * (size_t) num_threads);
    main_team->pools = (ABT_pool *) malloc(sizeof (ABT_pool) * (size_t) num_pools);
    if (main_team->team == NULL || main_team->pools == NULL) {
        fprintf(stderr, "accalt_init: out of memory\n");
        exit(EXIT_FAILURE);
    }

    for (int i = 0; i < num_pools; i++) {
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
                &main_team->pools[i]);
    }

    /* The calling execution stream becomes team member 0 and is attached to
     * pool 0; the remaining streams are created and spread over the pools
     * round-robin. */
    ABT_xstream_self(&main_team->team[0]);
    ABT_xstream_set_main_sched_basic(main_team->team[0], ABT_SCHED_DEFAULT,
            1, &main_team->pools[0]);
    for (int i = 1; i < num_threads; i++) {
        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1,
                &main_team->pools[i % main_team->num_pools],
                ABT_SCHED_CONFIG_NULL, &main_team->team[i]);
        ABT_xstream_start(main_team->team[i]);
    }
#endif
#ifdef MASSIVETHREADS
    /* ACCALT_NUM_THREADS overrides MYTH_WORKER_NUM; with neither set, 1 is used. */
    num_threads = accalt_env_int("ACCALT_NUM_THREADS", "MYTH_WORKER_NUM", 1);

    setenv("MYTH_BIND_WORKERS", "1", 1);

    //printf("Massive %d Workers\n", num_threads);
    main_team->num_workers = num_threads;
    myth_init(); //MassiveThreads
#endif
#ifdef QTHREADS
    int num_workers_per_thread;

    /* ACCALT_* values override the QTHREAD_* ones; with neither set, 1 is used. */
    num_threads = accalt_env_int("ACCALT_NUM_THREADS",
            "QTHREAD_NUM_SHEPHERDS", 1);
    num_workers_per_thread = accalt_env_int("ACCALT_NUM_WORKERS_PER_THREAD",
            "QTHREAD_NUM_WORKERS_PER_SHEPHERD", 1);

    /* Choose the shepherd boundary from the team shape: a single shepherd
     * with several workers uses "node", several shepherds use "core". */
    if (num_threads == 1 && num_workers_per_thread > 1) {
        setenv("QTHREAD_SHEPHERDS_BOUNDARY", "node", 1);
        setenv("QTHREAD_WORKER_UNIT", "core", 1);
    }
    if (num_threads > 1) {
        setenv("QTHREAD_SHEPHERDS_BOUNDARY", "core", 1);
        setenv("QTHREAD_WORKER_UNIT", "core", 1);
    }
    setenv("QTHREAD_AFFINITY", "yes", 1);

    //printf("Qthreads %d Shepherds, %d Workers_per_shepherd\n", num_threads, num_workers_per_thread);

    main_team->num_shepherds = num_threads;
    main_team->num_workers_per_shepherd = num_workers_per_thread;
    qthread_initialize(); //qthreads
#endif
}
Esempio n. 21
0
int main(int argc, char *argv[])
{
    aligned_t *ui_array, *ui_array2;
    double *d_array, *d_array2;
    size_t len = 1000000;
    qtimer_t timer = qtimer_create();
    double cumulative_time_qutil = 0.0;
    double cumulative_time_libc = 0.0;
    int using_doubles = 0;
    unsigned long iterations = 10;

    qthread_initialize();

    CHECK_VERBOSE();
    printf("%i threads\n", (int)qthread_num_workers());
    NUMARG(len, "TEST_LEN");
    NUMARG(iterations, "TEST_ITERATIONS");
    NUMARG(using_doubles, "TEST_USING_DOUBLES");
    printf("using %s\n", using_doubles ? "doubles" : "aligned_ts");

    if (using_doubles) {
        d_array = calloc(len, sizeof(double));
	printf("array is %s\n", human_readable(len * sizeof(double)));
        assert(d_array);
        // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL);
        for (unsigned int i = 0; i < len; i++) {
            d_array[i] = ((double)random()) / ((double)RAND_MAX) + random();
        }
        d_array2 = calloc(len, sizeof(double));
        assert(d_array2);
        // madvise(d_array2,len*sizeof(double), MADV_RANDOM);
        iprintf("double array generated...\n");
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qutil_qsort(d_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
        cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu doubles with qutil took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_qutil);
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qsort(d_array2, len, sizeof(double), dcmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu doubles with libc took: %f seconds\n",
               (unsigned long)len, cumulative_time_libc);
        free(d_array);
        free(d_array2);
    } else {
        ui_array = calloc(len, sizeof(aligned_t));
	printf("array is %s\n", human_readable(len * sizeof(aligned_t)));
        for (unsigned int i = 0; i < len; i++) {
            ui_array[i] = random();
        }
        ui_array2 = calloc(len, sizeof(aligned_t));
        iprintf("ui_array generated...\n");
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qutil_aligned_qsort(ui_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
        }
	cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu aligned_ts with qutil took: %f seconds\n",
               (unsigned long)len, cumulative_time_qutil);
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qsort(ui_array2, len, sizeof(double), acmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_libc);
        free(ui_array);
        free(ui_array2);
    }
    if (cumulative_time_qutil < cumulative_time_libc) {
	printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    } else {
	printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    }

    qtimer_destroy(timer);

    return 0;
}