Exemplo n.º 1
0
void Qthread::initialize( int thread_count )
{
  // Environment variable: QTHREAD_NUM_SHEPHERDS
  // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP
  // Environment variable: QTHREAD_HWPAR

  {
    char buffer[256];
    snprintf(buffer,sizeof(buffer),"QTHREAD_HWPAR=%d",thread_count);
    putenv(buffer);
  }

  const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) &&
                       ( thread_count    == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) &&
                       ( thread_count    == qthread_num_workers() );

  bool ok_symmetry = true ;

  if ( ok_init ) {
    Impl::s_number_shepherds            = qthread_num_shepherds();
    Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD);
    Impl::s_number_workers              = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ;

    for ( int i = 0 ; ok_symmetry && i < Impl::s_number_shepherds ; ++i ) {
      ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) );
    }
  }

  if ( ! ok_init || ! ok_symmetry ) {
    std::ostringstream msg ;

    msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ;
    msg << " : qthread_num_shepherds = " << qthread_num_shepherds();
    msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD);
    msg << " : qthread_num_workers = " << qthread_num_workers();

    if ( ! ok_symmetry ) {
      msg << " : qthread_num_workers_local = {" ;
      for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) {
        msg << " " << qthread_num_workers_local(i) ;
      }
      msg << " }" ;
    }

    Impl::s_number_workers   = 0 ;
    Impl::s_number_shepherds = 0 ;
    Impl::s_number_workers_per_shepherd = 0 ;

    if ( ok_init ) { qthread_finalize(); }

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  Impl::QthreadExec::resize_worker_scratch( 256 , 256 );

  // Init the array for used for arbitrarily sized atomics
  Impl::init_lock_array_host_space();

}
Exemplo n.º 2
0
int main(int argc,
         char *argv[])
{
    aligned_t ret;

    qthread_init(2);

    CHECK_VERBOSE();

    assert(qthread_num_shepherds() == 2);
    iprintf("now to fork to shepherd 0...\n");
    qthread_fork_to(checkres, (void *)0, &ret, 0);
    qthread_readFF(&ret, &ret);
    iprintf("success in forking to shepherd 0!\n");
    iprintf("now to fork to shepherd 1...\n");
    qthread_fork_to(checkres, (void *)1, &ret, 1);
    qthread_readFF(&ret, &ret);
    iprintf("success in forking to shepherd 1!\n");
    iprintf("now to fork the migrant...\n");
    qthread_fork(migrant, NULL, &ret);
    iprintf("success in forking migrant!\n");
    qthread_readFF(&ret, &ret);
    iprintf("migrant returned successfully!\n");

    return 0;
}
Exemplo n.º 3
0
void INTERNAL initialize_hazardptrs(void)
{/*{{{*/
    freelist_max = qthread_num_shepherds() * qlib->nworkerspershep + 7;
    for (qthread_shepherd_id_t i = 0; i < qthread_num_shepherds(); ++i) {
        for (qthread_worker_id_t j = 0; j < qlib->nworkerspershep; ++j) {
            memset(qlib->shepherds[i].workers[j].hazard_ptrs, 0, sizeof(uintptr_t) * HAZARD_PTRS_PER_SHEP);
            memset(&qlib->shepherds[i].workers[j].hazard_free_list, 0, sizeof(hazard_freelist_t));
            qlib->shepherds[i].workers[j].hazard_free_list.freelist = calloc(freelist_max + 1, sizeof(hazard_freelist_entry_t));
            assert(qlib->shepherds[i].workers[j].hazard_free_list.freelist);
            qlib->shepherds[i].workers[j].hazard_free_list.freelist[freelist_max].ptr = free_these_freelists;
            free_these_freelists                                                      = qlib->shepherds[i].workers[j].hazard_free_list.freelist;
        }
    }
    TLS_INIT(ts_hazard_ptrs);
    QTHREAD_CASLOCK_INIT(hzptr_list, NULL);
    qthread_internal_cleanup(hazardptr_internal_teardown);
}/*}}}*/
int main(int argc,
         char *argv[])
{
    CHECK_VERBOSE();
    assert(qthread_init(1) == 0); 
    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("  %i threads total\n", qthread_num_workers());

    testWriteFFWaits();
}
Exemplo n.º 5
0
c_sublocid_t chpl_task_getNumSublocales(void)
{
    // FIXME: What we really want here is the number of NUMA
    // sublocales we are supporting.  For now we use the number of
    // shepherds as a proxy for that.

#ifdef CHPL_LOCALE_MODEL_NUM_SUBLOCALES
#if CHPL_LOCALE_MODEL_NUM_SUBLOCALES < 0
#error Forced number of sublocales cannot be negative.
#endif
    if (CHPL_LOCALE_MODEL_NUM_SUBLOCALES == 0) {
        return 0;
    } else {
        c_sublocid_t num_sublocs = (c_sublocid_t) qthread_num_shepherds();
        return ((num_sublocs < CHPL_LOCALE_MODEL_NUM_SUBLOCALES)
                ? num_sublocs
                : CHPL_LOCALE_MODEL_NUM_SUBLOCALES);
    }
#else
    return (c_sublocid_t) qthread_num_shepherds();
#endif
}
Exemplo n.º 6
0
c_sublocid_t chpl_task_getNumSubLocales(void)
{
    c_sublocid_t num_sublocs = (c_sublocid_t) qthread_num_shepherds();

    // FIXME: What we really want here is the number of NUMA
    // sublocales we are supporting.  For now we use the number of
    // shepherds as a proxy for that.
#ifdef CHPL_LOCALE_MODEL_NUM_SUBLOCALES
    return ((num_sublocs < CHPL_LOCALE_MODEL_NUM_SUBLOCALES)
            ? num_sublocs
            : CHPL_LOCALE_MODEL_NUM_SUBLOCALES);
#else
    return (c_sublocid_t) ((num_sublocs < 2) ? 0 : num_sublocs);
#endif
}
Exemplo n.º 7
0
/******************************************************
* Unbalanced Tree Search v2.1                        *
* Based on the implementation available at           *
*     http://sourceforge.net/projects/uts-benchmark  *
******************************************************/

#ifdef HAVE_CONFIG_H
# include "config.h" /* for _GNU_SOURCE */
#endif

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h> /* for INT_MAX */
#include <math.h>   /* for floor, log, sin */
#include <qthread/qthread.h>
#include <qthread/qtimer.h>
#include "argparsing.h"

#define BRG_RNG // Select RNG
#include "../../utils/rng/rng.h"

#define PRINT_STATS 1

#define MAXNUMCHILDREN 100

typedef enum {
    BIN = 0,
    GEO,
    HYBRID,
    BALANCED
} tree_t;
static char *type_names[] = {
    "Binomial",
    "Geometric",
    "Hybrid",
    "Balanced"
};

typedef enum {
    LINEAR = 0,
    EXPDEC,
    CYCLIC,
    FIXED
} shape_t;
static char *shape_names[] = {
    "Linear decrease",
    "Exponential decrease",
    "Cyclic",
    "Fixed branching factor"
};

typedef struct {
    int            height; // Depth of node in the tree
    struct state_t state;  // Local RNG state
    int            num_children;
    aligned_t     *acc;
    aligned_t     *dc;
    aligned_t      expect;
} node_t;

// Default values
static tree_t  tree_type     = GEO;
static double  bf_0          = 4.0;
static int     root_seed     = 0;
static int     num_samples   = 1;
static int     tree_depth    = 6;
static shape_t shape_fn      = LINEAR;
static int     non_leaf_bf   = 4;
static double  non_leaf_prob = 15.0 / 64.0;
static double  shift_depth   = 0.5;

// Tree metrics
static uint64_t tree_height = 0;
static uint64_t num_leaves  = 0;

static double normalize(int n)
{/*{{{*/
    if (n < 0) {
        printf("*** toProb: rand n = %d out of range\n", n);
    }

    return ((n < 0) ? 0.0 : ((double)n) / (double)INT_MAX);
}/*}}}*/

static int calc_num_children_bin(node_t *parent)
{/*{{{*/
    int    v = rng_rand(parent->state.state);
    double d = normalize(v);

    return (d < non_leaf_prob) ? non_leaf_bf : 0;
}/*}}}*/

static int calc_num_children(node_t *parent)
{/*{{{*/
    int num_children = 0;

    if (parent->height == 0) { num_children = (int)floor(bf_0); } else { num_children = calc_num_children_bin(parent); }

    if (parent->height == 0) {
        int root_bf = (int)ceil(bf_0);
        if (num_children > root_bf) {
            printf("*** Number of children truncated from %d to %d\n",
                   num_children, root_bf);
            num_children = root_bf;
        }
    } else {
        if (num_children > MAXNUMCHILDREN) {
            printf("*** Number of children truncated from %d to %d\n",
                   num_children, MAXNUMCHILDREN);
            num_children = MAXNUMCHILDREN;
        }
    }

    return num_children;
}/*}}}*/

// Notes:
// -    Each task receives distinct copy of parent
// -    Copy of child is shallow, be careful with `state` member
static aligned_t visit(void *args_)
{
    node_t  *parent          = (node_t *)args_;
    int      parent_height   = parent->height;
    int      num_children    = parent->num_children;
    aligned_t expect         = parent->expect;
    aligned_t num_descendants[num_children];
    aligned_t sum_descendants = 1;

    if (num_children != 0) {
        node_t     child __attribute__((aligned(8)));
        aligned_t  donec = 0;

        // Spawn children, if any
        child.height = parent_height + 1;
        child.dc     = &donec;
        child.expect = num_children;

        qthread_empty(&donec);

        for (int i = 0; i < num_children; i++) {
            child.acc    = &num_descendants[i];

            for (int j = 0; j < num_samples; j++) {
                rng_spawn(parent->state.state, child.state.state, i);
            }

            child.num_children = calc_num_children(&child);

            qthread_fork_syncvar_copyargs(visit, &child, sizeof(node_t), NULL);
        }

        // Wait for children to finish up, accumulate descendants counts
        if (donec != expect) qthread_readFF(NULL, &donec);

        for (int i = 0; i < num_children; i++) {
            sum_descendants += num_descendants[i];
        }
    }

    *parent->acc = sum_descendants;
    if (qthread_incr(parent->dc, 1) + 1 == expect) {
        qthread_fill(parent->dc);
    }

    return 0;
}

#ifdef PRINT_STATS
static void print_stats(void)
{/*{{{*/
    printf("tree-type %d\ntree-type-name %s\n",
           tree_type, type_names[tree_type]);
    printf("root-bf %.1f\nroot-seed %d\n",
           bf_0, root_seed);

    if ((tree_type == GEO) || (tree_type == HYBRID)) {
        printf("gen_mx %d\nshape-fn %d\nshape-fn-name %s\n",
               tree_depth, shape_fn, shape_names[shape_fn]);
    }

    if ((tree_type == BIN) || (tree_type == HYBRID)) {
        double q  = non_leaf_prob;
        int    m  = non_leaf_bf;
        double es = (1.0 / (1.0 - q * m));
        printf("q %f\nm %d\nE(n) %f\nE(s) %.2f\n",
               q, m, q * m, es);
    }

    if (tree_type == HYBRID) {
        printf("root-to-depth %d\n",
               (int)ceil(shift_depth * tree_depth));
    }

    if (tree_type == BALANCED) {
        printf("gen_mx %d\n", tree_depth);
        printf("expected-num-nodes %llu\nexpected-num-leaves %llu\n",
               (unsigned long long)((pow(bf_0, tree_depth + 1) - 1.0) / (bf_0 - 1.0)),
               (unsigned long long)pow(bf_0, tree_depth));
    }

    printf("compute-granularity %d\n", num_samples);
    printf("num-sheps %d\n", qthread_num_shepherds());
    printf("num-workers %d\n", qthread_num_workers());

    printf("\n");

    fflush(stdout);
}/*}}}*/

#else /* ifdef PRINT_STATS */
static void print_banner(void)
{/*{{{*/
    printf("UTS - Unbalanced Tree Search 2.1 (C/Qthreads)\n");
    printf("Tree type:%3d (%s)\n", tree_type, type_names[tree_type]);
    printf("Tree shape parameters:\n");
    printf("  root branching factor b_0 = %.1f, root seed = %d\n",
           bf_0, root_seed);

    if ((tree_type == GEO) || (tree_type == HYBRID)) {
        printf("  GEO parameters: gen_mx = %d, shape function = %d (%s)\n",
               tree_depth, shape_fn, shape_names[shape_fn]);
    }

    if ((tree_type == BIN) || (tree_type == HYBRID)) {
        double q  = non_leaf_prob;
        int    m  = non_leaf_bf;
        double es = (1.0 / (1.0 - q * m));
        printf("  BIN parameters: q = %f, m = %d, E(n) = %f, E(s) = %.2f\n",
               q, m, q * m, es);
    }

    if (tree_type == HYBRID) {
        printf("  HYBRID: GEO from root to depth %d, then BIN\n",
               (int)ceil(shift_depth * tree_depth));
    }

    if (tree_type == BALANCED) {
        printf("  BALANCED parameters: gen_mx = %d\n", tree_depth);
        printf("    Expected size: %llu nodes, %llu leaves\n",
               (unsigned long long)((pow(bf_0, tree_depth + 1) - 1.0) / (bf_0 - 1.0)),
               (unsigned long long)pow(bf_0, tree_depth));
    }

    printf("Random number generator: ");
    printf("SHA-1 (state size = %ldB)\n", sizeof(struct state_t));
    printf("Compute granularity: %d\n", num_samples);
    printf("Execution strategy:\n");
    printf("  Shepherds: %d\n", qthread_num_shepherds());
    printf("  Workers:   %d\n", qthread_num_workers());

    printf("\n");

    fflush(stdout);
}/*}}}*/
Exemplo n.º 8
0
int main(int   argc,
         char *argv[])
{
    assert(qthread_initialize() == QTHREAD_SUCCESS);
    CHECK_VERBOSE();
    NUMARG(numincrs, "NUM_INCRS");
    // future_init(128);
    iprintf("%i shepherds\n", qthread_num_shepherds());
    iprintf("%i threads\n", qthread_num_workers());

    qt_loop_balance_sinc(0, numincrs, sum, NULL);

    if (threads != numincrs) {
        iprintf("threads == %lu, not %lu\n", (unsigned long)threads, (unsigned long)numincrs);
    }
    assert(threads == numincrs);

    return 0;
}
Exemplo n.º 9
0
static void print_stats(void)
{/*{{{*/
    printf("tree-type %d\ntree-type-name %s\n",
           tree_type, type_names[tree_type]);
    printf("root-bf %.1f\nroot-seed %d\n",
           bf_0, root_seed);

    if ((tree_type == GEO) || (tree_type == HYBRID)) {
        printf("gen_mx %d\nshape-fn %d\nshape-fn-name %s\n",
               tree_depth, shape_fn, shape_names[shape_fn]);
    }

    if ((tree_type == BIN) || (tree_type == HYBRID)) {
        double q  = non_leaf_prob;
        int    m  = non_leaf_bf;
        double es = (1.0 / (1.0 - q * m));
        printf("q %f\nm %d\nE(n) %f\nE(s) %.2f\n",
               q, m, q * m, es);
    }

    if (tree_type == HYBRID) {
        printf("root-to-depth %d\n",
               (int)ceil(shift_depth * tree_depth));
    }

    if (tree_type == BALANCED) {
        printf("gen_mx %d\n", tree_depth);
        printf("expected-num-nodes %llu\nexpected-num-leaves %llu\n",
               (unsigned long long)((pow(bf_0, tree_depth + 1) - 1.0) / (bf_0 - 1.0)),
               (unsigned long long)pow(bf_0, tree_depth));
    }

    printf("compute-granularity %d\n", num_samples);
    printf("num-sheps %d\n", qthread_num_shepherds());
    printf("num-workers %d\n", qthread_num_workers());

    printf("\n");

    fflush(stdout);
}/*}}}*/
Exemplo n.º 10
0
/*
 * The main procedure simply creates a producer and a consumer task to run in
 * parallel
 */
int main(int argc,
         char *argv[])
{
    aligned_t t[2];

    assert(qthread_initialize() == 0);

    CHECK_VERBOSE();
    NUMARG(bufferSize, "BUFFERSIZE");
    numItems = 8 * bufferSize;
    NUMARG(numItems, "NUMITEMS");

    iprintf("%i threads...\n", qthread_num_shepherds());

    buff = malloc(sizeof(aligned_t) * bufferSize);
    for (unsigned int i = 0; i < bufferSize; ++i) {
        buff[i] = 0;
    }

    qthread_fork(consumer, NULL, &t[0]);
    qthread_fork(producer, NULL, &t[1]);
    qthread_readFF(NULL, &t[0]);
    qthread_readFF(NULL, &t[1]);

    /* cleanup... unnecessary in general, but for the moment I'm tracking down
     * errors in the FEB system, so let's clean up */
    for (unsigned int i = 0; i < bufferSize; ++i) {
	qthread_fill(buff + i);
    }

    free(buff);

    iprintf("Success!\n");

    return 0;
}
Exemplo n.º 11
0
int main(int argc,
         char *argv[])
{
    aligned_t *t[2];
    uint64_t x_value;

    uint64_t pairs;

    assert(qthread_initialize() == 0);
    pairs = qthread_num_shepherds() * 6;

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");
    NUMARG(pairs, "PAIRS");

    t[0] = calloc(pairs, sizeof(aligned_t));
    t[1] = calloc(pairs, sizeof(aligned_t));

    iprintf("%i threads...\n", qthread_num_shepherds());
    iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w);

    qthread_syncvar_empty(&id);
    qthread_syncvar_writeF_const(&id, 1);
    iprintf("id = 0x%lx\n", (unsigned long)id.u.w);
    {
        uint64_t tmp = 0;
        qthread_syncvar_readFF(&tmp, &id);
        assert(tmp == 1);
    }
    iprintf("x's status is: %s (want full (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 1);
    qthread_syncvar_readFE(NULL, &x);
    iprintf("x's status became: %s (want empty (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 0);
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i]));
    }
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i]));
    }
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_readFF(NULL, &(t[0][i]));
        qthread_readFF(NULL, &(t[1][i]));
    }
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_fill(&x);
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_readFF(&x_value, &x);
    assert(qthread_syncvar_status(&x) == 1);

    free(t[0]);
    free(t[1]);

    if (x_value == iterations - 1) {
        iprintf("Success! x==%lu\n", (unsigned long)x_value);
        return 0;
    } else {
        fprintf(stderr, "Final value of x=%lu, expected %lu\n",
                (unsigned long)x_value, (unsigned long)(iterations - 1));
        return -1;
    }
}
Exemplo n.º 12
0
int main(int   argc,
         char *argv[])
{
    aligned_t return_value = 0;
    int status, ret;

    CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output
    NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED");

    status = qthread_initialize();
    assert(status == QTHREAD_SUCCESS);

    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("  %i threads total\n", qthread_num_workers());

    iprintf("Creating the queue...\n");
    the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0);
    assert(the_queue);

    iprintf("---------------------------------------------------------\n");
    iprintf("\tSINGLE THREAD TEST\n\n");

    iprintf("1/4 Spawning thread to be queued...\n");
    status = qthread_fork(tobequeued, NULL, &return_value);
    assert(status == QTHREAD_SUCCESS);

    iprintf("2/4 Waiting for thread to queue itself...\n");
    while(qthread_queue_length(the_queue) != 1) qthread_yield();
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/4 Releasing the queue...\n");
    qthread_queue_release_all(the_queue);

    ret = qthread_readFF(NULL, &return_value);
    assert(ret == QTHREAD_SUCCESS);

    assert(threads_in == 1);
    assert(awoke == 1);
    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);
    iprintf("4/4 Test passed!\n");

    iprintf("---------------------------------------------------------\n");
    iprintf("\tMULTI THREAD TEST\n\n");

    threads_in = 0;
    awoke = 0;
    aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED);
    iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED);
    for (int i=0; i<THREADS_ENQUEUED; i++) {
        status = qthread_fork(tobequeued, NULL, retvals + i);
        assert(status == QTHREAD_SUCCESS);
    }

    iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED);
    while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield();
    assert(threads_in == THREADS_ENQUEUED);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/6 Releasing a single thread...\n");
    qthread_queue_release_one(the_queue);

    iprintf("4/6 Waiting for that thread to exit\n");
    while (awoke == 0) qthread_yield();

    assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1));
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("5/6 Releasing the rest of the threads...\n");
    qthread_queue_release_all(the_queue);

    for (int i=0; i<THREADS_ENQUEUED; i++) {
        ret = qthread_readFF(NULL, retvals + i);
        assert(ret == QTHREAD_SUCCESS);
    }

    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("6/6 Test passed!\n");

    return EXIT_SUCCESS;
}
Exemplo n.º 13
0
int main(int argc, char *argv[])
{
    aligned_t *ui_array, *ui_array2;
    double *d_array, *d_array2;
    size_t len = 1000000;
    qtimer_t timer = qtimer_create();
    double cumulative_time_qutil = 0.0;
    double cumulative_time_libc = 0.0;
    int using_doubles = 0;
    unsigned long iterations = 10;

    qthread_initialize();

    CHECK_VERBOSE();
    printf("%i threads\n", (int)qthread_num_workers());
    NUMARG(len, "TEST_LEN");
    NUMARG(iterations, "TEST_ITERATIONS");
    NUMARG(using_doubles, "TEST_USING_DOUBLES");
    printf("using %s\n", using_doubles ? "doubles" : "aligned_ts");

    if (using_doubles) {
        d_array = calloc(len, sizeof(double));
	printf("array is %s\n", human_readable(len * sizeof(double)));
        assert(d_array);
        // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL);
        for (unsigned int i = 0; i < len; i++) {
            d_array[i] = ((double)random()) / ((double)RAND_MAX) + random();
        }
        d_array2 = calloc(len, sizeof(double));
        assert(d_array2);
        // madvise(d_array2,len*sizeof(double), MADV_RANDOM);
        iprintf("double array generated...\n");
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qutil_qsort(d_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
        cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu doubles with qutil took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_qutil);
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qsort(d_array2, len, sizeof(double), dcmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu doubles with libc took: %f seconds\n",
               (unsigned long)len, cumulative_time_libc);
        free(d_array);
        free(d_array2);
    } else {
        ui_array = calloc(len, sizeof(aligned_t));
	printf("array is %s\n", human_readable(len * sizeof(aligned_t)));
        for (unsigned int i = 0; i < len; i++) {
            ui_array[i] = random();
        }
        ui_array2 = calloc(len, sizeof(aligned_t));
        iprintf("ui_array generated...\n");
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qutil_aligned_qsort(ui_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
        }
	cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu aligned_ts with qutil took: %f seconds\n",
               (unsigned long)len, cumulative_time_qutil);
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qsort(ui_array2, len, sizeof(double), acmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_libc);
        free(ui_array);
        free(ui_array2);
    }
    if (cumulative_time_qutil < cumulative_time_libc) {
	printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    } else {
	printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    }

    qtimer_destroy(timer);

    return 0;
}
Exemplo n.º 14
0
static void hazardous_scan(hazard_freelist_t *hfl)
{/*{{{*/
    const size_t num_hps = qthread_num_workers() * HAZARD_PTRS_PER_SHEP;
    void            **plist = MALLOC(sizeof(void *) * (num_hps + hzptr_list_len));
    hazard_freelist_t tmpfreelist;

    assert(plist);
    tmpfreelist.freelist = calloc(freelist_max, sizeof(hazard_freelist_entry_t));
    assert(tmpfreelist.freelist);
    do {
        /* Stage 1: Collect hazardpointers */
        {
            qthread_shepherd_id_t i;
            for (i = 0; i < qthread_num_shepherds(); ++i) {
                for (qthread_worker_id_t j = 0; j < qlib->nworkerspershep; ++j) {
                    if (&(qlib->shepherds[i].workers[j].hazard_free_list) != hfl) {
                        memcpy(plist + (i * qlib->nworkerspershep * HAZARD_PTRS_PER_SHEP) + (j * HAZARD_PTRS_PER_SHEP),
                               qlib->shepherds[i].workers[j].hazard_ptrs,
                               sizeof(void *) * HAZARD_PTRS_PER_SHEP);
                    } else {
                        memset(plist + (i * qlib->nworkerspershep * HAZARD_PTRS_PER_SHEP) + (j * HAZARD_PTRS_PER_SHEP),
                               0,
                               sizeof(void *) * HAZARD_PTRS_PER_SHEP);
                    }
                }
            }
            uintptr_t *hzptr_tmp = QTHREAD_CASLOCK_READ(hzptr_list);
            while (hzptr_tmp != NULL) {
                memcpy(plist + (i * qlib->nworkerspershep * HAZARD_PTRS_PER_SHEP),
                       hzptr_tmp,
                       sizeof(uintptr_t) * HAZARD_PTRS_PER_SHEP);
                hzptr_tmp = (uintptr_t *)hzptr_tmp[HAZARD_PTRS_PER_SHEP];
            }
        }

        /* Stage 2: free pointers that are not in the set of hazardous pointers */
        tmpfreelist.count = 0;
        qsort(plist, num_hps, sizeof(void *), void_cmp);
        assert(hfl->count == freelist_max);
        for (size_t i = 0; i < freelist_max; ++i) {
            const uintptr_t ptr = (uintptr_t)hfl->freelist[i].ptr;
            if (ptr == 0) { break; }
            /* look for this ptr in the plist */
            if (binary_search((uintptr_t *)plist, ptr, num_hps)) {
                /* if found, cannot free it */
                tmpfreelist.freelist[tmpfreelist.count] = hfl->freelist[i];
                tmpfreelist.count++;
            } else {
                /* not found, therefore, we can free it */
                hfl->freelist[i].freefunc((void *)ptr);
            }
        }
        if (tmpfreelist.count == freelist_max) {
            /* This will ONLY happen under *extremely* heavy contention. */
            MACHINE_FENCE;
        }
    } while (tmpfreelist.count == freelist_max);
    assert(tmpfreelist.count < freelist_max);
    memcpy(hfl->freelist, tmpfreelist.freelist, tmpfreelist.count * sizeof(hazard_freelist_entry_t));
    hfl->count = tmpfreelist.count;
    FREE(tmpfreelist.freelist, sizeof(hazard_freelist_entry_t));
    FREE(plist, sizeof(void *) * (num_hps + hzptr_list_len));
}/*}}}*/
Exemplo n.º 15
0
int main(int   argc,
         char *argv[])
{
    size_t     threads, i;
    aligned_t *rets;
    qtimer_t   t;
    unsigned int iter, iterations = 10;
    double tot = 0.0;

    assert(qthread_initialize() == 0);
    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");

    threads = qthread_num_workers();
    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("%i threads...\n", (int)threads);

    initme = calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = malloc(threads * sizeof(aligned_t));
    assert(rets);

    iprintf("Creating a barrier to block %i threads\n", threads);
    wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0);     // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 1; i < threads; i++) {
            void *arg[2] = {wait_on_me, (void*)(intptr_t)i};
            qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_barrier_enter(wait_on_me, 0);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));
        tot += qtimer_secs(t);

        // reset
        initme_idx = 1;

        // check retvals
        for (i = 1; i < threads; i++) {
            qthread_readFF(NULL, rets + i);
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Average barrier time = %f\n", tot / iterations);

    iprintf("Destroying the barrier...\n");
    qt_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    return 0;
}
Exemplo n.º 16
0
void Task::schedule()
{
  // Is waiting for execution

  // Increment active task count before spawning.
  Kokkos::atomic_increment( m_active_count );

  // spawn in qthread.  must malloc the precondition array and give to qthread.
  // qthread will eventually free this allocation so memory will not be leaked.

  // concern with thread safety of malloc, does this need to be guarded?
  aligned_t ** qprecon = (aligned_t **) malloc( ( m_dep_size + 1 ) * sizeof(aligned_t *) );

  qprecon[0] = reinterpret_cast<aligned_t *>( uintptr_t(m_dep_size) );

  for ( int i = 0 ; i < m_dep_size ; ++i ) {
    qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthread precondition flag
  }

  if ( m_apply_team && ! m_apply_single ) {
    // If more than one shepherd spawn on a shepherd other than this shepherd
    const int num_shepherd            = qthread_num_shepherds();
    const int num_worker_per_shepherd = qthread_num_workers_local(NO_SHEPHERD);
    const int this_shepherd           = qthread_shep();

    int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ;

#if 0
fprintf( stdout
       , "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n"
       , qthread_shep()
       , qthread_worker_local(NULL)
       , reinterpret_cast<unsigned long>(this)
       , spawn_shepherd
       , num_worker_per_shepherd - 1
       );
fflush(stdout);
#endif

    qthread_spawn_cloneable
      ( & Task::qthread_func
      , this
      , 0
      , NULL
      , m_dep_size , qprecon /* dependences */
      , spawn_shepherd
      , unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY )
      , num_worker_per_shepherd - 1
      );
  }
  else {
    qthread_spawn( & Task::qthread_func /* function */
                 , this                 /* function argument */
                 , 0
                 , NULL
                 , m_dep_size , qprecon /* dependences */
                 , NO_SHEPHERD
                 , QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */
                 );
  }
}