コード例 #1
2
ファイル: util.hpp プロジェクト: Auguraculums/graphdb-testing
// Portable "read when full" helper.
//  - With __MTA__ defined: returns the result of readff(&target).
//  - Otherwise, with USING_QTHREADS non-zero: qthread_readFF() copies target
//    into a local, which is then returned.
//  - Otherwise: returns target directly with no synchronization call.
// NOTE(review): the template header that declares T is outside this excerpt.
T mt_readff(T& target)
{
#if defined(__MTA__)
  return readff(&target);
#elif USING_QTHREADS
  T value;
  qthread_readFF(&value, &target);
  return value;
#else
  return target;
#endif
}
コード例 #2
0
ファイル: qthread_migrate_to.c プロジェクト: Agobin/chapel
/*
 * Test driver: forks `checkres` onto shepherd 0 and then shepherd 1, waiting
 * for each to finish, and finally forks `migrant` and waits for it as well.
 * Progress is reported through iprintf(). Always returns 0; failures surface
 * through the assert below (or inside the forked functions).
 */
int main(int argc,
         char *argv[])
{
    aligned_t ret; /* return-value word handed to every fork, then waited on */

    qthread_init(2); /* the assert below expects exactly two shepherds */

    CHECK_VERBOSE();

    assert(qthread_num_shepherds() == 2);
    iprintf("now to fork to shepherd 0...\n");
    /* The task argument carries the shepherd id the task is forked to. */
    qthread_fork_to(checkres, (void *)0, &ret, 0);
    /* Block until the forked task has filled `ret`. */
    qthread_readFF(&ret, &ret);
    iprintf("success in forking to shepherd 0!\n");
    iprintf("now to fork to shepherd 1...\n");
    qthread_fork_to(checkres, (void *)1, &ret, 1);
    qthread_readFF(&ret, &ret);
    iprintf("success in forking to shepherd 1!\n");
    iprintf("now to fork the migrant...\n");
    /* No shepherd is specified for the migrant task. */
    qthread_fork(migrant, NULL, &ret);
    iprintf("success in forking migrant!\n");
    qthread_readFF(&ret, &ret);
    iprintf("migrant returned successfully!\n");

    return 0;
}
コード例 #3
0
ファイル: test_teams.c プロジェクト: Agobin/chapel
// //////////////////////////////////////////////////////////////////////////////
/*
 * Team-id test driver.
 *
 * Forks one task in the current team, `count` tasks in new teams, and two
 * further new-team tasks, collecting the largest value any of them returns.
 * The test succeeds (returns 0) when that maximum equals count + 4 and fails
 * (returns 1) otherwise. `count` defaults to 0 and is read via
 * NUMARG(count, "COUNT").
 */
int main(int   argc,
         char *argv[])
{
    int count = 0;

    aligned_t max = 0;
    aligned_t tmp = 0;

    /* Initialize outside assert() so the call is not compiled away when
     * NDEBUG is defined. */
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    CHECK_VERBOSE();
    NUMARG(count, "COUNT");

    iprintf("Main executing in team %lu (w/ parent %lu)\n", 
        (unsigned long)qt_team_id(), (unsigned long)qt_team_parent_id());
    assert(qt_team_id() == default_team_id);
    assert(qt_team_parent_id() == non_team_id);

    /* One task forked with plain qthread_fork(). */
    aligned_t hello_in_team_ret;
    qthread_fork(hello_in_team, NULL, &hello_in_team_ret);
    qthread_readFF(&tmp, &hello_in_team_ret);
    max = MAX(max, tmp);

    /* A variable-length array must have a size greater than zero, and the
     * default count is 0, so reserve at least one slot. */
    aligned_t hello_new_team_rets[count > 0 ? count : 1];
    for (int i = 0; i < count; i++) {
        qthread_fork_new_team(hello_new_team, NULL, &hello_new_team_rets[i]);
    }
    for (int i = 0; i < count; i++) {
        qthread_readFF(&tmp, &hello_new_team_rets[i]);
        max = MAX(max, tmp);
    }

    aligned_t hello_new_team_in_team_ret;
    qthread_fork_new_team(
        hello_new_team_in_team, NULL, &hello_new_team_in_team_ret);
    qthread_readFF(&tmp, &hello_new_team_in_team_ret);
    max = MAX(max, tmp);

    aligned_t hello_new_team_new_team_ret;
    qthread_fork_new_team(
        hello_new_team_new_team, NULL, &hello_new_team_new_team_ret);
    qthread_readFF(&tmp, &hello_new_team_new_team_ret);
    max = MAX(max, tmp);

    iprintf("max is %lu\n", (unsigned long)max);

    if (count + 4 == max) {
        iprintf("SUCCEEDED with count %lu and max team id %lu\n",
            (unsigned long)count,
            (unsigned long)max);
        return 0;
    } else {
        iprintf("FAILED with count %lu and max team id %lu\n",
            (unsigned long)count,
            (unsigned long)max);
        return 1;
    }
}
コード例 #4
0
// Test that writeFF waits for an empty var to be filled, writes, and leaves
// it full. Requires that only one worker is running. Basically does:
//     1: empties var
//     1: fork(writeFF)
//     1: yields
//     2: starts running
//     2: hits writeFF, and yields since var is empty
//     1: writeEF
//     1: hits readFF on forked task and yields
//     2: running again, finishes writeFF, task returns
//     1: readFF completes, finishes
static void testWriteFFWaits(void)
{
    aligned_t ret;
    concurrent_t=45;
    qthread_empty(&concurrent_t);
    assert(qthread_num_workers() == 1);

    iprintf("1: Forking writeFF wrapper\n");
    qthread_fork_to(writeFF_wrapper, NULL, &ret, qthread_shep());
    iprintf("1: Forked, now yielding to 2\n");
    qthread_yield();
    iprintf("1: Back from yield\n");

    // verify that writeFF has not completed
    assert(qthread_feb_status(&concurrent_t) == 0);
    assert(concurrent_t != 55);

    iprintf("1: Writing EF\n");
    qthread_writeEF_const(&concurrent_t, 35);

    // wait for writeFF wrapper to complete
    qthread_readFF(NULL, &ret);

    // verify that writeFF completed and that FEB is full
    // concurrent_t is handed to the FEB calls above as an aligned_t word, so
    // print it through an explicit cast rather than "%d", which does not
    // match a word-sized argument.
    iprintf("1: concurrent_t=%lu\n", (unsigned long)concurrent_t);
    assert(qthread_feb_status(&concurrent_t) == 1);
    assert(concurrent_t == 55);
}
コード例 #5
0
ファイル: net.c プロジェクト: Agobin/chapel
int qthread_multinode_run(void)
{
    aligned_t val;

    if (0 == initialized) { return 1; }

    qthread_debug(MULTINODE_CALLS, "[%d] begin qthread_multinode_run\n", my_rank);

    qthread_internal_net_driver_barrier();

    if (0 != my_rank) {
        struct die_msg_t msg;

        qthread_readFF(&val, &time_to_die);
        qthread_debug(MULTINODE_DETAILS, "[%d] time to die\n", my_rank);
        msg.my_rank = my_rank;
        qthread_internal_net_driver_send(0, DIE_MSG_TAG, &msg, sizeof(msg));
        qthread_finalize();
        exit(0);
    }

    qthread_debug(MULTINODE_CALLS, "[%d] end qthread_multinode_run\n", my_rank);

    return QTHREAD_SUCCESS;
}
コード例 #6
0
ファイル: comm-qthreads.c プロジェクト: Agobin/chapel
//
// Broadcast the value of 'id'th entry in chpl_private_broadcast_table
// on the calling locale onto every other locale.  This is done to set
// up global constants of simple scalar types (primarily).
//
void chpl_comm_broadcast_private(int id, int32_t size, int32_t tid)
{
    int i;
    bcast_private_args_t *payload;

    PROFILE_INCR(profile_comm_broadcast_private,1);

    qthread_debug(CHAPEL_CALLS, "[%d] begin id=%d, size=%d, tid=%d\n", chpl_localeID, id, size, tid);

    payload = chpl_mem_allocMany(1, sizeof(bcast_private_args_t) + size, 
                                 CHPL_RT_MD_COMM_PRIVATE_BROADCAST_DATA, 0, 0);
    payload->id = id;
    payload->size = size;
    memcpy(payload->data, chpl_private_broadcast_table[id], size);

    qthread_debug(CHAPEL_DETAILS, "[%d] payload={.id=%d; .size=%d; .data=?}\n", chpl_localeID, payload->id, payload->size);

    aligned_t rets[chpl_numLocales];
    for (i = 0; i < chpl_numLocales; i++) {
        if (i != chpl_localeID) {
            qthread_fork_remote(bcast_private, payload, &rets[i], i,
                                sizeof(bcast_private_args_t) + size);
        }
    }
    for (i = 0; i < chpl_numLocales; i++) {
        if (i != chpl_localeID) {
            qthread_readFF(&rets[i], &rets[i]);
        }
    }

    chpl_mem_free(payload,0,0);

    qthread_debug(CHAPEL_CALLS, "[%d] end id=%d, size=%d, tid=%d\n", chpl_localeID, id, size, tid);
}
コード例 #7
0
ファイル: qthread.hpp プロジェクト: jcazzie/chapel
// Typed convenience overload of qthread_readFF.
// Runs QTHREAD_CHECKSIZE on T, then forwards both pointers, cast to
// aligned_t *, to a qthread_readFF call (presumably the aligned_t overload)
// and returns that call's result unchanged.
// NOTE(review): the template header declaring T is outside this excerpt.
inline int qthread_readFF(T *const       dest,
                          const T *const src)
{
    QTHREAD_CHECKSIZE(T);
    // The cast applied to src also drops its const qualifier.
    return qthread_readFF((aligned_t *)dest,
                          (aligned_t *)src);
}
コード例 #8
0
ファイル: time_cncthr_bench.c プロジェクト: Agobin/chapel
/*
 * Benchmark driver: repeatedly forks NUM_THREADS `qincr` tasks, waits for all
 * of them, and times each round. Per-round times go through iprintf(); the
 * average over all rounds is always printed with printf().
 *
 * Exits with EXIT_FAILURE if the qthread library cannot be initialized,
 * otherwise returns 0.
 */
int main(int argc, char *argv[])
{
    const int iterations = 10; /* number of timed rounds */
    aligned_t rets[NUM_THREADS];
    qtimer_t timer = qtimer_create();
    double cumulative_time = 0.0;

    if (qthread_initialize() != QTHREAD_SUCCESS) {
        fprintf(stderr, "qthread library could not be initialized!\n");
        exit(EXIT_FAILURE);
    }
    CHECK_VERBOSE();

    for (int iteration = 0; iteration < iterations; iteration++) {
        qtimer_start(timer);
        for (int i = 0; i < NUM_THREADS; i++) {
            qthread_fork(qincr, NULL, &(rets[i]));
        }
        /* Wait for every task forked in this round before stopping the clock. */
        for (int i = 0; i < NUM_THREADS; i++) {
            qthread_readFF(NULL, &(rets[i]));
        }
        qtimer_stop(timer);
        iprintf("\ttest iteration %i: %f secs\n", iteration,
                qtimer_secs(timer));
        cumulative_time += qtimer_secs(timer);
    }
    printf("qthread time: %f\n", cumulative_time / (double)iterations);

    /* Release the timer obtained from qtimer_create(). */
    qtimer_destroy(timer);

    return 0;
}
コード例 #9
0
ファイル: accalt.c プロジェクト: adcastel/ULT_work
/*
 * Join a ULT using whichever backend(s) were selected at compile time:
 * ABT_thread_free() for ARGOBOTS, myth_join() for MASSIVETHREADS, and
 * qthread_readFF() on the handle for QTHREADS. The three sections are
 * independent, so each enabled backend contributes its own call.
 */
void accalt_ult_join(ACCALT_ult *ult) {
#if defined(ARGOBOTS)
    ABT_thread_free(ult);
#endif /* ARGOBOTS */

#if defined(MASSIVETHREADS)
    myth_join(*ult, NULL);
#endif /* MASSIVETHREADS */

#if defined(QTHREADS)
    qthread_readFF(NULL, ult);
#endif /* QTHREADS */
}
コード例 #10
0
ファイル: accalt.c プロジェクト: adcastel/ULT_work
/*
 * Join a tasklet using whichever backend(s) were selected at compile time:
 * ABT_task_free() for ARGOBOTS, myth_join() for MASSIVETHREADS, and
 * qthread_readFF() on the handle for QTHREADS. The three sections are
 * independent, so each enabled backend contributes its own call.
 */
void accalt_tasklet_join(ACCALT_tasklet *tasklet) {
#if defined(ARGOBOTS)
    ABT_task_free(tasklet);
#endif /* ARGOBOTS */

#if defined(MASSIVETHREADS)
    myth_join(*tasklet, NULL);
#endif /* MASSIVETHREADS */

#if defined(QTHREADS)
    qthread_readFF(NULL, tasklet);
#endif /* QTHREADS */
}
コード例 #11
0
ファイル: loop_templates.hpp プロジェクト: Agobin/chapel
    // Splits the iteration range [start, stop) with stride `step` across
    // several forked qthreads, each running run_qtd<Iter> on an ITER(...)
    // argument. For TypeC == Par the call waits for every forked task; for
    // ParNoJoin it returns without waiting. Other TypeC values do nothing.
    // NOTE(review): ITER and SCALE_TD_POW2 are macros defined elsewhere; they
    // presumably use obj/ret/fptr/arg1..arg5 and set tdc_pow2 -- confirm.
    static void Run (ObjT        *obj,
                     const RetV & ret,
                     FptrT        fptr,
                     const Arg1V &arg1,
                     const Arg2V &arg2,
                     const Arg3V &arg3,
                     const Arg4V &arg4,
                     const Arg5V &arg5,
                     int          start,
                     int          stop,
                     int          step = 1)
    {
        bool join = true;

        int total, steptd, tdc, tdc_pow2, round_total, base_count;

        // total = number of loop iterations, rounding up when the range is
        // not an exact multiple of step.
        if (step == 1) {
            total = (stop - start);
        } else {
            total = (stop - start) / step;
            if (((stop - start) % step) != 0) {
                total++;
            }
        }

        SCALE_TD_POW2(total, tdc_pow2);

        tdc         = 1 << tdc_pow2;           // number of tasks to fork
        steptd      = step << tdc_pow2;        // stride used inside each task
        base_count  = total >> tdc_pow2;       // iterations every task gets
        round_total = base_count << tdc_pow2;  // iterations covered by base_count alone

        switch (TypeC) {
            case mt_loop_traits::ParNoJoin:
                join = false;
                // no break: continues into the Par case with join disabled
            case mt_loop_traits::Par:
            {
                // thr is only allocated (and only used) when joining.
                aligned_t *thr;
                if (join) { thr = new aligned_t[tdc]; }
                for (int i = 0; i < tdc; i++) {
                    // The first (total - round_total) tasks take one extra iteration.
                    int count = base_count + (((round_total + i) < total) ? 1 : 0);
                    qthread_fork(run_qtd<Iter>, ITER(start, steptd, count), join ? (thr + i) : NULL);

                    // The next task begins one step further along.
                    start += step;
                }

                if (join) {
                    // Wait for each forked task's return word, then release the array.
                    for (int i = 0; i < tdc; i++) qthread_readFF(thr + i, thr + i);
                    delete[] thr;
                }
            } break;
        }
    }
コード例 #12
0
ファイル: test_teams.c プロジェクト: Agobin/chapel
/*
 * Task body: reports the team it runs in, asserts that its parent team id is
 * non_team_id, forks hello_new_team in a new team and waits for it, then
 * returns the larger of its own team id and the value that task returned.
 */
static aligned_t hello_new_team_new_team(void *arg_) {
    aligned_t    nested_result;
    unsigned int my_team     = qt_team_id();
    unsigned int parent_team = qt_team_parent_id();

    iprintf("`hello_new_team_new_team` executing in team %lu (w/ parent %lu)\n", 
        (unsigned long)my_team, (unsigned long)parent_team);
    assert(parent_team == non_team_id);

    /* Fork the nested task and block until its return word is full. */
    qthread_fork_new_team(hello_new_team, NULL, &nested_result);
    qthread_readFF(&nested_result, &nested_result);

    return MAX(my_team, nested_result);
}
コード例 #13
0
ファイル: qutil.c プロジェクト: Agobin/chapel
/*
 * Partition `array` (of `length` doubles) around `pivot` in parallel.
 *
 * One qutil_qsort_partition task is forked per MT_LOOP_CHUNK-sized share of
 * the input; every task receives pointers to the shared leftwall/rightwall
 * fields of the result, and the function waits for all tasks before
 * returning that result.
 *
 * For length == 0 the initial result is returned unchanged; previously this
 * case computed a thread count of zero and then divided by it.
 */
static inline qutil_qsort_iprets_t qutil_qsort_inner_partitioner(double      *array,
                                                                 const size_t length,
                                                                 const double pivot)
{   /*{{{*/
    qutil_qsort_iprets_t retval = { ((aligned_t)-1), 0 };

    /* Nothing to partition; bail out before the (zero) thread count is used
     * as a divisor below. */
    if (length == 0) {
        return retval;
    }

    /* choose the number of threads to use */
    const size_t numthreads =
        length / MT_LOOP_CHUNK + ((length % MT_LOOP_CHUNK) ? 1 : 0);
    /* calculate the megachunk information for determining the array lengths
     * each thread will be fed. */
    const size_t megachunk_size = MT_CHUNKSIZE * numthreads;
    /* just used as a boolean test */
    const size_t extra_chunks = length % megachunk_size;

    size_t                   megachunks = length / (MT_CHUNKSIZE * numthreads);
    aligned_t               *rets;
    struct qutil_qsort_args *args;
    size_t                   i;

    rets = MALLOC(sizeof(aligned_t) * numthreads);
    args = MALLOC(sizeof(struct qutil_qsort_args) * numthreads);
    /* spawn threads to do the partitioning */
    for (i = 0; i < numthreads; i++) {
        args[i].array              = array + (i * MT_CHUNKSIZE);
        args[i].offset             = i * MT_CHUNKSIZE;
        args[i].pivot              = pivot;
        args[i].jump               = (numthreads - 1) * MT_CHUNKSIZE + 1;
        args[i].furthest_leftwall  = &retval.leftwall;
        args[i].furthest_rightwall = &retval.rightwall;
        if (extra_chunks != 0) {
            args[i].length = megachunks * (megachunk_size) + MT_CHUNKSIZE;
            if (args[i].length + args[i].offset >= length) {
                /* clamp to the end of the array; later threads then get one
                 * megachunk less */
                args[i].length = length - args[i].offset;
                megachunks--;
            }
        } else {
            args[i].length = length - megachunk_size + MT_CHUNKSIZE;
        }
        /* qutil_qsort_partition(args+i); */
        qthread_fork((qthread_f)qutil_qsort_partition, args + i, rets + i);
    }
    /* wait for every partitioning task before releasing its arguments */
    for (i = 0; i < numthreads; i++) {
        qthread_readFF(NULL, rets + i);
    }
    FREE(args, sizeof(struct qutil_qsort_args) * numthreads);
    FREE(rets, sizeof(aligned_t) * numthreads);

    return retval;
} /*}}}*/
コード例 #14
0
ファイル: feb_stream.c プロジェクト: Agobin/chapel
/*
 * The main procedure simply creates a producer and a consumer task to run in
 * parallel.
 *
 * It allocates and zeroes a buffer of bufferSize words, forks `consumer` and
 * `producer`, waits for both, marks every buffer word full again and frees
 * the buffer. Returns 0 on success and EXIT_FAILURE if the buffer cannot be
 * allocated.
 */
int main(int argc,
         char *argv[])
{
    aligned_t t[2];

    /* Initialize outside assert() so the call is not compiled away when
     * NDEBUG is defined. */
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    CHECK_VERBOSE();
    NUMARG(bufferSize, "BUFFERSIZE");
    numItems = 8 * bufferSize;
    NUMARG(numItems, "NUMITEMS");

    iprintf("%i threads...\n", qthread_num_shepherds());

    buff = malloc(sizeof(aligned_t) * bufferSize);
    if (buff == NULL) {
        iprintf("Failed to allocate the buffer\n");
        return EXIT_FAILURE;
    }
    for (unsigned int i = 0; i < bufferSize; ++i) {
        buff[i] = 0;
    }

    qthread_fork(consumer, NULL, &t[0]);
    qthread_fork(producer, NULL, &t[1]);
    qthread_readFF(NULL, &t[0]);
    qthread_readFF(NULL, &t[1]);

    /* cleanup... unnecessary in general, but for the moment I'm tracking down
     * errors in the FEB system, so let's clean up */
    for (unsigned int i = 0; i < bufferSize; ++i) {
        qthread_fill(buff + i);
    }

    free(buff);

    iprintf("Success!\n");

    return 0;
}
コード例 #15
0
ファイル: uts_qthreads.c プロジェクト: deniskin82/chapel
// Visit one tree node: spawn a `visit` task per child, wait for all of them,
// add up their descendant counts (plus one for this node), publish the sum
// through parent->acc, and signal completion through parent->dc.
//
// Notes:
// -    Each task receives distinct copy of parent
// -    Copy of child is shallow, be careful with `state` member
// -    Always returns 0; the result travels through parent->acc
static aligned_t visit(void *args_)
{
    node_t   *parent          = (node_t *)args_;
    int       parent_height   = parent->height;
    int       num_children    = parent->num_children;
    aligned_t expect          = parent->expect;
    aligned_t sum_descendants = 1;

    if (num_children > 0) {
        // Declared here rather than at function scope: a variable-length
        // array must have a positive size, and leaves have zero children.
        aligned_t  num_descendants[num_children];
        node_t     child __attribute__((aligned(8)));
        aligned_t  donec = 0;

        // Spawn children, if any
        child.height = parent_height + 1;
        child.dc     = &donec;
        child.expect = num_children;

        qthread_empty(&donec);

        for (int i = 0; i < num_children; i++) {
            child.acc    = &num_descendants[i];

            for (int j = 0; j < num_samples; j++) {
                rng_spawn(parent->state.state, child.state.state, i);
            }

            child.num_children = calc_num_children(&child);

            qthread_fork_syncvar_copyargs(visit, &child, sizeof(node_t), NULL);
        }

        // Wait for children to finish up, accumulate descendants counts.
        // The wait is unconditional: the child that brings the counter up to
        // num_children fills donec, and both donec and num_descendants live
        // in this frame, so it must not be left before that fill happens.
        // (The former shortcut compared donec with this node's own `expect`,
        // which is the sibling count, not the number of children.)
        qthread_readFF(NULL, &donec);

        for (int i = 0; i < num_children; i++) {
            sum_descendants += num_descendants[i];
        }
    }

    *parent->acc = sum_descendants;
    // The task that completes the expected count marks the counter full,
    // releasing whoever is waiting on it.
    if (qthread_incr(parent->dc, 1) + 1 == expect) {
        qthread_fill(parent->dc);
    }

    return 0;
}
コード例 #16
0
ファイル: comm-qthreads.c プロジェクト: Agobin/chapel
//
// Blocking remote fork: hands function id 'fid' and its argument (arg_size
// bytes, type id arg_tid) to spawn() for the given locale, then waits until
// the completion word passed to spawn() has been filled.
// notes:
//   multiple forks to the same locale should be handled concurrently
//
void chpl_comm_fork(int locale, chpl_fn_int_t fid,
                    void *arg, int32_t arg_size, int32_t arg_tid)
{
    aligned_t completion;

    PROFILE_INCR(profile_comm_fork,1);
    PROFILE_BIN_INCR(profile_comm_fork_size,arg_size);

    qthread_debug(CHAPEL_CALLS, "[%d] begin locale=%d, fid=%d, arg_size=%d\n", chpl_localeID, locale, fid, arg_size);
    qthread_debug(CHAPEL_BEHAVIOR, "[%d] (blocking) forking fn %d with arg-size %d\n", chpl_localeID, fid, arg_size);

    // Mark the word empty first so the read below blocks until it is filled.
    qthread_empty(&completion);
    spawn(locale, fid, arg, arg_size, arg_tid, &completion);
    qthread_readFF(NULL, &completion);

    qthread_debug(CHAPEL_CALLS, "[%d] end locale=%d, fid=%d, arg_size=%d\n", chpl_localeID, locale, fid, arg_size);
}
コード例 #17
0
ファイル: net.c プロジェクト: Agobin/chapel
int qthread_multinode_multistop(void)
{
    aligned_t val;

    qthread_debug(MULTINODE_CALLS, "[%d] begin qthread_multinode_multistop\n", my_rank);

    if (0 != my_rank) {
        struct die_msg_t msg;

        qthread_readFF(&val, &time_to_die);
        qthread_debug(MULTINODE_DETAILS, "[%d] time to die\n", my_rank);
        msg.my_rank = my_rank;
        qthread_internal_net_driver_send(0, DIE_MSG_TAG, &msg, sizeof(msg));

        exit(0); // triggers atexit(net_cleanup)
    }

    qthread_debug(MULTINODE_CALLS, "[%d] end qthread_multinode_multistop\n", my_rank);

    return QTHREAD_SUCCESS;
}
コード例 #18
0
ファイル: comm-qthreads.c プロジェクト: Agobin/chapel
//
// initializes the communications package
//   set chpl_localeID and chpl_numLocales
// notes:
//   * Called with the argc/argv pair passed to main()
//   * Steps: export QT_STACK_SIZE, initialize SPR, record locale id/count,
//     allocate the segment-info table, have locale 0 fill it in and then
//     fork bcast_seginfo on every other locale.
//   * NOTE(review): neither malloc() result below is checked for NULL.
//
void chpl_comm_init(int *argc_p, char ***argv_p)
{
    qthread_debug(CHAPEL_CALLS, "[%d] begin\n", chpl_localeID);

    // Set stack size >= 8 pages (lower bound derived from experience)
    unsigned long const default_stack_size = 32768;
    unsigned long const stack_size = 
        qt_internal_get_env_num("STACK_SIZE",
                                default_stack_size,
                                default_stack_size);
    char stack_size_str[100] = {0};
    // Export whichever of the two values is larger.
    if (default_stack_size > stack_size) {
        snprintf(stack_size_str, 99, "%lu", default_stack_size);
    } else {
        snprintf(stack_size_str, 99, "%lu", stack_size);
    }
    // Final argument 1: overwrite any existing QT_STACK_SIZE value.
    setenv("QT_STACK_SIZE", stack_size_str, 1);

    /* Initialize SPR:                              *
     * - All locales participate in initialization. */
    int const rc = spr_init(SPR_SPMD, chapel_remote_functions);
    assert(SPR_OK == rc);

    /* Record locale info */
    chpl_localeID = spr_locale_id();
    chpl_numLocales = spr_num_locales();

    qthread_debug(CHAPEL_BEHAVIOR, "[%d] initialized SPR with %d locales\n", chpl_localeID, chpl_numLocales);

    /* Set up segment information table */
    // malloc is redefined to a poison name in this file; it is temporarily
    // undefined here so the real malloc() can be called.
#undef malloc
    seginfo_table = malloc(chpl_numLocales * sizeof(seginfo_t));
#define malloc dont_use_malloc_use_chpl_mem_allocMany_instead

    // Only locale 0 fills in the table; the other locales' copies are left
    // as allocated at this point.
    if (0 == chpl_localeID) {
        int i;

        // One extra page of slack so the start can be rounded up to a page boundary.
        int global_table_size = chpl_numGlobalsOnHeap * sizeof(void *) + getpagesize();
#undef malloc
        void * global_table = malloc(global_table_size);
#define malloc dont_use_malloc_use_chpl_mem_allocMany_instead
       
        // Make sure segment is page-aligned.
        seginfo_table[0].addr = ((void *)(((uint8_t *)global_table) +
                                 (((((uintptr_t)global_table) % getpagesize()) == 0) ? 0 :
                                  (getpagesize() - (((uintptr_t)global_table) % getpagesize())))));
        // NOTE(review): the recorded size is the full allocation, including
        // the alignment slack -- confirm this is what consumers expect.
        seginfo_table[0].size = global_table_size;

        for (i = 1; i < chpl_numLocales; i++) {
            seginfo_table[i].addr = NULL;
            seginfo_table[i].size = 0;
        }
    }

    chpl_comm_barrier("waiting for seginfo table setup at root");

    // Broadcast segment info
    if (0 == chpl_localeID) {
        int i;
        // One completion word per locale; slot 0 (this locale) is unused.
        aligned_t rets[chpl_numLocales];
        for (i = 1; i < chpl_numLocales; i++) {
            qthread_fork_remote(bcast_seginfo, seginfo_table, &rets[i], i,
                                chpl_numLocales * sizeof(seginfo_t));
        }
        // Wait for every remote fork to complete.
        for (i = 1; i < chpl_numLocales; i++) {
            qthread_readFF(&rets[i], &rets[i]);
        }
    }

    chpl_comm_barrier("waiting for seginfo table bcast");

    qthread_debug(CHAPEL_CALLS, "[%d] end\n", chpl_localeID);
}
コード例 #19
0
ファイル: uts_qthreads.c プロジェクト: deniskin82/chapel
/*
 * UTS benchmark driver.
 *
 * Reads the tree parameters through NUMARG/DBLARG, makes sure a stack size is
 * exported, initializes qthreads, then times one traversal that starts with a
 * `visit` task on the root node and reports the tree size and throughput.
 * Returns EXIT_FAILURE for an invalid tree type or shape function, otherwise
 * 0.
 */
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        /* Enum-valued settings are read through an unsigned temporary and
         * range-checked before being stored. */
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

    // If the operator did not attempt to set a stack size, force
    // a reasonable lower bound
    if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE"))
        setenv("QT_STACK_SIZE", "32768", 0);

    // Initialize outside assert() so the call is not compiled away when
    // NDEBUG is defined.
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    // Set up the root node: it reports its subtree size through `tot` and
    // signals completion through `donecount`, which is emptied beforehand so
    // the read below blocks until it is filled.
    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);
    aligned_t donecount = 0;
    root.dc = &donecount;
    qthread_empty(&donecount);
    aligned_t tot = 0;
    root.acc = &tot;
    root.expect = 1;

    qthread_fork_syncvar(visit, &root, NULL);
    qthread_readFF(NULL, root.dc);
    total_num_nodes = tot;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#endif /* ifdef PRINT_STATS */

    return 0;
}
コード例 #20
0
/*
 * Syncvar producer/consumer test driver.
 *
 * Checks basic syncvar empty/fill/read behavior on `id` and `x`, then forks
 * `pairs` consumer tasks and `pairs` producer tasks, waits for all of them,
 * and finally compares the value read from `x` with iterations - 1.
 * Returns 0 on success and -1 on a value mismatch or allocation failure.
 */
int main(int argc,
         char *argv[])
{
    aligned_t *t[2];
    uint64_t x_value;

    uint64_t pairs;

    /* Initialize outside assert() so the call is not compiled away when
     * NDEBUG is defined. */
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    pairs = qthread_num_shepherds() * 6;

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");
    NUMARG(pairs, "PAIRS");

    /* t[0] holds the consumers' return words, t[1] the producers'. */
    t[0] = calloc(pairs, sizeof(aligned_t));
    t[1] = calloc(pairs, sizeof(aligned_t));
    if ((t[0] == NULL) || (t[1] == NULL)) {
        fprintf(stderr, "Could not allocate return-value arrays\n");
        free(t[0]);
        free(t[1]);
        return -1;
    }

    iprintf("%i threads...\n", qthread_num_shepherds());
    iprintf("Initial value of x: %lu\n", (unsigned long)x.u.w);

    qthread_syncvar_empty(&id);
    qthread_syncvar_writeF_const(&id, 1);
    iprintf("id = 0x%lx\n", (unsigned long)id.u.w);
    {
        uint64_t tmp = 0;
        qthread_syncvar_readFF(&tmp, &id);
        assert(tmp == 1);
    }
    iprintf("x's status is: %s (want full (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 1);
    qthread_syncvar_readFE(NULL, &x);
    iprintf("x's status became: %s (want empty (and nowait))\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    assert(qthread_syncvar_status(&x) == 0);
    /* Consumers receive ids 0..pairs-1, producers pairs..2*pairs-1. */
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(consumer, (void *)(uintptr_t)i, &(t[0][i]));
    }
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_fork(producer, (void *)(uintptr_t)(i + pairs), &(t[1][i]));
    }
    for (unsigned int i = 0; i < pairs; ++i) {
        qthread_readFF(NULL, &(t[0][i]));
        qthread_readFF(NULL, &(t[1][i]));
    }
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_fill(&x);
    iprintf("shouldn't be blocking on x (current status: %s)\n",
            qthread_syncvar_status(&x) ? "full" : "empty");
    qthread_syncvar_readFF(&x_value, &x);
    assert(qthread_syncvar_status(&x) == 1);

    free(t[0]);
    free(t[1]);

    if (x_value == iterations - 1) {
        iprintf("Success! x==%lu\n", (unsigned long)x_value);
        return 0;
    } else {
        fprintf(stderr, "Final value of x=%lu, expected %lu\n",
                (unsigned long)x_value, (unsigned long)(iterations - 1));
        return -1;
    }
}
コード例 #21
0
ファイル: qutil.c プロジェクト: Agobin/chapel
/*
 * Parallel merge sort of `array` (`length` doubles), in place.
 *
 * Phase 1 forks one qutil_mergesort_presort task per 10-element chunk and
 * waits for all of them. Phase 2 repeatedly forks qutil_mergesort_inner tasks
 * that merge adjacent pairs of sorted runs, doubling the run length after
 * each pass, until a single run covers the whole array.
 *
 * The merge scratch arrays are only allocated when there is something to
 * merge (length > 10) and are tracked separately from the presort arrays, so
 * short inputs no longer free the presort arrays a second time.
 */
void API_FUNC qutil_mergesort(double *array,
                              size_t  length)
{   /*{{{*/
    /* first, decide how much of the array each thread gets
     * (fixed at 10 elements, as before) */
    size_t chunksize = 10;

    /* second, decide how many threads to use... */
    size_t                       numthreads;
    size_t                       merge_slots = 0; /* capacity of the merge-phase arrays */
    aligned_t                   *rets        = NULL;
    struct qutil_mergesort_args *args        = NULL;
    size_t                       i;

    assert(qthread_library_initialized);

    /* third, an initial qsort() */
    numthreads = length / chunksize;
    if (length - (numthreads * chunksize)) {
        numthreads++;
    }
    rets = MALLOC(sizeof(aligned_t) * numthreads);
    args = MALLOC(sizeof(struct qutil_mergesort_args) * numthreads);
    for (i = 0; i < numthreads; i++) {
        args[i].array       = array;
        args[i].first_start = i * chunksize;
        args[i].first_stop  = (i + 1) * chunksize - 1;
        if (args[i].first_stop >= length) {
            args[i].first_stop = length - 1;
        }

        qthread_fork((qthread_f)qutil_mergesort_presort, args + i, rets + i);
    }
    for (i = 0; i < numthreads; i++) {
        qthread_readFF(NULL, rets + i);
    }
    FREE(rets, sizeof(aligned_t) * numthreads);
    FREE(args, sizeof(struct qutil_mergesort_args) * numthreads);
    /* forget the freed arrays so the final cleanup cannot release them again */
    rets = NULL;
    args = NULL;

    /* prepare scratch memory: the first merge pass needs the most slots,
     * one per pair of runs */
    if (chunksize < length) {
        merge_slots = (length - chunksize) / (2 * chunksize);
        if ((length - chunksize) - (2 * chunksize * merge_slots)) {
            merge_slots++;
        }
        rets = MALLOC(sizeof(aligned_t) * merge_slots);
        assert(rets);
        args = MALLOC(sizeof(struct qutil_mergesort_args) * merge_slots);
        assert(args);
    }
    /* now, commence with the merging; a pass with chunksize == length has no
     * pair left to merge, so the loop stops there */
    while (chunksize < length) {
        i          = 0;
        numthreads = 0;
        while (i < length - chunksize) {
            args[numthreads].array        = array;
            args[numthreads].first_start  = i;
            args[numthreads].first_stop   = i + chunksize - 1;
            args[numthreads].second_start = i + chunksize;
            args[numthreads].second_stop  =
                ((i + 2 * chunksize - 1) <
                 (length - 1)) ? (i + 2 * chunksize - 1) : (length - 1);
            qthread_fork((qthread_f)qutil_mergesort_inner, args + numthreads,
                         rets + numthreads);
            i += 2 * chunksize;
            numthreads++;
        }
        for (i = 0; i < numthreads; i++) {
            qthread_readFF(NULL, rets + i);
        }
        chunksize *= 2;
    }
    if (rets) {
        /* release with the size that was actually allocated */
        FREE(rets, sizeof(aligned_t) * merge_slots);
        FREE(args, sizeof(struct qutil_mergesort_args) * merge_slots);
    }
} /*}}}*/
コード例 #22
0
ファイル: queue.c プロジェクト: Agobin/chapel
/*
 * Queue test driver.
 *
 * Single-thread test: fork one `tobequeued` task, wait until the queue holds
 * it, release the queue and wait for the task.
 * Multi-thread test: fork THREADS_ENQUEUED such tasks, wait until all are
 * queued, release one and wait for it to wake, then release the rest and
 * wait for every task.
 * All checks are asserts; returns EXIT_SUCCESS when they pass.
 */
int main(int   argc,
         char *argv[])
{
    aligned_t return_value = 0;
    int status, ret;

    CHECK_VERBOSE(); // part of the testing harness; toggles iprintf() output
    NUMARG(THREADS_ENQUEUED, "THREADS_ENQUEUED");

    status = qthread_initialize();
    assert(status == QTHREAD_SUCCESS);

    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("  %i threads total\n", qthread_num_workers());

    iprintf("Creating the queue...\n");
    the_queue = qthread_queue_create(QTHREAD_QUEUE_MULTI_JOIN_LENGTH, 0);
    assert(the_queue);

    iprintf("---------------------------------------------------------\n");
    iprintf("\tSINGLE THREAD TEST\n\n");

    iprintf("1/4 Spawning thread to be queued...\n");
    status = qthread_fork(tobequeued, NULL, &return_value);
    assert(status == QTHREAD_SUCCESS);

    iprintf("2/4 Waiting for thread to queue itself...\n");
    while(qthread_queue_length(the_queue) != 1) qthread_yield();
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/4 Releasing the queue...\n");
    qthread_queue_release_all(the_queue);

    ret = qthread_readFF(NULL, &return_value);
    assert(ret == QTHREAD_SUCCESS);

    assert(threads_in == 1);
    assert(awoke == 1);
    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);
    iprintf("4/4 Test passed!\n");

    iprintf("---------------------------------------------------------\n");
    iprintf("\tMULTI THREAD TEST\n\n");

    threads_in = 0;
    awoke = 0;
    aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED);
    // Every fork below writes through this pointer, so it must be valid.
    assert(retvals != NULL);
    iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED);
    for (int i=0; i<THREADS_ENQUEUED; i++) {
        status = qthread_fork(tobequeued, NULL, retvals + i);
        assert(status == QTHREAD_SUCCESS);
    }

    iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED);
    while(qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield();
    assert(threads_in == THREADS_ENQUEUED);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("3/6 Releasing a single thread...\n");
    qthread_queue_release_one(the_queue);

    iprintf("4/6 Waiting for that thread to exit\n");
    while (awoke == 0) qthread_yield();

    assert(qthread_queue_length(the_queue) == (THREADS_ENQUEUED - 1));
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("5/6 Releasing the rest of the threads...\n");
    qthread_queue_release_all(the_queue);

    for (int i=0; i<THREADS_ENQUEUED; i++) {
        ret = qthread_readFF(NULL, retvals + i);
        assert(ret == QTHREAD_SUCCESS);
    }

    assert(qthread_queue_length(the_queue) == 0);
    assert(qthread_readstate(NODE_BUSYNESS) == 1);

    iprintf("6/6 Test passed!\n");

    // All tasks have been waited on, so their return words can be released.
    free(retvals);

    return EXIT_SUCCESS;
}
コード例 #23
0
ファイル: sinc_null.c プロジェクト: Agobin/chapel
// //////////////////////////////////////////////////////////////////////////////
/*
 * Sinc test driver.
 *
 * Exercises a sinc initialized with an expected count of zero, a sinc whose
 * expected count is raised to three after creation, and a sinc created with
 * an expected count of two that is waited on (by this function and by three
 * forked waiters), reset to two, and used a second time. Returns 0.
 */
int main(int   argc,
         char *argv[])
{
    size_t depth = 3;

    // Initialize outside assert() so the call is not compiled away when
    // NDEBUG is defined.
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    CHECK_VERBOSE();
    NUMARG(depth, "TEST_DEPTH");

    // Test creating an empty sinc
    {
        qt_sinc_t zero_sinc;
        qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0);
        qt_sinc_wait(&zero_sinc, NULL);
        qt_sinc_fini(&zero_sinc);

        // Created expecting nothing, then told to expect three submissions.
        qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0);
        qt_sinc_expect(three_sinc, 3);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qt_sinc_wait(three_sinc, NULL);
        qt_sinc_destroy(three_sinc);
    }

    qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2);

    // Spawn additional waits
    aligned_t rets[3];
    {
        qthread_fork(wait_on_sinc, sinc, &rets[0]);
        qthread_fork(wait_on_sinc, sinc, &rets[1]);
        qthread_fork(wait_on_sinc, sinc, &rets[2]);
    }

    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_create(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    qt_sinc_wait(sinc, NULL);
    // Make sure the three extra waiters have finished before the reset.
    for (int i = 0; i < 3; i++)
        qthread_readFF(NULL, &rets[i]);

    // Reset the sinc
    qt_sinc_reset(sinc, 2);

    // Second use
    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_reset(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    qt_sinc_wait(sinc, NULL);
    qt_sinc_destroy(sinc);

    return 0;
}
コード例 #24
0
ファイル: in_edges.c プロジェクト: Agobin/chapel
/*
 * In-degree computation driver.
 *
 * Steps performed:
 *   - zero the in_degrees table;
 *   - initialise SPR in SPMD mode with incr_in_degree as the registered
 *     action and record this locale's id and the locale count;
 *   - build the local index array and a random edge list, printing both;
 *   - spawn incr_in_degree remotely once per edge, then wait on every
 *     spawn's return slot;
 *   - print the resulting in_degrees table.
 *
 * Returns 0 on success, EXIT_FAILURE if one of the work buffers cannot be
 * allocated.
 */
int main(int   argc,
         char *argv[])
{
    size_t        num_edges;
    aligned_t   * rets;
    vertex_t    * edges;

    for (int i = 0; i < NUM_VERTICES; i++) {
        in_degrees[i] = 0;
    }

    /* Initialize SPR in SPMD mode */
    qthread_f actions[2] = {incr_in_degree, NULL};
    spr_init(SPR_SPMD, actions);
    here        = spr_locale_id();
    num_locales = spr_num_locales();
    if (0 == here) {
        printf("Running with %d locales\n", num_locales);
    }

    rng_init(rng_state.state, time(NULL) * here);

    /* Create local portion of the graph */
    indices[0] = 0;
    for (int i = 1; i < NUM_VERTICES + 1; i++) {
        indices[i]    = indices[i-1] + random_vertex();
    }
    for (int i = 0; i < NUM_VERTICES + 1; i++) {
        printf("[%03d] indices[%d]: %lu\n", here, i, indices[i]);
    }

    num_edges = indices[NUM_VERTICES];
    edges = malloc(num_edges * sizeof(vertex_t));
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * counts as a failure. */
    if ((edges == NULL) && (num_edges > 0)) {
        fprintf(stderr, "in_edges: out of memory allocating the edge list\n");
        return EXIT_FAILURE;
    }
    for (size_t e = 0; e < num_edges; e++) {
        edges[e].lid = random_locale();
        edges[e].vid = random_vertex();
    }
    for (size_t e = 0; e < num_edges; e++) {
        /* The index is printed with %d, so narrow it explicitly. */
        printf("[%03d] edges[%d]: (%lu,%lu)\n", here, (int)e,
               edges[e].lid, edges[e].vid);
    }

    /* TODO: barrier */

    /* Fill in-degrees property map */
    rets = malloc(num_edges * sizeof(aligned_t));
    if ((rets == NULL) && (num_edges > 0)) {
        fprintf(stderr, "in_edges: out of memory allocating the return slots\n");
        free(edges);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < NUM_VERTICES; i++) {
        for (int j = indices[i]; j < indices[i+1]; j++) {
            printf("[%03d] spawning incr of edge[%d] = (%lu,%lu)\n",
                   here, j, edges[j].lid, edges[j].vid);
            qthread_fork_remote(incr_in_degree,         /* action */
                                &(edges[j].vid),        /* local vertex id */
                                &rets[j],               /* feb */
                                edges[j].lid,           /* locale */
                                sizeof(vertex_id_t));
        }
    }
    /* Wait on every spawn's return slot before reading in_degrees. */
    for (size_t e = 0; e < num_edges; e++) {
        qthread_readFF(&rets[e], &rets[e]);
    }

    /* Print in-degrees */
    for (int i = 0; i < NUM_VERTICES; i++) {
        /* i is an int; cast it so the argument matches the %lu conversion. */
        printf("[%03d] in-degree(%lu) = %lu\n",
               here, (unsigned long)i, in_degrees[i]);
    }

    /* Free up allocated resources */
    free(rets);
    free(edges);

    return 0;
}
コード例 #25
0
ファイル: log_barrier.c プロジェクト: xiuxiazhang/qthreads
/*
 * Log-barrier test driver.
 *
 * Creates a barrier sized to qthread_num_workers() participants (the
 * spawned tasks plus this main task).  On every iteration it spawns
 * barrier_thread for indices 1..threads-1, enters the barrier itself while
 * timing the wait, and then checks that every initme[] slot equals the
 * iteration count.  ITERATIONS (via NUMARG) overrides the default of 10
 * rounds.
 *
 * Returns 0 on success; assertion failures abort the program.
 */
int main(int   argc,
         char *argv[])
{
    size_t     threads, i;
    aligned_t *rets;
    qtimer_t   t;
    unsigned int iter, iterations = 10;
    double tot = 0.0;

    /* Keep the call outside assert(): with NDEBUG defined the assert
     * expression is not evaluated and the runtime would never be set up. */
    int init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");

    threads = qthread_num_workers();
    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("%i threads...\n", (int)threads);

    initme = calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = malloc(threads * sizeof(aligned_t));
    assert(rets);

    /* threads is a size_t; narrow it so the argument matches %i. */
    iprintf("Creating a barrier to block %i threads\n", (int)threads);
    wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0);     // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 1; i < threads; i++) {
            void *arg[2] = {wait_on_me, (void*)(intptr_t)i};
            qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_barrier_enter(wait_on_me, 0);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));
        tot += qtimer_secs(t);

        // reset
        initme_idx = 1;

        // check retvals
        for (i = 1; i < threads; i++) {
            qthread_readFF(NULL, rets + i);
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Average barrier time = %f\n", tot / iterations);

    iprintf("Destroying the barrier...\n");
    qt_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    /* Every spawned task was waited on in the loop above, so nothing can
     * still be touching these buffers. */
    free(rets);
    free(initme);
    initme = NULL;

    return 0;
}
コード例 #26
0
ファイル: feb_barrier.c プロジェクト: deniskin82/chapel
/*
 * FEB-barrier test driver.
 *
 * Creates a FEB barrier for `threads` spawned tasks plus this main task.
 * On every iteration it forks barrier_thread `threads` times (each fork
 * gets its own slot in rets), enters the barrier while timing the wait, and
 * then checks that every initme[] slot equals the iteration count.
 * THREADS and ITERATIONS (via NUMARG) override the defaults of 1000 and 10.
 *
 * Returns 0 on success; assertion failures abort the program.
 */
int main(int   argc,
         char *argv[])
{
    size_t       threads = 1000, i;
    aligned_t   *rets;
    qtimer_t     t;
    unsigned int iter, iterations = 10;

    /* Keep the call outside assert(): with NDEBUG defined the assert
     * expression is not evaluated and the runtime would never be set up. */
    int init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(threads, "THREADS");
    NUMARG(iterations, "ITERATIONS");

    /* One return slot per fork, across all iterations. */
    const size_t total_forks = iterations * threads;

    initme = (aligned_t *)calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = (aligned_t *)malloc(total_forks * sizeof(aligned_t));
    assert(rets);

    iprintf("creating the barrier for %zu threads\n", threads + 1);
    wait_on_me = qt_feb_barrier_create(threads + 1);    // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 0; i < threads; i++) {
            qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_feb_barrier_enter(wait_on_me);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));

        initme_idx = 0;

        for (i = 0; i < threads; i++) {
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Destroying barrier...\n");
    qt_feb_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    /* this loop shouldn't be necessary... but seems to avoid crashes in rare
     * cases (in other words there must be a race condition in
     * qthread_finalize() if there are outstanding threads out there).
     *
     * The bound is the number of slots actually forked into: a fixed
     * `threads * 2` reads past the end of rets when ITERATIONS < 2 and
     * leaves tasks unwaited when ITERATIONS > 2. */
    for (i = 0; i < total_forks; i++) {
        aligned_t tmp = 1;
        qthread_readFF(&tmp, rets + i);
        assert(tmp == 0);
    }

    /* Every fork has been waited on, so the buffers are no longer in use. */
    free(rets);
    free(initme);
    initme = NULL;

    return 0;
}