Esempio n. 1
0
// Notes:
// -    Each task receives distinct copy of parent
// -    Copy of child is shallow, be careful with `state` member
static long visit(node_t parent)
{
    node_t    child;
    uint64_t *child_descendants = calloc(sizeof(long), parent.num_children);

    CILK_C_REDUCER_OPADD(num_descendants, ulong, 0);
    uint64_t tmp;

    // Spawn children, if any
    for (int i = 0; i < parent.num_children; i++) {
        child.height = parent.height + 1;

        for (int j = 0; j < num_samples; j++) {
            rng_spawn(parent.state.state, child.state.state, i);
        }

        child.num_children = calc_num_children(&child);

        child_descendants[i] = _Cilk_spawn visit(child);
    }

    _Cilk_sync;

    CILK_C_REGISTER_REDUCER(num_descendants);

    _Cilk_for(int i = 0; i < parent.num_children; i++) {
        REDUCER_VIEW(num_descendants) += child_descendants[i];
    }

    tmp = 1 + REDUCER_VIEW(num_descendants);

    CILK_C_UNREGISTER_REDUCER(num_descendants);

    return tmp;
}
Esempio n. 2
0
// Notes:
// -    Each task receives distinct copy of parent
// -    Copy of child is shallow, be careful with `state` member
static long visit(node_t *parent,
                  int     num_children)
{
    uint64_t num_descendants = 1;

#ifdef BIG_STACKS
    uint64_t child_descendants[num_children];
    node_t   child_nodes[num_children];
#else
    uint64_t *child_descendants;
    node_t   *child_nodes;

    if (num_children > 0) {
        child_descendants = calloc(sizeof(uint64_t), num_children);
        child_nodes       = malloc(sizeof(node_t) * num_children);
    }
#endif

    // Spawn children, if any
    for (int i = 0; i < num_children; i++) {
        node_t *child = &child_nodes[i];

        child->height = parent->height + 1;

        for (int j = 0; j < num_samples; j++) {
            rng_spawn(parent->state.state, child->state.state, i);
        }

        child->num_children = calc_num_children(child);

#pragma omp task untied firstprivate(i, child) shared(child_descendants)
        child_descendants[i] = visit(child, child->num_children);
    }

#pragma omp taskwait

// #pragma omp parallel for reduction(+:num_descendants)
    for (int i = 0; i < num_children; i++) {
        num_descendants += child_descendants[i];
    }

#ifndef BIG_STACKS
    if (num_children > 0) {
        free(child_descendants);
        free(child_nodes);
    }
#endif

    return num_descendants;
}
Esempio n. 3
0
// Notes:
// -    Each task receives distinct copy of parent
// -    Copy of child is shallow, be careful with `state` member
static aligned_t visit(void *args_)
{
    node_t  *parent          = (node_t *)args_;
    int      parent_height   = parent->height;
    int      num_children    = parent->num_children;
    aligned_t expect         = parent->expect;
    aligned_t num_descendants[num_children];
    aligned_t sum_descendants = 1;

    if (num_children != 0) {
        node_t     child __attribute__((aligned(8)));
        aligned_t  donec = 0;

        // Spawn children, if any
        child.height = parent_height + 1;
        child.dc     = &donec;
        child.expect = num_children;

        qthread_empty(&donec);

        for (int i = 0; i < num_children; i++) {
            child.acc    = &num_descendants[i];

            for (int j = 0; j < num_samples; j++) {
                rng_spawn(parent->state.state, child.state.state, i);
            }

            child.num_children = calc_num_children(&child);

            qthread_fork_syncvar_copyargs(visit, &child, sizeof(node_t), NULL);
        }

        // Wait for children to finish up, accumulate descendants counts
        if (donec != expect) qthread_readFF(NULL, &donec);

        for (int i = 0; i < num_children; i++) {
            sum_descendants += num_descendants[i];
        }
    }

    *parent->acc = sum_descendants;
    if (qthread_incr(parent->dc, 1) + 1 == expect) {
        qthread_fill(parent->dc);
    }

    return 0;
}
Esempio n. 4
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

#pragma omp parallel
#pragma omp single
#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);

    nodecount = 1;
    long retval;
#pragma omp parallel
#pragma omp single nowait
#pragma omp task untied
    retval = visit(&root, root.num_children);

    total_num_nodes = retval;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / omp_get_num_threads());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / omp_get_num_threads());
#endif /* ifdef PRINT_STATS */

    return 0;
}
Esempio n. 5
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned long tmp = 0;
        NUMARG(tmp, "UTS_TREE_TYPE");
        tree_type = (tree_t)tmp;
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    {
        unsigned long tmp = 0;
        NUMARG(tmp, "UTS_SHAPE_FN");
        shape_fn = (shape_t)tmp;
    }
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);

    nodecount = 1;
    long retval;
    {
        retval = _Cilk_spawn visit(root);

        _Cilk_sync;
    }

    total_num_nodes = retval;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    LOG_UTS_RESULTS_YAML(total_num_nodes, total_time)
    LOG_ENV_CILK_YAML()
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / __cilkrts_get_nworkers());
#endif /* ifdef PRINT_STATS */

    return 0;
}
Esempio n. 6
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

    // If the operator did not attempt to set a stack size, force
    // a reasonable lower bound
    if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE"))
        setenv("QT_STACK_SIZE", "32768", 0);

    assert(qthread_initialize() == 0);

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);
    aligned_t donecount = 0;
    root.dc = &donecount;
    qthread_empty(&donecount);
    aligned_t tot = 0;
    root.acc = &tot;
    root.expect = 1;

    qthread_fork_syncvar(visit, &root, NULL);
    qthread_readFF(NULL, root.dc);
    total_num_nodes = tot;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#endif /* ifdef PRINT_STATS */

    return 0;
}