示例#1
0
文件: sinc_null.c 项目: Agobin/chapel
static aligned_t visit(void *arg_)
{
    v_args_t *arg = (v_args_t *)arg_;

    if (arg->depth > 2) {
        /* I'm an internal node. */
        v_args_t args = { arg->depth - 1, arg->sinc };

        qt_sinc_expect(arg->sinc, 2);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);

        qt_sinc_submit(arg->sinc, NULL);
    } else if (arg->depth == 2) {
        /* I'm going to spawn leaf nodes. */
        v_args_t args = { arg->depth - 1, arg->sinc };

        qt_sinc_expect(arg->sinc, 2);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);

        qt_sinc_submit(arg->sinc, NULL);
    } else {
        /* I'm a leaf node. */
        qt_sinc_submit(arg->sinc, NULL);
    }

    return 0;
}
示例#2
0
/*
 * Fork a qthread that runs chapel_wrapper on (fp, arg).
 *
 * fp, arg       function and argument stored in the wrapper's argument record
 * subLoc        requested sublocale; must not be c_sublocid_curr
 * id            must be chpl_nullTaskID
 * serial_state  stored in the private data copied into the new task
 *
 * The argument record is built on the stack and passed to the *_copyargs
 * fork calls together with its size.
 */
void chpl_task_startMovedTask(chpl_fn_p      fp,
                              void          *arg,
                              c_sublocid_t   subLoc,
                              chpl_taskID_t  id,
                              chpl_bool      serial_state)
{
    assert(subLoc != c_sublocid_curr);
    assert(id == chpl_nullTaskID);

    /* Start from the caller's private data, then override serial_state. */
    chapel_wrapper_args_t task_args =
        {fp, arg, NULL, 0, *chpl_task_getPrivateData()};
    task_args.chpl_data.serial_state = serial_state;

    PROFILE_INCR(profile_task_startMovedTask,1);

#if 1
    // Passing subLoc as 0 (zero) leads to timeouts.  Is it impossible to
    // time share tasks on a single shepherd?  Maybe a shepherd can only
    // time share as many tasks as it has workers?  Until that is
    // understood, force the subLoc to be "any".
    subLoc = c_sublocid_any;
#endif

    if (subLoc != c_sublocid_any) {
        /* A specific sublocale was requested: fork onto that shepherd. */
        qthread_fork_syncvar_copyargs_to(chapel_wrapper, &task_args,
                                         sizeof(chapel_wrapper_args_t), NULL,
                                         (qthread_shepherd_id_t) subLoc);
        return;
    }

    /* No placement constraint. */
    qthread_fork_syncvar_copyargs(chapel_wrapper, &task_args,
                                  sizeof(chapel_wrapper_args_t), NULL);
}
示例#3
0
/*
 * Fork a qthread that runs chapel_wrapper on chpl_ftable[fid] with arg.
 *
 * fid               index into chpl_ftable selecting the task function
 * arg               argument stored in the wrapper's argument record
 * subLoc            c_sublocid_any, c_sublocid_curr, or a specific sublocale
 * lineno, filename  stored in the wrapper's argument record
 *
 * task_list, task_list_locale and is_begin_stmt are not used by this
 * implementation.
 *
 * If the calling task's private data has serial_state set, the new task is
 * forked onto the caller's shepherd and this function blocks until its
 * return syncvar is full.  Otherwise the task is forked without waiting,
 * either unconstrained (c_sublocid_any) or onto a specific shepherd.
 */
void chpl_task_addToTaskList(chpl_fn_int_t     fid,
                             void             *arg,
                             c_sublocid_t      subLoc,
                             chpl_task_list_p *task_list,
                             int32_t           task_list_locale,
                             chpl_bool         is_begin_stmt,
                             int               lineno,
                             chpl_string       filename)
{
    qthread_shepherd_id_t const current_shep = qthread_shep();
    chpl_task_private_data_t *caller_data = chpl_task_getPrivateData();
    chpl_bool run_serially = caller_data->serial_state;
    chapel_wrapper_args_t task_args =
        {chpl_ftable[fid], arg, filename, lineno, *caller_data};

    PROFILE_INCR(profile_task_addToTaskList,1);

    if (run_serially) {
        /* Fork on our own shepherd and wait for the task's return value. */
        syncvar_t done = SYNCVAR_STATIC_EMPTY_INITIALIZER;

        qthread_fork_syncvar_copyargs_to(chapel_wrapper, &task_args,
                                         sizeof(chapel_wrapper_args_t), &done,
                                         current_shep);
        qthread_syncvar_readFF(NULL, &done);
        return;
    }

    if (subLoc == c_sublocid_any) {
        /* No placement constraint. */
        qthread_fork_syncvar_copyargs(chapel_wrapper, &task_args,
                                      sizeof(chapel_wrapper_args_t), NULL);
        return;
    }

    /* Explicit placement; "current" resolves to the caller's shepherd. */
    if (subLoc == c_sublocid_curr) {
        subLoc = (c_sublocid_t) current_shep;
    }
    qthread_fork_syncvar_copyargs_to(chapel_wrapper, &task_args,
                                     sizeof(chapel_wrapper_args_t), NULL,
                                     (qthread_shepherd_id_t) subLoc);
}
示例#4
0
// Task body: count the nodes of the subtree rooted at the node in `args_`.
//
// `args_` points to a node_t.  The function forks one `visit` task per
// child, waits for all of them, then stores 1 + (sum of the children's
// counts) through `parent->acc`.  Finally it atomically increments
// `*parent->dc` and, if that makes the counter equal to `parent->expect`,
// fills `parent->dc`.  Always returns 0.
//
// Notes:
// -    Each task receives a distinct copy of its node (`parent` here)
// -    The copy handed to each child is shallow, be careful with the
//      `state` member
static aligned_t visit(void *args_)
{
    node_t   *parent          = (node_t *)args_;
    int       parent_height   = parent->height;
    int       num_children    = parent->num_children;
    aligned_t expect          = parent->expect;
    aligned_t sum_descendants = 1;

    if (num_children > 0) {
        // Declared inside the guard: a variable-length array must have a
        // size greater than zero, so it cannot be created for leaf nodes.
        aligned_t  num_descendants[num_children];
        node_t     child __attribute__((aligned(8)));
        aligned_t  donec = 0;

        // Spawn children, if any
        child.height = parent_height + 1;
        child.dc     = &donec;
        child.expect = num_children;

        // Mark the completion counter empty; the last child to finish
        // fills it (see the tail of this function).
        qthread_empty(&donec);

        for (int i = 0; i < num_children; i++) {
            child.acc    = &num_descendants[i];

            for (int j = 0; j < num_samples; j++) {
                rng_spawn(parent->state.state, child.state.state, i);
            }

            child.num_children = calc_num_children(&child);

            // `child` is passed with its size to the copyargs fork, so it
            // is reused for the next iteration.
            qthread_fork_syncvar_copyargs(visit, &child, sizeof(node_t), NULL);
        }

        // Wait for children to finish up.  Always block on the full bit:
        // `expect` is this node's own sibling count, not `num_children`,
        // and an unsynchronized peek at `donec` could let this frame (which
        // owns `donec` and `num_descendants`) unwind while children still
        // hold pointers into it.
        qthread_readFF(NULL, &donec);

        // Accumulate descendants counts
        for (int i = 0; i < num_children; i++) {
            sum_descendants += num_descendants[i];
        }
    }

    // Publish this subtree's count, then signal our own parent; whichever
    // sibling brings the counter up to `expect` fills it.
    *parent->acc = sum_descendants;
    if (qthread_incr(parent->dc, 1) + 1 == expect) {
        qthread_fill(parent->dc);
    }

    return 0;
}
示例#5
0
/*
 * Stencil benchmark driver.
 *
 * Sets default parameters and passes each to NUMARG, initializes Qthreads,
 * allocates NUM_STAGES grids of (n+2) x (m+2) points, initializes them,
 * forks one `update` task per interior point, waits on a barrier for all
 * of them, prints timing information and, when print_final is non-zero,
 * three stages of the grid.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    int n = 10;
    int m = 10;
    num_timesteps = 10;
    workload = 0;
    workload_per = 0;
    workload_var = 0;
    int print_final = 0;
    int alltime = 0;

    CHECK_VERBOSE();
    NUMARG(n, "N");
    NUMARG(m, "M");
    NUMARG(num_timesteps, "TIMESTEPS");
    NUMARG(workload, "WORKLOAD");
    NUMARG(workload_per, "WORKLOAD_PER");
    NUMARG(workload_var, "WORKLOAD_VAR");
    NUMARG(print_final, "PRINT_FINAL");
    NUMARG(alltime, "ALL_TIME");

    assert (n > 0 && m > 0);

    // Initialize Qthreads.  The call is kept outside assert() so that it
    // still happens when assertions are compiled out with NDEBUG.
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    qtimer_t alloc_timer = qtimer_create();
    qtimer_t init_timer = qtimer_create();
    qtimer_t exec_timer = qtimer_create();

    // Allocate memory for 3-stage stencil (with boundary padding)
    qtimer_start(alloc_timer);
    stencil_t points;
    points.N = n + 2;
    points.M = m + 2;

    for (int s = 0; s < NUM_STAGES; s++) {
        points.stage[s] = malloc(points.N*sizeof(aligned_t *));
        assert(NULL != points.stage[s]);
        for (int i = 0; i < points.N; i++) {
            points.stage[s][i] = calloc(points.M, sizeof(aligned_t));
            assert(NULL != points.stage[s][i]);
        }
    }
    qtimer_stop(alloc_timer);

    // Initialize first stage and set boundary conditions
    qtimer_start(init_timer);
    // Interior points: stage 0 is written (and marked full) with 0, the
    // remaining stages are marked empty.
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            qthread_writeF_const(&points.stage[0][i][j], 0);
            for (int s = 1; s < NUM_STAGES; s++)
                qthread_empty(&points.stage[s][i][j]);
        }
    }
    // First and last column of every row, in every stage
    for (int i = 0; i < points.N; i++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][i][0], BOUNDARY);
            qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY);
#else
            points.stage[s][i][0] = BOUNDARY;
            points.stage[s][i][points.M-1] = BOUNDARY;
#endif
        }
    }
    // First and last row of every column, in every stage
    for (int j = 0; j < points.M; j++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][0][j], BOUNDARY);
            qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY);
#else
            points.stage[s][0][j] = BOUNDARY;
            points.stage[s][points.N-1][j] = BOUNDARY;
#endif
        }
    }
    qtimer_stop(init_timer);

    // Create barrier to synchronize on completion of calculations:
    // one slot per interior point plus one for this thread
    qtimer_start(exec_timer);
    points.barrier = qt_feb_barrier_create(n*m+1);

    // Spawn tasks to start calculating updates at each point.  `args` is
    // passed with its size to the copyargs fork, so it is reused with new
    // coordinates on every iteration.
    update_args_t args = {&points, -1, -1, 1, 1};
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            args.i = i;
            args.j = j;
            qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL);
        }
    }

    // Wait for calculations to finish
    qt_feb_barrier_enter(points.barrier);
    qtimer_stop(exec_timer);

    // Print timing info
    if (alltime) {
        fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer));
        fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer));
        fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer));
    } else {
        fprintf(stdout, "%f\n", qtimer_secs(exec_timer));
    }

    // Print stencils.  The stage indices are cast to unsigned long so the
    // arguments match the %lu conversions on every platform.
    if (print_final) {
        size_t final = (num_timesteps % NUM_STAGES);
        iprintf("Stage %lu:\n",
                (unsigned long)prev_stage(prev_stage(final)));
        print_stage(&points, prev_stage(prev_stage(final)));
        iprintf("\nStage %lu:\n", (unsigned long)prev_stage(final));
        print_stage(&points, prev_stage(final));
        iprintf("\nStage %lu:\n", (unsigned long)final);
        print_stage(&points, final);
    }

    qt_feb_barrier_destroy(points.barrier);
    qtimer_destroy(alloc_timer);
    qtimer_destroy(init_timer);
    qtimer_destroy(exec_timer);

    // Free allocated memory, mirroring the allocation loop so that every
    // one of the NUM_STAGES grids is released
    for (int s = 0; s < NUM_STAGES; s++) {
        for (int i = 0; i < points.N; i++) {
            free(points.stage[s][i]);
        }
        free(points.stage[s]);
    }

    return 0;
}
示例#6
0
文件: sinc_null.c 项目: Agobin/chapel
// //////////////////////////////////////////////////////////////////////////////
/*
 * Sinc test driver.
 *
 * Initializes Qthreads, then exercises the sinc API in three steps:
 *   1. a stack-allocated sinc expecting zero submissions, and a
 *      heap-allocated sinc that is raised to three expected submissions
 *      and fed by three submit_to_sinc tasks;
 *   2. a sinc created expecting two submissions, with three extra
 *      wait_on_sinc tasks, driven by two `visit` tasks of the requested
 *      depth;
 *   3. the same sinc after qt_sinc_reset(), driven by two more `visit`
 *      tasks.
 *
 * The depth defaults to 3 and is passed to NUMARG under "TEST_DEPTH".
 * Returns 0.
 */
int main(int   argc,
         char *argv[])
{
    enum { NUM_WAITERS = 3 };
    size_t depth = 3;

    // Keep the call outside assert() so that Qthreads is still initialized
    // when assertions are compiled out with NDEBUG.
    int const init_status = qthread_initialize();
    assert(init_status == 0);
    (void)init_status;

    CHECK_VERBOSE();
    NUMARG(depth, "TEST_DEPTH");

    // Test creating an empty sinc
    {
        qt_sinc_t zero_sinc;
        qt_sinc_init(&zero_sinc, 0, NULL, NULL, 0);
        qt_sinc_wait(&zero_sinc, NULL);
        qt_sinc_fini(&zero_sinc);

        // Created expecting nothing, then raised to three submissions
        qt_sinc_t *three_sinc = qt_sinc_create(0, NULL, NULL, 0);
        qt_sinc_expect(three_sinc, 3);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qthread_fork(submit_to_sinc, three_sinc, NULL);
        qt_sinc_wait(three_sinc, NULL);
        qt_sinc_destroy(three_sinc);
    }

    qt_sinc_t *sinc = qt_sinc_create(0, NULL, NULL, 2);

    // Spawn additional waits
    aligned_t rets[NUM_WAITERS];
    for (int i = 0; i < NUM_WAITERS; i++) {
        qthread_fork(wait_on_sinc, sinc, &rets[i]);
    }

    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_create(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    // Wait on the sinc ourselves, then on each waiter task's return slot
    qt_sinc_wait(sinc, NULL);
    for (int i = 0; i < NUM_WAITERS; i++) {
        qthread_readFF(NULL, &rets[i]);
    }

    // Reset the sinc
    qt_sinc_reset(sinc, 2);

    // Second use
    {
        v_args_t args = { depth, sinc };

        // These two spawns covered by qt_sinc_reset(...,2)
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
        qthread_fork_syncvar_copyargs(visit, &args, sizeof(v_args_t), NULL);
    }

    qt_sinc_wait(sinc, NULL);
    qt_sinc_destroy(sinc);

    return 0;
}