Пример #1
0
void rt1_launcher(void *arg)
{
    int idx = (int)(intptr_t)arg;
    ABT_thread cur_thread;
    ABT_pool cur_pool;
    ABT_sched_config config;
    ABT_sched sched;
    size_t size;
    double t_start, t_end;

    ABT_sched_config_var cv_event_freq = {
        .idx = 0,
        .type = ABT_SCHED_CONFIG_INT
    };

    ABT_sched_config_var cv_idx = {
        .idx = 1,
        .type = ABT_SCHED_CONFIG_INT
    };

    ABT_sched_def sched_def = {
        .type = ABT_SCHED_TYPE_ULT,
        .init = sched_init,
        .run = sched_run,
        .free = sched_free,
        .get_migr_pool = NULL
    };

    /* Create a scheduler */
    ABT_sched_config_create(&config,
                            cv_event_freq, 10,
                            cv_idx, idx,
                            ABT_sched_config_var_end);
    ABT_sched_create(&sched_def, 1, &rt1_data->pool, config, &sched);

    /* Push the scheduler to the current pool */
    ABT_thread_self(&cur_thread);
    ABT_thread_get_last_pool(cur_thread, &cur_pool);
    ABT_pool_add_sched(cur_pool, sched);

    /* Free */
    ABT_sched_config_free(&config);

    t_start = ABT_get_wtime();
    while (1) {
        rt1_app(idx);

        ABT_pool_get_total_size(cur_pool, &size);
        if (size == 0) {
            ABT_sched_free(&sched);
            int rank;
            ABT_xstream_self_rank(&rank);
            printf("ES%d: finished\n", rank);
            ABT_mutex_lock(rt1_data->mutex);
            rt1_data->xstreams[rank] = ABT_XSTREAM_NULL;
            rt1_data->num_xstreams--;
            ABT_mutex_unlock(rt1_data->mutex);
            break;
        }

        t_end = ABT_get_wtime();
        if ((t_end - t_start) > g_timeout) {
            ABT_sched_finish(sched);
        }
    }
}

/*
 * Run one round of the application on behalf of launcher `eid`.
 *
 * Creates rt1_data->num_comps ULT/tasklet pairs in rt1_data->pool, each
 * running rt1_app_compute() on its own app_data slot, then yields until
 * either the application pool or the caller's own pool is empty.
 *
 * Only eid == 0 drives the event profiler: it brackets the round with
 * ABT_event_prof_start()/stop() and publishes the amount of work done
 * (completed computations times iterations per computation).
 */
static void rt1_app(int eid)
{
    int i, num_comps;
    size_t size;
    ABT_thread cur_thread;
    ABT_pool cur_pool;

    ABT_thread_self(&cur_thread);
    ABT_thread_get_last_pool(cur_thread, &cur_pool);

    if (eid == 0) ABT_event_prof_start();

    /* Even slots go to ULTs, odd slots to tasklets. */
    num_comps = rt1_data->num_comps;
    for (i = 0; i < num_comps * 2; i += 2) {
        ABT_thread_create(rt1_data->pool, rt1_app_compute,
                          (void *)(intptr_t)(eid * num_comps * 2 + i),
                          ABT_THREAD_ATTR_NULL, NULL);
        ABT_task_create(rt1_data->pool, rt1_app_compute,
                        (void *)(intptr_t)(eid * num_comps * 2 + i + 1),
                        NULL);
    }

    do {
        ABT_thread_yield();

        /* If the size of cur_pool is zero, it means the stacked scheduler has
         * been terminated because of the shrinking event. */
        ABT_pool_get_total_size(cur_pool, &size);
        if (size == 0) break;

        ABT_pool_get_total_size(rt1_data->pool, &size);
    } while (size > 0);

    if (eid == 0) {
        ABT_event_prof_stop();

        /* Read and reset the completion counter in one atomic step. */
        int cnt = __atomic_exchange_n(&rt1_data->cnt, 0, __ATOMIC_SEQ_CST);

        /* Widen before multiplying: cnt * num_iters in integer arithmetic
         * can overflow for large iteration counts. */
        double local_work = (double)cnt * (double)rt1_data->num_iters;
        ABT_event_prof_publish("ops", local_work, local_work);
    }
}

/*
 * Work unit for the application: reset one app_data slot, add sin(slot) to
 * it rt1_data->num_iters times, then bump the shared completion counter.
 *
 * arg carries the slot index (passed through intptr_t).
 */
static void rt1_app_compute(void *arg)
{
    const int slot = (int)(intptr_t)arg;
    int iter = 0;

    rt1_data->app_data[slot] = 0;
    while (iter < rt1_data->num_iters) {
        rt1_data->app_data[slot] += sin((double)slot);
        iter++;
    }

    /* Report this unit as finished. */
    __atomic_fetch_add(&rt1_data->cnt, 1, __ATOMIC_SEQ_CST);
}
/*
 * Benchmark driver: runs vector_scal_launch() on every execution stream and
 * prints the average wall-clock time per repetition.
 *
 * Usage: prog [num_xstreams] [inner_xstreams] [repetitions]
 *   num_xstreams   - outer execution streams (default NUM_XSTREAMS), must be > 0
 *   inner_xstreams - value forwarded to each task as args.nxstreams
 *                    (default NUM_XSTREAMS)
 *   repetitions    - timed repetitions (default NUM_REPS), must be > 0
 *
 * Each repetition splits the index range [0, NUM) into num_xstreams
 * contiguous chunks; the first `NUM % num_xstreams` chunks get one extra
 * element. Chunk 0 is executed directly by the caller, the others as ULTs in
 * the pool of their execution stream.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on invalid arguments or allocation
 * failure.
 */
int main(int argc, char *argv[])
{
    int i, j, r;
    int num_xstreams;
    ABT_xstream *xstreams;
    ABT_thread *threads;
    vector_scal_task_args_t *args;
    int inner_xstreams;
    double *time, avg_time = 0.0;

    num_xstreams = (argc > 1) ? atoi(argv[1]) : NUM_XSTREAMS;
    inner_xstreams = (argc > 2) ? atoi(argv[2]) : NUM_XSTREAMS;
    int rep = (argc > 3) ? atoi(argv[3]) : NUM_REPS;

    /* num_xstreams is used as a divisor and both values size allocations,
     * so reject anything that is not strictly positive. */
    if (num_xstreams < 1 || rep < 1) {
        fprintf(stderr,
                "error: num_xstreams and repetitions must be positive\n");
        return EXIT_FAILURE;
    }

    time = (double *)malloc(sizeof(double) * rep);
    if (time == NULL) {
        fprintf(stderr, "error: out of memory\n");
        return EXIT_FAILURE;
    }

    init();

    g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams);
    xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams);
    threads = (ABT_thread *)malloc(sizeof(ABT_thread) * num_xstreams);
    args = (vector_scal_task_args_t *)malloc(sizeof(vector_scal_task_args_t)
            * num_xstreams);
    if (g_pools == NULL || xstreams == NULL || threads == NULL
        || args == NULL) {
        fprintf(stderr, "error: out of memory\n");
        free(g_pools);
        free(xstreams);
        free(threads);
        free(args);
        free(time);
        return EXIT_FAILURE;
    }

    /* initialization */
    ABT_init(argc, argv);

    /* One pool per execution stream. */
    for (i = 0; i < num_xstreams; i++) {
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
                              &g_pools[i]);
    }

    /* ES creation: the calling stream becomes ES 0 and uses pool 0. */
    ABT_xstream_self(&xstreams[0]);
    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
                                     1, &g_pools[0]);

    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i],
                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
        ABT_xstream_start(xstreams[i]);
    }

    /* Each task is created on the xstream which is going to execute it */

    for (r = 0; r < rep; r++) {
        time[r] = ABT_get_wtime();

        int bloc = NUM / (num_xstreams);
        int rest = NUM % (num_xstreams);
        int start = 0;
        int end = 0;

        for (j = 0; j < num_xstreams; j++) {
            start = end;
            /* Spread the remainder over the first `rest` chunks. */
            int inc = (j < rest) ? 1 : 0;
            end += bloc + inc;
            args[j].start = start;
            args[j].end = end;
            args[j].it = NUM;
            args[j].nxstreams = inner_xstreams;
            if (j > 0) {
                ABT_thread_create(g_pools[j], vector_scal_launch,
                                  (void *)&args[j], ABT_THREAD_ATTR_NULL,
                                  &threads[j]);
            }
        }
        /* Chunk 0 runs on the calling ULT. */
        vector_scal_launch((void *)&args[0]);

        /* Wait for (and release) the ULTs of the other chunks. */
        for (j = 1; j < num_xstreams; j++) {
            ABT_thread_free(&threads[j]);
        }

        time[r] = ABT_get_wtime() - time[r];
        avg_time += time[r];
    }
    avg_time /= rep;
    printf("%d %d %f\n", num_xstreams, inner_xstreams, avg_time);
    check();

    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_join(xstreams[i]);
        ABT_xstream_free(&xstreams[i]);
    }

    ABT_finalize();

    free(g_pools);
    free(xstreams);
    free(threads);
    free(args);
    free(time);

    return EXIT_SUCCESS;
}
Пример #3
0
/* Create a work-stealing scheduler and push it to the pool */
static void thread_add_sched(void *arg)
{
    int idx = (int)(intptr_t)arg;
    int i;
    ABT_thread cur_thread;
    ABT_pool cur_pool;
    ABT_pool *my_pools;
    ABT_sched_config config;
    ABT_sched sched;
    size_t size;
    double t_start, t_end;

    ABT_sched_config_var cv_event_freq = {
        .idx = 0,
        .type = ABT_SCHED_CONFIG_INT
    };

    ABT_sched_config_var cv_idx = {
        .idx = 1,
        .type = ABT_SCHED_CONFIG_INT
    };

    ABT_sched_def sched_def = {
        .type = ABT_SCHED_TYPE_ULT,
        .init = sched_init,
        .run = sched_run,
        .free = sched_free,
        .get_migr_pool = NULL
    };

    /* Create a scheduler */
    ABT_sched_config_create(&config,
                            cv_event_freq, 10,
                            cv_idx, idx,
                            ABT_sched_config_var_end);
    my_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * max_xstreams);
    for (i = 0; i < max_xstreams; i++) {
        my_pools[i] = g_pools[(idx + i) % max_xstreams];
    }
    ABT_sched_create(&sched_def, max_xstreams, my_pools, config, &sched);

    /* Create a ULT for the new scheduler */
    ABT_thread_create(my_pools[0], thread_work, arg, ABT_THREAD_ATTR_NULL,
                      NULL);

    /* Push the scheduler to the current pool */
    ABT_thread_self(&cur_thread);
    ABT_thread_get_last_pool(cur_thread, &cur_pool);
    ABT_pool_add_sched(cur_pool, sched);

    /* Free */
    ABT_thread_release(cur_thread);
    ABT_sched_config_free(&config);
    free(my_pools);

    t_start = ABT_get_wtime();
    while (1) {
        ABT_thread_yield();

        ABT_pool_get_total_size(cur_pool, &size);
        if (size == 0) {
            ABT_sched_free(&sched);
            break;
        }

        t_end = ABT_get_wtime();
        if ((t_end - t_start) > g_timeout) {
            ABT_sched_finish(sched);
        }
    }
}

static void thread_work(void *arg)
{
    int idx = (int)(intptr_t)arg;
    int i;
    ABT_thread cur_thread;
    ABT_pool cur_pool;
    ABT_thread *threads;
    int num_threads;
    double t_start, t_end;

    ABT_thread_self(&cur_thread);
    ABT_thread_get_last_pool(cur_thread, &cur_pool);
    ABT_thread_release(cur_thread);

    t_start = ABT_get_wtime();
    while (1) {
        num_threads = 2;
        threads = (ABT_thread *)malloc(sizeof(ABT_thread) * num_threads);
        for (i = 0; i < num_threads; i++) {
            ABT_thread_create(cur_pool, thread_hello, NULL,
                              ABT_THREAD_ATTR_NULL, &threads[i]);
        }
        for (i = 0; i < num_threads; i++) {
            ABT_thread_free(&threads[i]);
        }
        free(threads);

        if (g_signal[idx]) {
            ABT_xstream xstream;
            ABT_xstream_self(&xstream);
            ABT_xstream_cancel(xstream);
            g_signal[idx] = 0;
            break;
        }

        t_end = ABT_get_wtime();
        if ((t_end - t_start) > g_timeout) {
            break;
        }
    }
}

/*
 * printf-style debug output helper.
 *
 * Output is currently compiled out, so the function is a no-op. Change the
 * `#if 0` below to `#if 1` to print the formatted message to stdout and
 * flush it immediately (requires <stdarg.h> and <stdio.h>).
 */
static void test_printf(const char *format, ...)
{
#if 0
    va_list list; /* was missing: the disabled body did not compile */

    va_start(list, format);
    vprintf(format, list);
    va_end(list);
    fflush(stdout);
#else
    (void)format; /* intentionally unused while output is disabled */
#endif
}
Пример #4
0
/*
 * ULT operation micro-benchmark driver.
 *
 * Creates num_xstreams execution streams, each with a default scheduler over
 * two FIFO pools (pool 0: MPSC, used to inject the top-level test ULT;
 * pool 1: private, published through g_pools). For every test kind below
 * T_LAST it runs the test function once as warm-up and once timed, on all
 * execution streams in parallel.
 *
 * T_YIELD and T_YIELD_TO are not measured directly: they are derived as
 * (T_*_ALL - T_*_OVERHEAD), clamped at zero.
 *
 * Timing uses ABT_get_wtime() when USE_TIME is defined and
 * ABT_test_get_cycles() otherwise. Results are divided by iter and
 * num_threads and printed as a table.
 */
int main(int argc, char *argv[])
{
    ABT_pool (*all_pools)[2];
    ABT_sched *scheds;
    ABT_thread *top_threads;
    size_t i, t;
    /* The start stamp must have the clock's own type: storing the double
     * returned by ABT_get_wtime() in an integer would drop the fractional
     * seconds and inflate every measurement by up to one second. */
#ifdef USE_TIME
    double t_start;
#else
    uint64_t t_start;
#endif

    /* initialize */
    ABT_test_init(argc, argv);

    for (i = 0; i < T_LAST; i++) {
        t_times[i] = 0;
    }

    /* read command-line arguments */
    num_xstreams = ABT_test_get_arg_val(ABT_TEST_ARG_N_ES);
    num_threads  = ABT_test_get_arg_val(ABT_TEST_ARG_N_ULT);
    iter = ABT_test_get_arg_val(ABT_TEST_ARG_N_ITER);

    g_xstreams = (ABT_xstream *)malloc(num_xstreams * sizeof(ABT_xstream));
    g_pools    = (ABT_pool *)malloc(num_xstreams * sizeof(ABT_pool));
    g_threads  = (ABT_thread **)malloc(num_xstreams * sizeof(ABT_thread *));
    for (i = 0; i < num_xstreams; i++) {
        g_threads[i] = (ABT_thread *)malloc(num_threads * sizeof(ABT_thread));
    }
    all_pools = (ABT_pool (*)[2])malloc(num_xstreams * sizeof(ABT_pool) * 2);
    scheds = (ABT_sched *)malloc(num_xstreams * sizeof(ABT_sched));
    top_threads = (ABT_thread *)malloc(num_xstreams * sizeof(ABT_thread));

    /* create pools and schedulers */
    for (i = 0; i < num_xstreams; i++) {
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPSC, ABT_TRUE,
                              &all_pools[i][0]);
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_PRIV, ABT_TRUE,
                              &all_pools[i][1]);
        g_pools[i] = all_pools[i][1];

        ABT_sched_create_basic(ABT_SCHED_DEFAULT, 2, all_pools[i],
                               ABT_SCHED_CONFIG_NULL, &scheds[i]);
    }

    /* create ESs: the calling stream becomes ES 0 */
    ABT_xstream_self(&g_xstreams[0]);
    ABT_xstream_set_main_sched(g_xstreams[0], scheds[0]);
    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_create(scheds[i], &g_xstreams[i]);
    }

    /* benchmarking */
    for (t = 0; t < T_LAST; t++) {
        void (*test_fn)(void *) = NULL;

        /* Derived metrics: computed from earlier measurements, never run. */
        if (t == T_YIELD) {
            if (t_times[T_YIELD_ALL] > t_times[T_YIELD_OVERHEAD]) {
                t_times[t] = t_times[T_YIELD_ALL] - t_times[T_YIELD_OVERHEAD];
            } else {
                t_times[t] = 0;
            }
            continue;
        } else if (t == T_YIELD_TO) {
            if (t_times[T_YIELD_TO_ALL] > t_times[T_YIELD_TO_OVERHEAD]) {
                t_times[t] = t_times[T_YIELD_TO_ALL] - t_times[T_YIELD_TO_OVERHEAD];
            } else {
                t_times[t] = 0;
            }
            continue;
        }

        switch (t) {
            case T_CREATE_JOIN:        test_fn = test_create_join;
                                       break;
            case T_CREATE_UNNAMED:     test_fn = test_create_unnamed;
                                       break;
            case T_YIELD_OVERHEAD:     test_fn = test_yield_overhead;
                                       break;
            case T_YIELD_ALL:          test_fn = test_yield;
                                       break;
            case T_YIELD_TO_OVERHEAD:  test_fn = test_yield_to_overhead;
                                       break;
            case T_YIELD_TO_ALL:       test_fn = test_yield_to;
                                       break;
#ifdef TEST_MIGRATE_TO
            case T_MIGRATE_TO_XSTREAM: test_fn = test_migrate_to_xstream;
                                       break;
#endif
            default:
                assert(0);
                /* With NDEBUG the assert vanishes; skip the unknown test
                 * instead of launching ULTs with no function. */
                continue;
        }

        /* warm-up */
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_create(all_pools[i][0], test_fn, (void *)i,
                              ABT_THREAD_ATTR_NULL, &top_threads[i]);
        }
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_free(&top_threads[i]);
        }

        /* measurement */
#ifdef USE_TIME
        t_start = ABT_get_wtime();
#else
        t_start = ABT_test_get_cycles();
#endif
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_create(all_pools[i][0], test_fn, (void *)i,
                              ABT_THREAD_ATTR_NULL, &top_threads[i]);
        }
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_free(&top_threads[i]);
        }
#ifdef USE_TIME
        t_times[t] = ABT_get_wtime() - t_start;
#else
        t_times[t] = ABT_test_get_cycles() - t_start;
#endif
    }

    /* join and free */
    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_join(g_xstreams[i]);
        ABT_xstream_free(&g_xstreams[i]);
    }

    /* finalize */
    ABT_test_finalize(0);

    /* compute the execution time for one iteration */
    for (i = 0; i < T_LAST; i++) {
        t_times[i] = t_times[i] / iter / num_threads;
    }

    /* output */
    int line_size = 56;
    ABT_test_print_line(stdout, '-', line_size);
    printf("%s\n", "Argobots");
    ABT_test_print_line(stdout, '-', line_size);
    printf("# of ESs        : %d\n", num_xstreams);
    printf("# of ULTs per ES: %d\n", num_threads);
    ABT_test_print_line(stdout, '-', line_size);
    printf("Avg. execution time (in seconds, %d times)\n", iter);
    ABT_test_print_line(stdout, '-', line_size);
    printf("%-20s %-s\n", "operation", "time");
    ABT_test_print_line(stdout, '-', line_size);
    for (i = 0; i < T_LAST; i++) {
#ifdef USE_TIME
        printf("%-19s  %.9lf\n", t_names[i], t_times[i]);
#else
        printf("%-19s  %11" PRIu64 "\n", t_names[i], t_times[i]);
#endif
    }
    ABT_test_print_line(stdout, '-', line_size);

    free(g_xstreams);
    free(g_pools);
    for (i = 0; i < num_xstreams; i++) {
        free(g_threads[i]);
    }
    free(g_threads);
    free(all_pools);
    free(scheds);
    free(top_threads);

    return EXIT_SUCCESS;
}