Пример #1
0
void accalt_init(int argc, char * argv[]) {

    int num_threads = 1;
    main_team = (accalt_team_t *) malloc(sizeof (accalt_team_t));

#ifdef ARGOBOTS


    ABT_init(argc, argv);
    int num_pools = 1;
    if (getenv("ACCALT_NUM_THREADS") != NULL) {
        num_threads = atoi(getenv("ACCALT_NUM_THREADS"));
    }
    if (getenv("ACCALT_NUM_POOLS") != NULL) {
        num_pools = atoi(getenv("ACCALT_NUM_POOLS"));
    }
    main_team->num_xstreams = num_threads;
    main_team->num_pools = num_pools;
    //printf("Argobots %d ES, %d Pools\n", num_threads, num_pools);
    ABT_xstream_self(&main_team->master);

    main_team->team = (ABT_xstream *) malloc(sizeof (ABT_xstream) * num_threads);
    main_team->pools = (ABT_pool *) malloc(sizeof (ABT_pool) * num_pools);

    for (int i = 0; i < num_pools; i++) {
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
                &main_team->pools[i]);
    }

    ABT_xstream_self(&main_team->team[0]);
    ABT_xstream_set_main_sched_basic(main_team->team[0], ABT_SCHED_DEFAULT,
            1, &main_team->pools[0]);
    for (int i = 1; i < num_threads; i++) {
        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1,
                &main_team->pools[i % main_team->num_pools],
                ABT_SCHED_CONFIG_NULL, &main_team->team[i]);
        ABT_xstream_start(main_team->team[i]);
    }
#endif
#ifdef MASSIVETHREADS
    char buff[10];
    if (getenv("ACCALT_NUM_THREADS") != NULL) {
        num_threads = atoi(getenv("ACCALT_NUM_THREADS"));
        sprintf(buff, "%d", num_threads);
        setenv("MYTH_WORKER_NUM", buff, 1);
    } else
        num_threads = atoi(getenv("MYTH_WORKER_NUM"));

    setenv("MYTH_BIND_WORKERS", "1", 1);

    //printf("Massive %d Workers\n", num_threads);
    main_team->num_workers = num_threads;
    myth_init(); //MassiveThreads
#endif
#ifdef QTHREADS
    char buff[10];
    int num_workers_per_thread;
    if (getenv("ACCALT_NUM_THREADS") != NULL) {
        num_threads = atoi(getenv("ACCALT_NUM_THREADS"));
        sprintf(buff, "%d", num_threads);
        setenv("QTHREAD_NUM_SHEPHERDS", buff, 1);
    } else
        num_threads = atoi(getenv("QTHREAD_NUM_SHEPHERDS"));

    if (getenv("ACCALT_NUM_WORKERS_PER_THREAD") != NULL) {
        num_workers_per_thread = atoi(getenv("ACCALT_NUM_WORKERS_PER_THREAD"));
        sprintf(buff, "%d", num_workers_per_thread);
        setenv("QTHREAD_NUM_WORKERS_PER_SHEPHERD", buff, 1);
    } else
        num_workers_per_thread = atoi(getenv("QTHREAD_NUM_WORKERS_PER_SHEPHERD"));
    if (num_threads == 1 && num_workers_per_thread > 1) {
        setenv("QTHREAD_SHEPHERDS_BOUNDARY", "node", 1);
        setenv("QTHREAD_WORKER_UNIT", "core", 1);
    }
    if (num_threads > 1) {
        setenv("QTHREAD_SHEPHERDS_BOUNDARY", "core", 1);
        setenv("QTHREAD_WORKER_UNIT", "core", 1);
    }
    setenv("QTHREAD_AFFINITY", "yes", 1);

    //printf("Qthreads %d Shepherds, %d Workers_per_shepherd\n", num_threads, num_workers_per_thread);

    main_team->num_shepherds = num_threads;
    main_team->num_workers_per_shepherd = num_workers_per_thread;
    qthread_initialize(); //qthreads
#endif
}
Пример #2
0
int main(int argc, char *argv[])
{
    int i, j;
    int ret;
    int num_xstreams = DEFAULT_NUM_XSTREAMS;
    int num_threads = DEFAULT_NUM_THREADS;
    if (argc > 1) num_xstreams = atoi(argv[1]);
    assert(num_xstreams >= 0);
    if (argc > 2) num_threads = atoi(argv[2]);
    assert(num_threads >= 0);

    ABT_xstream *xstreams;
    xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams);

    ABT_pool *pools;
    pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams);

    /* Initialize */
    ABT_test_init(argc, argv);

    /* Create Execution Streams */
    ret = ABT_xstream_self(&xstreams[0]);
    ABT_TEST_ERROR(ret, "ABT_xstream_self");
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_create(ABT_SCHED_NULL, &xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_create");
    }

    /* Get the pools attached to an execution stream */
    for (i = 0; i < num_xstreams; i++) {
        ret = ABT_xstream_get_main_pools(xstreams[i], 1, pools+i);
        ABT_TEST_ERROR(ret, "ABT_xstream_get_main_pools");
    }

    /* Create threads */
    for (i = 0; i < num_xstreams; i++) {
        for (j = 0; j < num_threads; j++) {
            size_t tid = i * num_threads + j + 1;
            ret = ABT_thread_create(pools[i],
                    thread_func, (void *)tid, ABT_THREAD_ATTR_NULL,
                    NULL);
            ABT_TEST_ERROR(ret, "ABT_thread_create");
        }
    }

    /* Join Execution Streams */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_join(xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_join");
    }

    /* Free Execution Streams */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_free(&xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_free");
    }

    /* Finalize */
    ret = ABT_test_finalize(0);

    free(pools);
    free(xstreams);

    return ret;
}
Пример #3
0
int main(int argc, char *argv[])
{
  // Argobots definitions
  int ret, i;
  num_threads = 0;

  // Sockets Definitions
  int fd, cfd;
  struct sockaddr_in svaddr;
  struct sockaddr_storage claddr;
  socklen_t addrlen;
  signal(SIGINT, sighandler);

  ABT_init(argc, argv);

  int abts = 0;//ABT_snoozer_xstream_self_set();
  if (abts != 0){
    fprintf(stderr, "%s\n", "ABT snoozer xstream self error");
    exit(-1);
  }
  xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * CORES);
  ret = ABT_xstream_self(&xstreams[0]);
  if(ret != 0){
    fprintf(stderr, "%s\n", "ABT xstream self error");
    exit(-1);
  }
  ret = ABT_xstream_get_main_pools(xstreams[0], 1, &pool);
  if(ret != 0){
    fprintf(stderr, "%s\n", "ABT xstream pool error");
    exit(-1);
  }
  ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, &g_pool);

  /* ES creation */
  ABT_xstream_self(&xstreams[0]);
  ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
                                   1, &g_pool);
  for (i = 1; i < CORES; i++) {
      ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pool,
                               ABT_SCHED_CONFIG_NULL, &xstreams[i]);
      ABT_xstream_start(xstreams[i]);
  }
  //abtio = abt_io_init(CORES);
  //assert(abtio != NULL);

  fd = socket(AF_INET, SOCK_STREAM, 0);
  if(fd < 0){
    fprintf(stderr, "%s\n", "Socket creating error");
    exit(-1);
  }
  memset(&svaddr, 0, sizeof(struct sockaddr_in));
  svaddr.sin_family = AF_INET;
  svaddr.sin_addr.s_addr = INADDR_ANY;
  svaddr.sin_port = htons(PORTNUM);

  ret = bind(fd, (struct sockaddr *) &svaddr, sizeof(struct sockaddr_in));
  if(ret < 0){
    fprintf(stderr, "%s\n", "Socket binding error");
    exit(-1);
  }
  if( listen(fd, BACKLOG) == -1)
  {
    fprintf(stderr, "%s\n", "Socket listening error");
    exit(-1);
  }
  int aio_sock =  abt_io_socket_initialize(1000);
  if(aio_sock <= 0)
    printf("Initialize io_sock error\n");
  //printf("USer: main: epoll fd = %d\n", aio_sock);
  addrlen = sizeof(struct sockaddr_storage);
  while(1){
    cfd = accept(fd, (struct sockaddr *)&claddr, &addrlen);
    if(cfd == -1){
      fprintf(stderr, "%s\n", "Socket accepting error");
      exit(-1);
    }
    //printf("accepted client on file descriptor %d\n", cfd);
    struct thread_args* ta = (struct thread_args*) malloc (sizeof(ta));
    ta->epfd = aio_sock;
    ta->fd = cfd;
    ABT_thread_create(g_pool, handle_client, (void *) ta, ABT_THREAD_ATTR_NULL, threads[num_threads++]);
  }

  return 0;
}
Пример #4
0
int main(int argc, char *argv[])
{
    int i, j;
    int ret, expected;
    int num_xstreams = DEFAULT_NUM_XSTREAMS;
    int num_threads = DEFAULT_NUM_THREADS;
    if (argc > 1) num_xstreams = atoi(argv[1]);
    assert(num_xstreams >= 0);
    if (argc > 2) num_threads = atoi(argv[2]);
    assert(num_threads >= 0);

    ABT_mutex mutex;
    ABT_xstream *xstreams;
    thread_arg_t **args;
    xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams);
    assert(xstreams != NULL);
    args = (thread_arg_t **)malloc(sizeof(thread_arg_t *) * num_xstreams);
    assert(args != NULL);
    for (i = 0; i < num_xstreams; i++) {
        args[i] = (thread_arg_t *)malloc(sizeof(thread_arg_t) * num_threads);
    }

    /* Initialize */
    ABT_test_init(argc, argv);

    /* Create Execution Streams */
    ret = ABT_xstream_self(&xstreams[0]);
    ABT_TEST_ERROR(ret, "ABT_xstream_self");
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_create(ABT_SCHED_NULL, &xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_create");
    }

    /* Get the pools attached to an execution stream */
    ABT_pool *pools;
    pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams);
    for (i = 0; i < num_xstreams; i++) {
        ret = ABT_xstream_get_main_pools(xstreams[i], 1, pools+i);
        ABT_TEST_ERROR(ret, "ABT_xstream_get_main_pools");
    }

    /* Create a mutex */
    ret = ABT_mutex_create(&mutex);
    ABT_TEST_ERROR(ret, "ABT_mutex_create");

    /* Create threads */
    for (i = 0; i < num_xstreams; i++) {
        for (j = 0; j < num_threads; j++) {
            int tid = i * num_threads + j + 1;
            args[i][j].id = tid;
            args[i][j].mutex = mutex;
            ret = ABT_thread_create(pools[i],
                    thread_func, (void *)&args[i][j], ABT_THREAD_ATTR_NULL,
                    NULL);
            ABT_TEST_ERROR(ret, "ABT_thread_create");
        }
    }

    /* Switch to other user level threads */
    ABT_thread_yield();

    /* Join Execution Streams */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_join(xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_join");
    }

    /* Free the mutex */
    ret = ABT_mutex_free(&mutex);
    ABT_TEST_ERROR(ret, "ABT_mutex_free");

    /* Free Execution Streams */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_free(&xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_free");
    }

    /* Validation */
    expected = num_xstreams * num_threads;
    if (g_counter != expected) {
        printf("g_counter = %d\n", g_counter);
    }

    /* Finalize */
    ret = ABT_test_finalize(g_counter != expected);

    for (i = 0; i < num_xstreams; i++) {
        free(args[i]);
    }
    free(args);
    free(xstreams);
    free(pools);

    return ret;
}
int main(int argc, char *argv[])
{
    int i, j;
    int ntasks;
    int start, end;
    int num_xstreams;
    ABT_xstream *xstreams;
    vector_scal_task_args_t *args;
    struct timeval t_start, t_end, t_end2;
    char *str, *endptr;
    float *a;

    num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS;
    if (argc > 2) {
        str = argv[2];
    }

    ntasks = argc > 2 ? strtoll(str, &endptr, 10) : NUM_TASKS;
    if (ntasks < num_xstreams) {
        ntasks = num_xstreams;
    }

    printf("# of ESs: %d\n", num_xstreams);

    xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams);
    args = (vector_scal_task_args_t *)malloc(sizeof(vector_scal_task_args_t)
                                             * num_xstreams);
    g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams);

    a = malloc(sizeof(float) * ntasks);

    for (i = 0; i < ntasks; i++) {
        a[i] = i + 100.0f;
    }

    /* initialization */
    ABT_init(argc, argv);

    for (i = 0; i < num_xstreams; i++) {
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
                              &g_pools[i]);
    }

    /* ES creation */
    ABT_xstream_self(&xstreams[0]);
    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
                                     1, &g_pools[0]);

    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i],
                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
        ABT_xstream_start(xstreams[i]);
    }

    /* Work here */
    start = end = 0;
    int bloc = ntasks / num_xstreams;
    int rest = ntasks % num_xstreams;
    gettimeofday(&t_start, NULL);
    for (j = 0; j < num_xstreams; j++) {
        start = end;
        end = start + bloc;
        if (j < rest) {
            end++;
        }
        args[j].ptr = a;
        args[j].value = 0.9f;
        args[j].start = start;
        args[j].end = end;
        args[j].id = j;
        ABT_thread_create_on_xstream(xstreams[j], task_creator,
                                     (void *)&args[j], ABT_THREAD_ATTR_NULL,
                                     NULL);
    }
    gettimeofday(&t_end2, NULL);

    for (i = 0; i < num_xstreams; i++) {
        size_t size;
        do {
            ABT_thread_yield();
            ABT_pool_get_size(g_pools[i], &size);
        } while (size != 0);
    }

    gettimeofday(&t_end, NULL);

    for (i = 0; i < ntasks; i++) {
        if (a[i] != (i + 100.0f) * 0.9f) {
            printf("error: a[%d]\n", i);
        }
    }

    double time = (t_end.tv_sec * 1000000 + t_end.tv_usec)
                - (t_start.tv_sec * 1000000 + t_start.tv_usec);
    double time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec)
                 - (t_start.tv_sec * 1000000 + t_start.tv_usec);

    printf("nxstreams: %d\nntasks %d\nTime(s): %f\n",
           num_xstreams, ntasks, time / 1000000.0);
    /* join ESs */
    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_join(xstreams[i]);
        ABT_xstream_free(&xstreams[i]);
    }
    printf("Creation time=%f\n", time2 / 1000000.0);
    ABT_finalize();

    free(xstreams);

    return EXIT_SUCCESS;
}
Пример #6
0
int main(int argc, char *argv[])
{
    int i, j, ret;
    int num_xstreams = DEFAULT_NUM_XSTREAMS;
    int num_threads = DEFAULT_NUM_THREADS;
    int num_tasks = DEFAULT_NUM_TASKS;
    if (argc > 1) num_xstreams = atoi(argv[1]);
    assert(num_xstreams >= 0);
    if (argc > 2) num_threads = atoi(argv[2]);
    assert(num_threads >= 0);
    if (argc > 3) num_tasks = atoi(argv[3]);
    assert(num_tasks >= 0);

    ABT_xstream *xstreams;
    ABT_thread **threads;
    thread_arg_t **thread_args;
    ABT_task *tasks;
    task_arg_t *task_args;

    xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams);
    threads = (ABT_thread **)malloc(sizeof(ABT_thread *) * num_xstreams);
    thread_args = (thread_arg_t **)malloc(sizeof(thread_arg_t*) * num_xstreams);
    for (i = 0; i < num_xstreams; i++) {
        threads[i] = (ABT_thread *)malloc(sizeof(ABT_thread) * num_threads);
        for (j = 0; j < num_threads; j++) {
            threads[i][j] = ABT_THREAD_NULL;
        }
        thread_args[i] = (thread_arg_t *)malloc(sizeof(thread_arg_t) *
                                                num_threads);
    }
    tasks = (ABT_task *)malloc(sizeof(ABT_task) * num_tasks);
    task_args = (task_arg_t *)malloc(sizeof(task_arg_t) * num_tasks);

    /* Initialize */
    ABT_test_init(argc, argv);

    /* Create Execution Streams */
    ret = ABT_xstream_self(&xstreams[0]);
    ABT_TEST_ERROR(ret, "ABT_xstream_self");
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_create(ABT_SCHED_NULL, &xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_create");
    }

    /* Get the pools attached to an execution stream */
    ABT_pool *pools;
    pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams);
    for (i = 0; i < num_xstreams; i++) {
        ret = ABT_xstream_get_main_pools(xstreams[i], 1, pools+i);
        ABT_TEST_ERROR(ret, "ABT_xstream_get_main_pools");
    }

    /* Create threads */
    for (i = 0; i < num_xstreams; i++) {
        for (j = 0; j < num_threads; j++) {
            int tid = i * num_threads + j + 1;
            thread_args[i][j].id = tid;
            thread_args[i][j].num_threads = num_threads;
            thread_args[i][j].threads = &threads[i][0];
            ret = ABT_thread_create(pools[i],
                    thread_func, (void *)&thread_args[i][j],
                    ABT_THREAD_ATTR_NULL,
                    &threads[i][j]);
            ABT_TEST_ERROR(ret, "ABT_thread_create");
        }
    }

    /* Create tasks with task_func1 */
    for (i = 0; i < num_tasks; i++) {
        size_t num = 100 + i;
        ret = ABT_task_create(pools[i % num_xstreams],
                              task_func1, (void *)num,
                              NULL);
        ABT_TEST_ERROR(ret, "ABT_task_create");
    }

    /* Create tasks with task_func2 */
    for (i = 0; i < num_tasks; i++) {
        task_args[i].num = 100 + i;
        ret = ABT_task_create(pools[i % num_xstreams],
                              task_func2, (void *)&task_args[i],
                              &tasks[i]);
        ABT_TEST_ERROR(ret, "ABT_task_create");
    }

    /* Switch to other work units */
    ABT_thread_yield();

    /* Results of task_funcs2 */
    for (i = 0; i < num_tasks; i++) {
        ABT_task_state state;
        do {
            ABT_task_get_state(tasks[i], &state);
            ABT_thread_yield();
        } while (state != ABT_TASK_STATE_TERMINATED);

        ABT_test_printf(1, "task_func2: num=%lu result=%llu\n",
               task_args[i].num, task_args[i].result);

        /* Free named tasks */
        ret = ABT_task_free(&tasks[i]);
        ABT_TEST_ERROR(ret, "ABT_task_free");
    }

    /* Join Execution Streams */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_join(xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_join");
    }

    /* Free Execution Streams */
    for (i = 0; i < num_xstreams; i++) {
        for (j = 0; j < num_threads; j++) {
            ret = ABT_thread_free(&threads[i][j]);
            ABT_TEST_ERROR(ret, "ABT_thread_free");
        }

        if (i == 0) continue;

        ret = ABT_xstream_free(&xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_free");
    }

    /* Finalize */
    ret = ABT_test_finalize(0);

    for (i = 0; i < num_xstreams; i++) {
        free(thread_args[i]);
        free(threads[i]);
    }
    free(thread_args);
    free(threads);
    free(task_args);
    free(tasks);
    free(pools);
    free(xstreams);

    return ret;
}
Пример #7
0
int main(int argc, char *argv[])
{
    int i, ret;
    ABT_xstream *xstreams;
    ABT_pool *pools;
    ABT_thread *threads;
    int num_xstreams = DEFAULT_NUM_XSTREAMS;

    /* Initialize */
    ABT_test_init(argc, argv);

    if (argc >= 2) {
        num_xstreams = ABT_test_get_arg_val(ABT_TEST_ARG_N_ES);
        num_threads  = ABT_test_get_arg_val(ABT_TEST_ARG_N_ULT);
    }

    ABT_test_printf(1, "# of ESs    : %d\n", num_xstreams);
    ABT_test_printf(1, "# of ULTs/ES: %d\n", num_threads);

    xstreams = (ABT_xstream *)malloc(num_xstreams * sizeof(ABT_xstream));
    pools = (ABT_pool *)malloc(num_xstreams * sizeof(ABT_pool));
    threads = (ABT_thread *)malloc(num_xstreams * sizeof(ABT_thread));

    /* Create Execution Streams */
    ret = ABT_xstream_self(&xstreams[0]);
    ABT_TEST_ERROR(ret, "ABT_xstream_self");
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_create(ABT_SCHED_NULL, &xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_create");
    }

    /* Get the first pool of each ES */
    for (i = 0; i < num_xstreams; i++) {
        ret = ABT_xstream_get_main_pools(xstreams[i], 1, &pools[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_get_main_pools");
    }

    /* Create one ULT for each ES */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_thread_create(pools[i], thread_create, (void *)0,
                                ABT_THREAD_ATTR_NULL, &threads[i]);
        ABT_TEST_ERROR(ret, "ABT_thread_create");
    }

    thread_create((void *)0);

    /* Join and free ULTs */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_thread_free(&threads[i]);
        ABT_TEST_ERROR(ret, "ABT_thread_free");
    }

    /* Join and free ESs */
    for (i = 1; i < num_xstreams; i++) {
        ret = ABT_xstream_join(xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_join");
        ret = ABT_xstream_free(&xstreams[i]);
        ABT_TEST_ERROR(ret, "ABT_xstream_free");
    }

    /* Finalize */
    ret = ABT_test_finalize(0);

    free(xstreams);
    free(pools);
    free(threads);

    return ret;
}
Пример #8
0
int main(int argc, char *argv[])
{
    ABT_pool (*all_pools)[2];
    ABT_sched *scheds;
    ABT_thread *top_threads;
    size_t i, t;
    uint64_t t_start;

    /* initialize */
    ABT_test_init(argc, argv);

    for (i = 0; i < T_LAST; i++) {
        t_times[i] = 0;
    }

    /* read command-line arguments */
    num_xstreams = ABT_test_get_arg_val(ABT_TEST_ARG_N_ES);
    num_threads  = ABT_test_get_arg_val(ABT_TEST_ARG_N_ULT);
    iter = ABT_test_get_arg_val(ABT_TEST_ARG_N_ITER);

    g_xstreams = (ABT_xstream *)malloc(num_xstreams * sizeof(ABT_xstream));
    g_pools    = (ABT_pool *)malloc(num_xstreams * sizeof(ABT_pool));
    g_threads  = (ABT_thread **)malloc(num_xstreams * sizeof(ABT_thread *));
    for (i = 0; i < num_xstreams; i++) {
        g_threads[i] = (ABT_thread *)malloc(num_threads * sizeof(ABT_thread));
    }
    all_pools = (ABT_pool (*)[2])malloc(num_xstreams * sizeof(ABT_pool) * 2);
    scheds = (ABT_sched *)malloc(num_xstreams * sizeof(ABT_sched));
    top_threads = (ABT_thread *)malloc(num_xstreams * sizeof(ABT_thread));

    /* create pools and schedulers */
    for (i = 0; i < num_xstreams; i++) {
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPSC, ABT_TRUE,
                              &all_pools[i][0]);
        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_PRIV, ABT_TRUE,
                              &all_pools[i][1]);
        g_pools[i] = all_pools[i][1];

        ABT_sched_create_basic(ABT_SCHED_DEFAULT, 2, all_pools[i],
                               ABT_SCHED_CONFIG_NULL, &scheds[i]);
    }

    /* create ESs */
    ABT_xstream_self(&g_xstreams[0]);
    ABT_xstream_set_main_sched(g_xstreams[0], scheds[0]);
    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_create(scheds[i], &g_xstreams[i]);
    }

    /* benchmarking */
    for (t = 0; t < T_LAST; t++) {
        void (*test_fn)(void *);

        if (t == T_YIELD) {
            if (t_times[T_YIELD_ALL] > t_times[T_YIELD_OVERHEAD]) {
                t_times[t] = t_times[T_YIELD_ALL] - t_times[T_YIELD_OVERHEAD];
            } else {
                t_times[t] = 0;
            }
            continue;
        } else if (t == T_YIELD_TO) {
            if (t_times[T_YIELD_TO_ALL] > t_times[T_YIELD_TO_OVERHEAD]) {
                t_times[t] = t_times[T_YIELD_TO_ALL] - t_times[T_YIELD_TO_OVERHEAD];
            } else {
                t_times[t] = 0;
            }
            continue;
        }

        switch (t) {
            case T_CREATE_JOIN:        test_fn = test_create_join;
                                       break;
            case T_CREATE_UNNAMED:     test_fn = test_create_unnamed;
                                       break;
            case T_YIELD_OVERHEAD:     test_fn = test_yield_overhead;
                                       break;
            case T_YIELD_ALL:          test_fn = test_yield;
                                       break;
            case T_YIELD_TO_OVERHEAD:  test_fn = test_yield_to_overhead;
                                       break;
            case T_YIELD_TO_ALL:       test_fn = test_yield_to;
                                       break;
#ifdef TEST_MIGRATE_TO
            case T_MIGRATE_TO_XSTREAM: test_fn = test_migrate_to_xstream;
                                       break;
#endif
            default: assert(0);
        }

        /* warm-up */
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_create(all_pools[i][0], test_fn, (void *)i,
                              ABT_THREAD_ATTR_NULL, &top_threads[i]);
        }
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_free(&top_threads[i]);
        }

        /* measurement */
#ifdef USE_TIME
        t_start = ABT_get_wtime();
#else
        t_start = ABT_test_get_cycles();
#endif
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_create(all_pools[i][0], test_fn, (void *)i,
                              ABT_THREAD_ATTR_NULL, &top_threads[i]);
        }
        for (i = 0; i < num_xstreams; i++) {
            ABT_thread_free(&top_threads[i]);
        }
#ifdef USE_TIME
        t_times[t] = ABT_get_wtime() - t_start;
#else
        t_times[t] = ABT_test_get_cycles() - t_start;
#endif
    }

    /* join and free */
    for (i = 1; i < num_xstreams; i++) {
        ABT_xstream_join(g_xstreams[i]);
        ABT_xstream_free(&g_xstreams[i]);
    }

    /* finalize */
    ABT_test_finalize(0);

    /* compute the execution time for one iteration */
    for (i = 0; i < T_LAST; i++) {
        t_times[i] = t_times[i] / iter / num_threads;
    }

    /* output */
    int line_size = 56;
    ABT_test_print_line(stdout, '-', line_size);
    printf("%s\n", "Argobots");
    ABT_test_print_line(stdout, '-', line_size);
    printf("# of ESs        : %d\n", num_xstreams);
    printf("# of ULTs per ES: %d\n", num_threads);
    ABT_test_print_line(stdout, '-', line_size);
    printf("Avg. execution time (in seconds, %d times)\n", iter);
    ABT_test_print_line(stdout, '-', line_size);
    printf("%-20s %-s\n", "operation", "time");
    ABT_test_print_line(stdout, '-', line_size);
    for (i = 0; i < T_LAST; i++) {
#ifdef USE_TIME
        printf("%-19s  %.9lf\n", t_names[i], t_times[i]);
#else
        printf("%-19s  %11" PRIu64 "\n", t_names[i], t_times[i]);
#endif
    }
    ABT_test_print_line(stdout, '-', line_size);

    free(g_xstreams);
    free(g_pools);
    for (i = 0; i < num_xstreams; i++) {
        free(g_threads[i]);
    }
    free(g_threads);
    free(all_pools);
    free(scheds);
    free(top_threads);

    return EXIT_SUCCESS;
}