/* Tasklet body that computes Fibonacci numbers recursively.
 *
 * arguments: a task_args* holding n (the index to compute), a result
 * accumulator, a mutex guarding that accumulator, and a pointer to the
 * parent node's task_args (NULL at the root).
 *
 * Recursive case: spawns two child tasklets for Fib(n-1) and Fib(n-2)
 * into the shared pool g_pool; this node's args stay alive so children
 * can report into it.  Base case: contributes 1 and then walks the
 * parent chain, adding this subtree's result into each ancestor.
 *
 * Propagation protocol (appears to be): after `parent->result += result`,
 * if the sum equals our contribution then the parent's accumulator was 0,
 * i.e. the sibling subtree has not finished yet — so we stop climbing
 * (flag = 0) and leave the parent node for the sibling to complete.
 * Otherwise we were the second finisher: we carry the completed sum
 * upward and free the now-finished parent node.  Ownership: whichever
 * task observes a node complete frees that node's mutex and memory. */
void fibonacci_task(void *arguments)
{
    int n, result;
    task_args *a1, *a2, *parent, *temp;
    ABT_task t1, t2;

    task_args *args = (task_args *)arguments;
    n = args->n;
    parent = args->parent;

    /* checking for base cases */
    if (n <= 2) {
        args->result = 1;
        result = 1;
        int flag = 1;
        while (flag && parent != NULL) {
            ABT_mutex_lock(parent->mutex);
            parent->result += result;
            /* First finisher at this node: sibling still pending, stop. */
            if (result == parent->result) flag = 0;
            ABT_mutex_unlock(parent->mutex);
            /* NOTE(review): parent->result is re-read outside the lock;
             * presumably safe because when flag stayed 1 both children are
             * done and no one else writes it — confirm. */
            result = parent->result;
            temp = parent->parent;
            /* Second finisher owns the completed parent node: free it. */
            if (flag && temp) {
                ABT_mutex_free(&parent->mutex);
                free(parent);
            }
            parent = temp;
        }
        /* This leaf's own node is done; release its mutex.  The root
         * node (parent == NULL) is freed by the caller, not here. */
        ABT_mutex_free(&args->mutex);
        if (args->parent) {
            free(args);
        }
    } else {
        /* Spawn Fib(n-1) child reporting into this node. */
        a1 = (task_args *)malloc(sizeof(task_args));
        a1->n = n - 1;
        a1->result = 0;
        ABT_mutex_create(&a1->mutex);
        a1->parent = args;
        ABT_task_create(g_pool, fibonacci_task, a1, &t1);

        /* Spawn Fib(n-2) child reporting into this node. */
        a2 = (task_args *)malloc(sizeof(task_args));
        a2->n = n - 2;
        a2->result = 0;
        ABT_mutex_create(&a2->mutex);
        a2->parent = args;
        ABT_task_create(g_pool, fibonacci_task, a2, &t2);
    }
}
static void init(int num_xstreams) { int i; ABT_mutex_create(&g_mutex); g_xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * max_xstreams); g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * max_xstreams); g_signal = (int *)calloc(max_xstreams, sizeof(int)); for (i = 0; i < max_xstreams; i++) { g_xstreams[i] = ABT_XSTREAM_NULL; g_pools[i] = ABT_POOL_NULL; } /* Create pools */ for (i = 0; i < max_xstreams; i++) { ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, &g_pools[i]); } /* Create ESs */ ABT_xstream_self(&g_xstreams[0]); for (i = 1; i < num_xstreams; i++) { create_xstream(i); } }
/* Initialize runtime 1's global state: record the ESs handed to us,
 * create the shared pool / mutex / barrier, register the stop/add
 * xstream event callbacks, and size the application data from the
 * APP_NUM_COMPS / APP_NUM_ITERS environment variables (falling back to
 * the compile-time defaults NUM_COMPS / NUM_ITERS). */
void rt1_init(int max_xstreams, ABT_xstream *xstreams)
{
    int i;
    char *env;

    rt1_data = (rt1_data_t *)calloc(1, sizeof(rt1_data_t));
    rt1_data->max_xstreams = max_xstreams;
    rt1_data->num_xstreams = max_xstreams;
    rt1_data->xstreams = (ABT_xstream *)malloc(max_xstreams * sizeof(ABT_xstream));
    for (i = 0; i < max_xstreams; i++) {
        rt1_data->xstreams[i] = xstreams[i];
    }

    ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
                          &rt1_data->pool);
    ABT_mutex_create(&rt1_data->mutex);
    ABT_barrier_create(rt1_data->num_xstreams, &rt1_data->bar);

    /* Add event callbacks */
    /* NOTE: Each runtime needs to register callbacks only once for each
     * event.  If it registers more than once, callbacks will be invoked as
     * many times as they are registered. */
    ABT_event_add_callback(ABT_EVENT_STOP_XSTREAM,
                           rt1_ask_stop_xstream, rt1_data,
                           rt1_act_stop_xstream, rt1_data,
                           &rt1_data->stop_cb_id);
    ABT_event_add_callback(ABT_EVENT_ADD_XSTREAM,
                           rt1_ask_add_xstream, rt1_data,
                           rt1_act_add_xstream, rt1_data,
                           &rt1_data->add_cb_id);

    /* application data: environment overrides, else defaults */
    env = getenv("APP_NUM_COMPS");
    rt1_data->num_comps = env ? atoi(env) : NUM_COMPS;
    env = getenv("APP_NUM_ITERS");
    rt1_data->num_iters = env ? atoi(env) : NUM_ITERS;

    size_t num_elems = rt1_data->max_xstreams * rt1_data->num_comps * 2;
    rt1_data->app_data = (double *)calloc(num_elems, sizeof(double));

    printf("# of WUs created per ES: %d\n", rt1_data->num_comps);
    printf("# of iterations per WU : %d\n", rt1_data->num_iters);
}
/* One-time initialization of the runtime-wide __kmp_global structure.
 * Brings up Argobots first, then resets every field of __kmp_global to
 * its default, creates the global Argobots mutexes, and finally sets
 * __kmp_init_global = TRUE to mark the globals as usable. */
void __kmp_global_initialize(void)
{
    int i;
    int status;

    /* Initialize Argobots before other initializations. */
    status = ABT_init(0, NULL);
    KMP_CHECK_SYSFAIL( "ABT_init", status );

    __kmp_global.g = { 0 };

    /* --------------------------------------------------------------------------- */
    /* map OMP 3.0 schedule types with our internal schedule types */
    static sched_type sch_map[ kmp_sched_upper - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ] = {
        kmp_sch_static_chunked,     // ==> kmp_sched_static = 1
        kmp_sch_dynamic_chunked,    // ==> kmp_sched_dynamic = 2
        kmp_sch_guided_chunked,     // ==> kmp_sched_guided = 3
        kmp_sch_auto,               // ==> kmp_sched_auto = 4
        kmp_sch_trapezoidal         // ==> kmp_sched_trapezoidal = 101
                                    // will likely not used, introduced here just to debug the code
                                    // of public intel extension schedules
    };
    KMP_MEMCPY(__kmp_global.sch_map, sch_map, sizeof(sch_map));

#if OMP_40_ENABLED
    /* OpenMP 4.0 proc-bind / places defaults. */
    __kmp_global.nested_proc_bind = { NULL, 0, 0 };
    __kmp_global.affinity_num_places = 0;
#endif
    __kmp_global.place_num_sockets = 0;
    __kmp_global.place_socket_offset = 0;
    __kmp_global.place_num_cores = 0;
    __kmp_global.place_core_offset = 0;
    __kmp_global.place_num_threads_per_core = 0;

    /* Tasking defaults. */
    __kmp_global.tasking_mode = tskm_task_teams;
    __kmp_global.task_stealing_constraint = 1; /* Constrain task stealing by default */
#if OMP_41_ENABLED
    __kmp_global.max_task_priority = 0;
#endif

    __kmp_global.settings = FALSE;
    __kmp_global.duplicate_library_ok = 0;
    __kmp_global.force_reduction_method = reduction_method_not_defined;
    __kmp_global.determ_red = FALSE;
    __kmp_global.cpuinfo = { 0 };

    /* ------------------------------------------------------------------------ */
    /* Initialization-phase flags: all cleared until the corresponding
     * __kmp_*_initialize step runs. */
    __kmp_global.init_serial = FALSE;
    __kmp_global.init_gtid = FALSE;
    __kmp_global.init_common = FALSE;
    __kmp_global.init_middle = FALSE;
    __kmp_global.init_parallel = FALSE;
    __kmp_global.init_runtime = FALSE;
    __kmp_global.init_counter = 0;
    __kmp_global.root_counter = 0;
    __kmp_global.version = 0;

    /* list of address of allocated caches for commons */
    __kmp_global.threadpriv_cache_list = NULL;

    /* Global Locks */
    ABT_mutex_create(&__kmp_global.stdio_lock);
    ABT_mutex_create(&__kmp_global.cat_lock);
    ABT_mutex_create(&__kmp_global.initz_lock);
    ABT_mutex_create(&__kmp_global.task_team_lock);
    for (i = 0; i < KMP_NUM_CRIT_LOCKS; i++) {
        ABT_mutex_create(&__kmp_global.crit_lock[i]);
    }

    __kmp_global.library = library_none;
    __kmp_global.sched = kmp_sch_default; /* scheduling method for runtime scheduling */
    __kmp_global.sched_static = kmp_sch_static_greedy; /* default static scheduling method */
    __kmp_global.sched_guided = kmp_sch_guided_iterative_chunked; /* default guided scheduling method */
    __kmp_global.sched_auto = kmp_sch_guided_analytical_chunked; /* default auto scheduling method */
    __kmp_global.chunk = 0;

    /* Stack-size defaults for worker threads. */
    __kmp_global.stksize = KMP_DEFAULT_STKSIZE;
    __kmp_global.stkoffset = KMP_DEFAULT_STKOFFSET;
    __kmp_global.stkpadding = KMP_MIN_STKPADDING;
    __kmp_global.malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR;

    /* Flags recording which environment variables were actually seen. */
    __kmp_global.env_chunk = FALSE;       /* KMP_CHUNK specified? */
    __kmp_global.env_stksize = FALSE;     /* KMP_STACKSIZE specified? */
    __kmp_global.env_omp_stksize = FALSE; /* OMP_STACKSIZE specified? */
    __kmp_global.env_all_threads = FALSE; /* KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
    __kmp_global.env_omp_all_threads = FALSE; /* OMP_THREAD_LIMIT specified? */
    __kmp_global.env_checks = FALSE;      /* KMP_CHECKS specified? */
    __kmp_global.env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */

    __kmp_global.generate_warnings = kmp_warnings_low;
    __kmp_global.reserve_warn = 0;
#ifdef DEBUG_SUSPEND
    __kmp_global.suspend_count = 0;
#endif

    /* ------------------------------------------------------------------------- */
    __kmp_global.allThreadsSpecified = 0;
    __kmp_global.align_alloc = CACHE_LINE;
    __kmp_global.xproc = 0;
    __kmp_global.avail_proc = 0;
    __kmp_global.sys_min_stksize = KMP_MIN_STKSIZE;
    __kmp_global.sys_max_nth = KMP_MAX_NTH;
    __kmp_global.max_nth = 0;
    __kmp_global.threads_capacity = 0;
    __kmp_global.dflt_team_nth = 0;
    __kmp_global.dflt_team_nth_ub = 0;
    __kmp_global.tp_capacity = 0;
    __kmp_global.tp_cached = 0;
    __kmp_global.dflt_nested = TRUE;
    __kmp_global.dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */
#ifdef KMP_DFLT_NTH_CORES
    __kmp_global.ncores = 0;
#endif
    __kmp_global.abort_delay = 0;

    /* Initialize the library data structures when we fork a child process, defaults to TRUE */
    __kmp_global.need_register_atfork = TRUE; /* At initialization, call pthread_atfork to install fork handler */
    __kmp_global.need_register_atfork_specified = TRUE;
    __kmp_global.tls_gtid_min = INT_MAX;
    __kmp_global.foreign_tp = TRUE;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    /* x86 FP-control propagation defaults. */
    __kmp_global.inherit_fp_control = TRUE;
    __kmp_global.init_x87_fpu_control_word = 0;
    __kmp_global.init_mxcsr = 0;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
#if KMP_NESTED_HOT_TEAMS
    __kmp_global.hot_teams_mode = 0; /* 0 - free extra threads when reduced */
                                     /* 1 - keep extra threads when reduced */
    __kmp_global.hot_teams_max_level = 1; /* nesting level of hot teams */
#endif
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_global.mic_type = non_mic;
#endif
#ifdef USE_LOAD_BALANCE
    __kmp_global.load_balance_interval = 1.0;
#endif /* USE_LOAD_BALANCE */
    __kmp_global.nested_nth = { NULL, 0, 0 };
#if OMP_40_ENABLED
    __kmp_global.display_env = FALSE;
    __kmp_global.display_env_verbose = FALSE;
    __kmp_global.omp_cancellation = FALSE;
#endif

    /* ------------------------------------------------------ */
    /* STATE mostly syncronized with global lock */
    /* data written to rarely by masters, read often by workers */
    __kmp_global.threads = NULL;
    __kmp_global.team_pool = NULL;
    __kmp_global.thread_pool = NULL;

    /* data read/written to often by masters */
    __kmp_global.nth = 0;
    __kmp_global.all_nth = 0;
    __kmp_global.thread_pool_nth = 0;
    __kmp_global.thread_pool_active_nth = 0;
    __kmp_global.root = NULL;

    /* ------------------------------------------------------ */
    /* Publish that the globals are initialized. */
    __kmp_init_global = TRUE;
}
/* Main function */ int main(int argc, char *argv[]) { int n, i, result, expected; int num_xstreams; ABT_xstream *xstreams; ABT_thread thread; thread_args args_thread; ABT_task task; task_args *args_task; if (argc > 1 && strcmp(argv[1], "-h") == 0) { printf("Usage: %s [N=10] [num_ES=4]\n", argv[0]); return EXIT_SUCCESS; } n = argc > 1 ? atoi(argv[1]) : N; num_xstreams = argc > 2 ? atoi(argv[2]) : NUM_XSTREAMS; printf("# of ESs: %d\n", num_xstreams); if (n <= 2) { result = 1; goto fn_result; } /* initialization */ ABT_init(argc, argv); /* shared pool creation */ ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, &g_pool); /* ES creation */ xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams); ABT_xstream_self(&xstreams[0]); ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT, 1, &g_pool); for (i = 1; i < num_xstreams; i++) { ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pool, ABT_SCHED_CONFIG_NULL, &xstreams[i]); ABT_xstream_start(xstreams[i]); } /* creating thread */ args_thread.n = n - 1; args_thread.eventual = ABT_EVENTUAL_NULL; ABT_thread_create(g_pool, fibonacci_thread, &args_thread, ABT_THREAD_ATTR_NULL, &thread); /* creating task */ args_task = (task_args *)malloc(sizeof(task_args)); args_task->n = n - 2; args_task->result = 0; ABT_mutex_create(&args_task->mutex); args_task->parent = NULL; ABT_task_create(g_pool, fibonacci_task, args_task, &task); /* switch to other user-level threads */ ABT_thread_yield(); /* join other threads */ ABT_thread_join(thread); ABT_thread_free(&thread); /* join ESs */ for (i = 1; i < num_xstreams; i++) { ABT_xstream_join(xstreams[i]); ABT_xstream_free(&xstreams[i]); } result = args_thread.result + args_task->result; free(args_task); ABT_finalize(); free(xstreams); fn_result: printf("Fib(%d): %d\n", n, result); expected = verify(n); if (result != expected) { fprintf(stderr, "ERROR: expected=%d\n", expected); exit(EXIT_FAILURE); } return EXIT_SUCCESS; }
/* Creates a GLT mutex by delegating to the Argobots backend.
 * mutex: output handle, written by ABT_mutex_create on success.
 * The CHECK macro validates the return code against ABT_SUCCESS
 * (error-handling behavior is defined by CHECK, declared elsewhere). */
GLT_func_prefix void glt_mutex_create(GLT_mutex * mutex) { CHECK(ABT_mutex_create(mutex),ABT_SUCCESS); }
int main(int argc, char *argv[]) { int i, j; int ret, expected; int num_xstreams = DEFAULT_NUM_XSTREAMS; int num_threads = DEFAULT_NUM_THREADS; if (argc > 1) num_xstreams = atoi(argv[1]); assert(num_xstreams >= 0); if (argc > 2) num_threads = atoi(argv[2]); assert(num_threads >= 0); ABT_mutex mutex; ABT_xstream *xstreams; thread_arg_t **args; xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams); assert(xstreams != NULL); args = (thread_arg_t **)malloc(sizeof(thread_arg_t *) * num_xstreams); assert(args != NULL); for (i = 0; i < num_xstreams; i++) { args[i] = (thread_arg_t *)malloc(sizeof(thread_arg_t) * num_threads); } /* Initialize */ ABT_test_init(argc, argv); /* Create Execution Streams */ ret = ABT_xstream_self(&xstreams[0]); ABT_TEST_ERROR(ret, "ABT_xstream_self"); for (i = 1; i < num_xstreams; i++) { ret = ABT_xstream_create(ABT_SCHED_NULL, &xstreams[i]); ABT_TEST_ERROR(ret, "ABT_xstream_create"); } /* Get the pools attached to an execution stream */ ABT_pool *pools; pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams); for (i = 0; i < num_xstreams; i++) { ret = ABT_xstream_get_main_pools(xstreams[i], 1, pools+i); ABT_TEST_ERROR(ret, "ABT_xstream_get_main_pools"); } /* Create a mutex */ ret = ABT_mutex_create(&mutex); ABT_TEST_ERROR(ret, "ABT_mutex_create"); /* Create threads */ for (i = 0; i < num_xstreams; i++) { for (j = 0; j < num_threads; j++) { int tid = i * num_threads + j + 1; args[i][j].id = tid; args[i][j].mutex = mutex; ret = ABT_thread_create(pools[i], thread_func, (void *)&args[i][j], ABT_THREAD_ATTR_NULL, NULL); ABT_TEST_ERROR(ret, "ABT_thread_create"); } } /* Switch to other user level threads */ ABT_thread_yield(); /* Join Execution Streams */ for (i = 1; i < num_xstreams; i++) { ret = ABT_xstream_join(xstreams[i]); ABT_TEST_ERROR(ret, "ABT_xstream_join"); } /* Free the mutex */ ret = ABT_mutex_free(&mutex); ABT_TEST_ERROR(ret, "ABT_mutex_free"); /* Free Execution Streams */ for (i = 1; i < num_xstreams; i++) { ret = 
ABT_xstream_free(&xstreams[i]); ABT_TEST_ERROR(ret, "ABT_xstream_free"); } /* Validation */ expected = num_xstreams * num_threads; if (g_counter != expected) { printf("g_counter = %d\n", g_counter); } /* Finalize */ ret = ABT_test_finalize(g_counter != expected); for (i = 0; i < num_xstreams; i++) { free(args[i]); } free(args); free(xstreams); free(pools); return ret; }