void par_sort(void* arg) { sort_data_t *in = (sort_data_t*) arg; TYPE* data = in->buffer; int left = in->left; int right = in->right; if (right - left + 1 > HC_GRANULARITY) { int index = partition(data, left, right); hclib_start_finish(); { if (left < index - 1) { sort_data_t* buf = (sort_data_t*) malloc(sizeof(sort_data_t)); buf->buffer = data; buf->left = left; buf->right = index - 1; { pragma137_omp_task *new_ctx = (pragma137_omp_task *)malloc(sizeof(pragma137_omp_task)); new_ctx->buf_ptr = &(buf); new_ctx->index_ptr = &(index); new_ctx->in_ptr = &(in); new_ctx->data_ptr = &(data); new_ctx->left_ptr = &(left); new_ctx->right_ptr = &(right); new_ctx->arg_ptr = &(arg); hclib_async(pragma137_omp_task_hclib_async, new_ctx, NO_FUTURE, ANY_PLACE); } } if (index < right) { sort_data_t* buf = (sort_data_t*) malloc(sizeof(sort_data_t)); buf->buffer = data; buf->left = index; buf->right = right; { pragma147_omp_task *new_ctx = (pragma147_omp_task *)malloc(sizeof(pragma147_omp_task)); new_ctx->buf_ptr = &(buf); new_ctx->index_ptr = &(index); new_ctx->in_ptr = &(in); new_ctx->data_ptr = &(data); new_ctx->left_ptr = &(left); new_ctx->right_ptr = &(right); new_ctx->arg_ptr = &(arg); hclib_async(pragma147_omp_task_hclib_async, new_ctx, NO_FUTURE, ANY_PLACE); } } } ; hclib_end_finish(); } else { // quicksort in C library qsort(data+left, right - left + 1, sizeof(TYPE), compare); } free(arg); }
/*
 * Recursive Fibonacci task.
 *
 * raw_args: pointer to a FibArgs. The input is read from ->n and the result
 *           is written to ->res.
 *
 * For n < 2 the result is n itself. Otherwise two child tasks are spawned
 * for n-1 and n-2 inside a FINISH block and their results are added.
 */
void fib(void * raw_args) {
    FibArgs *self = raw_args;

    /* Base case: fib(0) = 0, fib(1) = 1. */
    if (self->n < 2) {
        self->res = self->n;
        return;
    }

    /* The child argument records live on this frame; they are only read
     * back after the FINISH block has ended. */
    FibArgs minus_one = { self->n - 1, 0 };
    FibArgs minus_two = { self->n - 2, 0 };

    FINISH {
        hclib_async(fib, &minus_one, NO_FUTURE, NO_PHASER, ANY_PLACE, NO_PROP);
        hclib_async(fib, &minus_two, NO_FUTURE, NO_PHASER, ANY_PLACE, NO_PROP);
    }

    self->res = minus_one.res + minus_two.res;
}
/*
 * Builds a GPU memset task and schedules it.
 *
 * pl, ptr, val, nbytes: stored verbatim in the task's memset descriptor.
 * future_list: when non-NULL, the task is submitted through hclib_async()
 *              with async_gpu_task_launcher and this list; when NULL it is
 *              handed straight to spawn_gpu_task().
 * user_arg:    stored as the task's arg_to_put.
 * out_promise: initialised here with hclib_promise_init() and stored as the
 *              task's promise_to_put.
 *
 * Aborts the process if the task record cannot be allocated.
 */
void hclib_async_memset_helper(place_t *pl, void *ptr, int val, size_t nbytes,
        hclib_future_t **future_list, void *user_arg,
        hclib_promise_t *out_promise) {
    gpu_task_t *task = malloc(sizeof(*task));
    if (task == NULL) {
        /* Nothing can be scheduled without the task record. */
        abort();
    }

    /* The generic task header is unused for GPU tasks; clear it. */
    task->t._fp = NULL;
    task->t.is_asyncAnyType = 0;
    task->t.future_list = NULL;
    task->t.args = NULL;
    task->t.place = NULL;

    hclib_promise_init(out_promise);
    task->gpu_type = GPU_MEMSET_TASK;
    task->promise_to_put = out_promise;
    task->arg_to_put = user_arg;

    task->gpu_task_def.memset_task.pl = pl;
    task->gpu_task_def.memset_task.ptr = ptr;
    task->gpu_task_def.memset_task.val = val;
    task->gpu_task_def.memset_task.nbytes = nbytes;

#ifdef VERBOSE
    /* %p requires void * arguments. */
    fprintf(stderr, "hclib_async_memset: pl=%p ptr=%p nbytes=%lu\n",
            (void *)pl, ptr, (unsigned long)nbytes);
#endif

    if (future_list) {
        hclib_async(async_gpu_task_launcher, task, future_list, NULL, NULL, 0);
    } else {
        spawn_gpu_task((hclib_task_t *)task);
    }
}
/*
 * Builds a GPU communication (copy) task and schedules it.
 *
 * dst_pl, dst, src_pl, src, nbytes: stored verbatim in the task's comm
 *              descriptor.
 * future_list: when non-NULL, the task is submitted through hclib_async()
 *              with async_gpu_task_launcher and this list; when NULL it is
 *              handed straight to spawn_gpu_task().
 * user_arg:    stored as the task's arg_to_put.
 * out_promise: initialised here with hclib_promise_init() and stored as the
 *              task's promise_to_put.
 *
 * Aborts the process if the task record cannot be allocated.
 */
void hclib_async_copy_helper(place_t *dst_pl, void *dst, place_t *src_pl,
        void *src, size_t nbytes, hclib_future_t **future_list,
        void *user_arg, hclib_promise_t *out_promise) {
    gpu_task_t *task = malloc(sizeof(*task));
    if (task == NULL) {
        /* Nothing can be scheduled without the task record. */
        abort();
    }

    /* The generic task header is unused for GPU tasks; clear it. */
    task->t._fp = NULL;
    task->t.is_asyncAnyType = 0;
    task->t.future_list = NULL;
    task->t.args = NULL;
    task->t.place = NULL;

    hclib_promise_init(out_promise);
    task->gpu_type = GPU_COMM_TASK;
    task->promise_to_put = out_promise;
    task->arg_to_put = user_arg;

    task->gpu_task_def.comm_task.src_pl = src_pl;
    task->gpu_task_def.comm_task.dst_pl = dst_pl;
    task->gpu_task_def.comm_task.src = src;
    task->gpu_task_def.comm_task.dst = dst;
    task->gpu_task_def.comm_task.nbytes = nbytes;

#ifdef VERBOSE
    /* %p requires void * arguments. */
    fprintf(stderr, "hclib_async_copy: dst_pl=%p dst=%p src_pl=%p src=%p "
            "nbytes=%lu future_list=%p\n", (void *)dst_pl, dst,
            (void *)src_pl, src, (unsigned long)nbytes, (void *)future_list);
#endif

    if (future_list) {
        hclib_async(async_gpu_task_launcher, task, future_list, NULL, NULL, 0);
    } else {
        spawn_gpu_task((hclib_task_t *)task);
    }
}
void fib_ddt(void * raw_args) { FibDDtArgs *args = raw_args; if (args->n < 2) { args->resval = args->n; hclib_promise_put(args->res, args); } else { FibDDtArgs *lhsArgs = setup_fib_ddt_args(args->n - 1); FibDDtArgs *rhsArgs = setup_fib_ddt_args(args->n - 2); args->subres[0] = lhsArgs->res; args->subres[1] = rhsArgs->res; // sub-computation asyncs hclib_async(fib_ddt, lhsArgs, NO_FUTURE, NO_PHASER, ANY_PLACE, MY_ESCAPE_PROP); hclib_async(fib_ddt, rhsArgs, NO_FUTURE, NO_PHASER, ANY_PLACE, MY_ESCAPE_PROP); // async-await for sub-results hclib_async(fib_ddt_res, args, ps2fs(args->subres), NO_PHASER, ANY_PLACE, MY_ESCAPE_PROP); } }
/*
 * Spawns `fp(arg)` as an asynchronous task and returns a future for it.
 *
 * fp, arg:            stored in a heap-allocated future_args_wrapper that is
 *                     passed to future_caller as the task argument.
 * futures, nfutures,
 * locale:             forwarded unchanged to hclib_async().
 *
 * Returns the future obtained from the wrapper's embedded promise.
 * Aborts the process if the wrapper cannot be allocated.
 *
 * NOTE(review): the wrapper is not freed here; presumably future_caller or
 * the consumer of the future owns it. Confirm against future_caller.
 */
hclib_future_t *hclib_async_future(future_fct_t fp, void *arg,
        hclib_future_t **futures, const int nfutures,
        hclib_locale_t *locale) {
    future_args_wrapper *wrapper = malloc(sizeof(*wrapper));
    if (wrapper == NULL) {
        /* The promise lives inside the wrapper, so there is nothing to
         * return a future for. */
        abort();
    }

    hclib_promise_init(&wrapper->event);
    wrapper->fp = fp;
    wrapper->actual_in = arg;

    hclib_async(future_caller, wrapper, futures, nfutures, locale);

    return hclib_get_future_for_promise(&wrapper->event);
}
/*
 * Recursively opens one finish scope per index in [i, NB_ASYNC).
 *
 * At each level the slot indices[i] is set to i and async_fct is spawned on
 * that slot. The function then recurses for i+1 inside the still-open finish
 * scope, closes the scope and calls assert_done(i, i+1).
 *
 * Does nothing when i >= NB_ASYNC.
 */
void spawn_async(volatile int * indices, int i) {
    if (i >= NB_ASYNC) {
        return;
    }

    volatile int *slot = indices + i;

    hclib_start_finish();
    *slot = i;
    hclib_async(async_fct, (void*) slot, NO_FUTURE, NO_PHASER, ANY_PLACE, NO_PROP);
    spawn_async(indices, i+1);
    hclib_end_finish();

    assert_done(i, i+1);
}
long long fib (int n) { long long x, y; if (n < 2) return n; { pragma44_omp_task *new_ctx = (pragma44_omp_task *)malloc(sizeof(pragma44_omp_task)); new_ctx->x_ptr = &(x); new_ctx->y_ptr = &(y); new_ctx->n = n; hclib_async(pragma44_omp_task_hclib_async, new_ctx, NO_FUTURE, ANY_PLACE); } ; { pragma46_omp_task *new_ctx = (pragma46_omp_task *)malloc(sizeof(pragma46_omp_task)); new_ctx->x_ptr = &(x); new_ctx->y_ptr = &(y); new_ctx->n = n; hclib_async(pragma46_omp_task_hclib_async, new_ctx, NO_FUTURE, ANY_PLACE); } ; hclib_end_finish(); hclib_start_finish(); ; return x + y; }
/*
 * Initialises the runtime, runs `fct_ptr(arg)` as the root task and
 * finalises the runtime.
 *
 * fct_ptr, arg: root task and its argument, spawned with hclib_async() at
 *               the locale returned by hclib_get_closest_locale().
 * deps, ndeps:  forwarded to hclib_init().
 *
 * Instrumentation is enabled when the HCLIB_INSTRUMENT environment variable
 * is set (to any value). When profile_launch_body is non-zero, the time from
 * just after hclib_init() to just after hclib_finalize() is printed in
 * nanoseconds.
 */
void hclib_launch(generic_frame_ptr fct_ptr, void *arg, const char **deps,
        int ndeps) {
    const int instrument = (getenv("HCLIB_INSTRUMENT") != NULL);
    unsigned long long start_time = 0;

    hclib_init(deps, ndeps, instrument);

    if (profile_launch_body) {
        start_time = current_time_ns();
    }

    hclib_async(fct_ptr, arg, NULL, 0, hclib_get_closest_locale());
    hclib_finalize(instrument);

    if (profile_launch_body) {
        const unsigned long long end_time = current_time_ns();
        printf("\nHCLIB TIME %llu ns\n", end_time - start_time);
    }
}
void taskMain(void *raw_args) { char **argv = raw_args; const int n = atoi(argv[1]); const int doDDT = argv[2] && atoi(argv[2]); const long fn = fib_iter(n); const long fnp1 = fib_iter(n+1); long answer; double t_start, t_end; // ASYNC-FINISH version if (!doDDT) { printf("async/finish version\n"); t_start = get_seconds(); FibArgs args = { n, 0 }; FINISH { hclib_async(fib, &args, NO_FUTURE, NO_PHASER, ANY_PLACE, NO_PROP); } t_end = get_seconds(); answer = args.res; //printf("asyncs = %ld\tfins=%ld\n", 2*fnp1-1, fnp1); }
void nqueens(int n, int j, char *a, int *solutions, int depth) { int *csols; int i; if (n == j) { /* good solution, count it */ *solutions = 1; return; } *solutions = 0; csols = (int *)malloc(n*sizeof(int)); memset(csols,0,n*sizeof(int)); /* try each possible position for queen <j> */ for (i = 0; i < n; i++) { { pragma126_omp_task *new_ctx = (pragma126_omp_task *)malloc(sizeof(pragma126_omp_task)); new_ctx->csols = csols; new_ctx->i = i; new_ctx->n = n; new_ctx->j = j; new_ctx->a = a; new_ctx->solutions = solutions; new_ctx->depth = depth; hclib_async(pragma126_omp_task_hclib_async, new_ctx, NO_FUTURE, ANY_PLACE); } } hclib_end_finish(); hclib_start_finish(); ; for ( i = 0; i < n; i++) *solutions += csols[i]; free(csols); }
void sweep (int nx, int ny, double dx, double dy, double *f_, int itold, int itnew, double *u_, double *unew_, int block_size) { int it; int block_x, block_y; if (block_size == 0) block_size = nx; int max_blocks_x = (nx / block_size); int max_blocks_y = (ny / block_size); hclib_start_finish(); { for (it = itold + 1; it <= itnew; it++) { // Save the current estimate. for (block_x = 0; block_x < max_blocks_x; block_x++) { for (block_y = 0; block_y < max_blocks_y; block_y++) { { pragma26_omp_task *new_ctx = (pragma26_omp_task *)malloc(sizeof(pragma26_omp_task)); new_ctx->it_ptr = &(it); new_ctx->block_x = block_x; new_ctx->block_y = block_y; new_ctx->max_blocks_x_ptr = &(max_blocks_x); new_ctx->max_blocks_y_ptr = &(max_blocks_y); new_ctx->nx_ptr = &(nx); new_ctx->ny_ptr = &(ny); new_ctx->dx_ptr = &(dx); new_ctx->dy_ptr = &(dy); new_ctx->f__ptr = &(f_); new_ctx->itold_ptr = &(itold); new_ctx->itnew_ptr = &(itnew); new_ctx->u__ptr = &(u_); new_ctx->unew__ptr = &(unew_); new_ctx->block_size_ptr = &(block_size); hclib_async(pragma26_omp_task_hclib_async, new_ctx, NO_FUTURE, ANY_PLACE); } ; } } hclib_end_finish(); hclib_start_finish(); ; // Compute a new estimate. for (block_x = 0; block_x < max_blocks_x; block_x++) { for (block_y = 0; block_y < max_blocks_y; block_y++) { { pragma36_omp_task *new_ctx = (pragma36_omp_task *)malloc(sizeof(pragma36_omp_task)); new_ctx->block_x = block_x; new_ctx->block_y = block_y; new_ctx->nx_ptr = &(nx); new_ctx->ny_ptr = &(ny); new_ctx->dx_ptr = &(dx); new_ctx->dy_ptr = &(dy); new_ctx->f__ptr = &(f_); new_ctx->u__ptr = &(u_); new_ctx->unew__ptr = &(unew_); new_ctx->block_size_ptr = &(block_size); hclib_async(pragma36_omp_task_hclib_async, new_ctx, NO_FUTURE, ANY_PLACE); } ; } } hclib_end_finish(); hclib_start_finish(); ; } } ; hclib_end_finish(); }
/*
 * Spawns `body(user_data)` as an asynchronous task.
 *
 * body:            task entry point.
 * user_data:       opaque argument passed to `body`.
 * optional_future: forwarded to hclib_async() as its third argument
 *                  (presumably the list of futures the task waits on;
 *                  may be NULL — TODO confirm against hclib_async).
 *
 * The remaining hclib_async() arguments are passed as NULL, NULL and 0.
 */
void shmem_task_nbi(void (*body)(void *),
                    void *user_data,
                    shmem_future_t **optional_future)
{
    hclib_async(body, user_data, optional_future, NULL, NULL, 0);
}