// Testcase to test hpx_lco_get_all function static int _getAll_handler(uint32_t *args, size_t size) { uint32_t n = *args; if (n < 2) { return HPX_THREAD_CONTINUE(n); } hpx_addr_t peers[] = { HPX_HERE, HPX_HERE }; uint32_t ns[] = { n - 1, n - 2 }; hpx_addr_t futures[] = { hpx_lco_future_new(sizeof(uint32_t)), hpx_lco_future_new(sizeof(uint32_t)) }; uint32_t ssn[] = { 0, 0 }; void *addrs[] = { &ssn[0], &ssn[1] }; size_t sizes[] = { sizeof(uint32_t), sizeof(uint32_t) }; hpx_call(peers[0], _getAll, futures[0], &ns[0], sizeof(uint32_t)); hpx_call(peers[1], _getAll, futures[1], &ns[1], sizeof(uint32_t)); hpx_lco_get_all(2, futures, sizes, addrs, NULL); hpx_lco_wait(futures[0]); hpx_lco_wait(futures[1]); hpx_addr_t wait = hpx_lco_future_new(0); hpx_lco_delete_all(2, futures, wait); hpx_lco_wait(wait); hpx_lco_delete(wait, HPX_NULL); uint32_t sn = ssn[0] * ssn[0] + ssn[1] * ssn[1]; return HPX_THREAD_CONTINUE(sn); }
static int lco_waitall_handler(void) { int size = HPX_LOCALITIES; int block_size = 1; int ranks = hpx_get_num_ranks(); printf("Starting the HPX LCO Wait all test\n"); printf("localities: %d\n", size); // Start the timer hpx_time_t t1 = hpx_time_now(); uint32_t blocks = size; uint32_t block_bytes = block_size * sizeof(uint32_t); printf("Number of blocks and bytes per block = %d, %d\n", blocks, block_bytes); printf("Ranks and blocks per rank = %d, %d\n", ranks, blocks / ranks); hpx_addr_t addr = hpx_gas_alloc_cyclic(blocks, sizeof(uint32_t), 0); uint32_t args[2] = { block_size, (blocks / ranks) }; int rem = blocks % ranks; hpx_addr_t done[2] = { hpx_lco_and_new(ranks), hpx_lco_and_new(rem) }; for (int i = 0; i < ranks; i++) { hpx_addr_t there = hpx_addr_add(addr, i * block_bytes, sizeof(uint32_t)); hpx_call(there, _init_memory, done[0], args, sizeof(args)); } for (int i = 0; i < rem; i++) { hpx_addr_t block = hpx_addr_add(addr, args[1] * ranks + i * block_bytes, block_bytes); hpx_call(block, _init_memory, done[1], args, sizeof(args)); } // Blocks the thread until all of the LCO's have been set. hpx_lco_wait_all(2, done, NULL); hpx_lco_delete_all(2, done, HPX_NULL); hpx_gas_free(addr, HPX_NULL); printf(" Elapsed: %g\n", hpx_time_elapsed_ms(t1)); return HPX_SUCCESS; }
/// Handle the test broadcast. /// /// We want to stress the allreduce by generating a bunch of parallel operations /// on it from different parts of the system. We do this by broadcasting this /// operation, which will spawn N instances of the @p leaf operation locally. static int _test_bcast_handler(hpx_addr_t allreduce, hpx_addr_t sum, hpx_action_t leaf) { int r; int row = HPX_LOCALITIES * HPX_LOCALITY_ID; for (int i = 0; i < N; ++i) { int j = row + i; int k = i + 1; // &sum is passed explicitly instead of as a continuation for the _join_leaf CHECK( hpx_call(HPX_HERE, leaf, HPX_NULL, &allreduce, &j, &k, &sum) ); } return HPX_SUCCESS; }
static int _test_action_handler(void) { char local; // Everyone joins the barrier once. if (sync_barrier_join(barrier, HPX_THREAD_ID)) { // I win the race. printf("thread %d running action on stack %p\n", HPX_THREAD_ID, &local); // This will push the task onto my queue, then I have to induce myself to // transfer to it---everyone else is blocked, so all I have to do is call // yield, which should do the transfer on the same stack, and make this // thread available to whoever wakes up. // // Note that the _test_task task actually releases the lock here, this // prevents anyone from stealing the parent thread (or getting it from the // yield queue) until I have already transferred to the child. // // We send our continuation along so that the test doesn't terminate early. hpx_addr_t and = hpx_thread_current_cont_target(); int e = hpx_call(HPX_HERE, _test_task, and); assert(e == HPX_SUCCESS); hpx_thread_yield(); printf("action stolen by %d\n", HPX_THREAD_ID); // Now, this thread should have been "stolen" or taken from the yield queue // or whatnot. We expect that we're running concurrent with, and on the same // stack, as the _test_task. Verify that we're on the same stack. ptrdiff_t d = &local - task_sp; if (0 < d && d < 1000) { // We're on the same stack---for this to be safe, the _test_task MUST have // already run, which implies that the value for n must be 1. int v = sync_load(&n, SYNC_ACQUIRE); printf("stack difference is %td, value is %d\n", d, v); assert(v == 1 && "work-first task test failed\n"); } else { printf("test indeterminate, task spawned with new stack, d=%td\n", d); } printf("work-first task test success\n"); } else { // I lost the race, wait for the entire thing to be set up before // returning and becoming a "stealer". sync_barrier_join(barrier, HPX_THREAD_ID); } printf("finishing %d\n", HPX_THREAD_ID); return HPX_SUCCESS; }
/// Driver for the work-first task test.
///
/// Creates the shared barrier for HPX_THREADS participants, spawns one
/// _test_action per thread at this locality, waits for all of them through an
/// "and" LCO, and then tears everything down.
///
/// @returns HPX_SUCCESS
static int _test_try_task_handler(void) {
  barrier = sr_barrier_new(HPX_THREADS);
  assert(barrier);

  // HPX_THREADS + 1 inputs: one per _test_action, plus one more that is
  // presumably supplied by the spawned _test_task -- TODO confirm.
  hpx_addr_t all_done = hpx_lco_and_new(HPX_THREADS + 1);
  assert(all_done);

  for (int spawned = 0; spawned < HPX_THREADS; ++spawned) {
    int status = hpx_call(HPX_HERE, _test_action, all_done);
    assert(status == HPX_SUCCESS);
  }

  hpx_lco_wait(all_done);
  hpx_lco_delete(all_done, HPX_NULL);
  sync_barrier_delete(barrier);

  return HPX_SUCCESS;
}
static int _lco_get_remote_handler(void) { int rank = (HPX_LOCALITY_ID + 1) % HPX_LOCALITIES; hpx_addr_t there = HPX_THERE(rank); hpx_addr_t lco; int e = hpx_call_sync(there, _new_future, &lco, sizeof(lco)); assert(e == HPX_SUCCESS); int i = 42; e = hpx_call(lco, hpx_lco_set_action, HPX_NULL, &i, sizeof(i)); assert(e == HPX_SUCCESS); i = 0; e = hpx_lco_get(lco, sizeof(i), &i); assert(e == HPX_SUCCESS); assert(i = 42); return hpx_call_cc(lco, hpx_lco_delete_action); }
static int _main_action(int *args, size_t size) { int n = *args; printf("seqspawn(%d)\n", n); fflush(stdout); hpx_addr_t and = hpx_lco_and_new(n); hpx_time_t now = hpx_time_now(); for (int i = 0; i < n; i++) hpx_call(HPX_HERE, _nop, and, 0, 0); hpx_lco_wait(and); double elapsed = hpx_time_elapsed_ms(now)/1e3; hpx_lco_delete(and, HPX_NULL); printf("seconds: %.7f\n", elapsed); printf("localities: %d\n", HPX_LOCALITIES); printf("threads: %d\n", HPX_THREADS); hpx_exit(HPX_SUCCESS); }
static int lco_error_handler(void) { printf("Starting the HPX LCO get all test\n"); hpx_time_t t1 = hpx_time_now(); hpx_addr_t lco = hpx_lco_future_new(0); hpx_addr_t done = hpx_lco_future_new(0); hpx_call(HPX_HERE, _errorset, done, &lco, sizeof(lco)); hpx_status_t status = hpx_lco_wait(lco); printf("status == %d\n", status); assert(status == HPX_ERROR); hpx_lco_wait(done); hpx_lco_delete(lco, HPX_NULL); hpx_lco_delete(done, HPX_NULL); printf(" Elapsed: %.7f\n", hpx_time_elapsed_ms(t1)/1e3); return HPX_SUCCESS; }
int parallel_nqueens(int n, int col, int *hist) { hpx_addr_t theThread = HPX_HERE; struct thread_data td; //td.lyst = hist; td.n = n; td.col = col; memcpy(td.lyst, hist, MAX_SIZE*sizeof(int)); //printf("thread_data size:%d\n", sizeof(struct thread_data)); mutex = hpx_lco_sema_new(1); //solve(td.n, td.col, td.lyst); hpx_addr_t done = hpx_lco_future_new(sizeof(uint64_t)); hpx_call(theThread, _nqueens, done, &td, sizeof(td)); hpx_lco_wait(done); hpx_lco_delete(done, HPX_NULL); return HPX_SUCCESS; }
/// Spawn the per-locality LCO stress work.
///
/// For each of LCOS_PER_LOCALITY rounds this creates a future and an "and"
/// LCO and, for each of them, sends _set, _wait and _delete actions to
/// randomly chosen localities. Each _delete receives a three-address record:
/// the LCO under test, @p termination_lco, and the "and" LCO that the waiters
/// continue into.
///
/// @param termination_lco  Address forwarded to every _delete call.
///
/// @returns HPX_SUCCESS
static int _spawn_handler(hpx_addr_t termination_lco) {
  int status;

  for (size_t round = 0; round < LCOS_PER_LOCALITY; ++round) {
    // ---- futures ----
    const hpx_addr_t future_rec[3] = {
      hpx_lco_future_new(0),
      termination_lco,
      hpx_lco_and_new(WAITERS)
    };
    hpx_addr_t where;

    // A single setter for the future.
    where = HPX_THERE(rand() % HPX_LOCALITIES);
    status = hpx_call(where, _set, HPX_NULL, &future_rec[0], sizeof(hpx_addr_t));
    assert(status == HPX_SUCCESS);

    // WAITERS waiters, each continuing into future_rec[2].
    for (size_t w = 0; w < WAITERS; ++w) {
      where = HPX_THERE(rand() % HPX_LOCALITIES);
      status = hpx_call(where, _wait, future_rec[2], &future_rec[0],
                        sizeof(hpx_addr_t));
      assert(status == HPX_SUCCESS);
    }

    // The delete gets the whole record.
    where = HPX_THERE(rand() % HPX_LOCALITIES);
    status = hpx_call(where, _delete, HPX_NULL, future_rec, sizeof(future_rec));
    assert(status == HPX_SUCCESS);

    // ---- and LCOs ----
    const hpx_addr_t and_rec[3] = {
      hpx_lco_and_new(PARTICIPANTS),
      termination_lco,
      hpx_lco_and_new(WAITERS)
    };

    // PARTICIPANTS setters, one per input of the "and".
    for (size_t p = 0; p < PARTICIPANTS; ++p) {
      where = HPX_THERE(rand() % HPX_LOCALITIES);
      status = hpx_call(where, _set, HPX_NULL, &and_rec[0], sizeof(hpx_addr_t));
      assert(status == HPX_SUCCESS);
    }

    for (size_t w = 0; w < WAITERS; ++w) {
      where = HPX_THERE(rand() % HPX_LOCALITIES);
      status = hpx_call(where, _wait, and_rec[2], &and_rec[0],
                        sizeof(hpx_addr_t));
      assert(status == HPX_SUCCESS);
    }

    where = HPX_THERE(rand() % HPX_LOCALITIES);
    status = hpx_call(where, _delete, HPX_NULL, and_rec, sizeof(and_rec));
    assert(status == HPX_SUCCESS);
  }

  return HPX_SUCCESS;
}
/// Free a global address. /// /// This global address must either be the base of a cyclic allocation, or a /// block allocated by _pgas_gas_alloc_local. At this time, we do not attempt to deal /// with the cyclic allocations, as they are using a simple csbrk allocator. static void _pgas_gas_free(void *gas, hpx_addr_t gpa, hpx_addr_t sync) { if (gpa == HPX_NULL) { return; } uint64_t offset = gpa_to_offset(gpa); void *lva = heap_offset_to_lva(global_heap, offset); dbg_assert_str(heap_contains_lva(global_heap, lva), "attempt to free out of bounds offset %"PRIu64"", offset); (void)lva; if (heap_offset_is_cyclic(global_heap, offset)) { heap_free_cyclic(global_heap, offset); hpx_lco_set(sync, 0, NULL, HPX_NULL, HPX_NULL); } else if (gpa_to_rank(gpa) == here->rank) { global_free(pgas_gpa_to_lva(offset)); hpx_lco_set(sync, 0, NULL, HPX_NULL, HPX_NULL); } else { dbg_check(hpx_call(gpa, pgas_free, sync), "free failed on %"PRIu64"", gpa); } }
// hpx_thread_current_cont_action gets the continuation action for the current // thread static int _thread_current_cont_target_handler(void) { hpx_action_t c_action = hpx_thread_current_cont_action(); hpx_addr_t c_target = hpx_thread_current_cont_target(); hpx_call(c_target, c_action, HPX_NULL, NULL, 0); return HPX_SUCCESS; }
static int _main_action(int *args, size_t size) { hpx_time_t t; int count; fprintf(stdout, HEADER); fprintf(stdout, "# Latency in (ms)\n"); t = hpx_time_now(); hpx_addr_t done = hpx_lco_future_new(0); fprintf(stdout, "Creation time: %g\n", hpx_time_elapsed_ms(t)); value = 1234; t = hpx_time_now(); hpx_call(HPX_HERE, _set_value, done, &value, sizeof(value)); fprintf(stdout, "Value set time: %g\n", hpx_time_elapsed_ms(t)); t = hpx_time_now(); hpx_lco_wait(done); fprintf(stdout, "Wait time: %g\n", hpx_time_elapsed_ms(t)); t = hpx_time_now(); hpx_lco_delete(done, HPX_NULL); fprintf(stdout, "Deletion time: %g\n", hpx_time_elapsed_ms(t)); fprintf(stdout, "%s\t%*s%*s%*s\n", "# NumReaders " , FIELD_WIDTH, "Get_Value ", FIELD_WIDTH, " LCO_Getall ", FIELD_WIDTH, "Delete"); for (int i = 0; i < sizeof(num_readers)/sizeof(num_readers[0]); i++) { fprintf(stdout, "%d\t\t", num_readers[i]); count = num_readers[i]; int values[count]; void *addrs[count]; size_t sizes[count]; hpx_addr_t futures[count]; for (int j = 0; j < count; j++) { addrs[j] = &values[j]; sizes[j] = sizeof(int); futures[j] = hpx_lco_future_new(sizeof(int)); } t = hpx_time_now(); for (int j = 0; j < count; j++) { t = hpx_time_now(); hpx_call(HPX_HERE, _get_value, futures[j], NULL, 0); hpx_lco_wait(futures[j]); } fprintf(stdout, "%*g", FIELD_WIDTH, hpx_time_elapsed_ms(t)); t = hpx_time_now(); hpx_lco_get_all(count, futures, sizes, addrs, NULL); fprintf(stdout, "%*g", FIELD_WIDTH, hpx_time_elapsed_ms(t)); t = hpx_time_now(); for (int j = 0; j < count; j++) hpx_lco_delete(futures[j], HPX_NULL); fprintf(stdout, "%*g\n", FIELD_WIDTH, hpx_time_elapsed_ms(t)); } hpx_exit(HPX_SUCCESS); }
static int _nqueens_action(void *args, size_t size) { int i, j; struct thread_data *my_data; my_data = (struct thread_data *) args; /* printf("n = %d, col = %d, count = %d\n", my_data->n , my_data->col , count); */ if (my_data->col == my_data->n) { hpx_lco_sema_p(mutex); ++count; /* printf("\nNo. %d\n-----\n", count); for (i = 0; i < my_data->n; i++, putchar('\n')) for(j = 0; j < my_data->n; j++) putchar(j == my_data->lyst[i] ? 'Q' : ((i + j) & 1) ? ' ' : '.'); */ hpx_lco_sema_v_sync(mutex); hpx_thread_exit(HPX_SUCCESS); //hpx_thread_continue(NULL, 0); //return HPX_SUCCESS; } #define p_attack(i, j) (my_data->lyst[j] == i || abs(my_data->lyst[j] - i) == my_data->col - j) int dummy=0; int num_spawns=0; for(i = 0, j = 0; i < my_data->n; i++) { for (j = 0; j < my_data->col && !p_attack(i, j); j++); if (j < my_data->col) { dummy++; } } //printf("dummy/spawns: %d/%d\n", dummy, my_data->n); num_spawns = my_data->n - dummy; bool D_CALL = false; //printf("num_spawns = %d\n", num_spawns); if( num_spawns == 0 ) { num_spawns = 1; D_CALL = true; } //num_spawns = my_data->n; struct thread_data temp[num_spawns]; hpx_addr_t futures[num_spawns]; hpx_addr_t threads[num_spawns]; int pqs[num_spawns]; size_t p_size[num_spawns]; void *addrs[num_spawns]; for(i = 0; i < num_spawns; i++) { futures[i] = hpx_lco_future_new(sizeof(int)); threads[i] = HPX_HERE; pqs[i] = 0; addrs[i] = &pqs[i]; p_size[i] = sizeof(size_t); } int k=0; // counter for hpx data for(i = 0, j = 0; i < my_data->n; i++) { for (j = 0; j < my_data->col && !p_attack(i, j); j++); if (j < my_data->col) { //printf("[%d] call continue.\n", i); continue; } //printf("[%d] call nqueens %d\n", i, k); my_data->lyst[my_data->col] = i; memcpy(temp[k].lyst, my_data->lyst, MAX_SIZE*sizeof(int)); temp[k].n = my_data->n; temp[k].col = my_data->col+1; //solve(n, col + 1, hist); hpx_call(threads[k], _nqueens, futures[k], (void *)&temp[k], sizeof(temp[k])); k++; } if( !D_CALL ) { hpx_lco_get_all(num_spawns, futures, p_size, addrs, NULL); 
for(i = 0; i < num_spawns; i++) hpx_lco_delete(futures[i], HPX_NULL); } return HPX_SUCCESS; }