/*
 * Count the nodes of the subtree rooted at `parent`.
 *
 * depth       - recursion depth of `parent`; only forwarded (as depth + 1)
 *               to the recursive calls.
 * parent      - node whose children are generated; its height and RNG state
 *               are read, it is never written.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 (for `parent` itself) plus the sizes of all child subtrees.
 *
 * The hclib_pragma_marker() calls carry the text of OpenMP directives
 * ("task ...", "taskwait"); each marker must stay immediately in front of
 * the statement it annotates.
 *
 * The process is aborted if the per-call scratch arrays cannot be allocated.
 */
unsigned long long parTreeSearch(int depth, Node *parent, int numChildren)
{
    Node *n;
    Node *nodePtr;
    unsigned long long *partialCount;
    unsigned long long subtreesize = 1;   /* counts `parent` itself */
    int i, j;

    /* A leaf has nothing to spawn: skip the two zero-byte allocations. */
    if (numChildren <= 0) {
        return subtreesize;
    }

    /* Casts are kept so the block also compiles as C++. */
    n = (Node *)malloc((size_t)numChildren * sizeof *n);
    partialCount = (unsigned long long *)malloc((size_t)numChildren * sizeof *partialCount);
    if (n == NULL || partialCount == NULL) {
        /* Without the scratch arrays the traversal cannot continue. */
        free(n);
        free(partialCount);
        abort();
    }

    // Recurse on the children
    for (i = 0; i < numChildren; i++) {
        nodePtr = &n[i];
        nodePtr->height = parent->height + 1;

        // The following line is the work (one or more SHA-1 ops)
        for (j = 0; j < computeGranularity; j++) {
            rng_spawn(parent->state.state, nodePtr->state.state, i);
        }

        nodePtr->numChildren = uts_numChildren(nodePtr);

        hclib_pragma_marker("omp", "task untied firstprivate(i, nodePtr) shared(partialCount)", "pragma221_omp_task");
        partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
    }

    /* Every partialCount[] slot must be written before it is summed. */
    hclib_pragma_marker("omp", "taskwait", "pragma225_omp_taskwait");

    for (i = 0; i < numChildren; i++) {
        subtreesize += partialCount[i];
    }

    free(n);
    free(partialCount);
    return subtreesize;
}
/*
 * Count the nodes of the subtree rooted at `parent`, creating one OpenMP
 * task per child.
 *
 * depth       - recursion depth of `parent`; only forwarded (as depth + 1)
 *               to the recursive calls.
 * parent      - node whose children are generated; its height and RNG state
 *               are read, it is never written.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 (for `parent` itself) plus the sizes of all child subtrees.
 */
unsigned long long parTreeSearch(int depth, Node *parent, int numChildren)
{
    /*
     * Leaves are handled before the variable-length arrays are declared:
     * ISO C requires a VLA length to be greater than zero, so declaring
     * n[0] / partialCount[0] for every leaf would be undefined behaviour.
     */
    if (numChildren <= 0) {
        return 1;
    }

    Node n[numChildren], *nodePtr;
    unsigned long long subtreesize = 1, partialCount[numChildren];
    int i, j;

    // Recurse on the children
    for (i = 0; i < numChildren; i++) {
        nodePtr = &n[i];
        nodePtr->height = parent->height + 1;

        // The following line is the work (one or more SHA-1 ops)
        for (j = 0; j < computeGranularity; j++) {
            rng_spawn(parent->state.state, nodePtr->state.state, i);
        }

        nodePtr->numChildren = uts_numChildren(nodePtr);

        #pragma omp task untied firstprivate(i, nodePtr) shared(partialCount)
        partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
    }

    /* n[] and partialCount[] live in this frame, so the child tasks must
     * finish before the results are read and the function returns. */
    #pragma omp taskwait

    for (i = 0; i < numChildren; i++) {
        subtreesize += partialCount[i];
    }

    return subtreesize;
}
/*
 * Generate all children of the parent and hand each one to the steal stack.
 *
 * details depend on tree type, node type and shape function
 *
 * parent    - node being expanded; its numChildren field is filled in here.
 * child_buf - buffer passed to ss_put_work() once per generated child.
 * child     - scratch node that receives each child's type, height and state.
 * ss        - steal stack; maxTreeDepth and nLeaves are updated on it.
 */
void genChildren(Node * parent, void * child_buf, Node * child, StealStack * ss)
{
  const int parentHeight = parent->height;

  ss->maxTreeDepth = max(ss->maxTreeDepth, parent->height);

  const int numChildren = uts_numChildren(parent);
  const int childType   = uts_childType(parent);

  // record number of children in parent
  parent->numChildren = numChildren;

  // a node without children only bumps the leaf counter
  if (numChildren <= 0) {
    ss->nLeaves++;
    return;
  }

  // type and height are shared by all siblings, so set them once
  child->type   = childType;
  child->height = parentHeight + 1;

  for (int idx = 0; idx < numChildren; idx++) {
    // TBD: add parent height to spawn
    // computeGranularity controls number of rng_spawn calls per node
    for (int rep = 0; rep < computeGranularity; rep++) {
      rng_spawn(parent->state.state, child->state.state, idx);
    }

    // construct child and push onto stack
    ss_put_work(ss, child_buf);
  }
}
/*
 * Run the serial tree search from `root` and return the node count
 * reported by serTreeSearch(). A progress message is emitted before and
 * after the traversal.
 */
counter_t serial_uts ( Node *root )
{
   bots_message("Computing Unbalance Tree Search algorithm ");

   /* The root's child count is computed here and passed down as the
    * third argument of the search. */
   const counter_t total = serTreeSearch(0, root, uts_numChildren(root));

   bots_message(" completed!\n");
   return total;
}
/***********************************************************
 *  Recursive depth-first implementation                   *
 ***********************************************************/

/*
 * Compute the number of children of `root` with uts_numChildren(),
 * store it in root->numChildren and return the same value.
 */
int getNumRootChildren(Node *root)
{
  const int count = uts_numChildren(root);

  root->numChildren = count;
  return count;
}
static counter_t _uts_action(void *args, size_t size) { int i, j; struct thread_data *my_data; struct thread_data temp, input; my_data = (struct thread_data *)args; Node n[my_data->numChildren], *nodePtr; counter_t subtreesize = 1, partialCount[my_data->numChildren]; temp.depth = my_data->depth; memcpy(&temp.parent, &my_data->parent, sizeof(Node)); temp.numChildren = my_data->numChildren; //hpx_lco_sema_p (mutex); //printf("D: %d; child: %d; spawns:%.0f\n", temp.depth, temp.numChildren, spawns_counter++); //hpx_lco_sema_v_sync (mutex); /* printf("\n[Node] height = %d; numChildren = %d\n" , temp.parent.height , temp.parent.numChildren); */ hpx_addr_t theThread = HPX_HERE; hpx_addr_t done = hpx_lco_future_new(sizeof(uint64_t)); // Recurse on the children for (i = 0; i < temp.numChildren; i++) { nodePtr = &n[i]; nodePtr->height = temp.parent.height + 1; // The following line is the work (one or more SHA-1 ops) for (j = 0; j < computeGranularity; j++) { rng_spawn(temp.parent.state.state, nodePtr->state.state, i); } nodePtr->numChildren = uts_numChildren(nodePtr); input.depth = temp.depth+1; memcpy(&input.parent, nodePtr, sizeof(Node)); input.numChildren = nodePtr->numChildren; //partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren); hpx_call_sync(theThread, _uts, &partialCount[i], sizeof(partialCount[i]), &input, sizeof(input)); } for (i = 0; i < temp.numChildren; i++) { subtreesize += partialCount[i]; } HPX_THREAD_CONTINUE(subtreesize); return HPX_SUCCESS; }
/*
 * Parallel entry point: record the root's child count in root->numChildren,
 * then start parTreeSearch() on the root from inside an OpenMP parallel
 * region and return the node count it produces. A progress message is
 * emitted before and after the traversal.
 */
unsigned long long parallel_uts ( Node *root )
{
   unsigned long long num_nodes = 0;

   root->numChildren = uts_numChildren(root);

   bots_message("Computing Unbalance Tree Search algorithm ");

   /* One thread of the team creates the root task; the result is written
    * to num_nodes, which is read only after the parallel region ends. */
   #pragma omp parallel
   {
      #pragma omp single nowait
      {
         #pragma omp task untied
         {
            num_nodes = parTreeSearch( 0, root, root->numChildren );
         }
      }
   }

   bots_message(" completed!");

   return num_nodes;
}
/*
 * Serially count the nodes of the subtree rooted at `parent`.
 *
 * depth       - recursion depth of `parent`; only forwarded (as depth + 1)
 *               to the recursive calls.
 * parent      - node whose children are generated; its height and RNG state
 *               are read, it is never written.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 (for `parent` itself) plus the sizes of all child subtrees.
 *
 * Each child is fully explored before the next one is generated, so a
 * single loop-scoped Node and a running total are sufficient. No
 * per-call arrays are needed, which also avoids declaring zero-length
 * variable-length arrays when `parent` is a leaf.
 */
unsigned long long serTreeSearch(int depth, Node *parent, int numChildren)
{
    unsigned long long subtreesize = 1;   /* counts `parent` itself */

    // Recurse on the children
    for (int i = 0; i < numChildren; i++) {
        Node child;

        child.height = parent->height + 1;

        // The following line is the work (one or more SHA-1 ops)
        for (int j = 0; j < computeGranularity; j++) {
            rng_spawn(parent->state.state, child.state.state, i);
        }

        subtreesize += serTreeSearch(depth+1, &child, uts_numChildren(&child));
    }

    return subtreesize;
}
/*
 * Count the nodes of the subtree rooted at `parent`, creating one OpenMP
 * task per child. The per-call scratch arrays are heap-allocated.
 *
 * depth       - recursion depth of `parent`; only forwarded (as depth + 1)
 *               to the recursive calls.
 * parent      - node whose children are generated; its height and RNG state
 *               are read, it is never written.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 (for `parent` itself) plus the sizes of all child subtrees.
 * The process is aborted if the scratch arrays cannot be allocated.
 */
counter_t parTreeSearch(int depth, Node *parent, int numChildren)
{
  Node *n, *nodePtr;
  counter_t *partialCount;
  counter_t subtreesize = 1;   /* counts `parent` itself */
  int i, j;

  /* A leaf has nothing to spawn: skip the two zero-byte allocations. */
  if (numChildren <= 0) {
    return subtreesize;
  }

  /* Casts are kept so the block also compiles as C++. */
  n = (Node*)malloc((size_t)numChildren * sizeof *n);
  partialCount = (counter_t*)malloc((size_t)numChildren * sizeof *partialCount);
  if (n == NULL || partialCount == NULL) {
    /* Without the scratch arrays the traversal cannot continue. */
    free(n);
    free(partialCount);
    abort();
  }

  // Recurse on the children
  for (i = 0; i < numChildren; i++) {
    nodePtr = &n[i];
    nodePtr->height = parent->height + 1;

    // The following line is the work (one or more SHA-1 ops)
    for (j = 0; j < computeGranularity; j++) {
      rng_spawn(parent->state.state, nodePtr->state.state, i);
    }

    nodePtr->numChildren = uts_numChildren(nodePtr);

    #pragma omp task firstprivate(i, nodePtr) shared(partialCount) untied
    partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
  }

  /* The child tasks write into n[] / partialCount[]; wait for them before
   * summing the results and releasing the arrays. */
  #pragma omp taskwait

  for (i = 0; i < numChildren; i++) {
    subtreesize += partialCount[i];
  }

  free(n);
  free(partialCount);

  return subtreesize;
}
/*
 * Parallel entry point: record the root's child count in root->numChildren,
 * then start parTreeSearch() on the root and return the node count it
 * produces. A progress message is emitted before and after the traversal.
 *
 * The hclib_pragma_marker() calls carry the text of OpenMP directives
 * (parallel / single nowait / task untied); each marker is followed by the
 * brace-delimited region it annotates, so the nesting must be kept as is.
 */
unsigned long long parallel_uts ( Node *root )
{
   unsigned long long num_nodes = 0;

   root->numChildren = uts_numChildren(root);

   bots_message("Computing Unbalance Tree Search algorithm ");

   hclib_pragma_marker("omp_to_hclib", "", "pragma183_omp_to_hclib");
   {
      hclib_pragma_marker("omp", "parallel", "pragma185_omp_parallel");
      {
         hclib_pragma_marker("omp", "single nowait", "pragma187_omp_single");
         {
            hclib_pragma_marker("omp", "task untied", "pragma189_omp_task");
            num_nodes = parTreeSearch( 0, root, root->numChildren );
         }
      }
   }

   bots_message(" completed!");

   return num_nodes;
}
TASK_2(Result, parTreeSearch, int, depth, Node *, parent) { int numChildren, childType; counter_t parentHeight = parent->height; Result r = { depth, 1, 0 }; numChildren = uts_numChildren(parent); childType = uts_childType(parent); // record number of children in parent parent->numChildren = numChildren; // Recurse on the children if (numChildren > 0) { int i, j; for (i = 0; i < numChildren; i++) { Node *child = (Node*)alloca(sizeof(Node)); child->type = childType; child->height = parentHeight + 1; child->numChildren = -1; // not yet determined for (j = 0; j < computeGranularity; j++) { rng_spawn(parent->state.state, child->state.state, i); } SPAWN(parTreeSearch, depth+1, child); } /* Wait a bit */ struct timespec tim = (struct timespec){0, 100L*numChildren}; nanosleep(&tim, NULL); for (i = 0; i < numChildren; i++) { Result c = SYNC(parTreeSearch); if (c.maxdepth>r.maxdepth) r.maxdepth = c.maxdepth; r.size += c.size; r.leaves += c.leaves; } } else {
/*
 * Count the nodes of the subtree rooted at `parent`.
 *
 * depth       - recursion depth of `parent`; only forwarded (as depth + 1)
 *               to the recursive calls.
 * parent      - node whose children are generated; its height and RNG state
 *               are read, it is never written.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 (for `parent` itself) plus the sizes of all child subtrees.
 *
 * The OpenMP task / taskwait directives are currently commented out, so the
 * children are explored one after another on the calling thread.
 */
counter_t parTreeSearch(int depth, Node *parent, int numChildren)
{
  /*
   * Leaves are handled before the variable-length arrays are declared:
   * ISO C requires a VLA length to be greater than zero, so declaring
   * n[0] / partialCount[0] for every leaf would be undefined behaviour.
   */
  if (numChildren <= 0) {
    return 1;
  }

  Node n[numChildren], *nodePtr;
  counter_t subtreesize = 1, partialCount[numChildren];
  int i, j;

  // Recurse on the children
  for (i = 0; i < numChildren; i++) {
    nodePtr = &n[i];
    nodePtr->height = parent->height + 1;

    // The following line is the work (one or more SHA-1 ops)
    for (j = 0; j < computeGranularity; j++) {
      rng_spawn(parent->state.state, nodePtr->state.state, i);
    }

    nodePtr->numChildren = uts_numChildren(nodePtr);

    //#pragma omp task firstprivate(i, nodePtr) shared(partialCount) untied
    partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
  }

  //#pragma omp taskwait

  for (i = 0; i < numChildren; i++) {
    subtreesize += partialCount[i];
  }

  return subtreesize;
}
/* * Generate all children of the parent * * details depend on tree type, node type and shape function * */ void genChildren(Node * parent, Node * child) { int parentHeight = parent->height; int numChildren, childType; #ifdef THREAD_METADATA t_metadata[omp_get_thread_num()].ntasks += 1; #endif thread_info[hclib::get_current_worker()].n_nodes++; numChildren = uts_numChildren(parent); childType = uts_childType(parent); // record number of children in parent parent->numChildren = numChildren; // construct children and push onto stack if (numChildren > 0) { int i, j; child->type = childType; child->height = parentHeight + 1; #ifdef UTS_STAT if (stats) { child->pp = parent; // pointer to parent } #endif const unsigned char * parent_state = parent->state.state; unsigned char * child_state = child->state.state; for (i = 0; i < numChildren; i++) { for (j = 0; j < computeGranularity; j++) { // TBD: add parent height to spawn // computeGranularity controls number of rng_spawn calls per node rng_spawn(parent_state, child_state, i); } Node parent = *child; int made_available_for_stealing = 0; if (hclib::get_current_worker() == 0 && n_buffered_steals < N_BUFFERED_STEALS) { hclib::shmem_set_lock(&steal_buffer_locks[pe]); if (n_buffered_steals < N_BUFFERED_STEALS) { steal_buffer[n_buffered_steals++] = parent; made_available_for_stealing = 1; } hclib::shmem_clear_lock(&steal_buffer_locks[pe]); } if (!made_available_for_stealing) { if (parent.height < 9) { hclib::async([parent] { Node child; initNode(&child); Node tmp = parent; genChildren(&tmp, &child); }); } else { Node child; initNode(&child); genChildren(&parent, &child); } } } } else { thread_info[hclib::get_current_worker()].n_leaves++; } }