예제 #1
0
파일: uts.c 프로젝트: agrippa/omp-to-x
/*
 * Count the nodes of the subtree rooted at `parent`.
 *
 * depth       - depth of `parent`; only forwarded (as depth+1) to the
 *               recursive calls
 * parent      - node whose children are generated; its height and RNG
 *               state are read
 * numChildren - number of children to generate for `parent`
 *
 * Returns 1 (for `parent`) plus the sum of the counts of all child subtrees.
 *
 * The hclib_pragma_marker() calls carry the text of OpenMP directives
 * (task / taskwait). NOTE(review): presumably they are consumed by a
 * source-to-source translator and apply to the statement that follows
 * them -- confirm before moving any of them.
 *
 * Aborts the process if the per-call scratch buffers cannot be allocated,
 * because the return value has no way to report an error.
 */
unsigned long long parTreeSearch(int depth, Node *parent, int numChildren) 
{
  Node *n;
  Node *nodePtr;
  int i, j;
  unsigned long long subtreesize = 1;
  unsigned long long *partialCount;

  // A node without children contributes only itself. Returning here also
  // avoids malloc(0), whose NULL result could not be told apart from a
  // real allocation failure below.
  if (numChildren <= 0) {
    return subtreesize;
  }

  n = (Node *)malloc(numChildren * sizeof(Node));
  partialCount = (unsigned long long *)malloc(numChildren * sizeof(unsigned long long));
  if (n == NULL || partialCount == NULL) {
    // Stop deterministically instead of dereferencing a NULL pointer.
    free(n);
    free(partialCount);
    abort();
  }

  // Recurse on the children
  for (i = 0; i < numChildren; i++) {
     nodePtr = &n[i];

     nodePtr->height = parent->height + 1;

     // The following line is the work (one or more SHA-1 ops)
     for (j = 0; j < computeGranularity; j++) {
        rng_spawn(parent->state.state, nodePtr->state.state, i);
     }

     nodePtr->numChildren = uts_numChildren(nodePtr);

hclib_pragma_marker("omp", "task untied firstprivate(i, nodePtr) shared(partialCount)", "pragma221_omp_task");
        partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
  }

hclib_pragma_marker("omp", "taskwait", "pragma225_omp_taskwait");

  // Every slot of partialCount has been written once the children are done.
  for (i = 0; i < numChildren; i++) {
     subtreesize += partialCount[i];
  }
  free(n);
  free(partialCount);
  
  return subtreesize;
}
예제 #2
0
파일: uts.c 프로젝트: steleman/bots
/*
 * Count the nodes of the subtree rooted at `parent`, searching each child
 * subtree in its own untied OpenMP task.
 *
 * depth       - depth of `parent`; only forwarded (as depth+1) to the
 *               recursive calls
 * parent      - node whose children are generated; its height and RNG
 *               state are read
 * numChildren - number of children to generate for `parent`
 *
 * Returns 1 (for `parent`) plus the sum of the counts of all child subtrees.
 */
unsigned long long parTreeSearch(int depth, Node *parent, int numChildren) 
{
  // A leaf contributes only itself. This must come before the array
  // declarations: a variable-length array whose size is not greater than
  // zero is undefined behaviour.
  if (numChildren <= 0) {
    return 1;
  }

  Node n[numChildren], *nodePtr;
  int i, j;
  unsigned long long subtreesize = 1, partialCount[numChildren];

  // Recurse on the children
  for (i = 0; i < numChildren; i++) {
     nodePtr = &n[i];

     nodePtr->height = parent->height + 1;

     // The following line is the work (one or more SHA-1 ops)
     for (j = 0; j < computeGranularity; j++) {
        rng_spawn(parent->state.state, nodePtr->state.state, i);
     }

     nodePtr->numChildren = uts_numChildren(nodePtr);

     // i and nodePtr are captured by value; partialCount stays shared so
     // the task writes its result into this frame's array.
     #pragma omp task untied firstprivate(i, nodePtr) shared(partialCount)
        partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
  }

  // n[] and partialCount[] live in this stack frame, so the child tasks
  // must have finished before they are read or the function returns.
  #pragma omp taskwait

  for (i = 0; i < numChildren; i++) {
     subtreesize += partialCount[i];
  }
  
  return subtreesize;
}
예제 #3
0
/*
 * Produce every child of `parent` and hand each one to the steal stack.
 *
 * parent    - node being expanded; its numChildren field is filled in here
 * child_buf - buffer passed to ss_put_work() once per generated child
 * child     - node that receives the type, height and RNG state of each child
 *             (NOTE(review): presumably points into child_buf -- confirm
 *             against the caller)
 * ss        - steal stack; its maxTreeDepth / nLeaves statistics are updated
 *
 * The number and type of the children come from uts_numChildren() and
 * uts_childType().
 */
void genChildren(Node * parent, void * child_buf, Node * child, StealStack * ss) {
  const int heightOfParent = parent->height;
  int fanout, kind;
  int idx, rep;

  ss->maxTreeDepth = max(ss->maxTreeDepth, parent->height);

  fanout = uts_numChildren(parent);
  kind   = uts_childType(parent);

  // remember the fan-out on the parent itself
  parent->numChildren = fanout;

  // a node without children only counts as a leaf
  if (fanout <= 0) {
    ss->nLeaves++;
    return;
  }

  // type and height are the same for all siblings, so set them once
  child->type = kind;
  child->height = heightOfParent + 1;

  for (idx = 0; idx < fanout; idx++) {
    // computeGranularity controls how many rng_spawn calls are made per node
    // TBD:  add parent height to spawn
    rep = 0;
    while (rep < computeGranularity) {
      rng_spawn(parent->state.state, child->state.state, idx);
      rep++;
    }

    ss_put_work(ss, child_buf);
  }
}
예제 #4
0
파일: uts.c 프로젝트: kempj/hpxMP
/*
 * Run the serial tree search starting at `root` and return the node count
 * reported by serTreeSearch(). Progress messages are emitted through
 * bots_message() before and after the search.
 */
counter_t serial_uts ( Node *root )
{
   bots_message("Computing Unbalance Tree Search algorithm ");

   /* the root's fan-out is computed here and passed down with depth 0 */
   const counter_t total = serTreeSearch( 0, root, uts_numChildren(root) );

   bots_message(" completed!\n");
   return total;
}
예제 #5
0
/***********************************************************
 * Recursive depth-first implementation                    *
 ***********************************************************/
/*
 * Compute the number of children of `root` with uts_numChildren(), store
 * it in root->numChildren, and return it.
 */
int getNumRootChildren(Node *root)
{
	const int count = uts_numChildren(root);

	root->numChildren = count;
	return count;
}
예제 #6
0
/*
 * HPX action body: count the nodes of the subtree described by `args`.
 *
 * args - pointer to a struct thread_data; its depth, parent and numChildren
 *        fields are read
 * size - not used in this function
 *
 * For every child of the parent node a new Node is generated, packed into a
 * struct thread_data and passed to `_uts` through hpx_call_sync(); the
 * per-child results are then summed together with 1 for the parent itself
 * and handed to HPX_THREAD_CONTINUE().
 *
 * NOTE(review): the statement after HPX_THREAD_CONTINUE() returns
 * HPX_SUCCESS from a function declared to return counter_t; whether that
 * statement is ever reached depends on the macro, which is not visible
 * here -- confirm.
 */
static counter_t _uts_action(void *args, size_t size) 
{
	int i, j;
	struct thread_data *my_data;
	struct thread_data temp, input;
	my_data = (struct thread_data *)args;	

	// Both arrays are variable-length and sized by the incoming child count.
	// NOTE(review): a count of 0 would make them zero-length VLAs -- confirm
	// callers never pass a leaf, or guard against it.
	Node n[my_data->numChildren], *nodePtr;
	counter_t subtreesize = 1, partialCount[my_data->numChildren];

	// Work on a local copy of the arguments.
	temp.depth = my_data->depth;
	memcpy(&temp.parent, &my_data->parent, sizeof(Node));
	temp.numChildren = my_data->numChildren;

	//hpx_lco_sema_p (mutex);
	//printf("D: %d; child: %d; spawns:%.0f\n", temp.depth, temp.numChildren, spawns_counter++);
	//hpx_lco_sema_v_sync (mutex);

	/*
	   printf("\n[Node] height = %d; numChildren = %d\n"
	   , temp.parent.height
	   , temp.parent.numChildren);
	   */

	hpx_addr_t theThread = HPX_HERE;
	// NOTE(review): `done` is created here but never referenced again in this
	// function -- it looks unused (and possibly leaked); confirm.
	hpx_addr_t done = hpx_lco_future_new(sizeof(uint64_t));
	// Recurse on the children
	for (i = 0; i < temp.numChildren; i++) {
		nodePtr = &n[i];

		nodePtr->height = temp.parent.height + 1;

		// The following line is the work (one or more SHA-1 ops)
		for (j = 0; j < computeGranularity; j++) {
			rng_spawn(temp.parent.state.state, nodePtr->state.state, i);
		}

		nodePtr->numChildren = uts_numChildren(nodePtr);

		// `input` is refilled on every iteration with this child's description.
		input.depth = temp.depth+1;
		memcpy(&input.parent, nodePtr, sizeof(Node));
		input.numChildren = nodePtr->numChildren;
		//partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
		// Presumably runs `_uts` at theThread and stores its result in
		// partialCount[i] before returning (synchronous call) -- confirm
		// against the HPX API.
		hpx_call_sync(theThread, _uts, &partialCount[i], sizeof(partialCount[i]), &input, sizeof(input));
	}

	// Add up the per-child results on top of the 1 counted for this node.
	for (i = 0; i < temp.numChildren; i++) {
		subtreesize += partialCount[i];
	}

	HPX_THREAD_CONTINUE(subtreesize);
	return HPX_SUCCESS;
}
예제 #7
0
파일: uts.c 프로젝트: steleman/bots
/*
 * Run the task-parallel tree search starting at `root` and return the node
 * count reported by parTreeSearch().
 *
 * The root's fan-out is computed and stored in root->numChildren first.
 * The search itself is started as one untied task created by a single
 * thread of the parallel region; the region does not end before that task
 * has finished, so the result is available after it.
 */
unsigned long long parallel_uts ( Node *root )
{
   unsigned long long total = 0;

   root->numChildren = uts_numChildren(root);

   bots_message("Computing Unbalance Tree Search algorithm ");

   #pragma omp parallel
   {
      #pragma omp single nowait
      {
         #pragma omp task untied
         {
            total = parTreeSearch( 0, root, root->numChildren );
         }
      }
   }

   bots_message(" completed!");

   return total;
}
예제 #8
0
파일: uts.c 프로젝트: steleman/bots
/*
 * Serially count the nodes of the subtree rooted at `parent`.
 *
 * depth       - depth of `parent`; only forwarded (as depth+1) to the
 *               recursive calls
 * parent      - node whose children are generated; its height and RNG
 *               state are read
 * numChildren - number of children to generate for `parent`
 *
 * Returns 1 (for `parent`) plus the sum of the counts of all child subtrees.
 */
unsigned long long serTreeSearch(int depth, Node *parent, int numChildren) 
{
  // A leaf contributes only itself. This must come before the array
  // declaration: a variable-length array whose size is not greater than
  // zero is undefined behaviour.
  if (numChildren <= 0) {
    return 1;
  }

  unsigned long long subtreesize = 1;
  Node n[numChildren];
  int i, j;

  // Recurse on the children
  for (i = 0; i < numChildren; i++) {
     n[i].height = parent->height + 1;
     // The following line is the work (one or more SHA-1 ops)
     for (j = 0; j < computeGranularity; j++) {
        rng_spawn(parent->state.state, n[i].state.state, i);
     }
     // The recursion is sequential, so each child's count can be added
     // directly instead of being parked in a per-child array first.
     subtreesize += serTreeSearch(depth+1, &n[i], uts_numChildren(&n[i]));
  }
  
  return subtreesize;
}
예제 #9
0
파일: uts.c 프로젝트: kempj/hpxMP
/*
 * Count the nodes of the subtree rooted at `parent`, searching each child
 * subtree in its own untied OpenMP task.
 *
 * depth       - depth of `parent`; only forwarded (as depth+1) to the
 *               recursive calls
 * parent      - node whose children are generated; its height and RNG
 *               state are read
 * numChildren - number of children to generate for `parent`
 *
 * Returns 1 (for `parent`) plus the sum of the counts of all child subtrees.
 *
 * The child nodes and their partial counts are kept in heap buffers that
 * are released before returning. Aborts the process if those buffers
 * cannot be allocated, because the return value has no way to report an
 * error.
 */
counter_t parTreeSearch(int depth, Node *parent, int numChildren)
{
    Node *n, *nodePtr;
    int i, j;
    counter_t subtreesize = 1;
    counter_t *partialCount;

    // A node without children contributes only itself. Returning here also
    // avoids malloc(0), whose NULL result could not be told apart from a
    // real allocation failure below.
    if (numChildren <= 0) {
        return subtreesize;
    }

    n = (Node*)malloc(numChildren * sizeof(Node));
    partialCount = (counter_t*)malloc(numChildren * sizeof(counter_t));
    if (n == NULL || partialCount == NULL) {
        // Stop deterministically instead of dereferencing a NULL pointer.
        free(n);
        free(partialCount);
        abort();
    }

    // Recurse on the children
    for (i = 0; i < numChildren; i++) {
        nodePtr = &n[i];
        nodePtr->height = parent->height + 1;

        // The following line is the work (one or more SHA-1 ops)
        for (j = 0; j < computeGranularity; j++) {
            rng_spawn(parent->state.state, nodePtr->state.state, i);
        }

        nodePtr->numChildren = uts_numChildren(nodePtr);

        // i and nodePtr are captured by value; partialCount stays shared so
        // the task writes its result into the buffer owned by this call.
        #pragma omp task firstprivate(i, nodePtr) shared(partialCount) untied
        partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
    }

    // The buffers are read and freed below, so the child tasks must have
    // finished first.
    #pragma omp taskwait

    for (i = 0; i < numChildren; i++) {
        subtreesize += partialCount[i];
    }

    free(n);
    free(partialCount);
    return subtreesize;
}
예제 #10
0
파일: uts.c 프로젝트: agrippa/omp-to-x
/*
 * Run the tree search starting at `root` and return the node count reported
 * by parTreeSearch().
 *
 * The root's fan-out is computed and stored in root->numChildren first.
 * Progress messages are emitted through bots_message() before and after
 * the search.
 *
 * NOTE(review): the hclib_pragma_marker() calls carry the text of OpenMP
 * directives (parallel / single nowait / task untied) plus an
 * "omp_to_hclib" marker; presumably a source-to-source translator consumes
 * them and relies on each marker directly preceding the brace block it
 * applies to -- confirm before reformatting.
 */
unsigned long long parallel_uts ( Node *root )
{
   unsigned long long num_nodes = 0 ;
   root->numChildren = uts_numChildren(root);

   bots_message("Computing Unbalance Tree Search algorithm ");

hclib_pragma_marker("omp_to_hclib", "", "pragma183_omp_to_hclib");
   {
hclib_pragma_marker("omp", "parallel", "pragma185_omp_parallel");
       {
hclib_pragma_marker("omp", "single nowait", "pragma187_omp_single");
           {
hclib_pragma_marker("omp", "task untied", "pragma189_omp_task");
        // Search starts at depth 0 with the fan-out stored on the root above.
        num_nodes = parTreeSearch( 0, root, root->numChildren );
           }
       }
   }

   bots_message(" completed!");

   return num_nodes;
}
예제 #11
0
파일: uts2-lace.c 프로젝트: fritzo/lace
TASK_2(Result, parTreeSearch, int, depth, Node *, parent) {
  int numChildren, childType;
  counter_t parentHeight = parent->height;

  Result r = { depth, 1, 0 };

  numChildren = uts_numChildren(parent);
  childType   = uts_childType(parent);

  // record number of children in parent
  parent->numChildren = numChildren;
  
  // Recurse on the children
  if (numChildren > 0) {
    int i, j;
    for (i = 0; i < numChildren; i++) {
      Node *child = (Node*)alloca(sizeof(Node));
      child->type = childType;
      child->height = parentHeight + 1;
      child->numChildren = -1;    // not yet determined
      for (j = 0; j < computeGranularity; j++) {
        rng_spawn(parent->state.state, child->state.state, i);
      }
      SPAWN(parTreeSearch, depth+1, child);
    }

    /* Wait a bit */
    struct timespec tim = (struct timespec){0, 100L*numChildren};
    nanosleep(&tim, NULL);

    for (i = 0; i < numChildren; i++) {
      Result c = SYNC(parTreeSearch);
      if (c.maxdepth>r.maxdepth) r.maxdepth = c.maxdepth;
      r.size += c.size;
      r.leaves += c.leaves;
    }
  } else {
예제 #12
0
/*
 * Count the nodes of the subtree rooted at `parent`.
 *
 * depth       - depth of `parent`; only forwarded (as depth+1) to the
 *               recursive calls
 * parent      - node whose children are generated; its height and RNG
 *               state are read
 * numChildren - number of children to generate for `parent`
 *
 * Returns 1 (for `parent`) plus the sum of the counts of all child subtrees.
 *
 * The OpenMP task / taskwait directives are commented out in this variant,
 * so the children are searched one after another.
 */
counter_t parTreeSearch(int depth, Node *parent, int numChildren) 
{
	// A leaf contributes only itself. This must come before the array
	// declarations: a variable-length array whose size is not greater than
	// zero is undefined behaviour.
	if (numChildren <= 0) {
		return 1;
	}

	Node n[numChildren], *nodePtr;
	int i, j;
	counter_t subtreesize = 1, partialCount[numChildren];

	// Recurse on the children
	for (i = 0; i < numChildren; i++) {
		nodePtr = &n[i];

		nodePtr->height = parent->height + 1;

		// The following line is the work (one or more SHA-1 ops)
		for (j = 0; j < computeGranularity; j++) {
			rng_spawn(parent->state.state, nodePtr->state.state, i);
		}

		nodePtr->numChildren = uts_numChildren(nodePtr);

		//#pragma omp task firstprivate(i, nodePtr) shared(partialCount) untied
		partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
	}

	//#pragma omp taskwait

	// Add up the per-child results on top of the 1 counted for this node.
	for (i = 0; i < numChildren; i++) {
		subtreesize += partialCount[i];
	}

	return subtreesize;
}
예제 #13
0
/*
 * Expand `parent`: generate each of its children and either publish the
 * child in the steal buffer, spawn an asynchronous task for it, or expand
 * it inline.
 *
 * parent - node being expanded; its numChildren field is filled in here
 * child  - scratch node that receives the type, height and RNG state of
 *          each generated child before it is copied
 *
 * The number and type of the children come from uts_numChildren() and
 * uts_childType(). The per-worker n_nodes / n_leaves counters in
 * thread_info are updated as a side effect.
 */
void genChildren(Node * parent, Node * child) {
  const int parentHeight = parent->height;

#ifdef THREAD_METADATA
  t_metadata[omp_get_thread_num()].ntasks += 1;
#endif

  thread_info[hclib::get_current_worker()].n_nodes++;

  const int numKids = uts_numChildren(parent);
  const int kidType = uts_childType(parent);

  // remember the fan-out on the parent itself
  parent->numChildren = numKids;

  // a node without children only counts as a leaf
  if (numKids <= 0) {
      thread_info[hclib::get_current_worker()].n_leaves++;
      return;
  }

  // type and height are the same for all siblings, so set them once
  child->type = kidType;
  child->height = parentHeight + 1;

#ifdef UTS_STAT
  if (stats) {
    child->pp = parent;  // back-pointer to the parent
  }
#endif

  const unsigned char * srcState = parent->state.state;
  unsigned char * dstState = child->state.state;

  for (int idx = 0; idx < numKids; idx++) {
    // computeGranularity controls how many rng_spawn calls are made per node
    // TBD:  add parent height to spawn
    for (int rep = 0; rep < computeGranularity; rep++) {
        rng_spawn(srcState, dstState, idx);
    }

    // Snapshot of the freshly generated child; it becomes the parent of
    // the next expansion level.
    Node spawned = *child;

    // Only worker 0 offers nodes to the steal buffer. The capacity is
    // checked again after taking the lock before a slot is written.
    bool buffered = false;
    if (hclib::get_current_worker() == 0 && n_buffered_steals < N_BUFFERED_STEALS) {
        hclib::shmem_set_lock(&steal_buffer_locks[pe]);
        if (n_buffered_steals < N_BUFFERED_STEALS) {
            steal_buffer[n_buffered_steals++] = spawned;
            buffered = true;
        }
        hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
    }

    if (buffered) {
        continue;
    }

    if (spawned.height < 9) {
        // Nodes with height below 9 are expanded in their own async task,
        // which works on a by-value copy of the node.
        hclib::async([spawned] {
            Node scratch;
            initNode(&scratch);

            Node subRoot = spawned;

            genChildren(&subRoot, &scratch);
        });
    } else {
        // Other nodes are expanded inline by direct recursion.
        Node scratch;
        initNode(&scratch);

        genChildren(&spawned, &scratch);
    }
  }
}