Пример #1
/*
 * Generate all children of the parent.
 * Details depend on tree type, node type and shape function.
 */
// Expand one node: compute how many children `parent` has and of what type,
// then, once per child, derive the child's RNG state into `child` and push
// `child_buf` onto the steal stack `ss`.
// NOTE(review): `child` is what gets filled in but `child_buf` is what gets
// pushed; presumably `child` points into `child_buf` -- confirm with callers.
// NOTE(review): this excerpt has lost its brace-only lines and stops at the
// `else` branch, so the nesting below is inferred from indentation only.
void genChildren(Node * parent, void * child_buf, Node * child, StealStack * ss) {
  int parentHeight = parent->height;
  int numChildren, childType;

  // keep track of the largest parent height this steal stack has seen
  ss->maxTreeDepth = max(ss->maxTreeDepth, parent->height);

  numChildren = uts_numChildren(parent);
  childType   = uts_childType(parent);

  // record number of children in parent
  parent->numChildren = numChildren;

  // construct children and push onto stack
  if (numChildren > 0) {
    int i, j;
    // type and height are the same for every child, so set them once
    child->type = childType;
    child->height = parentHeight + 1;

    for (i = 0; i < numChildren; i++) {
      // every pass of the inner loop repeats the same spawn (same inputs,
      // same output buffer); it is repeated only to scale the work per node
      for (j = 0; j < computeGranularity; j++) {
        // TBD:  add parent height to spawn
        // computeGranularity controls number of rng_spawn calls per node
        rng_spawn(parent->state.state, child->state.state, i);

      // hand the freshly built child to the steal stack
      ss_put_work(ss, child_buf);
  } else {
Пример #2
/*
 * Count the nodes in the subtree rooted at `parent`, the parent included.
 *
 * depth       - recursion depth; only forwarded (as depth+1) to the children.
 * parent      - node being expanded; its height and RNG state are read.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 plus the sizes of all child subtrees.  Every child subtree is
 * explored by a recursive call placed under the "omp task" marker; the
 * "taskwait" marker precedes the summation of the per-child results.
 *
 * The two scratch buffers are owned by this call: they are allocated here and
 * released before returning.  Running out of memory aborts the process, since
 * the return value has no way to signal an error.
 */
unsigned long long parTreeSearch(int depth, Node *parent, int numChildren)
{
  Node *n = (Node *)malloc(numChildren * sizeof(Node));
  Node *nodePtr;
  int i, j;
  unsigned long long subtreesize = 1;
  unsigned long long *partialCount = (unsigned long long *)malloc(numChildren * sizeof(unsigned long long));

  // malloc(0) may legitimately return NULL, so a NULL result only counts as a
  // failure when there really are children to store.
  if (numChildren > 0 && (n == NULL || partialCount == NULL)) {
    free(n);
    free(partialCount);
    abort();
  }

  // Recurse on the children
  for (i = 0; i < numChildren; i++) {
    nodePtr = &n[i];

    nodePtr->height = parent->height + 1;

    // The following loop is the work (one or more SHA-1 ops)
    for (j = 0; j < computeGranularity; j++) {
      rng_spawn(parent->state.state, nodePtr->state.state, i);
    }

    nodePtr->numChildren = uts_numChildren(nodePtr);

hclib_pragma_marker("omp", "task untied firstprivate(i, nodePtr) shared(partialCount)", "pragma221_omp_task");
    partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
  }

hclib_pragma_marker("omp", "taskwait", "pragma225_omp_taskwait");

  for (i = 0; i < numChildren; i++) {
    subtreesize += partialCount[i];
  }

  // All child tasks have completed and been summed; the buffers are no
  // longer referenced by anyone.
  free(partialCount);
  free(n);

  return subtreesize;
}
Пример #3
// Count the nodes in the subtree rooted at `parent`, the parent included
// (the returned value is 1 plus the sum of the children's results).
// Notes:
// -    Each task receives distinct copy of parent (it is passed by value)
// -    Copy of child is shallow, be careful with `state` member
// NOTE(review): this excerpt has lost its brace-only lines and apparently a
// few statements (see the notes below); nesting is inferred from indentation.
static long visit(node_t parent)
    // one local child record, refilled for every child and handed to the
    // spawned call by value
    node_t    child;
    // NOTE(review): the element size is given as sizeof(long) although the
    // buffer is used as uint64_t; the two differ where long is 32 bits.  The
    // arguments are also in (size, count) order instead of calloc's
    // (count, size), which does not change the amount allocated.
    uint64_t *child_descendants = calloc(sizeof(long), parent.num_children);

    // presumably declares an addition reducer named num_descendants that
    // starts at 0 -- confirm against the Cilk reducer headers
    CILK_C_REDUCER_OPADD(num_descendants, ulong, 0);
    uint64_t tmp;

    // Spawn children, if any
    for (int i = 0; i < parent.num_children; i++) {
        child.height = parent.height + 1;

        // repeated with identical arguments; only scales the work per node
        for (int j = 0; j < num_samples; j++) {
            rng_spawn(parent.state.state, child.state.state, i);

        child.num_children = calc_num_children(&child);

        // each child's subtree size lands in its own slot
        child_descendants[i] = _Cilk_spawn visit(child);

    // NOTE(review): no sync is visible here before the slots written by the
    // spawned calls are read below -- check the complete source.


    // fold the per-child counts into the reducer
    _Cilk_for(int i = 0; i < parent.num_children; i++) {
        REDUCER_VIEW(num_descendants) += child_descendants[i];

    // +1 accounts for this node itself
    tmp = 1 + REDUCER_VIEW(num_descendants);

    // NOTE(review): child_descendants is not released anywhere in the
    // visible code.

    return tmp;
Пример #4
/*
 * Count the nodes in the subtree rooted at `parent`, the parent included.
 *
 * depth       - recursion depth; only forwarded (as depth+1) to the children.
 * parent      - node being expanded; its height and RNG state are read.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 plus the sizes of all child subtrees.  Each child subtree is
 * explored in its own OpenMP task; the taskwait guarantees that every
 * partialCount slot has been written before the slots are summed.
 */
unsigned long long parTreeSearch(int depth, Node *parent, int numChildren)
{
  unsigned long long subtreesize = 1;
  int i, j;

  // A leaf has nothing to expand.  Returning here also keeps the
  // variable-length arrays below from being declared with a length that is
  // not greater than zero, which is undefined behaviour in C.
  if (numChildren <= 0) {
    return subtreesize;
  }

  Node n[numChildren], *nodePtr;
  unsigned long long partialCount[numChildren];

  // Recurse on the children
  for (i = 0; i < numChildren; i++) {
    nodePtr = &n[i];

    nodePtr->height = parent->height + 1;

    // The following loop is the work (one or more SHA-1 ops)
    for (j = 0; j < computeGranularity; j++) {
      rng_spawn(parent->state.state, nodePtr->state.state, i);
    }

    nodePtr->numChildren = uts_numChildren(nodePtr);

    #pragma omp task untied firstprivate(i, nodePtr) shared(partialCount)
    partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
  }

  // n[] and partialCount[] live on this frame, so the child tasks must be
  // finished before the frame is left.
  #pragma omp taskwait

  for (i = 0; i < numChildren; i++) {
    subtreesize += partialCount[i];
  }

  return subtreesize;
}
Пример #5
// Action body for one tree node: `args` is read as a struct thread_data
// (depth, parent node, child count); the node's children are generated and
// `_uts` is invoked once per child, with each result stored in partialCount.
// `size` is not used in the visible code.
// NOTE(review): this excerpt has lost its brace-only lines; nesting is
// inferred from indentation.
static counter_t _uts_action(void *args, size_t size) 
	int i, j;
	struct thread_data *my_data;
	struct thread_data temp, input;
	my_data = (struct thread_data *)args;	

	// NOTE(review): both arrays get length 0 when the node has no children
	Node n[my_data->numChildren], *nodePtr;
	counter_t subtreesize = 1, partialCount[my_data->numChildren];

	// work on a private copy of the incoming arguments
	temp.depth = my_data->depth;
	memcpy(&temp.parent, &my_data->parent, sizeof(Node));
	temp.numChildren = my_data->numChildren;

	//hpx_lco_sema_p (mutex);
	//printf("D: %d; child: %d; spawns:%.0f\n", temp.depth, temp.numChildren, spawns_counter++);
	//hpx_lco_sema_v_sync (mutex);

	   // trace output, printed for every node visited
	   printf("\n[Node] height = %d; numChildren = %d\n"
	   , temp.parent.height
	   , temp.parent.numChildren);

	hpx_addr_t theThread = HPX_HERE;
	// NOTE(review): `done` is created but never used in the visible code
	hpx_addr_t done = hpx_lco_future_new(sizeof(uint64_t));
	// Recurse on the children
	for (i = 0; i < temp.numChildren; i++) {
		nodePtr = &n[i];

		nodePtr->height = temp.parent.height + 1;

		// The following line is the work (one or more SHA-1 ops)
		for (j = 0; j < computeGranularity; j++) {
			rng_spawn(temp.parent.state.state, nodePtr->state.state, i);

		nodePtr->numChildren = uts_numChildren(nodePtr);

		// package the child as the argument record for the recursive call
		input.depth = temp.depth+1;
		memcpy(&input.parent, nodePtr, sizeof(Node));
		input.numChildren = nodePtr->numChildren;
		//partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
		// NOTE(review): the _sync suffix suggests this blocks until
		// partialCount[i] is filled in, which would make the traversal
		// sequential and make reusing `input` safe -- confirm against HPX docs
		hpx_call_sync(theThread, _uts, &partialCount[i], sizeof(partialCount[i]), &input, sizeof(input));

	for (i = 0; i < temp.numChildren; i++) {
		subtreesize += partialCount[i];

	// NOTE(review): subtreesize is accumulated but only a status code is
	// returned here; the statement that passes the count back to the caller
	// may be missing from this excerpt.
	return HPX_SUCCESS;
Пример #6
// Count the nodes in the subtree rooted at `parent`, the parent included
// (num_descendants starts at 1 and every child's result is added to it).
// Notes:
// -    `parent` is passed by pointer; each child task gets a pointer to its
//      own slot of child_nodes, so tasks do not share a node record
// -    Copy of child is shallow, be careful with `state` member
// NOTE(review): this excerpt has lost its brace-only lines and several
// preprocessor lines; nesting is inferred from indentation.
static long visit(node_t *parent,
                  int     num_children)
    uint64_t num_descendants = 1;

    // NOTE(review): the array pair and the pointer pair below declare the same
    // names; presumably they are the two alternatives of a BIG_STACKS
    // conditional whose directive lines are missing here.
    uint64_t child_descendants[num_children];
    node_t   child_nodes[num_children];
    uint64_t *child_descendants;
    node_t   *child_nodes;

    // heap variant: only allocate when there is something to store
    if (num_children > 0) {
        child_descendants = calloc(sizeof(uint64_t), num_children);
        child_nodes       = malloc(sizeof(node_t) * num_children);

    // Spawn children, if any
    for (int i = 0; i < num_children; i++) {
        node_t *child = &child_nodes[i];

        child->height = parent->height + 1;

        // repeated with identical arguments; only scales the work per node
        for (int j = 0; j < num_samples; j++) {
            rng_spawn(parent->state.state, child->state.state, i);

        child->num_children = calc_num_children(child);

        // each task writes its subtree size into its own slot
#pragma omp task untied firstprivate(i, child) shared(child_descendants)
        child_descendants[i] = visit(child, child->num_children);

    // all slots must be written before they are summed below
#pragma omp taskwait

// #pragma omp parallel for reduction(+:num_descendants)
    for (int i = 0; i < num_children; i++) {
        num_descendants += child_descendants[i];

    // NOTE(review): the body of this conditional (presumably releasing the
    // two heap buffers) and its closing directive are missing from this
    // excerpt.
#ifndef BIG_STACKS
    if (num_children > 0) {

    return num_descendants;
Пример #7
// Thread entry point for one tree node.  `args_` is read as a node_t; the
// node's subtree size (itself included) is written through parent->acc and
// the completion counter behind parent->dc is incremented.
// Notes:
// -    Each task receives distinct copy of parent
// -    Copy of child is shallow, be careful with `state` member
// NOTE(review): this excerpt has lost its brace-only lines and at least one
// statement; nesting is inferred from indentation.
static aligned_t visit(void *args_)
    node_t  *parent          = (node_t *)args_;
    int      parent_height   = parent->height;
    int      num_children    = parent->num_children;
    aligned_t expect         = parent->expect;
    // NOTE(review): declared before the num_children != 0 test, so the array
    // has length 0 for a leaf
    aligned_t num_descendants[num_children];
    aligned_t sum_descendants = 1;

    if (num_children != 0) {
        // single child record, refilled for each fork
        node_t     child __attribute__((aligned(8)));
        // completion counter the children increment through child.dc
        aligned_t  donec = 0;

        // Spawn children, if any
        child.height = parent_height + 1;
        child.dc     = &donec;
        child.expect = num_children;


        for (int i = 0; i < num_children; i++) {
            // child i reports its subtree size into slot i
            child.acc    = &num_descendants[i];

            // repeated with identical arguments; only scales the work per node
            for (int j = 0; j < num_samples; j++) {
                rng_spawn(parent->state.state, child.state.state, i);

            child.num_children = calc_num_children(&child);

            // presumably copies sizeof(node_t) bytes of `child` for the new
            // thread, so reusing `child` next iteration is safe -- confirm
            qthread_fork_syncvar_copyargs(visit, &child, sizeof(node_t), NULL);

        // Wait for children to finish up, accumulate descendants counts
        // NOTE(review): `expect` here was read from this node's own record,
        // while the children were told to expect num_children; verify this
        // comparison is against the intended value.
        if (donec != expect) qthread_readFF(NULL, &donec);

        for (int i = 0; i < num_children; i++) {
            sum_descendants += num_descendants[i];

    // publish this node's total, then signal completion to whoever owns dc
    *parent->acc = sum_descendants;
    // NOTE(review): the body of this `if` is missing from this excerpt
    if (qthread_incr(parent->dc, 1) + 1 == expect) {

    return 0;
Пример #8
/*
 * Serial count of the nodes in the subtree rooted at `parent`, the parent
 * included.
 *
 * depth       - recursion depth; only forwarded (as depth+1) to the children.
 * parent      - node being expanded; its height and RNG state are read.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 plus the sizes of all child subtrees.
 */
unsigned long long serTreeSearch(int depth, Node *parent, int numChildren)
{
  unsigned long long subtreesize = 1;
  int i, j;

  // A leaf has nothing to expand.  Returning here also keeps the
  // variable-length array below from being declared with a length that is
  // not greater than zero, which is undefined behaviour in C.
  if (numChildren <= 0) {
    return subtreesize;
  }

  Node n[numChildren];

  // Recurse on the children.  Children are visited one after another, so each
  // result is added straight away; no per-child result array is needed.
  for (i = 0; i < numChildren; i++) {
    n[i].height = parent->height + 1;

    // The following loop is the work (one or more SHA-1 ops)
    for (j = 0; j < computeGranularity; j++) {
      rng_spawn(parent->state.state, n[i].state.state, i);
    }

    subtreesize += serTreeSearch(depth+1, &n[i], uts_numChildren(&n[i]));
  }

  return subtreesize;
}
Пример #9
Файл: uts.c Проект: kempj/hpxMP
/*
 * Count the nodes in the subtree rooted at `parent`, the parent included.
 *
 * depth       - recursion depth; only forwarded (as depth+1) to the children.
 * parent      - node being expanded; its height and RNG state are read.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 plus the sizes of all child subtrees.  Each child subtree is
 * explored in its own OpenMP task; the taskwait guarantees that every
 * partialCount slot has been written before the slots are summed.
 *
 * The child nodes and per-child results live on the heap rather than in
 * variable-length arrays.  Both buffers are owned by this call and released
 * before it returns.  Running out of memory aborts the process, since the
 * return value has no way to signal an error.
 */
counter_t parTreeSearch(int depth, Node *parent, int numChildren)
{
    Node *n, *nodePtr;
    int i, j;
    counter_t subtreesize = 1;
    counter_t *partialCount;

    // A leaf has nothing to expand; skip the zero-sized allocations.
    if (numChildren <= 0) {
        return subtreesize;
    }

    n = (Node*)malloc(numChildren * sizeof(Node));
    partialCount = (counter_t*)malloc(numChildren * sizeof(counter_t));
    if (n == NULL || partialCount == NULL) {
        free(n);
        free(partialCount);
        abort();
    }

    // Recurse on the children
    for (i = 0; i < numChildren; i++) {
        nodePtr = &n[i];
        nodePtr->height = parent->height + 1;

        // The following loop is the work (one or more SHA-1 ops)
        for (j = 0; j < computeGranularity; j++) {
            rng_spawn(parent->state.state, nodePtr->state.state, i);
        }

        nodePtr->numChildren = uts_numChildren(nodePtr);

        #pragma omp task firstprivate(i, nodePtr) shared(partialCount) untied
        partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
    }

    // The child tasks read n[] and write partialCount[]; they must all be
    // finished before the results are summed and the buffers released.
    #pragma omp taskwait

    for (i = 0; i < numChildren; i++) {
        subtreesize += partialCount[i];
    }

    free(partialCount);
    free(n);

    return subtreesize;
}
Пример #10
// Task that explores the subtree rooted at `parent` and folds the children's
// Result records (maxdepth, size, leaves) into its own.
// NOTE(review): TASK_2 / SPAWN / SYNC are task macros defined elsewhere;
// their exact semantics are not visible here.
// NOTE(review): this excerpt has lost its brace-only lines and stops at the
// `else` branch, so the nesting below is inferred from indentation only.
TASK_2(Result, parTreeSearch, int, depth, Node *, parent) {
  int numChildren, childType;
  counter_t parentHeight = parent->height;

  // presumably { maxdepth, size, leaves }: this node counts as size 1 --
  // confirm the field order against the Result declaration
  Result r = { depth, 1, 0 };

  numChildren = uts_numChildren(parent);
  childType   = uts_childType(parent);

  // record number of children in parent
  parent->numChildren = numChildren;
  // Recurse on the children
  if (numChildren > 0) {
    int i, j;
    for (i = 0; i < numChildren; i++) {
      // alloca storage is released only when this function returns, so every
      // child gets its own record that stays valid until the SYNCs below
      Node *child = (Node*)alloca(sizeof(Node));
      child->type = childType;
      child->height = parentHeight + 1;
      child->numChildren = -1;    // not yet determined
      // repeated with identical arguments; only scales the work per node
      for (j = 0; j < computeGranularity; j++) {
        rng_spawn(parent->state.state, child->state.state, i);
      SPAWN(parTreeSearch, depth+1, child);

    /* Wait a bit */
    // sleeps 100 ns per child before collecting results
    struct timespec tim = (struct timespec){0, 100L*numChildren};
    nanosleep(&tim, NULL);

    // one SYNC per SPAWN issued above
    for (i = 0; i < numChildren; i++) {
      Result c = SYNC(parTreeSearch);
      if (c.maxdepth>r.maxdepth) r.maxdepth = c.maxdepth;
      r.size += c.size;
      r.leaves += c.leaves;
  } else {
Пример #11
/*
 * Count the nodes in the subtree rooted at `parent`, the parent included.
 *
 * depth       - recursion depth; only forwarded (as depth+1) to the children.
 * parent      - node being expanded; its height and RNG state are read.
 * numChildren - number of children to generate for `parent`.
 *
 * Returns 1 plus the sizes of all child subtrees.
 *
 * The OpenMP task/taskwait directives are disabled in this variant, so the
 * children are explored one after another on the calling thread.
 */
counter_t parTreeSearch(int depth, Node *parent, int numChildren)
{
	counter_t subtreesize = 1;
	int i, j;

	// A leaf has nothing to expand.  Returning here also keeps the
	// variable-length arrays below from being declared with a length that
	// is not greater than zero, which is undefined behaviour in C.
	if (numChildren <= 0) {
		return subtreesize;
	}

	Node n[numChildren], *nodePtr;
	counter_t partialCount[numChildren];

	// Recurse on the children
	for (i = 0; i < numChildren; i++) {
		nodePtr = &n[i];

		nodePtr->height = parent->height + 1;

		// The following loop is the work (one or more SHA-1 ops)
		for (j = 0; j < computeGranularity; j++) {
			rng_spawn(parent->state.state, nodePtr->state.state, i);
		}

		nodePtr->numChildren = uts_numChildren(nodePtr);

		//#pragma omp task firstprivate(i, nodePtr) shared(partialCount) untied
		partialCount[i] = parTreeSearch(depth+1, nodePtr, nodePtr->numChildren);
	}

	//#pragma omp taskwait

	for (i = 0; i < numChildren; i++) {
		subtreesize += partialCount[i];
	}

	return subtreesize;
}
Пример #12
/*
 * Generate all children of the parent.
 * Details depend on tree type, node type and shape function.
 */
// Expand one node: compute how many children `parent` has and of what type,
// build each child in `child`, then either park a copy in steal_buffer,
// expand it in a new hclib::async task, or expand it inline by recursion.
// NOTE(review): this excerpt has lost its brace-only lines and some
// preprocessor lines, and stops at the `else` branch; the nesting below is
// inferred from indentation only.
void genChildren(Node * parent, Node * child) {
  int parentHeight = parent->height;
  int numChildren, childType;

  // per-thread count of genChildren invocations
  t_metadata[omp_get_thread_num()].ntasks += 1;


  numChildren = uts_numChildren(parent);
  childType   = uts_childType(parent);

  // record number of children in parent
  parent->numChildren = numChildren;
  // construct children and push onto stack
  if (numChildren > 0) {
    int i, j;
    // type and height are the same for every child, so set them once
    child->type = childType;
    child->height = parentHeight + 1;

    // NOTE(review): the closing directive for this conditional is not
    // visible in this excerpt
#ifdef UTS_STAT
    if (stats) {
      child->pp = parent;  // pointer to parent

    // taken before the loop: inside the loop the name `parent` is shadowed by
    // a local copy of the child, so these keep referring to the real parent
    const unsigned char * parent_state = parent->state.state;
    unsigned char * child_state = child->state.state;

    for (i = 0; i < numChildren; i++) {
      for (j = 0; j < computeGranularity; j++) {
        // TBD:  add parent height to spawn
        // computeGranularity controls number of rng_spawn calls per node
          rng_spawn(parent_state, child_state, i);

      // snapshot of the child just built; it becomes the parent of the next
      // level (this declaration shadows the function parameter `parent`)
      Node parent = *child;

      int made_available_for_stealing = 0;
      // only worker 0 fills the steal buffer, and only while it has room
      if (hclib::get_current_worker() == 0 && n_buffered_steals < N_BUFFERED_STEALS) {
          // NOTE(review): repeats the capacity test already made just above
          if (n_buffered_steals < N_BUFFERED_STEALS) {
              steal_buffer[n_buffered_steals++] = parent;
              made_available_for_stealing = 1;

      if (!made_available_for_stealing) {
          // nodes with height below 9 are expanded in a separate async task;
          // deeper ones are expanded inline
          if (parent.height < 9) {
              hclib::async([parent] {
                  Node child;

                  // the by-value capture is const inside the lambda, so take
                  // a writable copy to pass as the non-const parent
                  Node tmp = parent;

                  genChildren(&tmp, &child);
          } else {
              Node child;

              genChildren(&parent, &child);
  } else {