// Test action for hpx_lco_get_all.
//
// args points to the input value; size is not used. An input below 2 is
// continued unchanged. Otherwise two _getAll calls are issued (for input - 1
// and input - 2), both results are fetched with a single hpx_lco_get_all,
// and r0 * r0 + r1 * r1 is continued.
static int _getAll_handler(uint32_t *args, size_t size) {
  uint32_t input = *args;
  if (input < 2) {
    return HPX_THREAD_CONTINUE(input);
  }

  enum { FANOUT = 2 };

  // Both calls target HPX_HERE.
  hpx_addr_t targets[FANOUT] = { HPX_HERE, HPX_HERE };
  uint32_t child_inputs[FANOUT] = { input - 1, input - 2 };

  // One future, one output slot and one output size per child call.
  uint32_t results[FANOUT] = { 0, 0 };
  hpx_addr_t lcos[FANOUT];
  void *outs[FANOUT];
  size_t out_sizes[FANOUT];
  for (int i = 0; i < FANOUT; ++i) {
    lcos[i] = hpx_lco_future_new(sizeof(uint32_t));
    outs[i] = &results[i];
    out_sizes[i] = sizeof(uint32_t);
  }

  for (int i = 0; i < FANOUT; ++i) {
    hpx_call(targets[i], _getAll, lcos[i], &child_inputs[i], sizeof(uint32_t));
  }

  // Gather both results in one call, then wait on each future individually.
  hpx_lco_get_all(FANOUT, lcos, out_sizes, outs, NULL);
  for (int i = 0; i < FANOUT; ++i) {
    hpx_lco_wait(lcos[i]);
  }

  // Delete the child futures, using a zero-sized future to wait for the
  // deletion to complete before that future is deleted as well.
  hpx_addr_t deleted = hpx_lco_future_new(0);
  hpx_lco_delete_all(FANOUT, lcos, deleted);
  hpx_lco_wait(deleted);
  hpx_lco_delete(deleted, HPX_NULL);

  uint32_t combined = results[0] * results[0] + results[1] * results[1];
  return HPX_THREAD_CONTINUE(combined);
}
/*
 * Tree-search action: counts the nodes of the subtree rooted at the node
 * described by args.
 *
 * args points to a struct thread_data holding the node (parent), its depth
 * and its number of children; size is not used. Each child is derived from
 * the parent's RNG state and then searched with a synchronous _uts call on
 * HPX_HERE. The subtree size (1 for this node plus every child's count) is
 * handed to HPX_THREAD_CONTINUE.
 *
 * Only one child is live at a time, so a single Node and a running sum are
 * used instead of per-child variable-length arrays. This keeps stack use
 * constant and avoids a zero- or negative-length VLA when numChildren <= 0.
 */
static counter_t _uts_action(void *args, size_t size) 
{
	int i, j;
	struct thread_data *my_data = (struct thread_data *)args;
	struct thread_data temp, input;
	Node child;
	counter_t subtreesize = 1;

	/* Work on a local snapshot of the incoming node description. */
	temp.depth = my_data->depth;
	memcpy(&temp.parent, &my_data->parent, sizeof(Node));
	temp.numChildren = my_data->numChildren;

	hpx_addr_t theThread = HPX_HERE;

	// Recurse on the children
	for (i = 0; i < temp.numChildren; i++) {
		/* Start from zero so that a call which does not write a result
		   adds nothing rather than an indeterminate value. */
		counter_t partial = 0;

		/* NOTE(review): only height, state and numChildren of the child
		   are assigned here; any other Node field is left unset. Confirm
		   that uts_numChildren and _uts do not read one. */
		child.height = temp.parent.height + 1;

		// The following line is the work (one or more SHA-1 ops)
		for (j = 0; j < computeGranularity; j++) {
			rng_spawn(temp.parent.state.state, child.state.state, i);
		}

		child.numChildren = uts_numChildren(&child);

		input.depth = temp.depth + 1;
		memcpy(&input.parent, &child, sizeof(Node));
		input.numChildren = child.numChildren;

		/* Synchronous call: partial is read only after the call returns. */
		hpx_call_sync(theThread, _uts, &partial, sizeof(partial), &input, sizeof(input));
		subtreesize += partial;
	}

	HPX_THREAD_CONTINUE(subtreesize);
	return HPX_SUCCESS;
}
// Creates a zero-sized future with hpx_lco_future_new and continues its
// address.
int future_at_handler(void) {
  hpx_addr_t future = hpx_lco_future_new(0);
  int status = HPX_THREAD_CONTINUE(future);
  return status;
}
// Creates a future sized to hold ONES (sizeof(ONES) bytes) and continues the
// address of the new LCO.
static int _new_future_at_handler(void) {
  hpx_addr_t lco = hpx_lco_future_new(sizeof(ONES));
  int status = HPX_THREAD_CONTINUE(lco);
  return status;
}
// Continues the current contents of `value`, which is defined outside this
// function. Neither parameter is used.
static int action_get_value(void *args, size_t size) {
  (void)args;
  (void)size;
  return HPX_THREAD_CONTINUE(value);
}
// Calls hpx_thread_sigmask(HPX_SIG_BLOCK, HPX_SIGSEGV), yields, and then
// passes the value read through a null pointer to HPX_THREAD_CONTINUE.
// NOTE(review): the null dereference looks deliberate. Presumably this checks
// how a blocked HPX_SIGSEGV behaves after the thread has yielded; confirm
// against the test driver before "fixing" it.
static int _check_null_handler(void) {
  // Null pointer to a volatile int; presumably volatile keeps the compiler
  // from eliding the read below. TODO confirm.
  static volatile int *null = NULL;
  hpx_thread_sigmask(HPX_SIG_BLOCK, HPX_SIGSEGV);
  // Yield between setting the mask and the faulting access.
  hpx_thread_yield();
  return HPX_THREAD_CONTINUE(*null);
}