Example #1
// -------------------------------------------------------------
// Vec2GA
// -------------------------------------------------------------
static
PetscErrorCode
Vec2GA(Vec x, int pgroup, int *ga, bool trans = false)
{
  int lrows, rows;
  PetscErrorCode ierr = 0;
  
  ierr = VecGetLocalSize(x, &lrows); CHKERRQ(ierr);
  ierr = VecGetSize(x, &rows); CHKERRQ(ierr);
  
  PetscInt vlo, vhi;
  ierr = VecGetOwnershipRange(x, &vlo, &vhi); CHKERRQ(ierr);
  
  PetscScalar *v;
  ierr = VecGetArray(x, &v); CHKERRQ(ierr);

  int lo[2] = {0,0}, hi[2] = {0,0}, ld[2] = {1,1};
  if (!trans) {
    ierr = CreateMatGA(pgroup, lrows, 1, rows, 1, ga); CHKERRQ(ierr);
    lo[0] = vlo; 
    hi[0] = vhi-1;
  } else {
    ierr = CreateMatGA(pgroup, 1, lrows, 1, rows, ga); CHKERRQ(ierr);
    lo[1] = vlo; 
    hi[1] = vhi-1;
  }
  NGA_Put(*ga, lo, hi, v, ld);
  // GA_Print(*ga);
  ierr = VecRestoreArray(x, &v); CHKERRQ(ierr);

  GA_Pgroup_sync(pgroup);
  return ierr;
}
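Example #6 below calls a GA2Vec helper that is not among these examples. A minimal sketch of the reverse transfer, assuming the non-transposed (rows x 1) layout created by Vec2GA above, might look like this:

// -------------------------------------------------------------
// GA2Vec (hedged sketch -- the real routine is not shown here)
// -------------------------------------------------------------
static
PetscErrorCode
GA2Vec(int ga, Vec x)
{
  PetscErrorCode ierr = 0;

  PetscInt vlo, vhi;
  ierr = VecGetOwnershipRange(x, &vlo, &vhi); CHKERRQ(ierr);

  PetscScalar *v;
  ierr = VecGetArray(x, &v); CHKERRQ(ierr);

  // Pull the locally owned rows of the (rows x 1) global array into the Vec.
  int lo[2] = {0,0}, hi[2] = {0,0}, ld[2] = {1,1};
  lo[0] = vlo;
  hi[0] = vhi-1;
  NGA_Get(ga, lo, hi, v, ld);

  ierr = VecRestoreArray(x, &v); CHKERRQ(ierr);
  return ierr;
}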
Example #2
/**
 * Push data from a vector onto buses and branches. The vector must
 * have been created with the mapToVector method using the same
 * GenVectorMap.
 * @param vector vector containing data to be pushed to the network
 */
void mapToNetwork(const gridpack::math::Vector &vector)
{
  int i, j, nvals;
  ComplexType *values = new ComplexType[p_maxValues];
  int *idx = new int[p_maxValues];
  // get values from buses
  for (i=0; i<p_nBuses; i++) {
    if (p_network->getActiveBus(i)) {
      nvals = p_network->getBus(i)->vectorNumElements();
      p_network->getBus(i)->vectorGetElementIndices(idx);
      for (j=0; j<nvals; j++) {
        vector.getElement(idx[j],values[j]);
      }
      p_network->getBus(i)->vectorSetElementValues(values);
    }
  }
  // get values from branches
  for (i=0; i<p_nBranches; i++) {
    if (p_network->getActiveBranch(i)) {
      nvals = p_network->getBranch(i)->vectorNumElements();
      p_network->getBranch(i)->vectorGetElementIndices(idx);
      for (j=0; j<nvals; j++) {
        vector.getElement(idx[j],values[j]);
      }
      p_network->getBranch(i)->vectorSetElementValues(values);
    }
  }
  delete [] values;
  delete [] idx;
  GA_Pgroup_sync(p_GAgrp);
}
Example #3
/**
 * Reset an existing vector from the current component state on the network
 * @param vector existing vector (should be generated from the same mapper)
 */
void mapToVector(gridpack::math::Vector &vector)
{
  vector.zero();
  loadBusData(vector,false);
  loadBranchData(vector,false);
  GA_Pgroup_sync(p_GAgrp);
  vector.ready();
}
Example #4
/**
 * Generate a vector from the current component state on the network and
 * return a conventional pointer to it. Used for the Fortran interface.
 * @return pointer to the new vector
 */
gridpack::math::Vector* intMapToVector(void)
{
  gridpack::parallel::Communicator comm = p_network->communicator();
  int blockSize = p_maxIndex-p_minIndex+1;
  gridpack::math::Vector*
    Ret(new gridpack::math::Vector(comm, blockSize));
  loadBusData(*Ret,false);
  loadBranchData(*Ret,false);
  GA_Pgroup_sync(p_GAgrp);
  Ret->ready();
  return Ret;
}
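Taken together, Examples #2 through #4 suggest the usual round trip: build a vector from the network component state, operate on it, then push the values back through the same mapper. A hedged sketch, where MyNetwork and solveStep are hypothetical and namespace qualifiers are omitted:

// Illustrative sketch only: MyNetwork and solveStep are hypothetical names;
// the constructor follows the form shown in Example #9.
void roundTrip(boost::shared_ptr<MyNetwork> network)
{
  GenVectorMap<MyNetwork> mapper(network);              // builds offsets and indices
  gridpack::math::Vector *x = mapper.intMapToVector();  // vector from component state
  solveStep(*x);                                        // hypothetical in-place update
  mapper.mapToNetwork(*x);                              // push results back onto buses/branches
  delete x;                                             // intMapToVector returns a raw pointer
}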
Example #5
// -------------------------------------------------------------
// MatAssemmblyEnd_DenseGA
// -------------------------------------------------------------
static
PetscErrorCode
MatAssemmblyEnd_DenseGA(Mat mat, MatAssemblyType type)
{
  PetscErrorCode ierr = 0;
  struct MatGACtx *ctx;
  ierr = MatShellGetContext(mat, &ctx); CHKERRQ(ierr);
  GA_Pgroup_sync(ctx->gaGroup);
  MPI_Comm comm;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm); CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);
  // GA_Print(ctx->ga);
  return ierr;
}
Example #6
// -------------------------------------------------------------
// MatMult_DenseGA
// -------------------------------------------------------------
static
PetscErrorCode
MatMult_DenseGA(Mat mat, Vec x, Vec y)
{
  // FIXME: I'm assuming the Mat and Vec's are compatible and that's
  // been checked somewhere else. Probably a mistake.

  PetscErrorCode ierr = 0;
  struct MatGACtx *ctx;
  ierr = MatShellGetContext(mat, &ctx); CHKERRQ(ierr);

  PetscInt Arows, Acols;
  ierr = MatGetSize(mat, &Arows, &Acols); CHKERRQ(ierr);

  int g_x, g_y;
  ierr = Vec2GA(x, ctx->gaGroup, &g_x, false); CHKERRQ(ierr);
  ierr = Vec2GA(y, ctx->gaGroup, &g_y, false); CHKERRQ(ierr);

  PetscScalarGA alpha(one), beta(zero);
  int ndim, itype, lo[2] = {0,0}, ahi[2], xhi[2], yhi[2];
  NGA_Inquire(ctx->ga, &itype, &ndim, ahi);
  ahi[0] -= 1; ahi[1] -= 1;
  NGA_Inquire(g_x, &itype, &ndim, xhi);
  xhi[0] -= 1; xhi[1] -= 1;
  NGA_Inquire(g_y, &itype, &ndim, yhi);
  yhi[0] -= 1; yhi[1] -= 1;
  // GA_Print(ctx->ga);
  // GA_Print(g_x);
  NGA_Matmul_patch('N', 'N', &alpha, &beta, 
                   ctx->ga, lo, ahi,
                   g_x, lo, xhi,
                   g_y, lo, yhi);

  GA_Pgroup_sync(ctx->gaGroup);
  // GA_Print(g_y);

  ierr = GA2Vec(g_y, y); CHKERRQ(ierr);

  GA_Destroy(g_y);
  GA_Destroy(g_x);

  MPI_Comm comm;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm); CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);

  return ierr;
}
Example #7
// -------------------------------------------------------------
// MatDestroy_DenseGA
// -------------------------------------------------------------
static
PetscErrorCode
MatDestroy_DenseGA(Mat A)
{
  PetscErrorCode ierr = 0;

  MPI_Comm comm;
  ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);

  struct MatGACtx *ctx;
  ierr = MatShellGetContext(A, &ctx); CHKERRQ(ierr);
  GA_Pgroup_sync(ctx->gaGroup);
  GA_Destroy(ctx->ga);
  // GA_Pgroup_destroy(ctx->gaGroup);
  ierr = PetscFree(ctx);
  return ierr;
}
Example #8
Integer util_tcesublock_(Integer *val,Integer *p_handle) {

//    if(*p_handle==0) exit(1);
//ga_error("nxtask: p_handle is zero", 1);

    if(*val > 0) {
//       if(!initialized) exit(1);
//ga_error("nxtask: not yet initialized", 1);
       return (Integer) NGA_Read_inc(g_T, &subscript, 1);
    }
    else if(*val==0) {
       int n = 1;
       initialized=1;
       int p_h = (int)*p_handle;

       /* create task array */
//       g_T = NGA_Create(C_LONG, 1, &n,"Atomic Task", NULL);

       g_T = NGA_Create_config(C_LONG,1,&n,"Atomic Task",NULL,p_h);

       /* Initialize the task array */
       if(GA_Pgroup_nodeid(p_h)==0) {
          int lo=0, hi=0;
          NGA_Put(g_T, &lo, &hi, &initval, &hi);
//          printf("PUT %i %i %i\n",sizeof(*p_handle),sizeof(Integer),sizeof(int));
          initval=0;
       }

       GA_Pgroup_sync(p_h);
//       printf("CREATE %i %i \n",*p_handle,g_T);
       return 0;
    }
    else if (*val < 0) {
        GA_Destroy(g_T);
//        printf("DELETE %i %i \n",*p_handle,g_T);
//        ga_pgroup_sync_(p_handle);
        initialized=0; 
        initval=0; 
        return 0;
   }
    
//    ga_error("nxtval: invalid value passed", 0L);
    return -1;
}
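A hedged sketch of the usual create/increment/destroy cycle for this shared task counter; do_work is hypothetical, and the Integer typedef comes from the surrounding source:

/* Illustrative sketch only: do_work is hypothetical. Every process in the
   group calls util_tcesublock_ with val==0 and val<0, since the create and
   destroy paths are collective. */
void do_work(Integer task);   /* hypothetical task body */

void run_tasks(Integer p_handle, Integer ntasks)
{
  Integer create = 0, increment = 1, destroy = -1;
  Integer task;

  util_tcesublock_(&create, &p_handle);              /* create and zero the counter */
  while ((task = util_tcesublock_(&increment, &p_handle)) < ntasks) {
    do_work(task);                                   /* each task id is claimed exactly once */
  }
  util_tcesublock_(&destroy, &p_handle);             /* destroy the counter */
}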
Example #9
/**
 * Initialize the mapper for the given network and the current mode. Create
 * global arrays containing the offsets that will be used to build a vector
 * from the network component objects
 * @param network network that will generate the vector
 */
GenVectorMap(boost::shared_ptr<_network> network)
  : p_network(network)
{
  p_Offsets = NULL;

  p_timer = NULL;
  //p_timer = gridpack::utility::CoarseTimer::instance();

  p_GAgrp = network->communicator().getGroup();
  p_me = GA_Pgroup_nodeid(p_GAgrp);
  p_nNodes = GA_Pgroup_nnodes(p_GAgrp);

  p_Offsets = new int[p_nNodes];

  p_nBuses = p_network->numBuses();
  p_nBranches = p_network->numBranches();

  getDimensions();
  setOffsets();
  setIndices();
  GA_Pgroup_sync(p_GAgrp);
}
Example #10
// -------------------------------------------------------------
// MatDuplicate_DenseGA
// -------------------------------------------------------------
static
PetscErrorCode
MatDuplicate_DenseGA(Mat mat, MatDuplicateOption op, Mat *M)
{
  PetscErrorCode ierr = 0;
  struct MatGACtx *ctx, *newctx;
  ierr = MatShellGetContext(mat, &ctx); CHKERRQ(ierr);

  MPI_Comm comm;
  ierr = PetscObjectGetComm((PetscObject)mat, &comm); CHKERRQ(ierr);

  PetscInt lrows, grows, lcols, gcols;
  ierr = MatGetSize(mat, &grows, &gcols); CHKERRQ(ierr);
  ierr = MatGetLocalSize(mat, &lrows, &lcols); CHKERRQ(ierr);

  ierr = PetscMalloc(sizeof(struct MatGACtx), &newctx); CHKERRQ(ierr);
  newctx->gaGroup = ctx->gaGroup;
  newctx->ga = GA_Duplicate(ctx->ga, "PETSc Dense Matrix");

  ierr = MatCreateShell(comm, lrows, lcols, grows, gcols, newctx, M); CHKERRQ(ierr);
  ierr = MatSetOperations_DenseGA(*M); CHKERRQ(ierr);

  PetscScalar z(0.0);
  switch (op) {
  case (MAT_COPY_VALUES):
    GA_Copy(ctx->ga, newctx->ga);
    break;
  default:
    GA_Fill(newctx->ga, &z);
    break;
  }

  GA_Pgroup_sync(newctx->gaGroup);

  return ierr;
}
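MatSetOperations_DenseGA is called in the duplicate routine above but is not among these examples. A minimal sketch, assuming it simply registers the shell callbacks shown in Examples #5, #6, #7, and #10 through PETSc's MatShellSetOperation:

// -------------------------------------------------------------
// MatSetOperations_DenseGA (hedged sketch -- the real routine is not shown)
// -------------------------------------------------------------
static
PetscErrorCode
MatSetOperations_DenseGA(Mat A)
{
  PetscErrorCode ierr = 0;
  ierr = MatShellSetOperation(A, MATOP_MULT,
                              (void (*)(void)) MatMult_DenseGA); CHKERRQ(ierr);
  ierr = MatShellSetOperation(A, MATOP_ASSEMBLY_END,
                              (void (*)(void)) MatAssemmblyEnd_DenseGA); CHKERRQ(ierr);
  ierr = MatShellSetOperation(A, MATOP_DUPLICATE,
                              (void (*)(void)) MatDuplicate_DenseGA); CHKERRQ(ierr);
  ierr = MatShellSetOperation(A, MATOP_DESTROY,
                              (void (*)(void)) MatDestroy_DenseGA); CHKERRQ(ierr);
  return ierr;
}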
Example #11
// -------------------------------------------------------------
// AdjacencyList::ready
// -------------------------------------------------------------
void
AdjacencyList::ready(void)
{
#if 1
  int grp = this->communicator().getGroup();
  int me = GA_Pgroup_nodeid(grp);
  int nprocs = GA_Pgroup_nnodes(grp);
  p_adjacency.clear();
  p_adjacency.resize(p_global_nodes.size());

  // Find total number of nodes and edges. Assume no duplicates
  int nedges = p_edges.size();
  int total_edges = nedges;
  char plus[2];
  strcpy(plus,"+");
  GA_Pgroup_igop(grp,&total_edges, 1, plus);
  int nnodes = p_original_nodes.size();
  int total_nodes = nnodes;
  GA_Pgroup_igop(grp,&total_nodes, 1, plus);

  // Create a global array containing original indices of all nodes and indexed
  // by the global index of the node
  int i, p;
  int dist[nprocs];
  for (p=0; p<nprocs; p++) {
    dist[p] = 0;
  }
  dist[me] = nnodes;
  GA_Pgroup_igop(grp,dist,nprocs,plus);
  int *mapc = new int[nprocs+1];
  mapc[0] = 0;
  for (p=1; p<nprocs; p++) {
    mapc[p] = mapc[p-1] + dist[p-1];
  }
  mapc[nprocs] = total_nodes;
  int g_nodes = GA_Create_handle();
  int dims = total_nodes;
  NGA_Set_data(g_nodes,1,&dims,C_INT);
  NGA_Set_pgroup(g_nodes, grp);
  if (!GA_Allocate(g_nodes)) {
    char buf[256];
    sprintf(buf,"AdjacencyList::ready: Unable to allocate distributed array"
        " for bus indices\n");
    printf("%s", buf);
    throw gridpack::Exception(buf);
  }
  int lo, hi;
  lo = mapc[me];
  hi = mapc[me+1]-1;
  int size = hi - lo + 1;
  int o_idx[size], g_idx[size];
  for (i=0; i<size; i++) o_idx[i] = p_original_nodes[i]; 
  for (i=0; i<size; i++) g_idx[i] = p_global_nodes[i]; 
  int **indices= new int*[size];
  int *iptr = g_idx;
  for (i=0; i<size; i++) {
    indices[i] = iptr;
    iptr++;
  }
  if (size > 0) NGA_Scatter(g_nodes,o_idx,indices,size);
  GA_Pgroup_sync(grp);
  delete [] indices;
  delete [] mapc;

  // Cycle through all nodes and match them up with nodes at end of edges.
  for (p=0; p<nprocs; p++) {
    int iproc = (me+p)%nprocs;
    // Get node data from process iproc
    NGA_Distribution(g_nodes,iproc,&lo,&hi);
    size = hi - lo + 1;
    if (size <= 0) continue;
    int *buf = new int[size];
    int ld = 1;
    NGA_Get(g_nodes,&lo,&hi,buf,&ld);
    // Create a map of the nodes from process p
    std::map<int,int> nmap;
    std::map<int,int>::iterator it;
    std::pair<int,int> pr;
    for (i=lo; i<=hi; i++){
      pr = std::pair<int,int>(buf[i-lo],i);
      nmap.insert(pr);
    }
    delete [] buf;
    // scan through the edges looking for matches. If there is a match, set the
    // global index
    int idx;
    for (i=0; i<nedges; i++) {
      idx = static_cast<int>(p_edges[i].original_conn.first);
      it = nmap.find(idx);
      if (it != nmap.end()) {
        p_edges[i].global_conn.first = static_cast<Index>(it->second);
      }
      idx = static_cast<int>(p_edges[i].original_conn.second);
      it = nmap.find(idx);
      if (it != nmap.end()) {
        p_edges[i].global_conn.second = static_cast<Index>(it->second);
      }
    }
  }
  GA_Destroy(g_nodes);

  // All edges now have global indices assigned to them. Begin constructing
  // adjacency list. Start by creating a global array containing all edges
  dist[0] = 0;
  for (p=1; p<nprocs; p++) {
    double max = static_cast<double>(total_edges);
    max = (static_cast<double>(p))*(max/(static_cast<double>(nprocs)));
    dist[p] = 2*(static_cast<int>(max));
  }
  int g_edges = GA_Create_handle();
  dims = 2*total_edges;
  NGA_Set_data(g_edges,1,&dims,C_INT);
  NGA_Set_irreg_distr(g_edges,dist,&nprocs);
  NGA_Set_pgroup(g_edges, grp);
  if (!GA_Allocate(g_edges)) {
    char buf[256];
    sprintf(buf,"AdjacencyList::ready: Unable to allocate distributed array"
        " for branch indices\n");
    printf("%s", buf);
    throw gridpack::Exception(buf);
  }

  // Add edge information to global array. Start by figuring out how much data
  // is associated with each process
  for (p=0; p<nprocs; p++) {
    dist[p] = 0;
  }
  dist[me] = nedges;
  GA_Pgroup_igop(grp,dist, nprocs, plus);
  int offset[nprocs];
  offset[0] = 0;
  for (p=1; p<nprocs; p++) {
    offset[p] = offset[p-1] + 2*dist[p-1];
  }
  // Figure out where local data goes in GA and then copy it to GA
  lo = offset[me];
  hi = lo + 2*nedges - 1;
  int edge_ids[2*nedges];
  for (i=0; i<nedges; i++) {
    edge_ids[2*i] = static_cast<int>(p_edges[i].global_conn.first);
    edge_ids[2*i+1] = static_cast<int>(p_edges[i].global_conn.second);
  }
  if (lo <= hi) {
    int ld = 1;
    NGA_Put(g_edges,&lo,&hi,edge_ids,&ld);
  }
  GA_Pgroup_sync(grp);

  // Cycle through all edges and find out how many are attached to the nodes on
  // your process. Start by creating a map between the global node indices and
  // the local node indices
  std::map<int,int> gmap;
  std::map<int,int>::iterator it;
  std::pair<int,int> pr;
  for (i=0; i<nnodes; i++){
    pr = std::pair<int,int>(static_cast<int>(p_global_nodes[i]),i);
    gmap.insert(pr);
  }
  // Cycle through edge information on each processor
  for (p=0; p<nprocs; p++) {
    int iproc = (me+p)%nprocs;
    NGA_Distribution(g_edges,iproc,&lo,&hi);
    int size = hi - lo + 1;
    int *buf = new int[size];
    int ld = 1;
    NGA_Get(g_edges,&lo,&hi,buf,&ld);
    BOOST_ASSERT(size%2 == 0);
    size = size/2;
    int idx1, idx2;
    Index idx;
    for (i=0; i<size; i++) {
      idx1 = buf[2*i];
      idx2 = buf[2*i+1];
      it = gmap.find(idx1);
      if (it != gmap.end()) {
        idx = static_cast<Index>(idx2);
        p_adjacency[it->second].push_back(idx);
      }
      it = gmap.find(idx2);
      if (it != gmap.end()) {
        idx = static_cast<Index>(idx1);
        p_adjacency[it->second].push_back(idx);
      }
    }
    delete [] buf;
  }
  GA_Destroy(g_edges);
  GA_Pgroup_sync(grp);
#else
  int me(this->processor_rank());
  int nproc(this->processor_size());

  p_adjacency.clear();
  p_adjacency.resize(p_nodes.size());

  IndexVector current_indexes;
  IndexVector connected_indexes;

  for (int p = 0; p < nproc; ++p) {

    // broadcast the node indexes owned by process p to all processes,
    // all processes work on these at once

    current_indexes.clear();
    if (me == p) {
      std::copy(p_nodes.begin(), p_nodes.end(), 
	  std::back_inserter(current_indexes));
      // std::cout << me << ": node indexes: ";
      // std::copy(current_indexes.begin(), current_indexes.end(),
      //           std::ostream_iterator<Index>(std::cout, ","));
      // std::cout << std::endl;
    }
    boost::mpi::broadcast(this->communicator(), current_indexes, p);

    // make a copy of the local edges in a list (so it's easier to
    // remove those completely accounted for)
    std::list<p_Edge> tmpedges;
    std::copy(p_edges.begin(), p_edges.end(), 
	std::back_inserter(tmpedges));

    // loop over the process p's node index set

    int local_index(0);
    for (IndexVector::iterator n = current_indexes.begin(); 
	n != current_indexes.end(); ++n, ++local_index) {

      // determine the local edges that refer to the current node index

      connected_indexes.clear();
      std::list<p_Edge>::iterator e(tmpedges.begin());
      //      std::cout << me << ": current node index: " << *n 
      //                << ", edges: " << tmpedges.size() 
      //                << std::endl;

      while (e != tmpedges.end()) {
	if (*n == e->conn.first && e->conn.second != bogus) {
	  connected_indexes.push_back(e->conn.second);
	  e->found.first = true;
	  // std::cout << me << ": found connection: edge " << e->index
	  //           << " (" << e->conn.first << ", " << e->conn.second << ")"
	  //           << std::endl;
	}
	if (*n == e->conn.second && e->conn.first != bogus) {
	  connected_indexes.push_back(e->conn.first);
	  e->found.second = true;
	  // std::cout << me << ": found connection: edge " << e->index
	  //           << " (" << e->conn.first << ", " << e->conn.second << ")"
	  //           << std::endl;
	}

	if (e->found.first && e->found.second) {
	  e = tmpedges.erase(e);
	} else if (e->conn.first == bogus || 
	    e->conn.second == bogus) {
	  e = tmpedges.erase(e);
	} else {
	  ++e;
	}
      }

      // gather all connections for the current node index to the
      // node's owner process, we have to gather the vectors because
      // processes will have different numbers of connections

      if (me == p) {
	size_t allsize;
        boost::mpi::reduce(this->communicator(), 
                           connected_indexes.size(), allsize, std::plus<size_t>(), p);

	std::vector<IndexVector> all_connected_indexes;
        boost::mpi::gather(this->communicator(), 
                           connected_indexes, all_connected_indexes, p);
	p_adjacency[local_index].clear();
	for (std::vector<IndexVector>::iterator k = all_connected_indexes.begin();
	    k != all_connected_indexes.end(); ++k) {
	  std::copy(k->begin(), k->end(), 
	      std::back_inserter(p_adjacency[local_index]));
	}
      } else {
	boost::mpi::reduce(this->communicator(), 
                           connected_indexes.size(), std::plus<size_t>(), p);
	boost::mpi::gather(this->communicator(), connected_indexes, p);
      }
      this->communicator().barrier();
    }
    this->communicator().barrier();
  }
#endif
}
Example #12
~GenVectorMap()
{
  if (p_Offsets != NULL) delete [] p_Offsets;
  GA_Pgroup_sync(p_GAgrp);
}
Example #13
/** Client code. Receives signals from the server to process a task or
    terminate processing and return*/
void client_code() {
  int *buf = NULL, buf_size;
  int flag;
  MPI_Status status;
  Integer p_handle;
  int ntsks=0, src;
  const char *pname = "client_code";
  double e1, e2, e3, e4, e5, f1, f2, f3, f4,f5,f6,f7,f8;
  double t_prepar=0, t_wait_start=0, t_grp=0,t_sync=0,t_compl=0,t_dest=0;
/*   double get_doit_time_(); */
/*   double get_esp_time_(); */
/*   double get_gm_crt_time_(); */
/*   double get_chrg_set_time_(); */
/*   double get_gm_push_time_(); */
  const int server = GA_Pgroup_absolute_id(ga_pgroup_get_default_(),SVR);
  const int default_grp = ga_pgroup_get_default_(); /*default GA group for this dispatcher instance*/
  const int world_me = GA_Nodeid();
  const int nproc = GA_Nnodes();

  t_ptask = 0.0;
/*   fprintf(stderr, "%d: 0 server=%d %s\n", GA_Nodeid(), server,pname); */

  e1 = util_wallsec_();
/*   fprintf(stderr, "%d: 0 %s\n", GA_Nodeid(), pname); */

/*   GA_Pgroup_set_default(GA_Pgroup_get_world()); */

/*   fprintf(stderr, "%d: 1 %s\n", world_me, pname); */

  buf_size = 1+ /*action to perform*/
    1+ /*task id - if TASK_SIGNAL*/
    nproc /*process group info*/
    ;

/*   buf = (int *)malloc(buf_size*sizeof(int)); */
  buf = (int *)alloca(buf_size*sizeof(int));
  assert(buf != NULL);

/*   fprintf(stderr, "%d: 2 %s\n", world_me, pname); */

  e2 = util_wallsec_();
  while(1) {
    int nelem, grp_me;
    Integer tskid;

    f1 = util_wallsec_();
/*     fprintf(stderr, "%d:: Waiting for work\n", world_me); */
    MPI_Recv(buf, buf_size, MPI_INT, MPI_ANY_SOURCE, SIGNAL_TAG, MPI_COMM_WORLD, &status);
    f2 = util_wallsec_();
    t_wait_start += (f2-f1);
/*     fprintf(stderr, "%d:: Client got msg from %d\n", world_me, status.MPI_SOURCE); */

    MPI_Get_elements(&status, MPI_INT, &nelem);
    assert(nelem >= 1);
      
    if(buf[0] == TERM_CLIENT) {
      /*process termination and return*/
/*        fprintf(stderr, "%d:: Recv-ed term signal\n", GA_Nodeid()); */
/*       free(buf); */
/*       fprintf(stderr, "%d:: Terminating client\n", GA_Nodeid()); */
#ifdef LEADER_BCAST
      signal_termination(SVR,status.MPI_SOURCE);
#endif
      break;
    }
/*     fprintf(stderr, "%d:: got a task to process\n", world_me); */
    /*Got a task to process*/
    assert(buf[0] == TASK_START);
    ntsks += 1;

    if(status.MPI_SOURCE == server) {
      qsort(buf+2, nelem-2, sizeof(int), int_compare);
    }
    f3  = util_wallsec_();
    t_prepar += (f3-f2);

#if LEADER_BCAST
    src = (server==status.MPI_SOURCE)?buf[2]:status.MPI_SOURCE;
    broadcast(nelem-2,buf+2,buf[2],src,buf,nelem*sizeof(int));
#endif

    /*The proc ids are in world group. So create sub-group of world group*/
    GA_Pgroup_set_default(GA_Pgroup_get_world());
    p_handle = GA_Pgroup_create(&buf[2], nelem-2);
    GA_Pgroup_set_default(p_handle);
/*     GA_Pgroup_sync(p_handle); */
    f4 = MPI_Wtime();
    t_grp += (f4-f3);

    tskid = buf[1];
/*     fprintf(stderr, "%d(%d):: Invoking process task tskid=%d\n", grp_me, world_me, tskid); */
    process_task_(&tskid, &p_handle);
    f5 = MPI_Wtime();
    t_ptask += (f5-f4);
    
    GA_Pgroup_sync(p_handle);
    grp_me = GA_Nodeid();
    f6 = util_wallsec_();
    t_sync += (f6-f5);

    if(grp_me == 0) {
      int v[2] = {TASK_DONE, tskid};
/*        fprintf(stderr, "%d(%d):: Sending ack for task %d to %d\n", */
/*  	      grp_me, world_me, tskid, SERVER); */
      MPI_Send(v, 2, MPI_INT, server, SIGNAL_TAG, MPI_COMM_WORLD);
    }
    f7 = util_wallsec_();
    t_compl += (f7-f6);
/*     GA_Pgroup_sync(p_handle); */
    GA_Pgroup_destroy(p_handle);
    GA_Pgroup_set_default(default_grp);
    f8 = util_wallsec_();
    t_dest += (f8-f7);
  }
  e3 = util_wallsec_();
/*   fprintf(stderr, "%d:: CLIENT total time=%lf\n", ga_nodeid_(), e3-e1); */
/*   fprintf(stderr, "%d:: CLIENT ntsks=%d\n", ga_nodeid_(), ntsks); */
/*   fprintf(stderr, "%d:: CLIENT loop time=%lf\n", ga_nodeid_(), e3-e2); */
/*   fprintf(stderr, "%d:: CLIENT wait start time=%lf\n", ga_nodeid_(),t_wait_start); */
/*   fprintf(stderr, "%d:: CLIENT prepare time=%lf\n", ga_nodeid_(),t_prepar); */
/*   fprintf(stderr, "%d:: CLIENT grp crt time=%lf\n", ga_nodeid_(), t_grp); */
/*   fprintf(stderr, "%d:: CLIENT ptask time=%lf\n", ga_nodeid_(), t_ptask); */
/*   fprintf(stderr, "%d:: CLIENT sync time=%lf\n", ga_nodeid_(), t_sync); */
/*   fprintf(stderr, "%d:: CLIENT compl time=%lf\n", ga_nodeid_(), t_compl); */
/*   fprintf(stderr, "%d:: CLIENT grp dstry time=%lf\n", ga_nodeid_(), t_dest); */
/*   fflush(stdout); */
/*   fprintf(stderr, "%d:: CLIENT doit time=%lf\n",ga_nodeid_(),get_doit_time_()); */
/*   fprintf(stderr, "%d:: CLIENT esp time=%lf\n",ga_nodeid_(),get_esp_time_()); */
/*   fprintf(stderr, "%d:: CLIENT chrg_set time=%lf\n",ga_nodeid_(),get_chrg_set_time_()); */
/*   fprintf(stderr, "%d:: CLIENT gm_crt time=%lf\n",ga_nodeid_(),get_gm_crt_time_()); */
/*   fprintf(stderr, "%d:: CLIENT gm_push time=%lf\n",ga_nodeid_(),get_gm_push_time_()); */
}
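The matching server side is not shown in these examples. A hedged sketch of the task-assignment send, inferred only from the buffer layout the client decodes above (buf[0] = action, buf[1] = task id, buf[2..] = world ranks of the assigned group) and assuming the non-LEADER_BCAST path where every group member hears from the server directly; assign_task, ranks, and nranks are hypothetical:

/* Illustrative sketch only: assign_task, ranks, and nranks are hypothetical. */
void assign_task(int tskid, const int *ranks, int nranks)
{
  int i;
  int msg_len = 2 + nranks;
  int *msg = (int *)alloca(msg_len*sizeof(int));   /* same allocation style as the client */

  msg[0] = TASK_START;                  /* action, decoded at buf[0] */
  msg[1] = tskid;                       /* task id, decoded at buf[1] */
  for (i = 0; i < nranks; i++) msg[2+i] = ranks[i];   /* group members; the client sorts these */

  for (i = 0; i < nranks; i++) {
    MPI_Send(msg, msg_len, MPI_INT, ranks[i], SIGNAL_TAG, MPI_COMM_WORLD);
  }
}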