Ejemplo n.º 1
0
/**
 * Select the minimum or maximum element of a global array and report its
 * value together with its position in the array.
 *
 * Each process first selects a candidate among the elements it holds
 * (snga_select_elem), converts the candidate's position in the local buffer
 * into a global subscript, and the candidates are then combined across
 * processes with armci_msg_sel(). Processes that hold no data take part in
 * the combination with participate == 0.
 *
 * @param g_a       handle of the global array
 * @param op        operator name; only the first three characters are
 *                  compared and they must be "min" or "max", otherwise
 *                  pnga_error() is called
 * @param val       [out] receives the selected value; it is written through
 *                  a pointer of the array's element type (int, long,
 *                  long long, double, float, SingleComplex, DoubleComplex)
 * @param subscript [out] receives ndim indices of the selected element,
 *                  using the same 1-based convention as pnga_distribution()
 */
void pnga_select_elem(Integer g_a, char* op, void* val, Integer *subscript)
{
  Integer ndim, type, me, elems, ind=0, i;
  Integer lo[MAXDIM],hi[MAXDIM],dims[MAXDIM],ld[MAXDIM-1];
  elem_info_t info;
  Integer num_blocks;
  int     participate=0;
  int local_sync_begin;

  /* remember the caller's sync mask, then reset the masks for later calls */
  local_sync_begin = _ga_sync_begin; 
  _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/
  if(local_sync_begin)pnga_sync();

  me = pnga_nodeid();

  pnga_check_handle(g_a, "ga_select_elem");
  /* NOTE(review): the pushed name differs from the one given to
   * pnga_check_handle above -- confirm whether "ga_elem_op" is intended */
  GA_PUSH_NAME("ga_elem_op");

  /* only "min" and "max" (matched on their first three characters) are valid */
  if (strncmp(op,"min",3) != 0 && strncmp(op,"max",3) != 0)
    pnga_error("operator not recognized",0);

  pnga_inquire(g_a, &type, &ndim, dims);
  num_blocks = pnga_total_blocks(g_a);

  if (num_blocks < 0) {
    /* a negative block count selects the single-patch-per-process path */
    pnga_distribution(g_a, me, lo, hi);

    if ( lo[0]> 0 ){ /* base index is 1: we get 0 if no elements stored on p */

      /******************* calculate local result ************************/
      void    *ptr;
      pnga_access_ptr(g_a, lo, hi, &ptr, ld);
      GET_ELEMS(ndim,lo,hi,ld,&elems);
      participate =1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release(g_a, lo, hi);

      /* determine element subscript in the ndim-array: peel the linear
       * index apart one dimension at a time, first dimension fastest */
      for(i = 0; i < ndim; i++){
        Integer extent = hi[i]-lo[i]+1; /* full width: no truncation to int */
        info.subscr[i] = ind%extent + lo[i] ;
        ind /= extent;
      }
    } 
  } else {
    void *ptr;
    Integer j, offset, jtot, upper;
    Integer nproc = pnga_nnodes();
    pnga_access_block_segment_ptr(g_a, me, &ptr, &elems);
    if (elems > 0) {
      participate =1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release_block_segment(g_a, me);

      /* convert local index back into a global array index */
      if (!pnga_uses_proc_grid(g_a)) {
        /* walk the blocks me, me+nproc, ... accumulating their sizes until
         * the block containing the local index is found; lo/hi then
         * describe that block and offset is where it starts */
        offset = 0;
        for (i=me; i<num_blocks; i += nproc) {
          pnga_distribution(g_a, i, lo, hi);
          jtot = 1;
          for (j=0; j<ndim; j++) {
            jtot *= (hi[j]-lo[j]+1);
          }
          upper = offset + jtot;
          if (ind >= offset && ind < upper) {
            break;
          }  else {
            offset += jtot;
          }
        }
        /* determine element subscript in the ndim-array */
        ind -= offset;
        for(i = 0; i < ndim; i++){
          Integer extent = hi[i]-lo[i]+1; /* full width: no truncation to int */
          info.subscr[i] = ind%extent + lo[i] ;
          ind /= extent;
        }
      } else {
        Integer stride[MAXDIM], index[MAXDIM];
        Integer blocks[MAXDIM], block_dims[MAXDIM];
        Integer proc_index[MAXDIM], topology[MAXDIM];
        Integer l_index[MAXDIM];
        Integer min, max;
        pnga_get_proc_index(g_a, me, proc_index);
        pnga_get_block_info(g_a, blocks, block_dims);
        pnga_get_proc_grid(g_a, topology);
        /* figure out strides for locally held block of data: along each
         * dimension, add up the extents of the blocks proc_index[i],
         * proc_index[i]+topology[i], ..., clipping the last to dims[i] */
        for (i=0; i<ndim; i++) {
          stride[i] = 0;
          for (j=proc_index[i]; j<blocks[i]; j += topology[i]) {
            min = j*block_dims[i] + 1;
            max = (j+1)*block_dims[i];
            if (max > dims[i])
              max = dims[i];
            stride[i] += (max - min + 1);
          }
        }
        /* use strides to figure out local index */
        l_index[0] = ind%stride[0];
        for (i=1; i<ndim; i++) {
          ind = (ind-l_index[i-1])/stride[i-1];
          l_index[i] = ind%stride[i];
        }
        /* figure out block index for block holding data element */
        for (i=0; i<ndim; i++) {
          index[i] = l_index[i]/block_dims[i];
        }
        for (i=0; i<ndim; i++) {
          /* lower bound of the owning block; the "+ 1" keeps it 1-based so
           * the subscript agrees with the other branches (and with the
           * block bound "min" computed above) */
          lo[i] = (topology[i]*index[i] + proc_index[i])*block_dims[i] + 1;
          info.subscr[i] = l_index[i]%block_dims[i] + lo[i];
        }
      }
    }
  }
  /* calculate global result */
  /* For the scalar types only the leading part of info is exchanged:
   * sizeof(double) bytes followed by ndim subscripts (presumably the value
   * slot at the head of elem_info_t -- confirm against its definition). */
  if(type==C_INT){
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_INT,participate);
    *(int*)val = (int)info.v.ival;
  }else if(type==C_LONG){
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_LONG,participate);
    *(long*)val = info.v.lval;
  }else if(type==C_LONGLONG){
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_LONG_LONG,participate);
    *(long long*)val = info.v.llval;
  }else if(type==C_DBL){
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoublePrecision*)val = info.v.dval;
  }else if(type==C_FLOAT){
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(float*)val = info.v.fval;       
  }else if(type==C_SCPL){
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(SingleComplex*)val = info.extra2;
  }else{
    /* every remaining type is read back as a double complex value */
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoubleComplex*)val = info.extra;
  }

  /* hand the winning subscript back to the caller */
  for(i = 0; i < ndim; i++) subscript[i]= info.subscr[i];
  GA_POP_NAME;
}
Ejemplo n.º 2
0
/**
 * Get the next sub-block from the larger block defined when the iterator was
 * initialized
 *
 * The iteration state (count, iproc, iblock, index, offset, oversize, ...)
 * lives in the handle and is advanced by every successful call.
 *
 * @param hdl handle for iterator
 * @param proc processor on which the next block resides
 * @param plo indices for lower corner of remote block; on return *plo points
 *            at the index array
 * @param phi indices for upper corner of remote block; on return *phi points
 *            at the index array
 * @param prem pointer to remote buffer
 * @param ldrem leading dimensions of the remote buffer (filled directly in
 *              the block-cyclic, tiled and ScaLAPACK paths; handed to
 *              gam_Location in the regular path)
 * @return returns false if there is no new block, true otherwise
 */
int gai_iterator_next(_iterator_hdl *hdl, int *proc, Integer *plo[],
    Integer *phi[], char **prem, Integer ldrem[])
{
  Integer idx, i, p;
  Integer handle = GA_OFFSET + hdl->g_a;
  Integer p_handle = GA[handle].p_handle;
  Integer n_rstrctd = GA[handle].num_rstrctd;
  Integer *rank_rstrctd = GA[handle].rank_rstrctd;
  Integer elemsize = GA[handle].elemsize;
  int ndim;
  ndim = GA[handle].ndim;
  if (GA[handle].distr_type == REGULAR) {
    Integer *blo, *bhi;
    Integer nelems;
    idx = hdl->count;
    /* no blocks left, so return */
    if (idx>=hdl->nproc) return 0;

    p = (Integer)ProcListPerm[idx];
    *proc = (int)GA_proclist[p];
    if (p_handle >= 0) {
      *proc = (int)PGRP_LIST[p_handle].inv_map_proc_list[*proc];
    }
#ifdef PERMUTE_PIDS
    if (GA_Proc_list) *proc = (int)GA_inv_Proc_list[*proc];
#endif
    /* Find  visible portion of patch held by processor p and
     * return the result in plo and phi. Also get actual processor
     * index corresponding to p and store the result in proc.
     */
    gam_GetRangeFromMap(p, ndim, plo, phi);
    /* note: this overwrites the group-translated value assigned above */
    *proc = (int)GA_proclist[p];
    blo = *plo;
    bhi = *phi;
#ifdef LARGE_BLOCK_REQ
    /* Check to see if block size will overflow int values and initialize
     * counter over sub-blocks if the block is too big*/
    if (!hdl->oversize) {
      nelems = 1; 
      for (i=0; i<ndim; i++) nelems *= (bhi[i]-blo[i]+1);
      if (elemsize*nelems > MAX_INT_VALUE) {
        Integer maxint = 0;
        int maxidx = 0;
        hdl->oversize = 1;
        /* Figure out block dimensions that correspond to block sizes
         * that are beneath MAX_INT_VALUE */
        for (i=0; i<ndim; i++) {
          hdl->blk_size[i] = (bhi[i]-blo[i]+1);
        }
        while (elemsize*nelems > MAX_INT_VALUE) {
          /* Restart the search on every pass so that the *currently*
           * largest dimension is the one that gets halved. With a stale
           * maximum the same dimension would be halved over and over and
           * could reach zero, which would divide by zero below. */
          maxint = 0;
          maxidx = 0;
          for (i=0; i<ndim; i++) {
            if (hdl->blk_size[i] > maxint) {
              maxidx = i;
              maxint = hdl->blk_size[i];
            }
          }
          hdl->blk_size[maxidx] /= 2;
          nelems = 1;
          for (i=0; i<ndim; i++) nelems *= hdl->blk_size[i];
        }
        /* Calculate the number of blocks along each dimension
         * (rounded up so a partial trailing sub-block is counted) */
        for (i=0; i<ndim; i++) {
          hdl->blk_dim[i] = (bhi[i]-blo[i]+1)/hdl->blk_size[i];
          if (hdl->blk_dim[i]*hdl->blk_size[i] < (bhi[i]-blo[i]+1))
            hdl->blk_dim[i]++;
        }
        /* initialize block counting */
        for (i=0; i<ndim; i++) hdl->blk_inc[i] = 0;
      }
    }

    /* Get sub-block bounding dimensions */
    if (hdl->oversize) {
      Integer tmp;
      /* work on copies held in the handle so the bounds of the full block
       * are not modified */
      for (i=0; i<ndim; i++) {
        hdl->lobuf[i] = blo[i];
        hdl->hibuf[i] = bhi[i];
      }
      *plo = hdl->lobuf;
      *phi = hdl->hibuf;
      blo = *plo;
      bhi = *phi;
      /* shift the lower corner to the current sub-block and clip the upper
       * corner to the end of the full block */
      for (i=0; i<ndim; i++) {
        hdl->lobuf[i] += hdl->blk_inc[i]*hdl->blk_size[i];
        tmp = hdl->lobuf[i] + hdl->blk_size[i]-1;
        if (tmp < hdl->hibuf[i]) hdl->hibuf[i] = tmp;
      }
    }
#endif

    if (n_rstrctd == 0) {
      gam_Location(*proc, handle, blo, prem, ldrem);
    } else {
      gam_Location(rank_rstrctd[*proc], handle, blo, prem, ldrem);
    }
    if (p_handle >= 0) {
      *proc = (int)GA_proclist[p];
      /* BJP */
      *proc = PGRP_LIST[p_handle].inv_map_proc_list[*proc];
    }
#ifdef LARGE_BLOCK_REQ
    if (!hdl->oversize) {
#endif
      hdl->count++;
#ifdef LARGE_BLOCK_REQ
    } else {
      /* update blk_inc array: advance the sub-block counter like an
       * odometer, carrying into the next dimension on overflow */
      hdl->blk_inc[0]++; 
      for (i=0; i<ndim-1; i++) {
        if (hdl->blk_inc[i] >= hdl->blk_dim[i]) {
          hdl->blk_inc[i] = 0;
          hdl->blk_inc[i+1]++;
        }
      }
      /* all sub-blocks visited: move on to the next processor's block */
      if (hdl->blk_inc[ndim-1] >= hdl->blk_dim[ndim-1]) {
        hdl->count++;
        hdl->oversize = 0;
      }
    }
#endif
    return 1;
  } else {
    Integer offset, l_offset, last, pinv;
    Integer blk_tot = GA[handle].block_total;
    Integer blo[MAXDIM], bhi[MAXDIM];
    Integer idx, j, jtot, chk, iproc;
    int check1, check2;
    if (GA[handle].distr_type == BLOCK_CYCLIC) {
      /* Simple block-cyclic distribution */
      if (hdl->iproc >= GAnproc) return 0;
      /*if (hdl->iproc == GAnproc-1 && hdl->iblock >= blk_tot) return 0;*/
      /* first block of a processor: restart the running element offset */
      if (hdl->iblock == hdl->iproc) hdl->offset = 0;
      chk = 0;
      /* loop over blocks until a block with data is found */
      while (!chk) {
        /* get the block corresponding to the current value of iblock */
        idx = hdl->iblock;
        ga_ownsM(handle,idx,blo,bhi);
        /* check to see if this block overlaps with requested block
         * defined by lo and hi */
        chk = 1;
        for (j=0; j<ndim; j++) {
          /* check to see if at least one end point of the interval
           * represented by blo and bhi falls in the interval
           * represented by lo and hi */
          check1 = ((blo[j] >= hdl->lo[j] && blo[j] <= hdl->hi[j]) ||
              (bhi[j] >= hdl->lo[j] && bhi[j] <= hdl->hi[j]));
          /* check to see if interval represented by lo and hi
           * falls entirely within interval represented by blo and bhi */
          check2 = ((hdl->lo[j] >= blo[j] && hdl->lo[j] <= bhi[j]) &&
              (hdl->hi[j] >= blo[j] && hdl->hi[j] <= bhi[j]));
          /* If there is some data, move to the next section of code,
           * otherwise, check next block */
          if (!check1 && !check2) {
            chk = 0;
          }
        }
        
        if (!chk) {
          /* evaluate new offset for block idx */
          jtot = 1;
          for (j=0; j<ndim; j++) {
            jtot *= bhi[j]-blo[j]+1;
          }
          hdl->offset += jtot;
          /* increment to next block */
          hdl->iblock += pnga_nnodes();
          if (hdl->iblock >= blk_tot) {
            hdl->offset = 0;
            hdl->iproc++;
            hdl->iblock = hdl->iproc;
            if (hdl->iproc >= GAnproc) return 0;
          }
        }
      }

      /* The block overlaps some data in lo,hi */
      if (chk) {
        Integer *clo, *chi;
        *plo = hdl->lobuf;
        *phi = hdl->hibuf;
        clo = *plo;
        chi = *phi;
        /* get the patch of block that overlaps requested region */
        gam_GetBlockPatch(blo,bhi,hdl->lo,hdl->hi,clo,chi,ndim);

        /* evaluate offset within block */
        last = ndim - 1;
        jtot = 1;
        if (last == 0) ldrem[0] = bhi[0] - blo[0] + 1;
        l_offset = 0;
        for (j=0; j<last; j++) {
          l_offset += (clo[j]-blo[j])*jtot;
          ldrem[j] = bhi[j]-blo[j]+1;
          jtot *= ldrem[j];
        }
        l_offset += (clo[last]-blo[last])*jtot;
        l_offset += hdl->offset;

        /* get pointer to data on remote block */
        pinv = idx%GAnproc;
        /* NOTE(review): the regular path tests p_handle >= 0 before the
         * same translation -- confirm that excluding 0 here is intended */
        if (p_handle > 0) {
          pinv = PGRP_LIST[p_handle].inv_map_proc_list[pinv];
        }
        *prem =  GA[handle].ptr[pinv]+l_offset*GA[handle].elemsize;
        *proc = pinv;

        /* evaluate new offset for block idx */
        jtot = 1;
        for (j=0; j<ndim; j++) {
          jtot *= bhi[j]-blo[j]+1;
        }
        hdl->offset += jtot;

        /* advance to this processor's next block, or to the first block of
         * the next processor once its blocks are exhausted */
        hdl->iblock += pnga_nnodes();
        if (hdl->iblock >= blk_tot) {
          hdl->iproc++;
          hdl->iblock = hdl->iproc;
          hdl->offset = 0;
        }
      }
      return 1;
    } else if (GA[handle].distr_type == SCALAPACK ||
        GA[handle].distr_type == TILED) {
      /* Scalapack-type data distribution */
      Integer proc_index[MAXDIM], index[MAXDIM];
      Integer itmp;
      Integer blk_jinc;
      /* Return false at the end of the iteration */
      if (hdl->iproc >= GAnproc) return 0;
      chk = 0;
      /* loop over blocks until a block with data is found */
      while (!chk) {
        /* get bounds for current block */
        for (j = 0; j < ndim; j++) {
          blo[j] = hdl->blk_size[j]*(hdl->index[j])+1;
          bhi[j] = hdl->blk_size[j]*(hdl->index[j]+1);
          if (bhi[j] > GA[handle].dims[j]) bhi[j] = GA[handle].dims[j];
        }
        /* check to see if this block overlaps with requested block
         * defined by lo and hi */
        chk = 1;
        for (j=0; j<ndim; j++) {
          /* check to see if at least one end point of the interval
           * represented by blo and bhi falls in the interval
           * represented by lo and hi */
          check1 = ((blo[j] >= hdl->lo[j] && blo[j] <= hdl->hi[j]) ||
              (bhi[j] >= hdl->lo[j] && bhi[j] <= hdl->hi[j]));
          /* check to see if interval represented by lo and hi
           * falls entirely within interval represented by blo and bhi */
          check2 = ((hdl->lo[j] >= blo[j] && hdl->lo[j] <= bhi[j]) &&
              (hdl->hi[j] >= blo[j] && hdl->hi[j] <= bhi[j]));
          /* If there is some data, move to the next section of code,
           * otherwise, check next block */
          if (!check1 && !check2) {
            chk = 0;
          }
        }
        
        if (!chk) {
          /* evaluate new offset for block */
          itmp = 1;
          for (j=0; j<ndim; j++) {
            itmp *= bhi[j]-blo[j]+1;
          }
          hdl->offset += itmp;
          /* increment to next block */
          hdl->index[0] += GA[handle].nblock[0];
          for (j=0; j<ndim; j++) {
            if (hdl->index[j] >= GA[handle].num_blocks[j] && j < ndim-1) {
              hdl->index[j] = hdl->proc_index[j];
              hdl->index[j+1] += GA[handle].nblock[j+1];
            }
          }
          if (hdl->index[ndim-1] >= GA[handle].num_blocks[ndim-1]) {
            /* this processor has no more blocks: start on the next one */
            hdl->iproc++;
            if (hdl->iproc >= GAnproc) return 0;
            hdl->offset = 0;
            if (GA[handle].distr_type == TILED) {
              gam_find_tile_proc_indices(handle, hdl->iproc, hdl->proc_index);
              gam_find_tile_proc_indices(handle, hdl->iproc, hdl->index);
            } else if (GA[handle].distr_type == SCALAPACK) {
              gam_find_proc_indices(handle, hdl->iproc, hdl->proc_index);
              gam_find_proc_indices(handle, hdl->iproc, hdl->index);
            }
          }
        }
      }
      if (chk) {
        Integer *clo, *chi;
        *plo = hdl->lobuf;
        *phi = hdl->hibuf;
        clo = *plo;
        chi = *phi;
        /* get the patch of block that overlaps requested region */
        gam_GetBlockPatch(blo,bhi,hdl->lo,hdl->hi,clo,chi,ndim);

        /* evaluate offset within block */
        last = ndim - 1;
        if (GA[handle].distr_type == TILED) {
          /* tiled: offset within the block plus the running element offset
           * of the blocks already visited on this processor */
          jtot = 1;
          if (last == 0) ldrem[0] = bhi[0] - blo[0] + 1;
          l_offset = 0;
          for (j=0; j<last; j++) {
            l_offset += (clo[j]-blo[j])*jtot;
            ldrem[j] = bhi[j]-blo[j]+1;
            jtot *= ldrem[j];
          }
          l_offset += (clo[last]-blo[last])*jtot;
          l_offset += hdl->offset;
        } else if (GA[handle].distr_type == SCALAPACK) {
          /* ScaLAPACK: the leading dimension starts from hdl->blk_ld and is
           * extended by blk_jinc, chosen from this processor's position
           * relative to hdl->hlf_blk when hdl->blk_inc is positive */
          l_offset = 0;
          jtot = 1;
          for (j=0; j<last; j++)  {
            ldrem[j] = hdl->blk_ld[j];
            blk_jinc = GA[handle].dims[j]%hdl->blk_size[j];
            if (hdl->blk_inc[j] > 0) {
              if (hdl->proc_index[j]<hdl->hlf_blk[j]) {
                blk_jinc = hdl->blk_size[j];
              } else if (hdl->proc_index[j] == hdl->hlf_blk[j]) {
                blk_jinc = hdl->blk_inc[j]%hdl->blk_size[j];
              } else {
                blk_jinc = 0;
              }
            }
            ldrem[j] += blk_jinc;
            l_offset += (clo[j]-blo[j]
                + ((blo[j]-1)/hdl->blk_dim[j])*hdl->blk_size[j])*jtot;
            jtot *= ldrem[j];
          }
          /* j equals last once the loop above finishes; index with last
           * explicitly so the expression does not depend on that */
          l_offset += (clo[last]-blo[last]
              + ((blo[last]-1)/hdl->blk_dim[last])*hdl->blk_size[last])*jtot;
        }
        /* get pointer to data on remote block */
        pinv = (hdl->iproc)%GAnproc;
        /* NOTE(review): the regular path tests p_handle >= 0 before the
         * same translation -- confirm that excluding 0 here is intended */
        if (p_handle > 0) {
          pinv = PGRP_LIST[p_handle].inv_map_proc_list[pinv];
        }
        *prem =  GA[handle].ptr[pinv]+l_offset*GA[handle].elemsize;
        *proc = pinv;

        /* evaluate new offset for block */
        itmp = 1;
        for (j=0; j<ndim; j++) {
          itmp *= bhi[j]-blo[j]+1;
        }
        hdl->offset += itmp;
        /* increment to next block */
        hdl->index[0] += GA[handle].nblock[0];
        for (j=0; j<ndim; j++) {
          if (hdl->index[j] >= GA[handle].num_blocks[j] && j < ndim-1) {
            hdl->index[j] = hdl->proc_index[j];
            hdl->index[j+1] += GA[handle].nblock[j+1];
          }
        }
        if (hdl->index[ndim-1] >= GA[handle].num_blocks[ndim-1]) {
          /* this processor has no more blocks: start on the next one */
          hdl->iproc++;
          hdl->offset = 0;
          if (GA[handle].distr_type == TILED) {
            gam_find_tile_proc_indices(handle, hdl->iproc, hdl->proc_index);
            gam_find_tile_proc_indices(handle, hdl->iproc, hdl->index);
          } else if (GA[handle].distr_type == SCALAPACK) {
            gam_find_proc_indices(handle, hdl->iproc, hdl->proc_index);
            gam_find_proc_indices(handle, hdl->iproc, hdl->index);
          }
        }
      }
    }
    /* NOTE(review): a distribution type matching none of the branches above
     * also ends up here and reports success without filling the outputs */
    return 1;
  }
  return 0;
}