/**
 * Select the minimum or maximum element of a global array and report both its
 * value and its subscript.
 *
 * Each process picks a candidate from the data it holds locally (via
 * snga_select_elem), converts the local linear index of that candidate into an
 * ndim subscript, and then armci_msg_sel combines the per-process candidates.
 * Processes that hold no data pass participate == 0 to armci_msg_sel.
 *
 * @param g_a        handle of the global array
 * @param op         operator name; only the first three characters are
 *                   compared and they must be "min" or "max", otherwise
 *                   pnga_error is called
 * @param val        output; written through a pointer cast chosen by the
 *                   array's data type (int, long, long long, double, float,
 *                   SingleComplex or DoubleComplex)
 * @param subscript  output; receives ndim entries copied from the selected
 *                   element's subscript
 */
void pnga_select_elem(Integer g_a, char* op, void* val, Integer *subscript)
{
  Integer ndim, type, me, elems, ind=0, i;
  Integer lo[MAXDIM],hi[MAXDIM],dims[MAXDIM],ld[MAXDIM-1];
  elem_info_t info;
  Integer num_blocks;
  int participate=0;
  int local_sync_begin;
  int scalar_size;

  /* Zero the record so that processes without local data hand defined bytes
   * to armci_msg_sel instead of indeterminate stack contents. */
  memset(&info, 0, sizeof(info));

  local_sync_begin = _ga_sync_begin;
  _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/
  if(local_sync_begin)pnga_sync();

  me = pnga_nodeid();

  pnga_check_handle(g_a, "ga_select_elem");
  /* use the same routine name as the handle check above */
  GA_PUSH_NAME("ga_select_elem");

  if (strncmp(op,"min",3) != 0 && strncmp(op,"max",3) != 0)
    pnga_error("operator not recognized",0);

  pnga_inquire(g_a, &type, &ndim, dims);
  num_blocks = pnga_total_blocks(g_a);

  if (num_blocks < 0) {
    /* negative block count: one contiguous patch per process */
    pnga_distribution(g_a, me, lo, hi);

    if ( lo[0]> 0 ){ /* base index is 1: we get 0 if no elements stored on p */

      /******************* calculate local result ************************/
      void *ptr;
      pnga_access_ptr(g_a, lo, hi, &ptr, ld);
      GET_ELEMS(ndim,lo,hi,ld,&elems);
      participate =1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release(g_a, lo, hi);

      /* determine element subscript in the ndim-array */
      for(i = 0; i < ndim; i++){
        /* keep the extent in Integer: no narrowing, no shadowing of elems */
        Integer extent = hi[i]-lo[i]+1;
        info.subscr[i] = ind%extent + lo[i];
        ind /= extent;
      }
    }
  } else {
    void *ptr;
    Integer j, offset, jtot, upper;
    Integer nproc = pnga_nnodes();
    pnga_access_block_segment_ptr(g_a, me, &ptr, &elems);
    if (elems > 0) {
      participate =1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release_block_segment(g_a, me);

      /* convert local index back into a global array index */
      if (!pnga_uses_proc_grid(g_a)) {
        /* Walk the blocks me, me+nproc, ... accumulating their sizes until
         * the block whose index range contains ind is found; lo/hi are left
         * describing that block. */
        offset = 0;
        for (i=me; i<num_blocks; i += nproc) {
          pnga_distribution(g_a, i, lo, hi);
          jtot = 1;
          for (j=0; j<ndim; j++) {
            jtot *= (hi[j]-lo[j]+1);
          }
          upper = offset + jtot;
          if (ind >= offset && ind < upper) {
            break;
          } else {
            offset += jtot;
          }
        }
        /* determine element subscript in the ndim-array */
        ind -= offset;
        for(i = 0; i < ndim; i++){
          Integer extent = hi[i]-lo[i]+1;
          info.subscr[i] = ind%extent + lo[i];
          ind /= extent;
        }
      } else {
        Integer stride[MAXDIM], index[MAXDIM];
        Integer blocks[MAXDIM], block_dims[MAXDIM];
        Integer proc_index[MAXDIM], topology[MAXDIM];
        Integer l_index[MAXDIM];
        Integer blk_first, blk_last;
        pnga_get_proc_index(g_a, me, proc_index);
        pnga_get_block_info(g_a, blocks, block_dims);
        pnga_get_proc_grid(g_a, topology);
        /* figure out strides for locally held block of data */
        for (i=0; i<ndim; i++) {
          stride[i] = 0;
          for (j=proc_index[i]; j<blocks[i]; j += topology[i]) {
            blk_first = j*block_dims[i] + 1;
            blk_last = (j+1)*block_dims[i];
            /* the final block along a dimension may be truncated */
            if (blk_last > dims[i]) blk_last = dims[i];
            stride[i] += (blk_last - blk_first + 1);
          }
        }
        /* use strides to figure out local index */
        l_index[0] = ind%stride[0];
        for (i=1; i<ndim; i++) {
          ind = (ind-l_index[i-1])/stride[i-1];
          l_index[i] = ind%stride[i];
        }
        /* figure out block index for block holding data element */
        for (i=0; i<ndim; i++) {
          index[i] = l_index[i]/block_dims[i];
        }
        /* NOTE(review): lo[] here is derived from zero-based block numbers
         * with no +1, whereas the other branches take lo[] from
         * pnga_distribution (the regular branch notes that the base index
         * is 1). Confirm that this branch yields the same index base. */
        for (i=0; i<ndim; i++) {
          lo[i] = (topology[i]*index[i] + proc_index[i])*block_dims[i];
          info.subscr[i] = l_index[i]%block_dims[i] + lo[i];
        }
      }
    }
  }

  /* calculate global result */
  /* Payload size used for the scalar types; the complex types send the
   * whole record instead. */
  scalar_size = sizeof(double) + sizeof(Integer)*(int)ndim;
  if(type==C_INT){
    armci_msg_sel(&info,scalar_size,op,ARMCI_INT,participate);
    *(int*)val = (int)info.v.ival;
  }else if(type==C_LONG){
    armci_msg_sel(&info,scalar_size,op,ARMCI_LONG,participate);
    *(long*)val = info.v.lval;
  }else if(type==C_LONGLONG){
    armci_msg_sel(&info,scalar_size,op,ARMCI_LONG_LONG,participate);
    *(long long*)val = info.v.llval;
  }else if(type==C_DBL){
    armci_msg_sel(&info,scalar_size,op,ARMCI_DOUBLE,participate);
    *(DoublePrecision*)val = info.v.dval;
  }else if(type==C_FLOAT){
    armci_msg_sel(&info,scalar_size,op,ARMCI_FLOAT,participate);
    *(float*)val = info.v.fval;
  }else if(type==C_SCPL){
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(SingleComplex*)val = info.extra2;
  }else{
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoubleComplex*)val = info.extra;
  }

  for(i = 0; i < ndim; i++) subscript[i]= info.subscr[i];
  GA_POP_NAME;
}
/**
 * Get the next sub-block from the larger block defined when the iterator was
 * initialized
 *
 * The traversal depends on GA[handle].distr_type: REGULAR walks the process
 * list held in the handle (optionally splitting oversized patches when
 * LARGE_BLOCK_REQ is defined); BLOCK_CYCLIC, SCALAPACK and TILED walk the
 * blocks process by process, skipping blocks that do not overlap the
 * requested region hdl->lo..hdl->hi.
 *
 * @param hdl handle for iterator
 * @param proc processor on which the next block resides
 * @param plo indices for lower corner of remote block
 * @param phi indices for upper corner of remote block
 * @param prem pointer to remote buffer
 * @param ldrem output array of leading dimensions for the remote buffer
 *              (filled by gam_Location for REGULAR distributions, computed
 *              from the block extents otherwise)
 * @return returns false if there is no new block, true otherwise
 */
int gai_iterator_next(_iterator_hdl *hdl, int *proc, Integer *plo[],
    Integer *phi[], char **prem, Integer ldrem[])
{
  Integer idx, i, p;
  Integer handle = GA_OFFSET + hdl->g_a;
  Integer p_handle = GA[handle].p_handle;
  Integer n_rstrctd = GA[handle].num_rstrctd;
  Integer *rank_rstrctd = GA[handle].rank_rstrctd;
  Integer elemsize = GA[handle].elemsize;
  int ndim;
  ndim = GA[handle].ndim;
  if (GA[handle].distr_type == REGULAR) {
    Integer *blo, *bhi;
    Integer nelems;
    idx = hdl->count;

    /* no blocks left, so return */
    if (idx>=hdl->nproc) return 0;

    p = (Integer)ProcListPerm[idx];
    *proc = (int)GA_proclist[p];
    if (p_handle >= 0) {
      *proc = (int)PGRP_LIST[p_handle].inv_map_proc_list[*proc];
    }
#ifdef PERMUTE_PIDS
    if (GA_Proc_list) *proc = (int)GA_inv_Proc_list[*proc];
#endif
    /* Find  visible portion of patch held by processor p and
     * return the result in plo and phi. Also get actual processor
     * index corresponding to p and store the result in proc.
     */
    gam_GetRangeFromMap(p, ndim, plo, phi);
    /* note: this assignment replaces the value of *proc computed above */
    *proc = (int)GA_proclist[p];
    blo = *plo;
    bhi = *phi;
#ifdef LARGE_BLOCK_REQ
    /* Check to see if block size will overflow int values and initialize
     * counter over sub-blocks if the block is too big*/
    if (!hdl->oversize) {
      nelems = 1;
      for (i=0; i<ndim; i++) nelems *= (bhi[i]-blo[i]+1);
      if (elemsize*nelems > MAX_INT_VALUE) {
        Integer maxint;
        int maxidx = 0;
        hdl->oversize = 1;
        /* Figure out block dimensions that correspond to block sizes
         * that are beneath MAX_INT_VALUE */
        for (i=0; i<ndim; i++) {
          hdl->blk_size[i] = (bhi[i]-blo[i]+1);
        }
        while (elemsize*nelems > MAX_INT_VALUE) {
          /* Rescan on every pass so that the currently largest dimension is
           * the one halved. Keeping the maximum from an earlier pass would
           * pin the choice to a single dimension, which could then be
           * halved down to 0 and cause a division by zero below. */
          maxint = 0;
          maxidx = 0;
          for (i=0; i<ndim; i++) {
            if (hdl->blk_size[i] > maxint) {
              maxidx = (int)i;
              maxint = hdl->blk_size[i];
            }
          }
          hdl->blk_size[maxidx] /= 2;
          nelems = 1;
          for (i=0; i<ndim; i++) nelems *= hdl->blk_size[i];
        }
        /* Calculate the number of blocks along each dimension */
        for (i=0; i<ndim; i++) {
          hdl->blk_dim[i] = (bhi[i]-blo[i]+1)/hdl->blk_size[i];
          if (hdl->blk_dim[i]*hdl->blk_size[i] < (bhi[i]-blo[i]+1))
            hdl->blk_dim[i]++;
        }
        /* initialize block counting */
        for (i=0; i<ndim; i++) hdl->blk_inc[i] = 0;
      }
    }

    /* Get sub-block bounding dimensions */
    if (hdl->oversize) {
      Integer tmp;
      for (i=0; i<ndim; i++) {
        hdl->lobuf[i] = blo[i];
        hdl->hibuf[i] = bhi[i];
      }
      /* from here on the caller sees the iterator's own buffers */
      *plo = hdl->lobuf;
      *phi = hdl->hibuf;
      blo = *plo;
      bhi = *phi;
      for (i=0; i<ndim; i++) {
        hdl->lobuf[i] += hdl->blk_inc[i]*hdl->blk_size[i];
        tmp = hdl->lobuf[i] + hdl->blk_size[i]-1;
        if (tmp < hdl->hibuf[i]) hdl->hibuf[i] = tmp;
      }
    }
#endif

    if (n_rstrctd == 0) {
      gam_Location(*proc, handle, blo, prem, ldrem);
    } else {
      gam_Location(rank_rstrctd[*proc], handle, blo, prem, ldrem);
    }
    if (p_handle >= 0) {
      *proc = (int)GA_proclist[p];
      /* BJP */
      *proc = PGRP_LIST[p_handle].inv_map_proc_list[*proc];
    }
#ifdef LARGE_BLOCK_REQ
    if (!hdl->oversize) {
#endif
      hdl->count++;
#ifdef LARGE_BLOCK_REQ
    } else {
      /* update blk_inc array */
      hdl->blk_inc[0]++;
      for (i=0; i<ndim-1; i++) {
        if (hdl->blk_inc[i] >= hdl->blk_dim[i]) {
          hdl->blk_inc[i] = 0;
          hdl->blk_inc[i+1]++;
        }
      }
      /* all sub-blocks of this patch visited: move on to the next patch */
      if (hdl->blk_inc[ndim-1] >= hdl->blk_dim[ndim-1]) {
        hdl->count++;
        hdl->oversize = 0;
      }
    }
#endif
    return 1;
  } else {
    Integer offset, l_offset, last, pinv;
    Integer blk_tot = GA[handle].block_total;
    Integer blo[MAXDIM], bhi[MAXDIM];
    Integer idx, j, jtot, chk, iproc;
    int check1, check2;
    if (GA[handle].distr_type == BLOCK_CYCLIC) {
      /* Simple block-cyclic distribution */
      if (hdl->iproc >= GAnproc) return 0;
      /*if (hdl->iproc == GAnproc-1 && hdl->iblock >= blk_tot) return 0;*/
      if (hdl->iblock == hdl->iproc) hdl->offset = 0;
      chk = 0;
      /* loop over blocks until a block with data is found */
      while (!chk) {
        /* get the block corresponding to the current value of iblock */
        idx = hdl->iblock;
        ga_ownsM(handle,idx,blo,bhi);
        /* check to see if this block overlaps with requested block
         * defined by lo and hi */
        chk = 1;
        for (j=0; j<ndim; j++) {
          /* check to see if at least one end point of the interval
           * represented by blo and bhi falls in the interval
           * represented by lo and hi */
          check1 = ((blo[j] >= hdl->lo[j] && blo[j] <= hdl->hi[j]) ||
              (bhi[j] >= hdl->lo[j] && bhi[j] <= hdl->hi[j]));
          /* check to see if interval represented by lo and hi
           * falls entirely within interval represented by blo and bhi */
          check2 = ((hdl->lo[j] >= blo[j] && hdl->lo[j] <= bhi[j]) &&
              (hdl->hi[j] >= blo[j] && hdl->hi[j] <= bhi[j]));
          /* If there is some data, move to the next section of code,
           * otherwise, check next block */
          if (!check1 && !check2) {
            chk = 0;
          }
        }

        if (!chk) {
          /* evaluate new offset for block idx */
          jtot = 1;
          for (j=0; j<ndim; j++) {
            jtot *= bhi[j]-blo[j]+1;
          }
          hdl->offset += jtot;
          /* increment to next block */
          hdl->iblock += pnga_nnodes();
          if (hdl->iblock >= blk_tot) {
            hdl->offset = 0;
            hdl->iproc++;
            hdl->iblock = hdl->iproc;
            if (hdl->iproc >= GAnproc) return 0;
          }
        }
      }

      /* The block overlaps some data in lo,hi */
      if (chk) {
        Integer *clo, *chi;
        *plo = hdl->lobuf;
        *phi = hdl->hibuf;
        clo = *plo;
        chi = *phi;
        /* get the patch of block that overlaps requested region */
        gam_GetBlockPatch(blo,bhi,hdl->lo,hdl->hi,clo,chi,ndim);

        /* evaluate offset within block */
        last = ndim - 1;
        jtot = 1;
        if (last == 0) ldrem[0] = bhi[0] - blo[0] + 1;
        l_offset = 0;
        for (j=0; j<last; j++) {
          l_offset += (clo[j]-blo[j])*jtot;
          ldrem[j] = bhi[j]-blo[j]+1;
          jtot *= ldrem[j];
        }
        l_offset += (clo[last]-blo[last])*jtot;
        l_offset += hdl->offset;

        /* get pointer to data on remote block */
        pinv = idx%GAnproc;
        if (p_handle > 0) {
          pinv = PGRP_LIST[p_handle].inv_map_proc_list[pinv];
        }
        *prem = GA[handle].ptr[pinv]+l_offset*GA[handle].elemsize;
        *proc = pinv;

        /* evaluate new offset for block idx */
        jtot = 1;
        for (j=0; j<ndim; j++) {
          jtot *= bhi[j]-blo[j]+1;
        }
        hdl->offset += jtot;

        hdl->iblock += pnga_nnodes();
        if (hdl->iblock >= blk_tot) {
          hdl->iproc++;
          hdl->iblock = hdl->iproc;
          hdl->offset = 0;
        }
      }
      return 1;
    } else if (GA[handle].distr_type == SCALAPACK ||
        GA[handle].distr_type == TILED) {
      /* Scalapack-type data distribution */
      Integer proc_index[MAXDIM], index[MAXDIM];
      Integer itmp;
      Integer blk_jinc;
      /* Return false at the end of the iteration */
      if (hdl->iproc >= GAnproc) return 0;
      chk = 0;
      /* loop over blocks until a block with data is found */
      while (!chk) {
        /* get bounds for current block */
        for (j = 0; j < ndim; j++) {
          blo[j] = hdl->blk_size[j]*(hdl->index[j])+1;
          bhi[j] = hdl->blk_size[j]*(hdl->index[j]+1);
          if (bhi[j] > GA[handle].dims[j]) bhi[j] = GA[handle].dims[j];
        }
        /* check to see if this block overlaps with requested block
         * defined by lo and hi */
        chk = 1;
        for (j=0; j<ndim; j++) {
          /* check to see if at least one end point of the interval
           * represented by blo and bhi falls in the interval
           * represented by lo and hi */
          check1 = ((blo[j] >= hdl->lo[j] && blo[j] <= hdl->hi[j]) ||
              (bhi[j] >= hdl->lo[j] && bhi[j] <= hdl->hi[j]));
          /* check to see if interval represented by lo and hi
           * falls entirely within interval represented by blo and bhi */
          check2 = ((hdl->lo[j] >= blo[j] && hdl->lo[j] <= bhi[j]) &&
              (hdl->hi[j] >= blo[j] && hdl->hi[j] <= bhi[j]));
          /* If there is some data, move to the next section of code,
           * otherwise, check next block */
          if (!check1 && !check2) {
            chk = 0;
          }
        }

        if (!chk) {
          /* evaluate new offset for block */
          itmp = 1;
          for (j=0; j<ndim; j++) {
            itmp *= bhi[j]-blo[j]+1;
          }
          hdl->offset += itmp;
          /* increment to next block */
          hdl->index[0] += GA[handle].nblock[0];
          for (j=0; j<ndim; j++) {
            if (hdl->index[j] >= GA[handle].num_blocks[j] && j < ndim-1) {
              hdl->index[j] = hdl->proc_index[j];
              hdl->index[j+1] += GA[handle].nblock[j+1];
            }
          }
          if (hdl->index[ndim-1] >= GA[handle].num_blocks[ndim-1]) {
            /* this process has no more blocks: restart on the next one */
            hdl->iproc++;
            if (hdl->iproc >= GAnproc) return 0;
            hdl->offset = 0;
            if (GA[handle].distr_type == TILED) {
              gam_find_tile_proc_indices(handle, hdl->iproc, hdl->proc_index);
              gam_find_tile_proc_indices(handle, hdl->iproc, hdl->index);
            } else if (GA[handle].distr_type == SCALAPACK) {
              gam_find_proc_indices(handle, hdl->iproc, hdl->proc_index);
              gam_find_proc_indices(handle, hdl->iproc, hdl->index);
            }
          }
        }
      }
      if (chk) {
        Integer *clo, *chi;
        *plo = hdl->lobuf;
        *phi = hdl->hibuf;
        clo = *plo;
        chi = *phi;
        /* get the patch of block that overlaps requested region */
        gam_GetBlockPatch(blo,bhi,hdl->lo,hdl->hi,clo,chi,ndim);

        /* evaluate offset within block */
        last = ndim - 1;
        if (GA[handle].distr_type == TILED) {
          jtot = 1;
          if (last == 0) ldrem[0] = bhi[0] - blo[0] + 1;
          l_offset = 0;
          for (j=0; j<last; j++) {
            l_offset += (clo[j]-blo[j])*jtot;
            ldrem[j] = bhi[j]-blo[j]+1;
            jtot *= ldrem[j];
          }
          l_offset += (clo[last]-blo[last])*jtot;
          l_offset += hdl->offset;
        } else if (GA[handle].distr_type == SCALAPACK) {
          l_offset = 0;
          jtot = 1;
          for (j=0; j<last; j++) {
            ldrem[j] = hdl->blk_ld[j];
            blk_jinc = GA[handle].dims[j]%hdl->blk_size[j];
            if (hdl->blk_inc[j] > 0) {
              if (hdl->proc_index[j]<hdl->hlf_blk[j]) {
                blk_jinc = hdl->blk_size[j];
              } else if (hdl->proc_index[j] == hdl->hlf_blk[j]) {
                blk_jinc = hdl->blk_inc[j]%hdl->blk_size[j];
              } else {
                blk_jinc = 0;
              }
            }
            ldrem[j] += blk_jinc;
            l_offset += (clo[j]-blo[j]
                + ((blo[j]-1)/hdl->blk_dim[j])*hdl->blk_size[j])*jtot;
            jtot *= ldrem[j];
          }
          /* the loop above leaves j == last, so index with last directly */
          l_offset += (clo[last]-blo[last]
              + ((blo[last]-1)/hdl->blk_dim[last])*hdl->blk_size[last])*jtot;
        }
        /* get pointer to data on remote block */
        pinv = (hdl->iproc)%GAnproc;
        if (p_handle > 0) {
          pinv = PGRP_LIST[p_handle].inv_map_proc_list[pinv];
        }
        *prem = GA[handle].ptr[pinv]+l_offset*GA[handle].elemsize;
        *proc = pinv;

        /* evaluate new offset for block */
        itmp = 1;
        for (j=0; j<ndim; j++) {
          itmp *= bhi[j]-blo[j]+1;
        }
        hdl->offset += itmp;
        /* increment to next block */
        hdl->index[0] += GA[handle].nblock[0];
        for (j=0; j<ndim; j++) {
          if (hdl->index[j] >= GA[handle].num_blocks[j] && j < ndim-1) {
            hdl->index[j] = hdl->proc_index[j];
            hdl->index[j+1] += GA[handle].nblock[j+1];
          }
        }
        if (hdl->index[ndim-1] >= GA[handle].num_blocks[ndim-1]) {
          hdl->iproc++;
          hdl->offset = 0;
          if (GA[handle].distr_type == TILED) {
            gam_find_tile_proc_indices(handle, hdl->iproc, hdl->proc_index);
            gam_find_tile_proc_indices(handle, hdl->iproc, hdl->index);
          } else if (GA[handle].distr_type == SCALAPACK) {
            gam_find_proc_indices(handle, hdl->iproc, hdl->proc_index);
            gam_find_proc_indices(handle, hdl->iproc, hdl->index);
          }
        }
      }
    }
    return 1;
  }
  return 0;
}