/**
 * Get the next sub-block from the local portion of a global array.
 *
 * How the iterator advances depends on the array's distribution type:
 *  - REGULAR: at most one block is returned per iterator; hdl->count acts
 *    as a "block already returned" flag.
 *  - BLOCK_CYCLIC: hdl->count is the index of the next block and is advanced
 *    by the number of processes in the array's processor group.
 *  - SCALAPACK / TILED: hdl->index holds the per-dimension block index and is
 *    advanced by hdl->blk_inc with carry into the next higher dimension.
 *
 * @param hdl handle for iterator
 * @param plo indices for lower corner of block
 * @param phi indices for upper corner of block
 * @param ptr pointer to local buffer
 * @param ld array of strides for local block
 * @return returns false if there is no new block, true otherwise
 */
int pnga_local_iterator_next(_iterator_hdl *hdl, Integer plo[],
    Integer phi[], char **ptr, Integer ld[])
{
  Integer i;
  Integer handle = GA_OFFSET + hdl->g_a;
  Integer grp = GA[handle].p_handle;
  int me = pnga_pgroup_nodeid(grp);
  int ndim = GA[handle].ndim;

  if (GA[handle].distr_type == REGULAR) {
    /* no blocks left, so return */
    if (hdl->count > 0) return 0;

    /* Find visible portion of patch held by this processor and
     * return the result in plo and phi. Return pointer to local
     * data as well */
    pnga_distribution(hdl->g_a, me, plo, phi);

    /* Check to see if this process has any data. Return 0 if
     * it does not */
    for (i = 0; i < ndim; i++) {
      if (phi[i] < plo[i]) return 0;
    }
    pnga_access_ptr(hdl->g_a, plo, phi, ptr, ld);
    hdl->count++;
  } else if (GA[handle].distr_type == BLOCK_CYCLIC) {
    /* Simple block-cyclic distribution */
    if (hdl->count >= pnga_total_blocks(hdl->g_a)) return 0;
    pnga_distribution(hdl->g_a, hdl->count, plo, phi);
    pnga_access_block_ptr(hdl->g_a, hdl->count, ptr, ld);
    /* step to the next block index visited by this iterator */
    hdl->count += pnga_pgroup_nnodes(grp);
  } else if (GA[handle].distr_type == SCALAPACK ||
             GA[handle].distr_type == TILED) {
    /* Scalapack-type data distribution */
    if (hdl->index[ndim-1] >= hdl->blk_num[ndim-1]) return 0;

    /* Find coordinates of bounding block; the upper corner is clipped
     * to hdl->blk_dim so a trailing partial block is handled */
    for (i = 0; i < ndim; i++) {
      plo[i] = hdl->index[i]*hdl->blk_size[i] + 1;
      phi[i] = (hdl->index[i] + 1)*hdl->blk_size[i];
      if (phi[i] > hdl->blk_dim[i]) phi[i] = hdl->blk_dim[i];
    }
    pnga_access_block_grid_ptr(hdl->g_a, hdl->index, ptr, ld);

    /* Advance the block index. An overflow in dimension i resets that
     * dimension to hdl->proc_index[i] and carries into dimension i+1.
     * The last dimension is never reset: its overflow is what the
     * termination test above detects on the next call. */
    hdl->index[0] += hdl->blk_inc[0];
    for (i = 0; i < ndim-1; i++) {
      if (hdl->index[i] >= hdl->blk_num[i]) {
        hdl->index[i] = hdl->proc_index[i];
        hdl->index[i+1] += hdl->blk_inc[i+1];
      }
    }
  }
  return 1;
}
/**
 * Return the index bounds of the patch of a GP array assigned to a process.
 *
 * When proc is the calling process the bounds are copied from the lo/hi
 * values stored in the GP descriptor; for any other process they are
 * obtained from pnga_distribution on the underlying pointer array.
 *
 * @param g_p  handle of the GP array
 * @param proc process whose patch is requested
 * @param lo   [out] lower indices of the patch
 * @param hi   [out] upper indices of the patch
 */
void pgp_distribution(Integer g_p, Integer proc, Integer *lo, Integer *hi)
{
  Integer handle = g_p + GP_OFFSET;
  Integer rank, d;

  /* Remote process: ask the underlying global array directly */
  if (pnga_nodeid() != proc) {
    pnga_distribution(GP[handle].g_ptr_array, proc, lo, hi);
    return;
  }

  /* Calling process: use the bounds stored in the descriptor */
  rank = pnga_ndim(GP[handle].g_ptr_array);
  for (d = 0; d < rank; d++) {
    lo[d] = GP[handle].lo[d];
    hi[d] = GP[handle].hi[d];
  }
}
/**
 * Allocate the global arrays backing a GP array and initialise its
 * descriptor.
 *
 * Allocates the size array and then the pointer array, zeroes both, records
 * the calling process's patch bounds in the descriptor, marks the descriptor
 * active and fills in the leading dimensions of the local patch.
 *
 * @param g_p handle of the GP array
 * @return nonzero if both underlying arrays were allocated; pnga_error is
 *         called if either allocation fails
 */
logical pgp_allocate(Integer g_p)
{
  Integer handle = g_p + GP_OFFSET;
  Integer rank_me, d;
  logical ok = 0;

  /* The pointer array is only allocated if the size array succeeded */
  if (pnga_allocate(GP[handle].g_size_array)) {
    ok = (pnga_allocate(GP[handle].g_ptr_array) != 0);
  }
  if (!ok) {
    pnga_error("gp_allocate: unable to allocate GP array", 0);
  }

  pnga_zero(GP[handle].g_size_array);
  pnga_zero(GP[handle].g_ptr_array);

  /* Store the bounds of the locally held patch in the descriptor */
  rank_me = pnga_nodeid();
  pnga_distribution(GP[handle].g_ptr_array, rank_me,
                    GP[handle].lo, GP[handle].hi);
  GP[handle].active = 1;

  /* Leading dimensions: extent of the local patch in all but the last
   * dimension */
  for (d = 0; d < GP[handle].ndim-1; d++) {
    GP[handle].ld[d] = GP[handle].hi[d] - GP[handle].lo[d] + 1;
  }
  return ok;
}
/**
 * Select the minimum or maximum element of a global array and report its
 * value and subscripts.
 *
 * Each process that holds data picks its best local element with
 * snga_select_elem, converts the local offset of that element into global
 * subscripts, and the per-process candidates are then combined with
 * armci_msg_sel. pnga_sync is called on entry unless the begin-sync has
 * been masked; the sync masks are reset on entry.
 *
 * @param g_a       handle of the global array to search
 * @param op        operator name; must begin with "min" or "max", otherwise
 *                  pnga_error is called
 * @param val       [out] selected value, written using the C type that
 *                  corresponds to the array's data type
 * @param subscript [out] ndim subscripts of the selected element
 *                  (base index is 1)
 */
void pnga_select_elem(Integer g_a, char* op, void* val, Integer *subscript)
{
  Integer ndim, type, me, elems, ind=0, i;
  Integer lo[MAXDIM], hi[MAXDIM], dims[MAXDIM], ld[MAXDIM-1];
  elem_info_t info;
  Integer num_blocks;
  int participate = 0;
  int local_sync_begin;

  local_sync_begin = _ga_sync_begin;
  _ga_sync_begin = 1; _ga_sync_end = 1; /*remove any previous masking*/
  if (local_sync_begin) pnga_sync();

  me = pnga_nodeid();

  pnga_check_handle(g_a, "ga_select_elem");
  /* NOTE(review): the pushed name differs from the routine name used in the
   * handle check above - confirm whether "ga_elem_op" is intentional */
  GA_PUSH_NAME("ga_elem_op");

  if (strncmp(op,"min",3) != 0 && strncmp(op,"max",3) != 0) {
    pnga_error("operator not recognized",0);
  }

  pnga_inquire(g_a, &type, &ndim, dims);
  num_blocks = pnga_total_blocks(g_a);

  if (num_blocks < 0) {
    /* A negative block count selects the path in which this process holds
     * a single contiguous patch */
    pnga_distribution(g_a, me, lo, hi);

    if (lo[0] > 0) { /* base index is 1: we get 0 if no elements stored on p */

      /******************* calculate local result ************************/
      void *ptr;
      pnga_access_ptr(g_a, lo, hi, &ptr, ld);
      GET_ELEMS(ndim,lo,hi,ld,&elems);
      participate = 1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release(g_a, lo, hi);

      /* determine element subscript in the ndim-array */
      for (i = 0; i < ndim; i++) {
        Integer extent = hi[i] - lo[i] + 1;
        info.subscr[i] = ind%extent + lo[i];
        ind /= extent;
      }
    }
  } else {
    void *ptr;
    Integer j, offset, jtot, upper;
    Integer nproc = pnga_nnodes();

    pnga_access_block_segment_ptr(g_a, me, &ptr, &elems);
    if (elems > 0) {
      participate = 1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release_block_segment(g_a, me);

      /* convert local index back into a global array index */
      if (!pnga_uses_proc_grid(g_a)) {
        /* Walk the blocks me, me+nproc, ... accumulating their sizes until
         * the block containing local offset ind is found; lo/hi are left
         * holding that block's bounds */
        offset = 0;
        for (i = me; i < num_blocks; i += nproc) {
          pnga_distribution(g_a, i, lo, hi);
          jtot = 1;
          for (j = 0; j < ndim; j++) {
            jtot *= (hi[j] - lo[j] + 1);
          }
          upper = offset + jtot;
          if (ind >= offset && ind < upper) {
            break;
          } else {
            offset += jtot;
          }
        }

        /* determine element subscript in the ndim-array */
        ind -= offset;
        for (i = 0; i < ndim; i++) {
          Integer extent = hi[i] - lo[i] + 1;
          info.subscr[i] = ind%extent + lo[i];
          ind /= extent;
        }
      } else {
        Integer stride[MAXDIM], index[MAXDIM];
        Integer blocks[MAXDIM], block_dims[MAXDIM];
        Integer proc_index[MAXDIM], topology[MAXDIM];
        Integer l_index[MAXDIM];
        Integer min, max;

        pnga_get_proc_index(g_a, me, proc_index);
        pnga_get_block_info(g_a, blocks, block_dims);
        pnga_get_proc_grid(g_a, topology);

        /* figure out strides for locally held block of data */
        for (i = 0; i < ndim; i++) {
          stride[i] = 0;
          for (j = proc_index[i]; j < blocks[i]; j += topology[i]) {
            min = j*block_dims[i] + 1;
            max = (j+1)*block_dims[i];
            if (max > dims[i]) max = dims[i];
            stride[i] += (max - min + 1);
          }
        }

        /* use strides to figure out local index */
        l_index[0] = ind%stride[0];
        for (i = 1; i < ndim; i++) {
          ind = (ind - l_index[i-1])/stride[i-1];
          l_index[i] = ind%stride[i];
        }

        /* figure out block index for block holding data element */
        for (i = 0; i < ndim; i++) {
          index[i] = l_index[i]/block_dims[i];
        }

        /* Global subscripts are 1-based, so the first index of the owning
         * block is (block number)*block_dims + 1, matching the "min"
         * computation in the stride loop above */
        for (i = 0; i < ndim; i++) {
          lo[i] = (topology[i]*index[i] + proc_index[i])*block_dims[i] + 1;
          info.subscr[i] = l_index[i]%block_dims[i] + lo[i];
        }
      }
    }
  }

  /* calculate global result */
  if (type == C_INT) {
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_INT,participate);
    *(int*)val = (int)info.v.ival;
  } else if (type == C_LONG) {
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_LONG,participate);
    *(long*)val = info.v.lval;
  } else if (type == C_LONGLONG) {
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_LONG_LONG,participate);
    *(long long*)val = info.v.llval;
  } else if (type == C_DBL) {
    int size = sizeof(double) + sizeof(Integer)*(int)ndim;
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoublePrecision*)val = info.v.dval;
  } else if (type == C_FLOAT) {
    int size = sizeof(double) + sizeof(Integer)*ndim;
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(float*)val = info.v.fval;
  } else if (type == C_SCPL) {
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(SingleComplex*)val = info.extra2;
  } else {
    /* any remaining type is handled as double complex */
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoubleComplex*)val = info.extra;
  }

  for (i = 0; i < ndim; i++) subscript[i] = info.subscr[i];
  GA_POP_NAME;
}