/**
 * Zero the bytes of every data element that the calling process holds in a
 * GP array.
 *
 * For each locally held entry, the address stored in the entry's
 * armci_meminfo_t record is taken as the start of the buffer and the
 * matching entry of the size array as its length in bytes. Entries whose
 * stored address is NULL or whose stored size is not positive are skipped
 * (pgp_free_local_element leaves entries in exactly that state), because
 * passing a null pointer to memset is undefined even for a zero length.
 *
 * pnga_sync() is called on entry and on exit, on every path.
 *
 * @param g_p     handle of the GP array
 * @param intsize selects how the size array is read: 4 means its entries
 *                are read as int, any other value as int64_t
 */
void pgp_memzero(Integer g_p, Integer intsize)
{
  void *gp_ptr, *size_array;
  Integer handle, i, me, nelems, ndim;
  Integer lo[GP_MAX_DIM], hi[GP_MAX_DIM], ld[GP_MAX_DIM-1];
  int has_data = 1;

  pnga_sync();
  handle = g_p + GP_OFFSET;
  me = pnga_nodeid();
  ndim = GP[handle].ndim;

  /* Determine number of elements held locally. A dimension with hi < lo
   * means there is no local patch to access at all. */
  pgp_distribution(g_p, me, lo, hi);
  nelems = 1;
  for (i=0; i<ndim; i++) {
    if (hi[i] < lo[i]) has_data = 0;
    nelems *= (hi[i]-lo[i]+1);
  }

  if (has_data) {
    armci_meminfo_t *meminfo;

    /* Get pointers to local data elements and their sizes */
    pnga_access_ptr(GP[handle].g_ptr_array,lo,hi,&gp_ptr,ld);
    pnga_access_ptr(GP[handle].g_size_array,lo,hi,&size_array,ld);
    meminfo = (armci_meminfo_t*)gp_ptr;

    /* Zero bits in data elements */
    for (i=0; i<nelems; i++) {
      void *addr = (void*)meminfo[i].addr;
      int64_t nbytes;

      if (intsize == 4) {
        nbytes = (int64_t)((int*)size_array)[i];
      } else {
        nbytes = ((int64_t*)size_array)[i];
      }
      /* Skip unassigned or freed entries instead of calling memset on a
       * null pointer or with a negative size converted to size_t. */
      if (addr != NULL && nbytes > 0) {
        memset(addr, 0, (size_t)nbytes);
      }
    }

    pnga_release_update(GP[handle].g_ptr_array,lo,hi);
    pnga_release_update(GP[handle].g_size_array,lo,hi);
  }

  pnga_sync();
}
/**
 * Detach a locally held element from a GP array.
 *
 * The element's armci_meminfo_t record in the pointer array is cleared to
 * all zero bytes and the matching entry of the size array is set to zero.
 * The buffer itself is not released here; its address is handed back to
 * the caller.
 *
 * @param g_p       handle of the GP array
 * @param subscript indices of the element; each must lie within the
 *                  lo/hi bounds recorded for this GP array, otherwise
 *                  pnga_error is called
 * @return the address that was stored in the element's record before it
 *         was cleared
 */
void* pgp_free_local_element(Integer g_p, Integer *subscript)
{
  armci_meminfo_t *meminfo;
  void *elem_addr;
  Integer ld[GP_MAX_DIM-1];
  Integer dim;
  GP_Int zero_size;
  Integer handle;

  handle = g_p + GP_OFFSET;

  /* the element has to live in the local block of the GP array */
  for (dim = 0; dim < GP[handle].ndim; dim++) {
    if (!(subscript[dim] >= GP[handle].lo[dim]
          && subscript[dim] <= GP[handle].hi[dim])) {
      pnga_error("gp_free_local_element: subscript out of bounds", dim);
    }
  }

  /* remember the stored address, then wipe the record */
  pnga_access_ptr(GP[handle].g_ptr_array, subscript, subscript, &meminfo, ld);
  elem_addr = meminfo->addr;
  memset((void*)meminfo, 0, sizeof(armci_meminfo_t));
  pnga_release_update(GP[handle].g_ptr_array, subscript, subscript);

  /* write a zero into the corresponding slot of the size array; the
   * source buffer is a single value, so every leading dimension is 1 */
  zero_size = 0;
  dim = 0;
  while (dim < GP[handle].ndim-1) {
    ld[dim] = 1;
    dim++;
  }
  pnga_put(GP[handle].g_size_array, subscript, subscript, &zero_size, ld);

  return elem_addr;
}
/**
 * Advance a local iterator and return the next locally held block of a
 * global array.
 *
 * How blocks are enumerated depends on GA[handle].distr_type:
 *  - REGULAR: the single local patch is returned once (while hdl->count
 *    is 0); nothing is returned if any dimension has phi < plo.
 *  - BLOCK_CYCLIC: hdl->count is used as the block index and is advanced
 *    by the number of processes in the array's group.
 *  - SCALAPACK / TILED: hdl->index holds the block-grid coordinates; the
 *    bounds are clipped to hdl->blk_dim and the coordinates are advanced
 *    by hdl->blk_inc, wrapping back to hdl->proc_index with a carry into
 *    the next dimension.
 * Any other distribution type falls through and reports success without
 * touching the outputs.
 *
 * @param hdl handle for iterator
 * @param plo indices for lower corner of block
 * @param phi indices for upper corner of block
 * @param ptr pointer to local buffer
 * @param ld array of strides for local block
 * @return 0 if there is no new block, 1 otherwise
 */
int pnga_local_iterator_next(_iterator_hdl *hdl, Integer plo[],
    Integer phi[], char **ptr, Integer ld[])
{
  Integer handle = GA_OFFSET + hdl->g_a;
  Integer grp = GA[handle].p_handle;
  int ndim = GA[handle].ndim;
  int me = pnga_pgroup_nodeid(grp);
  Integer d;

  if (GA[handle].distr_type == REGULAR) {
    /* the one local patch has already been handed out */
    if (hdl->count > 0) {
      return 0;
    }

    /* Look up the patch owned by this process */
    pnga_distribution(hdl->g_a, me, plo, phi);

    /* An inverted range in any dimension means no local data */
    for (d = 0; d < ndim; d++) {
      if (phi[d] < plo[d]) {
        return 0;
      }
    }

    pnga_access_ptr(hdl->g_a, plo, phi, ptr, ld);
    hdl->count++;
    return 1;
  }

  if (GA[handle].distr_type == BLOCK_CYCLIC) {
    /* Simple block-cyclic distribution: count is the block index */
    if (hdl->count >= pnga_total_blocks(hdl->g_a)) {
      return 0;
    }
    pnga_distribution(hdl->g_a, hdl->count, plo, phi);
    pnga_access_block_ptr(hdl->g_a, hdl->count, ptr, ld);
    hdl->count += pnga_pgroup_nnodes(grp);
    return 1;
  }

  if (GA[handle].distr_type == SCALAPACK
      || GA[handle].distr_type == TILED) {
    /* Block-grid distributions: finished once the last coordinate has
     * run off the end of the grid */
    if (hdl->index[ndim-1] >= hdl->blk_num[ndim-1]) {
      return 0;
    }

    /* Bounds of the current block, clipped to the array extent */
    for (d = 0; d < ndim; d++) {
      Integer top = (hdl->index[d]+1)*hdl->blk_size[d];
      plo[d] = hdl->index[d]*hdl->blk_size[d]+1;
      phi[d] = (top > hdl->blk_dim[d]) ? hdl->blk_dim[d] : top;
    }
    pnga_access_block_grid_ptr(hdl->g_a, hdl->index, ptr, ld);

    /* Step to the next block; overflow in every dimension but the last
     * resets that coordinate and carries into the following one */
    hdl->index[0] += hdl->blk_inc[0];
    for (d = 0; d < ndim-1; d++) {
      if (hdl->index[d] >= hdl->blk_num[d]) {
        hdl->index[d] = hdl->proc_index[d];
        hdl->index[d+1] += hdl->blk_inc[d+1];
      }
    }
    return 1;
  }

  return 1;
}
/**
 * Attach a buffer to a locally held element of a GP array.
 *
 * The element's slot in the size array is set to size, and its slot in
 * the pointer array is overwritten with the armci_meminfo_t record that
 * is read from the sizeof(armci_meminfo_t) bytes immediately preceding
 * ptr.
 *
 * @param g_p       handle of the GP array
 * @param subscript indices of the element; each must lie within the
 *                  lo/hi bounds recorded for this GP array, otherwise
 *                  pnga_error is called
 * @param ptr       address of the element's data; an armci_meminfo_t
 *                  record is expected directly in front of it
 * @param size      value stored in the size array for this element
 * @param intsize   4 stores size as int, any other value as int64_t
 */
void pgp_assign_local_element(Integer g_p, Integer *subscript, void *ptr,
    Integer size, Integer intsize)
{
  void *slot;
  armci_meminfo_t *header;
  Integer ld[GP_MAX_DIM-1];
  Integer dim;
  Integer handle;

  handle = g_p + GP_OFFSET;

  /* the element has to live in the local block of the GP array */
  for (dim = 0; dim < GP[handle].ndim; dim++) {
    if (!(subscript[dim] >= GP[handle].lo[dim]
          && subscript[dim] <= GP[handle].hi[dim])) {
      pnga_error("gp_assign_local_element: subscript out of bounds", dim);
    }
  }

  /* record the size, using the integer width the caller asked for */
  pnga_access_ptr(GP[handle].g_size_array, subscript, subscript, &slot, ld);
  if (intsize != 4) {
    *((int64_t*)slot) = (int64_t)size;
  } else {
    *((int*)slot) = (int)size;
  }
  pnga_release_update(GP[handle].g_size_array, subscript, subscript);

  /* copy the memory descriptor that sits just in front of the data */
  header = (armci_meminfo_t*)(((char*)ptr) - sizeof(armci_meminfo_t));
  pnga_access_ptr(GP[handle].g_ptr_array, subscript, subscript, &slot, ld);
  *((armci_meminfo_t*)slot) = *header;
  pnga_release_update(GP[handle].g_ptr_array, subscript, subscript);
}
/**
 * Select the minimum or maximum element of a global array and report its
 * value and subscript.
 *
 * Each process that holds data selects a local candidate with
 * snga_select_elem and converts the candidate's local linear index into a
 * subscript of the global array; armci_msg_sel then combines the
 * candidates across processes. Processes without local data call
 * armci_msg_sel with participate == 0.
 *
 * On entry the routine reads _ga_sync_begin, resets _ga_sync_begin and
 * _ga_sync_end to 1, and calls pnga_sync() if the begin flag was set.
 *
 * @param g_a       handle of the global array
 * @param op        operator name; must start with "min" or "max",
 *                  otherwise pnga_error is called
 * @param val       receives the selected value, written as int, long,
 *                  long long, DoublePrecision, float, SingleComplex or
 *                  DoubleComplex according to the array's type
 * @param subscript receives the ndim indices of the selected element, in
 *                  the same index base that pnga_distribution reports
 */
void pnga_select_elem(Integer g_a, char* op, void* val, Integer *subscript)
{
  Integer ndim, type, me, elems, ind=0, i;
  Integer lo[MAXDIM],hi[MAXDIM],dims[MAXDIM],ld[MAXDIM-1];
  elem_info_t info;
  Integer num_blocks;
  int participate=0;
  int local_sync_begin;
  int scalar_size;

  local_sync_begin = _ga_sync_begin;
  _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/
  if (local_sync_begin) pnga_sync();

  me = pnga_nodeid();

  pnga_check_handle(g_a, "ga_select_elem");
  GA_PUSH_NAME("ga_elem_op");

  if (strncmp(op,"min",3) != 0 && strncmp(op,"max",3) != 0) {
    pnga_error("operator not recognized",0);
  }

  pnga_inquire(g_a, &type, &ndim, dims);
  num_blocks = pnga_total_blocks(g_a);

  if (num_blocks < 0) {
    /* a negative block count selects the single-patch-per-process path */
    pnga_distribution(g_a, me, lo, hi);

    if (lo[0] > 0) { /* base index is 1: we get 0 if no elements stored on p */

      /******************* calculate local result ************************/
      void *ptr;
      pnga_access_ptr(g_a, lo, hi, &ptr, ld);
      GET_ELEMS(ndim,lo,hi,ld,&elems);
      participate = 1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release(g_a, lo, hi);

      /* determine element subscript in the ndim-array */
      for (i = 0; i < ndim; i++) {
        Integer extent = hi[i]-lo[i]+1;
        info.subscr[i] = ind%extent + lo[i];
        ind /= extent;
      }
    }
  } else {
    void *ptr;
    Integer j, offset, jtot, upper;
    Integer nproc = pnga_nnodes();
    pnga_access_block_segment_ptr(g_a, me, &ptr, &elems);
    if (elems > 0) {
      participate = 1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release_block_segment(g_a, me);

      /* convert local index back into a global array index */
      if (!pnga_uses_proc_grid(g_a)) {
        /* walk this process's blocks until the one containing ind is
         * found; offset is the number of elements in the blocks before */
        offset = 0;
        for (i=me; i<num_blocks; i += nproc) {
          pnga_distribution(g_a, i, lo, hi);
          jtot = 1;
          for (j=0; j<ndim; j++) {
            jtot *= (hi[j]-lo[j]+1);
          }
          upper = offset + jtot;
          if (ind >= offset && ind < upper) {
            break;
          } else {
            offset += jtot;
          }
        }
        /* determine element subscript in the ndim-array */
        ind -= offset;
        for (i = 0; i < ndim; i++) {
          Integer extent = hi[i]-lo[i]+1;
          info.subscr[i] = ind%extent + lo[i];
          ind /= extent;
        }
      } else {
        Integer stride[MAXDIM], index[MAXDIM];
        Integer blocks[MAXDIM], block_dims[MAXDIM];
        Integer proc_index[MAXDIM], topology[MAXDIM];
        Integer l_index[MAXDIM];
        Integer min, max;
        pnga_get_proc_index(g_a, me, proc_index);
        pnga_get_block_info(g_a, blocks, block_dims);
        pnga_get_proc_grid(g_a, topology);
        /* figure out strides for locally held block of data */
        for (i=0; i<ndim; i++) {
          stride[i] = 0;
          for (j=proc_index[i]; j<blocks[i]; j += topology[i]) {
            min = j*block_dims[i] + 1;
            max = (j+1)*block_dims[i];
            if (max > dims[i])
              max = dims[i];
            stride[i] += (max - min + 1);
          }
        }
        /* use strides to figure out local index */
        l_index[0] = ind%stride[0];
        for (i=1; i<ndim; i++) {
          ind = (ind-l_index[i-1])/stride[i-1];
          l_index[i] = ind%stride[i];
        }
        /* figure out block index for block holding data element */
        for (i=0; i<ndim; i++) {
          index[i] = l_index[i]/block_dims[i];
        }
        for (i=0; i<ndim; i++) {
          /* lo is the zero-based offset of the block's first element.
           * The other branches return subscripts built on the one-based
           * bounds from pnga_distribution, so add 1 here to report the
           * subscript in the same base. */
          lo[i] = (topology[i]*index[i] + proc_index[i])*block_dims[i];
          info.subscr[i] = l_index[i]%block_dims[i] + lo[i] + 1;
        }
      }
    }
  }

  /* calculate global result */
  /* for real types only the value slot plus ndim subscripts are sent */
  scalar_size = (int)(sizeof(double) + sizeof(Integer)*(int)ndim);
  if (type==C_INT) {
    armci_msg_sel(&info,scalar_size,op,ARMCI_INT,participate);
    *(int*)val = (int)info.v.ival;
  } else if (type==C_LONG) {
    armci_msg_sel(&info,scalar_size,op,ARMCI_LONG,participate);
    *(long*)val = info.v.lval;
  } else if (type==C_LONGLONG) {
    armci_msg_sel(&info,scalar_size,op,ARMCI_LONG_LONG,participate);
    *(long long*)val = info.v.llval;
  } else if (type==C_DBL) {
    armci_msg_sel(&info,scalar_size,op,ARMCI_DOUBLE,participate);
    *(DoublePrecision*)val = info.v.dval;
  } else if (type==C_FLOAT) {
    armci_msg_sel(&info,scalar_size,op,ARMCI_FLOAT,participate);
    *(float*)val = info.v.fval;
  } else if (type==C_SCPL) {
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(SingleComplex*)val = info.extra2;
  } else {
    int size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoubleComplex*)val = info.extra;
  }

  for (i = 0; i < ndim; i++) subscript[i] = info.subscr[i];
  GA_POP_NAME;
}