/**
 * Zero the bytes of every data element of a GP array that is held locally.
 *
 * The routine synchronizes, looks up the local patch of the array, obtains
 * the local pointer and size tables, clears each element's buffer with
 * memset, releases both tables and synchronizes again.
 *
 * @param g_p      handle of the GP array
 * @param intsize  width in bytes of the entries in the size table: 4 means
 *                 the sizes are read as int, any other value means they are
 *                 read as int64_t
 */
void pgp_memzero(Integer g_p, Integer intsize)
{
  void *ptr_table, *size_table;
  Integer lo[GP_MAX_DIM], hi[GP_MAX_DIM], ld[GP_MAX_DIM-1];
  Integer handle, me, ndim;
  Integer dim, idx, count;

  pnga_sync();

  handle = g_p + GP_OFFSET;
  me = pnga_nodeid();
  ndim = GP[handle].ndim;

  /* Number of locally held elements = product of the local patch extents */
  pgp_distribution(g_p, me, lo, hi);
  count = 1;
  for (dim = 0; dim < ndim; dim++) {
    count *= (hi[dim] - lo[dim] + 1);
  }

  /* Local views of the pointer table and of the matching size table */
  pnga_access_ptr(GP[handle].g_ptr_array, lo, hi, &ptr_table, ld);
  pnga_access_ptr(GP[handle].g_size_array, lo, hi, &size_table, ld);

  /* Clear each element; only the width of the stored size differs */
  for (idx = 0; idx < count; idx++) {
    void *target = (void*)((armci_meminfo_t*)ptr_table)[idx].addr;
    size_t nbytes;

    if (intsize == 4) {
      nbytes = (size_t)((int*)size_table)[idx];
    } else {
      nbytes = (size_t)((int64_t*)size_table)[idx];
    }
    memset(target, 0, nbytes);
  }

  pnga_release_update(GP[handle].g_ptr_array, lo, hi);
  pnga_release_update(GP[handle].g_size_array, lo, hi);

  pnga_sync();
}
/**
 * Select the minimum or maximum element of a global array and report its
 * value and subscript.
 *
 * Each process that holds data picks a local candidate with
 * snga_select_elem, converts the candidate's local linear offset into an
 * ndim-dimensional subscript, and then all processes combine their
 * candidates with armci_msg_sel.
 *
 * @param g_a        handle of the array to search
 * @param op         operator name; must begin with "min" or "max", otherwise
 *                   pnga_error is called
 * @param val        [out] receives the selected value, stored with the C type
 *                   matching the array's data type (int, long, long long,
 *                   double, float, SingleComplex or DoubleComplex)
 * @param subscript  [out] receives the ndim subscripts of the selected element
 */
void pnga_select_elem(Integer g_a, char* op, void* val, Integer *subscript)
{
  Integer ndim, type, me, elems, ind=0, i;
  Integer lo[MAXDIM],hi[MAXDIM],dims[MAXDIM],ld[MAXDIM-1];
  Integer extent;      /* extent of one dimension, kept at full Integer width */
  Integer num_blocks;
  elem_info_t info;
  int participate=0;
  int local_sync_begin;
  int size;

  local_sync_begin = _ga_sync_begin;
  _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/
  if(local_sync_begin)pnga_sync();

  me = pnga_nodeid();

  pnga_check_handle(g_a, "ga_select_elem");
  GA_PUSH_NAME("ga_elem_op");

  if (strncmp(op,"min",3) != 0 && strncmp(op,"max",3) != 0) {
    pnga_error("operator not recognized",0);
  }

  /* Processes that hold no data still pass info to armci_msg_sel below, so
   * give it a defined content instead of sending indeterminate bytes. */
  memset(&info, 0, sizeof(info));

  pnga_inquire(g_a, &type, &ndim, dims);
  num_blocks = pnga_total_blocks(g_a);

  if (num_blocks < 0) {
    /* negative block count: single local patch per process */
    pnga_distribution(g_a, me, lo, hi);

    if ( lo[0]> 0 ){ /* base index is 1: we get 0 if no elements stored on p */
      void *ptr;

      /******************* calculate local result ************************/
      pnga_access_ptr(g_a, lo, hi, &ptr, ld);
      GET_ELEMS(ndim,lo,hi,ld,&elems);
      participate =1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release(g_a, lo, hi);

      /* determine element subscript in the ndim-array */
      for(i = 0; i < ndim; i++){
        extent = hi[i]-lo[i]+1;
        info.subscr[i] = ind%extent + lo[i];
        ind /= extent;
      }
    }
  } else {
    void *ptr;
    Integer j, offset, jtot, upper;
    Integer nproc = pnga_nnodes();

    pnga_access_block_segment_ptr(g_a, me, &ptr, &elems);
    if (elems > 0) {
      participate =1;

      /* select local element */
      snga_select_elem(type, op, ptr, elems, &info, &ind);

      /* release access to the data */
      pnga_release_block_segment(g_a, me);

      /* convert local index back into a global array index */
      if (!pnga_uses_proc_grid(g_a)) {
        /* Walk the blocks me, me+nproc, ... accumulating their sizes until
         * the block whose offset range contains ind is found; lo/hi are left
         * describing that block. */
        offset = 0;
        for (i=me; i<num_blocks; i += nproc) {
          pnga_distribution(g_a, i, lo, hi);
          jtot = 1;
          for (j=0; j<ndim; j++) {
            jtot *= (hi[j]-lo[j]+1);
          }
          upper = offset + jtot;
          if (ind >= offset && ind < upper) {
            break;
          }
          offset += jtot;
        }

        /* determine element subscript in the ndim-array */
        ind -= offset;
        for(i = 0; i < ndim; i++){
          extent = hi[i]-lo[i]+1;
          info.subscr[i] = ind%extent + lo[i];
          ind /= extent;
        }
      } else {
        Integer stride[MAXDIM], index[MAXDIM];
        Integer blocks[MAXDIM], block_dims[MAXDIM];
        Integer proc_index[MAXDIM], topology[MAXDIM];
        Integer l_index[MAXDIM];
        Integer min, max;

        pnga_get_proc_index(g_a, me, proc_index);
        pnga_get_block_info(g_a, blocks, block_dims);
        pnga_get_proc_grid(g_a, topology);

        /* figure out strides for locally held block of data */
        for (i=0; i<ndim; i++) {
          stride[i] = 0;
          for (j=proc_index[i]; j<blocks[i]; j += topology[i]) {
            min = j*block_dims[i] + 1;
            max = (j+1)*block_dims[i];
            if (max > dims[i]) max = dims[i]; /* last block may be partial */
            stride[i] += (max - min + 1);
          }
        }

        /* use strides to figure out local index */
        l_index[0] = ind%stride[0];
        for (i=1; i<ndim; i++) {
          ind = (ind-l_index[i-1])/stride[i-1];
          l_index[i] = ind%stride[i];
        }

        /* figure out block index for block holding data element */
        for (i=0; i<ndim; i++) {
          index[i] = l_index[i]/block_dims[i];
        }

        /* NOTE(review): lo[i] is computed here without the "+ 1" used for
         * min above, so this subscript looks 0-based while the other paths
         * use the 1-based lo from pnga_distribution -- confirm with callers. */
        for (i=0; i<ndim; i++) {
          lo[i] = (topology[i]*index[i] + proc_index[i])*block_dims[i];
          info.subscr[i] = l_index[i]%block_dims[i] + lo[i];
        }
      }
    }
  }

  /* calculate global result */
  /* Non-complex types send only the leading value slot (sized as a double)
   * followed by ndim subscripts. */
  size = (int)(sizeof(double) + sizeof(Integer)*(int)ndim);

  if(type==C_INT){
    armci_msg_sel(&info,size,op,ARMCI_INT,participate);
    *(int*)val = (int)info.v.ival;
  }else if(type==C_LONG){
    armci_msg_sel(&info,size,op,ARMCI_LONG,participate);
    *(long*)val = info.v.lval;
  }else if(type==C_LONGLONG){
    armci_msg_sel(&info,size,op,ARMCI_LONG_LONG,participate);
    *(long long*)val = info.v.llval;
  }else if(type==C_DBL){
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoublePrecision*)val = info.v.dval;
  }else if(type==C_FLOAT){
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(float*)val = info.v.fval;
  }else if(type==C_SCPL){
    size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_FLOAT,participate);
    *(SingleComplex*)val = info.extra2;
  }else{
    size = sizeof(info); /* for simplicity we send entire info */
    armci_msg_sel(&info,size,op,ARMCI_DOUBLE,participate);
    *(DoubleComplex*)val = info.extra;
  }

  for(i = 0; i < ndim; i++) subscript[i]= info.subscr[i];
  GA_POP_NAME;
}
/**
 * Synchronization entry point of the GP layer; it simply forwards to
 * pnga_sync().
 */
void pgp_sync()
{
  pnga_sync();
}