/* -------------------------------------------------------- *\
   DDI_Distrib(handle,rank,ilo,ihi,jlo,jhi)
   ========================================
   [IN]  handle - handle of array
   [IN]  rank   - rank of compute process
   [OUT] ilo    - lowest row dimension found on rank.
   [OUT] ihi    - highest row dimension found on rank.
   [OUT] jlo    - lowest column dimension found on rank.
   [OUT] jhi    - highest column dimension found on rank.

   Convenience form of DDI_DistribP: the patch bounds it
   reports are copied, unmodified, into the four outputs.
\* -------------------------------------------------------- */
   void DDI_Distrib(int handle,int rank,int *ilo,int *ihi,int *jlo,int *jhi)
   {
      DDI_Patch owned;
      const DDI_Patch *bounds = &owned;

      DDI_DistribP(handle,rank,&owned);

      /* row range */
      *ilo = bounds->ilo;
      *ihi = bounds->ihi;

      /* column range */
      *jlo = bounds->jlo;
      *jhi = bounds->jhi;
   }
/* ------------------------------- *\
   FORTRAN Wrapper for DDI_DISTRIB

   Arguments arrive by reference as
   int_f77 values.  Each bound that
   DDI_DistribP reports is returned
   incremented by one.
\* ------------------------------- */
   void F77_Distrib(int_f77 *handle,int_f77 *rank,int_f77 *ilo,int_f77 *ihi,int_f77 *jlo,int_f77 *jhi)
   {
      const int c_handle = (int) *handle;
      const int c_rank   = (int) *rank;
      DDI_Patch bounds;

      DDI_DistribP(c_handle,c_rank,&bounds);

      /* shift every bound up by one for the caller */
      *ilo = (int_f77) bounds.ilo + 1;
      *ihi = (int_f77) bounds.ihi + 1;
      *jlo = (int_f77) bounds.jlo + 1;
      *jhi = (int_f77) bounds.jhi + 1;
   }
/* -------------------------------------------------------- *\
   DDI_NDistribP(handle,rank,patch)
   ================================
   [IN]  handle - handle of array
   [IN]  rank   - rank of a node
   [OUT] patch  - patch of array 'handle' stored on node 'rank'

   Without FULL_SMP / DDI_ARMCI_SMP the request is forwarded
   to DDI_DistribP.  With either of them, the patch spans
   every row of the array and the columns are taken from
   the node column map: entry 'rank' is the first column and
   the entry after it, minus one, is the last column.
\* -------------------------------------------------------- */
   void DDI_NDistribP(int handle,int rank,DDI_Patch *patch)
   {
    # if !(FULL_SMP || DDI_ARMCI_SMP)
      DDI_DistribP(handle,rank,patch);
    # else
      /* columns owned by this node */
      patch->jlo    = gv(ncmap)[handle][rank];
      patch->jhi    = gv(ncmap)[handle][rank+1]-1;

      /* all rows of the array */
      patch->ilo    = 0;
      patch->ihi    = gv(nrow)[handle]-1;

      patch->handle = handle;
    # endif
   }
/* -------------------------------------------------------------------- *\ DDI_Create_custom(idim,jdim,jcols,handle) ========================================= [IN] idim - Number of rows in the array to be created. [IN] jdim - Number of columns in the array to be created. [IN] jcols - Array holding the number of columns to be given to - each processor when creating the distributed array. [OUT] handle - Handle given to the newly created array. Creates a distributed array where the user can customize how the array is distributed across the processors. \* -------------------------------------------------------------------- */ void DDI_Create_custom(int idim,int jdim,int *jcols,int *handle) { int i,np,me,nn,my; int inode; DDI_INT64 totwrds; DDI_INT64 longrows,longcols,longslice,longnd,long2g; # ifndef USE_SYSV int remote_id; # endif DDI_Patch patch; const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM); np = comm->np; me = comm->me; nn = comm->nn; my = comm->my; Comm_sync(3001,comm); /* find an unused handle */ for (i=0; i<gv(nxtdda); ++i) { if (gv(ddacomm)[i] == DDI_COMM_NULL) break; } if (i==gv(nxtdda)) ++gv(nxtdda); *handle = i; # ifndef USE_SYSV remote_id = my; # endif DEBUG_ROOT(LVL2,(stdout," DDI: Entering DDI_Create_custom.\n")) DEBUG_ROOT(LVL2,(stdout," DDI: Creating Array [%i] - %ix%i=%i.\n",*handle,idim,jdim,idim*jdim)) DEBUG_OUT(LVL3,(stdout,"%s: Entering DDI_Create_custom.\n",DDI_Id())) # ifdef DS_SIGNAL if(comm->me_local == 1) { signal(SIGALRM,DS_Thread_main); } # endif if(me == 0) { if(gv(dda_output)) { longrows = idim; longcols = jdim; totwrds = longrows*longcols; fprintf(stdout," DDI: Creating Array [%i] - %i x %i = %li words.\n", *handle,idim,jdim,totwrds); fflush(stdout); } } /* Make sure each slice of the distributed array will be under 2 GWords. Even on 64-bit hardware, most counting in this program is done with 32-bit data types, meaning we can't count higher than 2**31-1. 
If on 32-bit hardware, the 'long' data types here will be 32-bits, and so we'll see crazy products, including less than zero. In present form, nothing will be trapped here on a 32 bit machine! */ longrows = idim; longcols = jdim; totwrds = longrows*longcols; /* Total distributed array over 2 Gwords is OK, but each */ /* slice (MEMDDI per data server) must be under 2 GWords. */ /* TCP/IP has gv(nd)=-1 (uninitialized) */ /* Cray on one node has gv(nd)=0 since no d.s. exists. */ # if defined DDI_MPI longnd = gv(nd); if (longnd <= 0) longnd=1; # endif # if defined DDI_SOC longnd = np; # endif longslice = totwrds/longnd; /* next is largest signed 32 bit integer, stored as 64 bit quantity */ long2g = 2147483643; if (longslice > long2g) { fprintf(stdout,"\n"); fprintf(stdout," DDI: trouble creating distributed array!\n"); fprintf(stdout," Current number of data servers is %li\n",longnd); fprintf(stdout," so each data server's slice of array"); fprintf(stdout," [%i] is %li words\n",*handle,longslice); fprintf(stdout,"\n"); fprintf(stdout," Add more processors so required total array"); fprintf(stdout," size %li words\n",totwrds); fprintf(stdout," divided by no. of processors (data servers)"); fprintf(stdout," is less than 2 Gwords= %li\n",long2g); fprintf(stdout," For example, %li or more data servers...\n", 1+totwrds/long2g); fprintf(stdout,"\n"); fflush(stdout); Fatal_error(911); } /* ------------------------------------ *\ Ensure 'jcols' is properly formatted \* ------------------------------------ */ for(i=0; i<np; i++) { if(jcols[i] < 0 && me == 0) { fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must be >= 0.\n"); Fatal_error(911); } if(i > 0) if(jcols[i] < jcols[i-1]) { fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must increase monotonically.\n"); Fatal_error(911); } } /* ----------------------------------------------------------------- *\ Check to ensure the maximum number of arrays hasn't been reached. 
\* ----------------------------------------------------------------- */ if( gv(nxtdda) == MAX_DD_ARRAYS ) { if(me == 0) { fprintf(stderr," DDI Error: The maximum number of distributed arrays [%i] has been reached.\n",MAX_DD_ARRAYS); fprintf(stderr," Information: The maximum number of distributed arrays is a DDI compile-time option.\n"); } Fatal_error(911); } gv(nrow)[*handle] = idim; gv(ncol)[*handle] = jdim; gv(ddacomm)[*handle]=gv(ddi_working_comm); /* ---------------------------------------------------- *\ Generate Column Mapping by Compute Process & by Node \* ---------------------------------------------------- */ for(i=0,inode=-1; i<np; i++) { gv(pcmap)[*handle][i] = jcols[i]; /* if(inode == gv(ddiprocs)[i].node) continue; */ if(inode == comm->local_nid[i]) continue; gv(ncmap)[*handle][++inode] = gv(pcmap)[*handle][i]; } gv(pcmap)[*handle][np] = jdim; gv(ncmap)[*handle][nn] = jdim; /* -------------------------- *\ Get local patch dimensions \* -------------------------- */ DDI_DistribP(*handle,me,&patch); /* ----------------------------- *\ Create Distributed Data Array \* ----------------------------- */ patch.handle = *handle; # if defined WINTEL patch.oper = DDI_CREATE_OP; # else patch.oper = DDI_CREATE; # endif patch.size = jdim; # if defined USE_SYSV || defined DDI_ARMCI || defined DDI_MPI2 DDI_Index_create(&patch); # else DDI_Send_request(&patch,&remote_id,NULL); # endif /* ----------------------------- *\ Synchronize Compute Processes \* ----------------------------- */ Comm_sync(3002,comm); DEBUG_OUT(LVL3,(stdout,"%s: Leaving DDI_Create_custom.\n",DDI_Id())) }
int main(int argc,char *argv[]) { int i,np,me,ncols; int nn,my; int handle; size_t counter; double a[10]; DDI_Patch patch; DDI_Init(argc,argv); // DebugOutput(0); DDI_Memory(50); DDI_NProc(&np,&me); DDI_NNode(&nn,&my); DDI_Create(10,np,&handle); DDI_DistribP(handle,me,&patch); ncols = patch.jhi-patch.jlo+1; // if(me == 0) Comm_patch_print(&patch); for(i=0; i<10; i++) a[i] = 93*1.0; for(i=patch.jlo; i<=patch.jhi; i++) { DDI_Put(handle,patch.ilo,patch.ihi,i,i,&a); } for(i=0; i<10; i++) a[i] = -1.0; counter = -1; DDI_DLBReset(); do { DDI_DLBNext(&counter); if(counter % 200 == 0) { fprintf(stdout,"%s: counter=%i\n",DDI_Id(),counter); fflush(stdout); } if(my == 0) usleep(20); } while (counter < 30000); // MPI_Barrier(MPI_COMM_WORLD); // if(me) sleep(2); // else DS_Thread_init(); // MPI_Barrier(MPI_COMM_WORLD); // fprintf(stdout,"%s: first get\n",DDI_Id()); // fflush(stdout); DDI_Sync(10); if(me == 0) printf("done with dlb test\n"); fflush(stdout); DDI_Sync(11); DDI_Get(handle,patch.ilo,patch.ihi,0,0,a); for(i=0; i<10; i++) { if(a[i] != 93.0) fprintf(stdout," %i: a[%i]=%lf != 93\n",me,i,a[i]); a[i] = 1.0; fflush(stdout); } if(me == 0) printf("%s: done with get\n",DDI_Id()); fflush(stdout); DDI_Sync(10); if(me == 0) fprintf(stdout,"%s: starting acc\n",DDI_Id()); fflush(stdout); DDI_Sync(12); DDI_Acc(handle,patch.ilo,patch.ihi,0,0,a); if(me == 0) fprintf(stdout,"%s: finished acc; syncing\n",DDI_Id()); fflush(stdout); DDI_Sync(14); if(me==0) fprintf(stdout,"%s: finished acc; new get\n",DDI_Id()); fflush(stdout); DDI_Sync(20); DDI_Get(handle,patch.ilo,patch.ihi,0,0,a); for(i=0; i<10; i++) { if(a[i] != 93.0+np) fprintf(stdout," %i: a[%i]=%lf !=%d \n",me,i,a[i],np+93); a[i] = 1.0; fflush(stdout); } DDI_Sync(15); if(me==0) fprintf(stdout,"%i: tested get\n",me); fflush(stdout); DDI_Sync(20); // DDI_Put(handle,patch.ilo,patch.ihi,0,np-1,a); if(me==0) fprintf(stdout,"%s: finished global put\n",DDI_Id()); fflush(stdout); DDI_Destroy(handle); // fprintf(stdout,"%s: after 
destroy\n",DDI_Id()); // fflush(stdout); DDI_Finalize(); return 0; }