Code Example #1
File: ddi_create.c  Project: streaver91/gamess
/* -------------------------------------------------------------------- *\
   DDI_Create(idim,jdim,handle)
   ============================
   [IN]  idim   - Number of rows in the array to be created.
   [IN]  jdim   - Number of columns in the array to be created.
   [OUT] handle - Handle given to the newly created array.
   
   Creates a distributed array with the columns evenly divided amongst
   the processors.
\* -------------------------------------------------------------------- */
   void DDI_Create(int idim,int jdim,int *handle) {

   /* --------------- *\
      Local Variables
   \* --------------- */
      int i,np,me;
      int icol,mincol,lftcol;
      int jcols[MAX_PROCESSORS];
      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM);

      DEBUG_ROOT(LVL1,(stdout," DDI: Entering DDI_Create.\n"))
      DEBUG_OUT(LVL3,(stdout,"%s: Entering DDI_Create.\n",DDI_Id()))
     
      np = comm->np;
      me = comm->me;
/*      
      if(jdim < np && me == 0) {
         fprintf(stdout," DDI Error: Trying to create an array with fewer columns than processors.\n");
         fprintf(stdout," DDI Error: Reduce the number of processors and try again.\n");
         Fatal_error(911);
      }
 */           
      mincol = jdim / np;
      lftcol = jdim % np;
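
   /* jcols[i] will hold the starting column offset for process i; the
      first lftcol processes each receive one extra column.  Illustrative
      example: jdim=10, np=4  =>  mincol=2, lftcol=2  =>  offsets {0,3,6,8}. */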
      
      for(i=0,icol=0; i<np; i++) {
         jcols[i] = icol;
         icol += mincol;
         if(i<lftcol) icol++;
      }
      
      DDI_Create_custom(idim,jdim,jcols,handle);
      
      DEBUG_ROOT(LVL2,(stdout," DDI: Array[%i] successfully created.\n",*handle)) 
   }
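
A minimal standalone sketch of the same partitioning arithmetic (plain C,
no DDI dependencies; the sizes chosen here are illustrative only):

   #include <stdio.h>

   /* Reproduces DDI_Create's column split: jdim columns over np processes,
      with the first jdim % np processes holding one extra column. */
   int main(void) {
      int np = 4, jdim = 10;
      int mincol = jdim / np;   /* columns every process receives */
      int lftcol = jdim % np;   /* leftover columns to spread out */
      int i, icol = 0;

      for(i=0; i<np; i++) {
         int ncols = mincol + (i < lftcol ? 1 : 0);
         printf("process %d owns columns [%d,%d)\n", i, icol, icol+ncols);
         icol += ncols;
      }
      return 0;   /* prints [0,3) [3,6) [6,8) [8,10) */
   }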
Code Example #2
File: ddi_init.c  Project: andremirt/v_cond
   static void Init_mpi(int targc,char *targv[]) {

    # ifndef HOSTNAME_LEN
    # define HOSTNAME_LEN  96
    # endif

      int argc = targc;
      char **argv = targv;
      int i,j,np,me,nc,nd,ndpn;
      int np_local,me_local;
      int nnodes,mynode,master;
      int icp,ids,cpus,myds,ext;
      int *ranks,*disp,*world;
      int *ranks_local;

      int me_mpi,me_ddi,rbn;

      MPI_Group Comm_World_grp;
      MPI_Group SMP_World_grp;
      MPI_Group SMP_Compute_grp;
      MPI_Group DDI_World_grp;
      MPI_Group DDI_Compute_grp;

      MPI_Comm SMP_World_comm;
      MPI_Comm SMP_Compute_comm;
      MPI_Comm SMP_Masters_comm;

      MPI_Comm DDI_World_comm;
      MPI_Comm DDI_Compute_comm;

      char hostname[HOSTNAME_LEN],*c,*hostnames;

      DDI_Comm *comm = (DDI_Comm *) &gv(ddi_base_comm);
      int threadLevel;

 # ifdef WINDOWS
   /* ------------------------------ *\
      Initialize Windows Sockets 2.2
   \* ------------------------------ */
      WORD wVersionRequested;
      WSADATA wsaData;
      wVersionRequested = MAKEWORD(2, 2);
      WSAStartup(wVersionRequested, &wsaData);      
 # endif

   /* -------------- *\
      Initialize MPI
   \* -------------- */
      if(MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &threadLevel) != MPI_SUCCESS) {
         fprintf(stdout," DDI: MPI_Init failed.\n");
         fflush(stdout); exit(911);
      }

   /* -------------------------------- *\
    * Initialize DDI working directory
   \* -------------------------------- */
      Init_scratch(argc,argv);


   /* ------------------------------------------ *\
      Determine Rank and Number of MPI Processes
   \* ------------------------------------------ */
      MPI_Comm_size(MPI_COMM_WORLD,&np);
      MPI_Comm_rank(MPI_COMM_WORLD,&me);


   /* -------------------------------------- *\
      For debugging purposes, set gv(myproc)
   \* -------------------------------------- */
      comm->me = me;
      DEBUG_ROOT(LVL1,(stdout," DDI: MPI initialized.  %i MPI processes.\n",np))


   /* ---------------------------------------------------- *\
      MPI-1 requires data servers unless it is using LAPI.
      MPI-2 does not require data servers at all.
      ----------------------------------------------------
      nc = 0  ==> standard data server model (cp:ds::1:1).
      nc = np ==> specialized model such as LAPI || MPI-2.
   \* ---------------------------------------------------- */
      nc = 0;
    # if defined DDI_LAPI || defined DDI_MPI2 || defined CRAY_MPI
      nc = np;
    # endif


   /* ------------------------------------------ *\
      Standard MPI-1 model (nc=0) ==> cp:ds::1:1
   \* ------------------------------------------ */
      if(nc == 0) {
         if((np % 2) && (me == 0)) {
            fprintf(stdout," Error: Expecting an even number of MPI processes (cp:ds::1:1).\n");
            Fatal_error(911);
         }
         
         nc = nd = np/2;
      }


   /* ------------------------------------------------ *\
      MPI-2 or MPI-1/LAPI model (nc=np) ==> cp:ds::1:0
   \* ------------------------------------------------ */
      if(nc == np) nd = 0;
      
      
   /* ------------------------------------------------------------- *\
      Check to make sure the job complies with compile time limits.
   \* ------------------------------------------------------------- */
      if(nc > MAX_PROCESSORS) {
         
         if(me == 0) {
            fprintf(stdout," DDI: \"Houston, we have a problem.\"\n");
            fprintf(stdout," DDI: MAX_NODES = %i\n",MAX_NODES);
            fprintf(stdout," DDI: MAX_SMP_PROCS = %i\n",MAX_SMP_PROCS);
            fprintf(stdout," DDI: MAX_PROCESSORS = MAX_NODES * MAX_SMP_PROCS = %i\n",MAX_PROCESSORS);
            fprintf(stdout," DDI: MPI reports %i processes ==> %i processors.\n",np,nc);
            fprintf(stdout," DDI: Please correct the limits and recompile DDI.\n");
            fflush(stdout);
         }
         
         MPI_Barrier(MPI_COMM_WORLD);
         MPI_Finalize();
         exit(0);
      }
      
      
   /* ------------------------------------------------------------------- *\
      Non-Standard MPI-1 Model (nc < np && ((nc | np) || (np-nc | np)))
      Could be used to vary the number of data servers per node, either by
      assigning a number of data servers to each compute process or a fixed
      number of data servers per node.  This code has not been implemented.
   \* ------------------------------------------------------------------- */
      if(nc != nd && nc != np) {
         fprintf(stdout," DDI: This should never have been executed.\n");
         Fatal_error(911);
      }


   /* ---------------------------------- *\
      System command to get the hostname
   \* ---------------------------------- */
      gethostname(hostname,HOSTNAME_LEN);
      DEBUG_OUT(LVL4,(stdout," MPI Process %i: hostname=%s\n",me,hostname))


   /* -------------------------------------------- *\
      Gather all the hostnames into a single array
   \* -------------------------------------------- */
      hostnames = (char *) Malloc(np*HOSTNAME_LEN);
      MPI_Allgather(hostname, HOSTNAME_LEN,MPI_BYTE,
                    hostnames,HOSTNAME_LEN,MPI_BYTE,MPI_COMM_WORLD);


   /* -------------------------------------- *\
      Determine all MPI Processes on "my" node
   \* -------------------------------------- */
      ranks = (int *) Malloc(np*sizeof(int));
      for(i=0,np_local=0,c=hostnames; i<np; i++,c+=HOSTNAME_LEN) {
         if(strcmp(hostname,c) == 0) ranks[np_local++] = i;
      }
      DEBUG_OUT(LVL4,(stdout," MPI Process %i: %i local MPI processes.\n",me,np_local))

      ranks_local = (int *) Malloc(np_local*sizeof(int));
      memcpy(ranks_local,ranks,np_local*sizeof(int));


   /* ----------------------------- *\
      Create SMP_World communicator
   \* ----------------------------- */
      MPI_Comm_group(MPI_COMM_WORLD,&Comm_World_grp);
      MPI_Group_incl(Comm_World_grp,np_local,ranks_local,&SMP_World_grp);
      MPI_Comm_create(MPI_COMM_WORLD,SMP_World_grp,&SMP_World_comm);

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: SMP_World_comm created.\n"))

   /* ------------------------------ *\
      Create SMP_Master communicator
   \* ------------------------------ */
      MPI_Comm_rank(SMP_World_comm,&me_local);

      master = 0;
      if(me_local == 0) master = 1;

      MPI_Comm_split(MPI_COMM_WORLD,master,0,&SMP_Masters_comm);

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: SMP_Master_comm created.\n"))

   /* --------------------------------------------------------------------------- *\
      Create Compute_comm and World_comm communicators
      ================================================
      First gather the node information, then sort that information by node (not
      guaranteed to be sorted).  Next assign compute processes and data servers
      (if they exist), and finally create the communicators.
   \* --------------------------------------------------------------------------- */
      MPI_Comm_size(SMP_Masters_comm,&nnodes);
      MPI_Comm_rank(SMP_Masters_comm,&mynode);
      MPI_Bcast(&nnodes,1,MPI_INT,0,SMP_World_comm);
      MPI_Bcast(&mynode,1,MPI_INT,0,SMP_World_comm);
      
      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: There are %i nodes.\n",nnodes))

   /* --------------------------------------- *\
      Check compile-time limits for MAX_NODES
   \* --------------------------------------- */
      if(nnodes > MAX_NODES) {
      
         if(me == 0) {
            fprintf(stdout," DDI: MAX_NODES = %i\n",MAX_NODES);
            fprintf(stdout," DDI: MPI topology suggests %i nodes.\n",nnodes);
            fprintf(stdout," DDI: Increase MAX_NODES and recompile DDI.\n");
            fflush(stdout);
         }
         
         MPI_Barrier(MPI_COMM_WORLD);
         MPI_Finalize();
         exit(0);
      }


   /* ----------------------- *\
      Gather node information
   \* ----------------------- */
      np_by_node = (int *) Malloc(nnodes*sizeof(int));
      ranks_by_node = (int **) Malloc(nnodes*sizeof(int*));

      if(me_local == 0) {
         DEBUG_OUT(LVL4,(stdout," MPI Process %i: Node %i master.\n",me,mynode))
	      
         MPI_Allgather(&np_local,1,MPI_INT,np_by_node,1,MPI_INT,SMP_Masters_comm);

         for(i=0,j=0; i<nnodes; i++) j += np_by_node[i];
         if(j != np) {
            fprintf(stdout,"ddi_init: got j= %i, expected np= %i\n",j,np);
            fprintf(stdout," DDI Error: Sum of PPN over all nodes != NP\n");
            Fatal_error(911);
         }

         disp = (int *) Malloc(nnodes*sizeof(int));
         for(i=1,disp[0]=0; i<nnodes; i++) disp[i] = disp[i-1] + np_by_node[i-1];

         MPI_Allgatherv(ranks_local,np_local,MPI_INT,ranks,np_by_node,disp,MPI_INT,
                        SMP_Masters_comm);
         free(disp);
      }

      MPI_Bcast(np_by_node,nnodes,MPI_INT,0,SMP_World_comm);
      MPI_Bcast(ranks,np,MPI_INT,0,SMP_World_comm);

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: Node topology determined.\n"))

      ranks_by_node[0] = ranks;
      for(i=1; i<nnodes; i++) ranks_by_node[i] = (ranks_by_node[i-1] + np_by_node[i-1]);
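
   /* ranks_by_node[i] points into the contiguous ranks[] buffer, so the
      per-node rank lists are slices of a single allocation (only ranks
      itself is ever freed). */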


   /* --------------------------------------------------------------------------- *\
      Each MPI process has a list of MPI ranks sorted by node.  The list of ranks
      for a particular node is sorted from lowest to highest rank, where the rank
      corresponds to the value in MPI_COMM_WORLD communicator. Next determine the 
      number of compute processes/node.  Data servers/node can be inferred.
   \* --------------------------------------------------------------------------- */
      nc_by_node = (int *) Malloc(nnodes*sizeof(int));
      nd_by_node = (int *) Malloc(nnodes*sizeof(int));

      if(nc == nd) {

      /* ------------------------------------------------------------- *\
         There are a given number of data servers per compute process.
         For now the ratio must be 1:1.  CP:DS::1:N is not implemented (yet).
      \* ------------------------------------------------------------- */
         j = nd/nc + 1;  /* j = number of MPI processes per compute process */

         for(i=0; i<nnodes; i++) {

            if((np_by_node[i] % j)) {
               fprintf(stdout," DDI: For every CP requested there should be %i MPI processes.\n",j);
               fprintf(stdout," DDI Error: np on node %i is not divisible by %i.\n",i,j);
               Fatal_error(911);
            }

            nc_by_node[i] = np_by_node[i] / j;
            nd_by_node[i] = np_by_node[i] - nc_by_node[i];
         }

      }
      
      
      if(nc == np) {
      
       # if defined CRAY_MPI
      /* ------------------------------------------------------------- *\
         The environmental variable DDI_DS_PER_NODE is used to control
         the number of MPI processes that become data servers.
      \* ------------------------------------------------------------- */
         if(me == 0) {
           if(getenv("DDI_DS_PER_NODE")) {
             ndpn = atoi(getenv("DDI_DS_PER_NODE"));
           } else {
             ndpn = 1;
           }
           if(nnodes == 1) ndpn = 0;
           fprintf(stdout,"MPI is using %i data servers/node. (DDI_DS_PER_NODE)\n",ndpn);
         }
         MPI_Bcast(&ndpn,1,MPI_INT,0,MPI_COMM_WORLD);

      /* -------------------------------------------------------- *\
         If DDI_DS_PER_NODE is invalid, then shutdown gracefully.
      \* -------------------------------------------------------- */
         if(ndpn < 0 || ndpn > MAX_SMP_PROCS-1) {
           if(me == 0) {
             fprintf(stdout,"%s: DDI_DS_PER_NODE=%i is invalid.\n",
                  DDI_Id(),ndpn);
             fprintf(stdout,"%s: The value must between 0 and %i.\n",
                  DDI_Id(),MAX_SMP_PROCS-1);
             fflush(stdout);
             sleep(1);
           }
           MPI_Finalize();
         }

         nd = nnodes*ndpn;
         nc = np - nd;
       # endif


      /* --------------------------------------------- *\
         MPI-2 or MPI-1/LAPI model ==> no data servers
      \* --------------------------------------------- */
         for(i=0; i<nnodes; i++) {
             nc_by_node[i] = np_by_node[i];
             nd_by_node[i] = 0;

           # if defined CRAY_MPI
             nc_by_node[i] = np_by_node[i]-ndpn;
             nd_by_node[i] = ndpn;

          /* --------------------------------------------- *\
             Sanity check - Ensure >= 1 CP exists per node
          \* --------------------------------------------- */
             if(nc_by_node[i] <= 0) {
               if(me == 0) {
                 fprintf(stdout,
                   " ERROR: There are no CPs assigned to node %i.\n",i);
                 fprintf(stdout,
                   " The total number of processes on node %i = %i.\n",
                   i,np_by_node[i]);
                 fprintf(stdout,
                   " Attempted to reserve %i processes as data servers.\n",
                   ndpn);
                 fflush(stdout);
                 sleep(1);
               }
               MPI_Finalize();
             }
           # endif
         }
         
      } 

      gv(np) = np;
      gv(nc) = nc;
      gv(nd) = nd;
      
      DEBUG_ROOT(LVL3,(stdout," DDI: There are %i DDI compute processes.\n",nc))
      DEBUG_ROOT(LVL3,(stdout," DDI: There are %i DDI data servers.\n",nd))

   /* -------------------------------------------------------------------- *\
      Create a list of ranks that will eventually become the communicators
   \* -------------------------------------------------------------------- */
      world = (int *) Malloc(np*sizeof(int));

      for(i=0,icp=0,ids=nc; i<nnodes; i++) {
         for(j=0; j<np_by_node[i]; j++) {
            if(j<nc_by_node[i]) world[icp++] = ranks_by_node[i][j];
            else                world[ids++] = ranks_by_node[i][j];
         }
      }

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_OUT(LVL4,(stdout," MPI Process %i: nc=%i; np=%i.\n",me,nc,np))


   /* ------------------------------------ *\
      Create DDI_Compute_comm communicator
   \* ------------------------------------ */
      MPI_Group_incl(Comm_World_grp,nc,world,&DDI_Compute_grp);
      MPI_Comm_create(MPI_COMM_WORLD,DDI_Compute_grp,&DDI_Compute_comm);


   /* ---------------------------------- *\
      Create DDI_World_comm communicator
   \* ---------------------------------- */
      MPI_Group_incl(Comm_World_grp,np,world,&DDI_World_grp);
      MPI_Comm_create(MPI_COMM_WORLD,DDI_World_grp,&DDI_World_comm);


   /* ------------------------------------ *\
      Create SMP_Compute_comm communicator
   \* ------------------------------------ */
      MPI_Group_intersection(DDI_Compute_grp,SMP_World_grp,&SMP_Compute_grp);
      MPI_Comm_create(MPI_COMM_WORLD,SMP_Compute_grp,&SMP_Compute_comm);

      DEBUG_ROOT(LVL3,(stdout," DDI: finished forming communicators.\n"))

   /* ------------------------------------ *\
      Finished creating MPI communicators.
      Initialize internal DDI structures.
   \* ------------------------------------ */
      MPI_Comm_rank(DDI_World_comm,&me);
      comm->np = nc;
      comm->me = me;
      comm->nn = nnodes;
      comm->my = mynode;

      MPI_Comm_rank(MPI_COMM_WORLD,&me_mpi); 
      MPI_Comm_rank(DDI_World_comm,&me_ddi); 

      DEBUG_OUT(LVL3,(stdout," MPI Process %i = DDI Process %i\n",me_mpi,me_ddi))
      
      comm->id           = DDI_COMM_WORLD;
      comm->smp_comm     = SMP_Compute_comm;
      comm->world_comm   = DDI_World_comm;
      comm->compute_comm = DDI_Compute_comm;
      comm->node_comm    = SMP_Masters_comm;
      comm->smp_world    = SMP_World_comm;

    # if !defined USE_SYSV 
      comm->nn = nc;
      comm->my = me;
      if(comm->my >= nc) comm->my -= nc;
      comm->smp_comm     = MPI_COMM_SELF;
      comm->node_comm    = DDI_Compute_comm;
    # endif


   /* -------------------------------------------------------------------- *\
      Check for network extension.  The extension would be appended to the
      hostname if it becomes necessary to form a TCP/IP socket to the host
   \* -------------------------------------------------------------------- */
    # ifdef DDI_SOC
      for(i=0,ext=0; i<argc && strcmp("-netext",argv[i]) != 0; i++);
      if(i < argc-1) ext = i+1;   /* index of the argument after -netext */
    # endif


   /* ---------------------------------------------------------------- *\
      Scan through the list of hostnames and extract the node topology
   \* ---------------------------------------------------------------- */
      MPI_Allgather(hostname, HOSTNAME_LEN,MPI_BYTE,
                    hostnames,HOSTNAME_LEN,MPI_BYTE,DDI_World_comm);

      MPI_Allgather(&me,1,MPI_INT,ranks_local,1,MPI_INT,SMP_World_comm);
      if(me_local == 0) {
         disp = (int *) Malloc(nnodes*sizeof(int));
         for(i=1,disp[0]=0; i<nnodes; i++) disp[i] = disp[i-1] + np_by_node[i-1];
         MPI_Allgatherv(ranks_local,np_local,MPI_INT,ranks,np_by_node,disp,MPI_INT,
                        SMP_Masters_comm);
         free(disp);
      }
      MPI_Bcast(ranks,np,MPI_INT,0,SMP_World_comm);

      for(i=0; i<nnodes; i++) {

         cpus = nc_by_node[i];
         master = ranks_by_node[i][0];

      /* --------------------------------------------------------------- *\
         For each node, one data server is chosen from all the data
         servers on that node in a round-robin manner based on the rank
         of the process.
      \* --------------------------------------------------------------- */
         if(nd_by_node[i]) myds = cpus + (me % nd_by_node[i]);
         else              myds = -1;
 
 
      /* --------------------------------------------------------------- *\
         Using LAPI or MPI-2, we have no data servers, but we still need
         to know which compute process to interrupt to get, put, or acc!
      \* --------------------------------------------------------------- */
       # if defined DDI_LAPI
         myds = (me % nc_by_node[i]);
       # endif 


      /* ------------------------------------------------------ *\
         Sanity check: myds must correspond to a rank on node i
      \* ------------------------------------------------------ */
      /*  1st bit of next line was 'i<nd', changed by Ryan to 'nd', May 2010 */
         if(nd && (myds < 0 || myds >= np_by_node[i])) {
           if(me == 0) {
             fprintf(stdout," ERROR: Unable to assign a DS for node %i.\n",i);
             fprintf(stdout," Please report this error to:\n");
             fprintf(stdout,"   [email protected] and/or\n");
             fprintf(stdout,"   [email protected]\n");
             fprintf(stdout," myds=%i; np_by_node[%i]=%i\n",
                      myds,i,np_by_node[i]);
             fflush(stdout);
           # if defined WINDOWS
             Sleep(1*1000);
           # else
             sleep(1);
           # endif
           }
           MPI_Finalize();
         }


      /* ----------------------------------------------------- *\
         For each remote node, assign a data server rank
      \* ----------------------------------------------------- */
         if(nd) gv(ddinodes)[i].myds       = ranks_by_node[i][myds];
         else   gv(ddinodes)[i].myds       = -1;

      /* --------------------------------- *\
         Save these values in gv(ddinodes)
      \* --------------------------------- */
         gv(ddinodes)[i].cpus       = cpus;
         gv(ddinodes)[i].nodemaster = master;


      /* ----------------------------------------------------------------- *\
         Dig up the hostname of the node and append any network extensions
      \* ----------------------------------------------------------------- */
       # ifdef DDI_SOC
         c = (hostnames + master*HOSTNAME_LEN);
         if(ext) strcat(c,argv[ext]);
       # endif


      /* ------------------------------------------------------------------- *\
         All DDI processes on the node share the same node rank and hostname
      \* ------------------------------------------------------------------- */
         for(j=0; j<np_by_node[i]; j++) {
            rbn = ranks_by_node[i][j];
            gv(ddiprocs)[rbn].node = i;

          # ifdef DDI_SOC
            gv(ddiprocs)[rbn].hostname = (char *) strdup(c);
          # endif

          # if !defined USE_SYSV
            gv(ddiprocs)[rbn].node = rbn;
            if(rbn >= comm->np) gv(ddiprocs)[rbn].node -= comm->np;
          # endif

         }

      }


   /* ------------------------- *\
      Free any Malloc'ed Memory
   \* ------------------------- */
      free(hostnames);
      free(world);
      free(ranks_local);



   /* ---------------------------- *\
      Do NOT free global variables
   \* ---------------------------- */
/* --- moved to ddi_finalize
      free(ranks);
      free(np_by_node);
      free(nc_by_node);
      free(nd_by_node);
      free(ranks_by_node);
*/


   /* ---------------------------------- *\
      Synchronize processes and continue
   \* ---------------------------------- */
      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: Init_mpi finished.\n"))
   }
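
The node-discovery step above hinges on one idiom: allgather every rank's
hostname, then count the ranks whose hostname matches your own.  A minimal
standalone sketch of that idiom, using plain MPI and none of DDI's globals
(the 96-byte limit mirrors the HOSTNAME_LEN default above):

   #include <mpi.h>
   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>
   #include <unistd.h>

   #define HOSTNAME_LEN 96

   int main(int argc, char **argv) {
      int np, me, i, np_local = 0;
      char hostname[HOSTNAME_LEN], *hostnames;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &np);
      MPI_Comm_rank(MPI_COMM_WORLD, &me);

      gethostname(hostname, HOSTNAME_LEN);
      hostnames = (char *) malloc((size_t) np * HOSTNAME_LEN);
      MPI_Allgather(hostname,  HOSTNAME_LEN, MPI_BYTE,
                    hostnames, HOSTNAME_LEN, MPI_BYTE, MPI_COMM_WORLD);

      /* ranks with an identical hostname share "my" node */
      for(i=0; i<np; i++)
         if(strcmp(hostname, hostnames + (size_t) i * HOSTNAME_LEN) == 0)
            np_local++;

      printf("rank %d of %d: %d process(es) on this node\n", me, np, np_local);

      free(hostnames);
      MPI_Finalize();
      return 0;
   }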
Code Example #3
File: ddi_create.c  Project: streaver91/gamess
/* -------------------------------------------------------------------- *\
   DDI_Create_custom(idim,jdim,jcols,handle)
   =========================================
   [IN]  idim   - Number of rows in the array to be created.
   [IN]  jdim   - Number of columns in the array to be created.
   [IN]  jcols  - Array holding the starting column offset for each
                - processor (a non-decreasing cumulative column map,
                - as built by DDI_Create above).
   [OUT] handle - Handle given to the newly created array.
   
   Creates a distributed array where the user can customize how the
   array is distributed across the processors.
\* -------------------------------------------------------------------- */
   void DDI_Create_custom(int idim,int jdim,int *jcols,int *handle) {
   
      int i,np,me,nn,my;
      int inode;
      DDI_INT64 totwrds;
      DDI_INT64 longrows,longcols,longslice,longnd,long2g;
    # ifndef USE_SYSV
      int remote_id;
    # endif
      DDI_Patch patch;
      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM);
      
      np = comm->np;
      me = comm->me;
      nn = comm->nn;
      my = comm->my;

      Comm_sync(3001,comm);

      /* find an unused handle */
      for (i=0; i<gv(nxtdda); ++i) {
        if (gv(ddacomm)[i] == DDI_COMM_NULL) break;
      }
      if (i==gv(nxtdda)) ++gv(nxtdda);
      *handle = i;
     
    # ifndef USE_SYSV
      remote_id = my;
    # endif

      DEBUG_ROOT(LVL2,(stdout," DDI: Entering DDI_Create_custom.\n"))
      DEBUG_ROOT(LVL2,(stdout," DDI: Creating Array [%i] - %ix%i=%i.\n",*handle,idim,jdim,idim*jdim))
      DEBUG_OUT(LVL3,(stdout,"%s: Entering DDI_Create_custom.\n",DDI_Id()))

    # ifdef DS_SIGNAL
      if(comm->me_local == 1) {
         signal(SIGALRM,DS_Thread_main);
      }
    # endif
      
      if(me == 0) {
         if(gv(dda_output)) {
            longrows = idim;
            longcols = jdim;
            totwrds = longrows*longcols;
            fprintf(stdout," DDI: Creating Array [%i] - %i x %i = %li words.\n",
                                    *handle,idim,jdim,totwrds);
            fflush(stdout);
         }
      }

   /*
       Make sure each slice of the distributed array will be under 2 GWords.

       Even on 64-bit hardware, most counting in this program is done
       with 32-bit data types, meaning we can't count higher than 2**31-1.

       On 32-bit hardware, the 'long' data types here are also 32 bits,
       so the products can overflow, even going negative.  In its present
       form, nothing will be trapped here on a 32 bit machine!
   */
      longrows  = idim;
      longcols  = jdim;
      totwrds   = longrows*longcols;
   /*     Total distributed array over 2 Gwords is OK, but each  */
   /*     slice (MEMDDI per data server) must be under 2 GWords. */
   /*     TCP/IP has gv(nd)=-1 (uninitialized)                   */
   /*     Cray on one node has gv(nd)=0 since no d.s. exists.    */
      # if defined DDI_MPI
         longnd    = gv(nd);
         if (longnd <= 0) longnd=1;
      # endif
      # if defined DDI_SOC
         longnd = np;
      # endif
      longslice = totwrds/longnd;
   /*  next is just under the largest signed 32 bit integer (2**31-1),  */
   /*  stored as a 64 bit quantity                                      */
      long2g   = 2147483643;
      if (longslice > long2g)
         {
            fprintf(stdout,"\n");
            fprintf(stdout," DDI: trouble creating distributed array!\n");
            fprintf(stdout," Current number of data servers is %li\n",longnd);
            fprintf(stdout," so each data server's slice of array");
            fprintf(stdout," [%i] is %li words\n",*handle,longslice);
            fprintf(stdout,"\n");
            fprintf(stdout," Add more processors so required total array");
            fprintf(stdout," size %li words\n",totwrds);
            fprintf(stdout," divided by no. of processors (data servers)");
            fprintf(stdout," is less than 2 Gwords= %li\n",long2g);
            fprintf(stdout," For example, %li or more data servers...\n",
                                1+totwrds/long2g);
            fprintf(stdout,"\n");
            fflush(stdout);
            Fatal_error(911);
         }

   /* ------------------------------------ *\
      Ensure 'jcols' is properly formatted
   \* ------------------------------------ */
      for(i=0; i<np; i++) {
         if(jcols[i] < 0 && me == 0) {
            fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must be >= 0.\n");
            Fatal_error(911);
         }
         
         if(i > 0 && jcols[i] < jcols[i-1]) {
            fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must be non-decreasing.\n");
            Fatal_error(911);
         }
      }
   
   /* ----------------------------------------------------------------- *\
      Check to ensure the maximum number of arrays hasn't been reached.
   \* ----------------------------------------------------------------- */
      if( gv(nxtdda) == MAX_DD_ARRAYS ) {
        if(me == 0) {
           fprintf(stderr," DDI Error:  The maximum number of distributed arrays [%i] has been reached.\n",MAX_DD_ARRAYS);
           fprintf(stderr," Information:  The maximum number of distributed arrays is a DDI compile-time option.\n");
        }
        Fatal_error(911);
      }

      gv(nrow)[*handle] = idim;
      gv(ncol)[*handle] = jdim;
      gv(ddacomm)[*handle]=gv(ddi_working_comm);
      
 
   /* ---------------------------------------------------- *\
      Generate Column Mapping by Compute Process & by Node
   \* ---------------------------------------------------- */
      for(i=0,inode=-1; i<np; i++) {
        gv(pcmap)[*handle][i] = jcols[i];

     /* if(inode == gv(ddiprocs)[i].node) continue; */
        if(inode == comm->local_nid[i]) continue;
        gv(ncmap)[*handle][++inode] = gv(pcmap)[*handle][i];
      }

      gv(pcmap)[*handle][np] = jdim;
      gv(ncmap)[*handle][nn] = jdim;


   /* -------------------------- *\
      Get local patch dimensions
   \* -------------------------- */
      DDI_DistribP(*handle,me,&patch);
 
   /* ----------------------------- *\
      Create Distributed Data Array
   \* ----------------------------- */
      patch.handle = *handle;
# if defined WINTEL
      patch.oper   = DDI_CREATE_OP;
# else
      patch.oper   = DDI_CREATE;
# endif
      patch.size   = jdim;


# if defined USE_SYSV || defined DDI_ARMCI || defined DDI_MPI2
      DDI_Index_create(&patch);
# else
      DDI_Send_request(&patch,&remote_id,NULL);
# endif 


   /* ----------------------------- *\
      Synchronize Compute Processes
   \* ----------------------------- */
      Comm_sync(3002,comm);

      DEBUG_OUT(LVL3,(stdout,"%s: Leaving DDI_Create_custom.\n",DDI_Id()))
   }
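
Reading DDI_Create above, jcols is filled with cumulative starting offsets
rather than per-process counts, which matches the non-negative and
non-decreasing checks here.  A hedged caller-side sketch (it assumes the
DDI headers; the counts[] input and the helper name are illustrative, not
part of DDI):

   /* Build a custom column map from per-process counts and hand it to
      DDI_Create_custom.  counts[] must sum to jdim and hold one entry
      per compute process. */
   void create_weighted_array(int idim, int jdim,
                              const int *counts, int np, int *handle) {
      int i, icol = 0;
      int jcols[MAX_PROCESSORS];    /* DDI compile-time limit, as above */

      for(i=0; i<np; i++) {         /* counts -> starting offsets */
         jcols[i] = icol;
         icol += counts[i];
      }
      /* icol must equal jdim here, or the map is inconsistent */
      DDI_Create_custom(idim, jdim, jcols, handle);
   }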
Code Example #4
File: ddi_comm_create.c  Project: andremirt/v_cond
   void Comm_create(int np,int *ids, int ngroups, int mygroup, int comm_id, int *new_comm_id) {

      int i,ip,in,ismp,nn,nid,np_local,tmp;
      int err;
      size_t size;

      const DDI_Comm *cur_comm   = (DDI_Comm *) Comm_find(comm_id);
      const DDI_Comm *comm_world = &gv(ddi_base_comm);
      DDI_Comm *new_comm = (DDI_Comm *) Malloc(sizeof(DDI_Comm));
      DDI_Comm *end_comm = (DDI_Comm *) Comm_find_end();

      DEBUG_ROOT(LVL1,(stdout," DDI: Entering DDI_Create_comm.\n")) 
      DEBUG_OUT(LVL2,(stdout,"%s: Entering DDI_Create_comm.\n",DDI_Id()))

      Comm_sync(123,cur_comm);

   /* ------------------------------- *\
      Add new_comm to the linked list
   \* ------------------------------- */
      new_comm->next = NULL;
      end_comm->next = (void *) new_comm;

   /* new_data->next = NULL; */
   /* end_data->next = (void *) new_data; */

      new_comm->ngroups = ngroups;
      new_comm->mygroup = mygroup;

      new_comm->id = *new_comm_id = gv(ddi_comm_id)++;
      new_comm->local_nid  = (int *) Malloc(np*sizeof(int));
      new_comm->global_pid = (int *) Malloc(np*sizeof(int));
      new_comm->global_nid = (int *) Malloc(np*sizeof(int));

      i = 0;
      if(np > 1) {
         do {
	     
            if(ids[i] >= cur_comm->np) {   /* valid ids run from 0 to np-1 */
               fprintf(stdout,"%s: Invalid id list in DDI_Comm_create.\n",DDI_Id());
               Fatal_error(911);
            }
   
            if(ids[i+1] < ids[i]) {
               tmp      = ids[i];
               ids[i]   = ids[i+1];
               ids[i+1] = tmp;
               if(i) i--;   /* step back to re-check the previous pair; */
               i--;         /* the loop's ++i offsets this decrement    */
            }
   
         } while(++i < np-1);
      }

      Comm_sync(126,cur_comm);

      nn  = -1;
      nid = -1;
      np_local = 0;

      for(i=0; i<np; i++) {
         new_comm->global_pid[i] = cur_comm->global_pid[ids[i]];
         new_comm->global_nid[i] = cur_comm->global_nid[ids[i]];

         if(new_comm->global_nid[i] != nid) {
            nid = new_comm->global_nid[i];
            nn++;
         }

         new_comm->local_nid[i] = nn;
         
         if(nid == comm_world->my) np_local++;
         
      }

      nn++;
/*
      fprintf(stdout,"%s: new_comm->nn = %i.\n",DDI_Id(),nn);
      fprintf(stdout,"%s: new_comm->np_local = %i.\n",DDI_Id(),np_local);
*/
      Comm_sync(127,cur_comm);
      DEBUG_ROOT(LVL5,(stdout," comm_create - global_pid/global_nid formed.\n"))

      new_comm->smp_pid     = (int *) Malloc(np_local*sizeof(int));
      new_comm->node_master = (int *) Malloc(nn*sizeof(int));
      new_comm->global_dsid = (int *) Malloc(nn*sizeof(int));

      for(ip=0,in=-1,ismp=0,nid=-1; ip<np; ip++) {

         if(new_comm->global_nid[ip] != nid) {
            in++;
            nid = new_comm->global_nid[ip];
            new_comm->global_dsid[in] = comm_world->global_dsid[nid];
            new_comm->node_master[in] = new_comm->global_pid[ip];
         }

         if(new_comm->global_pid[ip] == comm_world->me) {
            new_comm->me = ip;
            new_comm->my = in;
         }
         
         if(nid == comm_world->my) {
            if(new_comm->global_pid[ip] == comm_world->me) new_comm->me_local = ismp;
            new_comm->smp_pid[ismp++] = new_comm->global_pid[ip];
         }
         
      }

      new_comm->nn = nn;
      new_comm->np = np;
      new_comm->np_local = ismp;

      DEBUG_OUT(LVL5,(stdout,"%s: np=%i, nn=%i, np_smp=%i, me=%i, my=%i, me_smp=%i.\n",
		      DDI_Id(),new_comm->np,new_comm->nn,new_comm->np_local,
		      new_comm->me,new_comm->my,new_comm->me_local));
      
    # if defined DDI_MPI
      new_comm->world_comm = cur_comm->world_comm;
      MPI_Comm_split(cur_comm->smp_comm,new_comm->global_pid[0],new_comm->me_local,&new_comm->smp_comm);
      MPI_Comm_split(cur_comm->compute_comm,new_comm->global_pid[0],new_comm->me,&new_comm->compute_comm);
      MPI_Comm_split(new_comm->compute_comm,new_comm->me_local,new_comm->my,&new_comm->node_comm);
    # endif
      
      DEBUG_OUT(LVL3,(stdout,"%s: Exiting DDI_Comm_create.\n",DDI_Id()))

   }
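
The id list above is ordered with a backtracking adjacent-swap (a gnome
sort): after each swap the index steps back one position, so a moved id
keeps bubbling toward the front.  A standalone sketch of the same loop,
assuming nothing beyond standard C:

   #include <stdio.h>

   /* Gnome-sort sketch mirroring the ids[] ordering loop in Comm_create. */
   static void sort_ids(int *ids, int np) {
      int i = 0, tmp;
      if(np < 2) return;
      do {
         if(ids[i+1] < ids[i]) {
            tmp      = ids[i];
            ids[i]   = ids[i+1];
            ids[i+1] = tmp;
            if(i) i--;   /* step back to re-check the previous pair */
            i--;         /* cancelled out by the loop's ++i         */
         }
      } while(++i < np-1);
   }

   int main(void) {
      int ids[] = {4, 1, 3, 0, 2};
      int i;
      sort_ids(ids, 5);
      for(i=0; i<5; i++) printf("%d ", ids[i]);   /* prints: 0 1 2 3 4 */
      printf("\n");
      return 0;
   }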