示例#1
0
文件: ddi_gdlb.c 项目: ryanolson/ddi
/* -------------------------------------------------------- *\
   DDI_GDLBReset()
   ===============

   Reset the global dynamic load-balance counter.

   The working communicator must be DDI_COMM_WORLD; otherwise
   an error is printed and Fatal_error(911) is called.  The
   reset itself is bracketed by DDI_Sync(3041)/DDI_Sync(3042).
   Outside of ARMCI builds only the process that DDI_NProc
   reports as rank 0 performs the reset.
\* -------------------------------------------------------- */
   void DDI_GDLBReset() {
      int nproc,rank;

    # if !defined USE_SYSV
   /* Destination and request used when the reset is sent as a message. */
      int server = 0;
      DDI_Patch request;
    # endif

   /* The counter may only be reset from the world scope. */
      if(DDI_WORKING_COMM != DDI_COMM_WORLD) {
         fprintf(stdout," DDI Error: group dlb counter must be reset in DDI_WORLD scope.\n");
         Fatal_error(911);
      }

      DDI_NProc(&nproc,&rank);
      DDI_Sync(3041);

#if defined DDI_ARMCI
   /* ARMCI builds delegate the whole reset. */
      DDI_ARMCI_GDLBReset();
#elif defined USE_SYSV
   /* With SysV support rank 0 resets the counter through the local routine. */
      if(rank == 0) {
         DDI_GDLBReset_local();
      }
#else
   /* Otherwise rank 0 sends a DDI_GDLBRESET request to 'server'. */
      if(rank == 0) {
         request.oper = DDI_GDLBRESET;
         DDI_Send_request(&request,&server,NULL);
      }
#endif

      DDI_Sync(3042);
   }
示例#2
0
/* -------------------------------------------------------------------- *\
   DDI_Create_custom(idim,jdim,jcols,handle)
   =========================================
   [IN]  idim   - Number of rows in the array to be created.
   [IN]  jdim   - Number of columns in the array to be created.
   [IN]  jcols  - Array holding the number of columns to be given to
                - each processor when creating the distributed array.
   [OUT] handle - Handle given to the newly created array.
   
   Creates a distributed array where the user can customize how the
   array is distributed across the processors.
\* -------------------------------------------------------------------- */
   void DDI_Create_custom(int idim,int jdim,int *jcols,int *handle) {
   
      int i,np,me,nn,my;
      int inode;
      DDI_INT64 totwrds;
      DDI_INT64 longrows,longcols,longslice,longnd,long2g;
    # ifndef USE_SYSV
      int remote_id;
    # endif
      DDI_Patch patch;
      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM);
      
      np = comm->np;
      me = comm->me;
      nn = comm->nn;
      my = comm->my;

      Comm_sync(3001,comm);

      /* find an unused handle */
      for (i=0; i<gv(nxtdda); ++i) {
        if (gv(ddacomm)[i] == DDI_COMM_NULL) break;
      }
      if (i==gv(nxtdda)) ++gv(nxtdda);
      *handle = i;
     
    # ifndef USE_SYSV
      remote_id = my;
    # endif

      DEBUG_ROOT(LVL2,(stdout," DDI: Entering DDI_Create_custom.\n"))
      DEBUG_ROOT(LVL2,(stdout," DDI: Creating Array [%i] - %ix%i=%i.\n",*handle,idim,jdim,idim*jdim))
      DEBUG_OUT(LVL3,(stdout,"%s: Entering DDI_Create_custom.\n",DDI_Id()))

    # ifdef DS_SIGNAL
      if(comm->me_local == 1) {
         signal(SIGALRM,DS_Thread_main);
      }
    # endif
      
      if(me == 0) {
         if(gv(dda_output)) {
            longrows = idim;
            longcols = jdim;
            totwrds = longrows*longcols;
            fprintf(stdout," DDI: Creating Array [%i] - %i x %i = %li words.\n",
                                    *handle,idim,jdim,totwrds);
            fflush(stdout);
         }
      }

   /*
       Make sure each slice of the distributed array will be under 2 GWords.

       Even on 64-bit hardware, most counting in this program is done
       with 32-bit data types, meaning we can't count higher than 2**31-1.

       If on 32-bit hardware, the 'long' data types here will be 32-bits,
       and so we'll see crazy products, including less than zero.
       In present form, nothing will be trapped here on a 32 bit machine!
   */
      longrows  = idim;
      longcols  = jdim;
      totwrds   = longrows*longcols;
   /*     Total distributed array over 2 Gwords is OK, but each  */
   /*     slice (MEMDDI per data server) must be under 2 GWords. */
   /*     TCP/IP has gv(nd)=-1 (uninitialized)                   */
   /*     Cray on one node has gv(nd)=0 since no d.s. exists.    */
      # if defined DDI_MPI
         longnd    = gv(nd);
         if (longnd <= 0) longnd=1;
      # endif
      # if defined DDI_SOC
         longnd = np;
      # endif
      longslice = totwrds/longnd;
   /*  next is largest signed 32 bit integer, stored as 64 bit quantity  */
      long2g   = 2147483643;
      if (longslice > long2g)
         {
            fprintf(stdout,"\n");
            fprintf(stdout," DDI: trouble creating distributed array!\n");
            fprintf(stdout," Current number of data servers is %li\n",longnd);
            fprintf(stdout," so each data server's slice of array");
            fprintf(stdout," [%i] is %li words\n",*handle,longslice);
            fprintf(stdout,"\n");
            fprintf(stdout," Add more processors so required total array");
            fprintf(stdout," size %li words\n",totwrds);
            fprintf(stdout," divided by no. of processors (data servers)");
            fprintf(stdout," is less than 2 Gwords= %li\n",long2g);
            fprintf(stdout," For example, %li or more data servers...\n",
                                1+totwrds/long2g);
            fprintf(stdout,"\n");
            fflush(stdout);
            Fatal_error(911);
         }

   /* ------------------------------------ *\
      Ensure 'jcols' is properly formatted
   \* ------------------------------------ */
      for(i=0; i<np; i++) {
         if(jcols[i] < 0 && me == 0) {
            fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must be >= 0.\n");
            Fatal_error(911);
         }
         
         if(i > 0)
         if(jcols[i] < jcols[i-1]) {
            fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must increase monotonically.\n");
            Fatal_error(911);
         }
      }
   
   /* ----------------------------------------------------------------- *\
      Check to ensure the maximum number of arrays hasn't been reached.
   \* ----------------------------------------------------------------- */
      if( gv(nxtdda) == MAX_DD_ARRAYS ) {
        if(me == 0) {
           fprintf(stderr," DDI Error:  The maximum number of distributed arrays [%i] has been reached.\n",MAX_DD_ARRAYS);
           fprintf(stderr," Information:  The maximum number of distributed arrays is a DDI compile-time option.\n");
        }
        Fatal_error(911);
      }

      gv(nrow)[*handle] = idim;
      gv(ncol)[*handle] = jdim;
      gv(ddacomm)[*handle]=gv(ddi_working_comm);
      
 
   /* ---------------------------------------------------- *\
      Generate Column Mapping by Compute Process & by Node
   \* ---------------------------------------------------- */
      for(i=0,inode=-1; i<np; i++) {
        gv(pcmap)[*handle][i] = jcols[i];

     /* if(inode == gv(ddiprocs)[i].node) continue; */
        if(inode == comm->local_nid[i]) continue;
        gv(ncmap)[*handle][++inode] = gv(pcmap)[*handle][i];
      }

      gv(pcmap)[*handle][np] = jdim;
      gv(ncmap)[*handle][nn] = jdim;


   /* -------------------------- *\
      Get local patch dimensions
   \* -------------------------- */
      DDI_DistribP(*handle,me,&patch);
 
   /* ----------------------------- *\
      Create Distributed Data Array
   \* ----------------------------- */
      patch.handle = *handle;
# if defined WINTEL
      patch.oper   = DDI_CREATE_OP;
# else
      patch.oper   = DDI_CREATE;
# endif
      patch.size   = jdim;


# if defined USE_SYSV || defined DDI_ARMCI || defined DDI_MPI2
      DDI_Index_create(&patch);
# else
      DDI_Send_request(&patch,&remote_id,NULL);
# endif 


   /* ----------------------------- *\
      Synchronize Compute Processes
   \* ----------------------------- */
      Comm_sync(3002,comm);

      DEBUG_OUT(LVL3,(stdout,"%s: Leaving DDI_Create_custom.\n",DDI_Id()))
   }
示例#3
0
/* Send the request described by dAPatch to process 'rank', then receive
   one DDI_ARR_Element from it into 'element' over the working communicator. */
void DDI_ARR_select_remote(DDI_Patch *dAPatch, DDI_ARR_Element *element, int rank) {
  const DDI_Comm *working_comm;

  working_comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM);

  /* 'rank' is passed by address, so DDI_Send_request may update it;
     the receive below uses whatever value it holds afterwards. */
  DDI_Send_request(dAPatch, &rank, NULL);

  /* Collect the reply, exactly one element in size. */
  Comm_recv(element, sizeof(DDI_ARR_Element), rank, working_comm);
}
示例#4
0
文件: ddi_gdlb.c 项目: ryanolson/ddi
/* ---------------------------------------------------------- *\
   DDI_GDLBNext(counter)
   =====================
   [OUT] counter - receives the value of the global load-balance
                   counter as fetched by this call.

   Atomically fetches the global load-balance counter and then
   increments it.  ARMCI builds delegate entirely to
   DDI_ARMCI_GDLBNext.  Otherwise only rank 0 of the working
   communicator fetches the counter, temporarily switching to
   DDI_COMM_WORLD scope to do so; the value is then passed to
   DDI_BCast with root 0.
\* ---------------------------------------------------------- */
   void DDI_GDLBNext(size_t *counter) {
      int np,me,nn,my;
      int saved_scope;
      int server = 0;
      DDI_Patch request;
      
    # if defined DDI_LAPI
   /* State for the LAPI fetch-and-add on the counter. */
      lapi_cntr_t org_cntr;
      uint tgt          = gv(lapi_map)[0];
      int *tgt_var      = gv(lapi_gdlb_cntr_addr)[tgt];
      int  in_val       = 1;
      int  prev_tgt_val = -1;
    # endif

# if defined DDI_ARMCI
   /* ARMCI handles the whole operation; nothing below runs. */
      DDI_ARMCI_GDLBNext(counter);
      return;
# endif
    
      DDI_NProc(&np,&me);

      if(me == 0) {
      /* ------------------------------------------------- *\
         Remember the caller's scope, then switch to world
      \* ------------------------------------------------- */
         saved_scope = DDI_WORKING_COMM;
         gv(ddi_working_comm) = DDI_COMM_WORLD;
   
         DDI_NProc(&np,&me);
         DDI_NNode(&nn,&my);

       # if FULL_SMP
       # if defined DDI_LAPI
      /* Zero the origin counter, issue the fetch-and-add, wait for it. */
         if(LAPI_Setcntr(gv(lapi_hnd),&org_cntr,0) != LAPI_SUCCESS) {
            fprintf(stdout,"%s: LAPI_Setcntr failed in DDI_GDLBNext.\n",DDI_Id());
            Fatal_error(911);
         }
         
         if(LAPI_Rmw(gv(lapi_hnd),FETCH_AND_ADD,tgt,tgt_var,&in_val,
                       &prev_tgt_val,&org_cntr) != LAPI_SUCCESS) {
            fprintf(stdout,"%s: LAPI_Rmw failed in DDI_GDLBNext.\n",DDI_Id());
            Fatal_error(911);
         }
         
         if(LAPI_Waitcntr(gv(lapi_hnd),&org_cntr,1,NULL) != LAPI_SUCCESS) {
            fprintf(stdout,"%s: LAPI_Waitcntr failed in DDI_GDLBNext.\n",DDI_Id());
            Fatal_error(911);
         }
         
      /* prev_tgt_val still holding its -1 initializer means no value came back. */
         if(prev_tgt_val != -1) {
            *counter = (size_t) prev_tgt_val;
         } else {
            fprintf(stdout,"%s: LAPI version of DDI_GDLBNext is not working correctly.\n",DDI_Id());
            Fatal_error(911);
         }
       # else
      /* Node 0 uses the local routine; other nodes send a request to 'server'. */
         if(my != 0) {
            request.oper = DDI_GDLBNEXT;
            DDI_Send_request(&request,&server,NULL);
            DDI_Recv(counter,sizeof(size_t),server);
         } else {
            DDI_GDLBNext_local(counter);
         }
       # endif
       # else
      /* Without FULL_SMP the counter is always requested by message. */
         request.oper = DDI_GDLBNEXT;
         DDI_Send_request(&request,&server,NULL);
         DDI_Recv(counter,sizeof(size_t),server);
       # endif
   
      /* --------------------------- *\
         Restore the caller's scope
      \* --------------------------- */
         gv(ddi_working_comm) = saved_scope;
      }

   /* Share the fetched value within the working communicator. */
      DDI_BCast(counter,sizeof(size_t),0);
   }
示例#5
0
/* -------------------------------------------------------------- *\
   DDI_GetAccP(handle,patch,buff)
   ============================
   [IN] handle - Handle of the distributed array to be accessed.
   [IN] patch  - structure containing ilo, ihi, jlo, jhi, etc.
   [IN] buff   - Data segment to be operated on.
\* -------------------------------------------------------------- */
   void DDI_GetAccP(int handle,DDI_Patch *patch,void *buff) {
   
   /* --------------- *\
      Local Variables
   \* --------------- */
      char ack=57;
      int i,np,me,nn,my,remote_id,nsubp;
      int ranks[MAX_NODES];
      DDI_Patch subp[MAX_NODES];
      char *working_buffer = (char *) buff;

    # if defined DDI_LAPI
      DDI_Patch *local_patch = NULL;
      lapi_cntr_t cntr[MAX_NODES];
    # endif
    
      STD_DEBUG((stdout,"%s: Entering DDI_GetAccP.\n",DDI_Id()))

   /* -------------------- *\
      Process OR Node Rank
   \* -------------------- */
      DDI_NProc(&np,&me);
      DDI_NNode(&nn,&my);


   /* ------------------------------------- *\
      Ensure the patch has the correct info
   \* ------------------------------------- */
      patch->oper   = DDI_GETACC;
      patch->handle = handle;


   /* ---------------------------------- *\
      Check calling arguments for errors
   \* ---------------------------------- */
    # if defined DDI_CHECK_ARGS
      if(handle < 0 || handle >= gv(ndda)) {
         fprintf(stdout,"%s: Invalid handle [%i] in DDI_GetAcc.\n",DDI_Id(),handle);
         Fatal_error(911);
      }
      
      if(patch->ilo > patch->ihi || patch->ilo < 0 || patch->ihi >= gv(nrow)[handle]) {
         fprintf(stdout,"%s: Invalid row dimensions during DDI_GetAcc => ilo=%i ihi=%i.\n",DDI_Id(),patch->ilo,patch->ihi);
         Fatal_error(911);
      }
      
      if(patch->jlo > patch->jhi || patch->jlo < 0 || patch->jhi >= gv(ncol)[handle]) {
         fprintf(stdout,"%s: Invalid colum dimensions during DDI_GetAcc => jlo=%i jhi=%i.\n",DDI_Id(),patch->jlo,patch->jhi);
         Fatal_error(911);
      }
    # endif


   /* ------------------------------ *\
      Log some simple profiling info
   \* ------------------------------ */
    # if defined DDI_COUNTERS
      gv(acc_profile).ncalls++;
      gv(acc_profile).nbytes += DDI_Patch_sizeof(patch);
    # endif


   /* ------------------------------------------------------- *\
      Determine where the pieces of the requested patch exist
   \* ------------------------------------------------------- */
      DDI_Subpatch(handle,patch,&nsubp,ranks,subp);
      MAX_DEBUG((stdout,"%s: %i subpatches.\n",DDI_Id(),nsubp))

      
   /* ------------------------------------------------------------------- *\
      Send data requests for all non-local pieces of the requested patch.
      Operate immediately to GetAcc a local portion of the patch.
   \* ------------------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         ULTRA_DEBUG((stdout,"%s: GetAccumulating subpatch %i.\n",DDI_Id(),i))

      /* ------------------------------------------------------------- *\
         Using SysV, take advantage of shared-memory for a local patch
      \* ------------------------------------------------------------- */
       # if defined USE_SYSV

      /* ------------------------------------------------ *\
         Determine if the ith patch is local to 'my' node
      \* ------------------------------------------------ */
         if(ranks[i] == my) {
            MAX_DEBUG((stdout,"%s: Subpatch %i is local.\n",DDI_Id(),i))

         /* ---------------------------------------------------- *\
            Using LAPI, perform the local Getacc after all the data
            requests have been sent ==> maximize concurrency.
         \* ---------------------------------------------------- */
          # if defined DDI_LAPI
            local_patch = &subp[i];
            local_patch->cp_buffer_addr = working_buffer;
          # else
         /* --------------------------------------------- *\
            Otherwise, perform the local Getacc immediately.
         \* --------------------------------------------- */
            DDI_GetAcc_local(&subp[i],working_buffer);
          # endif

         /* ------------------------------------------------------- *\
            Move the working buffer to the next patch and continue.
         \* ------------------------------------------------------- */
            working_buffer += subp[i].size;
            continue;
         }
       # endif


      /* --------------------------------- *\
         If the current patch is NOT local 
      \* --------------------------------- */
         remote_id = ranks[i];


      /* ----------------------------------------------- *\
         Using LAPI, then include some extra information
      \* ----------------------------------------------- */
       # if defined DDI_LAPI
         subp[i].cp_lapi_id     = gv(lapi_map)[me];
         subp[i].cp_lapi_cntr   = (void *) &cntr[i];
         subp[i].cp_buffer_addr = (void *) working_buffer;
         LAPI_Setcntr(gv(lapi_hnd),&cntr[i],0);

         ULTRA_DEBUG((stdout,"%s: cp_lapi_id=%i.\n",DDI_Id(),gv(lapi_map)[me]))
         ULTRA_DEBUG((stdout,"%s: cp_lapi_cntr=%x.\n",DDI_Id(),&cntr[i]))
         ULTRA_DEBUG((stdout,"%s: cp_buffer_addr=%x.\n",DDI_Id(),working_buffer))
       # endif
      
      /* -------------------------------- *\
         Send data request for subpatch i
      \* -------------------------------- */
         MAX_DEBUG((stdout,"%s: Sending data request to node %i.\n",DDI_Id(),remote_id))
         DDI_Send_request(&subp[i],&remote_id,NULL);
         MAX_DEBUG((stdout,"%s: data request sent to global process %i.\n",DDI_Id(),remote_id))


      /* ------------------------------------------------------------ *\
         Receive an acknowledgement that the data server has raised
         a fence that will protect the distributed array from get or
         put access until all accumulates have finished.  This block-
         ing receive ensures that the current process executing this
         accumulate can *NOT* finish, until the fence has been raised 
      \* ------------------------------------------------------------ */
       # if !defined DDI_LAPI
       # if defined USE_SYSV
         MAX_DEBUG((stdout,"%s: Receiving remote fence ACK.\n",DDI_Id()))
         DDI_Recv(&ack,1,remote_id);
       # endif


      /* ---------------------------- *\
         Recv subpatch from remote_id
      \* ---------------------------- */
         MAX_DEBUG((stdout,"%s: Sending subpatch %i to %i.\n",DDI_Id(),i,remote_id))
         DDI_Send(working_buffer,subp[i].size,remote_id);
         DDI_Recv(working_buffer,subp[i].size,remote_id);
       # endif

      
      /* ------------ *\
         Shift buffer 
      \* ------------ */
         working_buffer += subp[i].size;
      }

   /* ----------------------------------------------------------- *\
      Using LAPI, perform the local Getaccumulate (if needed) as the
      remote processes are getting the data to Getaccumulate on the
      target processes.  Then wait for all the data to be copied
      out of the buffer before returning.
   \* ----------------------------------------------------------- */
    # if defined DDI_LAPI

   /* ------------------------------------ *\
      GetAccumulating local patch (if exists)
   \* ------------------------------------ */
      if(local_patch) DDI_GetAcc_local(local_patch,local_patch->cp_buffer_addr);

   /* ---------------------------------------------------------- *\
      Wait for all remote LAPI_Gets to finish copying local data
   \* ---------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         if(subp[i].cp_lapi_cntr) {
            ULTRA_DEBUG((stdout,"%s: Wait for subpatch %i to be copied.\n",DDI_Id(),i))
            LAPI_Waitcntr(gv(lapi_hnd),&cntr[i],3,NULL);
            ULTRA_DEBUG((stdout,"%s: Subpatch %i copy completed.\n",DDI_Id(),i))
         }
      }