示例#1
0
/* ----------------------------------------------------------------- *\
   DDI_GetAcc_server(msg,from)
   ===========================
   [IN] msg  - patch request; msg->handle names the target array and
               msg->size is the length of the payload to exchange.
   [IN] from - rank of the DDI process that issued the request.

   Data-server half of a get-accumulate.  The payload is received from
   'from', passed with the request to DDI_GetAcc_local, and the same
   buffer is then sent back to 'from'.  When built with USE_SYSV the
   array's fence is held for the whole exchange so that local get/put
   operations cannot touch the array until the update has finished.
\* ----------------------------------------------------------------- */
   void DDI_GetAcc_server(const DDI_Patch *msg,int from) {

   /* ----------------------------------------------------- *\
      Scratch buffer for the patch payload (both directions)
   \* ----------------------------------------------------- */
      void *payload = NULL;

   /* ----------------------------------------------------- *\
      One-byte acknowledgement; only exchanged under USE_SYSV
   \* ----------------------------------------------------- */
    # if defined USE_SYSV
      char fence_ack = 57;
    # endif

   /* -------------------------------------------------------------------- *\
      Acquire the fence first and tell the requester it is up.  Without
      this the compute process could leave its accumulate call before the
      data server has finished updating the patch.
   \* -------------------------------------------------------------------- */
    # if defined USE_SYSV
      DDI_Fence_acquire(msg->handle);
      DDI_Send(&fence_ack,1,from);
    # endif

   /* ----------------------------------------------------------------- *\
      Reserve room for the entire payload in one piece.
      *TODO: fall back to receiving/updating in batches when the whole
             message does not fit.*
   \* ----------------------------------------------------------------- */
      DDI_Memory_push(msg->size,&payload,NULL);

   /* ----------------------------------------------------------------- *\
      Pull the data in, apply the local get-accumulate, and return the
      buffer contents to the requester.
   \* ----------------------------------------------------------------- */
      DDI_Recv(payload,msg->size,from);
      DDI_GetAcc_local(msg,payload);
      DDI_Send(payload,msg->size,from);

   /* ------------------------- *\
      Give the scratch space back
   \* ------------------------- */
      DDI_Memory_pop(msg->size);

   /* ----------------- *\
      Release the fence
   \* ----------------- */
    # if defined USE_SYSV
      DDI_Fence_release(msg->handle);
    # endif

   }
示例#2
0
/* -------------------------------------------------------------- *\
   DDI_GetAccP(handle,patch,buff)
   ==============================
   [IN]     handle - Handle of the distributed array to be accessed.
   [IN/OUT] patch  - structure containing ilo, ihi, jlo, jhi, etc.;
                     its oper and handle fields are overwritten.
   [IN/OUT] buff   - Data segment to be operated on.  In the non-LAPI
                     path each remote subpatch is sent from, and then
                     received back into, this buffer.
\* -------------------------------------------------------------- */
   void DDI_GetAccP(int handle,DDI_Patch *patch,void *buff) {
   
   /* --------------- *\
      Local Variables
   \* --------------- */
      char ack=57;
      int i,np,me,nn,my,remote_id,nsubp;
      int ranks[MAX_NODES];
      DDI_Patch subp[MAX_NODES];
      char *working_buffer = (char *) buff;

    # if defined DDI_LAPI
      DDI_Patch *local_patch = NULL;
      lapi_cntr_t cntr[MAX_NODES];
    # endif
    
      STD_DEBUG((stdout,"%s: Entering DDI_GetAccP.\n",DDI_Id()))

   /* -------------------- *\
      Process OR Node Rank
   \* -------------------- */
      DDI_NProc(&np,&me);
      DDI_NNode(&nn,&my);


   /* ------------------------------------- *\
      Ensure the patch has the correct info
   \* ------------------------------------- */
      patch->oper   = DDI_GETACC;
      patch->handle = handle;


   /* ---------------------------------- *\
      Check calling arguments for errors
   \* ---------------------------------- */
    # if defined DDI_CHECK_ARGS
      if(handle < 0 || handle >= gv(ndda)) {
         fprintf(stdout,"%s: Invalid handle [%i] in DDI_GetAcc.\n",DDI_Id(),handle);
         Fatal_error(911);
      }
      
      if(patch->ilo > patch->ihi || patch->ilo < 0 || patch->ihi >= gv(nrow)[handle]) {
         fprintf(stdout,"%s: Invalid row dimensions during DDI_GetAcc => ilo=%i ihi=%i.\n",DDI_Id(),patch->ilo,patch->ihi);
         Fatal_error(911);
      }
      
      if(patch->jlo > patch->jhi || patch->jlo < 0 || patch->jhi >= gv(ncol)[handle]) {
         fprintf(stdout,"%s: Invalid colum dimensions during DDI_GetAcc => jlo=%i jhi=%i.\n",DDI_Id(),patch->jlo,patch->jhi);
         Fatal_error(911);
      }
    # endif


   /* ------------------------------ *\
      Log some simple profiling info
   \* ------------------------------ */
    # if defined DDI_COUNTERS
      gv(acc_profile).ncalls++;
      gv(acc_profile).nbytes += DDI_Patch_sizeof(patch);
    # endif


   /* ------------------------------------------------------- *\
      Determine where the pieces of the requested patch exist
   \* ------------------------------------------------------- */
      DDI_Subpatch(handle,patch,&nsubp,ranks,subp);
      MAX_DEBUG((stdout,"%s: %i subpatches.\n",DDI_Id(),nsubp))

      
   /* ------------------------------------------------------------------- *\
      Send data requests for all non-local pieces of the requested patch.
      Operate immediately to GetAcc a local portion of the patch.
   \* ------------------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         ULTRA_DEBUG((stdout,"%s: GetAccumulating subpatch %i.\n",DDI_Id(),i))

      /* ------------------------------------------------------------- *\
         Using SysV, take advantage of shared-memory for a local patch
      \* ------------------------------------------------------------- */
       # if defined USE_SYSV

      /* ------------------------------------------------ *\
         Determine if the ith patch is local to 'my' node
      \* ------------------------------------------------ */
         if(ranks[i] == my) {
            MAX_DEBUG((stdout,"%s: Subpatch %i is local.\n",DDI_Id(),i))

         /* ---------------------------------------------------- *\
            Using LAPI, perform the local Getacc after all the data
            requests have been sent ==> maximize concurrency.
         \* ---------------------------------------------------- */
          # if defined DDI_LAPI
            local_patch = &subp[i];
            local_patch->cp_buffer_addr = working_buffer;
          # else
         /* --------------------------------------------- *\
            Otherwise, perform the local Getacc immediately.
         \* --------------------------------------------- */
            DDI_GetAcc_local(&subp[i],working_buffer);
          # endif

         /* ------------------------------------------------------- *\
            Move the working buffer to the next patch and continue.
         \* ------------------------------------------------------- */
            working_buffer += subp[i].size;
            continue;
         }
       # endif


      /* --------------------------------- *\
         If the current patch is NOT local 
      \* --------------------------------- */
         remote_id = ranks[i];


      /* ----------------------------------------------- *\
         Using LAPI, then include some extra information
      \* ----------------------------------------------- */
       # if defined DDI_LAPI
         subp[i].cp_lapi_id     = gv(lapi_map)[me];
         subp[i].cp_lapi_cntr   = (void *) &cntr[i];
         subp[i].cp_buffer_addr = (void *) working_buffer;
         LAPI_Setcntr(gv(lapi_hnd),&cntr[i],0);

         ULTRA_DEBUG((stdout,"%s: cp_lapi_id=%i.\n",DDI_Id(),gv(lapi_map)[me]))
         ULTRA_DEBUG((stdout,"%s: cp_lapi_cntr=%x.\n",DDI_Id(),&cntr[i]))
         ULTRA_DEBUG((stdout,"%s: cp_buffer_addr=%x.\n",DDI_Id(),working_buffer))
       # endif
      
      /* -------------------------------- *\
         Send data request for subpatch i
      \* -------------------------------- */
         MAX_DEBUG((stdout,"%s: Sending data request to node %i.\n",DDI_Id(),remote_id))
         DDI_Send_request(&subp[i],&remote_id,NULL);
         MAX_DEBUG((stdout,"%s: data request sent to global process %i.\n",DDI_Id(),remote_id))


      /* ------------------------------------------------------------ *\
         Receive an acknowledgement that the data server has raised
         a fence that will protect the distributed array from get or
         put access until all accumulates have finished.  This block-
         ing receive ensures that the current process executing this
         accumulate can *NOT* finish, until the fence has been raised 
      \* ------------------------------------------------------------ */
       # if !defined DDI_LAPI
       # if defined USE_SYSV
         MAX_DEBUG((stdout,"%s: Receiving remote fence ACK.\n",DDI_Id()))
         DDI_Recv(&ack,1,remote_id);
       # endif


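      /* -------------------------------------------------------- *\
         Send subpatch to remote_id, then receive the data that
         the server returns into the same region of the buffer
      \* -------------------------------------------------------- */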
         MAX_DEBUG((stdout,"%s: Sending subpatch %i to %i.\n",DDI_Id(),i,remote_id))
         DDI_Send(working_buffer,subp[i].size,remote_id);
         DDI_Recv(working_buffer,subp[i].size,remote_id);
       # endif

      
      /* ------------ *\
         Shift buffer 
      \* ------------ */
         working_buffer += subp[i].size;
      }

   /* ----------------------------------------------------------- *\
      Using LAPI, perform the local Getaccumulate (if needed) as the
      remote processes are getting the data to Getaccumulate on the
      target processes.  Then wait for all the data to be copied
      out of the buffer before returning.
   \* ----------------------------------------------------------- */
    # if defined DDI_LAPI

   /* ------------------------------------ *\
      GetAccumulating local patch (if exists)
   \* ------------------------------------ */
      if(local_patch) DDI_GetAcc_local(local_patch,local_patch->cp_buffer_addr);

   /* ---------------------------------------------------------- *\
      Wait for all remote LAPI_Gets to finish copying local data
   \* ---------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         if(subp[i].cp_lapi_cntr) {
            ULTRA_DEBUG((stdout,"%s: Wait for subpatch %i to be copied.\n",DDI_Id(),i))
            LAPI_Waitcntr(gv(lapi_hnd),&cntr[i],3,NULL);
            ULTRA_DEBUG((stdout,"%s: Subpatch %i copy completed.\n",DDI_Id(),i))
         }
      }
示例#3
0
文件: ddi_isend.c 项目: ryanolson/ddi
 /* Start routine for the helper thread of a non-blocking send: 'myarg' is
    the DDI_Request describing the message; the blocking DDI_Send is issued
    from here and NULL is returned once it has completed. */
 static void *DDI_ISend_thread(void *myarg) {
    DDI_Request *request = myarg;

    DDI_Send(request->buffer,request->size,request->to);
    ULTRA_DEBUG((stdout,"%s: isend_thread finished.\n",DDI_Id()))

    return NULL;
 }
示例#4
0
/* -------------------------------------------------------------- *\
   F77_Send(buff,size,to)
   ======================
   [IN] buff - data to be sent.
   [IN] size - pointer to the message length handed to DDI_Send.
   [IN] to   - pointer to the rank of the receiving process.

   FORTRAN wrapper for DDI_Send: the by-reference integer arguments
   are dereferenced, converted to size_t / int, and forwarded.
\* -------------------------------------------------------------- */
   void F77_Send(void *buff,int_f77 *size,int_f77 *to) {
      DDI_Send(buff,(size_t) *size,(int) *to);
   }
示例#5
0
/* -------------------------------------------------------------------- *\
   DDI_Timer_elapsed(t,start)
   ==========================
   [IN/OUT] t     - on entry an absolute time, on exit t - start.
   [IN]     start - reference time to subtract.

   Helper for DDI_Timer_output: subtracts two timevals in place and
   borrows one second when the microsecond field goes negative.
\* -------------------------------------------------------------------- */
   static void DDI_Timer_elapsed(struct timeval *t,const struct timeval *start) {
      t->tv_sec  -= start->tv_sec;
      t->tv_usec -= start->tv_usec;
      if(t->tv_usec < 0) {
         t->tv_sec--;
         t->tv_usec += 1000000;
      }
   }

/* -------------------------------------------------------------------- *\
   DDI_Timer_output()
   ==================
   Synchronous barrier on compute processes, but also collects total
   cpu time from each compute process and prints the totals to stdout.

   Every rank measures its own user/system time relative to
   gv(cpu_timer).  Ranks other than 0 send their struct rusage to rank
   0; rank 0 receives them, prints one "user + system = total" line per
   rank followed by its own wall-clock time relative to gv(wall_timer).
\* -------------------------------------------------------------------- */
   void DDI_Timer_output() {

      int i,me,np;
      struct rusage mycputime;
      struct rusage *timings = NULL;
      struct timeval cpu_total;
      struct timeval wall_total = {0,0};

      DDI_NProc(&np,&me);
      DDI_Sync(3081);

      if(me == 0) {
      /* rank 0 keeps one slot per process; slot 0 holds its own usage */
         timings = (struct rusage *) Malloc(np*sizeof(struct rusage));
         getrusage(RUSAGE_SELF,timings);

      /* Wall time is only sampled and reported on rank 0, so the
         elapsed value is computed here and nowhere else; the other
         ranks never read wall_total. */
         gettimeofday(&wall_total,NULL);
         DDI_Timer_elapsed(&wall_total,&gv(wall_timer));
      } else {
         getrusage(RUSAGE_SELF,&mycputime);
         timings = &mycputime;
      }

   /* convert this rank's absolute user/system times into elapsed times */
      DDI_Timer_elapsed(&timings->ru_utime,&gv(cpu_timer).ru_utime);
      DDI_Timer_elapsed(&timings->ru_stime,&gv(cpu_timer).ru_stime);

      if(me == 0) {
         for(i=1; i<np; i++) DDI_Recv(&timings[i],sizeof(struct rusage),i);

         fprintf(stdout,"\n ------------------------------------------------");
         fprintf(stdout,"\n CPU timing information for all compute processes");
         fprintf(stdout,"\n ================================================");

         for(i=0; i<np; i++) {

            cpu_total.tv_sec  = timings[i].ru_utime.tv_sec  + timings[i].ru_stime.tv_sec;
            cpu_total.tv_usec = timings[i].ru_utime.tv_usec + timings[i].ru_stime.tv_usec;

         /* carry into seconds; '>=' so that exactly 1000000 usec is
            printed as one extra second rather than ".1000000" */
            if(cpu_total.tv_usec >= 1000000) {
               cpu_total.tv_sec++;
               cpu_total.tv_usec -= 1000000;
            }

            fprintf(stdout,"\n %4i: %d.%.6d + %d.%.6d = %d.%.6d",i,
               (int)timings[i].ru_utime.tv_sec,(int)timings[i].ru_utime.tv_usec,
               (int)timings[i].ru_stime.tv_sec,(int)timings[i].ru_stime.tv_usec,
               (int)cpu_total.tv_sec,(int)cpu_total.tv_usec);
         }

         fprintf(stdout,"\n Wall: %d.%.6d", 
                (int) wall_total.tv_sec, (int) wall_total.tv_usec);
         fprintf(stdout,"\n ================================================\n\n");
                  
         fflush(stdout);
         free(timings);

      } else {

         DDI_Send(&mycputime,sizeof(struct rusage),0);

      }

      DDI_Sync(3082);
   }
示例#6
0
/* -------------------------------------------------------------------- *\
   DDI_Server()
   ============
   
   Called by DDI processes that specialize to become data servers.

   Receives DDI_Patch requests and dispatches on msg->oper until a
   DDI_QUIT request arrives, then calls DDI_Finalize() and exit(0), so
   this routine does not return to its caller.

   With CRAY_MPI one MPI_Irecv (tag 37) is kept posted per process of
   DDI_COMM_WORLD; completed requests are found with MPI_Testsome and
   the receive is reposted after the request has been served.  The
   arrays index_ds, p, s, r and the count NRequests used on that path
   are declared outside this function.  Without CRAY_MPI a single
   request at a time is obtained from DDI_Recv_request.
\* -------------------------------------------------------------------- */
   void DDI_Server() {
   
   /* --------------- *\
      Local Variables
   \* --------------- */
      int from;
      char ack=57;
      char server=1;                /* cleared by DDI_QUIT to leave the loop */
      DDI_Patch *msg;
      DDI_Patch patch;
      size_t counter_value = 0;
      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_COMM_WORLD);
 

    # ifdef CRAY_MPI
      int i;
      int nfinished =  0;
      int size      = sizeof(DDI_Patch);
      index_ds = (int *) Malloc(comm->np*sizeof(int));
      p = (DDI_Patch *) Malloc(comm->np*sizeof(DDI_Patch));
      s = (MPI_Status *) Malloc(comm->np*sizeof(MPI_Status));
      r = (MPI_Request *) Malloc(comm->np*sizeof(MPI_Request));

   /* ----------------------------------------------------------- *\
      Post IRecvs for remote data requests from all the processes
   \* ----------------------------------------------------------- */
      DEBUG_OUT(LVL2,(stdout,"%s: (DS) Posting MPI_IRecvs for data requests.\n",DDI_Id()))
      for(i=0; i<comm->np; i++) {
         MPI_Irecv(&p[i],size,MPI_BYTE,i,37,comm->world_comm,&r[i]);
      }
      NRequests = comm->np;
    # endif

      DEBUG_OUT(LVL2,(stdout,"%s: (DS) Starting DDI data server.\n",DDI_Id()))

   /* -------------------- *\
      DDI Data Server Loop
   \* -------------------- */
      do {
 
      /* ------------------------------------------------------------ *\
         CRAY_MPI: serve every request that MPI_Testsome reports as
         complete (the 'for' opened here is closed after the switch).
         Otherwise: block for the next request.
      \* ------------------------------------------------------------ */
       # ifdef CRAY_MPI
         MPI_Testsome(NRequests,r,&nfinished,index_ds,s); 
         for(i=0; i<nfinished; i++) {
            msg = &p[index_ds[i]];
            from  = s[i].MPI_SOURCE;
       # else
         DDI_Recv_request(&patch,&from);
         msg = (DDI_Patch *) &patch;
       # endif
   
         switch(msg->oper) {

           case DDI_DEBUGFLAG:
              DebugOutput(msg->handle);
              break;

   
           case DDI_MEMORY:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_MEMORY request.\n",DDI_Id()))
              DDI_Memory_server(msg->size);
              Comm_send(&ack,1,from,comm);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) DDI_MEMORY requested completed.\n",DDI_Id()))
              break;
   
   
           case DDI_CREATE:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_CREATE[%i] request.\n",DDI_Id(),msg->handle))
              DDI_Index_create(msg);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Array[%i] created successfully.\n",DDI_Id(),msg->handle))
              break;
   
   
           case DDI_DESTROY:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_DESTROY[%i] request.\n",DDI_Id(),msg->handle))
              DDI_Index_destroy(msg); 
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Array[%i] destroyed successfully.\n",DDI_Id(),msg->handle))
              break;
   
   
           case DDI_ZERO:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_ZERO request from %i.\n",DDI_Id(),from))
              DDI_Array_zero(msg->handle);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Finished DDI_ZERO request from %i.\n",DDI_Id(),from))
              break;
   
   
           case DDI_GET:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_GET request from %i.\n",DDI_Id(),from))
              DDI_Get_server(msg,from);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Finished DDI_GET request from %i.\n",DDI_Id(),from))
              break;
   
           
           case DDI_PUT:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_PUT request from %i.\n",DDI_Id(),from))
              DDI_Put_server(msg,from);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Finished DDI_PUT request from %i.\n",DDI_Id(),from))
              break;
   
   
           case DDI_ACC:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_ACC request from %i.\n",DDI_Id(),from))
              DDI_Acc_server(msg,from);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Finished DDI_ACC request from %i.\n",DDI_Id(),from))
              break;
              
              
           case DDI_GETACC:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_GETACC request from %i.\n",DDI_Id(),from))
              DDI_GetAcc_server(msg,from);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Finished DDI_GETACC request from %i.\n",DDI_Id(),from))
              break;
              
              
           case DDI_DLBRESET:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_DLBRESET request from %i.\n",DDI_Id(),from))
              DDI_DLBReset_local();
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Finished DDI_DLBRESET request from %i.\n",DDI_Id(),from))
              break;
   
    
           case DDI_DLBNEXT:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Received DDI_DLBNEXT request from %i.\n",DDI_Id(),from))
              DDI_DLBNext_local(&counter_value);
              Comm_send(&counter_value,sizeof(size_t),from,comm);
              DEBUG_OUT(LVL3,(stdout,"%s: (DS) Finished DDI_DLBNEXT request from %i.\n",DDI_Id(),from))
              break;


           case DDI_GDLBRESET:
              DDI_GDLBReset_local();
              break;
      
      
           case DDI_GDLBNEXT: 
              DDI_GDLBNext_local(&counter_value);
              DDI_Send(&counter_value,sizeof(size_t),from);
              break;

   
           case DDI_QUIT: /* Quit server loop, synchronize, and exit */
             DEBUG_OUT(LVL3,(stdout,"%s: (DS) Received DDI_QUIT request\n",DDI_Id()))
          /* if(me == np) DB_Close(); */
             DDI_Memory_finalize(); 
             Comm_send(&ack,1,from,comm);
             server=0;
             break;


          /* ------------------------------------------------------------ *\
             NOTE: handlers for DB_CREATE_ENTRY, DB_READ_ENTRY and
             DB_WRITE_ENTRY (master data server only) are not enabled.
          \* ------------------------------------------------------------ */


          /* ------------------------------------------------------------ *\
             Any other opcode is not handled; it is skipped, but leave a
             trace in the debug output so it is not lost silently.
          \* ------------------------------------------------------------ */
           default:
              DEBUG_OUT(LVL2,(stdout,"%s: (DS) Ignoring unrecognized request %i from %i.\n",DDI_Id(),(int) msg->oper,from))
              break;
         }

     # ifdef CRAY_MPI
       /* ------------------------------------------------------ *\
          Repost the asynchronous IRecv for remote data requests
       \* ------------------------------------------------------ */
          MPI_Irecv(&p[index_ds[i]],size,MPI_BYTE,from,37,comm->world_comm,&r[index_ds[i]]);
       }
     # endif

     } while(server);


   /* -------------------------------------------------------------------------- *\
      If using MPI and not TCP sockets -- cancel/release the persistent receives
   \* -------------------------------------------------------------------------- */
    # if defined DDI_MPI && !defined DDI_SOC

      /* Working on this bit */

    # endif


   /* ------------------------------- *\
      Finalize communication and exit
   \* ------------------------------- */
      DDI_Finalize();
      exit(0);
   }