示例#1
0
void DDI_ARMCI_Acc(DDI_Patch *patch, void *scale, void *buf) {
    int handle = patch->handle;
    int i,nops = 0;
    int nsubp,ranks[MAX_NODES];
    DDI_Patch subp[MAX_NODES];
    char *working_buffer = (char *) buf;
    
    DDI_Subpatch(handle,patch,&nsubp,ranks,subp);
    
    for(i=0; i<nsubp; i++) {
#if defined DDI_ARMCI_SMP
	nops += DDI_ARMCI_Acc_domain_SMP(&subp[i],scale,working_buffer,ranks[i]);
#else
	nops += DDI_ARMCI_Acc_proc(&subp[i],scale,working_buffer,ranks[i]);
#endif
	working_buffer += subp[i].size;
    }
    
#if defined DDI_ARMCI_IMPLICIT_NBACC && defined DDI_ARMCI_IMPLICIT_WAIT
    // wait for implicit non-blocking operations
    ARMCI_WaitAll();
#endif

    return;
}
示例#2
0
void DDI_ARR_select_(DDI_Patch *dAPatch, double *alpha, int *index) {
  int i;
  int op = dAPatch->oper;
  DDI_Patch dASubPatch[MAX_PROCESSORS];
  int dASubPatchRank[MAX_PROCESSORS], dANSubPatch;
  DDI_ARR_Element recvbuf, *element = NULL;
  const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM);
  int rank = comm->me;
  int node = comm->my;
  int smpRank = comm->me_local;
  int smpRoot;

  DDI_Subpatch_node(dAPatch->handle, dAPatch, &dANSubPatch, dASubPatchRank, dASubPatch, node);

  for (i = 0; i < dANSubPatch; ++i) {
    if (dASubPatchRank[i] == rank) {
      if (element == NULL) element = (DDI_ARR_Element*)malloc(sizeof(DDI_ARR_Element));
#ifdef DDI_ARR_DMA
      DDI_ARR_select_local(&dASubPatch[i], element);
#else
      DDI_ARR_select_remote(&dASubPatch[i], element, rank);
#endif
    }
  }

  /* Now selected elements need to be gathered from other nodes.
     Point-to-point is used because patch may not be located on all processors,
     but perhaps consider change to collective operation.
     IRecv or Recvany are not used because they seem to give problems.
  */

  /* select intra-node element */
  smpRoot = comm->node_master[node];
  if (smpRank == 0) {
    for (i = 0; i < dANSubPatch; ++i) {
      if (dASubPatchRank[i] == rank) continue;
      Comm_recv(&recvbuf, sizeof(DDI_ARR_Element), dASubPatchRank[i], comm);
      DDI_ARR_Element_select(element, &recvbuf, op);
    }
  }
  else if (element != NULL) {
    Comm_send(element, sizeof(DDI_ARR_Element), smpRoot, comm);
  }

  /* select inter-node element */
  DDI_Subpatch(dAPatch->handle, dAPatch, &dANSubPatch, dASubPatchRank, dASubPatch);
  if (rank == 0) {
    for (i = 0; i < dANSubPatch; ++i) {
      if (dASubPatchRank[i] == rank) continue;
      smpRoot = comm->node_master[dASubPatchRank[i]];
      Comm_recv(&recvbuf, sizeof(DDI_ARR_Element), smpRoot, comm);
      DDI_ARR_Element_select(element, &recvbuf, op);
    }
  }
  else if (element != NULL && rank == smpRoot) {
    Comm_send(element, sizeof(DDI_ARR_Element), 0, comm);
  }

  /* broadcast selected element */
  if (element == NULL) element = (DDI_ARR_Element*)malloc(sizeof(DDI_ARR_Element));
  Comm_bcast(element, sizeof(DDI_ARR_Element), 0, comm);

  *alpha = element->alpha;
  index[0] = element->index[0];
  index[1] = element->index[1];
  free(element);
}
示例#3
0
/* -------------------------------------------------------------- *\
   DDI_GetAccP(handle,patch,buff)
   ============================
   [IN] handle - Handle of the distributed array to be accessed.
   [IN] patch  - structure containing ilo, ihi, jlo, jhi, etc.
   [IN] buff   - Data segment to be operated on.
\* -------------------------------------------------------------- */
   void DDI_GetAccP(int handle,DDI_Patch *patch,void *buff) {
   
   /* --------------- *\
      Local Variables
   \* --------------- */
      char ack=57;
      int i,np,me,nn,my,remote_id,nsubp;
      int ranks[MAX_NODES];
      DDI_Patch subp[MAX_NODES];
      char *working_buffer = (char *) buff;

    # if defined DDI_LAPI
      DDI_Patch *local_patch = NULL;
      lapi_cntr_t cntr[MAX_NODES];
    # endif
    
      STD_DEBUG((stdout,"%s: Entering DDI_GetAccP.\n",DDI_Id()))

   /* -------------------- *\
      Process OR Node Rank
   \* -------------------- */
      DDI_NProc(&np,&me);
      DDI_NNode(&nn,&my);


   /* ------------------------------------- *\
      Ensure the patch has the correct info
   \* ------------------------------------- */
      patch->oper   = DDI_GETACC;
      patch->handle = handle;


   /* ---------------------------------- *\
      Check calling arguments for errors
   \* ---------------------------------- */
    # if defined DDI_CHECK_ARGS
      if(handle < 0 || handle >= gv(ndda)) {
         fprintf(stdout,"%s: Invalid handle [%i] in DDI_GetAcc.\n",DDI_Id(),handle);
         Fatal_error(911);
      }
      
      if(patch->ilo > patch->ihi || patch->ilo < 0 || patch->ihi >= gv(nrow)[handle]) {
         fprintf(stdout,"%s: Invalid row dimensions during DDI_GetAcc => ilo=%i ihi=%i.\n",DDI_Id(),patch->ilo,patch->ihi);
         Fatal_error(911);
      }
      
      if(patch->jlo > patch->jhi || patch->jlo < 0 || patch->jhi >= gv(ncol)[handle]) {
         fprintf(stdout,"%s: Invalid colum dimensions during DDI_GetAcc => jlo=%i jhi=%i.\n",DDI_Id(),patch->jlo,patch->jhi);
         Fatal_error(911);
      }
    # endif


   /* ------------------------------ *\
      Log some simple profiling info
   \* ------------------------------ */
    # if defined DDI_COUNTERS
      gv(acc_profile).ncalls++;
      gv(acc_profile).nbytes += DDI_Patch_sizeof(patch);
    # endif


   /* ------------------------------------------------------- *\
      Determine where the pieces of the requested patch exist
   \* ------------------------------------------------------- */
      DDI_Subpatch(handle,patch,&nsubp,ranks,subp);
      MAX_DEBUG((stdout,"%s: %i subpatches.\n",DDI_Id(),nsubp))

      
   /* ------------------------------------------------------------------- *\
      Send data requests for all non-local pieces of the requested patch.
      Operate immediately to GetAcc a local portion of the patch.
   \* ------------------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         ULTRA_DEBUG((stdout,"%s: GetAccumulating subpatch %i.\n",DDI_Id(),i))

      /* ------------------------------------------------------------- *\
         Using SysV, take advantage of shared-memory for a local patch
      \* ------------------------------------------------------------- */
       # if defined USE_SYSV

      /* ------------------------------------------------ *\
         Determine if the ith patch is local to 'my' node
      \* ------------------------------------------------ */
         if(ranks[i] == my) {
            MAX_DEBUG((stdout,"%s: Subpatch %i is local.\n",DDI_Id(),i))

         /* ---------------------------------------------------- *\
            Using LAPI, perform the local Getacc after all the data
            requests have been sent ==> maximize concurrency.
         \* ---------------------------------------------------- */
          # if defined DDI_LAPI
            local_patch = &subp[i];
            local_patch->cp_buffer_addr = working_buffer;
          # else
         /* --------------------------------------------- *\
            Otherwise, perform the local Getacc immediately.
         \* --------------------------------------------- */
            DDI_GetAcc_local(&subp[i],working_buffer);
          # endif

         /* ------------------------------------------------------- *\
            Move the working buffer to the next patch and continue.
         \* ------------------------------------------------------- */
            working_buffer += subp[i].size;
            continue;
         }
       # endif


      /* --------------------------------- *\
         If the current patch is NOT local 
      \* --------------------------------- */
         remote_id = ranks[i];


      /* ----------------------------------------------- *\
         Using LAPI, then include some extra information
      \* ----------------------------------------------- */
       # if defined DDI_LAPI
         subp[i].cp_lapi_id     = gv(lapi_map)[me];
         subp[i].cp_lapi_cntr   = (void *) &cntr[i];
         subp[i].cp_buffer_addr = (void *) working_buffer;
         LAPI_Setcntr(gv(lapi_hnd),&cntr[i],0);

         ULTRA_DEBUG((stdout,"%s: cp_lapi_id=%i.\n",DDI_Id(),gv(lapi_map)[me]))
         ULTRA_DEBUG((stdout,"%s: cp_lapi_cntr=%x.\n",DDI_Id(),&cntr[i]))
         ULTRA_DEBUG((stdout,"%s: cp_buffer_addr=%x.\n",DDI_Id(),working_buffer))
       # endif
      
      /* -------------------------------- *\
         Send data request for subpatch i
      \* -------------------------------- */
         MAX_DEBUG((stdout,"%s: Sending data request to node %i.\n",DDI_Id(),remote_id))
         DDI_Send_request(&subp[i],&remote_id,NULL);
         MAX_DEBUG((stdout,"%s: data request sent to global process %i.\n",DDI_Id(),remote_id))


      /* ------------------------------------------------------------ *\
         Receive an acknowledgement that the data server has raised
         a fence that will protect the distributed array from get or
         put access until all accumulates have finished.  This block-
         ing receive ensures that the current process executing this
         accumulate can *NOT* finish, until the fence has been raised 
      \* ------------------------------------------------------------ */
       # if !defined DDI_LAPI
       # if defined USE_SYSV
         MAX_DEBUG((stdout,"%s: Receiving remote fence ACK.\n",DDI_Id()))
         DDI_Recv(&ack,1,remote_id);
       # endif


      /* ---------------------------- *\
         Recv subpatch from remote_id
      \* ---------------------------- */
         MAX_DEBUG((stdout,"%s: Sending subpatch %i to %i.\n",DDI_Id(),i,remote_id))
         DDI_Send(working_buffer,subp[i].size,remote_id);
         DDI_Recv(working_buffer,subp[i].size,remote_id);
       # endif

      
      /* ------------ *\
         Shift buffer 
      \* ------------ */
         working_buffer += subp[i].size;
      }

   /* ----------------------------------------------------------- *\
      Using LAPI, perform the local Getaccumulate (if needed) as the
      remote processes are getting the data to Getaccumulate on the
      target processes.  Then wait for all the data to be copied
      out of the buffer before returning.
   \* ----------------------------------------------------------- */
    # if defined DDI_LAPI

   /* ------------------------------------ *\
      GetAccumulating local patch (if exists)
   \* ------------------------------------ */
      if(local_patch) DDI_GetAcc_local(local_patch,local_patch->cp_buffer_addr);

   /* ---------------------------------------------------------- *\
      Wait for all remote LAPI_Gets to finish copying local data
   \* ---------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         if(subp[i].cp_lapi_cntr) {
            ULTRA_DEBUG((stdout,"%s: Wait for subpatch %i to be copied.\n",DDI_Id(),i))
            LAPI_Waitcntr(gv(lapi_hnd),&cntr[i],3,NULL);
            ULTRA_DEBUG((stdout,"%s: Subpatch %i copy completed.\n",DDI_Id(),i))
         }
      }