Пример #1
0
/*@
   PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

   Collective but not synchronizing

   Input Arguments:
   comm - communicator on which split reduction has been queued

   Level: advanced

   Note:
   Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
   VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

.seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
@*/
PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
{
  PetscErrorCode      ierr;
  PetscSplitReduction *sr;

  PetscFunctionBegin;
  ierr = PetscSplitReductionGet(comm,&sr);CHKERRQ(ierr);
  if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
  if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
    PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
    PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
    PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
    MPI_Comm       comm = sr->comm;
    PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
    ierr = PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);CHKERRQ(ierr);
    ierr = MPI_Comm_size(sr->comm,&size);CHKERRQ(ierr);
    if (size == 1) {
      ierr = PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));CHKERRQ(ierr);
    } else {
      /* determine if all reductions are sum, max, or min */
      for (i=0; i<numops; i++) {
        if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
        else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
        else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
      }
      if (sum_flg + max_flg + min_flg > 1) {
        /*
         after all the entires in lvalues we store the reducetype flags to indicate
         to the reduction operations what are sums and what are max
         */
        for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

        ierr = MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);CHKERRQ(ierr);
      } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
        ierr = MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);CHKERRQ(ierr);
      } else if (min_flg) {
        ierr = MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);CHKERRQ(ierr);
      } else {
        ierr = MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);CHKERRQ(ierr);
      }
    }
    sr->state     = STATE_PENDING;
    sr->numopsend = 0;
    ierr = PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscSplitReductionApply(sr);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Пример #2
0
static PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (sr->state) {
  case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
    ierr = PetscSplitReductionApply(sr);CHKERRQ(ierr);
    break;
  case STATE_PENDING:
    /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
    ierr = PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);CHKERRQ(ierr);
    if (sr->request != MPI_REQUEST_NULL) {
      ierr = MPI_Wait(&sr->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
    sr->state = STATE_END;
    ierr = PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);CHKERRQ(ierr);
    break;
  default: break;            /* everything is already done */
  }
  PetscFunctionReturn(0);
}