/*@
   PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

   Collective but not synchronizing

   Input Arguments:
   comm - communicator on which split reduction has been queued

   Level: advanced

   Note:
   Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
   VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

   In asynchronous mode, calling this again while the reduction it started is still pending has no effect.

.seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
@*/
PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
{
  PetscErrorCode      ierr;
  PetscSplitReduction *sr;

  PetscFunctionBegin;
  ierr = PetscSplitReductionGet(comm,&sr);CHKERRQ(ierr);
  if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
  if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
    PetscInt    i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
    PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
    PetscInt    sum_flg = 0,max_flg = 0,min_flg = 0;
    MPI_Comm    srcomm = sr->comm; /* communicator stored in the reduction object; named so it does not shadow the argument */
    PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

    /*
       The reduction has already been started and nobody has waited on it yet. Posting a second
       non-blocking reduction would overwrite sr->request (losing the handle of the one in flight)
       and reuse lvalues/gvalues while they still belong to an active operation, so do nothing.
    */
    if (sr->state == STATE_PENDING) PetscFunctionReturn(0);

    ierr = PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);CHKERRQ(ierr);
    ierr = MPI_Comm_size(srcomm,&size);CHKERRQ(ierr);
    if (size == 1) {
      /* single process: the local values are the global values, no request is posted */
      ierr = PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));CHKERRQ(ierr);
    } else {
      /* determine if all reductions are sum, max, or min */
      for (i=0; i<numops; i++) {
        if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
        else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
        else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
      }
      if (sum_flg + max_flg + min_flg > 1) {
        /*
           after all the entries in lvalues we store the reducetype flags to indicate
           to the reduction operations what are sums and what are max
        */
        for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

        ierr = MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,srcomm,&sr->request);CHKERRQ(ierr);
      } else if (max_flg) {
        /* Compute max of real and imag parts separately, presumably only the real part is used */
        ierr = MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,srcomm,&sr->request);CHKERRQ(ierr);
      } else if (min_flg) {
        /* same treatment as the max case: reduce the underlying real entries */
        ierr = MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,srcomm,&sr->request);CHKERRQ(ierr);
      } else {
        /* only sums (or no queued operations at all) */
        ierr = MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,srcomm,&sr->request);CHKERRQ(ierr);
      }
    }
    /* mark the reduction as started-but-not-completed; PetscSplitReductionEnd() waits on sr->request */
    sr->state     = STATE_PENDING;
    sr->numopsend = 0;
    ierr = PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);CHKERRQ(ierr);
  } else {
    /* synchronous mode: perform the whole reduction now */
    ierr = PetscSplitReductionApply(sr);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
   PetscSplitReductionEnd - Makes sure the queued reduction has been carried out.

   Input Argument:
   sr - the split reduction object

   What is done depends on sr->state:
     STATE_BEGIN   - nothing has been communicated yet; run the reduction now via PetscSplitReductionApply()
     STATE_PENDING - a non-blocking reduction was started; wait for its request (if any) and mark the state STATE_END
     anything else - nothing to do
*/
static PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (sr->state == STATE_BEGIN) {
    /* synchronous communication and first call to VecXxxEnd(): do the communication here */
    ierr = PetscSplitReductionApply(sr);CHKERRQ(ierr);
  } else if (sr->state == STATE_PENDING) {
    /* asynchronous communication and first call to VecXxxEnd(): block until it has completed */
    ierr = PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);CHKERRQ(ierr);
    if (sr->request != MPI_REQUEST_NULL) {
      ierr = MPI_Wait(&sr->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
    sr->state = STATE_END;
    ierr = PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);CHKERRQ(ierr);
  }
  /* any other state: the reduction has already been completed */
  PetscFunctionReturn(0);
}