/*
   VecSetOption_MPI - Applies a single VecOption to a vector whose data is a Vec_MPI.

   Input Parameters:
+  V    - the vector to configure
.  op   - the option being changed
-  flag - the new value of the option

   Notes:
   VEC_IGNORE_OFF_PROC_ENTRIES and VEC_IGNORE_NEGATIVE_INDICES only record the flag in the scalar stash.
   VEC_SUBSET_OFF_PROC_ENTRIES records the flag in the Vec_MPI data and, when it is being switched off,
   also resets the assembly state. Any other option is silently ignored.
*/
static PetscErrorCode VecSetOption_MPI(Vec V,VecOption op,PetscBool flag)
{
  Vec_MPI        *mpidata = (Vec_MPI*)V->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (op == VEC_IGNORE_OFF_PROC_ENTRIES) {
    V->stash.donotstash = flag;
  } else if (op == VEC_IGNORE_NEGATIVE_INDICES) {
    V->stash.ignorenegidx = flag;
  } else if (op == VEC_SUBSET_OFF_PROC_ENTRIES) {
    /* The same logic exists in MatAssembly for MAT_SUBSET_OFF_PROC_ENTRIES */
    mpidata->assembly_subset = flag;
    if (!mpidata->assembly_subset) {
      /* The user asked not to reuse the communication pattern: drop the existing one to free memory ... */
      ierr = VecAssemblyReset_MPI(V);CHKERRQ(ierr);
      /* ... and record that no first assembly has been done yet */
      mpidata->first_assembly_done = PETSC_FALSE;
    }
  }
  PetscFunctionReturn(0);
}
/*
   VecAssemblyEnd_MPI_BTS - Waits on the receive requests stored in the Vec_MPI data, adds or inserts
   every received entry into the local array of X, waits on the send requests, and resets the stash state.

   Input Parameter:
.  X - the vector being assembled; X->data must be a Vec_MPI

   Notes:
   If X->stash.donotstash is set, only the insert modes of both stashes are reset and nothing is waited on.

   Every receiving rank owns four consecutive requests in x->recvreqs. A completed request index idx is
   decoded as
$     rank slot     = idx/4
$     idx%4 == 0    scalar-stash indices
$     idx%4 == 1    scalar-stash values
$     idx%4 == 2    block-stash indices
$     idx%4 == 3    block-stash values
   The data of a (rank, stash) pair is unpacked only once its pending counter in the frame drops to zero.

   Errors:
   PETSC_ERR_PLIB if a received scalar entry, or a received block, lies outside the local range
   [rstart,rend); PETSC_ERR_SUP if the received insert mode is neither ADD_VALUES nor INSERT_VALUES.
*/
static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X)
{
  Vec_MPI          *x = (Vec_MPI*)X->data;
  PetscInt         bs = X->map->bs;
  PetscMPIInt      npending,*some_indices,r;
  MPI_Status       *some_statuses;
  PetscScalar      *xarray;
  PetscErrorCode   ierr;
  VecAssemblyFrame *frame;

  PetscFunctionBegin;
  if (X->stash.donotstash) {
    /* Off-process entries are being ignored: nothing was communicated, just clear the insert modes */
    X->stash.insertmode  = NOT_SET_VALUES;
    X->bstash.insertmode = NOT_SET_VALUES;
    PetscFunctionReturn(0);
  }

  ierr = VecGetArray(X,&xarray);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(x->segrecvframe,&frame);CHKERRQ(ierr);
  /* Statuses are only allocated when they will actually be requested from MPI_Waitsome() */
  ierr = PetscMalloc2(4*x->nrecvranks,&some_indices,x->use_status?4*x->nrecvranks:0,&some_statuses);CHKERRQ(ierr);
  for (r=0,npending=0; r<x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb;
  while (npending>0) {
    /* Initialized so the loop below never reads an indeterminate value if the wait does not store a count */
    PetscMPIInt ndone = 0,ii;
    /* Filling MPI_Status fields requires some resources from the MPI library.  We skip it on the first assembly, or
     * when VEC_SUBSET_OFF_PROC_ENTRIES has not been set, because we could exchange exact sizes in the initial
     * rendezvous.  When the rendezvous is elided, however, we use MPI_Status to get actual message lengths, so that
     * subsequent assembly can set a proper subset of the values. */
    ierr = MPI_Waitsome(4*x->nrecvranks,x->recvreqs,&ndone,some_indices,x->use_status?some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    for (ii=0; ii<ndone; ii++) {
      PetscInt    i = some_indices[ii]/4,j,k;                          /* slot of the sending rank */
      InsertMode  imode = (InsertMode)x->recvhdr[i].insertmode;
      PetscInt    *recvint;
      PetscScalar *recvscalar;
      PetscBool   intmsg   = (PetscBool)(some_indices[ii]%2 == 0);     /* indices (even) vs. values (odd) */
      PetscBool   blockmsg = (PetscBool)((some_indices[ii]%4)/2 == 1); /* requests 2 and 3 belong to the block stash */
      npending--;
      if (!blockmsg) { /* Scalar stash */
        PetscMPIInt count;
        /* Wait until every pending scalar-stash message of this rank has completed */
        if (--frame[i].pendings > 0) continue;
        if (x->use_status) {
          ierr = MPI_Get_count(&some_statuses[ii],intmsg ? MPIU_INT : MPIU_SCALAR,&count);CHKERRQ(ierr);
        } else count = x->recvhdr[i].count;
        for (j=0,recvint=frame[i].ints,recvscalar=frame[i].scalars; j<count; j++,recvint++) {
          PetscInt loc = *recvint - X->map->rstart;
          if (*recvint < X->map->rstart || X->map->rend <= *recvint) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Received vector entry %D out of local range [%D,%D)]",*recvint,X->map->rstart,X->map->rend);
          switch (imode) {
          case ADD_VALUES:
            xarray[loc] += *recvscalar++;
            break;
          case INSERT_VALUES:
            xarray[loc] = *recvscalar++;
            break;
          default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",imode);
          }
        }
      } else {         /* Block stash */
        PetscMPIInt count;
        /* Wait until every pending block-stash message of this rank has completed */
        if (--frame[i].pendingb > 0) continue;
        if (x->use_status) {
          ierr = MPI_Get_count(&some_statuses[ii],intmsg ? MPIU_INT : MPIU_SCALAR,&count);CHKERRQ(ierr);
          if (!intmsg) count /= bs; /* Convert from number of scalars to number of blocks */
        } else count = x->recvhdr[i].bcount;
        for (j=0,recvint=frame[i].intb,recvscalar=frame[i].scalarb; j<count; j++,recvint++) {
          PetscInt loc = (*recvint)*bs - X->map->rstart;
          /* Validate ownership exactly as the scalar stash does; without this a bad block index writes outside
           * the local array.  Guarded by bs > 0 because no entry is written otherwise. */
          if (bs > 0 && (loc < 0 || X->map->rend - X->map->rstart < loc + bs)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Received vector block starting at entry %D out of local range [%D,%D)",(*recvint)*bs,X->map->rstart,X->map->rend);
          switch (imode) {
          case ADD_VALUES:
            for (k=loc; k<loc+bs; k++) xarray[k] += *recvscalar++;
            break;
          case INSERT_VALUES:
            for (k=loc; k<loc+bs; k++) xarray[k] = *recvscalar++;
            break;
          default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",imode);
          }
        }
      }
    }
  }
  ierr = VecRestoreArray(X,&xarray);CHKERRQ(ierr);
  ierr = MPI_Waitall(4*x->nsendranks,x->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(some_indices,some_statuses);CHKERRQ(ierr);
  if (x->assembly_subset) {
    void *dummy;                /* reset segbuffers */
    ierr = PetscSegBufferExtractInPlace(x->segrecvint,&dummy);CHKERRQ(ierr);
    ierr = PetscSegBufferExtractInPlace(x->segrecvscalar,&dummy);CHKERRQ(ierr);
  } else {
    /* The communication pattern will not be reused, so release it */
    ierr = VecAssemblyReset_MPI(X);CHKERRQ(ierr);
  }

  X->stash.insertmode  = NOT_SET_VALUES;
  X->bstash.insertmode = NOT_SET_VALUES;
  ierr = VecStashScatterEnd_Private(&X->stash);CHKERRQ(ierr);
  ierr = VecStashScatterEnd_Private(&X->bstash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}