static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*tag;
  MPI_Aint       lb,unitbytes;
  MPI_Request    *sendreq,*recvreq;

  PetscFunctionBegin;
  ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr);
  if (ntags > 0) {
    ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr);
  }
  for (i=1; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr);
  }

  /* Perform complete initial rendezvous */
  ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);

  ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr);
  ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr);

  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);

  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  for (i=0; i<*nfrom; i++) {
    void        *header = (*(char**)fromdata) + count*unitbytes*i;
    PetscMPIInt k;
    for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  ierr = PetscFree(tag);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(0);
}
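/*
  The send/recv callbacks passed to PetscCommBuildTwoSidedFReq_Reference() receive the duplicated
  communicator, the array of ntags reserved tags, the destination (or discovered source) rank, a
  pointer to the rendezvous packet for that rank, and ntags request slots to fill.  The following
  is a minimal sketch of such a pair for ntags == 1, assuming the surrounding file's PETSc headers
  (petscsys.h) are in scope; the names ExampleCtx, ExampleSend, and ExampleRecv, and the idea of
  carrying count/dtype in the context, are illustrative assumptions rather than part of the API.
*/
typedef struct {
  PetscMPIInt  count;   /* number of entries of dtype per packet (assumed to be carried in the context) */
  MPI_Datatype dtype;   /* datatype describing one entry */
} ExampleCtx;

static PetscErrorCode ExampleSend(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt tonum,PetscMPIInt rank,void *packet,MPI_Request req[],void *ctx)
{
  ExampleCtx     *ex = (ExampleCtx*)ctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* tonum is the index of this destination within the caller's toranks[] array; unused in this sketch */
  /* Post one nonblocking send on the first reserved tag; the caller later waits on req[] */
  ierr = MPI_Isend(packet,ex->count,ex->dtype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode ExampleRecv(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *packet,MPI_Request req[],void *ctx)
{
  ExampleCtx     *ex = (ExampleCtx*)ctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Post the matching nonblocking receive from the source rank discovered during the rendezvous */
  ierr = MPI_Irecv(packet,ex->count,ex->dtype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}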
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,*toranks,*fromranks,nto,nfrom;
  PetscInt       i,n;
  PetscBool      verbose,build_twosided_f;
  Unit           *todata,*fromdata;
  MPI_Datatype   dtype;

  PetscInitialize(&argc,&argv,(char*)0,help);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  verbose = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,"-verbose",&verbose,NULL);CHKERRQ(ierr);
  build_twosided_f = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,"-build_twosided_f",&build_twosided_f,NULL);CHKERRQ(ierr);

  /* Each rank sends to the ranks at power-of-two offsets from itself */
  for (i=1,nto=0; i<size; i*=2) nto++;
  ierr = PetscMalloc2(nto,&todata,nto,&toranks);CHKERRQ(ierr);
  for (n=0,i=1; i<size; n++,i*=2) {
    toranks[n]      = (rank+i) % size;
    todata[n].rank  = (rank+i) % size;
    todata[n].value = (PetscScalar)rank;
    todata[n].ok[0] = 'o';
    todata[n].ok[1] = 'k';
    todata[n].ok[2] = 0;
  }
  if (verbose) {
    for (i=0; i<nto; i++) {
      ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] TO %d: {%D, %g, \"%s\"}\n",rank,toranks[i],todata[i].rank,(double)PetscRealPart(todata[i].value),todata[i].ok);CHKERRQ(ierr);
    }
    ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr);
  }

  ierr = MakeDatatype(&dtype);CHKERRQ(ierr);

  if (build_twosided_f) {
    struct FCtx fctx;
    PetscMPIInt *todummy,*fromdummy;
    fctx.rank    = rank;
    fctx.nto     = nto;
    fctx.toranks = toranks;
    fctx.todata  = todata;
    ierr = PetscSegBufferCreate(sizeof(Unit),1,&fctx.seg);CHKERRQ(ierr);
    ierr = PetscMalloc1(nto,&todummy);CHKERRQ(ierr);
    for (i=0; i<nto; i++) todummy[i] = rank;
    ierr = PetscCommBuildTwoSidedF(PETSC_COMM_WORLD,1,MPI_INT,nto,toranks,todummy,&nfrom,&fromranks,&fromdummy,2,FSend,FRecv,&fctx);CHKERRQ(ierr);
    ierr = PetscFree(todummy);CHKERRQ(ierr);
    ierr = PetscFree(fromdummy);CHKERRQ(ierr);
    ierr = PetscSegBufferExtractAlloc(fctx.seg,&fromdata);CHKERRQ(ierr);
    ierr = PetscSegBufferDestroy(&fctx.seg);CHKERRQ(ierr);
  } else {
    ierr = PetscCommBuildTwoSided(PETSC_COMM_WORLD,1,dtype,nto,toranks,todata,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr);
  }
  ierr = MPI_Type_free(&dtype);CHKERRQ(ierr);

  if (verbose) {
    PetscInt *iranks,*iperm;
    ierr = PetscMalloc2(nfrom,&iranks,nfrom,&iperm);CHKERRQ(ierr);
    for (i=0; i<nfrom; i++) {
      iranks[i] = fromranks[i];
      iperm[i]  = i;
    }
    /* Receive ordering is non-deterministic in general, so sort to make verbose output deterministic. */
    ierr = PetscSortIntWithPermutation(nfrom,iranks,iperm);CHKERRQ(ierr);
    for (i=0; i<nfrom; i++) {
      PetscInt ip = iperm[i];
      ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] FROM %d: {%D, %g, \"%s\"}\n",rank,fromranks[ip],fromdata[ip].rank,(double)PetscRealPart(fromdata[ip].value),fromdata[ip].ok);CHKERRQ(ierr);
    }
    ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr);
    ierr = PetscFree2(iranks,iperm);CHKERRQ(ierr);
  }

  if (nto != nfrom) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] From ranks %d does not match To ranks %d",rank,nto,nfrom);

  /* Verify that exactly the expected senders (ranks at power-of-two offsets below us) were heard from */
  for (i=1; i<size; i*=2) {
    PetscMPIInt expected_rank = (rank-i+size)%size;
    PetscBool   flg;
    for (n=0; n<nfrom; n++) {
      if (expected_rank == fromranks[n]) goto found;
    }
    SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"[%d] Could not find expected from rank %d",rank,expected_rank);
found:
    if (PetscRealPart(fromdata[n].value) != expected_rank) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got data %g from rank %d",rank,(double)PetscRealPart(fromdata[n].value),expected_rank);
    ierr = PetscStrcmp(fromdata[n].ok,"ok",&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got string %s from rank %d",rank,fromdata[n].ok,expected_rank);
  }
  ierr = PetscFree2(todata,toranks);CHKERRQ(ierr);
  ierr = PetscFree(fromdata);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
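/*
  A typical run of this test exercises both code paths via the options parsed above; the executable
  name ex8 below is an assumption about the build target, not stated in the source:

    mpiexec -n 4 ./ex8 -verbose
    mpiexec -n 4 ./ex8 -verbose -build_twosided_f
*/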
/*@
   ISBuildTwoSided - Given an IS listing the target rank for each local entry, builds the IS of
   index values that this process receives from remote (or local) senders.

   Collective on IS

   Input Parameters:
+  ito    - an IS giving the target rank for each local entry; negative target ranks are ignored
-  toindx - an IS giving the index values to send; NULL means the natural numbering (range start
            plus local position) is sent

   Output Parameter:
.  rows - an IS containing the index values received from remote or local processes

   Level: advanced

.seealso: MatPartitioningCreate(), ISPartitioningToNumbering(), ISPartitioningCount()
@*/
PetscErrorCode ISBuildTwoSided(IS ito,IS toindx,IS *rows)
{
  const PetscInt *ito_indices,*toindx_indices;
  PetscInt       *send_indices,rstart,*recv_indices,nrecvs,nsends;
  PetscInt       *tosizes,*fromsizes,i,j,*tosizes_tmp,*tooffsets_tmp,ito_ln;
  PetscMPIInt    *toranks,*fromranks,size,target_rank,*fromperm_newtoold,nto,nfrom;
  PetscLayout    isrmap;
  MPI_Comm       comm;
  PetscSF        sf;
  PetscSFNode    *iremote;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ito,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = ISGetLocalSize(ito,&ito_ln);CHKERRQ(ierr);
  /* why do we not have ISGetLayout? */
  isrmap = ito->map;
  ierr = PetscLayoutGetRange(isrmap,&rstart,NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(ito,&ito_indices);CHKERRQ(ierr);

  /* Count how many entries go to each rank and compute the send offsets */
  ierr = PetscCalloc2(size,&tosizes_tmp,size+1,&tooffsets_tmp);CHKERRQ(ierr);
  for (i=0; i<ito_ln; i++) {
    if (ito_indices[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (ito_indices[i] >= size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"target rank %d is larger than communicator size %d ",ito_indices[i],size);
#endif
    tosizes_tmp[ito_indices[i]]++;
  }
  nto = 0;
  for (i=0; i<size; i++) {
    tooffsets_tmp[i+1] = tooffsets_tmp[i]+tosizes_tmp[i];
    if (tosizes_tmp[i] > 0) nto++;
  }
  /* Compress to the list of destination ranks and their (size,offset) pairs */
  ierr = PetscCalloc2(nto,&toranks,2*nto,&tosizes);CHKERRQ(ierr);
  nto  = 0;
  for (i=0; i<size; i++) {
    if (tosizes_tmp[i] > 0) {
      toranks[nto]     = i;
      tosizes[2*nto]   = tosizes_tmp[i];   /* size */
      tosizes[2*nto+1] = tooffsets_tmp[i]; /* offset */
      nto++;
    }
  }
  nsends = tooffsets_tmp[size];
  ierr = PetscCalloc1(nsends,&send_indices);CHKERRQ(ierr);
  if (toindx) {
    ierr = ISGetIndices(toindx,&toindx_indices);CHKERRQ(ierr);
  }
  for (i=0; i<ito_ln; i++) {
    if (ito_indices[i] < 0) continue;
    target_rank = ito_indices[i];
    send_indices[tooffsets_tmp[target_rank]] = toindx ? toindx_indices[i] : (i+rstart);
    tooffsets_tmp[target_rank]++;
  }
  if (toindx) {
    ierr = ISRestoreIndices(toindx,&toindx_indices);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(ito,&ito_indices);CHKERRQ(ierr);
  ierr = PetscFree2(tosizes_tmp,tooffsets_tmp);CHKERRQ(ierr);

  /* Discover which ranks send to us and how much each will send */
  ierr = PetscCommBuildTwoSided(comm,2,MPIU_INT,nto,toranks,tosizes,&nfrom,&fromranks,&fromsizes);CHKERRQ(ierr);
  ierr = PetscFree2(toranks,tosizes);CHKERRQ(ierr);

  ierr = PetscCalloc1(nfrom,&fromperm_newtoold);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) fromperm_newtoold[i] = i;
  ierr = PetscSortMPIIntWithArray(nfrom,fromranks,fromperm_newtoold);CHKERRQ(ierr);
  nrecvs = 0;
  for (i=0; i<nfrom; i++) nrecvs += fromsizes[i*2];
  ierr = PetscCalloc1(nrecvs,&recv_indices);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrecvs,&iremote);CHKERRQ(ierr);
  nrecvs = 0;
  for (i=0; i<nfrom; i++) {
    for (j=0; j<fromsizes[2*fromperm_newtoold[i]]; j++) {
      iremote[nrecvs].rank    = fromranks[i];
      iremote[nrecvs++].index = fromsizes[2*fromperm_newtoold[i]+1]+j;
    }
  }
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nsends,nrecvs,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
  /* how to put a prefix ? */
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,send_indices,recv_indices);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,send_indices,recv_indices);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromsizes);CHKERRQ(ierr);
  ierr = PetscFree(fromperm_newtoold);CHKERRQ(ierr);
  ierr = PetscFree(send_indices);CHKERRQ(ierr);
  if (rows) {
    ierr = PetscSortInt(nrecvs,recv_indices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrecvs,recv_indices,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
  } else {
    ierr = PetscFree(recv_indices);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
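/*
  A minimal usage sketch for ISBuildTwoSided(), assuming the surrounding file's PETSc headers are in
  scope: every rank asks to send its natural numbering to rank 0 (NULL toindx), so rank 0 receives
  the concatenation of all ranges and the other ranks receive an empty IS.  The helper name
  ExampleISBuildTwoSided and the choice of target rank are illustrative assumptions, not part of the
  routine above.
*/
static PetscErrorCode ExampleISBuildTwoSided(MPI_Comm comm)
{
  IS             ito,rows;
  PetscInt       targets[2];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Two local entries, both destined for rank 0 */
  targets[0] = 0;
  targets[1] = 0;
  ierr = ISCreateGeneral(comm,2,targets,PETSC_COPY_VALUES,&ito);CHKERRQ(ierr);
  /* NULL toindx: the natural numbering (global position within ito's layout) is sent */
  ierr = ISBuildTwoSided(ito,NULL,&rows);CHKERRQ(ierr);
  ierr = ISView(rows,PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);
  ierr = ISDestroy(&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&ito);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}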