PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash) { PetscErrorCode ierr; PetscInt max,*opt,nopt; PetscBool flg; PetscFunctionBegin; /* Require 2 tags, get the second using PetscCommGetNewTag() */ stash->comm = comm; ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); nopt = stash->size; ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); ierr = PetscOptionsGetIntArray(NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); if (flg) { if (nopt == 1) max = opt[0]; else if (nopt == stash->size) max = opt[stash->rank]; else if (stash->rank < nopt) max = opt[stash->rank]; else max = 0; /* use default */ stash->umax = max; } else { stash->umax = 0; } ierr = PetscFree(opt);CHKERRQ(ierr); if (bs <= 0) bs = 1; stash->bs = bs; stash->nmax = 0; stash->oldnmax = 0; stash->n = 0; stash->reallocs = -1; stash->idx = 0; stash->array = 0; stash->send_waits = 0; stash->recv_waits = 0; stash->send_status = 0; stash->nsends = 0; stash->nrecvs = 0; stash->svalues = 0; stash->rvalues = 0; stash->rmax = 0; stash->nprocs = 0; stash->nprocessed = 0; stash->donotstash = PETSC_FALSE; stash->ignorenegidx = PETSC_FALSE; PetscFunctionReturn(0); }
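/*
   A minimal sketch (not part of PETSc) of the selection rule applied above to
   -vecstash_initial_size: a single value applies to every rank, exactly 'size'
   values give one per rank, and a shorter list covers only the leading ranks
   while the remaining ranks fall back to the default (0).  The helper name is
   hypothetical.
*/
#include <petscsys.h>

static PetscInt StashInitialSizeForRank(PetscInt nopt,const PetscInt opt[],PetscMPIInt size,PetscMPIInt rank)
{
  if (nopt == 1)    return opt[0];    /* a single value for all ranks */
  if (nopt == size) return opt[rank]; /* one value per rank */
  if (rank < nopt)  return opt[rank]; /* short list covers the leading ranks */
  return 0;                           /* 0 means: use the library default */
}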
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) { PetscErrorCode ierr; PetscInt max,*opt,nopt,i; PetscTruth flg; PetscFunctionBegin; /* Require 2 tags,get the second using PetscCommGetNewTag() */ stash->comm = comm; ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; nopt = stash->size; ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); if (flg) { if (nopt == 1) max = opt[0]; else if (nopt == stash->size) max = opt[stash->rank]; else if (stash->rank < nopt) max = opt[stash->rank]; else max = 0; /* Use default */ stash->umax = max; } else { stash->umax = 0; } ierr = PetscFree(opt);CHKERRQ(ierr); if (bs <= 0) bs = 1; stash->bs = bs; stash->nmax = 0; stash->oldnmax = 0; stash->n = 0; stash->reallocs = -1; stash->space_head = 0; stash->space = 0; stash->send_waits = 0; stash->recv_waits = 0; stash->send_status = 0; stash->nsends = 0; stash->nrecvs = 0; stash->svalues = 0; stash->rvalues = 0; stash->rindices = 0; stash->nprocessed = 0; PetscFunctionReturn(0); }
/*@C PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All processors that share the object MUST call this routine EXACTLY the same number of times. This tag should only be used with the current object's communicator; do NOT use it with any other MPI communicator. Collective on PetscObject Input Parameter: . obj - the PETSc object; this must be cast with a (PetscObject), for example, PetscObjectGetNewTag((PetscObject)mat,&tag); Output Parameter: . tag - the new tag Level: developer Concepts: tag^getting Concepts: message tag^getting Concepts: MPI message tag^getting .seealso: PetscCommGetNewTag() @*/ PetscErrorCode PetscObjectGetNewTag(PetscObject obj,PetscMPIInt *tag) { PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscCommGetNewTag(obj->comm,tag);CHKERRQ(ierr); PetscFunctionReturn(0); }
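/*
   Hedged usage sketch (not from the PETSc sources): obtain a tag tied to a
   Mat's communicator and use it for a point-to-point exchange.  Every rank
   that shares the Mat must call PetscObjectGetNewTag() the same number of
   times; the payload, function name, and rank choices here are illustrative
   only and assume at least two ranks.
*/
#include <petscmat.h>

static PetscErrorCode ExchangeWithObjectTag(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    tag,rank;
  MPI_Comm       comm;
  PetscInt       payload = 0;  /* assumed data */

  PetscFunctionBegin;
  ierr = PetscObjectGetNewTag((PetscObject)A,&tag);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (rank == 0) {
    ierr = MPI_Send(&payload,1,MPIU_INT,1,tag,comm);CHKERRQ(ierr);
  } else if (rank == 1) {
    ierr = MPI_Recv(&payload,1,MPIU_INT,0,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}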
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,unitbytes,done; PetscInt i; char *tdata; MPI_Request *sendreqs,barrier; PetscSegBuffer segrank,segdata; PetscFunctionBegin; ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); tdata = (char*)todata; ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr); for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank; void *buf; ierr = PetscSegBufferGet(&segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(&segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); nrecvs++; } if (barrier == MPI_REQUEST_NULL) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); ierr = PetscFree(sendreqs);CHKERRQ(ierr); } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(&segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(&segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C PetscGatherMessageLengths2 - Computes info about messages that an MPI node will receive, including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths() except it takes TWO ilengths and outputs TWO olengths. Collective on MPI_Comm Input Parameters: + comm - Communicator . nsends - number of messages that are to be sent. . nrecvs - number of messages being received - ilengths1, ilengths2 - arrays of integers whose length equals the communicator size; a nonzero ilengths1[i] represents a message of length ilengths1[i] to be sent to rank i Output Parameters: + onodes - list of node-ids from which messages are expected - olengths1, olengths2 - corresponding message lengths Level: developer Concepts: mpi utility Notes: With this info, the correct MPI_Irecv() can be posted with the correct from-id, with a buffer with the right amount of memory required. The calling function deallocates the memory in onodes, olengths1, and olengths2. To determine nrecvs, one can use PetscGatherNumberOfMessages() .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages() @*/ PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2) { PetscErrorCode ierr; PetscMPIInt size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL; MPI_Request *s_waits = NULL,*r_waits = NULL; MPI_Status *w_status = NULL; PetscFunctionBegin; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */ ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr); s_waits = r_waits + nrecvs; /* Post the Irecv to get the message length-info */ ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr); ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr); for (i=0; i<nrecvs; i++) { buf_j = buf_r + (2*i); ierr = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr); } /* Post the Isends with the message length-info */ for (i=0,j=0; i<size; ++i) { if (ilengths1[i]) { buf_j = buf_s + (2*j); buf_j[0] = *(ilengths1+i); buf_j[1] = *(ilengths2+i); ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr); j++; } } if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends); /* Post waits on sends and receives */ if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);} /* Pack up the received data */ ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr); for (i=0; i<nrecvs; ++i) { (*onodes)[i] = w_status[i].MPI_SOURCE; buf_j = buf_r + (2*i); (*olengths1)[i] = buf_j[0]; (*olengths2)[i] = buf_j[1]; } ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) { PetscErrorCode ierr; PetscMPIInt i,*tag; MPI_Aint lb,unitbytes; MPI_Request *sendreq,*recvreq; PetscFunctionBegin; ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr); if (ntags > 0) { ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr); } for (i=1; i<ntags; i++) { ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr); } /* Perform complete initial rendezvous */ ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr); ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr); ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); for (i=0; i<nto; i++) { PetscMPIInt k; for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL; ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr); } for (i=0; i<*nfrom; i++) { void *header = (*(char**)fromdata) + count*unitbytes*i; PetscMPIInt k; for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL; ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr); } ierr = PetscFree(tag);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); *toreqs = sendreq; *fromreqs = recvreq; PetscFunctionReturn(0); }
PetscErrorCode DMPlexVTKWritePartition_ASCII(DM dm, FILE *fp) { MPI_Comm comm; PetscInt numCells = 0, cellHeight; PetscInt numLabelCells, cMax, cStart, cEnd, c; PetscMPIInt numProcs, rank, proc, tag; PetscBool hasLabel; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &numProcs);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = DMPlexGetVTKCellHeight(dm, &cellHeight);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd);CHKERRQ(ierr); ierr = DMPlexGetHybridBounds(dm, &cMax, NULL, NULL, NULL);CHKERRQ(ierr); if (cMax >= 0) cEnd = PetscMin(cEnd, cMax); ierr = DMPlexGetStratumSize(dm, "vtk", 1, &numLabelCells);CHKERRQ(ierr); hasLabel = numLabelCells > 0 ? PETSC_TRUE : PETSC_FALSE; for (c = cStart; c < cEnd; ++c) { if (hasLabel) { PetscInt value; ierr = DMPlexGetLabelValue(dm, "vtk", c, &value);CHKERRQ(ierr); if (value != 1) continue; } ++numCells; } if (!rank) { for (c = 0; c < numCells; ++c) {ierr = PetscFPrintf(comm, fp, "%d\n", rank);CHKERRQ(ierr);} for (proc = 1; proc < numProcs; ++proc) { MPI_Status status; ierr = MPI_Recv(&numCells, 1, MPIU_INT, proc, tag, comm, &status);CHKERRQ(ierr); for (c = 0; c < numCells; ++c) {ierr = PetscFPrintf(comm, fp, "%d\n", proc);CHKERRQ(ierr);} } } else { ierr = MPI_Send(&numCells, 1, MPIU_INT, 0, tag, comm);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@C PetscGatherMessageLengths - Computes info about messages that an MPI node will receive, including (from-id,length) pairs for each message. Collective on MPI_Comm Input Parameters: + comm - Communicator . nsends - number of messages that are to be sent. . nrecvs - number of messages being received - ilengths - an array of integers whose length equals the communicator size; a nonzero ilengths[i] represents a message of length ilengths[i] to be sent to rank i Output Parameters: + onodes - list of node-ids from which messages are expected - olengths - corresponding message lengths Level: developer Concepts: mpi utility Notes: With this info, the correct MPI_Irecv() can be posted with the correct from-id, with a buffer with the right amount of memory required. The calling function deallocates the memory in onodes and olengths. To determine nrecvs, one can use PetscGatherNumberOfMessages() .seealso: PetscGatherNumberOfMessages() @*/ PetscErrorCode PETSC_DLLEXPORT PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths) { PetscErrorCode ierr; PetscMPIInt size,tag,i,j; MPI_Request *s_waits = PETSC_NULL,*r_waits = PETSC_NULL; MPI_Status *w_status = PETSC_NULL; PetscFunctionBegin; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ ierr = PetscMalloc2(nrecvs+nsends,MPI_Request,&r_waits,nrecvs+nsends,MPI_Status,&w_status);CHKERRQ(ierr); s_waits = r_waits+nrecvs; /* Post the Irecv to get the message length-info */ ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),olengths);CHKERRQ(ierr); for (i=0; i<nrecvs; i++) { ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr); } /* Post the Isends with the message length-info */ for (i=0,j=0; i<size; ++i) { if (ilengths[i]) { ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr); j++; } } /* Post waits on sends and receives */ if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);} /* Pack up the received data */ ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),onodes);CHKERRQ(ierr); for (i=0; i<nrecvs; ++i) { (*onodes)[i] = w_status[i].MPI_SOURCE; } ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr); PetscFunctionReturn(0); }
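/*
   Hedged caller sketch for the two gather routines, mirroring the flag setup
   used by PetscCommBuildTwoSided_Allreduce() below: ilengths[] is assumed to
   already hold the number of items this rank will send to each rank (0 if
   none).  The caller is responsible for freeing onodes and olengths; the
   function name is hypothetical.
*/
#include <petscsys.h>

static PetscErrorCode CountAndSizeIncoming(MPI_Comm comm,const PetscMPIInt ilengths[])
{
  PetscErrorCode ierr;
  PetscMPIInt    size,nsends = 0,nrecvs,*iflags,*onodes,*olengths,p;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&iflags);CHKERRQ(ierr);
  for (p=0; p<size; p++) { iflags[p] = ilengths[p] ? 1 : 0; nsends += iflags[p]; }
  ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
  ierr = PetscFree(iflags);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,ilengths,&onodes,&olengths);CHKERRQ(ierr);
  /* ... post an MPI_Irecv() of olengths[i] items from rank onodes[i] ... */
  ierr = PetscFree(onodes);CHKERRQ(ierr);
  ierr = PetscFree(olengths);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}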
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) { PetscErrorCode ierr; PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks; PetscInt i; char *tdata,*fdata; MPI_Request *reqs,*sendreqs; MPI_Status *statuses; PetscFunctionBegin; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr); ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); for (i=0; i<nto; i++) iflags[toranks[i]] = 1; ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); ierr = PetscFree(iflags);CHKERRQ(ierr); ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); tdata = (char*)todata; ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr); sendreqs = reqs + nrecvs; for (i=0; i<nrecvs; i++) { ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); } for (i=0; i<nto; i++) { ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr); for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); *nfrom = nrecvs; *fromranks = franks; *(void**)fromdata = fdata; PetscFunctionReturn(0); }
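/*
   Hedged sketch of a caller of PetscCommBuildTwoSided(), matching the
   signature used by the implementations above: every rank ships one
   PetscMPIInt to each rank listed in toranks[], then learns which ranks
   targeted it and receives their payloads.  fromranks/fromdata are allocated
   by the library and freed by the caller; toranks/todata are assumed to be
   set up already and the function name is hypothetical.
*/
#include <petscsys.h>

static PetscErrorCode TwoSidedExample(MPI_Comm comm,PetscInt nto,const PetscMPIInt toranks[],const PetscMPIInt todata[])
{
  PetscErrorCode ierr;
  PetscInt       nfrom,i;
  PetscMPIInt    *fromranks,*fromdata;

  PetscFunctionBegin;
  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,todata,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) {
    /* fromdata[i] was sent to this rank by rank fromranks[i] */
  }
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}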
/* MatStashCreate_Private - Creates a stash,currently used for all the parallel matrix implementations. The stash is where elements of a matrix destined to be stored on other processors are kept until matrix assembly is done. This is a simple minded stash. Simply adds entries to end of stash. Input Parameters: comm - communicator, required for scatters. bs - stash block size. used when stashing blocks of values Output Parameters: stash - the newly created stash */ PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) { PetscErrorCode ierr; PetscInt max,*opt,nopt,i; PetscBool flg; PetscFunctionBegin; /* Require 2 tags,get the second using PetscCommGetNewTag() */ stash->comm = comm; ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; nopt = stash->size; ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); if (flg) { if (nopt == 1) max = opt[0]; else if (nopt == stash->size) max = opt[stash->rank]; else if (stash->rank < nopt) max = opt[stash->rank]; else max = 0; /* Use default */ stash->umax = max; } else { stash->umax = 0; } ierr = PetscFree(opt);CHKERRQ(ierr); if (bs <= 0) bs = 1; stash->bs = bs; stash->nmax = 0; stash->oldnmax = 0; stash->n = 0; stash->reallocs = -1; stash->space_head = 0; stash->space = 0; stash->send_waits = 0; stash->recv_waits = 0; stash->send_status = 0; stash->nsends = 0; stash->nrecvs = 0; stash->svalues = 0; stash->rvalues = 0; stash->rindices = 0; stash->nprocessed = 0; stash->reproduce = PETSC_FALSE; stash->blocktype = MPI_DATATYPE_NULL; ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); #if !defined(PETSC_HAVE_MPIUNI) ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr); if (!flg) { stash->ScatterBegin = MatStashScatterBegin_BTS; stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; stash->ScatterEnd = MatStashScatterEnd_BTS; stash->ScatterDestroy = MatStashScatterDestroy_BTS; } else { #endif stash->ScatterBegin = MatStashScatterBegin_Ref; stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; stash->ScatterEnd = MatStashScatterEnd_Ref; stash->ScatterDestroy = NULL; #if !defined(PETSC_HAVE_MPIUNI) } #endif PetscFunctionReturn(0); }
PetscErrorCode DMPlexVTKWriteCells_ASCII(DM dm, FILE *fp, PetscInt *totalCells) { MPI_Comm comm; DMLabel label; IS globalVertexNumbers = NULL; const PetscInt *gvertex; PetscInt dim; PetscInt numCorners = 0, totCorners = 0, maxCorners, *corners; PetscInt numCells = 0, totCells = 0, maxCells, cellHeight; PetscInt numLabelCells, maxLabelCells, cMax, cStart, cEnd, c, vStart, vEnd, v; PetscMPIInt numProcs, rank, proc, tag; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &numProcs);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = DMPlexGetVTKCellHeight(dm, &cellHeight);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd);CHKERRQ(ierr); ierr = DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd);CHKERRQ(ierr); ierr = DMPlexGetHybridBounds(dm, &cMax, NULL, NULL, NULL);CHKERRQ(ierr); if (cMax >= 0) cEnd = PetscMin(cEnd, cMax); ierr = DMPlexGetLabel(dm, "vtk", &label);CHKERRQ(ierr); ierr = DMPlexGetStratumSize(dm, "vtk", 1, &numLabelCells);CHKERRQ(ierr); ierr = MPI_Allreduce(&numLabelCells, &maxLabelCells, 1, MPIU_INT, MPI_MAX, comm);CHKERRQ(ierr); if (!maxLabelCells) label = NULL; for (c = cStart; c < cEnd; ++c) { PetscInt *closure = NULL; PetscInt closureSize, value; if (label) { ierr = DMLabelGetValue(label, c, &value);CHKERRQ(ierr); if (value != 1) continue; } ierr = DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); for (v = 0; v < closureSize*2; v += 2) { if ((closure[v] >= vStart) && (closure[v] < vEnd)) ++numCorners; } ierr = DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); ++numCells; } maxCells = numCells; ierr = MPI_Reduce(&numCells, &totCells, 1, MPIU_INT, MPI_SUM, 0, comm);CHKERRQ(ierr); ierr = MPI_Reduce(&numCells, &maxCells, 1, MPIU_INT, MPI_MAX, 0, comm);CHKERRQ(ierr); ierr = MPI_Reduce(&numCorners, &totCorners, 1, MPIU_INT, MPI_SUM, 0, comm);CHKERRQ(ierr); ierr = MPI_Reduce(&numCorners, &maxCorners, 1, MPIU_INT, MPI_MAX, 0, comm);CHKERRQ(ierr); ierr = DMPlexGetVertexNumbering(dm, &globalVertexNumbers);CHKERRQ(ierr); ierr = ISGetIndices(globalVertexNumbers, &gvertex);CHKERRQ(ierr); ierr = PetscMalloc1(maxCells, &corners);CHKERRQ(ierr); ierr = PetscFPrintf(comm, fp, "CELLS %d %d\n", totCells, totCorners+totCells);CHKERRQ(ierr); if (!rank) { PetscInt *remoteVertices; int *vertices; ierr = PetscMalloc1(maxCorners, &vertices);CHKERRQ(ierr); for (c = cStart, numCells = 0; c < cEnd; ++c) { PetscInt *closure = NULL; PetscInt closureSize, value, nC = 0; if (label) { ierr = DMLabelGetValue(label, c, &value);CHKERRQ(ierr); if (value != 1) continue; } ierr = DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); for (v = 0; v < closureSize*2; v += 2) { if ((closure[v] >= vStart) && (closure[v] < vEnd)) { const PetscInt gv = gvertex[closure[v] - vStart]; vertices[nC++] = gv < 0 ? 
-(gv+1) : gv; } } ierr = DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); corners[numCells++] = nC; ierr = PetscFPrintf(comm, fp, "%d ", nC);CHKERRQ(ierr); ierr = DMPlexInvertCell(dim, nC, vertices);CHKERRQ(ierr); for (v = 0; v < nC; ++v) { ierr = PetscFPrintf(comm, fp, " %d", vertices[v]);CHKERRQ(ierr); } ierr = PetscFPrintf(comm, fp, "\n");CHKERRQ(ierr); } if (numProcs > 1) {ierr = PetscMalloc1(maxCorners+maxCells, &remoteVertices);CHKERRQ(ierr);} for (proc = 1; proc < numProcs; ++proc) { MPI_Status status; ierr = MPI_Recv(&numCorners, 1, MPIU_INT, proc, tag, comm, &status);CHKERRQ(ierr); ierr = MPI_Recv(remoteVertices, numCorners, MPIU_INT, proc, tag, comm, &status);CHKERRQ(ierr); for (c = 0; c < numCorners;) { PetscInt nC = remoteVertices[c++]; for (v = 0; v < nC; ++v, ++c) { vertices[v] = remoteVertices[c]; } ierr = DMPlexInvertCell(dim, nC, vertices);CHKERRQ(ierr); ierr = PetscFPrintf(comm, fp, "%d ", nC);CHKERRQ(ierr); for (v = 0; v < nC; ++v) { ierr = PetscFPrintf(comm, fp, " %d", vertices[v]);CHKERRQ(ierr); } ierr = PetscFPrintf(comm, fp, "\n");CHKERRQ(ierr); } } if (numProcs > 1) {ierr = PetscFree(remoteVertices);CHKERRQ(ierr);} ierr = PetscFree(vertices);CHKERRQ(ierr); } else { PetscInt *localVertices, numSend = numCells+numCorners, k = 0; ierr = PetscMalloc1(numSend, &localVertices);CHKERRQ(ierr); for (c = cStart, numCells = 0; c < cEnd; ++c) { PetscInt *closure = NULL; PetscInt closureSize, value, nC = 0; if (label) { ierr = DMLabelGetValue(label, c, &value);CHKERRQ(ierr); if (value != 1) continue; } ierr = DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); for (v = 0; v < closureSize*2; v += 2) { if ((closure[v] >= vStart) && (closure[v] < vEnd)) { const PetscInt gv = gvertex[closure[v] - vStart]; closure[nC++] = gv < 0 ? -(gv+1) : gv; } } corners[numCells++] = nC; localVertices[k++] = nC; for (v = 0; v < nC; ++v, ++k) { localVertices[k] = closure[v]; } ierr = DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); } if (k != numSend) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Invalid number of vertices to send %d should be %d", k, numSend); ierr = MPI_Send(&numSend, 1, MPIU_INT, 0, tag, comm);CHKERRQ(ierr); ierr = MPI_Send(localVertices, numSend, MPIU_INT, 0, tag, comm);CHKERRQ(ierr); ierr = PetscFree(localVertices);CHKERRQ(ierr); } ierr = ISRestoreIndices(globalVertexNumbers, &gvertex);CHKERRQ(ierr); ierr = PetscFPrintf(comm, fp, "CELL_TYPES %d\n", totCells);CHKERRQ(ierr); if (!rank) { PetscInt cellType; for (c = 0; c < numCells; ++c) { ierr = DMPlexVTKGetCellType(dm, dim, corners[c], &cellType);CHKERRQ(ierr); ierr = PetscFPrintf(comm, fp, "%d\n", cellType);CHKERRQ(ierr); } for (proc = 1; proc < numProcs; ++proc) { MPI_Status status; ierr = MPI_Recv(&numCells, 1, MPIU_INT, proc, tag, comm, &status);CHKERRQ(ierr); ierr = MPI_Recv(corners, numCells, MPIU_INT, proc, tag, comm, &status);CHKERRQ(ierr); for (c = 0; c < numCells; ++c) { ierr = DMPlexVTKGetCellType(dm, dim, corners[c], &cellType);CHKERRQ(ierr); ierr = PetscFPrintf(comm, fp, "%d\n", cellType);CHKERRQ(ierr); } } } else { ierr = MPI_Send(&numCells, 1, MPIU_INT, 0, tag, comm);CHKERRQ(ierr); ierr = MPI_Send(corners, numCells, MPIU_INT, 0, tag, comm);CHKERRQ(ierr); } ierr = PetscFree(corners);CHKERRQ(ierr); *totalCells = totCells; PetscFunctionReturn(0); }
PetscErrorCode DMPlexVTKWriteSection_ASCII(DM dm, PetscSection section, PetscSection globalSection, Vec v, FILE *fp, PetscInt enforceDof, PetscInt precision, PetscReal scale) { MPI_Comm comm; const MPI_Datatype mpiType = MPIU_SCALAR; PetscScalar *array; PetscInt numDof = 0, maxDof; PetscInt numLabelCells, cellHeight, cMax, cStart, cEnd, numLabelVertices, vMax, vStart, vEnd, pStart, pEnd, p; PetscMPIInt numProcs, rank, proc, tag; PetscBool hasLabel; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); PetscValidHeaderSpecific(dm,DM_CLASSID,1); PetscValidHeaderSpecific(v,VEC_CLASSID,4); if (precision < 0) precision = 6; ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &numProcs);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr); /* VTK only wants the values at cells or vertices */ ierr = DMPlexGetVTKCellHeight(dm, &cellHeight);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd);CHKERRQ(ierr); ierr = DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd);CHKERRQ(ierr); ierr = DMPlexGetHybridBounds(dm, &cMax, NULL, NULL, &vMax);CHKERRQ(ierr); if (cMax >= 0) cEnd = PetscMin(cEnd, cMax); if (vMax >= 0) vEnd = PetscMin(vEnd, vMax); pStart = PetscMax(PetscMin(cStart, vStart), pStart); pEnd = PetscMin(PetscMax(cEnd, vEnd), pEnd); ierr = DMPlexGetStratumSize(dm, "vtk", 1, &numLabelCells);CHKERRQ(ierr); ierr = DMPlexGetStratumSize(dm, "vtk", 2, &numLabelVertices);CHKERRQ(ierr); hasLabel = numLabelCells > 0 || numLabelVertices > 0 ? PETSC_TRUE : PETSC_FALSE; for (p = pStart; p < pEnd; ++p) { /* Reject points not either cells or vertices */ if (((p < cStart) || (p >= cEnd)) && ((p < vStart) || (p >= vEnd))) continue; if (hasLabel) { PetscInt value; if (((p >= cStart) && (p < cEnd) && numLabelCells) || ((p >= vStart) && (p < vEnd) && numLabelVertices)) { ierr = DMPlexGetLabelValue(dm, "vtk", p, &value);CHKERRQ(ierr); if (value != 1) continue; } } ierr = PetscSectionGetDof(section, p, &numDof);CHKERRQ(ierr); if (numDof) break; } ierr = MPI_Allreduce(&numDof, &maxDof, 1, MPIU_INT, MPI_MAX, comm);CHKERRQ(ierr); enforceDof = PetscMax(enforceDof, maxDof); ierr = VecGetArray(v, &array);CHKERRQ(ierr); if (!rank) { char formatString[8]; ierr = PetscSNPrintf(formatString, 8, "%%.%de", precision);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { /* Here we lose a way to filter points by keeping them out of the Numbering */ PetscInt dof, off, goff, d; /* Reject points not either cells or vertices */ if (((p < cStart) || (p >= cEnd)) && ((p < vStart) || (p >= vEnd))) continue; if (hasLabel) { PetscInt value; if (((p >= cStart) && (p < cEnd) && numLabelCells) || ((p >= vStart) && (p < vEnd) && numLabelVertices)) { ierr = DMPlexGetLabelValue(dm, "vtk", p, &value);CHKERRQ(ierr); if (value != 1) continue; } } ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = PetscSectionGetOffset(globalSection, p, &goff);CHKERRQ(ierr); if (dof && goff >= 0) { for (d = 0; d < dof; d++) { if (d > 0) { ierr = PetscFPrintf(comm, fp, " ");CHKERRQ(ierr); } ierr = PetscFPrintf(comm, fp, formatString, PetscRealPart(array[off+d])*scale);CHKERRQ(ierr); } for (d = dof; d < enforceDof; d++) { ierr = PetscFPrintf(comm, fp, " 0.0");CHKERRQ(ierr); } ierr = PetscFPrintf(comm, fp, "\n");CHKERRQ(ierr); } } for (proc = 1; proc < numProcs; ++proc) { PetscScalar *remoteValues; PetscInt size = 0, d; 
MPI_Status status; ierr = MPI_Recv(&size, 1, MPIU_INT, proc, tag, comm, &status);CHKERRQ(ierr); ierr = PetscMalloc1(size, &remoteValues);CHKERRQ(ierr); ierr = MPI_Recv(remoteValues, size, mpiType, proc, tag, comm, &status);CHKERRQ(ierr); for (p = 0; p < size/maxDof; ++p) { for (d = 0; d < maxDof; ++d) { if (d > 0) { ierr = PetscFPrintf(comm, fp, " ");CHKERRQ(ierr); } ierr = PetscFPrintf(comm, fp, formatString, PetscRealPart(remoteValues[p*maxDof+d])*scale);CHKERRQ(ierr); } for (d = maxDof; d < enforceDof; ++d) { ierr = PetscFPrintf(comm, fp, " 0.0");CHKERRQ(ierr); } ierr = PetscFPrintf(comm, fp, "\n");CHKERRQ(ierr); } ierr = PetscFree(remoteValues);CHKERRQ(ierr); } } else { PetscScalar *localValues; PetscInt size, k = 0; ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); ierr = PetscMalloc1(size, &localValues);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt dof, off, goff, d; /* Reject points not either cells or vertices */ if (((p < cStart) || (p >= cEnd)) && ((p < vStart) || (p >= vEnd))) continue; if (hasLabel) { PetscInt value; if (((p >= cStart) && (p < cEnd) && numLabelCells) || ((p >= vStart) && (p < vEnd) && numLabelVertices)) { ierr = DMPlexGetLabelValue(dm, "vtk", p, &value);CHKERRQ(ierr); if (value != 1) continue; } } ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = PetscSectionGetOffset(globalSection, p, &goff);CHKERRQ(ierr); if (goff >= 0) { for (d = 0; d < dof; ++d) { localValues[k++] = array[off+d]; } } } ierr = MPI_Send(&k, 1, MPIU_INT, 0, tag, comm);CHKERRQ(ierr); ierr = MPI_Send(localValues, k, mpiType, 0, tag, comm);CHKERRQ(ierr); ierr = PetscFree(localValues);CHKERRQ(ierr); } ierr = VecRestoreArray(v, &array);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { PetscErrorCode ierr; Mat Cmpi; Mat_PtAPMPI *ptap; PetscFreeSpaceList free_space=NULL,current_space=NULL; Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c; Mat_SeqAIJ *ad =(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; Mat_SeqAIJ *p_loc,*p_oth; PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pdti,*pdtj,*poti,*potj,*ptJ; PetscInt *adi=ad->i,*aj,*aoi=ao->i,nnz; PetscInt *lnk,*owners_co,*coi,*coj,i,k,pnz,row; PetscInt am=A->rmap->n,pN=P->cmap->N,pm=P->rmap->n,pn=P->cmap->n; PetscBT lnkbt; MPI_Comm comm; PetscMPIInt size,rank,tagi,tagj,*len_si,*len_s,*len_ri,icompleted=0; PetscInt **buf_rj,**buf_ri,**buf_ri_k; PetscInt len,proc,*dnz,*onz,*owners; PetscInt nzi,*pti,*ptj; PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; MPI_Request *swaits,*rwaits; MPI_Status *sstatus,rstatus; Mat_Merge_SeqsToMPI *merge; PetscInt *api,*apj,*Jptr,apnz,*prmap=p->garray,pon,nspacedouble=0,j,ap_rmax=0; PetscReal afill=1.0,afill_tmp; PetscInt rmax; #if defined(PTAP_PROFILE) PetscLogDouble t0,t1,t2,t3,t4; #endif PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t0);CHKERRQ(ierr); #endif /* check if matrix local sizes are compatible */ if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) { SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); } if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) { SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); } ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* create struct Mat_PtAPMPI and attached it to C later */ ierr = PetscNew(&ptap);CHKERRQ(ierr); ierr = PetscNew(&merge);CHKERRQ(ierr); ptap->merge = merge; ptap->reuse = MAT_INITIAL_MATRIX; /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr); /* get P_loc by taking all local rows of P */ ierr = MatMPIAIJGetLocalMat(P,MAT_INITIAL_MATRIX,&ptap->P_loc);CHKERRQ(ierr); p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; pi_loc = p_loc->i; pj_loc = p_loc->j; pi_oth = p_oth->i; pj_oth = p_oth->j; #if defined(PTAP_PROFILE) ierr = PetscTime(&t1);CHKERRQ(ierr); #endif /* first, compute symbolic AP = A_loc*P = A_diag*P_loc + A_off*P_oth */ /*-------------------------------------------------------------------*/ ierr = PetscMalloc1((am+1),&api);CHKERRQ(ierr); api[0] = 0; /* create and initialize a linked list */ ierr = PetscLLCondensedCreate(pN,pN,&lnk,&lnkbt);CHKERRQ(ierr); /* Initial FreeSpace size is fill*(nnz(A) + nnz(P)) -OOM for ex56, np=8k on Intrepid! 
*/ ierr = PetscFreeSpaceGet((PetscInt)(fill*(adi[am]+aoi[am]+pi_loc[pm])),&free_space);CHKERRQ(ierr); current_space = free_space; for (i=0; i<am; i++) { /* diagonal portion of A */ nzi = adi[i+1] - adi[i]; aj = ad->j + adi[i]; for (j=0; j<nzi; j++) { row = aj[j]; pnz = pi_loc[row+1] - pi_loc[row]; Jptr = pj_loc + pi_loc[row]; /* add non-zero cols of P into the sorted linked list lnk */ ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } /* off-diagonal portion of A */ nzi = aoi[i+1] - aoi[i]; aj = ao->j + aoi[i]; for (j=0; j<nzi; j++) { row = aj[j]; pnz = pi_oth[row+1] - pi_oth[row]; Jptr = pj_oth + pi_oth[row]; ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } apnz = lnk[0]; api[i+1] = api[i] + apnz; if (ap_rmax < apnz) ap_rmax = apnz; /* if free space is not available, double the total space in the list */ if (current_space->local_remaining<apnz) { ierr = PetscFreeSpaceGet(apnz+current_space->total_array_size,&current_space);CHKERRQ(ierr); nspacedouble++; } /* Copy data into free space, then initialize lnk */ ierr = PetscLLCondensedClean(pN,apnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr); current_space->array += apnz; current_space->local_used += apnz; current_space->local_remaining -= apnz; } /* Allocate space for apj, initialize apj, and */ /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc1((api[am]+1),&apj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,apj);CHKERRQ(ierr); afill_tmp = (PetscReal)api[am]/(adi[am]+aoi[am]+pi_loc[pm]+1); if (afill_tmp > afill) afill = afill_tmp; #if defined(PTAP_PROFILE) ierr = PetscTime(&t2);CHKERRQ(ierr); #endif /* determine symbolic Co=(p->B)^T*AP - send to others */ /*----------------------------------------------------*/ ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&poti,&potj);CHKERRQ(ierr); /* then, compute symbolic Co = (p->B)^T*AP */ pon = (p->B)->cmap->n; /* total num of rows to be sent to other processors >= (num of nonzero rows of C_seq) - pn */ ierr = PetscMalloc1((pon+1),&coi);CHKERRQ(ierr); coi[0] = 0; /* set initial free space to be fill*(nnz(p->B) + nnz(AP)) */ nnz = fill*(poti[pon] + api[am]); ierr = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr); current_space = free_space; for (i=0; i<pon; i++) { pnz = poti[i+1] - poti[i]; ptJ = potj + poti[i]; for (j=0; j<pnz; j++) { row = ptJ[j]; /* row of AP == col of Pot */ apnz = api[row+1] - api[row]; Jptr = apj + api[row]; /* add non-zero cols of AP into the sorted linked list lnk */ ierr = PetscLLCondensedAddSorted(apnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } nnz = lnk[0]; /* If free space is not available, double the total space in the list */ if (current_space->local_remaining<nnz) { ierr = PetscFreeSpaceGet(nnz+current_space->total_array_size,&current_space);CHKERRQ(ierr); nspacedouble++; } /* Copy data into free space, and zero out denserows */ ierr = PetscLLCondensedClean(pN,nnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr); current_space->array += nnz; current_space->local_used += nnz; current_space->local_remaining -= nnz; coi[i+1] = coi[i] + nnz; } ierr = PetscMalloc1((coi[pon]+1),&coj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,coj);CHKERRQ(ierr); afill_tmp = (PetscReal)coi[pon]/(poti[pon] + api[am]+1); if (afill_tmp > afill) afill = afill_tmp; ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->B,&poti,&potj);CHKERRQ(ierr); /* send j-array (coj) of Co to other processors */ /*----------------------------------------------*/ /* determine row ownership */ ierr = 
PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); merge->rowmap->n = pn; merge->rowmap->bs = 1; ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); owners = merge->rowmap->range; /* determine the number of messages to send, their lengths */ ierr = PetscMalloc2(size,&len_si,size,&sstatus);CHKERRQ(ierr); ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr); ierr = PetscCalloc1(size,&merge->len_s);CHKERRQ(ierr); len_s = merge->len_s; merge->nsend = 0; ierr = PetscMalloc1((size+2),&owners_co);CHKERRQ(ierr); proc = 0; for (i=0; i<pon; i++) { while (prmap[i] >= owners[proc+1]) proc++; len_si[proc]++; /* num of rows in Co to be sent to [proc] */ len_s[proc] += coi[i+1] - coi[i]; } len = 0; /* max length of buf_si[] */ owners_co[0] = 0; for (proc=0; proc<size; proc++) { owners_co[proc+1] = owners_co[proc] + len_si[proc]; if (len_si[proc]) { merge->nsend++; len_si[proc] = 2*(len_si[proc] + 1); len += len_si[proc]; } } /* determine the number and length of messages to receive for coi and coj */ ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); /* post the Irecv and Isend of coj */ ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rwaits);CHKERRQ(ierr); ierr = PetscMalloc1((merge->nsend+1),&swaits);CHKERRQ(ierr); for (proc=0, k=0; proc<size; proc++) { if (!len_s[proc]) continue; i = owners_co[proc]; ierr = MPI_Isend(coj+coi[i],len_s[proc],MPIU_INT,proc,tagj,comm,swaits+k);CHKERRQ(ierr); k++; } /* receives and sends of coj are complete */ for (i=0; i<merge->nrecv; i++) { ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus);CHKERRQ(ierr); } ierr = PetscFree(rwaits);CHKERRQ(ierr); if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus);CHKERRQ(ierr);} /* send and recv coi */ /*-------------------*/ ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&rwaits);CHKERRQ(ierr); ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ for (proc=0,k=0; proc<size; proc++) { if (!len_s[proc]) continue; /* form outgoing message for i-structure: buf_si[0]: nrows to be sent [1:nrows]: row index (global) [nrows+1:2*nrows+1]: i-structure index */ /*-------------------------------------------*/ nrows = len_si[proc]/2 - 1; buf_si_i = buf_si + nrows+1; buf_si[0] = nrows; buf_si_i[0] = 0; nrows = 0; for (i=owners_co[proc]; i<owners_co[proc+1]; i++) { nzi = coi[i+1] - coi[i]; buf_si_i[nrows+1] = buf_si_i[nrows] + nzi; /* i-structure */ buf_si[nrows+1] = prmap[i] -owners[proc]; /* local row index */ nrows++; } ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,swaits+k);CHKERRQ(ierr); k++; buf_si += len_si[proc]; } i = merge->nrecv; while (i--) { ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus);CHKERRQ(ierr); } ierr = PetscFree(rwaits);CHKERRQ(ierr); if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus);CHKERRQ(ierr);} ierr = PetscFree2(len_si,sstatus);CHKERRQ(ierr); ierr = PetscFree(len_ri);CHKERRQ(ierr); ierr = PetscFree(swaits);CHKERRQ(ierr); ierr = PetscFree(buf_s);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t3);CHKERRQ(ierr); #endif /* compute the local portion of C (mpi mat) */ /*------------------------------------------*/ ierr = 
MatGetSymbolicTranspose_SeqAIJ(p->A,&pdti,&pdtj);CHKERRQ(ierr); /* allocate pti array and free space for accumulating nonzero column info */ ierr = PetscMalloc1((pn+1),&pti);CHKERRQ(ierr); pti[0] = 0; /* set initial free space to be fill*(nnz(P) + nnz(AP)) */ nnz = fill*(pi_loc[pm] + api[am]); ierr = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr); current_space = free_space; ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextci);CHKERRQ(ierr); for (k=0; k<merge->nrecv; k++) { buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ nrows = *buf_ri_k[k]; nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ nextci[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ } ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr); rmax = 0; for (i=0; i<pn; i++) { /* add pdt[i,:]*AP into lnk */ pnz = pdti[i+1] - pdti[i]; ptJ = pdtj + pdti[i]; for (j=0; j<pnz; j++) { row = ptJ[j]; /* row of AP == col of Pt */ apnz = api[row+1] - api[row]; Jptr = apj + api[row]; /* add non-zero cols of AP into the sorted linked list lnk */ ierr = PetscLLCondensedAddSorted(apnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } /* add received col data into lnk */ for (k=0; k<merge->nrecv; k++) { /* k-th received message */ if (i == *nextrow[k]) { /* i-th row */ nzi = *(nextci[k]+1) - *nextci[k]; Jptr = buf_rj[k] + *nextci[k]; ierr = PetscLLCondensedAddSorted(nzi,Jptr,lnk,lnkbt);CHKERRQ(ierr); nextrow[k]++; nextci[k]++; } } nnz = lnk[0]; /* if free space is not available, make more free space */ if (current_space->local_remaining<nnz) { ierr = PetscFreeSpaceGet(nnz+current_space->total_array_size,&current_space);CHKERRQ(ierr); nspacedouble++; } /* copy data into free space, then initialize lnk */ ierr = PetscLLCondensedClean(pN,nnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr); ierr = MatPreallocateSet(i+owners[rank],nnz,current_space->array,dnz,onz);CHKERRQ(ierr); current_space->array += nnz; current_space->local_used += nnz; current_space->local_remaining -= nnz; pti[i+1] = pti[i] + nnz; if (nnz > rmax) rmax = nnz; } ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&pdti,&pdtj);CHKERRQ(ierr); ierr = PetscFree3(buf_ri_k,nextrow,nextci);CHKERRQ(ierr); ierr = PetscMalloc1((pti[pn]+1),&ptj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,ptj);CHKERRQ(ierr); afill_tmp = (PetscReal)pti[pn]/(pi_loc[pm] + api[am]+1); if (afill_tmp > afill) afill = afill_tmp; ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); /* create symbolic parallel matrix Cmpi */ /*--------------------------------------*/ ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr); ierr = MatSetSizes(Cmpi,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetBlockSizes(Cmpi,P->cmap->bs,P->cmap->bs);CHKERRQ(ierr); ierr = MatSetType(Cmpi,MATMPIAIJ);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr); ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); merge->bi = pti; /* Cseq->i */ merge->bj = ptj; /* Cseq->j */ merge->coi = coi; /* Co->i */ merge->coj = coj; /* Co->j */ merge->buf_ri = buf_ri; merge->buf_rj = buf_rj; merge->owners_co = owners_co; merge->destroy = Cmpi->ops->destroy; merge->duplicate = Cmpi->ops->duplicate; /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */ Cmpi->assembled = PETSC_FALSE; Cmpi->ops->destroy = MatDestroy_MPIAIJ_PtAP; Cmpi->ops->duplicate = MatDuplicate_MPIAIJ_MatPtAP; /* attach the supporting struct to Cmpi for reuse */ c = (Mat_MPIAIJ*)Cmpi->data; c->ptap = ptap; 
ptap->api = api; ptap->apj = apj; ptap->rmax = ap_rmax; *C = Cmpi; /* flag 'scalable' determines which implementation is used: 0: do dense axpy in MatPtAPNumeric() - fast, but requires storage of a nonscalable dense array apa; 1: do sparse axpy in MatPtAPNumeric() - might be slower, uses a sparse array apa */ /* set default scalable */ ptap->scalable = PETSC_TRUE; ierr = PetscOptionsGetBool(((PetscObject)Cmpi)->prefix,"-matptap_scalable",&ptap->scalable,NULL);CHKERRQ(ierr); if (!ptap->scalable) { /* Do dense axpy */ ierr = PetscCalloc1(pN,&ptap->apa);CHKERRQ(ierr); } else { ierr = PetscCalloc1(ap_rmax+1,&ptap->apa);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t4);CHKERRQ(ierr); if (rank==1) {ierr = PetscPrintf(MPI_COMM_SELF," [%d] PtAPSymbolic %g/P + %g/AP + %g/comm + %g/PtAP = %g\n",rank,t1-t0,t2-t1,t3-t2,t4-t3,t4-t0);CHKERRQ(ierr);} #endif #if defined(PETSC_USE_INFO) if (pti[pn] != 0) { ierr = PetscInfo3(Cmpi,"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); ierr = PetscInfo1(Cmpi,"Use MatPtAP(A,P,MatReuse,%G,&C) for best performance.\n",afill);CHKERRQ(ierr); } else { ierr = PetscInfo(Cmpi,"Empty matrix product\n");CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }
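/*
   Hedged usage sketch: the symbolic routine above is normally reached through
   the public MatPtAP() interface, where fill is the caller's estimate of
   nnz(C)/(nnz(A)+nnz(P)) and -matptap_scalable selects the sparse-axpy
   numeric path.  A and P are assumed to be assembled MPIAIJ matrices with
   compatible layouts; the fill value 2.0 and the function name are
   illustrative.
*/
#include <petscmat.h>

static PetscErrorCode FormCoarseOperator(Mat A,Mat P)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,2.0,&C);CHKERRQ(ierr); /* symbolic + numeric */
  /* ... after changing values in A (same nonzero pattern) ... */
  ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,2.0,&C);CHKERRQ(ierr);   /* numeric phase only */
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}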
PetscErrorCode heavyEdgeMatchAgg(IS perm,Mat a_Gmat,PetscInt verbose,PetscCoarsenData **a_locals_llist) { PetscErrorCode ierr; PetscBool isMPI; MPI_Comm wcomm = ((PetscObject)a_Gmat)->comm; PetscInt sub_it,kk,n,ix,*idx,*ii,iter,Iend,my0; PetscMPIInt rank,size; const PetscInt nloc = a_Gmat->rmap->n,n_iter=6; /* need to figure out how to stop this */ PetscInt *lid_cprowID,*lid_gid; PetscBool *lid_matched; Mat_SeqAIJ *matA, *matB=0; Mat_MPIAIJ *mpimat=0; PetscScalar one=1.; PetscCoarsenData *agg_llists = PETSC_NULL,*deleted_list = PETSC_NULL; Mat cMat,tMat,P; MatScalar *ap; PetscMPIInt tag1,tag2; PetscFunctionBegin; ierr = MPI_Comm_rank(wcomm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(wcomm, &size);CHKERRQ(ierr); ierr = MatGetOwnershipRange(a_Gmat, &my0, &Iend);CHKERRQ(ierr); ierr = PetscCommGetNewTag(wcomm, &tag1);CHKERRQ(ierr); ierr = PetscCommGetNewTag(wcomm, &tag2);CHKERRQ(ierr); ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_gid);CHKERRQ(ierr); /* explicit array needed */ ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_cprowID);CHKERRQ(ierr); ierr = PetscMalloc(nloc*sizeof(PetscBool), &lid_matched);CHKERRQ(ierr); ierr = PetscCDCreate(nloc, &agg_llists);CHKERRQ(ierr); /* ierr = PetscCDSetChuckSize(agg_llists, nloc+1);CHKERRQ(ierr); */ *a_locals_llist = agg_llists; ierr = PetscCDCreate(size, &deleted_list);CHKERRQ(ierr); ierr = PetscCDSetChuckSize(deleted_list, 100);CHKERRQ(ierr); /* setup 'lid_gid' for scatters and add self to all lists */ for (kk=0;kk<nloc;kk++) { lid_gid[kk] = kk + my0; ierr = PetscCDAppendID(agg_llists, kk, my0+kk);CHKERRQ(ierr); } /* make a copy of the graph, this gets destroyed in iterates */ ierr = MatDuplicate(a_Gmat,MAT_COPY_VALUES,&cMat);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI);CHKERRQ(ierr); iter = 0; while(iter++ < n_iter) { PetscScalar *cpcol_gid,*cpcol_max_ew,*cpcol_max_pe,*lid_max_ew; PetscBool *cpcol_matched; PetscMPIInt *cpcol_pe,proc; Vec locMaxEdge,locMaxPE,ghostMaxEdge,ghostMaxPE; PetscInt nEdges,n_nz_row,jj; Edge *Edges; PetscInt gid; const PetscInt *perm_ix, n_sub_its = 120; /* get submatrices of cMat */ if (isMPI) { mpimat = (Mat_MPIAIJ*)cMat->data; matA = (Mat_SeqAIJ*)mpimat->A->data; matB = (Mat_SeqAIJ*)mpimat->B->data; /* force compressed storage of B */ matB->compressedrow.check = PETSC_TRUE; ierr = MatCheckCompressedRow(mpimat->B,&matB->compressedrow,matB->i,cMat->rmap->n,-1.0);CHKERRQ(ierr); assert(matB->compressedrow.use); } else { matA = (Mat_SeqAIJ*)cMat->data; } assert(matA && !matA->compressedrow.use); assert(matB==0 || matB->compressedrow.use); /* set max edge on nodes */ ierr = MatGetVecs(cMat, &locMaxEdge, 0);CHKERRQ(ierr); ierr = MatGetVecs(cMat, &locMaxPE, 0);CHKERRQ(ierr); /* get 'cpcol_pe' & 'cpcol_gid' & init. 
'cpcol_matched' using 'mpimat->lvec' */ if (mpimat) { Vec vec; PetscScalar vval; ierr = MatGetVecs(cMat, &vec, 0);CHKERRQ(ierr); /* cpcol_pe */ vval = (PetscScalar)(rank); for (kk=0,gid=my0;kk<nloc;kk++,gid++) { ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */ } ierr = VecAssemblyBegin(vec);CHKERRQ(ierr); ierr = VecAssemblyEnd(vec);CHKERRQ(ierr); ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get proc ID in 'cpcol_gid' */ ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscInt), &cpcol_pe);CHKERRQ(ierr); for (kk=0;kk<n;kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]); ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* cpcol_gid */ for (kk=0,gid=my0;kk<nloc;kk++,gid++) { vval = (PetscScalar)(gid); ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */ } ierr = VecAssemblyBegin(vec);CHKERRQ(ierr); ierr = VecAssemblyEnd(vec);CHKERRQ(ierr); ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecDestroy(&vec);CHKERRQ(ierr); ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get proc ID in 'cpcol_gid' */ /* cpcol_matched */ ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscBool), &cpcol_matched);CHKERRQ(ierr); for (kk=0;kk<n;kk++) cpcol_matched[kk] = PETSC_FALSE; } /* need an inverse map - locals */ for (kk=0;kk<nloc;kk++) lid_cprowID[kk] = -1; /* set index into compressed row 'lid_cprowID' */ if (matB) { ii = matB->compressedrow.i; for (ix=0; ix<matB->compressedrow.nrows; ix++) { lid_cprowID[matB->compressedrow.rindex[ix]] = ix; } } /* get removed IS, use '' */ /* if (iter==1) { */ /* PetscInt *lid_rem,idx; */ /* ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_rem);CHKERRQ(ierr); */ /* for (kk=idx=0;kk<nloc;kk++){ */ /* PetscInt nn,lid=kk; */ /* ii = matA->i; nn = ii[lid+1] - ii[lid]; */ /* if ((ix=lid_cprowID[lid]) != -1) { /\* if I have any ghost neighbors *\/ */ /* ii = matB->compressedrow.i; */ /* nn += ii[ix+1] - ii[ix]; */ /* } */ /* if (nn < 2) { */ /* lid_rem[idx++] = kk + my0; */ /* } */ /* } */ /* ierr = PetscCDSetRemovedIS(agg_llists, wcomm, idx, lid_rem);CHKERRQ(ierr); */ /* ierr = PetscFree(lid_rem);CHKERRQ(ierr); */ /* } */ /* compute 'locMaxEdge' & 'locMaxPE', and create list of edges, count edges' */ for (nEdges=0,kk=0,gid=my0;kk<nloc;kk++,gid++){ PetscReal max_e = 0., tt; PetscScalar vval; PetscInt lid = kk; PetscMPIInt max_pe=rank,pe; ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid]; ap = matA->a + ii[lid]; for (jj=0; jj<n; jj++) { PetscInt lidj = idx[jj]; if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]); if (lidj > lid) nEdges++; } if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix]; ap = matB->a + ii[ix]; idx = matB->j + ii[ix]; for (jj=0 ; jj<n ; jj++) { if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt; nEdges++; if ((pe=cpcol_pe[idx[jj]]) > max_pe) max_pe = pe; } } vval = max_e; ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); vval = (PetscScalar)max_pe; ierr = 
VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr); ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr); ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr); ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr); /* get 'cpcol_max_ew' & 'cpcol_max_pe' */ if (mpimat) { ierr = VecDuplicate(mpimat->lvec, &ghostMaxEdge);CHKERRQ(ierr); ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr); ierr = VecDuplicate(mpimat->lvec, &ghostMaxPE);CHKERRQ(ierr); ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr); } /* setup sorted list of edges */ ierr = PetscMalloc(nEdges*sizeof(Edge), &Edges);CHKERRQ(ierr); ierr = ISGetIndices(perm, &perm_ix);CHKERRQ(ierr); for (nEdges=n_nz_row=kk=0;kk<nloc;kk++){ PetscInt nn, lid = perm_ix[kk]; ii = matA->i; nn = n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid]; ap = matA->a + ii[lid]; for (jj=0; jj<n; jj++) { PetscInt lidj = idx[jj]; assert(PetscRealPart(ap[jj])>0.); if (lidj > lid) { Edges[nEdges].lid0 = lid; Edges[nEdges].gid1 = lidj + my0; Edges[nEdges].cpid1 = -1; Edges[nEdges].weight = PetscRealPart(ap[jj]); nEdges++; } } if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix]; ap = matB->a + ii[ix]; idx = matB->j + ii[ix]; nn += n; for (jj=0 ; jj<n ; jj++) { assert(PetscRealPart(ap[jj])>0.); Edges[nEdges].lid0 = lid; Edges[nEdges].gid1 = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]); Edges[nEdges].cpid1 = idx[jj]; Edges[nEdges].weight = PetscRealPart(ap[jj]); nEdges++; } } if (nn > 1) n_nz_row++; else if (iter == 1){ /* should select this because it is technically in the MIS but lets not */ ierr = PetscCDRemoveAll(agg_llists, lid);CHKERRQ(ierr); } } ierr = ISRestoreIndices(perm,&perm_ix);CHKERRQ(ierr); qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare); /* projection matrix */ ierr = MatCreateAIJ(wcomm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, 0, 1, 0, &P);CHKERRQ(ierr); /* clear matched flags */ for (kk=0;kk<nloc;kk++) lid_matched[kk] = PETSC_FALSE; /* process - communicate - process */ for (sub_it=0;sub_it<n_sub_its;sub_it++){ PetscInt nactive_edges; ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr); for (kk=nactive_edges=0;kk<nEdges;kk++){ /* HEM */ const Edge *e = &Edges[kk]; const PetscInt lid0=e->lid0,gid1=e->gid1,cpid1=e->cpid1,gid0=lid0+my0,lid1=gid1-my0; PetscBool isOK = PETSC_TRUE; /* skip if either (local) vertex is done already */ if (lid_matched[lid0] || (gid1>=my0 && gid1<Iend && lid_matched[gid1-my0])) { continue; } /* skip if ghost vertex is done */ if (cpid1 != -1 && cpcol_matched[cpid1]) { continue; } nactive_edges++; /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */ if (PetscRealPart(lid_max_ew[lid0]) > e->weight + 1.e-12) { continue; } if (cpid1 == -1) { if (PetscRealPart(lid_max_ew[lid1]) > e->weight + 1.e-12) { continue; } } else { /* see if edge might get matched on other proc */ PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]); if (g_max_e > e->weight + 1.e-12) { continue; } else if (e->weight > g_max_e - 1.e-12 && 
(PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) { /* check for max_e == to this edge and larger processor that will deal with this */ continue; } } /* check ghost for v0 */ if (isOK){ PetscReal max_e,ew; if ((ix=lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */ ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix]; ap = matB->a + ii[ix]; idx = matB->j + ii[ix]; for (jj=0 ; jj<n && isOK; jj++) { PetscInt lidj = idx[jj]; if (cpcol_matched[lidj]) continue; ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]); /* check for max_e == to this edge and larger processor that will deal with this */ if (ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid0]) - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank){ isOK = PETSC_FALSE; } } } /* for v1 */ if (cpid1 == -1 && isOK){ if ((ix=lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */ ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix]; ap = matB->a + ii[ix]; idx = matB->j + ii[ix]; for (jj=0 ; jj<n && isOK ; jj++) { PetscInt lidj = idx[jj]; if (cpcol_matched[lidj]) continue; ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]); /* check for max_e == to this edge and larger processor that will deal with this */ if (ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid1]) - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) { isOK = PETSC_FALSE; } } } } } /* do it */ if (isOK){ if (cpid1 == -1) { lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ ierr = PetscCDAppendRemove(agg_llists, lid0, lid1);CHKERRQ(ierr); } else if (sub_it != n_sub_its-1) { /* add gid1 to list of ghost deleted by me -- I need their children */ proc = cpcol_pe[cpid1]; cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? 
*/ ierr = PetscCDAppendID(deleted_list, proc, cpid1);CHKERRQ(ierr); /* cache to send messages */ ierr = PetscCDAppendID(deleted_list, proc, lid0);CHKERRQ(ierr); } else { continue; } lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */ /* set projection */ ierr = MatSetValues(P,1,&gid0,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(P,1,&gid1,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr); } /* matched */ } /* edge loop */ /* deal with deleted ghost on first pass */ if (size>1 && sub_it != n_sub_its-1){ PetscCDPos pos; PetscBool ise = PETSC_FALSE; PetscInt nSend1, **sbuffs1,nSend2; #define REQ_BF_SIZE 100 MPI_Request *sreqs2[REQ_BF_SIZE],*rreqs2[REQ_BF_SIZE]; MPI_Status status; /* send request */ for (proc=0,nSend1=0;proc<size;proc++){ ierr = PetscCDEmptyAt(deleted_list,proc,&ise);CHKERRQ(ierr); if (!ise) nSend1++; } ierr = PetscMalloc(nSend1*sizeof(PetscInt*), &sbuffs1);CHKERRQ(ierr); /* ierr = PetscMalloc4(nSend1, PetscInt*, sbuffs1, nSend1, PetscInt*, rbuffs1, nSend1, MPI_Request*, sreqs1, nSend1, MPI_Request*, rreqs1);CHKERRQ(ierr); */ /* PetscFree4(sbuffs1,rbuffs1,sreqs1,rreqs1); */ for (proc=0,nSend1=0;proc<size;proc++){ /* count ghosts */ ierr = PetscCDSizeAt(deleted_list,proc,&n);CHKERRQ(ierr); if (n>0){ #define CHUNCK_SIZE 100 PetscInt *sbuff,*pt; MPI_Request *request; assert(n%2==0); n /= 2; ierr = PetscMalloc((2 + 2*n + n*CHUNCK_SIZE)*sizeof(PetscInt) + 2*sizeof(MPI_Request), &sbuff);CHKERRQ(ierr); /* PetscMalloc4(2+2*n,PetscInt,sbuffs1[nSend1],n*CHUNCK_SIZE,PetscInt,rbuffs1[nSend1],1,MPI_Request,rreqs2[nSend1],1,MPI_Request,sreqs2[nSend1]); */ /* save requests */ sbuffs1[nSend1] = sbuff; request = (MPI_Request*)sbuff; sbuff = pt = (PetscInt*)(request+1); *pt++ = n; *pt++ = rank; ierr = PetscCDGetHeadPos(deleted_list,proc,&pos);CHKERRQ(ierr); while(pos){ PetscInt lid0, cpid, gid; ierr = PetscLLNGetID(pos, &cpid);CHKERRQ(ierr); gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]); ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr); ierr = PetscLLNGetID(pos, &lid0);CHKERRQ(ierr); ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr); *pt++ = gid; *pt++ = lid0; } /* send request tag1 [n, proc, n*[gid1,lid0] ] */ ierr = MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, wcomm, request);CHKERRQ(ierr); /* post receive */ request = (MPI_Request*)pt; rreqs2[nSend1] = request; /* cache recv request */ pt = (PetscInt*)(request+1); ierr = MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr); /* clear list */ ierr = PetscCDRemoveAll(deleted_list, proc);CHKERRQ(ierr); nSend1++; } } /* receive requests, send response, clear lists */ kk = nactive_edges; ierr = MPI_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,wcomm);CHKERRQ(ierr); /* not correct synchronization and global */ nSend2 = 0; while(1){ #define BF_SZ 10000 PetscMPIInt flag,count; PetscInt rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3; MPI_Request *request; ierr = MPI_Iprobe(MPI_ANY_SOURCE, tag1, wcomm, &flag, &status);CHKERRQ(ierr); if (!flag) break; ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr); if (count > BF_SZ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for receive: %d",count); proc = status.MPI_SOURCE; /* receive request tag1 [n, proc, n*[gid1,lid0] ] */ ierr = MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, wcomm, &status);CHKERRQ(ierr); /* count sends */ pt = rbuff; count3 = count2 = 0; n = *pt++; kk = *pt++; assert(kk==proc); while(n--){ PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++; assert(lid1>=0 && lid1<nloc); if
(lid_matched[lid1]){ PetscPrintf(PETSC_COMM_SELF,"\t *** [%d]%s %d) ERROR received deleted gid %d, deleted by (lid) %d from proc %d\n",rank,__FUNCT__,sub_it,gid1,kk,proc); PetscSleep(1); } assert(!lid_matched[lid1]); lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ ierr = PetscCDSizeAt(agg_llists, lid1, &kk);CHKERRQ(ierr); count2 += kk + 2; count3++; /* number of verts requested (n) */ } assert(pt-rbuff==count); if (count2 > count3*CHUNCK_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Irecv will be too small: %d",count2); /* send tag2 *[lid0, n, n*[gid] ] */ ierr = PetscMalloc(count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);CHKERRQ(ierr); request = (MPI_Request*)sbuff; sreqs2[nSend2++] = request; /* cache request */ if (nSend2==REQ_BF_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for requests: %d",nSend2); pt2 = sbuff = (PetscInt*)(request+1); pt = rbuff; n = *pt++; kk = *pt++; assert(kk==proc); while(n--){ /* read [n, proc, n*[gid1,lid0] ] */ PetscInt gid1=*pt++, lid1=gid1-my0, lid0=*pt++; assert(lid1>=0 && lid1<nloc); /* write [lid0, n, n*[gid] ] */ *pt2++ = lid0; pt3 = pt2++; /* save pointer for later */ /* for (pos=PetscCDGetHeadPos(agg_llists,lid1) ; pos ; pos=PetscCDGetNextPos(agg_llists,lid1,pos)){ */ ierr = PetscCDGetHeadPos(agg_llists,lid1,&pos);CHKERRQ(ierr); while(pos){ PetscInt gid; ierr = PetscLLNGetID(pos, &gid);CHKERRQ(ierr); ierr = PetscCDGetNextPos(agg_llists,lid1,&pos);CHKERRQ(ierr); *pt2++ = gid; } *pt3 = (pt2-pt3)-1; /* clear list */ ierr = PetscCDRemoveAll(agg_llists, lid1);CHKERRQ(ierr); } assert(pt2-sbuff==count2); assert(pt-rbuff==count); /* send requested data tag2 *[lid0, n, n*[gid1] ] */ ierr = MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr); } /* receive tag2 *[lid0, n, n*[gid] ] */ for (kk=0;kk<nSend1;kk++){ PetscMPIInt count; MPI_Request *request; PetscInt *pt, *pt2; request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */ ierr = MPI_Wait(request, &status);CHKERRQ(ierr); ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr); pt = pt2 = (PetscInt*)(request+1); while(pt-pt2 < count){ PetscInt lid0 = *pt++, n = *pt++; assert(lid0>=0 && lid0<nloc); while(n--){ PetscInt gid1 = *pt++; ierr = PetscCDAppendID(agg_llists, lid0, gid1);CHKERRQ(ierr); } } assert(pt-pt2==count); } /* wait for tag1 isends */ while(nSend1--){ MPI_Request *request; request = (MPI_Request*)sbuffs1[nSend1]; ierr = MPI_Wait(request, &status);CHKERRQ(ierr); ierr = PetscFree(request);CHKERRQ(ierr); } ierr = PetscFree(sbuffs1);CHKERRQ(ierr); /* wait for tag2 isends */ while(nSend2--){ MPI_Request *request = sreqs2[nSend2]; ierr = MPI_Wait(request, &status);CHKERRQ(ierr); ierr = PetscFree(request);CHKERRQ(ierr); } ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr); ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr); /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */ for (kk=0,gid=my0;kk<nloc;kk++,gid++) { PetscScalar vval = lid_matched[kk] ?
1.0 : 0.0; ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */ } ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr); ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr); ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr); ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr); for (kk=0;kk<n;kk++) { cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0); } ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr); } /* size > 1 */ /* compute 'locMaxEdge' */ ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr); for (kk=0,gid=my0;kk<nloc;kk++,gid++){ PetscReal max_e = 0.,tt; PetscScalar vval; PetscInt lid = kk; if (lid_matched[lid]) vval = 0.; else { ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid]; ap = matA->a + ii[lid]; for (jj=0; jj<n; jj++) { PetscInt lidj = idx[jj]; if (lid_matched[lidj]) continue; /* this is new - can change local max */ if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]); } if (lid_cprowID && (ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix]; ap = matB->a + ii[ix]; idx = matB->j + ii[ix]; for (jj=0 ; jj<n ; jj++) { PetscInt lidj = idx[jj]; if (cpcol_matched[lidj]) continue; if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt; } } } vval = (PetscScalar)max_e; ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */ } ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr); ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr); if (size>1 && sub_it != n_sub_its-1){ /* compute 'cpcol_max_ew' */ ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr); ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr); /* compute 'cpcol_max_pe' */ for (kk=0,gid=my0;kk<nloc;kk++,gid++){ PetscInt lid = kk; PetscReal ew,v1_max_e,v0_max_e=PetscRealPart(lid_max_ew[lid]); PetscScalar vval; PetscMPIInt max_pe=rank,pe; if (lid_matched[lid]) vval = (PetscScalar)rank; else if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix]; ap = matB->a + ii[ix]; idx = matB->j + ii[ix]; for (jj=0 ; jj<n ; jj++) { PetscInt lidj = idx[jj]; if (cpcol_matched[lidj]) continue; ew = PetscRealPart(ap[jj]); v1_max_e = PetscRealPart(cpcol_max_ew[lidj]); /* get max pe that has a max_e == to this edge w */ if ((pe=cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - 1.e-12 && ew > v0_max_e - 1.e-12) max_pe = pe; assert(ew < v0_max_e + 1.e-12 && ew < v1_max_e + 1.e-12); } vval = (PetscScalar)max_pe; } ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr); ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr); ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr); ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr); } /* deal with deleted ghost */ if 
(verbose>2) PetscPrintf(wcomm,"\t[%d]%s %d.%d: %d active edges.\n", rank,__FUNCT__,iter,sub_it,nactive_edges); if (!nactive_edges) break; } /* sub_it loop */ /* clean up iteration */ ierr = PetscFree(Edges);CHKERRQ(ierr); if (mpimat){ ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr); ierr = VecDestroy(&ghostMaxEdge);CHKERRQ(ierr); ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr); ierr = VecDestroy(&ghostMaxPE);CHKERRQ(ierr); ierr = PetscFree(cpcol_pe);CHKERRQ(ierr); ierr = PetscFree(cpcol_matched);CHKERRQ(ierr); } ierr = VecDestroy(&locMaxEdge);CHKERRQ(ierr); ierr = VecDestroy(&locMaxPE);CHKERRQ(ierr); if (mpimat){ ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); } /* create next G if needed */ if (iter == n_iter) { /* hard-wired test - need to look at full surrounded nodes or something */ ierr = MatDestroy(&P);CHKERRQ(ierr); ierr = MatDestroy(&cMat);CHKERRQ(ierr); break; } else { Vec diag; /* add identity for unmatched vertices so they stay alive */ for (kk=0,gid=my0;kk<nloc;kk++,gid++){ if (!lid_matched[kk]) { gid = kk+my0; ierr = MatGetRow(cMat,gid,&n,0,0);CHKERRQ(ierr); if (n>1){ ierr = MatSetValues(P,1,&gid,1,&gid,&one,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatRestoreRow(cMat,gid,&n,0,0);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* project to make new graph with collapsed edges */ ierr = MatPtAP(cMat,P,MAT_INITIAL_MATRIX,1.0,&tMat);CHKERRQ(ierr); ierr = MatDestroy(&P);CHKERRQ(ierr); ierr = MatDestroy(&cMat);CHKERRQ(ierr); cMat = tMat; ierr = MatGetVecs(cMat, &diag, 0);CHKERRQ(ierr); ierr = MatGetDiagonal(cMat, diag);CHKERRQ(ierr); /* effectively PCJACOBI */ ierr = VecReciprocal(diag);CHKERRQ(ierr); ierr = VecSqrtAbs(diag);CHKERRQ(ierr); ierr = MatDiagonalScale(cMat, diag, diag);CHKERRQ(ierr); ierr = VecDestroy(&diag);CHKERRQ(ierr); } } /* coarsen iterator */ /* make fake matrix */ if (size>1){ Mat mat; PetscCDPos pos; PetscInt gid, NN, MM, jj = 0, mxsz = 0; for (kk=0;kk<nloc;kk++){ ierr = PetscCDSizeAt(agg_llists, kk, &jj);CHKERRQ(ierr); if (jj > mxsz) mxsz = jj; } ierr = MatGetSize(a_Gmat, &MM, &NN);CHKERRQ(ierr); if (mxsz > MM-nloc) mxsz = MM-nloc; ierr = MatCreateAIJ(wcomm, nloc, nloc,PETSC_DETERMINE, PETSC_DETERMINE,0, 0, mxsz, 0, &mat);CHKERRQ(ierr); /* */ for (kk=0,gid=my0;kk<nloc;kk++,gid++){ /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)){ */ ierr = PetscCDGetHeadPos(agg_llists,kk,&pos);CHKERRQ(ierr); while(pos){ PetscInt gid1; ierr = PetscLLNGetID(pos, &gid1);CHKERRQ(ierr); ierr = PetscCDGetNextPos(agg_llists,kk,&pos);CHKERRQ(ierr); if (gid1 < my0 || gid1 >= my0+nloc) { ierr = MatSetValues(mat,1,&gid,1,&gid1,&one,ADD_VALUES);CHKERRQ(ierr); } } } ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscCDSetMat(agg_llists, mat);CHKERRQ(ierr); } ierr = PetscFree(lid_cprowID);CHKERRQ(ierr); ierr = PetscFree(lid_gid);CHKERRQ(ierr); ierr = PetscFree(lid_matched);CHKERRQ(ierr); ierr = PetscCDDestroy(deleted_list);CHKERRQ(ierr); PetscFunctionReturn(0); }
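/*
   Editor's illustration (not part of the PETSc source): the matching loop above sorts the
   Edge array with qsort() and then greedily matches the heaviest remaining edge (heavy edge
   matching).  The comparator below is a minimal sketch of a weight-descending comparator in
   the spirit of gamg_hem_compare; the names Edge_sketch and edge_compare_sketch are
   hypothetical, and <petscsys.h> plus <stdlib.h> are assumed to be available.
*/
#include <stdlib.h>
#include <petscsys.h>

typedef struct {
  PetscInt  lid0, gid1, cpid1;  /* same fields the loop above fills in */
  PetscReal weight;
} Edge_sketch;

static int edge_compare_sketch(const void *a, const void *b)
{
  PetscReal wa = ((const Edge_sketch*)a)->weight;
  PetscReal wb = ((const Edge_sketch*)b)->weight;
  if (wa < wb) return 1;   /* heavier edges sort first (descending order) */
  if (wa > wb) return -1;
  return 0;
}
/* usage sketch: qsort(edges, nEdges, sizeof(Edge_sketch), edge_compare_sketch); */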
static PetscErrorCode VecAssemblyBegin_MPI_BTS(Vec X) { Vec_MPI *x = (Vec_MPI*)X->data; PetscErrorCode ierr; MPI_Comm comm; PetscInt i,j,jb,bs; PetscFunctionBegin; if (X->stash.donotstash) PetscFunctionReturn(0); ierr = PetscObjectGetComm((PetscObject)X,&comm);CHKERRQ(ierr); ierr = VecGetBlockSize(X,&bs);CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG) { InsertMode addv; ierr = MPIU_Allreduce((PetscEnum*)&X->stash.insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(comm,PETSC_ERR_ARG_NOTSAMETYPE,"Some processors inserted values while others added"); } #endif X->bstash.insertmode = X->stash.insertmode; /* Block stash implicitly tracks InsertMode of scalar stash */ ierr = VecStashSortCompress_Private(&X->stash);CHKERRQ(ierr); ierr = VecStashSortCompress_Private(&X->bstash);CHKERRQ(ierr); if (!x->sendranks) { PetscMPIInt nowners,bnowners,*owners,*bowners; PetscInt ntmp; ierr = VecStashGetOwnerList_Private(&X->stash,X->map,&nowners,&owners);CHKERRQ(ierr); ierr = VecStashGetOwnerList_Private(&X->bstash,X->map,&bnowners,&bowners);CHKERRQ(ierr); ierr = PetscMergeMPIIntArray(nowners,owners,bnowners,bowners,&ntmp,&x->sendranks);CHKERRQ(ierr); x->nsendranks = ntmp; ierr = PetscFree(owners);CHKERRQ(ierr); ierr = PetscFree(bowners);CHKERRQ(ierr); ierr = PetscMalloc1(x->nsendranks,&x->sendhdr);CHKERRQ(ierr); ierr = PetscCalloc1(x->nsendranks,&x->sendptrs);CHKERRQ(ierr); } for (i=0,j=0,jb=0; i<x->nsendranks; i++) { PetscMPIInt rank = x->sendranks[i]; x->sendhdr[i].insertmode = X->stash.insertmode; /* Initialize pointers for non-empty stashes the first time around. Subsequent assemblies with * VEC_SUBSET_OFF_PROC_ENTRIES will leave the old pointers (dangling because the stash has been collected) when * there is nothing new to send, so that size-zero messages get sent instead. 
*/ x->sendhdr[i].count = 0; if (X->stash.n) { x->sendptrs[i].ints = &X->stash.idx[j]; x->sendptrs[i].scalars = &X->stash.array[j]; for ( ; j<X->stash.n && X->stash.idx[j] < X->map->range[rank+1]; j++) x->sendhdr[i].count++; } x->sendhdr[i].bcount = 0; if (X->bstash.n) { x->sendptrs[i].intb = &X->bstash.idx[jb]; x->sendptrs[i].scalarb = &X->bstash.array[jb*bs]; for ( ; jb<X->bstash.n && X->bstash.idx[jb]*bs < X->map->range[rank+1]; jb++) x->sendhdr[i].bcount++; } } if (!x->segrecvint) {ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&x->segrecvint);CHKERRQ(ierr);} if (!x->segrecvscalar) {ierr = PetscSegBufferCreate(sizeof(PetscScalar),1000,&x->segrecvscalar);CHKERRQ(ierr);} if (!x->segrecvframe) {ierr = PetscSegBufferCreate(sizeof(VecAssemblyFrame),50,&x->segrecvframe);CHKERRQ(ierr);} if (x->recvhdr) { /* VEC_SUBSET_OFF_PROC_ENTRIES and this is not the first assembly */ PetscMPIInt tag[4]; if (!x->assembly_subset) SETERRQ(comm,PETSC_ERR_PLIB,"Attempt to reuse rendezvous when not VEC_SUBSET_OFF_PROC_ENTRIES"); for (i=0; i<4; i++) {ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr);} for (i=0; i<x->nsendranks; i++) { ierr = VecAssemblySend_MPI_Private(comm,tag,i,x->sendranks[i],x->sendhdr+i,x->sendreqs+4*i,X);CHKERRQ(ierr); } for (i=0; i<x->nrecvranks; i++) { ierr = VecAssemblyRecv_MPI_Private(comm,tag,x->recvranks[i],x->recvhdr+i,x->recvreqs+4*i,X);CHKERRQ(ierr); } x->use_status = PETSC_TRUE; } else { /* First time */ ierr = PetscCommBuildTwoSidedFReq(comm,3,MPIU_INT,x->nsendranks,x->sendranks,(PetscInt*)x->sendhdr,&x->nrecvranks,&x->recvranks,&x->recvhdr,4,&x->sendreqs,&x->recvreqs,VecAssemblySend_MPI_Private,VecAssemblyRecv_MPI_Private,X);CHKERRQ(ierr); x->use_status = PETSC_FALSE; } { PetscInt nstash,reallocs; ierr = VecStashGetInfo_Private(&X->stash,&nstash,&reallocs);CHKERRQ(ierr); ierr = PetscInfo2(X,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); ierr = VecStashGetInfo_Private(&X->bstash,&nstash,&reallocs);CHKERRQ(ierr); ierr = PetscInfo2(X,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); } PetscFunctionReturn(0); }
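/*
   Editor's usage sketch (not part of the PETSc source): the BTS assembly path above runs when
   a caller sets off-process entries with VecSetValues() and then assembles the vector.  All
   calls below are standard PETSc API; the local size of 10 and the particular off-process
   index are illustrative assumptions only.
*/
#include <petscvec.h>

static PetscErrorCode demo_vec_assembly(MPI_Comm comm)
{
  PetscErrorCode ierr;
  Vec            x;
  PetscInt       N,rstart,rend,row;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = VecCreate(comm,&x);CHKERRQ(ierr);
  ierr = VecSetSizes(x,10,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = VecSetFromOptions(x);CHKERRQ(ierr);
  ierr = VecGetSize(x,&N);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(x,&rstart,&rend);CHKERRQ(ierr);
  row  = rend % N;                                                /* owned by the next rank, so off-process unless run on one rank */
  ierr = VecSetValues(x,1,&row,&one,ADD_VALUES);CHKERRQ(ierr);    /* goes into the stash and is sent during assembly */
  ierr = VecSetValues(x,1,&rstart,&one,ADD_VALUES);CHKERRQ(ierr); /* local entry, applied directly and never stashed */
  ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(x);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}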
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,*tags,done,i; MPI_Aint lb,unitbytes; char *tdata; MPI_Request *sendreqs,*usendreqs,*req,barrier; PetscSegBuffer segrank,segdata,segreq; PetscBool barrier_started; PetscFunctionBegin; ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); for (i=0; i<ntags; i++) { ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); } ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); tdata = (char*)todata; ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); /* Post synchronous sends */ for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the * synchronous messages above. */ for (i=0; i<nto; i++) { PetscMPIInt k; for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, * but we need to work around it. 
*/ barrier_started = PETSC_FALSE; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank,k; void *buf; ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); nrecvs++; } if (!barrier_started) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { #if defined(PETSC_HAVE_MPI_IBARRIER) ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); #elif defined(PETSC_HAVE_MPIX_IBARRIER) ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); #endif barrier_started = PETSC_TRUE; } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); *toreqs = usendreqs; ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); ierr = PetscFree(sendreqs);CHKERRQ(ierr); ierr = PetscFree(tags);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); PetscFunctionReturn(0); }
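/*
   Editor's usage sketch (not part of the PETSc source): the Ibarrier-based rendezvous above
   lets each process discover who is sending to it when it only knows its own destinations.
   The sketch below calls the public entry point PetscCommBuildTwoSided(); the ring pattern
   and the payload value are illustrative assumptions.
*/
#include <petscsys.h>

static PetscErrorCode demo_two_sided(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,torank,payload,nfrom,*fromranks,*fromdata;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  torank  = (rank+1)%size;   /* each rank sends one integer to its right neighbor */
  payload = 100+rank;
  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,1,&torank,&payload,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr);
  /* here nfrom == 1, fromranks[0] is the left neighbor, and fromdata[0] == 100+fromranks[0] */
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}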
void PETSC_STDCALL petsccommgetnewtag_(MPI_Fint * comm,PetscMPIInt *tag, int *__ierr ){ *__ierr = PetscCommGetNewTag( MPI_Comm_f2c( *(comm) ),tag); }
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) { PetscErrorCode ierr; Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; Mat_SeqAIJ *p_loc,*p_oth; Mat_PtAPMPI *ptap; Mat_Merge_SeqsToMPI *merge; PetscInt *adi=ad->i,*aoi=ao->i,*adj,*aoj,*apJ,nextp; PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pj; PetscInt i,j,k,anz,pnz,apnz,nextap,row,*cj; MatScalar *ada,*aoa,*apa,*pa,*ca,*pa_loc,*pa_oth,valtmp; PetscInt am =A->rmap->n,cm=C->rmap->n,pon=(p->B)->cmap->n; MPI_Comm comm; PetscMPIInt size,rank,taga,*len_s; PetscInt *owners,proc,nrows,**buf_ri_k,**nextrow,**nextci; PetscInt **buf_ri,**buf_rj; PetscInt cnz=0,*bj_i,*bi,*bj,bnz,nextcj; /* bi,bj,ba: local array of C(mpi mat) */ MPI_Request *s_waits,*r_waits; MPI_Status *status; MatScalar **abuf_r,*ba_i,*pA,*coa,*ba; PetscInt *api,*apj,*coi,*coj; PetscInt *poJ=po->j,*pdJ=pd->j,pcstart=P->cmap->rstart,pcend=P->cmap->rend; PetscBool scalable; #if defined(PTAP_PROFILE) PetscLogDouble t0,t1,t2,t3,t4,et2_AP=0.0,et2_PtAP=0.0,t2_0,t2_1,t2_2; #endif PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t0);CHKERRQ(ierr); #endif ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ptap = c->ptap; if (!ptap) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_INCOMP,"MatPtAP() has not been called to create matrix C yet, cannot use MAT_REUSE_MATRIX"); merge = ptap->merge; apa = ptap->apa; scalable = ptap->scalable; /* 1) get P_oth = ptap->P_oth and P_loc = ptap->P_loc */ /*--------------------------------------------------*/ if (ptap->reuse == MAT_INITIAL_MATRIX) { /* P_oth and P_loc are obtained in MatPtAPSymbolic(); skip calling MatGetBrowsOfAoCols() and MatMPIAIJGetLocalMat() */ ptap->reuse = MAT_REUSE_MATRIX; } else { /* update numerical values of P_oth and P_loc */ ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_REUSE_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr); ierr = MatMPIAIJGetLocalMat(P,MAT_REUSE_MATRIX,&ptap->P_loc);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t1);CHKERRQ(ierr); #endif /* 2) compute numeric C_seq = P_loc^T*A_loc*P - dominating part */ /*--------------------------------------------------------------*/ /* get data from symbolic products */ p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; coi = merge->coi; coj = merge->coj; ierr = PetscCalloc1(coi[pon]+1,&coa);CHKERRQ(ierr); bi = merge->bi; bj = merge->bj; owners = merge->rowmap->range; ierr = PetscCalloc1(bi[cm]+1,&ba);CHKERRQ(ierr); /* ba: Cseq->a */ api = ptap->api; apj = ptap->apj; if (!scalable) { /* Do dense axpy on apa (length of pN, stores A[i,:]*P) - nonscalable, but fast */ ierr = PetscInfo(C,"Using non-scalable dense axpy\n");CHKERRQ(ierr); /*-----------------------------------------------------------------------------------------------------*/ for (i=0; i<am; i++) { #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_0);CHKERRQ(ierr); #endif /* 2-a) form i-th sparse row of A_loc*P = Ad*P_loc + Ao*P_oth */ /*------------------------------------------------------------*/ apJ = apj + api[i]; /* diagonal portion of A */ anz = adi[i+1] - adi[i]; adj = ad->j + adi[i]; ada = ad->a + adi[i];
for (j=0; j<anz; j++) { row = adj[j]; pnz = pi_loc[row+1] - pi_loc[row]; pj = pj_loc + pi_loc[row]; pa = pa_loc + pi_loc[row]; /* perform dense axpy */ valtmp = ada[j]; for (k=0; k<pnz; k++) { apa[pj[k]] += valtmp*pa[k]; } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } /* off-diagonal portion of A */ anz = aoi[i+1] - aoi[i]; aoj = ao->j + aoi[i]; aoa = ao->a + aoi[i]; for (j=0; j<anz; j++) { row = aoj[j]; pnz = pi_oth[row+1] - pi_oth[row]; pj = pj_oth + pi_oth[row]; pa = pa_oth + pi_oth[row]; /* perform dense axpy */ valtmp = aoa[j]; for (k=0; k<pnz; k++) { apa[pj[k]] += valtmp*pa[k]; } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_1);CHKERRQ(ierr); et2_AP += t2_1 - t2_0; #endif /* 2-b) Compute Cseq = P_loc[i,:]^T*AP[i,:] using outer product */ /*--------------------------------------------------------------*/ apnz = api[i+1] - api[i]; /* put the value into Co=(p->B)^T*AP (off-diagonal part, send to others) */ pnz = po->i[i+1] - po->i[i]; poJ = po->j + po->i[i]; pA = po->a + po->i[i]; for (j=0; j<pnz; j++) { row = poJ[j]; cnz = coi[row+1] - coi[row]; cj = coj + coi[row]; ca = coa + coi[row]; /* perform dense axpy */ valtmp = pA[j]; for (k=0; k<cnz; k++) { ca[k] += valtmp*apa[cj[k]]; } ierr = PetscLogFlops(2.0*cnz);CHKERRQ(ierr); } /* put the value into Cd (diagonal part) */ pnz = pd->i[i+1] - pd->i[i]; pdJ = pd->j + pd->i[i]; pA = pd->a + pd->i[i]; for (j=0; j<pnz; j++) { row = pdJ[j]; cnz = bi[row+1] - bi[row]; cj = bj + bi[row]; ca = ba + bi[row]; /* perform dense axpy */ valtmp = pA[j]; for (k=0; k<cnz; k++) { ca[k] += valtmp*apa[cj[k]]; } ierr = PetscLogFlops(2.0*cnz);CHKERRQ(ierr); } /* zero the current row of A*P */ for (k=0; k<apnz; k++) apa[apJ[k]] = 0.0; #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_2);CHKERRQ(ierr); et2_PtAP += t2_2 - t2_1; #endif } } else { /* Do sparse axpy on apa (length of ap_rmax, stores A[i,:]*P) - scalable, but slower */ ierr = PetscInfo(C,"Using scalable sparse axpy\n");CHKERRQ(ierr); /*-----------------------------------------------------------------------------------------*/ pA=pa_loc; for (i=0; i<am; i++) { #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_0);CHKERRQ(ierr); #endif /* form i-th sparse row of A*P */ apnz = api[i+1] - api[i]; apJ = apj + api[i]; /* diagonal portion of A */ anz = adi[i+1] - adi[i]; adj = ad->j + adi[i]; ada = ad->a + adi[i]; for (j=0; j<anz; j++) { row = adj[j]; pnz = pi_loc[row+1] - pi_loc[row]; pj = pj_loc + pi_loc[row]; pa = pa_loc + pi_loc[row]; valtmp = ada[j]; nextp = 0; for (k=0; nextp<pnz; k++) { if (apJ[k] == pj[nextp]) { /* col of AP == col of P */ apa[k] += valtmp*pa[nextp++]; } } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } /* off-diagonal portion of A */ anz = aoi[i+1] - aoi[i]; aoj = ao->j + aoi[i]; aoa = ao->a + aoi[i]; for (j=0; j<anz; j++) { row = aoj[j]; pnz = pi_oth[row+1] - pi_oth[row]; pj = pj_oth + pi_oth[row]; pa = pa_oth + pi_oth[row]; valtmp = aoa[j]; nextp = 0; for (k=0; nextp<pnz; k++) { if (apJ[k] == pj[nextp]) { /* col of AP == col of P */ apa[k] += valtmp*pa[nextp++]; } } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_1);CHKERRQ(ierr); et2_AP += t2_1 - t2_0; #endif /* 2-b) Compute Cseq = P_loc[i,:]^T*AP[i,:] using outer product */ /*--------------------------------------------------------------*/ pnz = pi_loc[i+1] - pi_loc[i]; pJ = pj_loc + pi_loc[i]; for (j=0; j<pnz; j++) { nextap = 0; row = pJ[j]; /* global index */ if (row < pcstart || row >=pcend) { /* put the value into Co */ row = *poJ; cj = 
coj + coi[row]; ca = coa + coi[row]; poJ++; } else { /* put the value into Cd */ row = *pdJ; cj = bj + bi[row]; ca = ba + bi[row]; pdJ++; } valtmp = pA[j]; for (k=0; nextap<apnz; k++) { if (cj[k]==apJ[nextap]) ca[k] += valtmp*apa[nextap++]; } ierr = PetscLogFlops(2.0*apnz);CHKERRQ(ierr); } pA += pnz; /* zero the current row info for A*P */ ierr = PetscMemzero(apa,apnz*sizeof(MatScalar));CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_2);CHKERRQ(ierr); et2_PtAP += t2_2 - t2_1; #endif } } #if defined(PTAP_PROFILE) ierr = PetscTime(&t2);CHKERRQ(ierr); #endif /* 3) send and recv matrix values coa */ /*------------------------------------*/ buf_ri = merge->buf_ri; buf_rj = merge->buf_rj; len_s = merge->len_s; ierr = PetscCommGetNewTag(comm,&taga);CHKERRQ(ierr); ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); ierr = PetscMalloc2(merge->nsend+1,&s_waits,size,&status);CHKERRQ(ierr); for (proc=0,k=0; proc<size; proc++) { if (!len_s[proc]) continue; i = merge->owners_co[proc]; ierr = MPI_Isend(coa+coi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); k++; } if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} ierr = PetscFree2(s_waits,status);CHKERRQ(ierr); ierr = PetscFree(r_waits);CHKERRQ(ierr); ierr = PetscFree(coa);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t3);CHKERRQ(ierr); #endif /* 4) insert local Cseq and received values into Cmpi */ /*------------------------------------------------------*/ ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextci);CHKERRQ(ierr); for (k=0; k<merge->nrecv; k++) { buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ nrows = *(buf_ri_k[k]); nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ nextci[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ } for (i=0; i<cm; i++) { row = owners[rank] + i; /* global row index of C_seq */ bj_i = bj + bi[i]; /* col indices of the i-th row of C */ ba_i = ba + bi[i]; bnz = bi[i+1] - bi[i]; /* add received vals into ba */ for (k=0; k<merge->nrecv; k++) { /* k-th received message */ /* i-th row */ if (i == *nextrow[k]) { cnz = *(nextci[k]+1) - *nextci[k]; cj = buf_rj[k] + *(nextci[k]); ca = abuf_r[k] + *(nextci[k]); nextcj = 0; for (j=0; nextcj<cnz; j++) { if (bj_i[j] == cj[nextcj]) { /* bcol == ccol */ ba_i[j] += ca[nextcj++]; } } nextrow[k]++; nextci[k]++; ierr = PetscLogFlops(2.0*cnz);CHKERRQ(ierr); } } ierr = MatSetValues(C,1,&row,bnz,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscFree(ba);CHKERRQ(ierr); ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); ierr = PetscFree(abuf_r);CHKERRQ(ierr); ierr = PetscFree3(buf_ri_k,nextrow,nextci);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t4);CHKERRQ(ierr); if (rank==1) {ierr = PetscPrintf(MPI_COMM_SELF," [%d] PtAPNum %g/P + %g/PtAP( %g + %g ) + %g/comm + %g/Cloc = %g\n\n",rank,t1-t0,t2-t1,et2_AP,et2_PtAP,t3-t2,t4-t3,t4-t0);CHKERRQ(ierr);} #endif PetscFunctionReturn(0); }
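/*
   Editor's usage sketch (not part of the PETSc source): the routine above is the numeric
   phase that MatPtAP() dispatches to for MPIAIJ matrices.  The pattern below (symbolic and
   numeric product on the first call, numeric-only work on reuse) is the standard calling
   sequence; A and P are assumed to be assembled MPIAIJ matrices with compatible layouts,
   and the fill estimate of 2.0 is an illustrative assumption.
*/
#include <petscmat.h>

static PetscErrorCode demo_ptap(Mat A,Mat P)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,2.0,&C);CHKERRQ(ierr); /* builds the symbolic data (c->ptap) and the values */
  /* ... change the numerical values of A, keeping its nonzero pattern ... */
  ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,2.0,&C);CHKERRQ(ierr);   /* reuses the symbolic data, redoes only the numeric phase */
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}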