/*@C PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank Not Collective Input Arguments: + sf - star forest - unit - data type for each node Output Arguments: + localtypes - types describing part of local leaf buffer referencing each remote rank - remotetypes - types describing part of remote root buffer referenced for each remote rank Level: developer .seealso: PetscSFSetGraph(), PetscSFView() @*/ static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes) { PetscSF_Window *w = (PetscSF_Window*)sf->data; PetscErrorCode ierr; PetscSFDataLink link; PetscInt i,nranks; const PetscInt *roffset,*rmine,*rremote; const PetscMPIInt *ranks; PetscFunctionBegin; /* Look for types in cache */ for (link=w->link; link; link=link->next) { PetscBool match; ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); if (match) { *localtypes = link->mine; *remotetypes = link->remote; PetscFunctionReturn(0); } } /* Create new composite types for each send rank */ ierr = PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); ierr = PetscMalloc(sizeof(*link),&link);CHKERRQ(ierr); ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr); ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr); for (i=0; i<nranks; i++) { PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i]; PetscMPIInt *rmine,*rremote; #if !defined(PETSC_USE_64BIT_INDICES) rmine = sf->rmine + sf->roffset[i]; rremote = sf->rremote + sf->roffset[i]; #else PetscInt j; ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr); for (j=0; j<rcount; j++) { ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr); ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr); } #endif ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr); ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr); #if defined(PETSC_USE_64BIT_INDICES) ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr); #endif ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr); ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr); } link->next = w->link; w->link = link; *localtypes = link->mine; *remotetypes = link->remote; PetscFunctionReturn(0); }
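/* A minimal, self-contained sketch (plain MPI, not PETSc code) of the derived-datatype
   technique used by PetscSFWindowGetDataTypes(): MPI_Type_create_indexed_block() builds a
   type that exposes scattered unit-sized entries of a buffer, so a single send/receive or
   one-sided access can move non-contiguous data.  All names here are local to the example. */
#include <mpi.h>
#include <stdio.h>

int main(int argc,char **argv)
{
  int          idx[3] = {0,3,5};                  /* offsets of the entries to expose */
  double       buf[8] = {0,1,2,3,4,5,6,7};
  double       out[3];
  MPI_Datatype scattered;

  MPI_Init(&argc,&argv);
  /* one block of length 1 at each listed offset, in units of MPI_DOUBLE */
  MPI_Type_create_indexed_block(3,1,idx,MPI_DOUBLE,&scattered);
  MPI_Type_commit(&scattered);
  /* send to self: pack the three scattered entries into a contiguous buffer */
  MPI_Sendrecv(buf,1,scattered,0,0,out,3,MPI_DOUBLE,0,0,MPI_COMM_SELF,MPI_STATUS_IGNORE);
  printf("%g %g %g\n",out[0],out[1],out[2]);      /* prints 0 3 5 */
  MPI_Type_free(&scattered);
  MPI_Finalize();
  return 0;
}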
static PetscErrorCode ISGatherTotal_Private(IS is) { PetscErrorCode ierr; PetscInt i,n,N; const PetscInt *lindices; MPI_Comm comm; PetscMPIInt rank,size,*sizes = NULL,*offsets = NULL,nn; PetscFunctionBegin; PetscValidHeaderSpecific(is,IS_CLASSID,1); ierr = PetscObjectGetComm((PetscObject)is,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); ierr = PetscMalloc2(size,PetscMPIInt,&sizes,size,PetscMPIInt,&offsets);CHKERRQ(ierr); ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); ierr = MPI_Allgather(&nn,1,MPI_INT,sizes,1,MPI_INT,comm);CHKERRQ(ierr); offsets[0] = 0; for (i=1; i<size; ++i) offsets[i] = offsets[i-1] + sizes[i-1]; N = offsets[size-1] + sizes[size-1]; ierr = PetscMalloc(N*sizeof(PetscInt),&(is->total));CHKERRQ(ierr); ierr = ISGetIndices(is,&lindices);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)lindices,nn,MPIU_INT,is->total,sizes,offsets,MPIU_INT,comm);CHKERRQ(ierr); ierr = ISRestoreIndices(is,&lindices);CHKERRQ(ierr); is->local_offset = offsets[rank]; ierr = PetscFree2(sizes,offsets);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,done,i; MPI_Aint lb,unitbytes; char *tdata; MPI_Request *sendreqs,barrier; PetscSegBuffer segrank,segdata; PetscFunctionBegin; ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); tdata = (char*)todata; ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank; void *buf; ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); nrecvs++; } if (barrier == MPI_REQUEST_NULL) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { #if defined(PETSC_HAVE_MPI_IBARRIER) ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); #elif defined(PETSC_HAVE_MPIX_IBARRIER) ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); #endif ierr = PetscFree(sendreqs);CHKERRQ(ierr); } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); PetscFunctionReturn(0); }
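/* A stripped-down sketch (plain MPI, not PETSc code) of the non-blocking consensus (NBX)
   pattern that PetscCommBuildTwoSided_Ibarrier() implements: post synchronous sends, keep
   probing and receiving, and finish once the local sends have completed and an MPI_Ibarrier
   has completed, so no rank needs to know its receiver count in advance.  Requires MPI-3 for
   MPI_Ibarrier; the one-int-to-(rank+1) traffic is invented for illustration. */
#include <mpi.h>

int main(int argc,char **argv)
{
  int         rank,size,payload,dest,done = 0,sent = 0,barrier_started = 0;
  MPI_Request sendreq,barrier = MPI_REQUEST_NULL;

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  payload = rank;
  dest    = (rank+1)%size;
  MPI_Issend(&payload,1,MPI_INT,dest,0,MPI_COMM_WORLD,&sendreq);
  while (!done) {
    int        flag;
    MPI_Status status;
    MPI_Iprobe(MPI_ANY_SOURCE,0,MPI_COMM_WORLD,&flag,&status);
    if (flag) {                    /* a message is waiting: receive it */
      int buf;
      MPI_Recv(&buf,1,MPI_INT,status.MPI_SOURCE,0,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
    }
    if (!barrier_started) {        /* my synchronous send was matched: join the barrier */
      MPI_Test(&sendreq,&sent,MPI_STATUS_IGNORE);
      if (sent) { MPI_Ibarrier(MPI_COMM_WORLD,&barrier); barrier_started = 1; }
    } else {                       /* everyone joined: all messages have been matched */
      MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
    }
  }
  MPI_Finalize();
  return 0;
}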
PetscErrorCode ISView_General_Binary(IS is,PetscViewer viewer) { PetscErrorCode ierr; IS_General *isa = (IS_General*) is->data; PetscMPIInt rank,size,mesgsize,tag = ((PetscObject)viewer)->tag, mesglen; PetscInt n,N,len,j,tr[2]; int fdes; MPI_Status status; PetscInt message_count,flowcontrolcount,*values; PetscFunctionBegin; ierr = PetscLayoutGetLocalSize(is->map, &n);CHKERRQ(ierr); ierr = PetscLayoutGetSize(is->map, &N);CHKERRQ(ierr); ierr = PetscViewerBinaryGetDescriptor(viewer,&fdes);CHKERRQ(ierr); /* determine maximum message to arrive */ ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)is),&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PetscObjectComm((PetscObject)is),&size);CHKERRQ(ierr); tr[0] = IS_FILE_CLASSID; tr[1] = N; ierr = PetscViewerBinaryWrite(viewer,tr,2,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); ierr = MPI_Reduce(&n,&len,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)is));CHKERRQ(ierr); ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); if (!rank) { ierr = PetscBinaryWrite(fdes,isa->idx,n,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); ierr = PetscMalloc1(len,&values);CHKERRQ(ierr); ierr = PetscMPIIntCast(len,&mesgsize);CHKERRQ(ierr); /* receive and save messages */ for (j=1; j<size; j++) { ierr = PetscViewerFlowControlStepMaster(viewer,j,&message_count,flowcontrolcount);CHKERRQ(ierr); ierr = MPI_Recv(values,mesgsize,MPIU_INT,j,tag,PetscObjectComm((PetscObject)is),&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPIU_INT,&mesglen);CHKERRQ(ierr); ierr = PetscBinaryWrite(fdes,values,(PetscInt)mesglen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); } ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); ierr = PetscFree(values);CHKERRQ(ierr); } else { ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); ierr = PetscMPIIntCast(n,&mesgsize);CHKERRQ(ierr); ierr = MPI_Send(isa->idx,mesgsize,MPIU_INT,0,tag,PetscObjectComm((PetscObject)is));CHKERRQ(ierr); ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@ ISAllGather - Given an index set (IS) on each processor, generates a large index set (same on each processor) by concatenating together each processors index set. Collective on IS Input Parameter: . is - the distributed index set Output Parameter: . isout - the concatenated index set (same on all processors) Notes: ISAllGather() is clearly not scalable for large index sets. The IS created on each processor must be created with a common communicator (e.g., PETSC_COMM_WORLD). If the index sets were created with PETSC_COMM_SELF, this routine will not work as expected, since each process will generate its own new IS that consists only of itself. The communicator for this new IS is PETSC_COMM_SELF Level: intermediate Concepts: gather^index sets Concepts: index sets^gathering to all processors Concepts: IS^gathering to all processors .seealso: ISCreateGeneral(), ISCreateStride(), ISCreateBlock() @*/ PetscErrorCode ISAllGather(IS is,IS *isout) { PetscErrorCode ierr; PetscInt *indices,n,i,N,step,first; const PetscInt *lindices; MPI_Comm comm; PetscMPIInt size,*sizes = NULL,*offsets = NULL,nn; PetscBool stride; PetscFunctionBegin; PetscValidHeaderSpecific(is,IS_CLASSID,1); PetscValidPointer(isout,2); ierr = PetscObjectGetComm((PetscObject)is,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)is,ISSTRIDE,&stride);CHKERRQ(ierr); if (size == 1 && stride) { /* should handle parallel ISStride also */ ierr = ISStrideGetInfo(is,&first,&step);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,n,first,step,isout);CHKERRQ(ierr); } else { ierr = PetscMalloc2(size,&sizes,size,&offsets);CHKERRQ(ierr); ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); ierr = MPI_Allgather(&nn,1,MPI_INT,sizes,1,MPI_INT,comm);CHKERRQ(ierr); offsets[0] = 0; for (i=1; i<size; i++) { PetscInt s = offsets[i-1] + sizes[i-1]; ierr = PetscMPIIntCast(s,&offsets[i]);CHKERRQ(ierr); } N = offsets[size-1] + sizes[size-1]; ierr = PetscMalloc1(N,&indices);CHKERRQ(ierr); ierr = ISGetIndices(is,&lindices);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)lindices,nn,MPIU_INT,indices,sizes,offsets,MPIU_INT,comm);CHKERRQ(ierr); ierr = ISRestoreIndices(is,&lindices);CHKERRQ(ierr); ierr = PetscFree2(sizes,offsets);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,N,indices,PETSC_OWN_POINTER,isout);CHKERRQ(ierr); } PetscFunctionReturn(0); }
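/* The gather-to-all pattern behind ISAllGather() and ISGatherTotal_Private(), shown as a
   stand-alone sketch in plain MPI (not PETSc code): exchange the ragged local lengths with
   MPI_Allgather, build displacements by prefix sum, then concatenate with MPI_Allgatherv so
   every rank ends up with the same global list.  Sizes and values are invented. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc,char **argv)
{
  int rank,size,i,n,N;
  int *mine,*sizes,*offsets,*all;

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  n    = rank+1;                                  /* ragged local length */
  mine = (int*)malloc(n*sizeof(int));
  for (i=0; i<n; i++) mine[i] = 100*rank+i;
  sizes   = (int*)malloc(size*sizeof(int));
  offsets = (int*)malloc(size*sizeof(int));
  MPI_Allgather(&n,1,MPI_INT,sizes,1,MPI_INT,MPI_COMM_WORLD);
  offsets[0] = 0;
  for (i=1; i<size; i++) offsets[i] = offsets[i-1]+sizes[i-1];
  N   = offsets[size-1]+sizes[size-1];
  all = (int*)malloc(N*sizeof(int));
  MPI_Allgatherv(mine,n,MPI_INT,all,sizes,offsets,MPI_INT,MPI_COMM_WORLD);
  /* every rank now holds the identical concatenation of length N */
  free(mine); free(sizes); free(offsets); free(all);
  MPI_Finalize();
  return 0;
}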
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,unitbytes,done; PetscInt i; char *tdata; MPI_Request *sendreqs,barrier; PetscSegBuffer segrank,segdata; PetscFunctionBegin; ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); tdata = (char*)todata; ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr); for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank; void *buf; ierr = PetscSegBufferGet(&segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(&segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); nrecvs++; } if (barrier == MPI_REQUEST_NULL) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); ierr = PetscFree(sendreqs);CHKERRQ(ierr); } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(&segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(&segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ DMDAGetBoundingBox - Returns the global bounding box for the DMDA. Collective on DMDA Input Parameter: . dm - the DM Output Parameters: + gmin - global minimum coordinates (length dim, optional) - gmax - global maximum coordinates (length dim, optional) Level: beginner .keywords: distributed array, get, coordinates .seealso: DMDAGetCoordinateDA(), DMGetCoordinates(), DMDAGetLocalBoundingBox() @*/ PetscErrorCode DMDAGetBoundingBox(DM dm,PetscReal gmin[],PetscReal gmax[]) { PetscErrorCode ierr; PetscMPIInt count; PetscReal lmin[3],lmax[3]; PetscFunctionBegin; PetscValidHeaderSpecific(dm,DM_CLASSID,1); ierr = PetscMPIIntCast(dm->dim,&count);CHKERRQ(ierr); ierr = DMDAGetLocalBoundingBox(dm,lmin,lmax);CHKERRQ(ierr); if (gmin) {ierr = MPIU_Allreduce(lmin,gmin,count,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)dm));CHKERRQ(ierr);} if (gmax) {ierr = MPIU_Allreduce(lmax,gmax,count,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)dm));CHKERRQ(ierr);} PetscFunctionReturn(0); }
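/* The reduction behind DMDAGetBoundingBox(), as a stand-alone sketch in plain MPI (not
   PETSc code): each rank contributes its local coordinate extents and MPI_Allreduce with
   MPI_MIN/MPI_MAX produces the global bounding box on every rank.  The per-rank slab is
   invented for illustration. */
#include <mpi.h>
#include <stdio.h>

int main(int argc,char **argv)
{
  int    rank;
  double lmin[3],lmax[3],gmin[3],gmax[3];

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  /* pretend rank r owns the slab [r,r+1] x [0,1] x [0,1] */
  lmin[0] = rank;   lmin[1] = 0.0; lmin[2] = 0.0;
  lmax[0] = rank+1; lmax[1] = 1.0; lmax[2] = 1.0;
  MPI_Allreduce(lmin,gmin,3,MPI_DOUBLE,MPI_MIN,MPI_COMM_WORLD);
  MPI_Allreduce(lmax,gmax,3,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  if (!rank) printf("global box in x: [%g,%g]\n",gmin[0],gmax[0]);
  MPI_Finalize();
  return 0;
}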
/*@C PetscSubcommSetNumber - Set total number of subcommunicators. Collective on MPI_Comm Input Parameters: + psubcomm - PetscSubcomm context - nsubcomm - the total number of subcommunicators in psubcomm Level: advanced .keywords: communicator .seealso: PetscSubcommCreate(),PetscSubcommDestroy(),PetscSubcommSetType(),PetscSubcommSetTypeGeneral() @*/ PetscErrorCode PetscSubcommSetNumber(PetscSubcomm psubcomm,PetscInt nsubcomm) { PetscErrorCode ierr; MPI_Comm comm; PetscMPIInt msub,size; PetscFunctionBegin; if (!psubcomm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"PetscSubcomm is not created. Call PetscSubcommCreate() first"); comm = psubcomm->parent; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = PetscMPIIntCast(nsubcomm,&msub);CHKERRQ(ierr); if (msub < 1 || msub > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %d cannot be < 1 or > input comm size %d",msub,size); psubcomm->n = msub; PetscFunctionReturn(0); }
/*@ ISPartitioningCount - Takes an ISPartitioning and determines the number of resulting elements on each (partition) process Collective on IS Input Parameters: + part - a partitioning as generated by MatPartitioningApply() - len - length of the array count; this is the total number of partitions Output Parameter: . count - array of length size, to contain the number of elements assigned to each partition, where size is the number of partitions generated (see notes below). Level: advanced Notes: By default the number of partitions generated (and thus the length of count) is the size of the communicator associated with IS, but it can be set by MatPartitioningSetNParts. The resulting array of lengths can, for instance, serve as input to PCBJacobiSetTotalBlocks(). .seealso: MatPartitioningCreate(), AOCreateBasic(), ISPartitioningToNumbering(), MatPartitioningSetNParts(), MatPartitioningApply() @*/ PetscErrorCode ISPartitioningCount(IS part,PetscInt len,PetscInt count[]) { MPI_Comm comm; PetscInt i,n,*lsizes; const PetscInt *indices; PetscErrorCode ierr; PetscMPIInt npp; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)part,&comm);CHKERRQ(ierr); if (len == PETSC_DEFAULT) { PetscMPIInt size; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); len = (PetscInt) size; } /* count the number of partitions */ ierr = ISGetLocalSize(part,&n);CHKERRQ(ierr); ierr = ISGetIndices(part,&indices);CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG) { PetscInt np = 0,npt; for (i=0; i<n; i++) np = PetscMax(np,indices[i]); ierr = MPI_Allreduce(&np,&npt,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); np = npt+1; /* so that it looks like an MPI_Comm_size output */ if (np > len) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Length of count array %D is less than number of partitions %D",len,np); } #endif /* lsizes - number of elements of each partition on this particular processor sums - total number of "previous" nodes for any particular partition starts - global number of first element in each partition on this processor */ ierr = PetscMalloc(len*sizeof(PetscInt),&lsizes);CHKERRQ(ierr); ierr = PetscMemzero(lsizes,len*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<n; i++) lsizes[indices[i]]++; ierr = ISRestoreIndices(part,&indices);CHKERRQ(ierr); ierr = PetscMPIIntCast(len,&npp);CHKERRQ(ierr); ierr = MPI_Allreduce(lsizes,count,npp,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); ierr = PetscFree(lsizes);CHKERRQ(ierr); PetscFunctionReturn(0); }
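/* The core of ISPartitioningCount() as a stand-alone sketch in plain MPI (not PETSc code):
   histogram the partition id of each locally owned element, then sum the histograms across
   ranks with MPI_Allreduce so every rank sees the size of every partition.  The element
   count and assignment rule are invented. */
#include <mpi.h>
#include <string.h>
#include <stdio.h>

int main(int argc,char **argv)
{
  int rank,i;
  int part[4];                   /* partition id of each of my 4 local elements */
  int lsizes[3],counts[3];       /* assume 3 partitions in total */

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  for (i=0; i<4; i++) part[i] = (rank+i)%3;       /* made-up assignment */
  memset(lsizes,0,sizeof(lsizes));
  for (i=0; i<4; i++) lsizes[part[i]]++;          /* local histogram */
  MPI_Allreduce(lsizes,counts,3,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
  if (!rank) printf("partition sizes: %d %d %d\n",counts[0],counts[1],counts[2]);
  MPI_Finalize();
  return 0;
}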
static PetscErrorCode PetscViewerBinaryMPIIO(PetscViewer viewer,void *data,PetscInt count,PetscDataType dtype,PetscBool write) { PetscViewer_Binary *vbinary = (PetscViewer_Binary*)viewer->data; PetscErrorCode ierr; MPI_Datatype mdtype; PetscMPIInt cnt = PetscMPIIntCast(count); MPI_Status status; MPI_Aint ul,dsize; PetscFunctionBegin; ierr = PetscDataTypeToMPIDataType(dtype,&mdtype);CHKERRQ(ierr); ierr = MPI_File_set_view(vbinary->mfdes,vbinary->moff,mdtype,mdtype,(char *)"native",MPI_INFO_NULL);CHKERRQ(ierr); if (write) { ierr = MPIU_File_write_all(vbinary->mfdes,data,cnt,mdtype,&status);CHKERRQ(ierr); } else { ierr = MPIU_File_read_all(vbinary->mfdes,data,cnt,mdtype,&status);CHKERRQ(ierr); } ierr = MPI_Type_get_extent(mdtype,&ul,&dsize);CHKERRQ(ierr); vbinary->moff += dsize*cnt; PetscFunctionReturn(0); }
static PetscErrorCode VecLoad_Binary_MPIIO(Vec vec, PetscViewer viewer) { PetscErrorCode ierr; PetscMPIInt lsize; PetscScalar *avec; MPI_File mfdes; MPI_Offset off; PetscFunctionBegin; ierr = VecGetArray(vec,&avec);CHKERRQ(ierr); ierr = PetscMPIIntCast(vec->map->n,&lsize);CHKERRQ(ierr); ierr = PetscViewerBinaryGetMPIIODescriptor(viewer,&mfdes);CHKERRQ(ierr); ierr = PetscViewerBinaryGetMPIIOOffset(viewer,&off);CHKERRQ(ierr); off += vec->map->rstart*sizeof(PetscScalar); ierr = MPI_File_set_view(mfdes,off,MPIU_SCALAR,MPIU_SCALAR,(char*)"native",MPI_INFO_NULL);CHKERRQ(ierr); ierr = MPIU_File_read_all(mfdes,avec,lsize,MPIU_SCALAR,MPI_STATUS_IGNORE);CHKERRQ(ierr); ierr = PetscViewerBinaryAddMPIIOOffset(viewer,vec->map->N*sizeof(PetscScalar));CHKERRQ(ierr); ierr = VecRestoreArray(vec,&avec);CHKERRQ(ierr); ierr = VecAssemblyBegin(vec);CHKERRQ(ierr); ierr = VecAssemblyEnd(vec);CHKERRQ(ierr); PetscFunctionReturn(0); }
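/* The collective-read pattern used by VecLoad_Binary_MPIIO(), as a stand-alone sketch in
   plain MPI (not PETSc code): each rank offsets the file view by its starting index and
   reads its contiguous slice with MPI_File_read_all.  The file name "vec.bin" is invented
   and assumed to exist with at least size*nlocal doubles. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc,char **argv)
{
  int        rank,nlocal = 4;
  MPI_File   fh;
  MPI_Offset off;
  double     *slice;

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  slice = (double*)malloc(nlocal*sizeof(double));
  MPI_File_open(MPI_COMM_WORLD,"vec.bin",MPI_MODE_RDONLY,MPI_INFO_NULL,&fh);
  off = (MPI_Offset)rank*nlocal*sizeof(double);   /* my slice starts here */
  MPI_File_set_view(fh,off,MPI_DOUBLE,MPI_DOUBLE,"native",MPI_INFO_NULL);
  MPI_File_read_all(fh,slice,nlocal,MPI_DOUBLE,MPI_STATUS_IGNORE);
  MPI_File_close(&fh);
  free(slice);
  MPI_Finalize();
  return 0;
}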
static PetscErrorCode ISView_General_Binary(IS is,PetscViewer viewer) { PetscErrorCode ierr; PetscBool skipHeader,useMPIIO; IS_General *isa = (IS_General*) is->data; PetscMPIInt rank,size,mesgsize,tag = ((PetscObject)viewer)->tag, mesglen; PetscInt n,N,len,j,tr[2]; int fdes; MPI_Status status; PetscInt message_count,flowcontrolcount,*values; PetscFunctionBegin; /* ierr = ISGetLayout(is,&map);CHKERRQ(ierr); */ ierr = PetscLayoutGetLocalSize(is->map, &n);CHKERRQ(ierr); ierr = PetscLayoutGetSize(is->map, &N);CHKERRQ(ierr); tr[0] = IS_FILE_CLASSID; tr[1] = N; ierr = PetscViewerBinaryGetSkipHeader(viewer,&skipHeader);CHKERRQ(ierr); if (!skipHeader) { ierr = PetscViewerBinaryWrite(viewer,tr,2,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); } ierr = PetscViewerBinaryGetUseMPIIO(viewer,&useMPIIO);CHKERRQ(ierr); #if defined(PETSC_HAVE_MPIIO) if (useMPIIO) { MPI_File mfdes; MPI_Offset off; PetscMPIInt lsize; PetscInt rstart; const PetscInt *iarray; ierr = PetscMPIIntCast(n,&lsize);CHKERRQ(ierr); ierr = PetscViewerBinaryGetMPIIODescriptor(viewer,&mfdes);CHKERRQ(ierr); ierr = PetscViewerBinaryGetMPIIOOffset(viewer,&off);CHKERRQ(ierr); ierr = PetscLayoutGetRange(is->map,&rstart,NULL);CHKERRQ(ierr); off += rstart*(MPI_Offset)sizeof(PetscInt); /* off is MPI_Offset, not PetscMPIInt */ ierr = MPI_File_set_view(mfdes,off,MPIU_INT,MPIU_INT,(char*)"native",MPI_INFO_NULL);CHKERRQ(ierr); ierr = ISGetIndices(is,&iarray);CHKERRQ(ierr); ierr = MPIU_File_write_all(mfdes,(void*)iarray,lsize,MPIU_INT,MPI_STATUS_IGNORE);CHKERRQ(ierr); ierr = ISRestoreIndices(is,&iarray);CHKERRQ(ierr); ierr = PetscViewerBinaryAddMPIIOOffset(viewer,N*(MPI_Offset)sizeof(PetscInt));CHKERRQ(ierr); PetscFunctionReturn(0); } #endif ierr = PetscViewerBinaryGetDescriptor(viewer,&fdes);CHKERRQ(ierr); ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)is),&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PetscObjectComm((PetscObject)is),&size);CHKERRQ(ierr); /* determine maximum message to arrive */ ierr = MPI_Reduce(&n,&len,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)is));CHKERRQ(ierr); ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); if (!rank) { ierr = PetscBinaryWrite(fdes,isa->idx,n,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); ierr = PetscMalloc1(len,&values);CHKERRQ(ierr); ierr = PetscMPIIntCast(len,&mesgsize);CHKERRQ(ierr); /* receive and save messages */ for (j=1; j<size; j++) { ierr = PetscViewerFlowControlStepMaster(viewer,j,&message_count,flowcontrolcount);CHKERRQ(ierr); ierr = MPI_Recv(values,mesgsize,MPIU_INT,j,tag,PetscObjectComm((PetscObject)is),&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPIU_INT,&mesglen);CHKERRQ(ierr); ierr = PetscBinaryWrite(fdes,values,(PetscInt)mesglen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); } ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); ierr = PetscFree(values);CHKERRQ(ierr); } else { ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); ierr = PetscMPIIntCast(n,&mesgsize);CHKERRQ(ierr); ierr = MPI_Send(isa->idx,mesgsize,MPIU_INT,0,tag,PetscObjectComm((PetscObject)is));CHKERRQ(ierr); ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); } PetscFunctionReturn(0); }
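/* A stand-alone sketch (plain MPI, not PETSc code) of the funnelled write used by
   ISView_General_Binary() when MPI-IO is not used: workers send their arrays to rank 0,
   which receives with an upper-bound count and queries the true length via MPI_Get_count
   before writing.  The buffer bound of 64 integers (so at most 64 ranks) is invented. */
#include <mpi.h>
#include <stdio.h>

int main(int argc,char **argv)
{
  int rank,size;

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  if (rank) {                      /* workers: send a rank-dependent number of integers */
    int buf[64],i,n = rank+1;
    for (i=0; i<n; i++) buf[i] = rank;
    MPI_Send(buf,n,MPI_INT,0,0,MPI_COMM_WORLD);
  } else {                         /* rank 0: receive with an upper bound, ask for the real length */
    int j,buf[64];
    for (j=1; j<size; j++) {
      MPI_Status status;
      int        len;
      MPI_Recv(buf,64,MPI_INT,j,0,MPI_COMM_WORLD,&status);
      MPI_Get_count(&status,MPI_INT,&len);
      printf("from rank %d: %d integers\n",j,len);   /* here the viewer would write them out */
    }
  }
  MPI_Finalize();
  return 0;
}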
PetscErrorCode PCBDDCGraphComputeConnectedComponents(PCBDDCGraph graph) { PetscBool adapt_interface_reduced; MPI_Comm interface_comm; PetscMPIInt size; PetscInt i; PetscErrorCode ierr; PetscFunctionBegin; /* compute connected components locally */ ierr = PetscObjectGetComm((PetscObject)(graph->l2gmap),&interface_comm);CHKERRQ(ierr); ierr = PCBDDCGraphComputeConnectedComponentsLocal(graph);CHKERRQ(ierr); /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ ierr = MPI_Comm_size(interface_comm,&size);CHKERRQ(ierr); adapt_interface_reduced = PETSC_FALSE; if (size > 1) { PetscInt i; PetscBool adapt_interface = PETSC_FALSE; for (i=0;i<graph->n_subsets;i++) { /* We are not sure that on a given subset of the local interface, with two connected components, the latters be the same among sharing subdomains */ if (graph->subset_ncc[i] > 1) { adapt_interface = PETSC_TRUE; break; } } ierr = MPIU_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_BOOL,MPI_LOR,interface_comm);CHKERRQ(ierr); } if (graph->n_subsets && adapt_interface_reduced) { PetscBT subset_cc_adapt; MPI_Request *send_requests,*recv_requests; PetscInt *send_buffer,*recv_buffer; PetscInt sum_requests,start_of_recv,start_of_send; PetscInt *cum_recv_counts; PetscInt *labels; PetscInt ncc,cum_queue,mss,mns,j,k,s; PetscInt **refine_buffer=NULL,*private_labels = NULL; ierr = PetscMalloc1(graph->nvtxs,&labels);CHKERRQ(ierr); ierr = PetscMemzero(labels,graph->nvtxs*sizeof(*labels));CHKERRQ(ierr); for (i=0;i<graph->ncc;i++) for (j=graph->cptr[i];j<graph->cptr[i+1];j++) labels[graph->queue[j]] = i; /* allocate some space */ ierr = PetscMalloc1(graph->n_subsets+1,&cum_recv_counts);CHKERRQ(ierr); ierr = PetscMemzero(cum_recv_counts,(graph->n_subsets+1)*sizeof(*cum_recv_counts));CHKERRQ(ierr); /* first count how many neighbours per connected component I will receive from */ cum_recv_counts[0] = 0; for (i=0;i<graph->n_subsets;i++) cum_recv_counts[i+1] = cum_recv_counts[i]+graph->count[graph->subset_idxs[i][0]]; ierr = PetscMalloc1(cum_recv_counts[graph->n_subsets],&recv_buffer);CHKERRQ(ierr); ierr = PetscMalloc2(cum_recv_counts[graph->n_subsets],&send_requests,cum_recv_counts[graph->n_subsets],&recv_requests);CHKERRQ(ierr); for (i=0;i<cum_recv_counts[graph->n_subsets];i++) { send_requests[i] = MPI_REQUEST_NULL; recv_requests[i] = MPI_REQUEST_NULL; } /* exchange with my neighbours the number of my connected components on the subset of interface */ sum_requests = 0; for (i=0;i<graph->n_subsets;i++) { PetscMPIInt neigh,tag; PetscInt count,*neighs; count = graph->count[graph->subset_idxs[i][0]]; neighs = graph->neighbours_set[graph->subset_idxs[i][0]]; ierr = PetscMPIIntCast(2*graph->subset_ref_node[i],&tag);CHKERRQ(ierr); for (k=0;k<count;k++) { ierr = PetscMPIIntCast(neighs[k],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&graph->subset_ncc[i],1,MPIU_INT,neigh,tag,interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[sum_requests],1,MPIU_INT,neigh,tag,interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); sum_requests++; } } ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* determine the subsets I have to adapt (those having more than 1 cc) */ ierr = PetscBTCreate(graph->n_subsets,&subset_cc_adapt);CHKERRQ(ierr); ierr = PetscBTMemzero(graph->n_subsets,subset_cc_adapt);CHKERRQ(ierr); for (i=0;i<graph->n_subsets;i++) { if 
(graph->subset_ncc[i] > 1) { ierr = PetscBTSet(subset_cc_adapt,i);CHKERRQ(ierr); continue; } for (j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ if (recv_buffer[j] > 1) { ierr = PetscBTSet(subset_cc_adapt,i);CHKERRQ(ierr); break; } } } ierr = PetscFree(recv_buffer);CHKERRQ(ierr); /* determine send/recv buffers sizes */ j = 0; mss = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { j += graph->subset_size[i]; mss = PetscMax(graph->subset_size[i],mss); } } k = 0; mns = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { k += (cum_recv_counts[i+1]-cum_recv_counts[i])*graph->subset_size[i]; mns = PetscMax(cum_recv_counts[i+1]-cum_recv_counts[i],mns); } } ierr = PetscMalloc2(j,&send_buffer,k,&recv_buffer);CHKERRQ(ierr); /* fill send buffer (order matters: subset_idxs ordered by global ordering) */ j = 0; for (i=0;i<graph->n_subsets;i++) if (PetscBTLookup(subset_cc_adapt,i)) for (k=0;k<graph->subset_size[i];k++) send_buffer[j++] = labels[graph->subset_idxs[i][k]]; /* now exchange the data */ start_of_recv = 0; start_of_send = 0; sum_requests = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { PetscMPIInt neigh,tag; PetscInt size_of_send = graph->subset_size[i]; j = graph->subset_idxs[i][0]; ierr = PetscMPIIntCast(2*graph->subset_ref_node[i]+1,&tag);CHKERRQ(ierr); for (k=0;k<graph->count[j];k++) { ierr = PetscMPIIntCast(graph->neighbours_set[j][k],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neigh,tag,interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_send,MPIU_INT,neigh,tag,interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); start_of_recv += size_of_send; sum_requests++; } start_of_send += size_of_send; } } ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* refine connected components */ start_of_recv = 0; /* allocate some temporary space */ if (mss) { ierr = PetscMalloc1(mss,&refine_buffer);CHKERRQ(ierr); ierr = PetscMalloc2(mss*(mns+1),&refine_buffer[0],mss,&private_labels);CHKERRQ(ierr); } ncc = 0; cum_queue = 0; graph->cptr[0] = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { PetscInt subset_counter = 0; PetscInt sharingprocs = cum_recv_counts[i+1]-cum_recv_counts[i]+1; /* count myself */ PetscInt buffer_size = graph->subset_size[i]; /* compute pointers */ for (j=1;j<buffer_size;j++) refine_buffer[j] = refine_buffer[j-1] + sharingprocs; /* analyze contributions from subdomains that share the i-th subset The stricture of refine_buffer is suitable to find intersections of ccs among sharingprocs. 
supposing the current subset is shared by 3 processes and has dimension 5 with global dofs 0,1,2,3,4 (local 0,4,3,1,2) sharing procs connected components: neigh 0: [0 1 4], [2 3], labels [4,7] (2 connected components) neigh 1: [0 1], [2 3 4], labels [3 2] (2 connected components) neigh 2: [0 4], [1], [2 3], labels [1 5 6] (3 connected components) refine_buffer will be filled as: [ 4, 3, 1; 4, 2, 1; 7, 2, 6; 4, 3, 5; 7, 2, 6; ]; The connected components in local ordering are [0], [1], [2 3], [4] */ /* fill temp_buffer */ for (k=0;k<buffer_size;k++) refine_buffer[k][0] = labels[graph->subset_idxs[i][k]]; for (j=0;j<sharingprocs-1;j++) { for (k=0;k<buffer_size;k++) refine_buffer[k][j+1] = recv_buffer[start_of_recv+k]; start_of_recv += buffer_size; } ierr = PetscMemzero(private_labels,buffer_size*sizeof(PetscInt));CHKERRQ(ierr); for (j=0;j<buffer_size;j++) { if (!private_labels[j]) { /* found a new cc */ PetscBool same_set; graph->cptr[ncc] = cum_queue; ncc++; subset_counter++; private_labels[j] = subset_counter; graph->queue[cum_queue++] = graph->subset_idxs[i][j]; for (k=j+1;k<buffer_size;k++) { /* check for other nodes in new cc */ same_set = PETSC_TRUE; for (s=0;s<sharingprocs;s++) { if (refine_buffer[j][s] != refine_buffer[k][s]) { same_set = PETSC_FALSE; break; } } if (same_set) { private_labels[k] = subset_counter; graph->queue[cum_queue++] = graph->subset_idxs[i][k]; } } } } graph->cptr[ncc] = cum_queue; graph->subset_ncc[i] = subset_counter; graph->queue_sorted = PETSC_FALSE; } else { /* this subset does not need to be adapted */ ierr = PetscMemcpy(graph->queue+cum_queue,graph->subset_idxs[i],graph->subset_size[i]*sizeof(PetscInt));CHKERRQ(ierr); ncc++; cum_queue += graph->subset_size[i]; graph->cptr[ncc] = cum_queue; } } graph->cptr[ncc] = cum_queue; graph->ncc = ncc; if (mss) { ierr = PetscFree2(refine_buffer[0],private_labels);CHKERRQ(ierr); ierr = PetscFree(refine_buffer);CHKERRQ(ierr); } ierr = PetscFree(labels);CHKERRQ(ierr); ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree2(send_requests,recv_requests);CHKERRQ(ierr); ierr = PetscFree2(send_buffer,recv_buffer);CHKERRQ(ierr); ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); ierr = PetscBTDestroy(&subset_cc_adapt);CHKERRQ(ierr); } /* Determine if we are in 2D or 3D */ if (!graph->twodimset) { PetscBool twodim = PETSC_TRUE; for (i=0;i<graph->ncc;i++) { PetscInt repdof = graph->queue[graph->cptr[i]]; PetscInt ccsize = graph->cptr[i+1]-graph->cptr[i]; if (graph->count[repdof] > 1 && ccsize > graph->custom_minimal_size) { twodim = PETSC_FALSE; break; } } ierr = MPIU_Allreduce(&twodim,&graph->twodim,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)graph->l2gmap));CHKERRQ(ierr); graph->twodimset = PETSC_TRUE; } PetscFunctionReturn(0); }
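/* A stand-alone sketch (plain MPI, not PETSc code) of the paired Isend/Irecv exchange with
   matching tags that the routine above uses to trade connected-component data with each
   neighbouring subdomain, completed by MPI_Waitall.  Here every rank simply exchanges its
   rank with its two ring neighbours. */
#include <mpi.h>
#include <stdio.h>

int main(int argc,char **argv)
{
  int         rank,size,left,right,mine,from_left,from_right;
  MPI_Request reqs[4];

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  left  = (rank-1+size)%size;
  right = (rank+1)%size;
  mine  = rank;
  MPI_Isend(&mine,1,MPI_INT,left,0,MPI_COMM_WORLD,&reqs[0]);   /* tag 0: travels leftwards  */
  MPI_Isend(&mine,1,MPI_INT,right,1,MPI_COMM_WORLD,&reqs[1]);  /* tag 1: travels rightwards */
  MPI_Irecv(&from_right,1,MPI_INT,right,0,MPI_COMM_WORLD,&reqs[2]);
  MPI_Irecv(&from_left,1,MPI_INT,left,1,MPI_COMM_WORLD,&reqs[3]);
  MPI_Waitall(4,reqs,MPI_STATUSES_IGNORE);
  printf("rank %d received %d and %d\n",rank,from_left,from_right);
  MPI_Finalize();
  return 0;
}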
/*@C PetscIntView - Prints an array of integers; useful for debugging. Collective on PetscViewer Input Parameters: + N - number of integers in array . idx - array of integers - viewer - location to print array, PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_STDOUT_SELF or 0 Level: intermediate Developer Notes: idx cannot be const because may be passed to binary viewer where byte swappping is done .seealso: PetscRealView() @*/ PetscErrorCode PetscIntView(PetscInt N,const PetscInt idx[],PetscViewer viewer) { PetscErrorCode ierr; PetscInt j,i,n = N/20,p = N % 20; PetscBool iascii,isbinary; MPI_Comm comm; PetscFunctionBegin; if (!viewer) viewer = PETSC_VIEWER_STDOUT_SELF; PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,3); if (N) PetscValidIntPointer(idx,2); ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); if (iascii) { ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D:",20*i);CHKERRQ(ierr); for (j=0; j<20; j++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer," %D",idx[i*20+j]);CHKERRQ(ierr); } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } if (p) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D:",20*n);CHKERRQ(ierr); for (i=0; i<p; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer," %D",idx[20*n+i]);CHKERRQ(ierr);} ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); } else if (isbinary) { PetscMPIInt rank,size,*sizes,Ntotal,*displs,NN; PetscInt *array; ierr = PetscMPIIntCast(N,&NN);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size > 1) { if (rank) { ierr = MPI_Gather(&NN,1,MPI_INT,0,0,MPI_INT,0,comm);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_INT,0,0,0,MPIU_INT,0,comm);CHKERRQ(ierr); } else { ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sizes);CHKERRQ(ierr); ierr = MPI_Gather(&NN,1,MPI_INT,sizes,1,MPI_INT,0,comm);CHKERRQ(ierr); Ntotal = sizes[0]; ierr = PetscMalloc(size*sizeof(PetscMPIInt),&displs);CHKERRQ(ierr); displs[0] = 0; for (i=1; i<size; i++) { Ntotal += sizes[i]; displs[i] = displs[i-1] + sizes[i-1]; } ierr = PetscMalloc(Ntotal*sizeof(PetscInt),&array);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_INT,array,sizes,displs,MPIU_INT,0,comm);CHKERRQ(ierr); ierr = PetscViewerBinaryWrite(viewer,array,Ntotal,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscFree(sizes);CHKERRQ(ierr); ierr = PetscFree(displs);CHKERRQ(ierr); ierr = PetscFree(array);CHKERRQ(ierr); } } else { ierr = PetscViewerBinaryWrite(viewer,(void*) idx,N,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); } } else { const char *tname; ierr = PetscObjectGetName((PetscObject)viewer,&tname);CHKERRQ(ierr); SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot handle that PetscViewer of type %s",tname); } PetscFunctionReturn(0); }
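/* The gather-to-root pattern of PetscIntView()'s binary branch, as a stand-alone sketch in
   plain MPI (not PETSc code): collect the ragged per-rank lengths with MPI_Gather, build
   displacements on rank 0, then concatenate everything there with MPI_Gatherv.  The local
   data is invented; rank 0 would hand the assembled array to the viewer. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc,char **argv)
{
  int rank,size,i,n,total = 0;
  int *mine,*sizes = NULL,*displs = NULL,*all = NULL;

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  n    = rank+1;
  mine = (int*)malloc(n*sizeof(int));
  for (i=0; i<n; i++) mine[i] = rank;
  if (!rank) sizes = (int*)malloc(size*sizeof(int));
  MPI_Gather(&n,1,MPI_INT,sizes,1,MPI_INT,0,MPI_COMM_WORLD);
  if (!rank) {
    displs = (int*)malloc(size*sizeof(int));
    displs[0] = 0;
    for (i=1; i<size; i++) displs[i] = displs[i-1]+sizes[i-1];
    total = displs[size-1]+sizes[size-1];
    all   = (int*)malloc(total*sizeof(int));
  }
  MPI_Gatherv(mine,n,MPI_INT,all,sizes,displs,MPI_INT,0,MPI_COMM_WORLD);
  /* rank 0 now owns the concatenated array of length 'total' */
  free(mine); free(sizes); free(displs); free(all);
  MPI_Finalize();
  return 0;
}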
/*@C PetscScalarView - Prints an array of scalars; useful for debugging. Collective on PetscViewer Input Parameters: + N - number of scalars in array . idx - array of scalars - viewer - location to print array, PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_STDOUT_SELF or 0 Level: intermediate Developer Notes: idx cannot be const because may be passed to binary viewer where byte swappping is done .seealso: PetscIntView(), PetscRealView() @*/ PetscErrorCode PetscScalarView(PetscInt N,const PetscScalar idx[],PetscViewer viewer) { PetscErrorCode ierr; PetscInt j,i,n = N/3,p = N % 3; PetscBool iascii,isbinary; MPI_Comm comm; PetscFunctionBegin; if (!viewer) viewer = PETSC_VIEWER_STDOUT_SELF; PetscValidHeader(viewer,3); PetscValidScalarPointer(idx,2); ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); if (iascii) { ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%2d:",3*i);CHKERRQ(ierr); for (j=0; j<3; j++) { #if defined(PETSC_USE_COMPLEX) ierr = PetscViewerASCIISynchronizedPrintf(viewer," (%12.4e,%12.4e)", PetscRealPart(idx[i*3+j]),PetscImaginaryPart(idx[i*3+j]));CHKERRQ(ierr); #else ierr = PetscViewerASCIISynchronizedPrintf(viewer," %12.4e",idx[i*3+j]);CHKERRQ(ierr); #endif } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } if (p) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%2d:",3*n);CHKERRQ(ierr); for (i=0; i<p; i++) { #if defined(PETSC_USE_COMPLEX) ierr = PetscViewerASCIISynchronizedPrintf(viewer," (%12.4e,%12.4e)", PetscRealPart(idx[n*3+i]),PetscImaginaryPart(idx[n*3+i]));CHKERRQ(ierr); #else ierr = PetscViewerASCIISynchronizedPrintf(viewer," %12.4e",idx[3*n+i]);CHKERRQ(ierr); #endif } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); } else if (isbinary) { PetscMPIInt size,rank,*sizes,Ntotal,*displs,NN; PetscScalar *array; ierr = PetscMPIIntCast(N,&NN);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size > 1) { if (rank) { ierr = MPI_Gather(&NN,1,MPI_INT,0,0,MPI_INT,0,comm);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_SCALAR,0,0,0,MPIU_SCALAR,0,comm);CHKERRQ(ierr); } else { ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sizes);CHKERRQ(ierr); ierr = MPI_Gather(&NN,1,MPI_INT,sizes,1,MPI_INT,0,comm);CHKERRQ(ierr); Ntotal = sizes[0]; ierr = PetscMalloc(size*sizeof(PetscMPIInt),&displs);CHKERRQ(ierr); displs[0] = 0; for (i=1; i<size; i++) { Ntotal += sizes[i]; displs[i] = displs[i-1] + sizes[i-1]; } ierr = PetscMalloc(Ntotal*sizeof(PetscScalar),&array);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_SCALAR,array,sizes,displs,MPIU_SCALAR,0,comm);CHKERRQ(ierr); ierr = PetscViewerBinaryWrite(viewer,array,Ntotal,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscFree(sizes);CHKERRQ(ierr); ierr = PetscFree(displs);CHKERRQ(ierr); ierr = PetscFree(array);CHKERRQ(ierr); } } else { ierr = PetscViewerBinaryWrite(viewer,(void*) idx,N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); } } else { const char *tname; ierr = PetscObjectGetName((PetscObject)viewer,&tname);CHKERRQ(ierr); SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot handle that PetscViewer of type %s",tname); } 
PetscFunctionReturn(0); }
/* PEPBuildDiagonalScaling - compute two diagonal matrices to be applied for balancing in polynomial eigenproblems. */ PetscErrorCode PEPBuildDiagonalScaling(PEP pep) { PetscErrorCode ierr; PetscInt it,i,j,k,nmat,nr,e,nz,lst,lend,nc=0,*cols,emax,emin,emaxl,eminl; const PetscInt *cidx,*ridx; Mat M,*T,A; PetscMPIInt n; PetscBool cont=PETSC_TRUE,flg=PETSC_FALSE; PetscScalar *array,*Dr,*Dl,t; PetscReal l2,d,*rsum,*aux,*csum,w=1.0; MatStructure str; MatInfo info; PetscFunctionBegin; l2 = 2*PetscLogReal(2.0); nmat = pep->nmat; ierr = PetscMPIIntCast(pep->n,&n); ierr = STGetMatStructure(pep->st,&str);CHKERRQ(ierr); ierr = PetscMalloc1(nmat,&T);CHKERRQ(ierr); for (k=0;k<nmat;k++) { ierr = STGetTOperators(pep->st,k,&T[k]);CHKERRQ(ierr); } /* Form local auxiliar matrix M */ ierr = PetscObjectTypeCompareAny((PetscObject)T[0],&cont,MATMPIAIJ,MATSEQAIJ);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]),PETSC_ERR_SUP,"Only for MPIAIJ or SEQAIJ matrix types"); ierr = PetscObjectTypeCompare((PetscObject)T[0],MATMPIAIJ,&cont);CHKERRQ(ierr); if (cont) { ierr = MatMPIAIJGetLocalMat(T[0],MAT_INITIAL_MATRIX,&M);CHKERRQ(ierr); flg = PETSC_TRUE; } else { ierr = MatDuplicate(T[0],MAT_COPY_VALUES,&M);CHKERRQ(ierr); } ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); for (k=1;k<nmat;k++) { if (flg) { ierr = MatMPIAIJGetLocalMat(T[k],MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); } else { if (str==SAME_NONZERO_PATTERN) { ierr = MatCopy(T[k],A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } else { ierr = MatDuplicate(T[k],MAT_COPY_VALUES,&A);CHKERRQ(ierr); } } ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr); w *= pep->slambda*pep->slambda*pep->sfactor; ierr = MatAXPY(M,w,A,str);CHKERRQ(ierr); if (flg || str!=SAME_NONZERO_PATTERN || k==nmat-2) { ierr = MatDestroy(&A);CHKERRQ(ierr); } } ierr = MatGetRowIJ(M,0,PETSC_FALSE,PETSC_FALSE,&nr,&ridx,&cidx,&cont);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]), PETSC_ERR_SUP,"It is not possible to compute scaling diagonals for these PEP matrices"); ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = VecGetOwnershipRange(pep->Dl,&lst,&lend);CHKERRQ(ierr); ierr = PetscMalloc4(nr,&rsum,pep->n,&csum,pep->n,&aux,PetscMin(pep->n-lend+lst,nz),&cols);CHKERRQ(ierr); ierr = VecSet(pep->Dr,1.0);CHKERRQ(ierr); ierr = VecSet(pep->Dl,1.0);CHKERRQ(ierr); ierr = VecGetArray(pep->Dl,&Dl);CHKERRQ(ierr); ierr = VecGetArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) { /* Search non-zero columns outsize lst-lend */ if (aux[cidx[j]]==0 && (cidx[j]<lst || lend<=cidx[j])) cols[nc++] = cidx[j]; /* Local column sums */ aux[cidx[j]] += PetscAbsScalar(array[j]); } for (it=0;it<pep->sits && cont;it++) { emaxl = 0; eminl = 0; /* Column sum */ if (it>0) { /* it=0 has been already done*/ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) aux[cidx[j]] += PetscAbsScalar(array[j]); ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); } ierr = 
MPI_Allreduce(aux,csum,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)pep->Dr)); /* Update Dr */ for (j=lst;j<lend;j++) { d = PetscLogReal(csum[j])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dr[j-lst] *= d; aux[j] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } for (j=0;j<nc;j++) { d = PetscLogReal(csum[cols[j]])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); aux[cols[j]] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } /* Scale M */ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (j=0;j<nz;j++) { array[j] *= aux[cidx[j]]; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Row sum */ ierr = PetscMemzero(rsum,nr*sizeof(PetscReal));CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nr;i++) { for (j=ridx[i];j<ridx[i+1];j++) rsum[i] += PetscAbsScalar(array[j]); /* Update Dl */ d = PetscLogReal(rsum[i])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dl[i] *= d; /* Scale M */ for (j=ridx[i];j<ridx[i+1];j++) array[j] *= d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Compute global max and min */ ierr = MPI_Allreduce(&emaxl,&emax,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)pep->Dl)); ierr = MPI_Allreduce(&eminl,&emin,1,MPIU_INT,MPIU_MIN,PetscObjectComm((PetscObject)pep->Dl)); if (emax<=emin+2) cont = PETSC_FALSE; } ierr = VecRestoreArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = VecRestoreArray(pep->Dl,&Dl);CHKERRQ(ierr); /* Free memory*/ ierr = MatDestroy(&M);CHKERRQ(ierr); ierr = PetscFree4(rsum,csum,aux,cols);CHKERRQ(ierr); ierr = PetscFree(T);CHKERRQ(ierr); PetscFunctionReturn(0); }
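/* A simplified, stand-alone sketch (not SLEPc code) of the power-of-two rounding used by
   PEPBuildDiagonalScaling() when it updates Dl/Dr: a row or column sum s is turned into the
   scaling factor 2^e with e = -round(log2(s)), so the scaled sum lands near 1.  The actual
   routine halves the exponent (it divides by 2*log(2)) because its work matrix M stores
   squared magnitudes; the plain version is shown here for clarity.  Link with -lm. */
#include <math.h>
#include <stdio.h>

/* power-of-two balancing factor d = 2^e with e = -round(log2(s)) */
static double balance_factor(double s)
{
  double d = log(s)/log(2.0);
  int    e = -(int)((d < 0.0) ? (d-0.5) : (d+0.5));
  return pow(2.0,(double)e);
}

int main(void)
{
  double sums[3] = {0.3,7.0,1024.0};
  int    i;
  for (i=0; i<3; i++) printf("sum %g -> factor %g -> scaled %g\n",sums[i],balance_factor(sums[i]),sums[i]*balance_factor(sums[i]));
  return 0;
}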
PETSC_EXTERN PetscErrorCode AOCreate_Basic(AO ao) { AO_Basic *aobasic; PetscMPIInt size,rank,count,*lens,*disp; PetscInt napp,*allpetsc,*allapp,ip,ia,N,i,*petsc=NULL,start; PetscErrorCode ierr; IS isapp=ao->isapp,ispetsc=ao->ispetsc; MPI_Comm comm; const PetscInt *myapp,*mypetsc=NULL; PetscFunctionBegin; /* create special struct aobasic */ ierr = PetscNewLog(ao,&aobasic);CHKERRQ(ierr); ao->data = (void*) aobasic; ierr = PetscMemcpy(ao->ops,&AOOps_Basic,sizeof(struct _AOOps));CHKERRQ(ierr); ierr = PetscObjectChangeTypeName((PetscObject)ao,AOBASIC);CHKERRQ(ierr); ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr); ierr = ISGetIndices(isapp,&myapp);CHKERRQ(ierr); ierr = PetscMPIIntCast(napp,&count);CHKERRQ(ierr); /* transmit all lengths to all processors */ ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = PetscMalloc2(size, &lens,size,&disp);CHKERRQ(ierr); ierr = MPI_Allgather(&count, 1, MPI_INT, lens, 1, MPI_INT, comm);CHKERRQ(ierr); N = 0; for (i = 0; i < size; i++) { ierr = PetscMPIIntCast(N,disp+i);CHKERRQ(ierr); /* = sum(lens[j]), j< i */ N += lens[i]; } ao->N = N; ao->n = N; /* If mypetsc is 0 then use "natural" numbering */ if (napp) { if (!ispetsc) { start = disp[rank]; ierr = PetscMalloc1((napp+1), &petsc);CHKERRQ(ierr); for (i=0; i<napp; i++) petsc[i] = start + i; } else { ierr = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr); petsc = (PetscInt*)mypetsc; } } /* get all indices on all processors */ ierr = PetscMalloc2(N,&allpetsc,N,&allapp);CHKERRQ(ierr); ierr = MPI_Allgatherv(petsc, count, MPIU_INT, allpetsc, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)myapp, count, MPIU_INT, allapp, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = PetscFree2(lens,disp);CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG) { PetscInt *sorted; ierr = PetscMalloc1(N,&sorted);CHKERRQ(ierr); ierr = PetscMemcpy(sorted,allpetsc,N*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscSortInt(N,sorted);CHKERRQ(ierr); for (i=0; i<N; i++) { if (sorted[i] != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"PETSc ordering requires a permutation of numbers 0 to N-1\n it is missing %D has %D",i,sorted[i]); } ierr = PetscMemcpy(sorted,allapp,N*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscSortInt(N,sorted);CHKERRQ(ierr); for (i=0; i<N; i++) { if (sorted[i] != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Application ordering requires a permutation of numbers 0 to N-1\n it is missing %D has %D",i,sorted[i]); } ierr = PetscFree(sorted);CHKERRQ(ierr); } #endif /* generate a list of application and PETSc node numbers */ ierr = PetscMalloc2(N, &aobasic->app,N,&aobasic->petsc);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)ao,2*N*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(aobasic->app, N*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(aobasic->petsc, N*sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < N; i++) { ip = allpetsc[i]; ia = allapp[i]; /* check there are no duplicates */ if (aobasic->app[ip]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Duplicate in PETSc ordering at position %d. Already mapped to %d, not %d.", i, aobasic->app[ip]-1, ia); aobasic->app[ip] = ia + 1; if (aobasic->petsc[ia]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Duplicate in Application ordering at position %d. 
Already mapped to %d, not %d.", i, aobasic->petsc[ia]-1, ip); aobasic->petsc[ia] = ip + 1; } if (napp && !mypetsc) { ierr = PetscFree(petsc);CHKERRQ(ierr); } ierr = PetscFree2(allpetsc,allapp);CHKERRQ(ierr); /* shift indices down by one */ for (i = 0; i < N; i++) { aobasic->app[i]--; aobasic->petsc[i]--; } ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr); if (napp) { if (ispetsc) { ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr); } else { ierr = PetscFree(petsc);CHKERRQ(ierr); } } PetscFunctionReturn(0); }
PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) { PetscErrorCode ierr; PetscMPIInt size,*ncolsonproc,*disp,nn; PetscInt i,n,nrows,nrows_i,j,k,m,ncols,col,*rowhit,cstart,cend,colb; const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*row=NULL,*ltog=NULL; PetscInt nis=iscoloring->n,nctot,*cols; IS *isa; ISLocalToGlobalMapping map=mat->cmap->mapping; PetscInt ctype=c->ctype,*spidxA,*spidxB,nz,bs,bs2,spidx; Mat A,B; PetscScalar *A_val,*B_val,**valaddrhit; MatEntry *Jentry; MatEntry2 *Jentry2; PetscBool isBAIJ; PetscInt bcols=c->bcols; #if defined(PETSC_USE_CTABLE) PetscTable colmap=NULL; #else PetscInt *colmap=NULL; /* local col number of off-diag col */ #endif PetscFunctionBegin; if (ctype == IS_COLORING_GHOSTED) { if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr); } ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr); if (isBAIJ) { Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; Mat_SeqBAIJ *spA,*spB; A = baij->A; spA = (Mat_SeqBAIJ*)A->data; A_val = spA->a; B = baij->B; spB = (Mat_SeqBAIJ*)B->data; B_val = spB->a; nz = spA->nz + spB->nz; /* total nonzero entries of mat */ if (!baij->colmap) { ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); colmap = baij->colmap; } ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ PetscInt *garray; ierr = PetscMalloc1(B->cmap->n,&garray);CHKERRQ(ierr); for (i=0; i<baij->B->cmap->n/bs; i++) { for (j=0; j<bs; j++) { garray[i*bs+j] = bs*baij->garray[i]+j; } } ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); ierr = PetscFree(garray);CHKERRQ(ierr); } } else { Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; Mat_SeqAIJ *spA,*spB; A = aij->A; spA = (Mat_SeqAIJ*)A->data; A_val = spA->a; B = aij->B; spB = (Mat_SeqAIJ*)B->data; B_val = spB->a; nz = spA->nz + spB->nz; /* total nonzero entries of mat */ if (!aij->colmap) { /* Allow access to data structures of local part of matrix - creates aij->colmap which maps global column number to local number in part B */ ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); colmap = aij->colmap; } ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr); } } m = mat->rmap->n/bs; cstart = mat->cmap->rstart/bs; cend = mat->cmap->rend/bs; ierr = PetscMalloc1(nis,&c->ncolumns);CHKERRQ(ierr); ierr = PetscMalloc1(nis,&c->columns);CHKERRQ(ierr); ierr = PetscMalloc1(nis,&c->nrows);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); if (c->htype[0] == 'd') { ierr = PetscMalloc1(nz,&Jentry);CHKERRQ(ierr);
ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr); c->matentry = Jentry; } else if (c->htype[0] == 'w') { ierr = PetscMalloc1(nz,&Jentry2);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));CHKERRQ(ierr); c->matentry2 = Jentry2; } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported"); ierr = PetscMalloc2(m+1,&rowhit,m+1,&valaddrhit);CHKERRQ(ierr); nz = 0; ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); for (i=0; i<nis; i++) { /* for each local color */ ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); c->ncolumns[i] = n; /* local number of columns of this color on this process */ if (n) { ierr = PetscMalloc1(n,&c->columns[i]);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); } else { c->columns[i] = 0; } if (ctype == IS_COLORING_GLOBAL) { /* Determine nctot, the total (parallel) number of columns of this color */ ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); ierr = PetscMalloc2(size,&ncolsonproc,size,&disp);CHKERRQ(ierr); /* ncolsonproc[j]: local ncolumns on proc[j] of this color */ ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; if (!nctot) { ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); } disp[0] = 0; for (j=1; j<size; j++) { disp[j] = disp[j-1] + ncolsonproc[j-1]; } /* Get cols, the complete list of columns for this color on each process */ ierr = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); } else if (ctype == IS_COLORING_GHOSTED) { /* Determine local number of columns of this color on this process, including ghost points */ nctot = n; ierr = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr); ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); /* Mark all rows affect by these columns */ ierr = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr); bs2 = bs*bs; nrows_i = 0; for (j=0; j<nctot; j++) { /* loop over columns*/ if (ctype == IS_COLORING_GHOSTED) { col = ltog[cols[j]]; } else { col = cols[j]; } if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */ row = A_cj + A_ci[col-cstart]; nrows = A_ci[col-cstart+1] - A_ci[col-cstart]; nrows_i += nrows; /* loop over columns of A marking them in rowhit */ for (k=0; k<nrows; k++) { /* set valaddrhit for part A */ spidx = bs2*spidxA[A_ci[col-cstart] + k]; valaddrhit[*row] = &A_val[spidx]; rowhit[*row++] = col - cstart + 1; /* local column index */ } } else { /* column is in B, off-diagonal block of mat */ #if defined(PETSC_USE_CTABLE) ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr); colb--; #else colb = colmap[col] - 1; /* local column index */ #endif if (colb == -1) { nrows = 0; } else { colb = colb/bs; row = B_cj + B_ci[colb]; nrows = B_ci[colb+1] - B_ci[colb]; } nrows_i += nrows; /* loop over columns of B marking them in rowhit */ for (k=0; k<nrows; k++) { /* set valaddrhit for part B */ spidx = bs2*spidxB[B_ci[colb] + k]; 
valaddrhit[*row] = &B_val[spidx]; rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */ } } } c->nrows[i] = nrows_i; if (c->htype[0] == 'd') { for (j=0; j<m; j++) { if (rowhit[j]) { Jentry[nz].row = j; /* local row index */ Jentry[nz].col = rowhit[j] - 1; /* local column index */ Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ nz++; } } } else { /* c->htype == 'wp' */ for (j=0; j<m; j++) { if (rowhit[j]) { Jentry2[nz].row = j; /* local row index */ Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ nz++; } } } ierr = PetscFree(cols);CHKERRQ(ierr); } if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ ierr = MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);CHKERRQ(ierr); } if (isBAIJ) { ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); ierr = PetscMalloc1(bs*mat->rmap->n,&c->dy);CHKERRQ(ierr); } else { ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); } ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); if (ctype == IS_COLORING_GHOSTED) { ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr); } ierr = PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,*tags,done,i; MPI_Aint lb,unitbytes; char *tdata; MPI_Request *sendreqs,*usendreqs,*req,barrier; PetscSegBuffer segrank,segdata,segreq; PetscBool barrier_started; PetscFunctionBegin; ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); for (i=0; i<ntags; i++) { ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); } ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); tdata = (char*)todata; ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); /* Post synchronous sends */ for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the * synchronous messages above. */ for (i=0; i<nto; i++) { PetscMPIInt k; for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, * but we need to work around it. 
*/ barrier_started = PETSC_FALSE; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank,k; void *buf; ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); nrecvs++; } if (!barrier_started) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { #if defined(PETSC_HAVE_MPI_IBARRIER) ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); #elif defined(PETSC_HAVE_MPIX_IBARRIER) ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); #endif barrier_started = PETSC_TRUE; } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); *toreqs = usendreqs; ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); ierr = PetscFree(sendreqs);CHKERRQ(ierr); ierr = PetscFree(tags);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); PetscFunctionReturn(0); }
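/*
   Usage sketch: the request-based exchange above is an internal building block; callers usually
   go through the plain PetscCommBuildTwoSided() interface.  The example below is illustrative
   only (a ring exchange in which every rank sends one integer to its right neighbour and then
   learns who sent to it); the Ibarrier algorithm implemented here is typically selected at
   runtime with an option such as -build_twosided ibarrier.
*/
static PetscErrorCode TwoSidedRingExample(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,i,nfrom;
  PetscMPIInt    toranks[1],todata[1],*fromranks,*fromdata;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  toranks[0] = (rank+1)%size;          /* every rank sends to its right neighbour */
  todata[0]  = rank;                   /* payload: the sender's rank */
  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,1,toranks,todata,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) {            /* here nfrom is 1: the left neighbour */
    ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] received %d from rank %d\n",rank,fromdata[i],fromranks[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}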
EXTERN_C_END #undef __FUNCT__ #define __FUNCT__ "MatGetFactor_aij_superlu_dist" PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) { Mat B; Mat_SuperLU_DIST *lu; PetscErrorCode ierr; PetscInt M=A->rmap->N,N=A->cmap->N,indx; PetscMPIInt size; superlu_options_t options; PetscBool flg; const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; const char *rowperm[] = {"LargeDiag","NATURAL"}; const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; PetscFunctionBegin; /* Create the factorization matrix */ ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; B->ops->view = MatView_SuperLU_DIST; B->ops->destroy = MatDestroy_SuperLU_DIST; ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);CHKERRQ(ierr); B->factortype = MAT_FACTOR_LU; ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); B->spptr = lu; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ParSymbFact = NO; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.IterRefine = DOUBLE; options.Trans = NOTRANS; options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); /* Default num of process columns and rows */ lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + PetscSqrtReal((PetscReal)size))); if (!lu->npcol) lu->npcol = 1; while (lu->npcol > 0) { lu->nprow = PetscMPIIntCast(size/lu->npcol); if (size == lu->nprow * lu->npcol) break; lu->npcol --; } ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); lu->MatInputMode = DISTRIBUTED; ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); if (!flg) { options.Equil = NO; } ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,2,rowperm[0],&indx,&flg);CHKERRQ(ierr); if (flg) { switch (indx) { case 0: options.RowPerm = LargeDiag; break; case 1: options.RowPerm = NOROWPERM; break; } } ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column 
permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr); if (flg) { switch (indx) { case 0: options.ColPerm = NATURAL; break; case 1: options.ColPerm = MMD_AT_PLUS_A; break; case 2: options.ColPerm = MMD_ATA; break; case 3: options.ColPerm = METIS_AT_PLUS_A; break; case 4: options.ColPerm = PARMETIS; /* only works for np>1 */ break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); } } ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); if (!flg) { options.ReplaceTinyPivot = NO; } options.ParSymbFact = NO; ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); if (flg){ #ifdef PETSC_HAVE_PARMETIS options.ParSymbFact = YES; options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ #else printf("parsymbfact needs PARMETIS"); #endif } lu->FactPattern = SamePattern_SameRowPerm; ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[1],&indx,&flg);CHKERRQ(ierr); if (flg) { switch (indx) { case 0: lu->FactPattern = SamePattern; break; case 1: lu->FactPattern = SamePattern_SameRowPerm; break; } } options.IterRefine = NOREFINE; ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); if (flg) { options.IterRefine = SLU_DOUBLE; } if (PetscLogPrintInfo) { options.PrintStat = YES; } else { options.PrintStat = NO; } ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None", (PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,0);CHKERRQ(ierr); PetscOptionsEnd(); lu->options = options; lu->options.Fact = DOFACT; lu->matsolve_iscalled = PETSC_FALSE; lu->matmatsolve_iscalled = PETSC_FALSE; *F = B; PetscFunctionReturn(0); }
PetscErrorCode VecView_Seq_Binary(Vec xin,PetscViewer viewer) { PetscErrorCode ierr; int fdes; PetscInt n = xin->map->n,classid=VEC_FILE_CLASSID; FILE *file; const PetscScalar *xv; #if defined(PETSC_HAVE_MPIIO) PetscBool isMPIIO; #endif PetscBool skipHeader; PetscViewerFormat format; PetscFunctionBegin; /* Write vector header */ ierr = PetscViewerBinaryGetSkipHeader(viewer,&skipHeader);CHKERRQ(ierr); if (!skipHeader) { ierr = PetscViewerBinaryWrite(viewer,&classid,1,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); ierr = PetscViewerBinaryWrite(viewer,&n,1,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); } /* Write vector contents */ #if defined(PETSC_HAVE_MPIIO) ierr = PetscViewerBinaryGetUseMPIIO(viewer,&isMPIIO);CHKERRQ(ierr); if (!isMPIIO) { #endif ierr = PetscViewerBinaryGetDescriptor(viewer,&fdes);CHKERRQ(ierr); ierr = VecGetArrayRead(xin,&xv);CHKERRQ(ierr); ierr = PetscBinaryWrite(fdes,(void*)xv,n,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); ierr = VecRestoreArrayRead(xin,&xv);CHKERRQ(ierr); ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); if (format == PETSC_VIEWER_BINARY_MATLAB) { MPI_Comm comm; FILE *info; const char *name; ierr = PetscObjectGetName((PetscObject)xin,&name);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); ierr = PetscViewerBinaryGetInfoPointer(viewer,&info);CHKERRQ(ierr); ierr = PetscFPrintf(comm,info,"#--- begin code written by PetscViewerBinary for MATLAB format ---#\n");CHKERRQ(ierr); ierr = PetscFPrintf(comm,info,"#$$ Set.%s = PetscBinaryRead(fd);\n",name);CHKERRQ(ierr); ierr = PetscFPrintf(comm,info,"#--- end code written by PetscViewerBinary for MATLAB format ---#\n\n");CHKERRQ(ierr); } #if defined(PETSC_HAVE_MPIIO) } else { MPI_Offset off; MPI_File mfdes; PetscMPIInt lsize; ierr = PetscMPIIntCast(n,&lsize);CHKERRQ(ierr); ierr = PetscViewerBinaryGetMPIIODescriptor(viewer,&mfdes);CHKERRQ(ierr); ierr = PetscViewerBinaryGetMPIIOOffset(viewer,&off);CHKERRQ(ierr); ierr = MPI_File_set_view(mfdes,off,MPIU_SCALAR,MPIU_SCALAR,(char*)"native",MPI_INFO_NULL);CHKERRQ(ierr); ierr = VecGetArrayRead(xin,&xv);CHKERRQ(ierr); ierr = MPIU_File_write_all(mfdes,(void*)xv,lsize,MPIU_SCALAR,MPI_STATUS_IGNORE);CHKERRQ(ierr); ierr = VecRestoreArrayRead(xin,&xv);CHKERRQ(ierr); ierr = PetscViewerBinaryAddMPIIOOffset(viewer,n*sizeof(PetscScalar));CHKERRQ(ierr); } #endif ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); if (file) { if (((PetscObject)xin)->prefix) { ierr = PetscFPrintf(PETSC_COMM_SELF,file,"-%svecload_block_size %D\n",((PetscObject)xin)->prefix,PetscAbs(xin->map->bs));CHKERRQ(ierr); } else { ierr = PetscFPrintf(PETSC_COMM_SELF,file,"-vecload_block_size %D\n",PetscAbs(xin->map->bs));CHKERRQ(ierr); } } PetscFunctionReturn(0); }
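/*
   Usage sketch: writing a vector with a binary viewer (which dispatches to the routine above for
   sequential vectors) and reading it back.  Illustrative only; "vec.dat" is a placeholder file
   name, and the second vector must already have its type set before VecLoad().
*/
static PetscErrorCode SaveAndLoadVec(Vec x,Vec y)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"vec.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = VecView(x,viewer);CHKERRQ(ierr);                    /* header (classid, length) then the values */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"vec.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = VecLoad(y,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}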
PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx fetidpmat_ctx ) { PetscErrorCode ierr; PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; PCBDDCGraph mat_graph=pcbddc->mat_graph; Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; MPI_Comm comm; Mat ScalingMat; Vec lambda_global; IS IS_l2g_lambda; PetscBool skip_node,fully_redundant; PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; PetscMPIInt rank,size,buf_size,neigh; PetscScalar scalar_value; PetscInt *vertex_indices; PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; PetscInt *aux_sums,*cols_B_delta,*l2g_indices; PetscScalar *array,*scaling_factors,*vals_B_delta; PetscInt *aux_local_numbering_2; /* For communication of scaling factors */ PetscInt *ptrs_buffer,neigh_position; PetscScalar **all_factors,*send_buffer,*recv_buffer; MPI_Request *send_reqs,*recv_reqs; /* tests */ Vec test_vec; PetscBool test_fetidp; PetscViewer viewer; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)(fetidpmat_ctx->pc),&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); /* Default type of lagrange multipliers is non-redundant */ fully_redundant = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);CHKERRQ(ierr); /* Evaluate local and global number of lagrange multipliers */ ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); n_local_lambda = 0; partial_sum = 0; n_boundary_dofs = 0; s = 0; /* Get Vertices used to define the BDDC */ ierr = PCBDDCGetPrimalVerticesLocalIdx(fetidpmat_ctx->pc,&n_vertices,&vertex_indices);CHKERRQ(ierr); dual_size = pcis->n_B-n_vertices; ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_2);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<pcis->n;i++){ j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ if ( j > 0 ) { n_boundary_dofs++; } skip_node = PETSC_FALSE; if ( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ skip_node = PETSC_TRUE; s++; } if (j < 1) { skip_node = PETSC_TRUE; } if ( !skip_node ) { if (fully_redundant) { /* fully redundant set of lagrange multipliers */ n_lambda_for_dof = (j*(j+1))/2; } else { n_lambda_for_dof = j; } n_local_lambda += j; /* needed to evaluate global number of lagrange multipliers */ array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ /* store some data needed */ dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; aux_local_numbering_1[partial_sum] = i; aux_local_numbering_2[partial_sum] = n_lambda_for_dof; partial_sum++; } } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); fetidpmat_ctx->n_lambda = (PetscInt)PetscRealPart(scalar_value); /* compute global ordering of lagrange multipliers and associate l2g map */ ierr = 
PCBDDCSubsetNumbering(comm,matis->mapping,partial_sum,aux_local_numbering_1,aux_local_numbering_2,&i,&aux_global_numbering);CHKERRQ(ierr); if (i != fetidpmat_ctx->n_lambda) { SETERRQ3(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Error in %s: global number of multipliers mismatch! (%d!=%d)\n",__FUNCT__,fetidpmat_ctx->n_lambda,i); } ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); /* init data for scaling factors exchange */ partial_sum = 0; j = 0; ierr = PetscMalloc1(pcis->n_neigh,&ptrs_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&send_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&recv_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n,&all_factors);CHKERRQ(ierr); ptrs_buffer[0]=0; for (i=1;i<pcis->n_neigh;i++) { partial_sum += pcis->n_shared[i]; ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; } ierr = PetscMalloc1(partial_sum,&send_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&recv_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&all_factors[0]);CHKERRQ(ierr); for (i=0;i<pcis->n-1;i++) { j = mat_graph->count[i]; all_factors[i+1]=all_factors[i]+j; } /* scatter B scaling to N vec */ ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); /* communications */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; } ierr = PetscMPIIntCast(ptrs_buffer[i]-ptrs_buffer[i-1],&buf_size);CHKERRQ(ierr); ierr = PetscMPIIntCast(pcis->neigh[i],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&send_reqs[i-1]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* put values in correct places */ for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { k = pcis->shared[i][j]; neigh_position = 0; while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; } } ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree(send_reqs);CHKERRQ(ierr); ierr = PetscFree(recv_reqs);CHKERRQ(ierr); ierr = PetscFree(send_buffer);CHKERRQ(ierr); ierr = PetscFree(recv_buffer);CHKERRQ(ierr); ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); /* Compute B and B_delta (local actions) */ ierr = PetscMalloc1(pcis->n_neigh,&aux_sums);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&l2g_indices);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&vals_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&cols_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&scaling_factors);CHKERRQ(ierr); n_global_lambda=0; partial_sum=0; for (i=0;i<dual_size;i++) { n_global_lambda = aux_global_numbering[i]; j = mat_graph->count[aux_local_numbering_1[i]]; aux_sums[0]=0; for (s=1;s<j;s++) { aux_sums[s]=aux_sums[s-1]+j-s+1; } array = all_factors[aux_local_numbering_1[i]]; n_neg_values = 0; while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values] < rank) {n_neg_values++;} n_pos_values = j - n_neg_values; if (fully_redundant) { for (s=0;s<n_neg_values;s++) { l2g_indices 
[partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=-1.0; scaling_factors[partial_sum+s]=array[s]; } for (s=0;s<n_pos_values;s++) { l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s+n_neg_values]=1.0; scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; } partial_sum += j; } else { /* l2g_indices and default cols and vals of B_delta */ for (s=0;s<j;s++) { l2g_indices [partial_sum+s]=n_global_lambda+s; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=0.0; } /* B_delta */ if ( n_neg_values > 0 ) { /* there's a rank next to me to the left */ vals_B_delta [partial_sum+n_neg_values-1]=-1.0; } if ( n_neg_values < j ) { /* there's a rank next to me to the right */ vals_B_delta [partial_sum+n_neg_values]=1.0; } /* scaling as in Klawonn-Widlund 1999*/ for (s=0;s<n_neg_values;s++) { scalar_value = 0.0; for (k=0;k<s+1;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s] = -scalar_value; } for (s=0;s<n_pos_values;s++) { scalar_value = 0.0; for (k=s+n_neg_values;k<j;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s+n_neg_values] = scalar_value; } partial_sum += j; } } ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); ierr = PetscFree(aux_sums);CHKERRQ(ierr); ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); ierr = PetscFree(all_factors);CHKERRQ(ierr); /* Local to global mapping of fetidpmat */ ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); /* Create local part of B_delta */ ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (fully_redundant) { ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); for 
(i=0;i<n_local_lambda;i++) { ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); } else { ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } ierr = PetscFree(scaling_factors);CHKERRQ(ierr); ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); /* Create some vectors needed by fetidp */ ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); test_fetidp = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); if (test_fetidp && !pcbddc->use_deluxe_scaling) { PetscReal real_value; ierr = PetscViewerASCIIGetStdout(comm,&viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); if (fully_redundant) { ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST A/B: Test numbering of global lambda dofs */ /******************************************************************/ ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecNorm(test_vec,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (fully_redundant) { ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd 
(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,PetscRealPart(scalar_value)-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } /******************************************************************/ /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ /* This is the meaning of the B matrix */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ /* E_D = R_D^TR */ /* P_D = B_{D,delta}^T B_{delta} */ /* eq.44 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ /* compute a random vector in \widetilde{W} */ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* store w for final comparison */ ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = 
VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Average operator E_D : results stored in pcis->vec2_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec2_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* test E_D=I-P_D */ scalar_value = 1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ /* eq.48 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* scaling */ ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); ierr = 
VecNorm(pcis->vec1_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (!fully_redundant) { /******************************************************************/ /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); } } /* final cleanup */ ierr = PetscFree(vertex_indices);CHKERRQ(ierr); ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); PetscFunctionReturn(0); }
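/*
   Usage sketch: the FETI-DP context built above is normally reached through
   PCBDDCCreateFETIDPOperators(), which returns the operator acting on the multiplier space and
   its preconditioner.  Illustrative only: the three-argument form of that routine and the
   MatCreateVecs()/two-matrix KSPSetOperators() interfaces of this PETSc generation are assumed,
   and petscksp.h must be included.  The options -fetidp_fullyredundant and -fetidp_check read
   above control the multiplier space and the diagnostic tests.
*/
static PetscErrorCode SolveWithFETIDP(PC pcbddc,Vec rhs,Vec sol)
{
  PetscErrorCode ierr;
  Mat            F;                 /* FETI-DP operator on the lagrange multiplier space */
  PC             D;                 /* associated Dirichlet preconditioner */
  KSP            ksp;
  Vec            lambda_rhs,lambda_sol;

  PetscFunctionBegin;
  ierr = PCBDDCCreateFETIDPOperators(pcbddc,&F,&D);CHKERRQ(ierr);     /* calls PCBDDCSetupFETIDPMatContext() */
  ierr = MatCreateVecs(F,&lambda_sol,&lambda_rhs);CHKERRQ(ierr);
  ierr = PCBDDCMatFETIDPGetRHS(F,rhs,lambda_rhs);CHKERRQ(ierr);       /* physical rhs -> multiplier rhs */
  ierr = KSPCreate(PetscObjectComm((PetscObject)F),&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,F,F);CHKERRQ(ierr);
  ierr = KSPSetType(ksp,KSPCG);CHKERRQ(ierr);
  ierr = KSPSetPC(ksp,D);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
  ierr = KSPSolve(ksp,lambda_rhs,lambda_sol);CHKERRQ(ierr);
  ierr = PCBDDCMatFETIDPGetSolution(F,lambda_sol,sol);CHKERRQ(ierr);  /* multiplier solution -> physical solution */
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  ierr = VecDestroy(&lambda_rhs);CHKERRQ(ierr);
  ierr = VecDestroy(&lambda_sol);CHKERRQ(ierr);
  ierr = MatDestroy(&F);CHKERRQ(ierr);
  ierr = PCDestroy(&D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}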
/*@C PetscSFSetGraph - Set a parallel star forest Collective Input Arguments: + sf - star forest . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) . nleaves - number of leaf vertices on the current process, each of these references a root on any process . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage . localmode - copy mode for ilocal . iremote - remote locations of root vertices for each leaf on the current process - remotemode - copy mode for iremote Level: intermediate .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() @*/ PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) { PetscErrorCode ierr; PetscTable table; PetscTablePosition pos; PetscMPIInt size; PetscInt i,*rcount,*ranks; PetscFunctionBegin; PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); if (nleaves && ilocal) PetscValidIntPointer(ilocal,4); if (nleaves) PetscValidPointer(iremote,6); if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots); if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); ierr = PetscSFReset(sf);CHKERRQ(ierr); sf->nroots = nroots; sf->nleaves = nleaves; if (ilocal) { switch (localmode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); sf->mine = sf->mine_alloc; ierr = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr); sf->minleaf = PETSC_MAX_INT; sf->maxleaf = PETSC_MIN_INT; for (i=0; i<nleaves; i++) { sf->minleaf = PetscMin(sf->minleaf,ilocal[i]); sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]); } break; case PETSC_OWN_POINTER: sf->mine_alloc = (PetscInt*)ilocal; sf->mine = sf->mine_alloc; break; case PETSC_USE_POINTER: sf->mine = (PetscInt*)ilocal; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); } } if (!ilocal || nleaves > 0) { sf->minleaf = 0; sf->maxleaf = nleaves - 1; } switch (remotemode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); sf->remote = sf->remote_alloc; ierr = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr); break; case PETSC_OWN_POINTER: sf->remote_alloc = (PetscSFNode*)iremote; sf->remote = sf->remote_alloc; break; case PETSC_USE_POINTER: sf->remote = (PetscSFNode*)iremote; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); } ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); for (i=0; i<nleaves; i++) { /* Log 1-based rank */ ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); } ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr); ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); for (i=0; i<sf->nranks; i++) { ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); ranks[i]--; /* Convert back to 0-based */ } ierr = PetscTableDestroy(&table);CHKERRQ(ierr); ierr = PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr); 
sf->roffset[0] = 0; for (i=0; i<sf->nranks; i++) { ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); sf->roffset[i+1] = sf->roffset[i] + rcount[i]; rcount[i] = 0; } for (i=0; i<nleaves; i++) { PetscInt lo,hi,irank; /* Search for index of iremote[i].rank in sf->ranks */ lo = 0; hi = sf->nranks; while (hi - lo > 1) { PetscInt mid = lo + (hi - lo)/2; if (iremote[i].rank < sf->ranks[mid]) hi = mid; else lo = mid; } if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo; else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank); sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i; sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index; rcount[irank]++; } ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (nroots == PETSC_DETERMINE) { /* Jed, if you have a better way to do this, put it in */ PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0; /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */ ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr); ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank]; ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Could set nroots to this maximum */ for (i = 0; i < size; ++i) maxRoots += numRankRoots[i]; /* Gather all indices */ ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr); leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index; leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; rootOff[0] = 0; for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i]; ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Sort and reduce */ ierr = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr); ierr = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr); ierr = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr); sf->nroots = maxRoots; } #endif sf->graphset = PETSC_TRUE; ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
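/*
   Usage sketch: a minimal star forest built with PetscSFSetGraph(), in which every process owns
   one leaf referencing root 0 on rank 0, followed by a broadcast of the root value to all leaves.
   Illustrative only; the Bcast interface of this PETSc generation (no MPI_Op argument) is assumed.
*/
static PetscErrorCode BcastFromRankZero(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscSF        sf;
  PetscSFNode    remote;
  PetscMPIInt    rank;
  PetscInt       nroots,rootdata = 0,leafdata = -1;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  nroots       = (rank == 0) ? 1 : 0;   /* the single root lives on rank 0 */
  remote.rank  = 0;                     /* every leaf points at (rank 0, index 0) */
  remote.index = 0;
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nroots,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!rank) rootdata = 42;             /* value to broadcast */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,&rootdata,&leafdata);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,&rootdata,&leafdata);CHKERRQ(ierr);     /* leafdata == 42 on every rank */
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}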