/*
   DMPlexPreallocateOperator_2 - sketch of a preallocation routine that counts, for every point in the
   chart of "section", how many cell closures visit it, combines those counts across processes through
   a star forest, and is then meant to turn them into dnz/onz contributions.

   NOTE(review): this block looks unfinished and does not appear compilable on its own:
     - dim, depth, maxConeSize, maxSupportSize, maxClosureSize, mesh, npoints, lvisits, supportSize, p,
       sf, cellmat, owner, ldnz and lonz are used but not declared in the visible code;
     - PetscSFGetRanks() is called without arguments;
     - the body of the second cell loop is only a comment describing the intended algorithm;
     - the final reduction begins with (ldnz,dnz) but ends with (lonz,onz);
     - nothing allocated by PetscMalloc3()/PetscMalloc2() is freed here, and tmpAdj is never used.
   The parameters bs, sectionGlobal, dnzu, onzu, A and fillMatrix are not referenced in the body.
*/
PetscErrorCode DMPlexPreallocateOperator_2(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix)
{
  PetscInt *tmpClosure,*tmpAdj,*visits;
  PetscInt c,cStart,cEnd,pStart,pEnd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
  ierr = DMPlexGetMaxSizes(dm, &maxConeSize, &maxSupportSize);CHKERRQ(ierr);
  /* Size used for the closure work array and for the (maxClosureSize x maxClosureSize) cellmat below */
  maxClosureSize = 2*PetscMax(PetscPowInt(mesh->maxConeSize,depth+1),PetscPowInt(mesh->maxSupportSize,depth+1));
  ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr);
  npoints = pEnd - pStart;
  /* lvisits and visits hold one counter per chart point; both start at zero */
  ierr = PetscMalloc3(maxClosureSize,&tmpClosure,npoints,&lvisits,npoints,&visits);CHKERRQ(ierr);
  ierr = PetscMemzero(lvisits,(pEnd-pStart)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(visits,(pEnd-pStart)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr);
  /* For every height-0 point, bump lvisits for each entry returned by the transitive closure call */
  for (c=cStart; c<cEnd; c++) {
    PetscInt *support = tmpClosure;
    ierr = DMPlexGetTransitiveClosure(dm, c, PETSC_FALSE, &supportSize, (PetscInt**)&support);CHKERRQ(ierr);
    /* NOTE(review): lvisits is indexed by the raw entry support[p], not by support[p]-pStart -- confirm */
    for (p=0; p<supportSize; p++) lvisits[support[p]]++;
  }
  /* Sum the leaf counts into visits, then broadcast the totals back into lvisits */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,lvisits,visits,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd (sf,MPIU_INT,lvisits,visits,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,visits,lvisits);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd (sf,MPIU_INT,visits,lvisits);CHKERRQ(ierr);

  ierr = PetscSFGetRanks();CHKERRQ(ierr);

  ierr = PetscMalloc2(maxClosureSize*maxClosureSize,&cellmat,npoints,&owner);CHKERRQ(ierr);
  for (c=cStart; c<cEnd; c++) {
    ierr = PetscMemzero(cellmat,maxClosureSize*maxClosureSize*sizeof(PetscInt));CHKERRQ(ierr);
    /* Not implemented yet; intended algorithm:
       Depth-first walk of transitive closure.
       At each leaf frame f of transitive closure that we see, add 1/visits[f] to each pair (p,q) not marked as done in cellmat.
       This contribution is added to dnz if owning ranks of p and q match, to onz otherwise. */
  }

  /* NOTE(review): Begin uses (ldnz,dnz) while End uses (lonz,onz); presumably two Begin/End pairs were intended */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,ldnz,dnz,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd (sf,MPIU_INT,lonz,onz,MPI_SUM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C
   PetscSFGatherBegin - start a pointwise gather of all leaves into multi-roots; finish it with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscSF        multiSF;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);

  /* The gather is a REPLACE reduction carried out on the multi-SF obtained from sf */
  ierr = PetscSFGetMultiSF(sf,&multiSF);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multiSF,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
/*
   MatPartitioningHierarchical_ReassembleFineparts - ship every local entry of fineparts to the process
   that owns the matching global row of adj and wrap what arrives in a new index set.

   Input Arguments:
+  adj       - matrix whose row layout decides which process owns each global index
.  fineparts - index set holding one value per local index
-  mapping   - local-to-global mapping applied to the local indices 0..localsize-1

   Output Argument:
.  sfineparts - index set on the communicator of adj with one entry per row owned by this process,
                created with PETSC_OWN_POINTER from the receive buffer

   Notes:
   The receive buffer is zero-initialised and filled with an MPIU_REPLACE reduction over a star forest
   whose leaves are the local entries and whose roots are addressed as (owner rank, global index minus
   the owner's first row).
*/
PetscErrorCode MatPartitioningHierarchical_ReassembleFineparts(Mat adj, IS fineparts, ISLocalToGlobalMapping mapping, IS *sfineparts)
{
  PetscErrorCode  ierr;
  MPI_Comm        comm;
  PetscMPIInt     rank;
  PetscLayout     rmap;
  PetscSF         sf;
  PetscSFNode     *remote;
  PetscInt        nlocal,k;
  PetscInt        *lidx,*gidx,*owners,*received;
  const PetscInt  *ranges,*parts;

  PetscFunctionBegin;
  /* communicator, rank and row layout of the adjacency matrix */
  ierr = PetscObjectGetComm((PetscObject)adj,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatGetLayouts(adj,&rmap,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(fineparts,&nlocal);CHKERRQ(ierr);

  /* translate the identity numbering 0..nlocal-1 into global indices */
  ierr = PetscCalloc2(nlocal,&gidx,nlocal,&lidx);CHKERRQ(ierr);
  for (k=0; k<nlocal; k++) lidx[k] = k;
  ierr = ISLocalToGlobalMappingApply(mapping,nlocal,lidx,gidx);CHKERRQ(ierr);

  /* look up the owning process of every global index */
  ierr = PetscCalloc1(nlocal,&owners);CHKERRQ(ierr);
  for (k=0; k<nlocal; k++) {
    ierr = PetscLayoutFindOwner(rmap,gidx[k],&owners[k]);CHKERRQ(ierr);
  }

  /* receive buffer: one slot per row owned by this process */
  ierr = PetscLayoutGetRanges(rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscCalloc1(ranges[rank+1]-ranges[rank],&received);CHKERRQ(ierr);
  ierr = ISGetIndices(fineparts,&parts);CHKERRQ(ierr);

  /* star forest used for the exchange: leaf k points at (owner, offset within the owner's range) */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscCalloc1(nlocal,&remote);CHKERRQ(ierr);
  for (k=0; k<nlocal; k++) {
    const PetscInt who = owners[k];

    remote[k].rank  = who;
    remote[k].index = gidx[k]-ranges[who];
  }
  ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
  /* not sure how to add prefix to sf */
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nlocal,nlocal,PETSC_NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* push the partition numbers to their owners */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,parts,received,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,parts,received,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = ISRestoreIndices(fineparts,&parts);CHKERRQ(ierr);

  /* hand the receive buffer over to the output index set */
  ierr = ISCreateGeneral(comm,ranges[rank+1]-ranges[rank],received,PETSC_OWN_POINTER,sfineparts);CHKERRQ(ierr);

  ierr = PetscFree2(gidx,lidx);CHKERRQ(ierr);
  ierr = PetscFree(owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatZeroRowsMapLocal_Private - map a list of global rows to the rows owned by this process

   Input Arguments:
+  A    - matrix whose row layout (A->rmap) defines ownership
.  N    - number of entries in rows
-  rows - global row numbers, each in [0, A->rmap->N)

   Output Arguments:
+  nr     - (optional) number of locally owned rows found
-  olrows - (optional) newly allocated array of length A->rmap->n whose first *nr entries are the
            local row numbers; the caller must release it with PetscFree()

   Notes:
   If A->nooffproczerorows is set, every input row must be owned by the calling process and no
   communication takes place. Otherwise a star forest with the input rows as leaves and the owned
   rows as roots is used to flag the owned rows.

   When olrows is NULL the work array is freed here instead of being leaked.
*/
PETSC_INTERN PetscErrorCode MatZeroRowsMapLocal_Private(Mat A,PetscInt N,const PetscInt *rows,PetscInt *nr,PetscInt **olrows)
{
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscMPIInt    rank;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  /* -1 marks "not requested"; the reduction below turns requested entries non-negative */
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    /* the SF takes ownership of rrows */
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers; len never exceeds r, so the in-place write is safe */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  if (nr) *nr = len;
  if (olrows) {
    *olrows = lrows;
  } else {
    /* nobody takes the array: release it rather than leak it */
    ierr = PetscFree(lrows);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*@C
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:
.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  /* Validate arguments up front, like the other SF entry points, before allocating or communicating */
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  /* maxlocal is one past the largest leaf location (leaf i lives at ilocal[i], or at i when ilocal is NULL) */
  for (i=0,maxlocal=0; i<nleaves; i++) maxlocal = PetscMax(maxlocal,(ilocal ? ilocal[i] : i)+1);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Every leaf location advertises its own (rank,index); roots start out as "unset" */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  /* newilocal is handed over (PETSC_OWN_POINTER); roots is copied, so it is freed below */
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex; set to NULL by this routine

   Level: advanced

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degree) {
    PetscInt i,maxlocal;

    /* Leaf data is addressed by leaf location (sf->mine[i], or i for contiguous leaves), so the
       temporary must span one past the largest location, not merely nleaves entries */
    for (i=0,maxlocal=0; i<sf->nleaves; i++) maxlocal = PetscMax(maxlocal,(sf->mine ? sf->mine[i] : i)+1);
    ierr = PetscMalloc1(sf->nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(maxlocal,&sf->degreetmp);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Each leaf contributes 1 to its root; use the plain MPI_SUM for integer data */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL;
  PetscFunctionReturn(0);
}
/*@C
   PetscSFComputeDegreeBegin - start computing the degree of every root vertex; finish with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex; set to NULL by this routine

   Level: advanced

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;
  PetscInt       k,leafspan;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);

  /* Degrees already available: nothing to start */
  if (sf->degreeknown) {
    *degree = NULL;
    PetscFunctionReturn(0);
  }

  /* A non-NULL degree array while the degrees are not yet known means a Begin is already pending */
  if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");

  /* One past the largest leaf location; leaf k sits at sf->mine[k], or at k when sf->mine is NULL */
  leafspan = 0;
  for (k=0; k<sf->nleaves; k++) {
    const PetscInt loc = sf->mine ? sf->mine[k] : k;

    leafspan = PetscMax(leafspan,loc+1);
  }

  ierr = PetscMalloc1(sf->nroots,&sf->degree);CHKERRQ(ierr);
  ierr = PetscMalloc1(leafspan,&sf->degreetmp);CHKERRQ(ierr);
  for (k=0; k<sf->nroots; k++) sf->degree[k] = 0;
  for (k=0; k<leafspan; k++) sf->degreetmp[k] = 1;

  /* Summing a 1 from every leaf yields the root degrees */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);

  *degree = NULL;
  PetscFunctionReturn(0);
}
/*
   MatIncreaseOverlapSplit_Single - increase the overlap of a sub-matrix across a sub-communicator;
   the sub-matrix may be a graph or a numerical matrix.

   Input Arguments:
+  mat - matrix, expected to live on the global communicator
.  is  - on entry, the index set to enlarge
-  ov  - amount of overlap, forwarded to mat->ops->increaseoverlap

   Output Argument:
.  is  - on exit, the enlarged index set

   Notes:
   The overlap is first increased through mat->ops->increaseoverlap. If the communicator of *is is
   identical or congruent to that of mat, or to PETSC_COMM_SELF, nothing more is done. Otherwise:
   1. the indices of all processes of the sub-communicator are gathered;
   2. its rank 0 sorts them, removes duplicates and sends each surviving index back to the process it
      was first seen on;
   3. a parallel sub-matrix is extracted for the resulting index set and repartitioned;
   4. *is is destroyed and rebuilt from the new partitioning with ISBuildTwoSided().

   The communicator reference taken with PetscCommDuplicate() is released on every return path.
*/
PetscErrorCode MatIncreaseOverlapSplit_Single(Mat mat,IS *is,PetscInt ov)
{
  PetscInt         i,nindx,*indices_sc,*indices_ov,localsize,*localsizes_sc,localsize_tmp;
  PetscInt         *indices_ov_rd,nroots,nleaves,*localoffsets,*indices_recv,*sources_sc,*sources_sc_rd;
  const PetscInt   *indices;
  PetscMPIInt      srank,ssize,issamecomm,k;
  IS               is_sc,allis_sc,partitioning;
  MPI_Comm         gcomm,dcomm,scomm;
  PetscSF          sf;
  PetscSFNode      *remote;
  Mat              *smat;
  MatPartitioning  part;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get the sub-communicator before calling the individual MatIncreaseOverlap,
   * since the sub-communicator may be changed by it
   */
  ierr = PetscObjectGetComm((PetscObject)(*is),&dcomm);CHKERRQ(ierr);
  /* take our own reference before the original index set is deleted */
  ierr = PetscCommDuplicate(dcomm,&scomm,NULL);CHKERRQ(ierr);
  /* get the global communicator, where mat should be a global matrix */
  ierr = PetscObjectGetComm((PetscObject)mat,&gcomm);CHKERRQ(ierr);
  /* increase overlap on each individual subdomain */
  ierr = (*mat->ops->increaseoverlap)(mat,1,is,ov);CHKERRQ(ierr);
  /* compare communicators */
  ierr = MPI_Comm_compare(gcomm,scomm,&issamecomm);CHKERRQ(ierr);
  /* if the sub-communicator is the same as the global communicator,
   * the user does not want to use a sub-communicator
   */
  if (issamecomm == MPI_IDENT || issamecomm == MPI_CONGRUENT) {
    ierr = PetscCommDestroy(&scomm);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* if the sub-communicator is PETSC_COMM_SELF,
   * the user does not care about the sub-communicator either
   */
  ierr = MPI_Comm_compare(scomm,PETSC_COMM_SELF,&issamecomm);CHKERRQ(ierr);
  if (issamecomm == MPI_IDENT || issamecomm == MPI_CONGRUENT) {
    ierr = PetscCommDestroy(&scomm);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* local rank and size in the sub-communicator */
  ierr = MPI_Comm_rank(scomm,&srank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(scomm,&ssize);CHKERRQ(ierr);
  /* create a new IS based on the sub-communicator,
   * since the old IS is often based on PETSC_COMM_SELF
   */
  ierr = ISGetLocalSize(*is,&nindx);CHKERRQ(ierr);
  ierr = PetscCalloc1(nindx,&indices_sc);CHKERRQ(ierr);
  ierr = ISGetIndices(*is,&indices);CHKERRQ(ierr);
  ierr = PetscMemcpy(indices_sc,indices,sizeof(PetscInt)*nindx);CHKERRQ(ierr);
  ierr = ISRestoreIndices(*is,&indices);CHKERRQ(ierr);
  /* the input index set is not needed any more */
  ierr = ISDestroy(is);CHKERRQ(ierr);
  /* create an index set based on the sub-communicator (takes ownership of indices_sc) */
  ierr = ISCreateGeneral(scomm,nindx,indices_sc,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr);
  /* gather all indices within the sub-communicator */
  ierr = ISAllGather(is_sc,&allis_sc);CHKERRQ(ierr);
  ierr = ISDestroy(&is_sc);CHKERRQ(ierr);
  /* gather the local sizes of all index sets on rank 0 */
  ierr = PetscMalloc1(ssize,&localsizes_sc);CHKERRQ(ierr);
  ierr = MPI_Gather(&nindx,1,MPIU_INT,localsizes_sc,1,MPIU_INT,0,scomm);CHKERRQ(ierr);
  /* only the root does these computations */
  if (!srank) {
    /* get the local size of the big index set */
    ierr = ISGetLocalSize(allis_sc,&localsize);CHKERRQ(ierr);
    ierr = PetscCalloc2(localsize,&indices_ov,localsize,&sources_sc);CHKERRQ(ierr);
    ierr = PetscCalloc2(localsize,&indices_ov_rd,localsize,&sources_sc_rd);CHKERRQ(ierr);
    ierr = ISGetIndices(allis_sc,&indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(indices_ov,indices,sizeof(PetscInt)*localsize);CHKERRQ(ierr);
    ierr = ISRestoreIndices(allis_sc,&indices);CHKERRQ(ierr);
    /* the gathered index set is not needed any more */
    ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
    /* assign the corresponding source rank to every gathered index */
    localsize_tmp = 0;
    for (k=0; k<ssize; k++) {
      for (i=0; i<localsizes_sc[k]; i++) {
        sources_sc[localsize_tmp++] = k;
      }
    }
    /* sort the indices, carrying along where they came from */
    ierr = PetscSortIntWithArray(localsize,indices_ov,sources_sc);CHKERRQ(ierr);
    /* recount local sizes for the reduced (duplicate-free) indices */
    ierr = PetscMemzero(localsizes_sc,sizeof(PetscInt)*ssize);CHKERRQ(ierr);
    /* initialize the first entry */
    if (localsize) {
      indices_ov_rd[0] = indices_ov[0];
      sources_sc_rd[0] = sources_sc[0];
      localsizes_sc[sources_sc[0]]++;
    }
    localsize_tmp = 1;
    /* remove duplicate integers; the first occurrence of each value decides its source */
    for (i=1; i<localsize; i++) {
      if (indices_ov[i] != indices_ov[i-1]) {
        indices_ov_rd[localsize_tmp]   = indices_ov[i];
        sources_sc_rd[localsize_tmp++] = sources_sc[i];
        localsizes_sc[sources_sc[i]]++;
      }
    }
    ierr = PetscFree2(indices_ov,sources_sc);CHKERRQ(ierr);
    ierr = PetscCalloc1(ssize+1,&localoffsets);CHKERRQ(ierr);
    for (k=0; k<ssize; k++) {
      localoffsets[k+1] = localoffsets[k] + localsizes_sc[k];
    }
    /* construct a star forest to send the data back */
    nleaves = localoffsets[ssize];
    /* reuse localoffsets as per-destination running counters */
    ierr = PetscMemzero(localoffsets,(ssize+1)*sizeof(PetscInt));CHKERRQ(ierr);
    nroots = localsizes_sc[srank];
    ierr = PetscCalloc1(nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<nleaves; i++) {
      remote[i].rank  = sources_sc_rd[i];
      remote[i].index = localoffsets[sources_sc_rd[i]]++;
    }
    ierr = PetscFree(localoffsets);CHKERRQ(ierr);
  } else {
    ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
    /* allocate a 'zero' pointer so that the SF has something to own */
    ierr = PetscCalloc1(0,&remote);CHKERRQ(ierr);
    nleaves       = 0;
    indices_ov_rd = 0;
    sources_sc_rd = 0;
  }
  /* scatter the new sizes to everybody */
  ierr = MPI_Scatter(localsizes_sc,1, MPIU_INT,&nroots,1, MPIU_INT,0,scomm);CHKERRQ(ierr);
  ierr = PetscFree(localsizes_sc);CHKERRQ(ierr);
  ierr = PetscCalloc1(nroots,&indices_recv);CHKERRQ(ierr);
  /* send the data back to everybody */
  ierr = PetscSFCreate(scomm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nroots,nleaves,PETSC_NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(sf,MPIU_INT,indices_ov_rd,indices_recv,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,indices_ov_rd,indices_recv,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* free memory (both pointers are NULL on non-root ranks) */
  ierr = PetscFree2(indices_ov_rd,sources_sc_rd);CHKERRQ(ierr);
  /* create an index set from the received indices (takes ownership of indices_recv) */
  ierr = ISCreateGeneral(scomm,nroots,indices_recv,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr);
  /* construct a parallel submatrix */
  ierr = MatGetSubMatricesMPI(mat,1,&is_sc,&is_sc,MAT_INITIAL_MATRIX,&smat);CHKERRQ(ierr);
  /* create a partitioner to repartition the sub-matrix */
  ierr = MatPartitioningCreate(scomm,&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,smat[0]);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PARMETIS)
  /* if there exists a ParMETIS installation, we try to use ParMETIS
   * because a repartition routine possibly works better
   */
  ierr = MatPartitioningSetType(part,MATPARTITIONINGPARMETIS);CHKERRQ(ierr);
  /* try to use the repartition function instead of the partition function */
  ierr = MatPartitioningParmetisSetRepartition(part);CHKERRQ(ierr);
#else
  /* we at least provide a default partitioner to rebalance the computation */
  ierr = MatPartitioningSetType(part,MATPARTITIONINGAVERAGE);CHKERRQ(ierr);
#endif
  /* the user can pick any partitioner by using an option */
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
  /* apply the partition */
  ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  ierr = MatDestroy(&(smat[0]));CHKERRQ(ierr);
  ierr = PetscFree(smat);CHKERRQ(ierr);
  /* get local rows including overlap */
  ierr = ISBuildTwoSided(partitioning,is_sc,is);CHKERRQ(ierr);
  /* destroy */
  ierr = ISDestroy(&is_sc);CHKERRQ(ierr);
  ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&scomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatISSetMPIXAIJPreallocation_Private - preallocate the parallel matrix B (AIJ, BAIJ and SBAIJ
   preallocation routines are all called) from the local matrix stored in the MATIS matrix A.

   Input Arguments:
+  A         - matrix whose data is a Mat_IS; its local matrix matis->A supplies the nonzero pattern
.  B         - matrix to preallocate
-  maxreduce - if PETSC_TRUE, contributions to shared rows are combined with MPI_MAX, otherwise MPI_SUM

   Notes:
   Each process counts, per local row, how many entries fall in the diagonal and off-diagonal blocks
   of the owner of that row. The counts are then reduced onto the owning rows through matis->sf and
   clipped to the number of available columns.

   The column local-to-global indices are taken from A->cmap->mapping whenever it differs from
   A->rmap->mapping.

   NOTE(review): the diagonal-block test uses the row ownership ranges for column indices, and the
   dense and SBAIJ branches index the row counters with column numbers; this presumably relies on
   square local and global layouts -- confirm before using with rectangular matrices.
*/
PETSC_EXTERN PetscErrorCode MatISSetMPIXAIJPreallocation_Private(Mat A, Mat B, PetscBool maxreduce)
{
  Mat_IS          *matis = (Mat_IS*)(A->data);
  PetscInt        *my_dnz,*my_onz,*dnz,*onz,*row_ownership;
  const PetscInt  *mat_ranges;
  const PetscInt  *global_indices_r,*global_indices_c;
  PetscInt        i,j,bs,rows,cols;
  PetscInt        lrows,lcols;
  PetscInt        local_rows,local_cols;
  PetscMPIInt     nsubdomains;
  PetscBool       isdense,issbaij;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&nsubdomains);CHKERRQ(ierr);
  ierr = MatGetSize(A,&rows,&cols);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingGetIndices(A->rmap->mapping,&global_indices_r);CHKERRQ(ierr);
  if (A->rmap->mapping != A->cmap->mapping) {
    /* distinct column mapping: column indices must come from cmap, not rmap */
    ierr = ISLocalToGlobalMappingGetIndices(A->cmap->mapping,&global_indices_c);CHKERRQ(ierr);
  } else {
    global_indices_c = global_indices_r;
  }

  if (issbaij) {
    ierr = MatGetRowUpperTriangular(matis->A);CHKERRQ(ierr);
  }
  /*
     An SF reduce is needed to sum up properly on shared rows.
     Note that generally preallocation is not exact, since it overestimates nonzeros
  */
  if (!matis->sf) { /* setup SF if not yet created and allocate rootdata and leafdata */
    ierr = MatISComputeSF_Private(A);CHKERRQ(ierr);
  }
  ierr = MatGetLocalSize(A,&lrows,&lcols);CHKERRQ(ierr);
  ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)A),lrows,lcols,dnz,onz);CHKERRQ(ierr);
  /* All processes need to compute entire row ownership */
  ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(A,&mat_ranges);CHKERRQ(ierr);
  for (i=0;i<nsubdomains;i++) {
    for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
      row_ownership[j] = i;
    }
  }

  /*
     my_dnz and my_onz contains exact contribution to preallocation from each local mat
     then, they will be summed up properly. This way, preallocation is always sufficient
  */
  ierr = PetscCalloc2(local_rows,&my_dnz,local_rows,&my_onz);CHKERRQ(ierr);
  /* preallocation as a MATAIJ */
  if (isdense) { /* special case for dense local matrices */
    for (i=0;i<local_rows;i++) {
      PetscInt index_row = global_indices_r[i];
      /* visit the pair (i,j) once for j >= i and account for both (i,j) and (j,i) */
      for (j=i;j<local_rows;j++) {
        PetscInt owner = row_ownership[index_row];
        PetscInt index_col = global_indices_c[j];
        if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
          my_dnz[i] += 1;
        } else { /* offdiag block */
          my_onz[i] += 1;
        }
        /* same as before, interchanging rows and cols */
        if (i != j) {
          owner = row_ownership[index_col];
          if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
            my_dnz[j] += 1;
          } else {
            my_onz[j] += 1;
          }
        }
      }
    }
  } else { /* TODO: this could be optimized using MatGetRowIJ */
    for (i=0;i<local_rows;i++) {
      const PetscInt *row_cols;
      PetscInt       ncols,index_row = global_indices_r[i];
      ierr = MatGetRow(matis->A,i,&ncols,&row_cols,NULL);CHKERRQ(ierr);
      for (j=0;j<ncols;j++) {
        PetscInt owner = row_ownership[index_row];
        PetscInt index_col = global_indices_c[row_cols[j]];
        if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
          my_dnz[i] += 1;
        } else { /* offdiag block */
          my_onz[i] += 1;
        }
        /* same as before, interchanging rows and cols */
        if (issbaij && index_col != index_row) {
          owner = row_ownership[index_col];
          if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
            my_dnz[row_cols[j]] += 1;
          } else {
            my_onz[row_cols[j]] += 1;
          }
        }
      }
      ierr = MatRestoreRow(matis->A,i,&ncols,&row_cols,NULL);CHKERRQ(ierr);
    }
  }
  ierr = ISLocalToGlobalMappingRestoreIndices(A->rmap->mapping,&global_indices_r);CHKERRQ(ierr);
  if (A->rmap->mapping != A->cmap->mapping) {
    /* give the column indices back to the mapping they were obtained from */
    ierr = ISLocalToGlobalMappingRestoreIndices(A->cmap->mapping,&global_indices_c);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_ownership);CHKERRQ(ierr);

  /* Reduce my_dnz and my_onz */
  if (maxreduce) {
    ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_dnz,dnz,MPI_MAX);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_dnz,dnz,MPI_MAX);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_onz,onz,MPI_MAX);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_onz,onz,MPI_MAX);CHKERRQ(ierr);
  } else {
    ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_dnz,dnz,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_dnz,dnz,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_onz,onz,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_onz,onz,MPI_SUM);CHKERRQ(ierr);
  }
  ierr = PetscFree2(my_dnz,my_onz);CHKERRQ(ierr);

  /* Resize preallocation if overestimated */
  for (i=0;i<lrows;i++) {
    dnz[i] = PetscMin(dnz[i],lcols);
    onz[i] = PetscMin(onz[i],cols-lcols);
  }
  /* set preallocation */
  ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
  /* convert the point-row counts in place into block-row counts for the blocked formats */
  for (i=0;i<lrows/bs;i++) {
    dnz[i] = dnz[i*bs]/bs;
    onz[i] = onz[i*bs]/bs;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  if (issbaij) {
    ierr = MatRestoreRowUpperTriangular(matis->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode DMPlexPreallocateOperator(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix) { MPI_Comm comm; MatType mtype; PetscSF sf, sfDof, sfAdj; PetscSection leafSectionAdj, rootSectionAdj, sectionAdj, anchorSectionAdj; PetscInt nroots, nleaves, l, p; const PetscInt *leaves; const PetscSFNode *remotes; PetscInt dim, pStart, pEnd, numDof, globalOffStart, globalOffEnd, numCols; PetscInt *tmpAdj = NULL, *adj, *rootAdj, *anchorAdj = NULL, *cols, *remoteOffsets; PetscInt adjSize; PetscLayout rLayout; PetscInt locRows, rStart, rEnd, r; PetscMPIInt size; PetscBool doCommLocal, doComm, debug = PETSC_FALSE, isSymBlock, isSymSeqBlock, isSymMPIBlock; PetscBool useAnchors; PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(dm, DM_CLASSID, 1); PetscValidHeaderSpecific(section, PETSC_SECTION_CLASSID, 3); PetscValidHeaderSpecific(sectionGlobal, PETSC_SECTION_CLASSID, 4); PetscValidHeaderSpecific(A, MAT_CLASSID, 9); if (dnz) PetscValidPointer(dnz,5); if (onz) PetscValidPointer(onz,6); if (dnzu) PetscValidPointer(dnzu,7); if (onzu) PetscValidPointer(onzu,8); ierr = PetscLogEventBegin(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL, "-dm_view_preallocation", &debug, NULL);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr); ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); doCommLocal = (size > 1) && (nroots >= 0) ? 
PETSC_TRUE : PETSC_FALSE; ierr = MPI_Allreduce(&doCommLocal, &doComm, 1, MPIU_BOOL, MPI_LAND, comm);CHKERRQ(ierr); /* Create dof SF based on point SF */ if (debug) { ierr = PetscPrintf(comm, "Input Section for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSectionView(section, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Input Global Section for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSectionView(sectionGlobal, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Input SF for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSFView(sf, NULL);CHKERRQ(ierr); } ierr = PetscSFCreateRemoteOffsets(sf, section, section, &remoteOffsets);CHKERRQ(ierr); ierr = PetscSFCreateSectionSF(sf, section, remoteOffsets, section, &sfDof);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Dof SF for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSFView(sfDof, NULL);CHKERRQ(ierr); } /* Create section for dof adjacency (dof ==> # adj dof) */ ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(section, &numDof);CHKERRQ(ierr); ierr = PetscSectionCreate(comm, &leafSectionAdj);CHKERRQ(ierr); ierr = PetscSectionSetChart(leafSectionAdj, 0, numDof);CHKERRQ(ierr); ierr = PetscSectionCreate(comm, &rootSectionAdj);CHKERRQ(ierr); ierr = PetscSectionSetChart(rootSectionAdj, 0, numDof);CHKERRQ(ierr); /* Fill in the ghost dofs on the interface */ ierr = PetscSFGetGraph(sf, NULL, &nleaves, &leaves, &remotes);CHKERRQ(ierr); /* use constraints in finding adjacency in this routine */ ierr = DMPlexGetAdjacencyUseAnchors(dm,&useAnchors);CHKERRQ(ierr); ierr = DMPlexSetAdjacencyUseAnchors(dm,PETSC_TRUE);CHKERRQ(ierr); /* section - maps points to (# dofs, local dofs) sectionGlobal - maps points to (# dofs, global dofs) leafSectionAdj - maps unowned local dofs to # adj dofs rootSectionAdj - maps owned local dofs to # adj dofs adj - adj global dofs indexed by leafSectionAdj rootAdj - adj global dofs indexed by 
rootSectionAdj sf - describes shared points across procs sfDof - describes shared dofs across procs sfAdj - describes shared adjacent dofs across procs ** The bootstrapping process involves six rounds with similar structure of visiting neighbors of each point. (0). If there are point-to-point constraints, add the adjacencies of constrained points to anchors in anchorAdj (This is done in DMPlexComputeAnchorAdjacencies()) 1. Visit unowned points on interface, count adjacencies placing in leafSectionAdj Reduce those counts to rootSectionAdj (now redundantly counting some interface points) 2. Visit owned points on interface, count adjacencies placing in rootSectionAdj Create sfAdj connecting rootSectionAdj and leafSectionAdj 3. Visit unowned points on interface, write adjacencies to adj Gather adj to rootAdj (note that there is redundancy in rootAdj when multiple procs find the same adjacencies) 4. Visit owned points on interface, write adjacencies to rootAdj Remove redundancy in rootAdj ** The last two traversals use transitive closure 5. Visit all owned points in the subdomain, count dofs for each point (sectionAdj) Allocate memory addressed by sectionAdj (cols) 6. 
Visit all owned points in the subdomain, insert dof adjacencies into cols ** Knowing all the column adjacencies, check ownership and sum into dnz and onz */ ierr = DMPlexComputeAnchorAdjacencies(dm,section,sectionGlobal,&anchorSectionAdj,&anchorAdj);CHKERRQ(ierr); for (l = 0; l < nleaves; ++l) { PetscInt dof, off, d, q, anDof; PetscInt p = leaves[l], numAdj = PETSC_DETERMINE; if ((p < pStart) || (p >= pEnd)) continue; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(leafSectionAdj, d, ndof-ncdof);CHKERRQ(ierr); } } ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); if (anDof) { for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(leafSectionAdj, d, anDof);CHKERRQ(ierr); } } } ierr = PetscSectionSetUp(leafSectionAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjacency Section for Preallocation on Leaves:\n");CHKERRQ(ierr); ierr = PetscSectionView(leafSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Get maximum remote adjacency sizes for owned dofs on interface (roots) */ if (doComm) { ierr = PetscSFReduceBegin(sfDof, MPIU_INT, leafSectionAdj->atlasDof, rootSectionAdj->atlasDof, MPI_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(sfDof, MPIU_INT, leafSectionAdj->atlasDof, rootSectionAdj->atlasDof, MPI_SUM);CHKERRQ(ierr); } if (debug) { ierr = PetscPrintf(comm, "Adjancency Section for Preallocation on Roots:\n");CHKERRQ(ierr); ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Add in local adjacency sizes for owned dofs on 
interface (roots) */ for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, adof, dof, off, d, q, anDof; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); if (!dof) continue; ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr); if (adof <= 0) continue; ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(rootSectionAdj, d, ndof-ncdof);CHKERRQ(ierr); } } ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); if (anDof) { for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(rootSectionAdj, d, anDof);CHKERRQ(ierr); } } } ierr = PetscSectionSetUp(rootSectionAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjancency Section for Preallocation on Roots after local additions:\n");CHKERRQ(ierr); ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Create adj SF based on dof SF */ ierr = PetscSFCreateRemoteOffsets(sfDof, rootSectionAdj, leafSectionAdj, &remoteOffsets);CHKERRQ(ierr); ierr = PetscSFCreateSectionSF(sfDof, rootSectionAdj, remoteOffsets, leafSectionAdj, &sfAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjacency SF for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSFView(sfAdj, NULL);CHKERRQ(ierr); } ierr = PetscSFDestroy(&sfDof);CHKERRQ(ierr); /* Create leaf adjacency */ ierr = PetscSectionSetUp(leafSectionAdj);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(leafSectionAdj, &adjSize);CHKERRQ(ierr); ierr = PetscCalloc1(adjSize, &adj);CHKERRQ(ierr); for (l = 0; l < nleaves; ++l) { PetscInt dof, off, d, q, anDof, anOff; PetscInt p = leaves[l], 
numAdj = PETSC_DETERMINE; if ((p < pStart) || (p >= pEnd)) continue; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { PetscInt aoff, i = 0; ierr = PetscSectionGetOffset(leafSectionAdj, d, &aoff);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, ngoff, nd; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr); for (nd = 0; nd < ndof-ncdof; ++nd) { adj[aoff+i] = (ngoff < 0 ? -(ngoff+1) : ngoff) + nd; ++i; } } for (q = 0; q < anDof; q++) { adj[aoff+i] = anchorAdj[anOff+q]; ++i; } } } /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Leaf adjacency indices\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, adj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Gather adjacenct indices to root */ ierr = PetscSectionGetStorageSize(rootSectionAdj, &adjSize);CHKERRQ(ierr); ierr = PetscMalloc1(adjSize, &rootAdj);CHKERRQ(ierr); for (r = 0; r < adjSize; ++r) rootAdj[r] = -1; if (doComm) { ierr = PetscSFGatherBegin(sfAdj, MPIU_INT, adj, rootAdj);CHKERRQ(ierr); ierr = PetscSFGatherEnd(sfAdj, MPIU_INT, adj, rootAdj);CHKERRQ(ierr); } ierr = PetscSFDestroy(&sfAdj);CHKERRQ(ierr); ierr = PetscFree(adj);CHKERRQ(ierr); /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Root adjacency indices after gather\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = 
ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Add in local adjacency indices for owned dofs on interface (roots) */ for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, adof, dof, off, d, q, anDof, anOff; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); if (!dof) continue; ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr); if (adof <= 0) continue; ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { PetscInt adof, aoff, i; ierr = PetscSectionGetDof(rootSectionAdj, d, &adof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(rootSectionAdj, d, &aoff);CHKERRQ(ierr); i = adof-1; for (q = 0; q < anDof; q++) { rootAdj[aoff+i] = anchorAdj[anOff+q]; --i; } for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, ngoff, nd; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr); for (nd = 0; nd < ndof-ncdof; ++nd) { rootAdj[aoff+i] = ngoff < 0 ? 
-(ngoff+1)+nd : ngoff+nd; --i; } } } } /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Root adjacency indices\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Compress indices */ ierr = PetscSectionSetUp(rootSectionAdj);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt dof, cdof, off, d; PetscInt adof, aoff; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); if (!dof) continue; ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr); if (adof <= 0) continue; for (d = off; d < off+dof-cdof; ++d) { ierr = PetscSectionGetDof(rootSectionAdj, d, &adof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(rootSectionAdj, d, &aoff);CHKERRQ(ierr); ierr = PetscSortRemoveDupsInt(&adof, &rootAdj[aoff]);CHKERRQ(ierr); ierr = PetscSectionSetDof(rootSectionAdj, d, adof);CHKERRQ(ierr); } } /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Adjancency Section for Preallocation on Roots after compression:\n");CHKERRQ(ierr); ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Root adjacency indices after compression\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Build adjacency section: Maps global indices to sets of adjacent global indices */ ierr = PetscSectionGetOffsetRange(sectionGlobal, &globalOffStart, &globalOffEnd);CHKERRQ(ierr); ierr = PetscSectionCreate(comm, §ionAdj);CHKERRQ(ierr); ierr = PetscSectionSetChart(sectionAdj, globalOffStart, globalOffEnd);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, dof, cdof, off, goff, d, q, 
anDof; PetscBool found = PETSC_TRUE; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr); for (d = 0; d < dof-cdof; ++d) { PetscInt ldof, rdof; ierr = PetscSectionGetDof(leafSectionAdj, off+d, &ldof);CHKERRQ(ierr); ierr = PetscSectionGetDof(rootSectionAdj, off+d, &rdof);CHKERRQ(ierr); if (ldof > 0) { /* We do not own this point */ } else if (rdof > 0) { ierr = PetscSectionSetDof(sectionAdj, goff+d, rdof);CHKERRQ(ierr); } else { found = PETSC_FALSE; } } if (found) continue; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr); ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, noff; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, padj, &noff);CHKERRQ(ierr); for (d = goff; d < goff+dof-cdof; ++d) { ierr = PetscSectionAddDof(sectionAdj, d, ndof-ncdof);CHKERRQ(ierr); } } ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); if (anDof) { for (d = goff; d < goff+dof-cdof; ++d) { ierr = PetscSectionAddDof(sectionAdj, d, anDof);CHKERRQ(ierr); } } } ierr = PetscSectionSetUp(sectionAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjacency Section for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSectionView(sectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Get adjacent indices */ ierr = PetscSectionGetStorageSize(sectionAdj, &numCols);CHKERRQ(ierr); ierr = PetscMalloc1(numCols, &cols);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, dof, cdof, off, goff, d, 
q, anDof, anOff; PetscBool found = PETSC_TRUE; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr); for (d = 0; d < dof-cdof; ++d) { PetscInt ldof, rdof; ierr = PetscSectionGetDof(leafSectionAdj, off+d, &ldof);CHKERRQ(ierr); ierr = PetscSectionGetDof(rootSectionAdj, off+d, &rdof);CHKERRQ(ierr); if (ldof > 0) { /* We do not own this point */ } else if (rdof > 0) { PetscInt aoff, roff; ierr = PetscSectionGetOffset(sectionAdj, goff+d, &aoff);CHKERRQ(ierr); ierr = PetscSectionGetOffset(rootSectionAdj, off+d, &roff);CHKERRQ(ierr); ierr = PetscMemcpy(&cols[aoff], &rootAdj[roff], rdof * sizeof(PetscInt));CHKERRQ(ierr); } else { found = PETSC_FALSE; } } if (found) continue; ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr); for (d = goff; d < goff+dof-cdof; ++d) { PetscInt adof, aoff, i = 0; ierr = PetscSectionGetDof(sectionAdj, d, &adof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionAdj, d, &aoff);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, ngoff, nd; const PetscInt *ncind; /* Adjacent points may not be in the section chart */ if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintIndices(section, padj, &ncind);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr); for (nd = 0; nd < ndof-ncdof; ++nd, ++i) { cols[aoff+i] = ngoff < 0 ? 
-(ngoff+1)+nd : ngoff+nd; } } for (q = 0; q < anDof; q++, i++) { cols[aoff+i] = anchorAdj[anOff + q]; } if (i != adof) SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of entries %D != %D for dof %D (point %D)", i, adof, d, p); } } ierr = PetscSectionDestroy(&anchorSectionAdj);CHKERRQ(ierr); ierr = PetscSectionDestroy(&leafSectionAdj);CHKERRQ(ierr); ierr = PetscSectionDestroy(&rootSectionAdj);CHKERRQ(ierr); ierr = PetscFree(anchorAdj);CHKERRQ(ierr); ierr = PetscFree(rootAdj);CHKERRQ(ierr); ierr = PetscFree(tmpAdj);CHKERRQ(ierr); /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Column indices\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, numCols, cols, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Create allocation vectors from adjacency graph */ ierr = MatGetLocalSize(A, &locRows, NULL);CHKERRQ(ierr); ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)A), &rLayout);CHKERRQ(ierr); ierr = PetscLayoutSetLocalSize(rLayout, locRows);CHKERRQ(ierr); ierr = PetscLayoutSetBlockSize(rLayout, 1);CHKERRQ(ierr); ierr = PetscLayoutSetUp(rLayout);CHKERRQ(ierr); ierr = PetscLayoutGetRange(rLayout, &rStart, &rEnd);CHKERRQ(ierr); ierr = PetscLayoutDestroy(&rLayout);CHKERRQ(ierr); /* Only loop over blocks of rows */ if (rStart%bs || rEnd%bs) SETERRQ3(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid layout [%d, %d) for matrix, must be divisible by block size %d", rStart, rEnd, bs); for (r = rStart/bs; r < rEnd/bs; ++r) { const PetscInt row = r*bs; PetscInt numCols, cStart, c; ierr = PetscSectionGetDof(sectionAdj, row, &numCols);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionAdj, row, &cStart);CHKERRQ(ierr); for (c = cStart; c < cStart+numCols; ++c) { if ((cols[c] >= rStart*bs) && (cols[c] < rEnd*bs)) { ++dnz[r-rStart]; if (cols[c] >= row) ++dnzu[r-rStart]; } else { ++onz[r-rStart]; if (cols[c] >= row) ++onzu[r-rStart]; } } } if (bs > 1) { for (r = 0; r < 
locRows/bs; ++r) { dnz[r] /= bs; onz[r] /= bs; dnzu[r] /= bs; onzu[r] /= bs; } } /* Set matrix pattern */ ierr = MatXAIJSetPreallocation(A, bs, dnz, onz, dnzu, onzu);CHKERRQ(ierr); ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); /* Check for symmetric storage */ ierr = MatGetType(A, &mtype);CHKERRQ(ierr); ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr); ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr); ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr); if (isSymBlock || isSymSeqBlock || isSymMPIBlock) {ierr = MatSetOption(A, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);} /* Fill matrix with zeros */ if (fillMatrix) { PetscScalar *values; PetscInt maxRowLen = 0; for (r = rStart; r < rEnd; ++r) { PetscInt len; ierr = PetscSectionGetDof(sectionAdj, r, &len);CHKERRQ(ierr); maxRowLen = PetscMax(maxRowLen, len); } ierr = PetscCalloc1(maxRowLen, &values);CHKERRQ(ierr); for (r = rStart; r < rEnd; ++r) { PetscInt numCols, cStart; ierr = PetscSectionGetDof(sectionAdj, r, &numCols);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionAdj, r, &cStart);CHKERRQ(ierr); ierr = MatSetValues(A, 1, &r, numCols, &cols[cStart], values, INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree(values);CHKERRQ(ierr); ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } /* restore original useAnchors */ ierr = DMPlexSetAdjacencyUseAnchors(dm,useAnchors);CHKERRQ(ierr); ierr = PetscSectionDestroy(§ionAdj);CHKERRQ(ierr); ierr = PetscFree(cols);CHKERRQ(ierr); ierr = PetscLogEventEnd(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C DMPlexDistribute - Distributes the mesh and any associated sections. Not Collective Input Parameter: + dm - The original DMPlex object . partitioner - The partitioning package, or NULL for the default - overlap - The overlap of partitions, 0 is the default Output Parameter: + sf - The PetscSF used for point distribution - parallelMesh - The distributed DMPlex object, or NULL Note: If the mesh was not distributed, the return value is NULL. The user can control the definition of adjacency for the mesh using DMPlexGetAdjacencyUseCone() and DMPlexSetAdjacencyUseClosure(). They should choose the combination appropriate for the function representation on the mesh. Level: intermediate .keywords: mesh, elements .seealso: DMPlexCreate(), DMPlexDistributeByFace(), DMPlexSetAdjacencyUseCone(), DMPlexSetAdjacencyUseClosure() @*/ PetscErrorCode DMPlexDistribute(DM dm, const char partitioner[], PetscInt overlap, PetscSF *sf, DM *dmParallel) { DM_Plex *mesh = (DM_Plex*) dm->data, *pmesh; MPI_Comm comm; const PetscInt height = 0; PetscInt dim, numRemoteRanks; IS origCellPart, origPart, cellPart, part; PetscSection origCellPartSection, origPartSection, cellPartSection, partSection; PetscSFNode *remoteRanks; PetscSF partSF, pointSF, coneSF; ISLocalToGlobalMapping renumbering; PetscSection originalConeSection, newConeSection; PetscInt *remoteOffsets; PetscInt *cones, *newCones, newConesSize; PetscBool flg; PetscMPIInt rank, numProcs, p; PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(dm, DM_CLASSID, 1); if (sf) PetscValidPointer(sf,4); PetscValidPointer(dmParallel,5); ierr = PetscLogEventBegin(DMPLEX_Distribute,dm,0,0,0);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &numProcs);CHKERRQ(ierr); *dmParallel = NULL; if (numProcs == 1) PetscFunctionReturn(0); ierr = DMPlexGetDimension(dm, &dim);CHKERRQ(ierr); /* Create cell partition - We need to rewrite to 
use IS, use the MatPartition stuff */ ierr = PetscLogEventBegin(DMPLEX_Partition,dm,0,0,0);CHKERRQ(ierr); if (overlap > 1) SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Overlap > 1 not yet implemented"); ierr = DMPlexCreatePartition(dm, partitioner, height, overlap > 0 ? PETSC_TRUE : PETSC_FALSE, &cellPartSection, &cellPart, &origCellPartSection, &origCellPart);CHKERRQ(ierr); /* Create SF assuming a serial partition for all processes: Could check for IS length here */ if (!rank) numRemoteRanks = numProcs; else numRemoteRanks = 0; ierr = PetscMalloc1(numRemoteRanks, &remoteRanks);CHKERRQ(ierr); for (p = 0; p < numRemoteRanks; ++p) { remoteRanks[p].rank = p; remoteRanks[p].index = 0; } ierr = PetscSFCreate(comm, &partSF);CHKERRQ(ierr); ierr = PetscSFSetGraph(partSF, 1, numRemoteRanks, NULL, PETSC_OWN_POINTER, remoteRanks, PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->prefix, "-partition_view", &flg);CHKERRQ(ierr); if (flg) { ierr = PetscPrintf(comm, "Cell Partition:\n");CHKERRQ(ierr); ierr = PetscSectionView(cellPartSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = ISView(cellPart, NULL);CHKERRQ(ierr); if (origCellPart) { ierr = PetscPrintf(comm, "Original Cell Partition:\n");CHKERRQ(ierr); ierr = PetscSectionView(origCellPartSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = ISView(origCellPart, NULL);CHKERRQ(ierr); } ierr = PetscSFView(partSF, NULL);CHKERRQ(ierr); } /* Close the partition over the mesh */ ierr = DMPlexCreatePartitionClosure(dm, cellPartSection, cellPart, &partSection, &part);CHKERRQ(ierr); ierr = ISDestroy(&cellPart);CHKERRQ(ierr); ierr = PetscSectionDestroy(&cellPartSection);CHKERRQ(ierr); /* Create new mesh */ ierr = DMPlexCreate(comm, dmParallel);CHKERRQ(ierr); ierr = DMPlexSetDimension(*dmParallel, dim);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) *dmParallel, "Parallel Mesh");CHKERRQ(ierr); pmesh = (DM_Plex*) (*dmParallel)->data; /* Distribute sieve points and the global 
point numbering (replaces creating remote bases) */ ierr = PetscSFConvertPartition(partSF, partSection, part, &renumbering, &pointSF);CHKERRQ(ierr); if (flg) { ierr = PetscPrintf(comm, "Point Partition:\n");CHKERRQ(ierr); ierr = PetscSectionView(partSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = ISView(part, NULL);CHKERRQ(ierr); ierr = PetscSFView(pointSF, NULL);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Point Renumbering after partition:\n");CHKERRQ(ierr); ierr = ISLocalToGlobalMappingView(renumbering, NULL);CHKERRQ(ierr); } ierr = PetscLogEventEnd(DMPLEX_Partition,dm,0,0,0);CHKERRQ(ierr); ierr = PetscLogEventBegin(DMPLEX_DistributeCones,dm,0,0,0);CHKERRQ(ierr); /* Distribute cone section */ ierr = DMPlexGetConeSection(dm, &originalConeSection);CHKERRQ(ierr); ierr = DMPlexGetConeSection(*dmParallel, &newConeSection);CHKERRQ(ierr); ierr = PetscSFDistributeSection(pointSF, originalConeSection, &remoteOffsets, newConeSection);CHKERRQ(ierr); ierr = DMSetUp(*dmParallel);CHKERRQ(ierr); { PetscInt pStart, pEnd, p; ierr = PetscSectionGetChart(newConeSection, &pStart, &pEnd);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt coneSize; ierr = PetscSectionGetDof(newConeSection, p, &coneSize);CHKERRQ(ierr); pmesh->maxConeSize = PetscMax(pmesh->maxConeSize, coneSize); } } /* Communicate and renumber cones */ ierr = PetscSFCreateSectionSF(pointSF, originalConeSection, remoteOffsets, newConeSection, &coneSF);CHKERRQ(ierr); ierr = DMPlexGetCones(dm, &cones);CHKERRQ(ierr); ierr = DMPlexGetCones(*dmParallel, &newCones);CHKERRQ(ierr); ierr = PetscSFBcastBegin(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSFBcastEnd(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(newConeSection, &newConesSize);CHKERRQ(ierr); ierr = ISGlobalToLocalMappingApplyBlock(renumbering, IS_GTOLM_MASK, newConesSize, newCones, NULL, newCones);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->prefix, "-cones_view", 
&flg);CHKERRQ(ierr); if (flg) { ierr = PetscPrintf(comm, "Serial Cone Section:\n");CHKERRQ(ierr); ierr = PetscSectionView(originalConeSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Parallel Cone Section:\n");CHKERRQ(ierr); ierr = PetscSectionView(newConeSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscSFView(coneSF, NULL);CHKERRQ(ierr); } ierr = DMPlexGetConeOrientations(dm, &cones);CHKERRQ(ierr); ierr = DMPlexGetConeOrientations(*dmParallel, &newCones);CHKERRQ(ierr); ierr = PetscSFBcastBegin(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSFBcastEnd(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSFDestroy(&coneSF);CHKERRQ(ierr); ierr = PetscLogEventEnd(DMPLEX_DistributeCones,dm,0,0,0);CHKERRQ(ierr); /* Create supports and stratify sieve */ { PetscInt pStart, pEnd; ierr = PetscSectionGetChart(pmesh->coneSection, &pStart, &pEnd);CHKERRQ(ierr); ierr = PetscSectionSetChart(pmesh->supportSection, pStart, pEnd);CHKERRQ(ierr); } ierr = DMPlexSymmetrize(*dmParallel);CHKERRQ(ierr); ierr = DMPlexStratify(*dmParallel);CHKERRQ(ierr); /* Distribute Coordinates */ { PetscSection originalCoordSection, newCoordSection; Vec originalCoordinates, newCoordinates; PetscInt bs; const char *name; ierr = DMGetCoordinateSection(dm, &originalCoordSection);CHKERRQ(ierr); ierr = DMGetCoordinateSection(*dmParallel, &newCoordSection);CHKERRQ(ierr); ierr = DMGetCoordinatesLocal(dm, &originalCoordinates);CHKERRQ(ierr); ierr = VecCreate(comm, &newCoordinates);CHKERRQ(ierr); ierr = PetscObjectGetName((PetscObject) originalCoordinates, &name);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) newCoordinates, name);CHKERRQ(ierr); ierr = DMPlexDistributeField(dm, pointSF, originalCoordSection, originalCoordinates, newCoordSection, newCoordinates);CHKERRQ(ierr); ierr = DMSetCoordinatesLocal(*dmParallel, newCoordinates);CHKERRQ(ierr); ierr = VecGetBlockSize(originalCoordinates, &bs);CHKERRQ(ierr); ierr = 
VecSetBlockSize(newCoordinates, bs);CHKERRQ(ierr); ierr = VecDestroy(&newCoordinates);CHKERRQ(ierr); } /* Distribute labels */ ierr = PetscLogEventBegin(DMPLEX_DistributeLabels,dm,0,0,0);CHKERRQ(ierr); { DMLabel next = mesh->labels, newNext = pmesh->labels; PetscInt numLabels = 0, l; /* Bcast number of labels */ while (next) {++numLabels; next = next->next;} ierr = MPI_Bcast(&numLabels, 1, MPIU_INT, 0, comm);CHKERRQ(ierr); next = mesh->labels; for (l = 0; l < numLabels; ++l) { DMLabel labelNew; PetscBool isdepth; /* Skip "depth" because it is recreated */ if (!rank) {ierr = PetscStrcmp(next->name, "depth", &isdepth);CHKERRQ(ierr);} ierr = MPI_Bcast(&isdepth, 1, MPIU_BOOL, 0, comm);CHKERRQ(ierr); if (isdepth) {if (!rank) next = next->next; continue;} ierr = DMLabelDistribute(next, partSection, part, renumbering, &labelNew);CHKERRQ(ierr); /* Insert into list */ if (newNext) newNext->next = labelNew; else pmesh->labels = labelNew; newNext = labelNew; if (!rank) next = next->next; } } ierr = PetscLogEventEnd(DMPLEX_DistributeLabels,dm,0,0,0);CHKERRQ(ierr); /* Setup hybrid structure */ { const PetscInt *gpoints; PetscInt depth, n, d; for (d = 0; d <= dim; ++d) {pmesh->hybridPointMax[d] = mesh->hybridPointMax[d];} ierr = MPI_Bcast(pmesh->hybridPointMax, dim+1, MPIU_INT, 0, comm);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetSize(renumbering, &n);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetIndices(renumbering, &gpoints);CHKERRQ(ierr); ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr); for (d = 0; d <= dim; ++d) { PetscInt pmax = pmesh->hybridPointMax[d], newmax = 0, pEnd, stratum[2], p; if (pmax < 0) continue; ierr = DMPlexGetDepthStratum(dm, d > depth ? 
depth : d, &stratum[0], &stratum[1]);CHKERRQ(ierr); ierr = DMPlexGetDepthStratum(*dmParallel, d, NULL, &pEnd);CHKERRQ(ierr); ierr = MPI_Bcast(stratum, 2, MPIU_INT, 0, comm);CHKERRQ(ierr); for (p = 0; p < n; ++p) { const PetscInt point = gpoints[p]; if ((point >= stratum[0]) && (point < stratum[1]) && (point >= pmax)) ++newmax; } if (newmax > 0) pmesh->hybridPointMax[d] = pEnd - newmax; else pmesh->hybridPointMax[d] = -1; } ierr = ISLocalToGlobalMappingRestoreIndices(renumbering, &gpoints);CHKERRQ(ierr); } /* Cleanup Partition */ ierr = ISLocalToGlobalMappingDestroy(&renumbering);CHKERRQ(ierr); ierr = PetscSFDestroy(&partSF);CHKERRQ(ierr); ierr = PetscSectionDestroy(&partSection);CHKERRQ(ierr); ierr = ISDestroy(&part);CHKERRQ(ierr); /* Create point SF for parallel mesh */ ierr = PetscLogEventBegin(DMPLEX_DistributeSF,dm,0,0,0);CHKERRQ(ierr); { const PetscInt *leaves; PetscSFNode *remotePoints, *rowners, *lowners; PetscInt numRoots, numLeaves, numGhostPoints = 0, p, gp, *ghostPoints; PetscInt pStart, pEnd; ierr = DMPlexGetChart(*dmParallel, &pStart, &pEnd);CHKERRQ(ierr); ierr = PetscSFGetGraph(pointSF, &numRoots, &numLeaves, &leaves, NULL);CHKERRQ(ierr); ierr = PetscMalloc2(numRoots,&rowners,numLeaves,&lowners);CHKERRQ(ierr); for (p=0; p<numRoots; p++) { rowners[p].rank = -1; rowners[p].index = -1; } if (origCellPart) { /* Make sure points in the original partition are not assigned to other procs */ const PetscInt *origPoints; ierr = DMPlexCreatePartitionClosure(dm, origCellPartSection, origCellPart, &origPartSection, &origPart);CHKERRQ(ierr); ierr = ISGetIndices(origPart, &origPoints);CHKERRQ(ierr); for (p = 0; p < numProcs; ++p) { PetscInt dof, off, d; ierr = PetscSectionGetDof(origPartSection, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(origPartSection, p, &off);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { rowners[origPoints[d]].rank = p; } } ierr = ISRestoreIndices(origPart, &origPoints);CHKERRQ(ierr); ierr = ISDestroy(&origPart);CHKERRQ(ierr); ierr 
= PetscSectionDestroy(&origPartSection);CHKERRQ(ierr); } ierr = ISDestroy(&origCellPart);CHKERRQ(ierr); ierr = PetscSectionDestroy(&origCellPartSection);CHKERRQ(ierr); ierr = PetscSFBcastBegin(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); ierr = PetscSFBcastEnd(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); for (p = 0; p < numLeaves; ++p) { if (lowners[p].rank < 0 || lowners[p].rank == rank) { /* Either put in a bid or we know we own it */ lowners[p].rank = rank; lowners[p].index = leaves ? leaves[p] : p; } else if (lowners[p].rank >= 0) { /* Point already claimed so flag so that MAXLOC does not listen to us */ lowners[p].rank = -2; lowners[p].index = -2; } } for (p=0; p<numRoots; p++) { /* Root must not participate in the rediction, flag so that MAXLOC does not use */ rowners[p].rank = -3; rowners[p].index = -3; } ierr = PetscSFReduceBegin(pointSF, MPIU_2INT, lowners, rowners, MPI_MAXLOC);CHKERRQ(ierr); ierr = PetscSFReduceEnd(pointSF, MPIU_2INT, lowners, rowners, MPI_MAXLOC);CHKERRQ(ierr); ierr = PetscSFBcastBegin(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); ierr = PetscSFBcastEnd(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); for (p = 0; p < numLeaves; ++p) { if (lowners[p].rank < 0 || lowners[p].index < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cell partition corrupt: point not claimed"); if (lowners[p].rank != rank) ++numGhostPoints; } ierr = PetscMalloc1(numGhostPoints, &ghostPoints);CHKERRQ(ierr); ierr = PetscMalloc1(numGhostPoints, &remotePoints);CHKERRQ(ierr); for (p = 0, gp = 0; p < numLeaves; ++p) { if (lowners[p].rank != rank) { ghostPoints[gp] = leaves ? 
leaves[p] : p; remotePoints[gp].rank = lowners[p].rank; remotePoints[gp].index = lowners[p].index; ++gp; } } ierr = PetscFree2(rowners,lowners);CHKERRQ(ierr); ierr = PetscSFSetGraph((*dmParallel)->sf, pEnd - pStart, numGhostPoints, ghostPoints, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscSFSetFromOptions((*dmParallel)->sf);CHKERRQ(ierr); } pmesh->useCone = mesh->useCone; pmesh->useClosure = mesh->useClosure; ierr = PetscLogEventEnd(DMPLEX_DistributeSF,dm,0,0,0);CHKERRQ(ierr); /* Copy BC */ ierr = DMPlexCopyBoundary(dm, *dmParallel);CHKERRQ(ierr); /* Cleanup */ if (sf) {*sf = pointSF;} else {ierr = PetscSFDestroy(&pointSF);CHKERRQ(ierr);} ierr = DMSetFromOptions(*dmParallel);CHKERRQ(ierr); ierr = PetscLogEventEnd(DMPLEX_Distribute,dm,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode GreedyColoringLocalDistanceTwo_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors) { MC_Greedy *gr = (MC_Greedy *) mc->data; PetscInt i,j,k,l,s,e,n,nd,nd_global,n_global,idx,ncols,maxcolors,mcol,mcol_global,nd1cols,*mask,masksize,*d1cols,*bad,*badnext,nbad,badsize,ccol,no,cbad; Mat m = mc->mat, mt; Mat_MPIAIJ *aij = (Mat_MPIAIJ*)m->data; Mat md=NULL,mo=NULL; const PetscInt *md_i,*mo_i,*md_j,*mo_j; const PetscInt *rmd_i,*rmo_i,*rmd_j,*rmo_j; PetscBool isMPIAIJ,isSEQAIJ; PetscInt pcol,*dcolors,*ocolors; ISColoringValue *badidx; const PetscInt *cidx; PetscReal *owts,*colorweights; PetscInt *oconf,*conf; PetscSF sf; PetscLayout layout; PetscErrorCode ierr; PetscFunctionBegin; ierr = MatGetSize(m,&n_global,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(m,&s,&e);CHKERRQ(ierr); n=e-s; nd_global = 0; /* get the matrix communication structures */ ierr = PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ); CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ); CHKERRQ(ierr); if (isMPIAIJ) { Mat_SeqAIJ *dseq; Mat_SeqAIJ *oseq; md=aij->A; dseq = (Mat_SeqAIJ*)md->data; mo=aij->B; oseq = (Mat_SeqAIJ*)mo->data; md_i = dseq->i; md_j = dseq->j; mo_i = oseq->i; mo_j = oseq->j; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = oseq->i; rmo_j = oseq->j; } else if (isSEQAIJ) { Mat_SeqAIJ *dseq; /* no off-processor nodes */ md=m; dseq = (Mat_SeqAIJ*)md->data; md_i = dseq->i; md_j = dseq->j; mo_i = NULL; mo_j = NULL; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring"); if (!gr->symmetric) { ierr = MatTranspose(m, MAT_INITIAL_MATRIX, &mt);CHKERRQ(ierr); if (isSEQAIJ) { Mat_SeqAIJ *dseq = (Mat_SeqAIJ*) mt->data; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject) mc), PETSC_ERR_SUP, "Nonsymmetric greedy coloring only works in serial"); } 
/* create the vectors and communication structures if necessary */ no=0; if (mo) { ierr = VecGetLocalSize(aij->lvec,&no);CHKERRQ(ierr); ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);CHKERRQ(ierr); ierr = MatGetLayouts(m,&layout,NULL);CHKERRQ(ierr); ierr = PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);CHKERRQ(ierr); } ierr = MatColoringGetMaxColors(mc,&maxcolors);CHKERRQ(ierr); masksize=n; nbad=0; badsize=n; ierr = PetscMalloc1(masksize,&mask);CHKERRQ(ierr); ierr = PetscMalloc4(n,&d1cols,n,&dcolors,n,&conf,n,&bad);CHKERRQ(ierr); ierr = PetscMalloc2(badsize,&badidx,badsize,&badnext);CHKERRQ(ierr); for(i=0;i<masksize;i++) { mask[i]=-1; } for (i=0;i<n;i++) { dcolors[i]=maxcolors; bad[i]=-1; } for (i=0;i<badsize;i++) { badnext[i]=-1; } if (mo) { ierr = PetscMalloc3(no,&owts,no,&oconf,no,&ocolors);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); for (i=0;i<no;i++) { ocolors[i]=maxcolors; } } else { /* Appease overzealous -Wmaybe-initialized */ owts = NULL; oconf = NULL; ocolors = NULL; } mcol=0; while (nd_global < n_global) { nd=n; /* assign lowest possible color to each local vertex */ mcol_global=0; ierr = PetscLogEventBegin(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); for (i=0;i<n;i++) { idx=lperm[i]; if (dcolors[idx] == maxcolors) { /* entries in bad */ cbad=bad[idx]; while (cbad>=0) { ccol=badidx[cbad]; if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; cbad=badnext[cbad]; } /* diagonal distance-one rows */ nd1cols=0; ncols = rmd_i[idx+1]-rmd_i[idx]; cidx = &(rmd_j[rmd_i[idx]]); for (j=0;j<ncols;j++) { d1cols[nd1cols] = cidx[j]; nd1cols++; ccol=dcolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt 
*newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } /* off-diagonal distance-one rows */ if (mo) { ncols = rmo_i[idx+1]-rmo_i[idx]; cidx = &(rmo_j[rmo_i[idx]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* diagonal distance-two rows */ for (j=0;j<nd1cols;j++) { ncols = md_i[d1cols[j]+1]-md_i[d1cols[j]]; cidx = &(md_j[md_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=dcolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* off-diagonal distance-two rows */ if (mo) { for (j=0;j<nd1cols;j++) { ncols = mo_i[d1cols[j]+1]-mo_i[d1cols[j]]; cidx = &(mo_j[mo_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=ocolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } } /* assign this one the lowest color possible by seeing if there's a gap in the sequence of sorted neighbor colors */ for (j=0;j<masksize;j++) { if (mask[j]!=idx) { break; } } pcol=j; if (pcol>maxcolors) pcol=maxcolors; dcolors[idx]=pcol; if (pcol>mcol) mcol=pcol; } } ierr = 
PetscLogEventEnd(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); if (mo) { /* transfer neighbor colors */ ierr = PetscSFBcastBegin(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); /* find the maximum color assigned locally and allocate a mask */ ierr = MPIU_Allreduce(&mcol,&mcol_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); ierr = PetscMalloc1(mcol_global+1,&colorweights);CHKERRQ(ierr); /* check for conflicts */ for (i=0;i<n;i++) { conf[i]=PETSC_FALSE; } for (i=0;i<no;i++) { oconf[i]=PETSC_FALSE; } for (i=0;i<n;i++) { ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); if (ncols > 0) { /* fill in the mask */ for (j=0;j<mcol_global+1;j++) { colorweights[j]=0; } colorweights[dcolors[i]]=wts[i]; /* fill in the off-diagonal part of the mask */ for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] < owts[cidx[j]]) { colorweights[ccol] = owts[cidx[j]]; } } } /* fill in the on-diagonal part of the mask */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] < wts[cidx[j]]) { colorweights[ccol] = wts[cidx[j]]; } } } /* go back through and set up on and off-diagonal conflict vectors */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > wts[cidx[j]]) { conf[cidx[j]]=PETSC_TRUE; } } } ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > owts[cidx[j]]) { oconf[cidx[j]]=PETSC_TRUE; } } } } } nd_global=0; ierr = PetscFree(colorweights);CHKERRQ(ierr); ierr = PetscLogEventBegin(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); ierr = PetscSFReduceBegin(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = 
PetscLogEventEnd(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); /* go through and unset local colors that have conflicts */ for (i=0;i<n;i++) { if (conf[i]>0) { /* push this color onto the bad stack */ badidx[nbad]=dcolors[i]; badnext[nbad]=bad[i]; bad[i]=nbad; nbad++; if (nbad>=badsize) { PetscInt *newbadnext; ISColoringValue *newbadidx; ierr = PetscMalloc2(badsize*2,&newbadidx,badsize*2,&newbadnext);CHKERRQ(ierr); for(k=0;k<2*badsize;k++) { newbadnext[k]=-1; } for(k=0;k<badsize;k++) { newbadidx[k]=badidx[k]; newbadnext[k]=badnext[k]; } ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); badidx=newbadidx; badnext=newbadnext; badsize*=2; } dcolors[i] = maxcolors; nd--; } } } ierr = MPIU_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); } if (mo) { ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscFree3(owts,oconf,ocolors);CHKERRQ(ierr); } for (i=0;i<n;i++) { colors[i]=dcolors[i]; } ierr = PetscFree(mask);CHKERRQ(ierr); ierr = PetscFree4(d1cols,dcolors,conf,bad);CHKERRQ(ierr); ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); if (!gr->symmetric) {ierr = MatDestroy(&mt);CHKERRQ(ierr);} PetscFunctionReturn(0); }
/*
  DMPatchSolve - Loops over the patches of the coarse grid of a patch DM, zooms into each one,
  and moves the coarse global vector to the zoomed patch and back.

  Input Parameter:
. dm - the patch DM; its coarse DM, patch size and communicator size are queried here

  Notes:
  For every patch the coarse vector is broadcast to the zoomed vector through the SF returned
  by DMPatchZoom() and then summed back into the coarse vector through the second SF. The
  interpolation/smoothing/residual steps are not implemented (they are disabled with #if 0 or
  present only as comments).

  The patch description and both SFs are printed unconditionally for every patch.
*/
PetscErrorCode DMPatchSolve(DM dm)
{
  MPI_Comm       comm;
  MPI_Comm       commz;
  DM             dmc;
  PetscSF        sfz, sfzr;
  Vec            XC;
  MatStencil     patchSize, commSize, gridRank, lower, upper;
  PetscInt       M, N, P, i, j, k, l, m, n, p = 0;
  PetscMPIInt    rank, size;
  PetscInt       debug = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = DMPatchGetCoarse(dm, &dmc);CHKERRQ(ierr);
  ierr = DMPatchGetPatchSize(dm, &patchSize);CHKERRQ(ierr);
  ierr = DMPatchGetCommSize(dm, &commSize);CHKERRQ(ierr);
  ierr = DMGetGlobalVector(dmc, &XC);CHKERRQ(ierr);
  ierr = DMDAGetInfo(dmc, 0, &M, &N, &P, &l, &m, &n, 0,0,0,0,0,0);CHKERRQ(ierr);
  /* Clamp to 1 so unused dimensions do not produce zero divisors or empty loops */
  M = PetscMax(M, 1);
  l = PetscMax(l, 1);
  N = PetscMax(N, 1);
  m = PetscMax(m, 1);
  P = PetscMax(P, 1);
  n = PetscMax(n, 1);
  /* Position of this rank in the l x m x n process grid */
  gridRank.i = rank       % l;
  gridRank.j = rank/l     % m;
  gridRank.k = rank/(l*m) % n;

  if (commSize.i*commSize.j*commSize.k == size || commSize.i*commSize.j*commSize.k == 0) {
    /* One communicator covering all ranks (also the fallback when no comm size was given) */
    commSize.i = l;
    commSize.j = m;
    commSize.k = n;
    commz      = comm;
  } else if (commSize.i*commSize.j*commSize.k == 1) {
    commz = PETSC_COMM_SELF;
  } else {
    const PetscMPIInt newComm = ((gridRank.k/commSize.k)*(m/commSize.j) + gridRank.j/commSize.j)*(l/commSize.i) + (gridRank.i/commSize.i);
    const PetscMPIInt newRank = ((gridRank.k%commSize.k)*commSize.j + gridRank.j%commSize.j)*commSize.i + (gridRank.i%commSize.i);

    /* NOTE(review): the communicator created here is never freed in this function -- confirm whether an MPI_Comm_free() is needed */
    ierr = MPI_Comm_split(comm, newComm, newRank, &commz);CHKERRQ(ierr);
    if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, "Rank %d color %d key %d commz %d\n", rank, newComm, newRank, *((PetscMPIInt*) &commz));CHKERRQ(ierr);}
  }
  /*
   Assumptions:
     - patchSize divides gridSize
     - commSize divides gridSize
     - commSize divides l,m,n
   Ignore multiple patches per rank for now

   Multiple ranks per patch:
     - l,m,n divides patchSize
     - commSize divides patchSize
   */
  for (k = 0; k < P; k += PetscMax(patchSize.k, 1)) {
    for (j = 0; j < N; j += PetscMax(patchSize.j, 1)) {
      for (i = 0; i < M; i += PetscMax(patchSize.i, 1), ++p) {
        MPI_Comm    commp = MPI_COMM_NULL;
        DM          dmz   = NULL;
#if 0
        DM          dmf     = NULL;
        Mat         interpz = NULL;
#endif
        Vec         XZ       = NULL;
        PetscScalar *xcarray = NULL;
        PetscScalar *xzarray = NULL;

        /* NOTE(review): '%' and '/' associate left to right, so "x % n/commSize.k" is "(x % n)/commSize.k"; confirm that is the intent */
        if ((gridRank.k/commSize.k == p/(l/commSize.i * m/commSize.j) % n/commSize.k) &&
            (gridRank.j/commSize.j == p/(l/commSize.i)                % m/commSize.j) &&
            (gridRank.i/commSize.i == p                               % l/commSize.i)) {
          if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, "Rank %d is accepting Patch %D\n", rank, p);CHKERRQ(ierr);}
          commp = commz;
        }
        /* Zoom to coarse patch */
        lower.i = i;
        lower.j = j;
        lower.k = k;
        lower.c = 0; /* fully initialize the stencils before they are passed by value */
        upper.i = i + patchSize.i;
        upper.j = j + patchSize.j;
        upper.k = k + patchSize.k;
        upper.c = 0;
        ierr = DMPatchZoom(dmc, XC, lower, upper, commp, &dmz, &sfz, &sfzr);CHKERRQ(ierr);
        /* Debug */
        ierr = PetscPrintf(comm, "Patch %D: (%D, %D, %D)--(%D, %D, %D)\n", p, lower.i, lower.j, lower.k, upper.i, upper.j, upper.k);CHKERRQ(ierr);
        if (dmz) {ierr = DMView(dmz, PETSC_VIEWER_STDOUT_(commz));CHKERRQ(ierr);}
        ierr = PetscSFView(sfz,  PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);
        ierr = PetscSFView(sfzr, PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);
        /* Scatter Xcoarse -> Xzoom */
        if (dmz) {ierr = DMGetGlobalVector(dmz, &XZ);CHKERRQ(ierr);}
        if (XZ)  {ierr = VecGetArray(XZ, &xzarray);CHKERRQ(ierr);}
        ierr = VecGetArray(XC, &xcarray);CHKERRQ(ierr);
        ierr = PetscSFBcastBegin(sfz, MPIU_SCALAR, xcarray, xzarray);CHKERRQ(ierr);
        ierr = PetscSFBcastEnd(sfz, MPIU_SCALAR, xcarray, xzarray);CHKERRQ(ierr);
        ierr = VecRestoreArray(XC, &xcarray);CHKERRQ(ierr);
        if (XZ)  {ierr = VecRestoreArray(XZ, &xzarray);CHKERRQ(ierr);}
#if 0
        /* Interpolate Xzoom -> Xfine, note that this may be on subcomms */
        ierr = DMRefine(dmz, MPI_COMM_NULL, &dmf);CHKERRQ(ierr);
        ierr = DMCreateInterpolation(dmz, dmf, &interpz, NULL);CHKERRQ(ierr);
        ierr = DMInterpolate(dmz, interpz, dmf);CHKERRQ(ierr);
        /* Smooth Xfine using two-step smoother, normal smoother plus Kaczmarz---moves back and forth from dmzoom to dmfine */
        /* Compute residual Rfine */
        /* Restrict Rfine to Rzoom_restricted */
#endif
        /* Scatter Rzoom_restricted -> Rcoarse_restricted */
        if (XZ)  {ierr = VecGetArray(XZ, &xzarray);CHKERRQ(ierr);}
        ierr = VecGetArray(XC, &xcarray);CHKERRQ(ierr);
        ierr = PetscSFReduceBegin(sfzr, MPIU_SCALAR, xzarray, xcarray, MPIU_SUM);CHKERRQ(ierr);
        ierr = PetscSFReduceEnd(sfzr, MPIU_SCALAR, xzarray, xcarray, MPIU_SUM);CHKERRQ(ierr);
        ierr = VecRestoreArray(XC, &xcarray);CHKERRQ(ierr);
        if (XZ)  {ierr = VecRestoreArray(XZ, &xzarray);CHKERRQ(ierr);}
        if (dmz) {ierr = DMRestoreGlobalVector(dmz, &XZ);CHKERRQ(ierr);}
        /* Compute global residual Rcoarse */
        /* TauCoarse = Rcoarse - Rcoarse_restricted */

        ierr = PetscSFDestroy(&sfz);CHKERRQ(ierr);
        ierr = PetscSFDestroy(&sfzr);CHKERRQ(ierr);
        ierr = DMDestroy(&dmz);CHKERRQ(ierr);
      }
    }
  }
  ierr = DMRestoreGlobalVector(dmc, &XC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

   The result is cached in sf->multi and returned on subsequent calls; the caller does not receive a new reference.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    /* Only duplicate once: a repeated call must not overwrite (and leak) the cached duplicate */
    if (!sf->multi) {ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);}
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (sf->multi) {              /* Already built for the current graph */
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;

    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* maxlocal is one more than the largest leaf index, i.e. the length leaf-side work arrays need */
    for (i=0,maxlocal=0; i<sf->nleaves; i++) maxlocal = PetscMax(maxlocal,(sf->mine ? sf->mine[i] : i)+1);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset[i] is the first slot of root i in the split root space (prefix sum of the degrees) */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf fetches the running offset of its root and bumps it by one, so every leaf gets its own slot */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Send the owning rank of every leaf to its slot in the split root space */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its new (rank-sorted) slot and rebuild the graph with those slots */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { PetscSF sf,sfDup,sfInv,sfEmbed,sfA,sfB,sfBA; const PetscInt *degree; PetscErrorCode ierr; ierr = PetscInitialize(&argc,&argv,NULL,help);if (ierr) return ierr; ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFReset(sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFReset(sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = CheckGraphEmpty(sf);CHKERRQ(ierr); ierr = PetscSFReset(sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = CheckGraphEmpty(sf);CHKERRQ(ierr); ierr = PetscSFReset(sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test setup */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = CheckRanksNotSet(sf);CHKERRQ(ierr); ierr = 
PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = CheckRanksNotSet(sf);CHKERRQ(ierr); ierr = PetscSFSetUp(sf);CHKERRQ(ierr); ierr = CheckRanksEmpty(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test setup then reset */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFSetUp(sf);CHKERRQ(ierr); ierr = PetscSFReset(sf);CHKERRQ(ierr); ierr = CheckRanksNotSet(sf);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test view (no graph set, no type set) */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFView(sf,NULL);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test set graph then view (no type set) */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFView(sf,NULL);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test set type then view (no graph set) */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = PetscSFView(sf,NULL);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test set type then graph then view */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFView(sf,NULL);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test set graph then type */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = CheckGraphEmpty(sf);CHKERRQ(ierr); ierr = PetscSFReset(sf);CHKERRQ(ierr); ierr = CheckGraphNotSet(sf);CHKERRQ(ierr); ierr = 
PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test Bcast */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPI_INT,NULL,NULL);CHKERRQ(ierr); ierr = PetscSFBcastEnd (sf,MPI_INT,NULL,NULL);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test Reduce */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFReduceBegin(sf,MPI_INT,NULL,NULL,MPIU_REPLACE);CHKERRQ(ierr); ierr = PetscSFReduceEnd (sf,MPI_INT,NULL,NULL,MPIU_REPLACE);CHKERRQ(ierr); ierr = PetscSFReduceBegin(sf,MPI_INT,NULL,NULL,MPI_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd (sf,MPI_INT,NULL,NULL,MPI_SUM);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test FetchAndOp */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFFetchAndOpBegin(sf,MPI_INT,NULL,NULL,NULL,MPI_SUM);CHKERRQ(ierr); ierr = PetscSFFetchAndOpEnd (sf,MPI_INT,NULL,NULL,NULL,MPI_SUM);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test ComputeDegree */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_COPY_VALUES,NULL,PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test PetscSFDuplicate() */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_USE_POINTER,NULL,PETSC_USE_POINTER);CHKERRQ(ierr); ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&sfDup);CHKERRQ(ierr); ierr = CheckGraphEmpty(sfDup);CHKERRQ(ierr); ierr = PetscSFDestroy(&sfDup);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test PetscSFCreateInverseSF() */ ierr = 
PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_USE_POINTER,NULL,PETSC_USE_POINTER);CHKERRQ(ierr); ierr = PetscSFCreateInverseSF(sf,&sfInv);CHKERRQ(ierr); ierr = CheckGraphEmpty(sfInv);CHKERRQ(ierr); ierr = PetscSFDestroy(&sfInv);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test PetscSFCreateEmbeddedSF() */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_USE_POINTER,NULL,PETSC_USE_POINTER);CHKERRQ(ierr); ierr = PetscSFCreateEmbeddedSF(sf,0,NULL,&sfEmbed);CHKERRQ(ierr); ierr = CheckGraphEmpty(sfEmbed);CHKERRQ(ierr); ierr = PetscSFDestroy(&sfEmbed);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test PetscSFCreateEmbeddedLeafSF() */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,0,0,NULL,PETSC_USE_POINTER,NULL,PETSC_USE_POINTER);CHKERRQ(ierr); ierr = PetscSFCreateEmbeddedLeafSF(sf,0,NULL,&sfEmbed);CHKERRQ(ierr); ierr = CheckGraphEmpty(sfEmbed);CHKERRQ(ierr); ierr = PetscSFDestroy(&sfEmbed);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* Test PetscSFCompose() */ ierr = PetscSFCreate(PETSC_COMM_WORLD,&sfA);CHKERRQ(ierr); ierr = PetscSFSetGraph(sfA,0,0,NULL,PETSC_USE_POINTER,NULL,PETSC_USE_POINTER);CHKERRQ(ierr); ierr = PetscSFCreate(PETSC_COMM_WORLD,&sfB);CHKERRQ(ierr); ierr = PetscSFSetGraph(sfB,0,0,NULL,PETSC_USE_POINTER,NULL,PETSC_USE_POINTER);CHKERRQ(ierr); ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr); ierr = CheckGraphEmpty(sfBA);CHKERRQ(ierr); ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr); ierr = PetscSFDestroy(&sfA);CHKERRQ(ierr); ierr = PetscSFDestroy(&sfB);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }