/*
   MatColoringGetDegrees - For every locally owned row of G, counts the vertices
   popped from a stack-driven traversal of the overlapped local graph that is
   cut off after `distance` hops.

   Input Parameters:
+  G        - the matrix whose nonzero structure is traversed
-  distance - maximum number of hops; also the overlap requested from MatIncreaseOverlap()

   Output Parameter:
.  degrees  - degrees[r-rstart] receives the count for owned global row r; the
              caller provides one entry per owned row

   Notes:
   The extracted local matrix must be of type MATSEQAIJ, otherwise
   PETSC_ERR_ARG_WRONGSTATE is raised. The traversal does not treat the starting
   row specially, so it is counted whenever it shows up as a column index.
*/
PetscErrorCode MatColoringGetDegrees(Mat G,PetscInt distance,PetscInt *degrees)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,nrows,ncolsloc;
  PetscInt       row,k,nstack,vertex,hops,count,nnbr;
  IS             overlap;
  Mat            *submats,localG;
  PetscBool      isseqaij;
  Mat_SeqAIJ     *csr;
  PetscInt       *rowptr,*colidx;
  PetscInt       *stamp,*stack,*stackhops;
  const PetscInt *nbr,*grow;

  PetscFunctionBegin;
  /* index set of the owned rows, enlarged by `distance` levels of overlap */
  ierr = MatGetOwnershipRange(G,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)G),rend-rstart,rstart,1,&overlap);CHKERRQ(ierr);
  ierr = MatIncreaseOverlap(G,1,&overlap,distance);CHKERRQ(ierr);
  ierr = ISSort(overlap);CHKERRQ(ierr);

  /* square local submatrix on the overlapped rows/columns */
  ierr   = MatGetSubMatrices(G,1,&overlap,&overlap,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);
  localG = submats[0];
  ierr   = PetscObjectTypeCompare((PetscObject)localG,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
  if (!isseqaij) {
    SETERRQ(PetscObjectComm((PetscObject)G),PETSC_ERR_ARG_WRONGSTATE,"MatColoringDegrees requires an MPI/SEQAIJ Matrix");
  }
  ierr   = MatGetSize(localG,&nrows,&ncolsloc);CHKERRQ(ierr);
  csr    = (Mat_SeqAIJ*)localG->data;
  rowptr = csr->i;
  colidx = csr->j;

  ierr = PetscMalloc3(ncolsloc,&stamp,ncolsloc,&stack,ncolsloc,&stackhops);CHKERRQ(ierr);
  for (row=0;row<nrows;row++) stamp[row] = -1;

  ierr = ISGetIndices(overlap,&grow);CHKERRQ(ierr);
  for (row=0;row<nrows;row++) {
    /* only rows owned by this process produce an output entry */
    if (grow[row] < rstart || grow[row] >= rend) continue;

    nnbr   = rowptr[row+1]-rowptr[row];
    nbr    = colidx+rowptr[row];
    nstack = 0;
    count  = 0;

    /* seed the stack with the distance-one neighbors */
    for (k=0;k<nnbr;k++) {
      stamp[nbr[k]]     = row;
      stackhops[nstack] = 1;
      stack[nstack]     = nbr[k];
      nstack++;
    }

    while (nstack > 0) {
      /* pop; every popped vertex contributes one to the count */
      nstack--;
      vertex = stack[nstack];
      hops   = stackhops[nstack];
      count++;
      if (hops >= distance) continue;

      /* push the not-yet-stamped neighbors one hop further out */
      nnbr = rowptr[vertex+1]-rowptr[vertex];
      nbr  = colidx+rowptr[vertex];
      for (k=0;k<nnbr;k++) {
        if (stamp[nbr[k]] == row) continue;
        stamp[nbr[k]]     = row;
        stack[nstack]     = nbr[k];
        stackhops[nstack] = hops+1;
        nstack++;
      }
    }
    degrees[grow[row]-rstart] = count;
  }

  ierr = ISRestoreIndices(overlap,&grow);CHKERRQ(ierr);
  ierr = ISDestroy(&overlap);CHKERRQ(ierr);
  ierr = PetscFree3(stamp,stack,stackhops);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&submats);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat S, IS is_A_I, IS is_A_B, PetscInt ncc, IS is_cc[], PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers) { Mat A_II,A_IB,A_BI,A_BB; ISLocalToGlobalMapping BtoNmap,ItoNmap; PetscBT touched; PetscInt i,n_I,n_B,n_local,*local_numbering; PetscBool is_sorted; PetscErrorCode ierr; PetscFunctionBegin; ierr = ISSorted(is_A_I,&is_sorted);CHKERRQ(ierr); if (!is_sorted) { SETERRQ(PetscObjectComm((PetscObject)is_A_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); } ierr = ISSorted(is_A_B,&is_sorted);CHKERRQ(ierr); if (!is_sorted) { SETERRQ(PetscObjectComm((PetscObject)is_A_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); } /* get sizes */ ierr = ISGetLocalSize(is_A_I,&n_I);CHKERRQ(ierr); ierr = ISGetLocalSize(is_A_B,&n_B);CHKERRQ(ierr); n_local = n_I+n_B; /* maps */ ierr = ISLocalToGlobalMappingCreateIS(is_A_B,&BtoNmap);CHKERRQ(ierr); if (nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* I problems have a different size of the original ones */ ierr = ISLocalToGlobalMappingCreateIS(is_A_I,&ItoNmap);CHKERRQ(ierr); /* allocate some auxiliary space */ ierr = PetscMalloc1(n_local,&local_numbering);CHKERRQ(ierr); ierr = PetscBTCreate(n_local,&touched);CHKERRQ(ierr); } else { ItoNmap = 0; local_numbering = 0; touched = 0; } /* get Schur complement matrices */ ierr = MatSchurComplementGetSubMatrices(S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr); /* allocate space for schur complements */ ierr = PetscMalloc5(ncc,&sub_schurs->is_AEj_I,ncc,&sub_schurs->is_AEj_B,ncc,&sub_schurs->S_Ej,ncc,&sub_schurs->work1,ncc,&sub_schurs->work2);CHKERRQ(ierr); sub_schurs->n_subs = ncc; /* cycle on subsets and extract schur complements */ for (i=0;i<sub_schurs->n_subs;i++) { Mat AE_II,AE_IE,AE_EI,AE_EE; IS is_I,is_subset_B; /* get IS for subsets in B numbering */ ierr = ISDuplicate(is_cc[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); ierr = 
ISGlobalToLocalMappingApplyIS(BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B);CHKERRQ(ierr); /* BB block on subset */ ierr = MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);CHKERRQ(ierr); if (ItoNmap) { /* is ItoNmap has been computed, extracts only a part of I dofs */ const PetscInt* idx_B; PetscInt n_local_dofs,n_prev_added,j,layer,subset_size; /* all boundary dofs must be skipped when adding layers */ ierr = PetscBTMemzero(n_local,touched);CHKERRQ(ierr); ierr = ISGetIndices(is_A_B,&idx_B);CHKERRQ(ierr); for (j=0;j<n_B;j++) { ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr); } ierr = ISRestoreIndices(is_A_B,&idx_B);CHKERRQ(ierr); /* add next layers of dofs */ ierr = ISGetLocalSize(is_cc[i],&subset_size);CHKERRQ(ierr); ierr = ISGetIndices(is_cc[i],&idx_B);CHKERRQ(ierr); ierr = PetscMemcpy(local_numbering,idx_B,subset_size*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISRestoreIndices(is_cc[i],&idx_B);CHKERRQ(ierr); n_local_dofs = subset_size; n_prev_added = subset_size; for (layer=0;layer<nlayers;layer++) { PetscInt n_added; if (n_local_dofs == n_I+subset_size) break; if (n_local_dofs > n_I+subset_size) { SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. 
Out of bound access (%d > %d)",layer,n_local_dofs,n_I+subset_size); } ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr); n_prev_added = n_added; n_local_dofs += n_added; if (!n_added) break; } /* IS for I dofs in original numbering and in I numbering */ ierr = ISCreateGeneral(PetscObjectComm((PetscObject)ItoNmap),n_local_dofs-subset_size,local_numbering+subset_size,PETSC_COPY_VALUES,&sub_schurs->is_AEj_I[i]);CHKERRQ(ierr); ierr = ISSort(sub_schurs->is_AEj_I[i]);CHKERRQ(ierr); ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_I[i],&is_I);CHKERRQ(ierr); /* II block */ ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr); } else { /* in this case we can take references of already existing IS and matrices for I dofs */ /* IS for I dofs in original numbering */ ierr = PetscObjectReference((PetscObject)is_A_I);CHKERRQ(ierr); sub_schurs->is_AEj_I[i] = is_A_I; /* IS for I dofs in I numbering TODO: "first" argument of ISCreateStride is not general */ ierr = ISCreateStride(PetscObjectComm((PetscObject)is_A_I),n_I,0,1,&is_I);CHKERRQ(ierr); /* II block is the same */ ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr); AE_II = A_II; } /* IE block */ ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);CHKERRQ(ierr); /* EI block */ ierr = MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);CHKERRQ(ierr); /* setup Schur complements on subset */ ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&sub_schurs->S_Ej[i]);CHKERRQ(ierr); ierr = MatGetVecs(sub_schurs->S_Ej[i],&sub_schurs->work1[i],&sub_schurs->work2[i]);CHKERRQ(ierr); if (AE_II == A_II) { /* we can reuse the same ksp */ KSP ksp; ierr = MatSchurComplementGetKSP(S,&ksp);CHKERRQ(ierr); ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr); } else { /* build new ksp object which inherits ksp and pc types from the original one */ 
KSP origksp,schurksp; PC origpc,schurpc; KSPType ksp_type; PCType pc_type; PetscInt n_internal; ierr = MatSchurComplementGetKSP(S,&origksp);CHKERRQ(ierr); ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr); ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr); ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr); ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr); ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr); ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr); ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr); ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr); if (n_internal) { /* UMFPACK gives error with 0 sized problems */ MatSolverPackage solver=NULL; ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr); if (solver) { ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr); } } ierr = KSPSetUp(schurksp);CHKERRQ(ierr); } /* free */ ierr = MatDestroy(&AE_II);CHKERRQ(ierr); ierr = MatDestroy(&AE_EE);CHKERRQ(ierr); ierr = MatDestroy(&AE_IE);CHKERRQ(ierr); ierr = MatDestroy(&AE_EI);CHKERRQ(ierr); ierr = ISDestroy(&is_I);CHKERRQ(ierr); ierr = ISDestroy(&is_subset_B);CHKERRQ(ierr); } /* free */ ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); ierr = PetscFree(local_numbering);CHKERRQ(ierr); ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*
   MatColoringCreateSmallestLastWeights - Fills `weights` for the locally owned
   rows of mc->mat by repeatedly removing a vertex from the lowest non-empty
   degree bucket and raising the weight of its still-present neighbors.

   Input Parameter:
.  mc      - the coloring context; mc->mat is the graph and mc->dist the distance

   Output Parameter:
.  weights - weights[r-rstart] is set for every owned global row r; the caller
             provides one entry per owned row

   Notes:
   The local graph is the square submatrix on the owned rows enlarged by
   mc->dist+1 levels of overlap; it must be MATSEQAIJ, otherwise
   PETSC_ERR_ARG_WRONGSTATE is raised.

   Within each degree bucket the vertices are chained in a doubly linked list
   (llnext/llprev, -1 terminated) in an order derived from random values.
*/
PetscErrorCode MatColoringCreateSmallestLastWeights(MatColoring mc,PetscReal *weights)
{
  PetscInt       *degrees,*degb,*llprev,*llnext;
  PetscInt       j,i,s,e,n,ln,lm,degree,maxdegree=0,bidx,idx,dist,distance=mc->dist;
  Mat            lG,*lGs;
  IS             ris;
  PetscErrorCode ierr;
  PetscInt       *seen;
  const PetscInt *gidx;
  PetscInt       *idxbuf;
  PetscInt       *distbuf;
  PetscInt       ncols,nxt,prv,cur;
  const PetscInt *cols;
  PetscBool      isSEQAIJ;
  Mat_SeqAIJ     *aij;
  PetscInt       *Gi,*Gj,*rperm;
  Mat            G = mc->mat;
  PetscReal      *lweights,r;
  PetscRandom    rand;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(G,&s,&e);CHKERRQ(ierr);
  n    = e-s;
  ierr = ISCreateStride(PetscObjectComm((PetscObject)G),n,s,1,&ris);CHKERRQ(ierr);
  ierr = MatIncreaseOverlap(G,1,&ris,distance+1);CHKERRQ(ierr);
  ierr = ISSort(ris);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(G,1,&ris,&ris,MAT_INITIAL_MATRIX,&lGs);CHKERRQ(ierr);
  lG   = lGs[0];
  ierr = PetscObjectTypeCompare((PetscObject)lG,MATSEQAIJ,&isSEQAIJ);CHKERRQ(ierr);
  if (!isSEQAIJ) {
    SETERRQ(PetscObjectComm((PetscObject)G),PETSC_ERR_ARG_WRONGSTATE,"MatColoringDegrees requires an MPI/SEQAIJ Matrix");
  }
  /* rows and columns come from the same IS, so ln and lm describe a square matrix */
  ierr = MatGetSize(lG,&ln,&lm);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)lG->data;
  Gi   = aij->i;
  Gj   = aij->j;
  ierr = PetscMalloc3(lm,&seen,lm,&idxbuf,lm,&distbuf);CHKERRQ(ierr);
  ierr = PetscMalloc1(lm,&degrees);CHKERRQ(ierr);
  ierr = PetscMalloc1(lm,&lweights);CHKERRQ(ierr);
  for (i=0;i<ln;i++) {
    seen[i]     = -1;
    lweights[i] = 1.;
  }
  ierr = ISGetIndices(ris,&gidx);CHKERRQ(ierr);

  /* pass 1: degree of every local vertex (owned or overlap) within `distance` hops */
  for (i=0;i<ln;i++) {
    bidx   = -1;
    ncols  = Gi[i+1]-Gi[i];
    cols   = &(Gj[Gi[i]]);
    degree = 0;
    /* place the distance-one neighbors on the queue */
    for (j=0;j<ncols;j++) {
      bidx++;
      seen[cols[j]] = i;
      distbuf[bidx] = 1;
      idxbuf[bidx]  = cols[j];
    }
    while (bidx >= 0) {
      /* pop */
      idx  = idxbuf[bidx];
      dist = distbuf[bidx];
      bidx--;
      degree++;
      if (dist < distance) {
        ncols = Gi[idx+1]-Gi[idx];
        cols  = &(Gj[Gi[idx]]);
        for (j=0;j<ncols;j++) {
          if (seen[cols[j]] != i) {
            bidx++;
            seen[cols[j]] = i;
            idxbuf[bidx]  = cols[j];
            distbuf[bidx] = dist+1;
          }
        }
      }
    }
    degrees[i] = degree;
    if (degree > maxdegree) maxdegree = degree;
  }

  /* bucket by degree by some random permutation */
  ierr = PetscRandomCreate(PetscObjectComm((PetscObject)mc),&rand);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr);
  ierr = PetscMalloc1(ln,&rperm);CHKERRQ(ierr);
  for (i=0;i<ln;i++) {
    ierr        = PetscRandomGetValueReal(rand,&r);CHKERRQ(ierr);
    lweights[i] = r;
    rperm[i]    = i;
  }
  ierr = PetscSortRealWithPermutation(lm,lweights,rperm);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxdegree+1,&degb);CHKERRQ(ierr);
  ierr = PetscMalloc2(ln,&llnext,ln,&llprev);CHKERRQ(ierr);
  for (i=0;i<maxdegree+1;i++) {
    degb[i] = -1;
  }
  for (i=0;i<ln;i++) {
    llnext[i] = -1;
    llprev[i] = -1;
    seen[i]   = -1;
  }
  /* push each vertex on the front of its degree bucket, in permuted order */
  for (i=0;i<ln;i++) {
    idx         = rperm[i];
    llnext[idx] = degb[degrees[idx]];
    /* NOTE(review): "> 0" (here and below) treats vertex 0 like the -1 sentinel - confirm this is intended */
    if (degb[degrees[idx]] > 0) llprev[degb[degrees[idx]]] = idx;
    degb[degrees[idx]] = idx;
  }
  ierr = PetscFree(rperm);CHKERRQ(ierr);

  /* remove the lowest degree one */
  i = 0;
  /* the outer loop ends when a full scan of the buckets finds nothing to remove */
  while (i != maxdegree+1) {
    for (i=1;i<maxdegree+1; i++) {
      if (degb[i] > 0) {
        cur          = degb[i];
        degrees[cur] = 0; /* degree 0 marks the vertex as removed from the graph */
        degb[i]      = llnext[cur];
        bidx         = -1;
        ncols        = Gi[cur+1]-Gi[cur];
        cols         = &(Gj[Gi[cur]]);
        /* place the distance-one neighbors on the queue */
        for (j=0;j<ncols;j++) {
          if (cols[j] != cur) {
            bidx++;
            /* NOTE(review): the stamp is the bucket index i, not cur - verify this is the intended marker */
            seen[cols[j]] = i;
            distbuf[bidx] = 1;
            idxbuf[bidx]  = cols[j];
          }
        }
        while (bidx >= 0) {
          /* pop */
          idx  = idxbuf[bidx];
          dist = distbuf[bidx];
          bidx--;
          nxt  = llnext[idx];
          prv  = llprev[idx];
          if (degrees[idx] > 0) {
            /* change up the degree of the neighbors still in the graph */
            if (lweights[idx] <= lweights[cur]) lweights[idx] = lweights[cur]+1;
            /* unlink idx from its current bucket */
            if (nxt > 0) {
              llprev[nxt] = prv;
            }
            if (prv > 0) {
              llnext[prv] = nxt;
            } else {
              degb[degrees[idx]] = nxt;
            }
            /* relink idx at the head of the next lower bucket */
            degrees[idx]--;
            llnext[idx] = degb[degrees[idx]];
            llprev[idx] = -1;
            if (degb[degrees[idx]] >= 0) {
              llprev[degb[degrees[idx]]] = idx;
            }
            degb[degrees[idx]] = idx;
            if (dist < distance) {
              ncols = Gi[idx+1]-Gi[idx];
              cols  = &(Gj[Gi[idx]]);
              for (j=0;j<ncols;j++) {
                if (seen[cols[j]] != i) {
                  bidx++;
                  seen[cols[j]] = i;
                  idxbuf[bidx]  = cols[j];
                  distbuf[bidx] = dist+1;
                }
              }
            }
          }
        }
        /* restart the scan from the lowest bucket after every removal */
        break;
      }
    }
  }

  /* copy the weights of the owned rows to the output, in local row numbering */
  for (i=0;i<lm;i++) {
    if (gidx[i] >= s && gidx[i] < e) {
      weights[gidx[i]-s] = lweights[i];
    }
  }
  ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr);
  ierr = PetscFree(degb);CHKERRQ(ierr);
  ierr = PetscFree2(llnext,llprev);CHKERRQ(ierr);
  ierr = PetscFree(degrees);CHKERRQ(ierr);
  ierr = PetscFree(lweights);CHKERRQ(ierr);
  ierr = ISRestoreIndices(ris,&gidx);CHKERRQ(ierr);
  ierr = ISDestroy(&ris);CHKERRQ(ierr);
  ierr = PetscFree3(seen,idxbuf,distbuf);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&lGs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatSetUpMultiply_MPIAIJ - Compacts the column indices of the off-diagonal
   block aij->B and creates the objects used to gather the needed remote vector
   entries.

   Input Parameter:
.  mat - the MPIAIJ matrix; its data is updated in place

   On return:
+  aij->B      - column indices renumbered to 0..ec-1, with the column layout reset to ec
.  aij->garray - sorted global column index for each compacted local column
.  aij->lvec   - sequential vector of length ec that the scatter fills
-  aij->Mvctx  - scatter from a global vector with mat's column layout into aij->lvec

   Notes:
   With PETSC_USE_CTABLE the global-to-local map is kept in a PetscTable (one
   based); otherwise a work array of length mat->cmap->N+1 is used.

   If the column block size is larger than 1 and garray consists of complete
   aligned blocks, a block IS describes the source of the scatter.
*/
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;

      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;

      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j] ]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary Index sets for build scatter gather */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;

    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        /* every block must start on a multiple of bs ... */
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        /* ... and hold bs consecutive global indices */
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  /* the decision must agree on every process: compare the local flag with the global minimum */
  i    = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (j != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistent (I am using blocked)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;

    if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    /* PETSC_OWN_POINTER: ga is handed over to the IS and is not freed here */
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  aij->garray = garray;

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { PetscErrorCode ierr; PetscInt n = 5,N,i; PetscMPIInt size,rank; PetscScalar value,zero = 0.0; Vec x,y; IS is1,is2; VecScatter ctx = 0; ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); /* create two vectors */ N = size*n; ierr = VecCreate(PETSC_COMM_WORLD,&y);CHKERRQ(ierr); ierr = VecSetSizes(y,n,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(y);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,n,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); /* create two index sets */ ierr = ISCreateStride(PETSC_COMM_WORLD,n,n*rank,1,&is1);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_WORLD,n,(n*(rank+1))%N,1,&is2);CHKERRQ(ierr); /* fill local part of parallel vector x */ value = (PetscScalar)(rank+1); for (i=n*rank; i<n*(rank+1); i++) { ierr = VecSetValues(x,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); ierr = VecSet(y,zero);CHKERRQ(ierr); ierr = VecScatterCreate(x,is1,y,is2,&ctx);CHKERRQ(ierr); for (i=0; i<100; i++) { PetscReal ynorm; PetscInt j; ierr = VecNormBegin(y,NORM_2,&ynorm);CHKERRQ(ierr); ierr = PetscCommSplitReductionBegin(((PetscObject)y)->comm);CHKERRQ(ierr); for (j=0; j<3; j++) { ierr = VecScatterBegin(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); } ierr = VecNormEnd(y,NORM_2,&ynorm);CHKERRQ(ierr); /* ierr = PetscPrintf(PETSC_COMM_WORLD,"ynorm = %8.2G\n",ynorm);CHKERRQ(ierr); */ } ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/*
   PCBDDCNullSpaceAssembleCorrection - Replaces the PC of one of the local BDDC
   solvers with a PCSHELL built around the original PC and matrices derived from
   the null space stored in pcbddc->NullSpace.

   Input Parameters:
+  pc         - the BDDC preconditioner
-  local_dofs - local dofs of the solver to adapt; its size selects the solver:
                size of pcis->vec1_D -> pcbddc->ksp_D (Dirichlet),
                size of pcbddc->vec1_R -> pcbddc->ksp_R (Neumann)

   Notes:
   With N the null space vectors restricted to local_dofs (plus a constant
   column if the null space has one) and K the local operator, the shell
   context holds basis_mat = N, Kbasis_mat = -K*N and
   Lbasis_mat = N * inv_small_mat, where inv_small_mat is filled from solves
   with the LU factors of N^T*K*N.

   When local_dofs is empty only the scatters of the null space vectors and the
   final viewer flush are executed.

   When pcbddc->dbg_flag is set, diagnostic errors and an eigenvalue estimate are
   printed on pcbddc->dbg_viewer.

   Raises PETSC_ERR_PLIB if the size of local_dofs matches neither local solver.
*/
PetscErrorCode PCBDDCNullSpaceAssembleCorrection(PC pc,IS local_dofs)
{
  PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
  PC_IS                   *pcis   = (PC_IS*)pc->data;
  Mat_IS*                 matis   = (Mat_IS*)pc->pmat->data;
  KSP                     *local_ksp;
  PC                      newpc;
  NullSpaceCorrection_ctx shell_ctx;
  Mat                     local_mat,local_pmat,small_mat,inv_small_mat;
  MatStructure            local_mat_struct;
  Vec                     work1,work2;
  const Vec               *nullvecs;
  VecScatter              scatter_ctx;
  IS                      is_aux;
  MatFactorInfo           matinfo;
  PetscScalar             *basis_mat,*Kbasis_mat,*array,*array_mat;
  PetscScalar             one = 1.0,zero = 0.0, m_one = -1.0;
  PetscInt                basis_dofs,basis_size,nnsp_size,i,k,n_I,n_R;
  PetscBool               nnsp_has_cnst;
  PetscErrorCode          ierr;

  PetscFunctionBegin;
  /* Infer the local solver */
  ierr = ISGetSize(local_dofs,&basis_dofs);CHKERRQ(ierr);
  ierr = VecGetSize(pcis->vec1_D,&n_I);CHKERRQ(ierr);
  ierr = VecGetSize(pcbddc->vec1_R,&n_R);CHKERRQ(ierr);
  if (basis_dofs == n_I) {
    /* Dirichlet solver */
    local_ksp = &pcbddc->ksp_D;
  } else if (basis_dofs == n_R) {
    /* Neumann solver */
    local_ksp = &pcbddc->ksp_R;
  } else {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in %s: unknown local IS size %D (n_I=%D, n_R=%D)\n",__FUNCT__,basis_dofs,n_I,n_R);
  }
  ierr = KSPGetOperators(*local_ksp,&local_mat,&local_pmat,&local_mat_struct);CHKERRQ(ierr);

  /* Get null space vecs */
  ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);CHKERRQ(ierr);
  basis_size = nnsp_size;
  if (nnsp_has_cnst) {
    basis_size++;
  }

  if (basis_dofs) {
    /* Create shell ctx */
    ierr = PetscMalloc(sizeof(*shell_ctx),&shell_ctx);CHKERRQ(ierr);

    /* Create work vectors in shell context */
    ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);CHKERRQ(ierr);
    ierr = VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);CHKERRQ(ierr);
    ierr = VecSetType(shell_ctx->work_small_1,VECSEQ);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);CHKERRQ(ierr);
    ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);CHKERRQ(ierr);
    ierr = VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);CHKERRQ(ierr);
    ierr = VecSetType(shell_ctx->work_full_1,VECSEQ);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);CHKERRQ(ierr);

    /* Allocate workspace */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat );CHKERRQ(ierr);
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);CHKERRQ(ierr);
    ierr = MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr);
    ierr = MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr);

    /* Restrict local null space on selected dofs (Dirichlet or Neumann)
       and compute matrices N and K*N */
    ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);CHKERRQ(ierr);
  }

  for (k=0;k<nnsp_size;k++) {
    /* this scatter runs even when basis_dofs is 0 */
    ierr = VecScatterBegin(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (basis_dofs) {
      /* work1/work2 alias column k of the dense arrays, so results land in place */
      ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr);
      ierr = VecResetArray(work1);CHKERRQ(ierr);
      ierr = VecResetArray(work2);CHKERRQ(ierr);
    }
  }

  if (basis_dofs) {
    if (nnsp_has_cnst) {
      /* k == nnsp_size here: the constant vector fills the last column */
      ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = VecSet(work1,one);CHKERRQ(ierr);
      ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr);
      ierr = VecResetArray(work1);CHKERRQ(ierr);
      ierr = VecResetArray(work2);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&work1);CHKERRQ(ierr);
    ierr = VecDestroy(&work2);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr);

    /* Assemble another Mat object in shell context */
    ierr = MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);CHKERRQ(ierr);
    ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);CHKERRQ(ierr);
    ierr = MatLUFactor(small_mat,is_aux,is_aux,&matinfo);CHKERRQ(ierr);
    ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
    ierr = PetscMalloc(basis_size*basis_size*sizeof(PetscScalar),&array_mat);CHKERRQ(ierr);
    /* solve against each unit vector and store the solutions in array_mat */
    for (k=0;k<basis_size;k++) {
      ierr = VecSet(shell_ctx->work_small_1,zero);CHKERRQ(ierr);
      ierr = VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecAssemblyBegin(shell_ctx->work_small_1);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(shell_ctx->work_small_1);CHKERRQ(ierr);
      ierr = MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);CHKERRQ(ierr);
      ierr = VecGetArrayRead(shell_ctx->work_small_2,(const PetscScalar**)&array);CHKERRQ(ierr);
      for (i=0;i<basis_size;i++) {
        array_mat[i*basis_size+k]=array[i];
      }
      ierr = VecRestoreArrayRead(shell_ctx->work_small_2,(const PetscScalar**)&array);CHKERRQ(ierr);
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);CHKERRQ(ierr);
    ierr = MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);CHKERRQ(ierr);
    ierr = PetscFree(array_mat);CHKERRQ(ierr);
    ierr = MatDestroy(&inv_small_mat);CHKERRQ(ierr);
    ierr = MatDestroy(&small_mat);CHKERRQ(ierr);
    ierr = MatScale(shell_ctx->Kbasis_mat,m_one);CHKERRQ(ierr);

    /* Rebuild local PC: keep a reference to the original PC and wrap it in a shell */
    ierr = KSPGetPC(*local_ksp,&shell_ctx->local_pc);CHKERRQ(ierr);
    ierr = PetscObjectReference((PetscObject)shell_ctx->local_pc);CHKERRQ(ierr);
    ierr = PCCreate(PETSC_COMM_SELF,&newpc);CHKERRQ(ierr);
    ierr = PCSetOperators(newpc,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr);
    ierr = PCShellSetContext(newpc,shell_ctx);CHKERRQ(ierr);
    ierr = PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);CHKERRQ(ierr);
    ierr = PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);CHKERRQ(ierr);
    ierr = PCSetUp(newpc);CHKERRQ(ierr);
    ierr = KSPSetPC(*local_ksp,newpc);CHKERRQ(ierr);
    ierr = PCDestroy(&newpc);CHKERRQ(ierr);
    ierr = KSPSetUp(*local_ksp);CHKERRQ(ierr);
  }

  /* test */
  if (pcbddc->dbg_flag && basis_dofs) {
    KSP       check_ksp;
    PC        check_pc;
    Mat       test_mat;
    Vec       work3;
    PetscReal test_err,lambda_min,lambda_max;
    PetscBool setsym,issym=PETSC_FALSE;
    PetscInt  tabs;

    ierr = PetscViewerASCIIGetTab(pcbddc->dbg_viewer,&tabs);CHKERRQ(ierr);
    ierr = KSPGetPC(*local_ksp,&check_pc);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work3);CHKERRQ(ierr);

    /* check 1: max-norm of PC(K*v) - v for a random combination v of the columns of basis_mat */
    ierr = VecSetRandom(shell_ctx->work_small_1,NULL);CHKERRQ(ierr);
    ierr = MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);CHKERRQ(ierr);
    ierr = VecCopy(work1,work2);CHKERRQ(ierr);
    ierr = MatMult(local_mat,work1,work3);CHKERRQ(ierr);
    ierr = PCApply(check_pc,work3,work1);CHKERRQ(ierr);
    ierr = VecAXPY(work1,m_one,work2);CHKERRQ(ierr);
    ierr = VecNorm(work1,NORM_INFINITY,&test_err);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIIUseTabs(pcbddc->dbg_viewer,PETSC_FALSE);CHKERRQ(ierr);
    if (basis_dofs == n_I) {
      ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Dirichlet ");CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Neumann ");CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"solver is :%1.14e\n",test_err);CHKERRQ(ierr);
    ierr = PetscViewerASCIISetTab(pcbddc->dbg_viewer,tabs);CHKERRQ(ierr);
    ierr = PetscViewerASCIIUseTabs(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);

    /* check 2: norm of Lbasis_mat^T * Kbasis_mat + I (Kbasis_mat already carries the minus sign) */
    ierr = MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);CHKERRQ(ierr);
    ierr = MatShift(test_mat,one);CHKERRQ(ierr);
    ierr = MatNorm(test_mat,NORM_INFINITY,&test_err);CHKERRQ(ierr);
    ierr = MatDestroy(&test_mat);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d error for nullspace matrices is :%1.14e\n",PetscGlobalRank,test_err);CHKERRQ(ierr);

    /* Create ksp object suitable for extreme eigenvalues' estimation */
    ierr = KSPCreate(PETSC_COMM_SELF,&check_ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(check_ksp,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);CHKERRQ(ierr);
    ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
    if (issym) {
      ierr = KSPSetType(check_ksp,KSPCG);CHKERRQ(ierr);
    }
    ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
    ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);

    /* check 3: solve K*u = K*w for a random w and report the max-norm of u - w */
    ierr = VecSetRandom(work1,NULL);CHKERRQ(ierr);
    ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr);
    ierr = KSPSolve(check_ksp,work2,work2);CHKERRQ(ierr);
    ierr = VecAXPY(work2,m_one,work1);CHKERRQ(ierr);
    ierr = VecNorm(work2,NORM_INFINITY,&test_err);CHKERRQ(ierr);
    ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
    ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d error for adapted KSP %1.14e (it %D, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max);CHKERRQ(ierr);
    ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
    ierr = VecDestroy(&work1);CHKERRQ(ierr);
    ierr = VecDestroy(&work2);CHKERRQ(ierr);
    ierr = VecDestroy(&work3);CHKERRQ(ierr);
  }
  /* all processes should call this, even the void ones */
  if (pcbddc->dbg_flag) {
    ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int main(int argc,char **args) { Mat A,Atrans,sA,*submatA,*submatsA; PetscErrorCode ierr; PetscMPIInt size,rank; PetscInt bs=1,mbs=10,ov=1,i,j,k,*rows,*cols,nd=2,*idx,rstart,rend,sz,M,N,Mbs; PetscScalar *vals,rval,one=1.0; IS *is1,*is2; PetscRandom rand; PetscBool flg,TestOverlap,TestSubMat,TestAllcols; PetscLogStage stages[2]; PetscInt vid = -1; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_mbs",&mbs,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-ov",&ov,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-nd",&nd,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-view_id",&vid,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL, "-test_overlap", &TestOverlap);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL, "-test_submat", &TestSubMat);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL, "-test_allcols", &TestAllcols);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,mbs*bs,mbs*bs,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(A,bs,PETSC_DEFAULT,PETSC_NULL); ierr = MatMPIBAIJSetPreallocation(A,bs,PETSC_DEFAULT,PETSC_NULL,PETSC_DEFAULT,PETSC_NULL);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); ierr = MatGetSize(A,&M,&N); Mbs = M/bs; ierr = PetscMalloc(bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); ierr = PetscMalloc(bs*sizeof(PetscInt),&cols);CHKERRQ(ierr); ierr = PetscMalloc(bs*bs*sizeof(PetscScalar),&vals);CHKERRQ(ierr); ierr = PetscMalloc(M*sizeof(PetscScalar),&idx);CHKERRQ(ierr); /* Now set blocks of 
values */ for (j=0; j<bs*bs; j++) vals[j] = 0.0; for (i=0; i<Mbs; i++){ cols[0] = i*bs; rows[0] = i*bs; for (j=1; j<bs; j++) { rows[j] = rows[j-1]+1; cols[j] = cols[j-1]+1; } ierr = MatSetValues(A,bs,rows,bs,cols,vals,ADD_VALUES);CHKERRQ(ierr); } /* second, add random blocks */ for (i=0; i<20*bs; i++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); cols[0] = bs*(PetscInt)(PetscRealPart(rval)*Mbs); ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); rows[0] = rstart + bs*(PetscInt)(PetscRealPart(rval)*mbs); for (j=1; j<bs; j++) { rows[j] = rows[j-1]+1; cols[j] = cols[j-1]+1; } for (j=0; j<bs*bs; j++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); vals[j] = rval; } ierr = MatSetValues(A,bs,rows,bs,cols,vals,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* make A a symmetric matrix: A <- A^T + A */ ierr = MatTranspose(A,MAT_INITIAL_MATRIX, &Atrans);CHKERRQ(ierr); ierr = MatAXPY(A,one,Atrans,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatDestroy(&Atrans);CHKERRQ(ierr); ierr = MatTranspose(A,MAT_INITIAL_MATRIX, &Atrans); ierr = MatEqual(A, Atrans, &flg); if (flg) { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } else { SETERRQ(PETSC_COMM_SELF,1,"A+A^T is non-symmetric"); } ierr = MatDestroy(&Atrans);CHKERRQ(ierr); /* create a SeqSBAIJ matrix sA (= A) */ ierr = MatConvert(A,MATSBAIJ,MAT_INITIAL_MATRIX,&sA);CHKERRQ(ierr); if (vid >= 0 && vid < size){ if (!rank) printf("A: \n"); ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); if (!rank) printf("sA: \n"); ierr = MatView(sA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Test sA==A through MatMult() */ ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG ,"Error in MatConvert(): A != sA"); /* Test MatIncreaseOverlap() */ ierr = PetscMalloc(nd*sizeof(IS **),&is1);CHKERRQ(ierr); ierr = PetscMalloc(nd*sizeof(IS 
**),&is2);CHKERRQ(ierr); for (i=0; i<nd; i++) { if (!TestAllcols){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); sz = (PetscInt)((0.5+0.2*PetscRealPart(rval))*mbs); /* 0.5*mbs < sz < 0.7*mbs */ for (j=0; j<sz; j++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); idx[j*bs] = bs*(PetscInt)(PetscRealPart(rval)*Mbs); for (k=1; k<bs; k++) idx[j*bs+k] = idx[j*bs]+k; } ierr = ISCreateGeneral(PETSC_COMM_SELF,sz*bs,idx,PETSC_COPY_VALUES,is1+i);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,sz*bs,idx,PETSC_COPY_VALUES,is2+i);CHKERRQ(ierr); if (rank == vid){ ierr = PetscPrintf(PETSC_COMM_SELF," [%d] IS sz[%d]: %d\n",rank,i,sz);CHKERRQ(ierr); ierr = ISView(is2[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } } else { /* Test all rows and colums */ sz = M; ierr = ISCreateStride(PETSC_COMM_SELF,sz,0,1,is1+i);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,sz,0,1,is2+i);CHKERRQ(ierr); if (rank == vid){ PetscBool colflag; ierr = ISIdentity(is2[i],&colflag);CHKERRQ(ierr); printf("[%d] is2[%d], colflag %d\n",rank,i,colflag); ierr = ISView(is2[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } } } ierr = PetscLogStageRegister("MatOv_SBAIJ",&stages[0]); ierr = PetscLogStageRegister("MatOv_BAIJ",&stages[1]); /* Test MatIncreaseOverlap */ if (TestOverlap){ ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); ierr = MatIncreaseOverlap(sA,nd,is2,ov);CHKERRQ(ierr); ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr); ierr = MatIncreaseOverlap(A,nd,is1,ov);CHKERRQ(ierr); ierr = PetscLogStagePop();CHKERRQ(ierr); if (rank == vid){ printf("\n[%d] IS from BAIJ:\n",rank); ierr = ISView(is1[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); printf("\n[%d] IS from SBAIJ:\n",rank); ierr = ISView(is2[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } for (i=0; i<nd; ++i) { ierr = ISEqual(is1[i],is2[i],&flg);CHKERRQ(ierr); if (!flg ){ if (rank == 0){ ierr = ISSort(is1[i]);CHKERRQ(ierr); /* ISView(is1[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); */ ierr = 
ISSort(is2[i]);CHKERRQ(ierr); /* ISView(is2[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); */ } SETERRQ1(PETSC_COMM_SELF,1,"i=%D, is1 != is2",i); } } } /* Test MatGetSubmatrices */ if (TestSubMat){ for(i = 0; i < nd; ++i) { ierr = ISSort(is1[i]); CHKERRQ(ierr); ierr = ISSort(is2[i]); CHKERRQ(ierr); } ierr = MatGetSubMatrices(A,nd,is1,is1,MAT_INITIAL_MATRIX,&submatA);CHKERRQ(ierr); ierr = MatGetSubMatrices(sA,nd,is2,is2,MAT_INITIAL_MATRIX,&submatsA);CHKERRQ(ierr); ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A != sA"); /* Now test MatGetSubmatrices with MAT_REUSE_MATRIX option */ ierr = MatGetSubMatrices(A,nd,is1,is1,MAT_REUSE_MATRIX,&submatA);CHKERRQ(ierr); ierr = MatGetSubMatrices(sA,nd,is2,is2,MAT_REUSE_MATRIX,&submatsA);CHKERRQ(ierr); ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetSubmatrices(): A != sA"); for (i=0; i<nd; ++i) { ierr = MatDestroy(&submatA[i]);CHKERRQ(ierr); ierr = MatDestroy(&submatsA[i]);CHKERRQ(ierr); } ierr = PetscFree(submatA);CHKERRQ(ierr); ierr = PetscFree(submatsA);CHKERRQ(ierr); } /* Free allocated memory */ for (i=0; i<nd; ++i) { ierr = ISDestroy(&is1[i]);CHKERRQ(ierr); ierr = ISDestroy(&is2[i]);CHKERRQ(ierr); } ierr = PetscFree(is1);CHKERRQ(ierr); ierr = PetscFree(is2);CHKERRQ(ierr); ierr = PetscFree(idx);CHKERRQ(ierr); ierr = PetscFree(rows);CHKERRQ(ierr); ierr = PetscFree(cols);CHKERRQ(ierr); ierr = PetscFree(vals);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&sA);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/*
   MatIncreaseOverlap_MPISBAIJ - Grows each of the is_max index sets in is[]
   by ov levels of overlap with respect to the MPISBAIJ matrix C.

   Input:
     C      - the matrix (its data is accessed as Mat_MPISBAIJ)
     is_max - number of index sets in is[]
     is[]   - index sets in point (non-block) numbering
     ov     - number of overlap passes; must be >= 0

   Output:
     is[]   - every is[i] is destroyed and re-created from the enlarged block
              index set via ISExpandIndicesGeneral()

   With the option -IncreaseOverlap_old the routine instead calls
   MatIncreaseOverlap_MPISBAIJ_Once() ov times. Otherwise each pass does a row
   search with MatIncreaseOverlap_MPIBAIJ_Once() followed by a column search
   over index-only submatrices obtained from MatGetSubMatrices_MPIBAIJ_local().
*/
PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C,PetscInt is_max,IS is[],PetscInt ov)
{
  PetscErrorCode ierr;
  PetscInt       i,N=C->cmap->N, bs=C->rmap->bs,M=C->rmap->N,Mbs=M/bs,*nidx,isz,iov;
  IS             *is_new,*is_row;
  Mat            *submats;
  Mat_MPISBAIJ   *c=(Mat_MPISBAIJ*)C->data;
  Mat_SeqSBAIJ   *asub_i;
  PetscBT        table;
  PetscInt       *ai,brow,nz,nis,l,nmax,nstages_local,nstages,max_no,pos;
  const PetscInt *idx;
  PetscBool      flg,*allcolumns,*allrows;

  PetscFunctionBegin;
  /* validate before anything is allocated so the error path leaks nothing */
  if (ov < 0){ SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");}

  ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr);
  /* Convert the indices into block format */
  ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,is_max,is,is_new);CHKERRQ(ierr);

  /* ----- previous non-scalable implementation ----- */
  flg  = PETSC_FALSE;
  ierr = PetscOptionsHasName(PETSC_NULL, "-IncreaseOverlap_old", &flg);CHKERRQ(ierr);
  if (flg) { /* previous non-scalable implementation */
    printf("use previous non-scalable implementation...\n");
    for (i=0; i<ov; ++i) {
      ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr);
    }
  } else { /* implementation using modified BAIJ routines */

    ierr = PetscMalloc((Mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
    ierr = PetscBTCreate(Mbs,&table);CHKERRQ(ierr); /* for column search */
    ierr = PetscMalloc2(is_max+1,PetscBool,&allcolumns,is_max+1,PetscBool,&allrows);CHKERRQ(ierr);

    /* Create is_row: a single stride over all block rows shared by every entry.
       One extra slot (as for submats/allcolumns/allrows) keeps is_row[0] valid
       even when is_max is 0. */
    ierr = PetscMalloc((is_max+1)*sizeof(IS),&is_row);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,Mbs,0,1,&is_row[0]);CHKERRQ(ierr);
    allrows[0] = PETSC_TRUE;
    for (i=1; i<is_max; i++) {
      is_row[i]  = is_row[0]; /* reuse is_row[0] */
      allrows[i] = PETSC_TRUE;
    }

    /* Allocate memory to hold all the submatrices - Modified from MatGetSubMatrices_MPIBAIJ() */
    ierr = PetscMalloc((is_max+1)*sizeof(Mat),&submats);CHKERRQ(ierr);

    /* Check for special case: each processor gets entire matrix columns */
    for (i=0; i<is_max; i++) {
      ierr = ISIdentity(is_new[i],&flg);CHKERRQ(ierr);
      ierr = ISGetLocalSize(is_new[i],&isz);CHKERRQ(ierr);
      if (flg && isz == Mbs) {
        allcolumns[i] = PETSC_TRUE;
      } else {
        allcolumns[i] = PETSC_FALSE;
      }
    }

    /* Determine the number of stages through which submatrices are done;
       guard the division so a matrix with no block columns does not divide by zero */
    nmax = c->Nbs ? (PetscInt)(20*1000000 / (c->Nbs * sizeof(PetscInt))) : 0;
    if (!nmax) nmax = 1;
    nstages_local = is_max/nmax + ((is_max % nmax)?1:0);

    /* Make sure every processor loops through the nstages */
    ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);CHKERRQ(ierr);

    for (iov=0; iov<ov; ++iov) {
      /* 1) Get submats for column search */
      for (i=0,pos=0; i<nstages; i++) {
        if (pos+nmax <= is_max) max_no = nmax;
        else if (pos == is_max) max_no = 0;
        else                    max_no = is_max-pos;
        c->ijonly = PETSC_TRUE; /* request index-only submatrices; set before every call */
        ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,is_row+pos,is_new+pos,MAT_INITIAL_MATRIX,allrows+pos,allcolumns+pos,submats+pos);CHKERRQ(ierr);
        pos += max_no;
      }

      /* 2) Row search */
      ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr);

      /* 3) Column search */
      for (i=0; i<is_max; i++) {
        asub_i = (Mat_SeqSBAIJ*)submats[i]->data;
        ai     = asub_i->i;

        /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() to table */
        ierr = PetscBTMemzero(Mbs,table);CHKERRQ(ierr);

        ierr = ISGetIndices(is_new[i],&idx);CHKERRQ(ierr);
        ierr = ISGetLocalSize(is_new[i],&nis);CHKERRQ(ierr);
        for (l=0; l<nis; l++) {
          ierr    = PetscBTSet(table,idx[l]);CHKERRQ(ierr);
          nidx[l] = idx[l];
        }
        isz = nis;

        /* add column entries to table: every nonempty block row of the
           submatrix that is not yet in the set is appended */
        for (brow=0; brow<Mbs; brow++) {
          nz = ai[brow+1] - ai[brow];
          if (nz) {
            if (!PetscBTLookupSet(table,brow)) nidx[isz++] = brow;
          }
        }
        ierr = ISRestoreIndices(is_new[i],&idx);CHKERRQ(ierr);
        ierr = ISDestroy(&is_new[i]);CHKERRQ(ierr);

        /* create updated is_new */
        ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,PETSC_COPY_VALUES,is_new+i);CHKERRQ(ierr);
      }

      /* Free tmp spaces */
      for (i=0; i<is_max; i++) {
        ierr = MatDestroy(&submats[i]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree2(allcolumns,allrows);CHKERRQ(ierr);
    ierr = PetscBTDestroy(&table);CHKERRQ(ierr);
    ierr = PetscFree(submats);CHKERRQ(ierr);
    /* only is_row[0] owns an IS; the other entries alias it */
    ierr = ISDestroy(&is_row[0]);CHKERRQ(ierr);
    ierr = PetscFree(is_row);CHKERRQ(ierr);
    ierr = PetscFree(nidx);CHKERRQ(ierr);
  }

  /* replace the caller's index sets with the enlarged ones, back in point numbering */
  for (i=0; i<is_max; i++) {ierr = ISDestroy(&is[i]);CHKERRQ(ierr);}
  ierr = ISExpandIndicesGeneral(N,N,bs,is_max,is_new,is);CHKERRQ(ierr);

  for (i=0; i<is_max; i++) {ierr = ISDestroy(&is_new[i]);CHKERRQ(ierr);}
  ierr = PetscFree(is_new);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int main(int argc,char **args) { Mat A,F; PetscViewer fd; /* viewer */ char file[PETSC_MAX_PATH_LEN]; /* input file name */ PetscErrorCode ierr; PetscBool flg; Vec x,y,w; MatFactorInfo iluinfo; IS perm; PetscInt m; PetscReal norm; PetscInitialize(&argc,&args,(char *)0,help); /* Determine file from which we read the matrix */ ierr = PetscOptionsGetString(PETSC_NULL,"-f",file,PETSC_MAX_PATH_LEN,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,1,"Must indicate binary file with the -f option"); /* Open binary file. Note that we use FILE_MODE_READ to indicate reading from this file. */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); /* Load the matrix; then destroy the viewer. */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecLoad(x,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); ierr = VecDuplicate(x,&w);CHKERRQ(ierr); ierr = MatGetFactor(A,"petsc",MAT_FACTOR_ILU,&F);CHKERRQ(ierr); iluinfo.fill = 1.0; ierr = MatGetSize(A,&m,0);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_WORLD,m,0,1,&perm);CHKERRQ(ierr); ierr = MatLUFactorSymbolic(F,A,perm,perm,&iluinfo);CHKERRQ(ierr); ierr = MatLUFactorNumeric(F,A,&iluinfo);CHKERRQ(ierr); ierr = MatSolveTranspose(F,x,y);CHKERRQ(ierr); F->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; ierr = MatSolveTranspose(F,x,w);CHKERRQ(ierr); // VecView(w,0);VecView(y,0); ierr = VecAXPY(w,-1.0,y);CHKERRQ(ierr); ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr); if (norm) { ierr = PetscPrintf(PETSC_COMM_SELF,"Norm of difference is nonzero %g\n",norm);CHKERRQ(ierr); } ierr = ISDestroy(&perm);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&w);CHKERRQ(ierr); ierr = 
PetscFinalize(); return 0; }
/*
   permutematrix - Repartitions Ain with a MatPartitioning object and extracts
   the correspondingly permuted submatrix (and optionally the same submatrix of Bin).

   Input:
     Ain         - matrix used as the partitioning adjacency and as the source of Aout
     Bin         - optional second matrix; permuted only when both Bin and Bout are non-NULL
     permIndices - optional; when non-NULL receives a malloc()'ed array holding the
                   nlocal[rank] row indices followed by the localCols column indices
                   used for this process. This routine never frees it.

   Output:
     Aout - submatrix of Ain for the new row (and, for square Ain, column) ordering
     Bout - same submatrix of Bin, when requested

   If Ain is square the row index set is also used for the columns; otherwise the
   columns keep this process's original column ownership range.
*/
static PetscErrorCode permutematrix(Mat Ain, Mat Bin, Mat *Aout, Mat *Bout, int **permIndices)
{
  PetscErrorCode  ierr;
  MatPartitioning part;
  IS              isn, is, iscols;
  PetscInt        *nlocal,localCols,m,n;
  PetscMPIInt     size, rank;
  MPI_Comm        comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)Ain,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatGetSize(Ain,&m,&n);CHKERRQ(ierr);

  ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,Ain);CHKERRQ(ierr);
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
  /* get new processor owner number of each vertex */
  ierr = MatPartitioningApply(part,&is);CHKERRQ(ierr);
  /* get new global number of each old global number */
  ierr = ISPartitioningToNumbering(is,&isn);CHKERRQ(ierr);
  /* nlocal holds PetscInt counts, so it must be sized by PetscInt (not int) */
  ierr = PetscMalloc(size*sizeof(PetscInt),&nlocal);CHKERRQ(ierr);
  /* get number of new vertices for each processor */
  ierr = ISPartitioningCount(is,size,nlocal);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);

  /* get old global number of each new global number */
  ierr = ISInvertPermutation(isn,nlocal[rank],&is);CHKERRQ(ierr);
  ierr = ISDestroy(&isn);CHKERRQ(ierr);
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  ierr = ISSort(is);CHKERRQ(ierr);

  /* If matrix is square, the permutation is applied to rows and columns;
     otherwise it is only applied to rows. */
  if (m == n) {
    iscols    = is; /* alias: destroyed once, through is, at the end */
    localCols = nlocal[rank];
  } else {
    PetscInt lowj, highj;
    ierr      = MatGetOwnershipRangeColumn(Ain,&lowj,&highj);CHKERRQ(ierr);
    localCols = highj-lowj;
    ierr      = ISCreateStride(comm,localCols, lowj, 1, &iscols);CHKERRQ(ierr);
  }

  /* copy permutation: rows first, then columns */
  if (permIndices) {
    const PetscInt *indices;
    PetscInt       i,nperm = nlocal[rank]+localCols;

    *permIndices = malloc(sizeof(int)*(size_t)nperm);
    /* malloc(0) may legitimately return NULL, so only a nonempty request is an error */
    if (!*permIndices && nperm > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Unable to allocate permutation index array");

    ierr = ISGetIndices(is, &indices);CHKERRQ(ierr);
    for (i=0; i<nlocal[rank]; i++) (*permIndices)[i] = (int)indices[i];
    ierr = ISRestoreIndices(is, &indices);CHKERRQ(ierr);

    ierr = ISGetIndices(iscols, &indices);CHKERRQ(ierr);
    for (i=0; i<localCols; i++) (*permIndices)[i+nlocal[rank]] = (int)indices[i];
    ierr = ISRestoreIndices(iscols, &indices);CHKERRQ(ierr);
  }
  ierr = PetscFree(nlocal);CHKERRQ(ierr);

  ierr = MatGetSubMatrix(Ain,is,iscols,MAT_INITIAL_MATRIX,Aout);CHKERRQ(ierr);
  if (Bin && Bout) {
    ierr = MatGetSubMatrix(Bin,is,iscols,MAT_INITIAL_MATRIX,Bout);CHKERRQ(ierr);
  }

  ierr = ISDestroy(&is);CHKERRQ(ierr);
  /* iscols is a separate object only in the non-square case */
  if (m != n) {
    ierr = ISDestroy(&iscols);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}