PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) { Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); PetscErrorCode ierr; PetscInt i,j,*aj = B->j,ec = 0,*garray; IS from,to; Vec gvec; #if defined(PETSC_USE_CTABLE) PetscTable gid1_lid1; PetscTablePosition tpos; PetscInt gid,lid; #else PetscInt N = mat->cmap->N,*indices; #endif PetscFunctionBegin; if (!aij->garray) { #if defined(PETSC_USE_CTABLE) /* use a table */ ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt data,gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); if (!data) { /* one based table */ ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); } } } /* form array of columns we need */ ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); gid--; lid--; garray[lid] = gid; } ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); for (i=0; i<ec; i++) { ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); } /* compact out the extra columns in B */ for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); lid--; aj[B->i[i] + j] = lid; } } aij->B->cmap->n = aij->B->cmap->N = ec; aij->B->cmap->bs = 1; ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); #else /* Make an array as long as the number of columns */ /* mark those columns that are in aij->B */ ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { if (!indices[aj[B->i[i] + j]]) ec++; indices[aj[B->i[i] + j]] = 1; } } /* form array of columns we need */ ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); ec = 0; for (i=0; i<N; i++) { if (indices[i]) garray[ec++] = i; } /* make indices now point into garray */ for (i=0; i<ec; i++) { indices[garray[i]] = i; } /* compact out the extra columns in B */ for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } } aij->B->cmap->n = aij->B->cmap->N = ec; aij->B->cmap->bs = 1; ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); ierr = PetscFree(indices);CHKERRQ(ierr); #endif } else { garray = aij->garray; } if (!aij->lvec) { /* create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); } else { ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); } /* create two temporary Index sets for build scatter gather */ ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); /* create temporary global vector to generate scatter context */ /* This does not allocate the array's memory so is efficient */ ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); /* generate the scatter context */ if (aij->Mvctx_mpi1_flg) { ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); } else { ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); } aij->garray = garray; ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) { Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); PetscErrorCode ierr; PetscInt i,j,*aj = B->j,ec = 0,*garray; PetscInt bs = mat->rmap->bs,*stmp; IS from,to; Vec gvec; #if defined(PETSC_USE_CTABLE) PetscTable gid1_lid1; PetscTablePosition tpos; PetscInt gid,lid; #else PetscInt Nbs = baij->Nbs,*indices; #endif PetscFunctionBegin; #if defined(PETSC_USE_CTABLE) /* use a table - Mark Adams */ ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt data,gid1 = aj[B->i[i]+j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); if (!data) { /* one based table */ ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); } } } /* form array of columns we need */ ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); gid--; lid--; garray[lid] = gid; } ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); for (i=0; i<ec; i++) { ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); } /* compact out the extra columns in B */ for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); lid--; aj[B->i[i]+j] = lid; } } B->nbs = ec; baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); #else /* Make an array as long as the number of columns */ /* mark those columns that are in baij->B */ ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { if (!indices[aj[B->i[i] + j]]) ec++; indices[aj[B->i[i] + j]] = 1; } } /* form array of columns we need */ ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); ec = 0; for (i=0; i<Nbs; i++) { if (indices[i]) { garray[ec++] = i; } } /* make indices now point into garray */ for (i=0; i<ec; i++) { indices[garray[i]] = i; } /* compact out the extra columns in B */ for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } } B->nbs = ec; baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); ierr = PetscFree(indices);CHKERRQ(ierr); #endif /* create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); /* create two temporary index sets for building scatter-gather */ ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); for (i=0; i<ec; i++) stmp[i] = i; ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); /* create temporary global vector to generate scatter context */ ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); baij->garray = garray; ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ ISCompressIndicesGeneral - convert the indices into block indices Input Parameters: + n - maximum possible length of the index set . nkeys - expected number of keys when PETSC_USE_CTABLE . bs - the size of block . imax - the number of index sets - is_in - the non-blocked array of index sets Output Parameter: . is_out - the blocked new index set Level: intermediate .seealso: ISExpandIndicesGeneral() @*/ PetscErrorCode ISCompressIndicesGeneral(PetscInt n,PetscInt nkeys,PetscInt bs,PetscInt imax,const IS is_in[],IS is_out[]) { PetscErrorCode ierr; PetscInt isz,len,i,j,ival,Nbs; const PetscInt *idx; #if defined(PETSC_USE_CTABLE) PetscTable gid1_lid1; PetscInt tt, gid1, *nidx,Nkbs; PetscTablePosition tpos; #else PetscInt *nidx; PetscBT table; #endif PetscFunctionBegin; Nbs =n/bs; #if defined(PETSC_USE_CTABLE) Nkbs = nkeys/bs; ierr = PetscTableCreate(Nkbs,Nbs,&gid1_lid1);CHKERRQ(ierr); #else ierr = PetscMalloc1(Nbs,&nidx);CHKERRQ(ierr); ierr = PetscBTCreate(Nbs,&table);CHKERRQ(ierr); #endif for (i=0; i<imax; i++) { isz = 0; #if defined(PETSC_USE_CTABLE) ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); #else ierr = PetscBTMemzero(Nbs,table);CHKERRQ(ierr); #endif ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); ierr = ISGetLocalSize(is_in[i],&len);CHKERRQ(ierr); for (j=0; j<len; j++) { ival = idx[j]/bs; /* convert the indices into block indices */ #if defined(PETSC_USE_CTABLE) ierr = PetscTableFind(gid1_lid1,ival+1,&tt);CHKERRQ(ierr); if (!tt) { ierr = PetscTableAdd(gid1_lid1,ival+1,isz+1,INSERT_VALUES);CHKERRQ(ierr); isz++; } #else if (ival>Nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); if (!PetscBTLookupSet(table,ival)) nidx[isz++] = ival; #endif } ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr); #if defined(PETSC_USE_CTABLE) ierr = PetscMalloc1(isz,&nidx);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); j = 0; while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid1,&tt);CHKERRQ(ierr); if (tt-- > isz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than array-dim"); nidx[tt] = gid1 - 1; j++; } if (j != isz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"table error: jj != isz"); ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,PETSC_OWN_POINTER,(is_out+i));CHKERRQ(ierr); #else ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,PETSC_COPY_VALUES,(is_out+i));CHKERRQ(ierr); #endif } #if defined(PETSC_USE_CTABLE) ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); #else ierr = PetscBTDestroy(&table);CHKERRQ(ierr); ierr = PetscFree(nidx);CHKERRQ(ierr); #endif PetscFunctionReturn(0); }
/* DMDAGetFaceInterpolation - Gets the interpolation for a face based coarse space */ PetscErrorCode DMDAGetFaceInterpolation(DM da,PC_Exotic *exotic,Mat Aglobal,MatReuse reuse,Mat *P) { PetscErrorCode ierr; PetscInt dim,i,j,k,m,n,p,dof,Nint,Nface,Nwire,Nsurf,*Iint,*Isurf,cint = 0,csurf = 0,istart,jstart,kstart,*II,N,c = 0; PetscInt mwidth,nwidth,pwidth,cnt,mp,np,pp,Ntotal,gl[6],*globals,Ng,*IIint,*IIsurf,Nt; Mat Xint, Xsurf,Xint_tmp; IS isint,issurf,is,row,col; ISLocalToGlobalMapping ltg; MPI_Comm comm; Mat A,Aii,Ais,Asi,*Aholder,iAii; MatFactorInfo info; PetscScalar *xsurf,*xint; #if defined(PETSC_USE_DEBUG_foo) PetscScalar tmp; #endif PetscTable ht; PetscFunctionBegin; ierr = DMDAGetInfo(da,&dim,0,0,0,&mp,&np,&pp,&dof,0,0,0,0,0); CHKERRQ(ierr); if (dof != 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Only for single field problems"); if (dim != 3) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Only coded for 3d problems"); ierr = DMDAGetCorners(da,0,0,0,&m,&n,&p); CHKERRQ(ierr); ierr = DMDAGetGhostCorners(da,&istart,&jstart,&kstart,&mwidth,&nwidth,&pwidth); CHKERRQ(ierr); istart = istart ? -1 : 0; jstart = jstart ? -1 : 0; kstart = kstart ? -1 : 0; /* the columns of P are the interpolation of each coarse grid point (one for each vertex and edge) to all the local degrees of freedom (this includes the vertices, edges and faces). Xint are the subset of the interpolation into the interior Xface are the interpolation onto faces but not into the interior Xsurf are the interpolation onto the vertices and edges (the surfbasket) Xint Symbolically one could write P = (Xface) after interchanging the rows to match the natural ordering on the domain Xsurf */ N = (m - istart)*(n - jstart)*(p - kstart); Nint = (m-2-istart)*(n-2-jstart)*(p-2-kstart); Nface = 2*((m-2-istart)*(n-2-jstart) + (m-2-istart)*(p-2-kstart) + (n-2-jstart)*(p-2-kstart)); Nwire = 4*((m-2-istart) + (n-2-jstart) + (p-2-kstart)) + 8; Nsurf = Nface + Nwire; ierr = MatCreateSeqDense(MPI_COMM_SELF,Nint,6,NULL,&Xint); CHKERRQ(ierr); ierr = MatCreateSeqDense(MPI_COMM_SELF,Nsurf,6,NULL,&Xsurf); CHKERRQ(ierr); ierr = MatDenseGetArray(Xsurf,&xsurf); CHKERRQ(ierr); /* Require that all 12 edges and 6 faces have at least one grid point. Otherwise some of the columns of Xsurf will be all zero (thus making the coarse matrix singular). */ if (m-istart < 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of grid points per process in X direction must be at least 3"); if (n-jstart < 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of grid points per process in Y direction must be at least 3"); if (p-kstart < 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of grid points per process in Z direction must be at least 3"); cnt = 0; for (j=1; j<n-1-jstart; j++) { for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 0*Nsurf] = 1; } for (k=1; k<p-1-kstart; k++) { for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 1*Nsurf] = 1; for (j=1; j<n-1-jstart; j++) { xsurf[cnt++ + 2*Nsurf] = 1; /* these are the interior nodes */ xsurf[cnt++ + 3*Nsurf] = 1; } for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 4*Nsurf] = 1; } for (j=1; j<n-1-jstart; j++) { for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 5*Nsurf] = 1; } #if defined(PETSC_USE_DEBUG_foo) for (i=0; i<Nsurf; i++) { tmp = 0.0; for (j=0; j<6; j++) tmp += xsurf[i+j*Nsurf]; if (PetscAbsScalar(tmp-1.0) > 1.e-10) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong Xsurf interpolation at i %D value %g",i,(double)PetscAbsScalar(tmp)); } #endif ierr = MatDenseRestoreArray(Xsurf,&xsurf); CHKERRQ(ierr); /* ierr = MatView(Xsurf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);*/ /* I are the indices for all the needed vertices (in global numbering) Iint are the indices for the interior values, I surf for the surface values (This is just for the part of the global matrix obtained with MatGetSubMatrix(), it is NOT the local DMDA ordering.) IIint and IIsurf are the same as the Iint, Isurf except they are in the global numbering */ #define Endpoint(a,start,b) (a == 0 || a == (b-1-start)) ierr = PetscMalloc3(N,&II,Nint,&Iint,Nsurf,&Isurf); CHKERRQ(ierr); ierr = PetscMalloc2(Nint,&IIint,Nsurf,&IIsurf); CHKERRQ(ierr); for (k=0; k<p-kstart; k++) { for (j=0; j<n-jstart; j++) { for (i=0; i<m-istart; i++) { II[c++] = i + j*mwidth + k*mwidth*nwidth; if (!Endpoint(i,istart,m) && !Endpoint(j,jstart,n) && !Endpoint(k,kstart,p)) { IIint[cint] = i + j*mwidth + k*mwidth*nwidth; Iint[cint++] = i + j*(m-istart) + k*(m-istart)*(n-jstart); } else { IIsurf[csurf] = i + j*mwidth + k*mwidth*nwidth; Isurf[csurf++] = i + j*(m-istart) + k*(m-istart)*(n-jstart); } } } } if (c != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"c != N"); if (cint != Nint) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"cint != Nint"); if (csurf != Nsurf) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csurf != Nsurf"); ierr = DMGetLocalToGlobalMapping(da,<g); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(ltg,N,II,II); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(ltg,Nint,IIint,IIint); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(ltg,Nsurf,IIsurf,IIsurf); CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)da,&comm); CHKERRQ(ierr); ierr = ISCreateGeneral(comm,N,II,PETSC_COPY_VALUES,&is); CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,Nint,Iint,PETSC_COPY_VALUES,&isint); CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,Nsurf,Isurf,PETSC_COPY_VALUES,&issurf); CHKERRQ(ierr); ierr = PetscFree3(II,Iint,Isurf); CHKERRQ(ierr); ierr = ISSort(is); CHKERRQ(ierr); ierr = MatGetSubMatrices(Aglobal,1,&is,&is,MAT_INITIAL_MATRIX,&Aholder); CHKERRQ(ierr); A = *Aholder; ierr = PetscFree(Aholder); CHKERRQ(ierr); ierr = MatGetSubMatrix(A,isint,isint,MAT_INITIAL_MATRIX,&Aii); CHKERRQ(ierr); ierr = MatGetSubMatrix(A,isint,issurf,MAT_INITIAL_MATRIX,&Ais); CHKERRQ(ierr); ierr = MatGetSubMatrix(A,issurf,isint,MAT_INITIAL_MATRIX,&Asi); CHKERRQ(ierr); /* Solve for the interpolation onto the interior Xint */ ierr = MatMatMult(Ais,Xsurf,MAT_INITIAL_MATRIX,PETSC_DETERMINE,&Xint_tmp); CHKERRQ(ierr); ierr = MatScale(Xint_tmp,-1.0); CHKERRQ(ierr); if (exotic->directSolve) { ierr = MatGetFactor(Aii,MATSOLVERPETSC,MAT_FACTOR_LU,&iAii); CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&info); CHKERRQ(ierr); ierr = MatGetOrdering(Aii,MATORDERINGND,&row,&col); CHKERRQ(ierr); ierr = MatLUFactorSymbolic(iAii,Aii,row,col,&info); CHKERRQ(ierr); ierr = ISDestroy(&row); CHKERRQ(ierr); ierr = ISDestroy(&col); CHKERRQ(ierr); ierr = MatLUFactorNumeric(iAii,Aii,&info); CHKERRQ(ierr); ierr = MatMatSolve(iAii,Xint_tmp,Xint); CHKERRQ(ierr); ierr = MatDestroy(&iAii); CHKERRQ(ierr); } else { Vec b,x; PetscScalar *xint_tmp; ierr = MatDenseGetArray(Xint,&xint); CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,Nint,0,&x); CHKERRQ(ierr); ierr = MatDenseGetArray(Xint_tmp,&xint_tmp); CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,Nint,0,&b); CHKERRQ(ierr); ierr = KSPSetOperators(exotic->ksp,Aii,Aii); CHKERRQ(ierr); for (i=0; i<6; i++) { ierr = VecPlaceArray(x,xint+i*Nint); CHKERRQ(ierr); ierr = VecPlaceArray(b,xint_tmp+i*Nint); CHKERRQ(ierr); ierr = KSPSolve(exotic->ksp,b,x); CHKERRQ(ierr); ierr = VecResetArray(x); CHKERRQ(ierr); ierr = VecResetArray(b); CHKERRQ(ierr); } ierr = MatDenseRestoreArray(Xint,&xint); CHKERRQ(ierr); ierr = MatDenseRestoreArray(Xint_tmp,&xint_tmp); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&b); CHKERRQ(ierr); } ierr = MatDestroy(&Xint_tmp); CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG_foo) ierr = MatDenseGetArray(Xint,&xint); CHKERRQ(ierr); for (i=0; i<Nint; i++) { tmp = 0.0; for (j=0; j<6; j++) tmp += xint[i+j*Nint]; if (PetscAbsScalar(tmp-1.0) > 1.e-10) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong Xint interpolation at i %D value %g",i,(double)PetscAbsScalar(tmp)); } ierr = MatDenseRestoreArray(Xint,&xint); CHKERRQ(ierr); /* ierr =MatView(Xint,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ #endif /* total faces */ Ntotal = mp*np*(pp+1) + mp*pp*(np+1) + np*pp*(mp+1); /* For each vertex, edge, face on process (in the same orderings as used above) determine its local number including ghost points */ cnt = 0; { gl[cnt++] = mwidth+1; } { { gl[cnt++] = mwidth*nwidth+1; } { gl[cnt++] = mwidth*nwidth + mwidth; /* these are the interior nodes */ gl[cnt++] = mwidth*nwidth + mwidth+m-istart-1; } { gl[cnt++] = mwidth*nwidth+mwidth*(n-jstart-1)+1; } } { gl[cnt++] = mwidth*nwidth*(p-kstart-1) + mwidth+1; } /* PetscIntView(6,gl,PETSC_VIEWER_STDOUT_WORLD); */ /* convert that to global numbering and get them on all processes */ ierr = ISLocalToGlobalMappingApply(ltg,6,gl,gl); CHKERRQ(ierr); /* PetscIntView(6,gl,PETSC_VIEWER_STDOUT_WORLD); */ ierr = PetscMalloc1(6*mp*np*pp,&globals); CHKERRQ(ierr); ierr = MPI_Allgather(gl,6,MPIU_INT,globals,6,MPIU_INT,PetscObjectComm((PetscObject)da)); CHKERRQ(ierr); /* Number the coarse grid points from 0 to Ntotal */ ierr = MatGetSize(Aglobal,&Nt,NULL); CHKERRQ(ierr); ierr = PetscTableCreate(Ntotal/3,Nt+1,&ht); CHKERRQ(ierr); for (i=0; i<6*mp*np*pp; i++) { ierr = PetscTableAddCount(ht,globals[i]+1); CHKERRQ(ierr); } ierr = PetscTableGetCount(ht,&cnt); CHKERRQ(ierr); if (cnt != Ntotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hash table size %D not equal to total number coarse grid points %D",cnt,Ntotal); ierr = PetscFree(globals); CHKERRQ(ierr); for (i=0; i<6; i++) { ierr = PetscTableFind(ht,gl[i]+1,&gl[i]); CHKERRQ(ierr); gl[i]--; } ierr = PetscTableDestroy(&ht); CHKERRQ(ierr); /* PetscIntView(6,gl,PETSC_VIEWER_STDOUT_WORLD); */ /* construct global interpolation matrix */ ierr = MatGetLocalSize(Aglobal,&Ng,NULL); CHKERRQ(ierr); if (reuse == MAT_INITIAL_MATRIX) { ierr = MatCreateAIJ(PetscObjectComm((PetscObject)da),Ng,PETSC_DECIDE,PETSC_DECIDE,Ntotal,Nint+Nsurf,NULL,Nint,NULL,P); CHKERRQ(ierr); } else { ierr = MatZeroEntries(*P); CHKERRQ(ierr); } ierr = MatSetOption(*P,MAT_ROW_ORIENTED,PETSC_FALSE); CHKERRQ(ierr); ierr = MatDenseGetArray(Xint,&xint); CHKERRQ(ierr); ierr = MatSetValues(*P,Nint,IIint,6,gl,xint,INSERT_VALUES); CHKERRQ(ierr); ierr = MatDenseRestoreArray(Xint,&xint); CHKERRQ(ierr); ierr = MatDenseGetArray(Xsurf,&xsurf); CHKERRQ(ierr); ierr = MatSetValues(*P,Nsurf,IIsurf,6,gl,xsurf,INSERT_VALUES); CHKERRQ(ierr); ierr = MatDenseRestoreArray(Xsurf,&xsurf); CHKERRQ(ierr); ierr = MatAssemblyBegin(*P,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(*P,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = PetscFree2(IIint,IIsurf); CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG_foo) { Vec x,y; PetscScalar *yy; ierr = VecCreateMPI(PetscObjectComm((PetscObject)da),Ng,PETSC_DETERMINE,&y); CHKERRQ(ierr); ierr = VecCreateMPI(PetscObjectComm((PetscObject)da),PETSC_DETERMINE,Ntotal,&x); CHKERRQ(ierr); ierr = VecSet(x,1.0); CHKERRQ(ierr); ierr = MatMult(*P,x,y); CHKERRQ(ierr); ierr = VecGetArray(y,&yy); CHKERRQ(ierr); for (i=0; i<Ng; i++) { if (PetscAbsScalar(yy[i]-1.0) > 1.e-10) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong p interpolation at i %D value %g",i,(double)PetscAbsScalar(yy[i])); } ierr = VecRestoreArray(y,&yy); CHKERRQ(ierr); ierr = VecDestroy(x); CHKERRQ(ierr); ierr = VecDestroy(y); CHKERRQ(ierr); } #endif ierr = MatDestroy(&Aii); CHKERRQ(ierr); ierr = MatDestroy(&Ais); CHKERRQ(ierr); ierr = MatDestroy(&Asi); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = ISDestroy(&is); CHKERRQ(ierr); ierr = ISDestroy(&isint); CHKERRQ(ierr); ierr = ISDestroy(&issurf); CHKERRQ(ierr); ierr = MatDestroy(&Xint); CHKERRQ(ierr); ierr = MatDestroy(&Xsurf); CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C PetscSFSetGraph - Set a parallel star forest Collective Input Arguments: + sf - star forest . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) . nleaves - number of leaf vertices on the current process, each of these references a root on any process . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage . localmode - copy mode for ilocal . iremote - remote locations of root vertices for each leaf on the current process - remotemode - copy mode for iremote Level: intermediate .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() @*/ PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) { PetscErrorCode ierr; PetscTable table; PetscTablePosition pos; PetscMPIInt size; PetscInt i,*rcount,*ranks; PetscFunctionBegin; PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); if (nleaves && ilocal) PetscValidIntPointer(ilocal,4); if (nleaves) PetscValidPointer(iremote,6); if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots); if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); ierr = PetscSFReset(sf);CHKERRQ(ierr); sf->nroots = nroots; sf->nleaves = nleaves; if (ilocal) { switch (localmode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); sf->mine = sf->mine_alloc; ierr = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr); sf->minleaf = PETSC_MAX_INT; sf->maxleaf = PETSC_MIN_INT; for (i=0; i<nleaves; i++) { sf->minleaf = PetscMin(sf->minleaf,ilocal[i]); sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]); } break; case PETSC_OWN_POINTER: sf->mine_alloc = (PetscInt*)ilocal; sf->mine = sf->mine_alloc; break; case PETSC_USE_POINTER: sf->mine = (PetscInt*)ilocal; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); } } if (!ilocal || nleaves > 0) { sf->minleaf = 0; sf->maxleaf = nleaves - 1; } switch (remotemode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); sf->remote = sf->remote_alloc; ierr = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr); break; case PETSC_OWN_POINTER: sf->remote_alloc = (PetscSFNode*)iremote; sf->remote = sf->remote_alloc; break; case PETSC_USE_POINTER: sf->remote = (PetscSFNode*)iremote; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); } ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); for (i=0; i<nleaves; i++) { /* Log 1-based rank */ ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); } ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr); ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); for (i=0; i<sf->nranks; i++) { ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); ranks[i]--; /* Convert back to 0-based */ } ierr = PetscTableDestroy(&table);CHKERRQ(ierr); ierr = PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr); sf->roffset[0] = 0; for (i=0; i<sf->nranks; i++) { ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); sf->roffset[i+1] = sf->roffset[i] + rcount[i]; rcount[i] = 0; } for (i=0; i<nleaves; i++) { PetscInt lo,hi,irank; /* Search for index of iremote[i].rank in sf->ranks */ lo = 0; hi = sf->nranks; while (hi - lo > 1) { PetscInt mid = lo + (hi - lo)/2; if (iremote[i].rank < sf->ranks[mid]) hi = mid; else lo = mid; } if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo; else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank); sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i; sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index; rcount[irank]++; } ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (nroots == PETSC_DETERMINE) { /* Jed, if you have a better way to do this, put it in */ PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0; /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */ ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr); ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank]; ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Could set nroots to this maximum */ for (i = 0; i < size; ++i) maxRoots += numRankRoots[i]; /* Gather all indices */ ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr); leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index; leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; rootOff[0] = 0; for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i]; ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Sort and reduce */ ierr = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr); ierr = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr); ierr = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr); sf->nroots = maxRoots; } #endif sf->graphset = PETSC_TRUE; ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) { Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); PetscErrorCode ierr; PetscInt i,j,*aj = B->j,ec = 0,*garray; IS from,to; Vec gvec; PetscBool useblockis; #if defined (PETSC_USE_CTABLE) PetscTable gid1_lid1; PetscTablePosition tpos; PetscInt gid,lid; #else PetscInt N = mat->cmap->N,*indices; #endif PetscFunctionBegin; #if defined (PETSC_USE_CTABLE) /* use a table */ ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt data,gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); if (!data) { /* one based table */ ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); } } } /* form array of columns we need */ ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); gid--; lid--; garray[lid] = gid; } ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); for (i=0; i<ec; i++) { ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); } /* compact out the extra columns in B */ for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); lid --; aj[B->i[i] + j] = lid; } } aij->B->cmap->n = aij->B->cmap->N = ec; ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); #else /* Make an array as long as the number of columns */ /* mark those columns that are in aij->B */ ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { if (!indices[aj[B->i[i] + j] ]) ec++; indices[aj[B->i[i] + j] ] = 1; } } /* form array of columns we need */ ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); ec = 0; for (i=0; i<N; i++) { if (indices[i]) garray[ec++] = i; } /* make indices now point into garray */ for (i=0; i<ec; i++) { indices[garray[i]] = i; } /* compact out the extra columns in B */ for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } } aij->B->cmap->n = aij->B->cmap->N = ec; ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); ierr = PetscFree(indices);CHKERRQ(ierr); #endif /* create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); /* create two temporary Index sets for build scatter gather */ /* check for the special case where blocks are communicated for faster VecScatterXXX */ useblockis = PETSC_FALSE; if (mat->cmap->bs > 1) { PetscInt bs = mat->cmap->bs,ibs,ga; if (!(ec % bs)) { useblockis = PETSC_TRUE; for (i=0; i<ec/bs; i++) { if ((ga = garray[ibs = i*bs]) % bs) { useblockis = PETSC_FALSE; break; } for (j=1; j<bs; j++) { if (garray[ibs+j] != ga+j) { useblockis = PETSC_FALSE; break; } } if (!useblockis) break; } } } #if defined(PETSC_USE_DEBUG) i = (PetscInt)useblockis; ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm); CHKERRQ(ierr); if(j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistant (I am usning blocked)"); #endif if (useblockis) { PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs; if(ec%bs)SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs); ierr = PetscInfo(mat,"Using block index set to define scatter\n"); ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr); for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs; ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr); } else { ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); } ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); /* create temporary global vector to generate scatter context */ /* This does not allocate the array's memory so is efficient */ ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); /* generate the scatter context */ ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); aij->garray = garray; ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); PetscFunctionReturn(0); }