static PetscErrorCode KSPDestroy_PIPEFCG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i;
  KSP_PIPEFCG    *pipefcg;

  PetscFunctionBegin;
  pipefcg = (KSP_PIPEFCG*)ksp->data;

  /* Destroy "standard" work vecs */
  ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);

  /* Destroy vectors of old directions and the arrays that manage pointers to them */
  if (pipefcg->nvecs) {
    for (i=0;i<pipefcg->nchunks;++i) {
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pPvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pSvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pQvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pZETAvecs[i]);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree4(pipefcg->Pvecs,pipefcg->Svecs,pipefcg->pPvecs,pipefcg->pSvecs);CHKERRQ(ierr);
  ierr = PetscFree4(pipefcg->Qvecs,pipefcg->ZETAvecs,pipefcg->pQvecs,pipefcg->pZETAvecs);CHKERRQ(ierr);
  ierr = PetscFree4(pipefcg->Pold,pipefcg->Sold,pipefcg->Qold,pipefcg->ZETAold);CHKERRQ(ierr);
  ierr = PetscFree(pipefcg->chunksizes);CHKERRQ(ierr);
  ierr = PetscFree3(pipefcg->dots,pipefcg->etas,pipefcg->redux);CHKERRQ(ierr);
  ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@ PetscDTReconstructPoly - create matrix representing polynomial reconstruction using cell intervals and evaluation at target intervals Not Collective Input Arguments: + degree - degree of reconstruction polynomial . nsource - number of source intervals . sourcex - sorted coordinates of source cell boundaries (length nsource+1) . ntarget - number of target intervals - targetx - sorted coordinates of target cell boundaries (length ntarget+1) Output Arguments: . R - reconstruction matrix, utarget = sum_s R[t*nsource+s] * usource[s] Level: advanced .seealso: PetscDTLegendreEval() @*/ PetscErrorCode PetscDTReconstructPoly(PetscInt degree,PetscInt nsource,const PetscReal *sourcex,PetscInt ntarget,const PetscReal *targetx,PetscReal *R) { PetscErrorCode ierr; PetscInt i,j,k,*bdegrees,worksize; PetscReal xmin,xmax,center,hscale,*sourcey,*targety,*Bsource,*Bsinv,*Btarget; PetscScalar *tau,*work; PetscFunctionBegin; PetscValidRealPointer(sourcex,3); PetscValidRealPointer(targetx,5); PetscValidRealPointer(R,6); if (degree >= nsource) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Reconstruction degree %D must be less than number of source intervals %D",degree,nsource); #if defined(PETSC_USE_DEBUG) for (i=0; i<nsource; i++) { if (sourcex[i] >= sourcex[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Source interval %D has negative orientation (%G,%G)",i,sourcex[i],sourcex[i+1]); } for (i=0; i<ntarget; i++) { if (targetx[i] >= targetx[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Target interval %D has negative orientation (%G,%G)",i,targetx[i],targetx[i+1]); } #endif xmin = PetscMin(sourcex[0],targetx[0]); xmax = PetscMax(sourcex[nsource],targetx[ntarget]); center = (xmin + xmax)/2; hscale = (xmax - xmin)/2; worksize = nsource; ierr = PetscMalloc4(degree+1,PetscInt,&bdegrees,nsource+1,PetscReal,&sourcey,nsource*(degree+1),PetscReal,&Bsource,worksize,PetscScalar,&work);CHKERRQ(ierr); ierr = PetscMalloc4(nsource,PetscScalar,&tau,nsource*(degree+1),PetscReal,&Bsinv,ntarget+1,PetscReal,&targety,ntarget*(degree+1),PetscReal,&Btarget);CHKERRQ(ierr); for (i=0; i<=nsource; i++) sourcey[i] = (sourcex[i]-center)/hscale; for (i=0; i<=degree; i++) bdegrees[i] = i+1; ierr = PetscDTLegendreIntegrate(nsource,sourcey,degree+1,bdegrees,PETSC_TRUE,Bsource);CHKERRQ(ierr); ierr = PetscDTPseudoInverseQR(nsource,nsource,degree+1,Bsource,Bsinv,tau,nsource,work);CHKERRQ(ierr); for (i=0; i<=ntarget; i++) targety[i] = (targetx[i]-center)/hscale; ierr = PetscDTLegendreIntegrate(ntarget,targety,degree+1,bdegrees,PETSC_FALSE,Btarget);CHKERRQ(ierr); for (i=0; i<ntarget; i++) { PetscReal rowsum = 0; for (j=0; j<nsource; j++) { PetscReal sum = 0; for (k=0; k<degree+1; k++) { sum += Btarget[i*(degree+1)+k] * Bsinv[k*nsource+j]; } R[i*nsource+j] = sum; rowsum += sum; } for (j=0; j<nsource; j++) R[i*nsource+j] /= rowsum; /* normalize each row */ } ierr = PetscFree4(bdegrees,sourcey,Bsource,work);CHKERRQ(ierr); ierr = PetscFree4(tau,Bsinv,targety,Btarget);CHKERRQ(ierr); PetscFunctionReturn(0); }
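/* A minimal usage sketch for PetscDTReconstructPoly() above: build the degree-1
   reconstruction matrix from three source cells onto two target cells and print it.
   The cell boundaries and the standalone main() are illustrative only; the routine is
   assumed to be declared in petscdt.h. */
#include <petscdt.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscReal      sourcex[4] = {0.0,1.0,2.0,3.0};  /* boundaries of 3 source cells */
  PetscReal      targetx[3] = {0.5,1.5,2.5};      /* boundaries of 2 target cells */
  PetscReal      R[2*3];                          /* ntarget x nsource, row-major */
  PetscInt       t,s;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = PetscDTReconstructPoly(1,3,sourcex,2,targetx,R);CHKERRQ(ierr);
  for (t=0; t<2; t++) {                           /* each row sums to 1 after the normalization above */
    for (s=0; s<3; s++) {ierr = PetscPrintf(PETSC_COMM_SELF," %g",(double)R[t*3+s]);CHKERRQ(ierr);}
    ierr = PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr);
  }
  ierr = PetscFinalize();
  return ierr;
}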
EXTERN_C_BEGIN /* MatOrdering_ND - Find the nested dissection ordering of a given matrix. */ #undef __FUNCT__ #define __FUNCT__ "MatOrdering_ND" PetscErrorCode PETSCMAT_DLLEXPORT MatOrdering_ND(Mat mat,const MatOrderingType type,IS *row,IS *col) { PetscErrorCode ierr; PetscInt i, *mask,*xls,*ls,nrow,*ia,*ja,*perm; PetscTruth done; PetscFunctionBegin; ierr = MatGetRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr); if (!done) SETERRQ1(PETSC_ERR_SUP,"Cannot get rows for matrix type %s",((PetscObject)mat)->type_name); ierr = PetscMalloc4(nrow,PetscInt,&mask,nrow,PetscInt,&perm,nrow+1,PetscInt,&xls,nrow,PetscInt,&ls);CHKERRQ(ierr); SPARSEPACKgennd(&nrow,ia,ja,mask,perm,xls,ls); ierr = MatRestoreRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr); /* shift because Sparsepack indices start at one */ for (i=0; i<nrow; i++) perm[i]--; ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,row);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,col);CHKERRQ(ierr); ierr = PetscFree4(mask,perm,xls,ls);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode KSPDestroy_FCG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i;
  KSP_FCG        *fcg = (KSP_FCG*)ksp->data;

  PetscFunctionBegin;
  /* Destroy "standard" work vecs */
  ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);

  /* Destroy P and C vectors and the arrays that manage pointers to them */
  if (fcg->nvecs) {
    for (i=0;i<fcg->nchunks;++i) {
      ierr = VecDestroyVecs(fcg->chunksizes[i],&fcg->pPvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(fcg->chunksizes[i],&fcg->pCvecs[i]);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree5(fcg->Pvecs,fcg->Cvecs,fcg->pPvecs,fcg->pCvecs,fcg->chunksizes);CHKERRQ(ierr);

  /* free space used for singular value calculations */
  if (ksp->calc_sings) {
    ierr = PetscFree4(fcg->e,fcg->d,fcg->ee,fcg->dd);CHKERRQ(ierr);
  }
  ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex Not Collective Input Arguments: + dim - The simplex dimension . order - The number of points in one dimension . a - left end of interval (often-1) - b - right end of interval (often +1) Output Argument: . q - A PetscQuadrature object Level: intermediate References: Karniadakis and Sherwin. FIAT .seealso: PetscDTGaussTensorQuadrature(), PetscDTGaussQuadrature() @*/ PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt order, PetscReal a, PetscReal b, PetscQuadrature *q) { PetscInt npoints = dim > 1 ? dim > 2 ? order*PetscSqr(order) : PetscSqr(order) : order; PetscReal *px, *wx, *py, *wy, *pz, *wz, *x, *w; PetscInt i, j, k; PetscErrorCode ierr; PetscFunctionBegin; if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now"); ierr = PetscMalloc1(npoints*dim, &x);CHKERRQ(ierr); ierr = PetscMalloc1(npoints, &w);CHKERRQ(ierr); switch (dim) { case 0: ierr = PetscFree(x);CHKERRQ(ierr); ierr = PetscFree(w);CHKERRQ(ierr); ierr = PetscMalloc1(1, &x);CHKERRQ(ierr); ierr = PetscMalloc1(1, &w);CHKERRQ(ierr); x[0] = 0.0; w[0] = 1.0; break; case 1: ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, x, w);CHKERRQ(ierr); break; case 2: ierr = PetscMalloc4(order,&px,order,&wx,order,&py,order,&wy);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr); for (i = 0; i < order; ++i) { for (j = 0; j < order; ++j) { ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*order+j)*2+0], &x[(i*order+j)*2+1]);CHKERRQ(ierr); w[i*order+j] = 0.5 * wx[i] * wy[j]; } } ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr); break; case 3: ierr = PetscMalloc6(order,&px,order,&wx,order,&py,order,&wy,order,&pz,order,&wz);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 2.0, 0.0, pz, wz);CHKERRQ(ierr); for (i = 0; i < order; ++i) { for (j = 0; j < order; ++j) { for (k = 0; k < order; ++k) { ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*order+j)*order+k)*3+0], &x[((i*order+j)*order+k)*3+1], &x[((i*order+j)*order+k)*3+2]);CHKERRQ(ierr); w[(i*order+j)*order+k] = 0.125 * wx[i] * wy[j] * wz[k]; } } } ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr); break; default: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim); } ierr = PetscQuadratureCreate(PETSC_COMM_SELF, q);CHKERRQ(ierr); ierr = PetscQuadratureSetData(*q, dim, npoints, x, w);CHKERRQ(ierr); PetscFunctionReturn(0); }
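/* Usage sketch for the PetscQuadrature-based variant above: build a rule on the reference
   triangle and view it. The helper name is illustrative; destroying the quadrature also
   releases the point/weight arrays handed to PetscQuadratureSetData(). */
static PetscErrorCode ViewTriangleRule(PetscInt order)
{
  PetscErrorCode  ierr;
  PetscQuadrature q;

  PetscFunctionBegin;
  ierr = PetscDTGaussJacobiQuadrature(2,order,-1.0,1.0,&q);CHKERRQ(ierr);
  ierr = PetscQuadratureView(q,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  ierr = PetscQuadratureDestroy(&q);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}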
PetscErrorCode NEPSolve_Interpol(NEP nep) { PetscErrorCode ierr; NEP_INTERPOL *ctx = (NEP_INTERPOL*)nep->data; Mat *A; /*T=nep->function,Tp=nep->jacobian;*/ PetscScalar *x,*fx,t; PetscReal *cs,a,b,s; PetscInt i,j,k,deg=ctx->deg; PetscFunctionBegin; ierr = PetscMalloc4(deg+1,&A,(deg+1)*(deg+1),&cs,deg+1,&x,(deg+1)*nep->nt,&fx);CHKERRQ(ierr); ierr = RGIntervalGetEndpoints(nep->rg,&a,&b,NULL,NULL);CHKERRQ(ierr); ierr = ChebyshevNodes(deg,a,b,x,cs);CHKERRQ(ierr); for (j=0;j<nep->nt;j++) { for (i=0;i<=deg;i++) { ierr = FNEvaluateFunction(nep->f[j],x[i],&fx[i+j*(deg+1)]);CHKERRQ(ierr); } } /* Polynomial coefficients */ for (k=0;k<=deg;k++) { ierr = MatDuplicate(nep->A[0],MAT_COPY_VALUES,&A[k]);CHKERRQ(ierr); t = 0.0; for (i=0;i<deg+1;i++) t += fx[i]*cs[i*(deg+1)+k]; t *= 2.0/(deg+1); if (k==0) t /= 2.0; ierr = MatScale(A[k],t);CHKERRQ(ierr); for (j=1;j<nep->nt;j++) { t = 0.0; for (i=0;i<deg+1;i++) t += fx[i+j*(deg+1)]*cs[i*(deg+1)+k]; t *= 2.0/(deg+1); if (k==0) t /= 2.0; ierr = MatAXPY(A[k],t,nep->A[j],SUBSET_NONZERO_PATTERN);CHKERRQ(ierr); } } ierr = PEPSetOperators(ctx->pep,deg+1,A);CHKERRQ(ierr); for (k=0;k<=deg;k++) { ierr = MatDestroy(&A[k]);CHKERRQ(ierr); } ierr = PetscFree4(A,cs,x,fx);CHKERRQ(ierr); /* Solve polynomial eigenproblem */ ierr = PEPSolve(ctx->pep);CHKERRQ(ierr); ierr = PEPGetConverged(ctx->pep,&nep->nconv);CHKERRQ(ierr); ierr = PEPGetIterationNumber(ctx->pep,&nep->its);CHKERRQ(ierr); ierr = PEPGetConvergedReason(ctx->pep,(PEPConvergedReason*)&nep->reason);CHKERRQ(ierr); s = 2.0/(b-a); for (i=0;i<nep->nconv;i++) { ierr = PEPGetEigenpair(ctx->pep,i,&nep->eigr[i],&nep->eigi[i],NULL,NULL);CHKERRQ(ierr); nep->eigr[i] /= s; nep->eigr[i] += (a+b)/2.0; nep->eigi[i] /= s; } nep->state = NEP_STATE_EIGENVECTORS; PetscFunctionReturn(0); }
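/* The coefficient computation above is the standard discrete Chebyshev transform: with the
   deg+1 Chebyshev nodes mapped to [a,b], the k-th coefficient is
   c_k = 2/(deg+1) * sum_i f(x_i)*cos(k*theta_i), halved for k==0, which is the role played
   by the cs[] array returned by the private ChebyshevNodes() helper. A scalar sketch of the
   same computation (illustrative only; it assumes first-kind Chebyshev nodes, consistent
   with the 2/(deg+1) normalization used above): */
static void ChebyshevCoefficientsScalar(PetscInt deg,PetscReal a,PetscReal b,PetscReal (*f)(PetscReal),PetscReal *c)
{
  PetscInt  i,k;
  PetscReal theta,x;

  for (k=0;k<=deg;k++) {
    c[k] = 0.0;
    for (i=0;i<=deg;i++) {
      theta = PETSC_PI*(i+0.5)/(deg+1);                   /* Chebyshev angle of node i */
      x     = (a+b)/2.0 + (b-a)/2.0*PetscCosReal(theta);  /* node mapped to [a,b] */
      c[k] += f(x)*PetscCosReal(k*theta);                 /* analogue of fx[i]*cs[i*(deg+1)+k] */
    }
    c[k] *= 2.0/(deg+1);
    if (k==0) c[k] /= 2.0;                                /* same k==0 halving as above */
  }
}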
PetscErrorCode AODestroy_Mapping(AO ao) { AO_Mapping *aomap = (AO_Mapping*) ao->data; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscFree4(aomap->app,aomap->appPerm,aomap->petsc,aomap->petscPerm);CHKERRQ(ierr); ierr = PetscFree(aomap);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MPIAIJ_MPIDenseDestroy(void *ctx) { MPIAIJ_MPIDense *contents = (MPIAIJ_MPIDense*) ctx; PetscErrorCode ierr; PetscFunctionBegin; if (contents->workB) {ierr = MatDestroy(contents->workB);CHKERRQ(ierr);} ierr = PetscFree4(contents->rvalues,contents->svalues,contents->rwaits,contents->swaits);CHKERRQ(ierr); ierr = PetscFree(contents);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode PetscFEGeomDestroy(PetscFEGeom **geom) { PetscErrorCode ierr; PetscFunctionBegin; if (!*geom) PetscFunctionReturn(0); ierr = PetscFree3((*geom)->v,(*geom)->J,(*geom)->detJ);CHKERRQ(ierr); ierr = PetscFree((*geom)->invJ);CHKERRQ(ierr); ierr = PetscFree4((*geom)->face,(*geom)->n,(*geom)->suppInvJ[0],(*geom)->suppInvJ[1]);CHKERRQ(ierr); ierr = PetscFree(*geom);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MatDestroy_Essl(Mat A) { PetscErrorCode ierr; Mat_Essl *essl=(Mat_Essl*)A->data; PetscFunctionBegin; if (essl->CleanUpESSL) { ierr = PetscFree4(essl->a,essl->aux,essl->ia,essl->ja);CHKERRQ(ierr); } ierr = PetscFree(A->data);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C
  PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex

  Not Collective

  Input Arguments:
+ dim     - The simplex dimension
. npoints - number of points in one dimension
. a       - left end of interval (often -1)
- b       - right end of interval (often +1)

  Output Arguments:
+ points  - quadrature points
- weights - quadrature weights

  Level: intermediate

  References: Karniadakis and Sherwin. FIAT

.seealso: PetscDTGaussQuadrature()
@*/
PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt npoints, PetscReal a, PetscReal b, PetscReal *points[], PetscReal *weights[])
{
  PetscReal      *px, *wx, *py, *wy, *pz, *wz, *x, *w;
  PetscInt       i, j, k;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
  switch (dim) {
  case 1:
    ierr = PetscMalloc(npoints * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, x, w);CHKERRQ(ierr);
    break;
  case 2:
    ierr = PetscMalloc(npoints*npoints*2 * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints*npoints * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscMalloc4(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    for (i = 0; i < npoints; ++i) {
      for (j = 0; j < npoints; ++j) {
        ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*npoints+j)*2+0], &x[(i*npoints+j)*2+1]);CHKERRQ(ierr);
        w[i*npoints+j] = 0.5 * wx[i] * wy[j];
      }
    }
    ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr);
    break;
  case 3:
    /* the loops below fill npoints^3 quadrature points, so allocate accordingly */
    ierr = PetscMalloc(npoints*npoints*npoints*3 * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints*npoints*npoints * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscMalloc6(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy,npoints,PetscReal,&pz,npoints,PetscReal,&wz);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 2.0, 0.0, pz, wz);CHKERRQ(ierr);
    for (i = 0; i < npoints; ++i) {
      for (j = 0; j < npoints; ++j) {
        for (k = 0; k < npoints; ++k) {
          ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*npoints+j)*npoints+k)*3+0], &x[((i*npoints+j)*npoints+k)*3+1], &x[((i*npoints+j)*npoints+k)*3+2]);CHKERRQ(ierr);
          w[(i*npoints+j)*npoints+k] = 0.125 * wx[i] * wy[j] * wz[k];
        }
      }
    }
    ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim);
  }
  if (points)  *points  = x;
  if (weights) *weights = w;
  PetscFunctionReturn(0);
}
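/* A quick sanity check of the 2-D rule built by the array-returning variant directly above:
   the weights of the collapsed-coordinate rule should sum to the area of the reference
   triangle with vertices (-1,-1), (1,-1), (-1,1), which is 2. The declaration is assumed to
   come from petscdt.h; the point count 4 is arbitrary. */
#include <petscdt.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscReal      *points,*weights,sum = 0.0;
  PetscInt       i,npoints = 4;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = PetscDTGaussJacobiQuadrature(2,npoints,-1.0,1.0,&points,&weights);CHKERRQ(ierr);
  for (i=0; i<npoints*npoints; i++) sum += weights[i];
  ierr = PetscPrintf(PETSC_COMM_SELF,"weight sum = %g (expected 2.0)\n",(double)sum);CHKERRQ(ierr);
  ierr = PetscFree(points);CHKERRQ(ierr);     /* caller owns the arrays returned above */
  ierr = PetscFree(weights);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}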
/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message. Same functionality as
  PetscGatherMessageLengths() except it takes TWO ilengths and outputs TWO olengths.

  Collective on MPI_Comm

  Input Parameters:
+ comm   - Communicator
. nsends - number of messages that are to be sent
. nrecvs - number of messages being received
- ilengths1, ilengths2 - arrays of integers of length equal to the size of comm; a nonzero
  ilengths1[i] represents a message of length ilengths1[i] to be sent to process i

  Output Parameters:
+ onodes - list of node-ids from which messages are expected
- olengths1, olengths2 - corresponding message lengths

  Level: developer

  Concepts: mpi utility

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct from-id,
  with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes and olengths.

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
@*/
PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  ierr    = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      ierr     = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr);
      j++;
    }
  }
  if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);}

  /* Pack up the received data */
  ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }
  ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
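/* A sketch of how a caller typically pairs PetscGatherNumberOfMessages() with the routine
   above: count outgoing messages from the per-rank length array, discover how many messages
   will arrive, then obtain the source ranks and both length arrays. The helper name and the
   two ilengths arrays are illustrative. */
static PetscErrorCode GatherTwoLengthsExample(MPI_Comm comm,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[])
{
  PetscErrorCode ierr;
  PetscMPIInt    size,i,nsends = 0,nrecvs;
  PetscMPIInt    *onodes,*olengths1,*olengths2;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  for (i=0; i<size; i++) if (ilengths1[i]) nsends++;               /* messages this rank will send */
  ierr = PetscGatherNumberOfMessages(comm,NULL,ilengths1,&nrecvs);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,nsends,nrecvs,ilengths1,ilengths2,&onodes,&olengths1,&olengths2);CHKERRQ(ierr);
  /* ... post MPI_Irecv()s of sizes olengths1[i]/olengths2[i] from rank onodes[i] ... */
  ierr = PetscFree(onodes);CHKERRQ(ierr);                          /* caller owns the output arrays */
  ierr = PetscFree(olengths1);CHKERRQ(ierr);
  ierr = PetscFree(olengths2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}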
/*@C PetscDrawViewPortsDestroy - frees a PetscDrawViewPorts object Collective on PetscDraw inside PetscDrawViewPorts Input Parameter: . ports - the PetscDrawViewPorts object Level: advanced .seealso: PetscDrawSplitViewPort(), PetscDrawSetViewPort(), PetscDrawViewPortsSet(), PetscDrawViewPortsCreate() @*/ PetscErrorCode PetscDrawViewPortsDestroy(PetscDrawViewPorts *ports) { PetscErrorCode ierr; PetscFunctionBegin; if (!ports) PetscFunctionReturn(0); PetscValidPointer(ports,1); /* reset Drawport of Window back to previous value */ ierr = PetscDrawSetViewPort(ports->draw,ports->port_xl,ports->port_yl,ports->port_xr,ports->port_yr);CHKERRQ(ierr); ierr = PetscDrawDestroy(&ports->draw);CHKERRQ(ierr); ierr = PetscFree4(ports->xl,ports->xr,ports->yl,ports->yr);CHKERRQ(ierr); ierr = PetscFree(ports);CHKERRQ(ierr); PetscFunctionReturn(0); }
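/* Typical lifecycle of the object destroyed above: split a draw window into n viewports,
   select one before drawing, and destroy the set when done. A minimal sketch assuming a
   PetscDraw has already been created; the helper name and the drawn line are illustrative. */
static PetscErrorCode DrawInViewPorts(PetscDraw draw,PetscInt n)
{
  PetscErrorCode     ierr;
  PetscDrawViewPorts *ports;
  PetscInt           i;

  PetscFunctionBegin;
  ierr = PetscDrawViewPortsCreate(draw,n,&ports);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscDrawViewPortsSet(ports,i);CHKERRQ(ierr);                       /* subsequent drawing lands in port i */
    ierr = PetscDrawLine(draw,0.0,0.0,1.0,1.0,PETSC_DRAW_BLACK);CHKERRQ(ierr); /* default coordinates run 0..1 */
  }
  ierr = PetscDrawViewPortsDestroy(ports);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}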
PetscErrorCode KSPDestroy_CG(KSP ksp) { KSP_CG *cg = (KSP_CG*)ksp->data; PetscErrorCode ierr; PetscFunctionBegin; /* free space used for singular value calculations */ if (ksp->calc_sings) { ierr = PetscFree4(cg->e,cg->d,cg->ee,cg->dd);CHKERRQ(ierr); } ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr); ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPCGSetType_C",NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPCGUseSingleReduction_C",NULL);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) { PetscErrorCode ierr; PetscMatStashSpace space; PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; PetscScalar **valptr; PetscFunctionBegin; ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); for (space=stash->space_head,cnt=0; space; space=space->next) { for (i=0; i<space->local_used; i++) { row[cnt] = space->idx[i]; col[cnt] = space->idy[i]; valptr[cnt] = &space->val[i*bs2]; perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ cnt++; } } if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ for (rowstart=0,cnt=0,i=1; i<=n; i++) { if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ PetscInt colstart; ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */ PetscInt j,l; MatStashBlock *block; ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); block->row = row[rowstart]; block->col = col[colstart]; ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ if (insertmode == ADD_VALUES) { for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; } else { ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); } } colstart = j; } rowstart = i; } } ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); PetscFunctionReturn(0); }
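/* The stash compression above relies on PetscSortIntWithArrayPair() to group entries by row
   (with perm[] remembering where each value block ended up), then sorts the columns of each
   row and combines duplicate (row,col) pairs. A self-contained sketch of the same idiom on
   plain values; the data and helper name are illustrative. */
static PetscErrorCode SortAndCombineExample(void)
{
  PetscErrorCode ierr;
  PetscInt       row[5]  = {2,0,2,0,2};
  PetscInt       col[5]  = {1,3,4,3,1};
  PetscInt       perm[5] = {0,1,2,3,4};
  PetscScalar    val[5]  = {1.0,2.0,3.0,4.0,5.0};
  PetscInt       i,rs;

  PetscFunctionBegin;
  ierr = PetscSortIntWithArrayPair(5,row,col,perm);CHKERRQ(ierr);              /* sort by row, carrying col and perm */
  for (rs=0,i=1; i<=5; i++) {
    if (i==5 || row[i]!=row[rs]) {                                             /* one row is complete */
      PetscInt c;
      ierr = PetscSortIntWithArray(i-rs,&col[rs],&perm[rs]);CHKERRQ(ierr);     /* sort columns within the row */
      for (c=rs; c<i; ) {
        PetscInt    j;
        PetscScalar sum = val[perm[c]];
        for (j=c+1; j<i && col[j]==col[c]; j++) sum += val[perm[j]];           /* ADD_VALUES-style combine */
        /* (row[rs],col[c],sum) is now one compressed entry */
        c = j;
      }
      rs = i;
    }
  }
  PetscFunctionReturn(0);
}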
/*@ KSPBCGSLSetXRes - Sets the parameter governing when exact residuals will be used instead of computed residuals. Logically Collective on KSP Input Parameters: + ksp - iterative context obtained from KSPCreate - delta - computed residuals are used alone when delta is not positive Options Database Keys: . -ksp_bcgsl_xres delta Level: intermediate .keywords: KSP, BiCGStab(L), set, exact residuals .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol() @*/ PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta) { KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data; PetscErrorCode ierr; PetscFunctionBegin; PetscValidLogicalCollectiveReal(ksp,delta,2); if (ksp->setupstage) { if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) { ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr); ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr); ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr); ksp->setupstage = KSP_SETUP_NEW; } } bcgsl->delta = delta; PetscFunctionReturn(0); }
/*@ NEPAllocateSolution - Allocate memory storage for common variables such as eigenvalues and eigenvectors. Collective on NEP Input Parameters: + nep - eigensolver context - extra - number of additional positions, used for methods that require a working basis slightly larger than ncv Developers Note: This is PETSC_EXTERN because it may be required by user plugin NEP implementations. Level: developer @*/ PetscErrorCode NEPAllocateSolution(NEP nep,PetscInt extra) { PetscErrorCode ierr; PetscInt oldsize,newc,requested; PetscLogDouble cnt; Mat T; Vec t; PetscFunctionBegin; requested = nep->ncv + extra; /* oldsize is zero if this is the first time setup is called */ ierr = BVGetSizes(nep->V,NULL,NULL,&oldsize);CHKERRQ(ierr); newc = PetscMax(0,requested-oldsize); /* allocate space for eigenvalues and friends */ if (requested != oldsize) { if (oldsize) { ierr = PetscFree4(nep->eigr,nep->eigi,nep->errest,nep->perm);CHKERRQ(ierr); } ierr = PetscMalloc4(requested,&nep->eigr,requested,&nep->eigi,requested,&nep->errest,requested,&nep->perm);CHKERRQ(ierr); cnt = newc*sizeof(PetscScalar) + newc*sizeof(PetscReal) + newc*sizeof(PetscInt); ierr = PetscLogObjectMemory((PetscObject)nep,cnt);CHKERRQ(ierr); } /* allocate V */ if (!nep->V) { ierr = NEPGetBV(nep,&nep->V);CHKERRQ(ierr); } if (!oldsize) { if (!((PetscObject)(nep->V))->type_name) { ierr = BVSetType(nep->V,BVSVEC);CHKERRQ(ierr); } if (nep->split) T = nep->A[0]; else { ierr = NEPGetFunction(nep,&T,NULL,NULL,NULL);CHKERRQ(ierr); } ierr = MatGetVecs(T,&t,NULL);CHKERRQ(ierr); ierr = BVSetSizesFromVec(nep->V,t,requested);CHKERRQ(ierr); ierr = VecDestroy(&t);CHKERRQ(ierr); } else { ierr = BVResize(nep->V,requested,PETSC_FALSE);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@C PetscSFReset - Reset a star forest so that different sizes or neighbors can be used Collective Input Arguments: . sf - star forest Level: advanced .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() @*/ PetscErrorCode PetscSFReset(PetscSF sf) { PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); sf->mine = NULL; ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); sf->remote = NULL; ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); ierr = PetscFree(sf->degree);CHKERRQ(ierr); if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); sf->graphset = PETSC_FALSE; if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} sf->setupcalled = PETSC_FALSE; PetscFunctionReturn(0); }
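/* PetscSFReset() exists so a star forest can be given a new graph without destroying the
   object; a sketch of that reuse pattern. The graph contents are placeholders, NULL ilocal
   means the leaves are contiguous, and the exact const-ness of the PetscSFSetGraph()
   arguments differs between PETSc versions. */
static PetscErrorCode ReuseSFExample(MPI_Comm comm,PetscInt nroots,PetscInt nleaves,PetscSFNode *iremote)
{
  PetscErrorCode ierr;
  PetscSF        sf;

  PetscFunctionBegin;
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  /* ... communicate with PetscSFBcastBegin()/PetscSFBcastEnd() ... */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);             /* drop the old graph, keep the object */
  ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}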
static PetscErrorCode SNESReset_QN(SNES snes) { PetscErrorCode ierr; SNES_QN *qn; PetscFunctionBegin; if (snes->data) { qn = (SNES_QN*)snes->data; if (qn->U) { ierr = VecDestroyVecs(qn->m, &qn->U);CHKERRQ(ierr); } if (qn->V) { ierr = VecDestroyVecs(qn->m, &qn->V);CHKERRQ(ierr); } if (qn->singlereduction) { ierr = PetscFree3(qn->dXdFmat, qn->dFtdX, qn->YtdX);CHKERRQ(ierr); } ierr = PetscFree4(qn->alpha,qn->beta,qn->dXtdF,qn->lambda);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@ KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L). Logically Collective on KSP Input Parameters: + ksp - iterative context obtained from KSPCreate - ell - number of search directions Options Database Keys: . -ksp_bcgsl_ell ell Level: intermediate Notes: For large ell it is common for the polynomial update problem to become singular (due to happy breakdown for smallish test problems, but also for larger problems). Consequently, by default, the system is solved by pseudoinverse, which allows the iteration to complete successfully. See KSPBCGSLSetUsePseudoinverse() to switch to a conventional solve. .keywords: KSP, BiCGStab(L), set, exact residuals, .seealso: KSPBCGSLSetUsePseudoinverse() @*/ PetscErrorCode KSPBCGSLSetEll(KSP ksp, PetscInt ell) { KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data; PetscErrorCode ierr; PetscFunctionBegin; if (ell < 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive"); PetscValidLogicalCollectiveInt(ksp,ell,2); if (!ksp->setupstage) bcgsl->ell = ell; else if (bcgsl->ell != ell) { /* free the data structures, then create them again */ ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr); ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr); ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr); bcgsl->ell = ell; ksp->setupstage = KSP_SETUP_NEW; } PetscFunctionReturn(0); }
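/* How the setter above (together with KSPBCGSLSetXRes()) is reached from user code; a
   minimal configuration sketch assuming ksp already has its operators attached elsewhere.
   The helper name and the numeric values are illustrative. */
static PetscErrorCode ConfigureBCGSL(KSP ksp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = KSPSetType(ksp,KSPBCGSL);CHKERRQ(ierr);
  ierr = KSPBCGSLSetEll(ksp,4);CHKERRQ(ierr);        /* equivalent to -ksp_bcgsl_ell 4 */
  ierr = KSPBCGSLSetXRes(ksp,1.0e-8);CHKERRQ(ierr);  /* equivalent to -ksp_bcgsl_xres 1.0e-8 */
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);       /* command-line options can still override */
  PetscFunctionReturn(0);
}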
/*@
  KSPBCGSLSetPol - Sets the type of polynomial part that will be used in the BiCGStab(L) solver.

  Logically Collective on KSP

  Input Parameters:
+ ksp   - iterative context obtained from KSPCreate
- uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step

  Options Database Keys:
+ -ksp_bcgsl_cxpoly - use enhanced polynomial
- -ksp_bcgsl_mrpoly - use standard polynomial

  Level: intermediate

.keywords: KSP, BiCGStab(L), set, polynomial

.seealso: KSPBCGSLSetEll(), KSPBCGSLSetXRes()
@*/
PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveBool(ksp,uMROR,2);

  if (!ksp->setupstage) bcgsl->bConvex = uMROR;
  else if (bcgsl->bConvex != uMROR) {
    /* free the data structures, then create them again */
    ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
    ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
    ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);

    bcgsl->bConvex  = uMROR;
    ksp->setupstage = KSP_SETUP_NEW;
  }
  PetscFunctionReturn(0);
}
/* MatGetOrdering_ND - Find the nested dissection ordering of a given matrix. */ PETSC_INTERN PetscErrorCode MatGetOrdering_ND(Mat mat,MatOrderingType type,IS *row,IS *col) { PetscErrorCode ierr; PetscInt i, *mask,*xls,*ls,nrow,*perm; const PetscInt *ia,*ja; PetscBool done; PetscFunctionBegin; ierr = MatGetRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr); if (!done) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot get rows for matrix type %s",((PetscObject)mat)->type_name); ierr = PetscMalloc4(nrow,&mask,nrow,&perm,nrow+1,&xls,nrow,&ls);CHKERRQ(ierr); SPARSEPACKgennd(&nrow,ia,ja,mask,perm,xls,ls); ierr = MatRestoreRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,NULL,&ia,&ja,&done);CHKERRQ(ierr); /* shift because Sparsepack indices start at one */ for (i=0; i<nrow; i++) perm[i]--; ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_COPY_VALUES,row);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_COPY_VALUES,col);CHKERRQ(ierr); ierr = PetscFree4(mask,perm,xls,ls);CHKERRQ(ierr); PetscFunctionReturn(0); }
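/* The registered entry point above is normally reached through MatGetOrdering(); a short
   sketch that requests the nested dissection ordering and uses it to permute a matrix.
   The helper name is illustrative. */
static PetscErrorCode ReorderWithND(Mat A,Mat *Aperm)
{
  PetscErrorCode ierr;
  IS             rperm,cperm;

  PetscFunctionBegin;
  ierr = MatGetOrdering(A,MATORDERINGND,&rperm,&cperm);CHKERRQ(ierr);
  ierr = MatPermute(A,rperm,cperm,Aperm);CHKERRQ(ierr);   /* A(rperm,cperm) */
  ierr = ISDestroy(&rperm);CHKERRQ(ierr);
  ierr = ISDestroy(&cperm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}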
/*@ EPSReset - Resets the EPS context to the initial state and removes any allocated objects. Collective on EPS Input Parameter: . eps - eigensolver context obtained from EPSCreate() Level: advanced .seealso: EPSDestroy() @*/ PetscErrorCode EPSReset(EPS eps) { PetscErrorCode ierr; PetscInt ncols; PetscFunctionBegin; PetscValidHeaderSpecific(eps,EPS_CLASSID,1); if (eps->ops->reset) { ierr = (eps->ops->reset)(eps); CHKERRQ(ierr); } if (eps->st) { ierr = STReset(eps->st); CHKERRQ(ierr); } if (eps->ds) { ierr = DSReset(eps->ds); CHKERRQ(ierr); } ierr = VecDestroy(&eps->D); CHKERRQ(ierr); ierr = BVGetSizes(eps->V,NULL,NULL,&ncols); CHKERRQ(ierr); if (ncols) { ierr = PetscFree4(eps->eigr,eps->eigi,eps->errest,eps->perm); CHKERRQ(ierr); ierr = PetscFree2(eps->rr,eps->ri); CHKERRQ(ierr); } ierr = BVDestroy(&eps->V); CHKERRQ(ierr); ierr = VecDestroyVecs(eps->nwork,&eps->work); CHKERRQ(ierr); eps->nwork = 0; eps->state = EPS_STATE_INITIAL; PetscFunctionReturn(0); }
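/* EPSReset() lets an eigensolver object be reused for a different problem without destroying
   it; a minimal reuse sketch, assuming A1 and A2 define standard (non-generalized)
   eigenproblems. The helper name is illustrative. */
static PetscErrorCode SolveTwoProblems(EPS eps,Mat A1,Mat A2)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = EPSSetOperators(eps,A1,NULL);CHKERRQ(ierr);
  ierr = EPSSolve(eps);CHKERRQ(ierr);
  ierr = EPSReset(eps);CHKERRQ(ierr);                 /* free work vectors, BV, inner ST/DS state */
  ierr = EPSSetOperators(eps,A2,NULL);CHKERRQ(ierr);
  ierr = EPSSolve(eps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}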
/* FormFunctionLocal - Form the local residual F from the local input X Input Parameters: + dm - The mesh . X - Local input vector - user - The user context Output Parameter: . F - Local output vector Note: We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator, like a GPU, or vectorize on a multicore machine. .seealso: FormJacobianLocal() */ PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user) { const PetscInt debug = user->debug; const PetscInt dim = user->dim; PetscReal *coords, *v0, *J, *invJ, *detJ; PetscScalar *elemVec, *u; const PetscInt numCells = cEnd - cStart; PetscInt cellDof = 0; PetscInt maxQuad = 0; PetscInt jacSize = 1; PetscInt cStart, cEnd, c, field; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscLogEventBegin(user->residualEvent,0,0,0,0);CHKERRQ(ierr); ierr = VecSet(F, 0.0);CHKERRQ(ierr); ierr = DMDAGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr); for(field = 0; field < numFields; ++field) { PetscInt dof = 1; for(d = 0; d < dim; ++d) {dof *= user->q[field].numBasisFuncs*user->q[field].numComponents;} cellDof += dof; maxQuad = PetscMax(maxQuad, user->q[field].numQuadPoints); } for(d = 0; d < dim; ++d) {jacSize *= maxQuad;} ierr = PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,jacSize,PetscReal,&J);CHKERRQ(ierr); ierr = PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*jacSize,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);CHKERRQ(ierr); for(c = cStart; c < cEnd; ++c) { const PetscScalar *x; PetscInt i; ierr = DMDAComputeCellGeometry(dm, c, v0, J, &invJ[c*jacSize], &detJ[c]);CHKERRQ(ierr); if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c); ierr = DMDAVecGetClosure(dm, PETSC_NULL, X, c, &x);CHKERRQ(ierr); for(i = 0; i < cellDof; ++i) { u[c*cellDof+i] = x[i]; } } for(field = 0; field < numFields; ++field) { const PetscInt numQuadPoints = user->q[field].numQuadPoints; const PetscInt numBasisFuncs = user->q[field].numBasisFuncs; void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field]; void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field]; /* Conforming batches */ PetscInt blockSize = numBasisFuncs*numQuadPoints; PetscInt numBlocks = 1; PetscInt batchSize = numBlocks * blockSize; PetscInt numBatches = user->numBatches; PetscInt numChunks = numCells / (numBatches*batchSize); ierr = IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);CHKERRQ(ierr); /* Remainder */ PetscInt numRemainder = numCells % (numBatches * batchSize); PetscInt offset = numCells - numRemainder; ierr = IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset], user->q, f0, f1, &elemVec[offset*cellDof], user);CHKERRQ(ierr); } for(c = cStart; c < cEnd; ++c) { if (debug) {ierr = DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);CHKERRQ(ierr);} ierr = DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);CHKERRQ(ierr); } ierr = PetscFree4(u,invJ,detJ,elemVec);CHKERRQ(ierr); ierr = PetscFree3(coords,v0,J);CHKERRQ(ierr); if (user->showResidual) { PetscInt p; ierr = PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");CHKERRQ(ierr); for(p = 0; p < user->numProcs; ++p) { if (p == user->rank) { Vec f; ierr = VecDuplicate(F, &f);CHKERRQ(ierr); ierr = VecCopy(F, f);CHKERRQ(ierr); ierr = 
VecChop(f, 1.0e-10);CHKERRQ(ierr); ierr = VecView(f, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecDestroy(&f);CHKERRQ(ierr); } ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr); } } ierr = PetscLogEventEnd(user->residualEvent,0,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode GreedyColoringLocalDistanceTwo_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors) { MC_Greedy *gr = (MC_Greedy *) mc->data; PetscInt i,j,k,l,s,e,n,nd,nd_global,n_global,idx,ncols,maxcolors,mcol,mcol_global,nd1cols,*mask,masksize,*d1cols,*bad,*badnext,nbad,badsize,ccol,no,cbad; Mat m = mc->mat, mt; Mat_MPIAIJ *aij = (Mat_MPIAIJ*)m->data; Mat md=NULL,mo=NULL; const PetscInt *md_i,*mo_i,*md_j,*mo_j; const PetscInt *rmd_i,*rmo_i,*rmd_j,*rmo_j; PetscBool isMPIAIJ,isSEQAIJ; PetscInt pcol,*dcolors,*ocolors; ISColoringValue *badidx; const PetscInt *cidx; PetscReal *owts,*colorweights; PetscInt *oconf,*conf; PetscSF sf; PetscLayout layout; PetscErrorCode ierr; PetscFunctionBegin; ierr = MatGetSize(m,&n_global,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(m,&s,&e);CHKERRQ(ierr); n=e-s; nd_global = 0; /* get the matrix communication structures */ ierr = PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ); CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ); CHKERRQ(ierr); if (isMPIAIJ) { Mat_SeqAIJ *dseq; Mat_SeqAIJ *oseq; md=aij->A; dseq = (Mat_SeqAIJ*)md->data; mo=aij->B; oseq = (Mat_SeqAIJ*)mo->data; md_i = dseq->i; md_j = dseq->j; mo_i = oseq->i; mo_j = oseq->j; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = oseq->i; rmo_j = oseq->j; } else if (isSEQAIJ) { Mat_SeqAIJ *dseq; /* no off-processor nodes */ md=m; dseq = (Mat_SeqAIJ*)md->data; md_i = dseq->i; md_j = dseq->j; mo_i = NULL; mo_j = NULL; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring"); if (!gr->symmetric) { ierr = MatTranspose(m, MAT_INITIAL_MATRIX, &mt);CHKERRQ(ierr); if (isSEQAIJ) { Mat_SeqAIJ *dseq = (Mat_SeqAIJ*) mt->data; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject) mc), PETSC_ERR_SUP, "Nonsymmetric greedy coloring only works in serial"); } /* create the vectors and communication structures if necessary */ no=0; if (mo) { ierr = VecGetLocalSize(aij->lvec,&no);CHKERRQ(ierr); ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);CHKERRQ(ierr); ierr = MatGetLayouts(m,&layout,NULL);CHKERRQ(ierr); ierr = PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);CHKERRQ(ierr); } ierr = MatColoringGetMaxColors(mc,&maxcolors);CHKERRQ(ierr); masksize=n; nbad=0; badsize=n; ierr = PetscMalloc1(masksize,&mask);CHKERRQ(ierr); ierr = PetscMalloc4(n,&d1cols,n,&dcolors,n,&conf,n,&bad);CHKERRQ(ierr); ierr = PetscMalloc2(badsize,&badidx,badsize,&badnext);CHKERRQ(ierr); for(i=0;i<masksize;i++) { mask[i]=-1; } for (i=0;i<n;i++) { dcolors[i]=maxcolors; bad[i]=-1; } for (i=0;i<badsize;i++) { badnext[i]=-1; } if (mo) { ierr = PetscMalloc3(no,&owts,no,&oconf,no,&ocolors);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); for (i=0;i<no;i++) { ocolors[i]=maxcolors; } } else { /* Appease overzealous -Wmaybe-initialized */ owts = NULL; oconf = NULL; ocolors = NULL; } mcol=0; while (nd_global < n_global) { nd=n; /* assign lowest possible color to each local vertex */ mcol_global=0; ierr = PetscLogEventBegin(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); for (i=0;i<n;i++) { idx=lperm[i]; if (dcolors[idx] == maxcolors) { /* entries in bad */ cbad=bad[idx]; while (cbad>=0) { ccol=badidx[cbad]; if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); 
for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; cbad=badnext[cbad]; } /* diagonal distance-one rows */ nd1cols=0; ncols = rmd_i[idx+1]-rmd_i[idx]; cidx = &(rmd_j[rmd_i[idx]]); for (j=0;j<ncols;j++) { d1cols[nd1cols] = cidx[j]; nd1cols++; ccol=dcolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } /* off-diagonal distance-one rows */ if (mo) { ncols = rmo_i[idx+1]-rmo_i[idx]; cidx = &(rmo_j[rmo_i[idx]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* diagonal distance-two rows */ for (j=0;j<nd1cols;j++) { ncols = md_i[d1cols[j]+1]-md_i[d1cols[j]]; cidx = &(md_j[md_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=dcolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* off-diagonal distance-two rows */ if (mo) { for (j=0;j<nd1cols;j++) { ncols = mo_i[d1cols[j]+1]-mo_i[d1cols[j]]; cidx = &(mo_j[mo_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=ocolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } } /* assign this one the lowest color possible by seeing if there's a gap in the sequence of sorted neighbor colors */ for (j=0;j<masksize;j++) { if (mask[j]!=idx) { break; } } pcol=j; if (pcol>maxcolors) pcol=maxcolors; dcolors[idx]=pcol; if (pcol>mcol) mcol=pcol; } } ierr = PetscLogEventEnd(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); if (mo) { /* transfer neighbor colors */ ierr = PetscSFBcastBegin(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); /* find the maximum color assigned locally and allocate a mask */ ierr = MPIU_Allreduce(&mcol,&mcol_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); ierr = PetscMalloc1(mcol_global+1,&colorweights);CHKERRQ(ierr); /* check for conflicts */ for (i=0;i<n;i++) { conf[i]=PETSC_FALSE; } for (i=0;i<no;i++) { oconf[i]=PETSC_FALSE; } for (i=0;i<n;i++) { ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); if (ncols > 0) { /* fill in the mask */ for (j=0;j<mcol_global+1;j++) { colorweights[j]=0; } colorweights[dcolors[i]]=wts[i]; /* fill in the off-diagonal part of the mask */ for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] < owts[cidx[j]]) { colorweights[ccol] = owts[cidx[j]]; } } } /* fill in the on-diagonal part of the mask */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if 
(ccol < maxcolors) { if (colorweights[ccol] < wts[cidx[j]]) { colorweights[ccol] = wts[cidx[j]]; } } } /* go back through and set up on and off-diagonal conflict vectors */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > wts[cidx[j]]) { conf[cidx[j]]=PETSC_TRUE; } } } ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > owts[cidx[j]]) { oconf[cidx[j]]=PETSC_TRUE; } } } } } nd_global=0; ierr = PetscFree(colorweights);CHKERRQ(ierr); ierr = PetscLogEventBegin(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); ierr = PetscSFReduceBegin(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = PetscLogEventEnd(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); /* go through and unset local colors that have conflicts */ for (i=0;i<n;i++) { if (conf[i]>0) { /* push this color onto the bad stack */ badidx[nbad]=dcolors[i]; badnext[nbad]=bad[i]; bad[i]=nbad; nbad++; if (nbad>=badsize) { PetscInt *newbadnext; ISColoringValue *newbadidx; ierr = PetscMalloc2(badsize*2,&newbadidx,badsize*2,&newbadnext);CHKERRQ(ierr); for(k=0;k<2*badsize;k++) { newbadnext[k]=-1; } for(k=0;k<badsize;k++) { newbadidx[k]=badidx[k]; newbadnext[k]=badnext[k]; } ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); badidx=newbadidx; badnext=newbadnext; badsize*=2; } dcolors[i] = maxcolors; nd--; } } } ierr = MPIU_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); } if (mo) { ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscFree3(owts,oconf,ocolors);CHKERRQ(ierr); } for (i=0;i<n;i++) { colors[i]=dcolors[i]; } ierr = PetscFree(mask);CHKERRQ(ierr); ierr = PetscFree4(d1cols,dcolors,conf,bad);CHKERRQ(ierr); ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); if (!gr->symmetric) {ierr = MatDestroy(&mt);CHKERRQ(ierr);} PetscFunctionReturn(0); }
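/* The private distance-two routine above is driven by the MatColoring interface; a sketch of
   how a user requests a greedy distance-2 coloring, e.g. for compressed finite-difference
   Jacobian assembly. The helper name is illustrative. */
static PetscErrorCode GreedyDistanceTwoColoring(Mat J,ISColoring *iscoloring)
{
  PetscErrorCode ierr;
  MatColoring    mc;

  PetscFunctionBegin;
  ierr = MatColoringCreate(J,&mc);CHKERRQ(ierr);
  ierr = MatColoringSetType(mc,MATCOLORINGGREEDY);CHKERRQ(ierr);
  ierr = MatColoringSetDistance(mc,2);CHKERRQ(ierr);     /* columns sharing any row get different colors */
  ierr = MatColoringSetFromOptions(mc);CHKERRQ(ierr);
  ierr = MatColoringApply(mc,iscoloring);CHKERRQ(ierr);
  ierr = MatColoringDestroy(&mc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}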
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[]) { Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len; const PetscInt *idx_i; PetscInt idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i, Mbs,i,j,k,*odata1,*odata2, proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est; PetscInt proc_end=0,*iwork,len_unused,nodata2; PetscInt ois_max; /* max no of is[] in each of processor */ char *t_p; MPI_Comm comm; MPI_Request *s_waits1,*s_waits2,r_req; MPI_Status *s_status,r_status; PetscBT *table; /* mark indices of this processor's is[] */ PetscBT table_i; PetscBT otable; /* mark indices of other processors' is[] */ PetscInt bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners; IS garray_local,garray_gl; PetscFunctionBegin; comm = ((PetscObject)C)->comm; size = c->size; rank = c->rank; Mbs = c->Mbs; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); /* create tables used in step 1: table[i] - mark c->garray of proc [i] step 3: table[i] - mark indices of is[i] when whose=MINE table[0] - mark incideces of is[] when whose=OTHER */ len = PetscMax(is_max, size);CHKERRQ(ierr); ierr = PetscMalloc2(len,PetscBT,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,char,&t_p);CHKERRQ(ierr); for (i=0; i<len; i++) { table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; } ierr = MPI_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); /* 1. Send this processor's is[] to other processors */ /*---------------------------------------------------*/ /* allocate spaces */ ierr = PetscMalloc(is_max*sizeof(PetscInt),&n);CHKERRQ(ierr); len = 0; for (i=0; i<is_max; i++) { ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr); len += n[i]; } if (!len) { is_max = 0; } else { len += 1 + is_max; /* max length of data1 for one processor */ } ierr = PetscMalloc((size*len+1)*sizeof(PetscInt),&data1);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(PetscInt*),&data1_start);CHKERRQ(ierr); for (i=0; i<size; i++) data1_start[i] = data1 + i*len; ierr = PetscMalloc4(size,PetscInt,&len_s,size,PetscInt,&btable,size,PetscInt,&iwork,size+1,PetscInt,&Bowners);CHKERRQ(ierr); /* gather c->garray from all processors */ ierr = ISCreateGeneral(comm,Bnbs,c->garray,&garray_local);CHKERRQ(ierr); ierr = ISAllGather(garray_local, &garray_gl);CHKERRQ(ierr); ierr = ISDestroy(garray_local);CHKERRQ(ierr); ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); Bowners[0] = 0; for (i=0; i<size; i++) Bowners[i+1] += Bowners[i]; if (is_max){ /* hash table ctable which maps c->row to proc_id) */ ierr = PetscMalloc(Mbs*sizeof(PetscInt),&ctable);CHKERRQ(ierr); for (proc_id=0,j=0; proc_id<size; proc_id++) { for (; j<C->rmap->range[proc_id+1]/bs; j++) { ctable[j] = proc_id; } } /* hash tables marking c->garray */ ierr = ISGetIndices(garray_gl,&idx_i); for (i=0; i<size; i++){ table_i = table[i]; ierr = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr); for (j = Bowners[i]; j<Bowners[i+1]; j++){ /* go through B cols of proc[i]*/ ierr = PetscBTSet(table_i,idx_i[j]);CHKERRQ(ierr); } } ierr = ISRestoreIndices(garray_gl,&idx_i);CHKERRQ(ierr); } /* if (is_max) */ ierr = ISDestroy(garray_gl);CHKERRQ(ierr); /* evaluate communication - mesg to who, length, and buffer space */ for (i=0; i<size; i++) len_s[i] = 0; /* header of data1 */ for (proc_id=0; proc_id<size; proc_id++){ iwork[proc_id] = 0; *data1_start[proc_id] = is_max; data1_start[proc_id]++; for 
(j=0; j<is_max; j++) { if (proc_id == rank){ *data1_start[proc_id] = n[j]; } else { *data1_start[proc_id] = 0; } data1_start[proc_id]++; } } for (i=0; i<is_max; i++) { ierr = ISGetIndices(is[i],&idx_i);CHKERRQ(ierr); for (j=0; j<n[i]; j++){ idx = idx_i[j]; *data1_start[rank] = idx; data1_start[rank]++; /* for local proccessing */ proc_end = ctable[idx]; for (proc_id=0; proc_id<=proc_end; proc_id++){ /* for others to process */ if (proc_id == rank ) continue; /* done before this loop */ if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) continue; /* no need for sending idx to [proc_id] */ *data1_start[proc_id] = idx; data1_start[proc_id]++; len_s[proc_id]++; } } /* update header data */ for (proc_id=0; proc_id<size; proc_id++){ if (proc_id== rank) continue; *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id]; iwork[proc_id] = len_s[proc_id] ; } ierr = ISRestoreIndices(is[i],&idx_i);CHKERRQ(ierr); } nrqs = 0; nrqr = 0; for (i=0; i<size; i++){ data1_start[i] = data1 + i*len; if (len_s[i]){ nrqs++; len_s[i] += 1 + is_max; /* add no. of header msg */ } } for (i=0; i<is_max; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); } ierr = PetscFree(n);CHKERRQ(ierr); ierr = PetscFree(ctable);CHKERRQ(ierr); /* Determine the number of messages to expect, their lengths, from from-ids */ ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&nrqr);CHKERRQ(ierr); ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1);CHKERRQ(ierr); /* Now post the sends */ ierr = PetscMalloc2(size,MPI_Request,&s_waits1,size,MPI_Request,&s_waits2);CHKERRQ(ierr); k = 0; for (proc_id=0; proc_id<size; proc_id++){ /* send data1 to processor [proc_id] */ if (len_s[proc_id]){ ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr); k++; } } /* 2. Receive other's is[] and process. 
Then send back */ /*-----------------------------------------------------*/ len = 0; for (i=0; i<nrqr; i++){ if (len_r1[i] > len)len = len_r1[i]; } ierr = PetscFree(len_r1);CHKERRQ(ierr); ierr = PetscFree(id_r1);CHKERRQ(ierr); for (proc_id=0; proc_id<size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0; ierr = PetscMalloc((len+1)*sizeof(PetscInt),&odata1);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(PetscInt**),&odata2_ptr);CHKERRQ(ierr); ierr = PetscBTCreate(Mbs,otable);CHKERRQ(ierr); len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */ len_est = 2*len_max; /* estimated space of storing is[] for all receiving messages */ ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr); nodata2 = 0; /* nodata2+1: num of PetscMalloc(,&odata2_ptr[]) called */ odata2_ptr[nodata2] = odata2; len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max */ k = 0; while (k < nrqr){ /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr); if (flag){ ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr); /* Process messages */ /* make sure there is enough unused space in odata2 array */ if (len_unused < len_max){ /* allocate more space for odata2 */ ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; len_unused = len_est; } ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);CHKERRQ(ierr); len = 1 + odata2[0]; for (i=0; i<odata2[0]; i++){ len += odata2[1 + i]; } /* Send messages back */ ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k);CHKERRQ(ierr); k++; odata2 += len; len_unused -= len; len_s[proc_id] = len; /* num of messages sending back to [proc_id] by this proc */ } } ierr = PetscFree(odata1);CHKERRQ(ierr); ierr = PetscBTDestroy(otable);CHKERRQ(ierr); /* 3. Do local work on this processor's is[] */ /*-------------------------------------------*/ /* make sure there is enough unused space in odata2(=data) array */ len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */ if (len_unused < len_max){ /* allocate more space for odata2 */ ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; len_unused = len_est; } data = odata2; ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table);CHKERRQ(ierr); ierr = PetscFree(data1_start);CHKERRQ(ierr); /* 4. 
Receive work done on other processors, then merge */ /*------------------------------------------------------*/ /* get max number of messages that this processor expects to recv */ ierr = MPI_Allreduce(len_s,iwork,size,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); ierr = PetscMalloc((iwork[rank]+1)*sizeof(PetscInt),&data2);CHKERRQ(ierr); ierr = PetscFree4(len_s,btable,iwork,Bowners);CHKERRQ(ierr); k = 0; while (k < nrqs){ /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status); if (flag){ ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr); if (len > 1+is_max){ /* Add data2 into data */ data2_i = data2 + 1 + is_max; for (i=0; i<is_max; i++){ table_i = table[i]; data_i = data + 1 + is_max + Mbs*i; isz = data[1+i]; for (j=0; j<data2[1+i]; j++){ col = data2_i[j]; if (!PetscBTLookupSet(table_i,col)) {data_i[isz++] = col;} } data[1+i] = isz; if (i < is_max - 1) data2_i += data2[1+i]; } } k++; } } ierr = PetscFree(data2);CHKERRQ(ierr); ierr = PetscFree2(table,t_p);CHKERRQ(ierr); /* phase 1 sends are complete */ ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr); if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);} ierr = PetscFree(data1);CHKERRQ(ierr); /* phase 2 sends are complete */ if (nrqr){ierr = MPI_Waitall(nrqr,s_waits2,s_status);CHKERRQ(ierr);} ierr = PetscFree2(s_waits1,s_waits2);CHKERRQ(ierr); ierr = PetscFree(s_status);CHKERRQ(ierr); /* 5. Create new is[] */ /*--------------------*/ for (i=0; i<is_max; i++) { data_i = data + 1 + is_max + Mbs*i; ierr = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,is+i);CHKERRQ(ierr); } for (k=0; k<=nodata2; k++){ ierr = PetscFree(odata2_ptr[k]);CHKERRQ(ierr); } ierr = PetscFree(odata2_ptr);CHKERRQ(ierr); PetscFunctionReturn(0); }
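/* The routine above implements one round of overlap growth for the MPISBAIJ format; user
   code reaches it through the public MatIncreaseOverlap(), typically when building
   overlapping subdomains for additive Schwarz. A minimal sketch; the helper name is
   illustrative. */
static PetscErrorCode GrowSubdomains(Mat C,PetscInt nsub,IS subdomains[],PetscInt overlap)
{
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = MatIncreaseOverlap(C,nsub,subdomains,overlap);CHKERRQ(ierr);  /* each is[] is enlarged in place */
  for (i=0; i<nsub; i++) {                                             /* many callers want sorted indices */
    ierr = ISSort(subdomains[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}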
/* PEPBuildDiagonalScaling - compute two diagonal matrices to be applied for balancing in polynomial eigenproblems. */ PetscErrorCode PEPBuildDiagonalScaling(PEP pep) { PetscErrorCode ierr; PetscInt it,i,j,k,nmat,nr,e,nz,lst,lend,nc=0,*cols,emax,emin,emaxl,eminl; const PetscInt *cidx,*ridx; Mat M,*T,A; PetscMPIInt n; PetscBool cont=PETSC_TRUE,flg=PETSC_FALSE; PetscScalar *array,*Dr,*Dl,t; PetscReal l2,d,*rsum,*aux,*csum,w=1.0; MatStructure str; MatInfo info; PetscFunctionBegin; l2 = 2*PetscLogReal(2.0); nmat = pep->nmat; ierr = PetscMPIIntCast(pep->n,&n); ierr = STGetMatStructure(pep->st,&str);CHKERRQ(ierr); ierr = PetscMalloc1(nmat,&T);CHKERRQ(ierr); for (k=0;k<nmat;k++) { ierr = STGetTOperators(pep->st,k,&T[k]);CHKERRQ(ierr); } /* Form local auxiliar matrix M */ ierr = PetscObjectTypeCompareAny((PetscObject)T[0],&cont,MATMPIAIJ,MATSEQAIJ);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]),PETSC_ERR_SUP,"Only for MPIAIJ or SEQAIJ matrix types"); ierr = PetscObjectTypeCompare((PetscObject)T[0],MATMPIAIJ,&cont);CHKERRQ(ierr); if (cont) { ierr = MatMPIAIJGetLocalMat(T[0],MAT_INITIAL_MATRIX,&M);CHKERRQ(ierr); flg = PETSC_TRUE; } else { ierr = MatDuplicate(T[0],MAT_COPY_VALUES,&M);CHKERRQ(ierr); } ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); for (k=1;k<nmat;k++) { if (flg) { ierr = MatMPIAIJGetLocalMat(T[k],MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); } else { if (str==SAME_NONZERO_PATTERN) { ierr = MatCopy(T[k],A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } else { ierr = MatDuplicate(T[k],MAT_COPY_VALUES,&A);CHKERRQ(ierr); } } ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr); w *= pep->slambda*pep->slambda*pep->sfactor; ierr = MatAXPY(M,w,A,str);CHKERRQ(ierr); if (flg || str!=SAME_NONZERO_PATTERN || k==nmat-2) { ierr = MatDestroy(&A);CHKERRQ(ierr); } } ierr = MatGetRowIJ(M,0,PETSC_FALSE,PETSC_FALSE,&nr,&ridx,&cidx,&cont);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]), PETSC_ERR_SUP,"It is not possible to compute scaling diagonals for these PEP matrices"); ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = VecGetOwnershipRange(pep->Dl,&lst,&lend);CHKERRQ(ierr); ierr = PetscMalloc4(nr,&rsum,pep->n,&csum,pep->n,&aux,PetscMin(pep->n-lend+lst,nz),&cols);CHKERRQ(ierr); ierr = VecSet(pep->Dr,1.0);CHKERRQ(ierr); ierr = VecSet(pep->Dl,1.0);CHKERRQ(ierr); ierr = VecGetArray(pep->Dl,&Dl);CHKERRQ(ierr); ierr = VecGetArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) { /* Search non-zero columns outsize lst-lend */ if (aux[cidx[j]]==0 && (cidx[j]<lst || lend<=cidx[j])) cols[nc++] = cidx[j]; /* Local column sums */ aux[cidx[j]] += PetscAbsScalar(array[j]); } for (it=0;it<pep->sits && cont;it++) { emaxl = 0; eminl = 0; /* Column sum */ if (it>0) { /* it=0 has been already done*/ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) aux[cidx[j]] += PetscAbsScalar(array[j]); ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); } ierr = 
MPI_Allreduce(aux,csum,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)pep->Dr)); /* Update Dr */ for (j=lst;j<lend;j++) { d = PetscLogReal(csum[j])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dr[j-lst] *= d; aux[j] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } for (j=0;j<nc;j++) { d = PetscLogReal(csum[cols[j]])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); aux[cols[j]] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } /* Scale M */ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (j=0;j<nz;j++) { array[j] *= aux[cidx[j]]; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Row sum */ ierr = PetscMemzero(rsum,nr*sizeof(PetscReal));CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nr;i++) { for (j=ridx[i];j<ridx[i+1];j++) rsum[i] += PetscAbsScalar(array[j]); /* Update Dl */ d = PetscLogReal(rsum[i])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dl[i] *= d; /* Scale M */ for (j=ridx[i];j<ridx[i+1];j++) array[j] *= d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Compute global max and min */ ierr = MPI_Allreduce(&emaxl,&emax,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)pep->Dl)); ierr = MPI_Allreduce(&eminl,&emin,1,MPIU_INT,MPIU_MIN,PetscObjectComm((PetscObject)pep->Dl)); if (emax<=emin+2) cont = PETSC_FALSE; } ierr = VecRestoreArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = VecRestoreArray(pep->Dl,&Dl);CHKERRQ(ierr); /* Free memory*/ ierr = MatDestroy(&M);CHKERRQ(ierr); ierr = PetscFree4(rsum,csum,aux,cols);CHKERRQ(ierr); ierr = PetscFree(T);CHKERRQ(ierr); PetscFunctionReturn(0); }
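/* The balancing loop above replaces each row/column sum by the nearest power of two, so the
   diagonal scaling is exact in floating point. Since the matrix entries hold squared
   magnitudes, the exponent is the rounded base-2 logarithm of the square root of the sum, as
   in this small helper (name illustrative): */
static PetscReal DiagonalScalingFactor(PetscReal sumsq)
{
  PetscReal d = PetscLogReal(sumsq)/(2*PetscLogReal(2.0));   /* log2 of sqrt(sumsq) */
  PetscInt  e = -(PetscInt)((d < 0) ? (d-0.5) : (d+0.5));    /* round to nearest integer, negate */
  return PetscPowReal(2.0,(PetscReal)e);                     /* exact power of two */
}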
PetscErrorCode EPSSolve_Lanczos(EPS eps) { EPS_LANCZOS *lanczos = (EPS_LANCZOS*)eps->data; PetscErrorCode ierr; PetscInt nconv,i,j,k,l,x,n,*perm,restart,ncv=eps->ncv,r,ld; Vec vi,vj,w; Mat U; PetscScalar *Y,*ritz,stmp; PetscReal *d,*e,*bnd,anorm,beta,norm,rtmp,resnorm; PetscBool breakdown; char *conv,ctmp; PetscFunctionBegin; ierr = DSGetLeadingDimension(eps->ds,&ld);CHKERRQ(ierr); ierr = PetscMalloc4(ncv,&ritz,ncv,&bnd,ncv,&perm,ncv,&conv);CHKERRQ(ierr); /* The first Lanczos vector is the normalized initial vector */ ierr = EPSGetStartVector(eps,0,NULL);CHKERRQ(ierr); anorm = -1.0; nconv = 0; /* Restart loop */ while (eps->reason == EPS_CONVERGED_ITERATING) { eps->its++; /* Compute an ncv-step Lanczos factorization */ n = PetscMin(nconv+eps->mpd,ncv); ierr = DSGetArrayReal(eps->ds,DS_MAT_T,&d);CHKERRQ(ierr); e = d + ld; ierr = EPSBasicLanczos(eps,d,e,nconv,&n,&breakdown,anorm);CHKERRQ(ierr); beta = e[n-1]; ierr = DSRestoreArrayReal(eps->ds,DS_MAT_T,&d);CHKERRQ(ierr); ierr = DSSetDimensions(eps->ds,n,0,nconv,0);CHKERRQ(ierr); ierr = DSSetState(eps->ds,DS_STATE_INTERMEDIATE);CHKERRQ(ierr); ierr = BVSetActiveColumns(eps->V,nconv,n);CHKERRQ(ierr); /* Solve projected problem */ ierr = DSSolve(eps->ds,ritz,NULL);CHKERRQ(ierr); ierr = DSSort(eps->ds,ritz,NULL,NULL,NULL,NULL);CHKERRQ(ierr); /* Estimate ||A|| */ for (i=nconv;i<n;i++) anorm = PetscMax(anorm,PetscAbsReal(PetscRealPart(ritz[i]))); /* Compute residual norm estimates as beta*abs(Y(m,:)) + eps*||A|| */ ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); for (i=nconv;i<n;i++) { resnorm = beta*PetscAbsScalar(Y[n-1+i*ld]) + PETSC_MACHINE_EPSILON*anorm; ierr = (*eps->converged)(eps,ritz[i],eps->eigi[i],resnorm,&bnd[i],eps->convergedctx);CHKERRQ(ierr); if (bnd[i]<eps->tol) conv[i] = 'C'; else conv[i] = 'N'; } ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); /* purge repeated ritz values */ if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL) { for (i=nconv+1;i<n;i++) { if (conv[i] == 'C' && PetscAbsScalar((ritz[i]-ritz[i-1])/ritz[i]) < eps->tol) conv[i] = 'R'; } } /* Compute restart vector */ if (breakdown) { ierr = PetscInfo2(eps,"Breakdown in Lanczos method (it=%D norm=%g)\n",eps->its,(double)beta);CHKERRQ(ierr); } else { restart = nconv; while (restart<n && conv[restart] != 'N') restart++; if (restart >= n) { breakdown = PETSC_TRUE; } else { for (i=restart+1;i<n;i++) { if (conv[i] == 'N') { ierr = SlepcSCCompare(eps->sc,ritz[restart],0.0,ritz[i],0.0,&r);CHKERRQ(ierr); if (r>0) restart = i; } } ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); ierr = BVMultColumn(eps->V,1.0,0.0,n,Y+restart*ld+nconv);CHKERRQ(ierr); ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); } } /* Count and put converged eigenvalues first */ for (i=nconv;i<n;i++) perm[i] = i; for (k=nconv;k<n;k++) { if (conv[perm[k]] != 'C') { j = k + 1; while (j<n && conv[perm[j]] != 'C') j++; if (j>=n) break; l = perm[k]; perm[k] = perm[j]; perm[j] = l; } } /* Sort eigenvectors according to permutation */ ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); for (i=nconv;i<k;i++) { x = perm[i]; if (x != i) { j = i + 1; while (perm[j] != i) j++; /* swap eigenvalues i and j */ stmp = ritz[x]; ritz[x] = ritz[i]; ritz[i] = stmp; rtmp = bnd[x]; bnd[x] = bnd[i]; bnd[i] = rtmp; ctmp = conv[x]; conv[x] = conv[i]; conv[i] = ctmp; perm[j] = x; perm[i] = i; /* swap eigenvectors i and j */ for (l=0;l<n;l++) { stmp = Y[l+x*ld]; Y[l+x*ld] = Y[l+i*ld]; Y[l+i*ld] = stmp; } } } ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); /* compute converged eigenvectors */ ierr 
= DSGetMat(eps->ds,DS_MAT_Q,&U);CHKERRQ(ierr); ierr = BVMultInPlace(eps->V,U,nconv,k);CHKERRQ(ierr); ierr = MatDestroy(&U);CHKERRQ(ierr); /* purge spurious ritz values */ if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL) { for (i=nconv;i<k;i++) { ierr = BVGetColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = VecNorm(vi,NORM_2,&norm);CHKERRQ(ierr); ierr = VecScale(vi,1.0/norm);CHKERRQ(ierr); w = eps->work[0]; ierr = STApply(eps->st,vi,w);CHKERRQ(ierr); ierr = VecAXPY(w,-ritz[i],vi);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr); ierr = (*eps->converged)(eps,ritz[i],eps->eigi[i],norm,&bnd[i],eps->convergedctx);CHKERRQ(ierr); if (bnd[i]>=eps->tol) conv[i] = 'S'; } for (i=nconv;i<k;i++) { if (conv[i] != 'C') { j = i + 1; while (j<k && conv[j] != 'C') j++; if (j>=k) break; /* swap eigenvalues i and j */ stmp = ritz[j]; ritz[j] = ritz[i]; ritz[i] = stmp; rtmp = bnd[j]; bnd[j] = bnd[i]; bnd[i] = rtmp; ctmp = conv[j]; conv[j] = conv[i]; conv[i] = ctmp; /* swap eigenvectors i and j */ ierr = BVGetColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = BVGetColumn(eps->V,j,&vj);CHKERRQ(ierr); ierr = VecSwap(vi,vj);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,j,&vj);CHKERRQ(ierr); } } k = i; } /* store ritz values and estimated errors */ for (i=nconv;i<n;i++) { eps->eigr[i] = ritz[i]; eps->errest[i] = bnd[i]; } ierr = EPSMonitor(eps,eps->its,nconv,eps->eigr,eps->eigi,eps->errest,n);CHKERRQ(ierr); nconv = k; if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS; if (nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL; if (eps->reason == EPS_CONVERGED_ITERATING) { /* copy restart vector */ ierr = BVCopyColumn(eps->V,n,nconv);CHKERRQ(ierr); if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL && !breakdown) { /* Reorthonormalize restart vector */ ierr = BVOrthogonalizeColumn(eps->V,nconv,NULL,&norm,&breakdown);CHKERRQ(ierr); ierr = BVScaleColumn(eps->V,nconv,1.0/norm);CHKERRQ(ierr); } if (breakdown) { /* Use random vector for restarting */ ierr = PetscInfo(eps,"Using random vector for restart\n");CHKERRQ(ierr); ierr = EPSGetStartVector(eps,nconv,&breakdown);CHKERRQ(ierr); } if (breakdown) { /* give up */ eps->reason = EPS_DIVERGED_BREAKDOWN; ierr = PetscInfo(eps,"Unable to generate more start vectors\n");CHKERRQ(ierr); } } } eps->nconv = nconv; ierr = PetscFree4(ritz,bnd,perm,conv);CHKERRQ(ierr); PetscFunctionReturn(0); }
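/*
   Sketch only: the convergence test in EPSSolve_Lanczos relies on the standard Lanczos
   residual estimate ||A*x_i - theta_i*x_i|| ~= beta_m*|e_m^T y_i|, padded with a
   machine-precision*||A|| term to guard against a vanishing last component.  The helper
   below just evaluates that bound for one Ritz pair; the name and calling convention are
   illustrative, not part of the solver.
*/
#include <slepceps.h>

static PetscReal LanczosResidualBoundSketch(PetscReal beta,PetscScalar ylast,PetscReal anorm)
{
  /* beta*|last entry of the projected eigenvector| + eps*||A|| */
  return beta*PetscAbsScalar(ylast) + PETSC_MACHINE_EPSILON*anorm;
}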
int main(int argc, char *args[]) { PFLOTRANMesh data; Mat Adj; /* The adjacency matrix of the mesh */ PetscInt bs = 3; PetscScalar values[9],*cc; PetscMPIInt size; PetscInt i; PetscErrorCode ierr; PetscViewer binaryviewer; Vec cellCenters; PetscViewer hdf5viewer; hid_t file_id, dataset_id, dataspace_id; herr_t status; PetscFunctionBegin; ierr = PetscInitialize(&argc, &args, (char *) 0, help); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); if (size > 1) SETERRQ(PETSC_ERR_SUP,"This preprocessor runs only on one process"); /* Open Glenn's file */ ierr = PetscViewerCreate(PETSC_COMM_SELF, &hdf5viewer);CHKERRQ(ierr); ierr = PetscViewerSetType(hdf5viewer, PETSC_VIEWER_HDF5);CHKERRQ(ierr); ierr = PetscViewerFileSetMode(hdf5viewer, FILE_MODE_READ);CHKERRQ(ierr); ierr = PetscViewerFileSetName(hdf5viewer, "mesh.h5");CHKERRQ(ierr); ierr = PetscViewerHDF5GetFileId(hdf5viewer, &file_id);CHKERRQ(ierr); /* get number of cells and then number of edges */ dataset_id = H5Dopen(file_id, "/Cells/Natural IDs"); dataspace_id = H5Dget_space(dataset_id); status = H5Sget_simple_extent_dims(dataspace_id, &data.numCells, NULL);if (status < 0) SETERRQ(PETSC_ERR_LIB,"Bad dimension"); status = H5Sclose(dataspace_id);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Areas"); dataspace_id = H5Dget_space(dataset_id); status = H5Sget_simple_extent_dims(dataspace_id, &data.numFaces, NULL);if (status < 0) SETERRQ(PETSC_ERR_LIB,"Bad dimension"); status = H5Sclose(dataspace_id);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); ierr = PetscPrintf(PETSC_COMM_SELF, "Number of cells %D Number of faces %D \n",(PetscInt)data.numCells,(PetscInt)data.numFaces);CHKERRQ(ierr); /* read face data */ ierr = PetscMalloc5(data.numFaces,double,&data.faceAreas,data.numFaces,int,&data.downCells,data.numFaces,double,&data.downX,data.numFaces,double,&data.downY,data.numFaces,double,&data.downZ);CHKERRQ(ierr); dataset_id = H5Dopen(file_id, "/Connections/Areas"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.faceAreas);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Cell IDs"); status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downCells);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance X"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downX);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance Y"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downY);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance Z"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downZ);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); ierr = PetscMalloc4(data.numFaces,int,&data.upCells,data.numFaces,double,&data.upX,data.numFaces,double,&data.upY,data.numFaces,double,&data.upZ);CHKERRQ(ierr); dataset_id = H5Dopen(file_id, "/Connections/Upwind Cell IDs"); status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upCells);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance X"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, 
H5S_ALL, H5P_DEFAULT, data.upX);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance Y"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upY);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance Z"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upZ);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); // Put face data into matrix ierr = MatCreate(PETSC_COMM_WORLD, &Adj);CHKERRQ(ierr); ierr = MatSetSizes(Adj, data.numCells*bs, data.numCells*bs, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetFromOptions(Adj);CHKERRQ(ierr); ierr = MatSetType(Adj,MATSEQBAIJ);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(Adj, bs, 6,PETSC_NULL);CHKERRQ(ierr); //ierr = MatSetType(Adj,MATSEQAIJ);CHKERRQ(ierr); //ierr = MatSeqAIJSetPreallocation(Adj, 6,PETSC_NULL);CHKERRQ(ierr); for(i = 0; i < data.numFaces; ++i) { values[0] = data.faceAreas[i]; values[1] = data.downCells[i]; values[2] = data.downX[i]; values[3] = data.downY[i]; values[4] = data.downZ[i]; values[5] = data.upCells[i]; values[6] = data.upX[i]; values[7] = data.upY[i]; values[8] = data.upZ[i]; ierr = MatSetValuesBlocked(Adj, 1, &data.downCells[i], 1, &data.upCells[i], values, INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValuesBlocked(Adj, 1, &data.upCells[i], 1, &data.downCells[i], values, INSERT_VALUES);CHKERRQ(ierr); //ierr = MatSetValues(Adj, 1, &data.downCells[i], 1, &data.upCells[i], values, INSERT_VALUES);CHKERRQ(ierr); //ierr = MatSetValues(Adj, 1, &data.upCells[i], 1, &data.downCells[i], values, INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(Adj, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Adj, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscFree5(data.faceAreas, data.downCells, data.downX, data.downY, data.downZ);CHKERRQ(ierr); ierr = PetscFree4(data.upCells, data.upX, data.upY, data.upZ);CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"mesh.petsc", FILE_MODE_WRITE,&binaryviewer);CHKERRQ(ierr); ierr = MatView(Adj, binaryviewer);CHKERRQ(ierr); ierr = MatDestroy(Adj);CHKERRQ(ierr); /* read cell information */ ierr = PetscMalloc5(data.numCells,int,&data.cellIds,data.numCells,double,&data.cellVols,data.numCells,double,&data.cellX,data.numCells,double,&data.cellY,data.numCells,double,&data.cellZ);CHKERRQ(ierr); dataset_id = H5Dopen(file_id, "/Cells/Natural IDs"); status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellIds);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/Volumes"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellVols);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/X-Coordinates"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellX);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/Y-Coordinates"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellY);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/Z-Coordinates"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellZ);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); ierr = PetscViewerDestroy(hdf5viewer);CHKERRQ(ierr); /* put cell 
information into vectors */ ierr = VecCreateSeq(PETSC_COMM_SELF,3*data.numCells,&cellCenters);CHKERRQ(ierr); ierr = VecSetBlockSize(cellCenters,3);CHKERRQ(ierr); ierr = VecGetArray(cellCenters,&cc);CHKERRQ(ierr); for (i=0; i<data.numCells; i++) { cc[3*i] = data.cellX[i]; cc[3*i+1] = data.cellY[i]; cc[3*i+2] = data.cellZ[i]; } ierr = VecRestoreArray(cellCenters,&cc);CHKERRQ(ierr); ierr = VecView(cellCenters,binaryviewer);CHKERRQ(ierr); ierr = VecGetArray(cellCenters,&cc);CHKERRQ(ierr); for (i=0; i<data.numCells; i++) { cc[3*i] = data.cellIds[i]; cc[3*i+1] = data.cellVols[i]; cc[3*i+2] = 0.0; } ierr = VecRestoreArray(cellCenters,&cc);CHKERRQ(ierr); ierr = VecView(cellCenters,binaryviewer);CHKERRQ(ierr); ierr = PetscFree5(data.cellIds, data.cellVols, data.cellX, data.cellY, data.cellZ);CHKERRQ(ierr); ierr = VecDestroy(cellCenters); ierr = PetscViewerDestroy(binaryviewer);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); PetscFunctionReturn(0); }
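/*
   Companion sketch, not part of the original preprocessor: a consumer of the "mesh.petsc"
   file written above could recover the adjacency matrix and the two cell vectors in the
   same order they were stored.  This assumes a PETSc version with the
   MatLoad(Mat,PetscViewer) / VecLoad(Vec,PetscViewer) calling sequence and pointer-based
   destroy routines; the variable and function names are ours.
*/
#include <petscmat.h>

static PetscErrorCode ReadMeshSketch(void)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;
  Mat            Adj;
  Vec            centers,props;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"mesh.petsc",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&Adj);CHKERRQ(ierr);
  ierr = MatLoad(Adj,viewer);CHKERRQ(ierr);       /* adjacency matrix, written first */
  ierr = VecCreate(PETSC_COMM_SELF,&centers);CHKERRQ(ierr);
  ierr = VecLoad(centers,viewer);CHKERRQ(ierr);   /* cell centers (x,y,z interlaced) */
  ierr = VecCreate(PETSC_COMM_SELF,&props);CHKERRQ(ierr);
  ierr = VecLoad(props,viewer);CHKERRQ(ierr);     /* cell ids and volumes */
  /* ... use Adj, centers, props ... */
  ierr = VecDestroy(&props);CHKERRQ(ierr);
  ierr = VecDestroy(&centers);CHKERRQ(ierr);
  ierr = MatDestroy(&Adj);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}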
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) { Mat_MPIDense *c = (Mat_MPIDense*)C->data; Mat A = c->A; Mat_SeqDense *a = (Mat_SeqDense*)A->data,*mat; PetscErrorCode ierr; PetscMPIInt rank,size,tag0,tag1,idex,end,i; PetscInt N = C->cmap->N,rstart = C->rmap->rstart,count; const PetscInt **irow,**icol,*irow_i; PetscInt *nrow,*ncol,*w1,*w3,*w4,*rtable,start; PetscInt **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc; PetscInt nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr; PetscInt is_no,jmax,**rmap,*rmap_i; PetscInt ctr_j,*sbuf1_j,*rbuf1_i; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *r_status1,*r_status2,*s_status1,*s_status2; MPI_Comm comm; PetscScalar **rbuf2,**sbuf2; PetscBool sorted; PetscFunctionBegin; comm = ((PetscObject)C)->comm; tag0 = ((PetscObject)C)->tag; size = c->size; rank = c->rank; m = C->rmap->N; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); /* Check if the col indices are sorted */ for (i=0; i<ismax; i++) { ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted"); ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); } ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr); for (i=0; i<ismax; i++) { ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); } /* Create hash table for the mapping :row -> proc*/ for (i=0,j=0; i<size; i++) { jmax = C->rmap->range[i+1]; for (; j<jmax; j++) { rtable[j] = i; } } /* evaluate communication - mesg to who,length of mesg, and buffer space required. Based on this, buffers are allocated, and data copied into them*/ ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr); ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ for (i=0; i<ismax; i++) { ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ jmax = nrow[i]; irow_i = irow[i]; for (j=0; j<jmax; j++) { row = irow_i[j]; proc = rtable[row]; w4[proc]++; } for (j=0; j<size; j++) { if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;} } } nrqs = 0; /* no of outgoing messages */ msz = 0; /* total mesg length (for all procs) */ w1[2*rank] = 0; /* no mesg sent to self */ w3[rank] = 0; for (i=0; i<size; i++) { if (w1[2*i]) { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */ } ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/ for (i=0,j=0; i<size; i++) { if (w1[2*i]) { pa[j] = i; j++; } } /* Each message would have a header = 1 + 2*(no of IS) + data */ for (i=0; i<nrqs; i++) { j = pa[i]; w1[2*j] += w1[2*j+1] + 2* w3[j]; msz += w1[2*j]; } /* Do a global reduction to determine how many messages to expect*/ ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr); /* Allocate memory for recv buffers . 
Make sure rbuf1[0] exists by adding 1 to the buffer length */ ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr); ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr); for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz; /* Post the receives */ ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr); for (i=0; i<nrqr; ++i) { ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr); } /* Allocate memory for outgoing messages */ ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr); ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); { PetscInt *iptr = tmp,ict = 0; for (i=0; i<nrqs; i++) { j = pa[i]; iptr += ict; sbuf1[j] = iptr; ict = w1[2*j]; } } /* Form the outgoing messages */ /* Initialize the header space */ for (i=0; i<nrqs; i++) { j = pa[i]; sbuf1[j][0] = 0; ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); ptr[j] = sbuf1[j] + 2*w3[j] + 1; } /* Parse the isrow and copy data into outbuf */ for (i=0; i<ismax; i++) { ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); irow_i = irow[i]; jmax = nrow[i]; for (j=0; j<jmax; j++) { /* parse the indices of each IS */ row = irow_i[j]; proc = rtable[row]; if (proc != rank) { /* copy to the outgoing buffer */ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } } /* Update the headers for the current IS */ for (j=0; j<size; j++) { /* Could optimize this loop too */ if ((ctr_j = ctr[j])) { sbuf1_j = sbuf1[j]; k = ++sbuf1_j[0]; sbuf1_j[2*k] = ctr_j; sbuf1_j[2*k-1] = i; } } } /* Now post the sends */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); for (i=0; i<nrqs; ++i) { j = pa[i]; ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); } /* Post receives to capture the row data from other procs */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr); for (i=0; i<nrqs; i++) { j = pa[i]; count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N; ierr = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr); ierr = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr); } /* Receive messages (row numbers), then pack and send off the row values to the correct processors */ ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr); { PetscScalar *sbuf2_i,*v_start; PetscInt s_proc; for (i=0; i<nrqr; ++i) { ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr); s_proc = r_status1[i].MPI_SOURCE; /* sending processor */ rbuf1_i = rbuf1[idex]; /* Actual message from s_proc */ /* number of rows = end - start; start skips the header of rbuf1_i, end is the length of the received buffer */ start = 2*rbuf1_i[0] + 1; ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); /* allocate memory sufficient to hold all the row values */ ierr = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr); sbuf2_i = sbuf2[idex]; /* Now pack the data */ for (j=start; j<end; j++) { row = rbuf1_i[j] - rstart; v_start = a->v + row; for (k=0; k<N; k++) { sbuf2_i[0] = v_start[0]; sbuf2_i++; v_start += C->rmap->n; } } /* Now send off the data */ ierr = 
MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr); } } /* End Send-Recv of IS + row_numbers */ ierr = PetscFree(r_status1);CHKERRQ(ierr); ierr = PetscFree(r_waits1);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} ierr = PetscFree(s_status1);CHKERRQ(ierr); ierr = PetscFree(s_waits1);CHKERRQ(ierr); /* Create the submatrices */ if (scall == MAT_REUSE_MATRIX) { for (i=0; i<ismax; i++) { mat = (Mat_SeqDense *)(submats[i]->data); if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr); submats[i]->factortype = C->factortype; } } else { for (i=0; i<ismax; i++) { ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr); ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(submats[i],PETSC_NULL);CHKERRQ(ierr); } } /* Assemble the matrices */ { PetscInt col; PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi; for (i=0; i<ismax; i++) { mat = (Mat_SeqDense*)submats[i]->data; mat_v = a->v; imat_v = mat->v; irow_i = irow[i]; m = nrow[i]; for (j=0; j<m; j++) { row = irow_i[j] ; proc = rtable[row]; if (proc == rank) { row = row - rstart; mat_vi = mat_v + row; imat_vi = imat_v + j; for (k=0; k<ncol[i]; k++) { col = icol[i][k]; imat_vi[k*m] = mat_vi[col*C->rmap->n]; } } } } } /* Create row map-> This maps c->row to submat->row for each submat*/ /* this is a very expensive operation wrt memory usage */ ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr); ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr); ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr); for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;} for (i=0; i<ismax; i++) { rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; j<jmax; j++) { rmap_i[irow_i[j]] = j; } } /* Now Receive the row_values and assemble the rest of the matrix */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); { PetscInt is_max,tmp1,col,*sbuf1_i,is_sz; PetscScalar *rbuf2_i,*imat_v,*imat_vi; for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */ ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr); /* Now dig out the corresponding sbuf1, which contains the IS data_structure */ sbuf1_i = sbuf1[pa[i]]; is_max = sbuf1_i[0]; ct1 = 2*is_max+1; rbuf2_i = rbuf2[i]; for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */ is_no = sbuf1_i[2*j-1]; is_sz = sbuf1_i[2*j]; mat = (Mat_SeqDense*)submats[is_no]->data; imat_v = mat->v; rmap_i = rmap[is_no]; m = nrow[is_no]; for (k=0; k<is_sz; k++,rbuf2_i+=N) { /* For each row */ row = sbuf1_i[ct1]; ct1++; row = rmap_i[row]; imat_vi = imat_v + row; for (l=0; l<ncol[is_no]; l++) { /* For each col */ col = icol[is_no][l]; imat_vi[l*m] = rbuf2_i[col]; } } } } } /* End Send-Recv of row_values */ ierr = PetscFree(r_status2);CHKERRQ(ierr); ierr = PetscFree(r_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr); if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} ierr = PetscFree(s_status2);CHKERRQ(ierr); ierr = PetscFree(s_waits2);CHKERRQ(ierr); /* Restore the indices */ for (i=0; i<ismax; 
i++) { ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); } /* Destroy allocated memory */ ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr); ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr); ierr = PetscFree(pa);CHKERRQ(ierr); for (i=0; i<nrqs; ++i) { ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr); } ierr = PetscFree(rbuf2);CHKERRQ(ierr); ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr); ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr); ierr = PetscFree(rbuf1);CHKERRQ(ierr); for (i=0; i<nrqr; ++i) { ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr); } ierr = PetscFree(sbuf2);CHKERRQ(ierr); ierr = PetscFree(rmap[0]);CHKERRQ(ierr); ierr = PetscFree(rmap);CHKERRQ(ierr); for (i=0; i<ismax; i++) { ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } PetscFunctionReturn(0); }
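/*
   Usage sketch (ours, not part of the PETSc sources): the local routine above is normally
   reached through the public MatGetSubMatrices() interface.  The fragment below extracts one
   sequential dense submatrix from a parallel dense matrix C; the function name and index
   arrays are illustrative, and both index lists are assumed to be sorted, as the routine
   above requires.
*/
#include <petscmat.h>

static PetscErrorCode ExtractDenseBlockSketch(Mat C,PetscInt nloc,const PetscInt rows[],const PetscInt cols[])
{
  PetscErrorCode ierr;
  IS             isrow,iscol;
  Mat            *submats;

  PetscFunctionBegin;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nloc,rows,PETSC_COPY_VALUES,&isrow);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nloc,cols,PETSC_COPY_VALUES,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);
  /* ... use submats[0], a SEQDENSE matrix local to this process ... */
  ierr = MatDestroyMatrices(1,&submats);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}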