static PetscErrorCode KSPSetUp_PIPEFCG(KSP ksp)
{
  PetscErrorCode ierr;
  KSP_PIPEFCG    *pipefcg;
  const PetscInt nworkstd = 5;

  PetscFunctionBegin;
  pipefcg = (KSP_PIPEFCG*)ksp->data;

  /* Allocate "standard" work vectors (not including the basis and transformed basis vectors) */
  ierr = KSPSetWorkVecs(ksp,nworkstd);CHKERRQ(ierr);

  /* Allocate space for pointers to additional work vectors; note that mmax is the number
     of previous directions, so we add 1 for the current direction, and an extra 1 for
     the prealloc (which might be empty) */
  ierr = PetscMalloc4(pipefcg->mmax+1,&(pipefcg->Pvecs),pipefcg->mmax+1,&(pipefcg->pPvecs),pipefcg->mmax+1,&(pipefcg->Svecs),pipefcg->mmax+1,&(pipefcg->pSvecs));CHKERRQ(ierr);
  ierr = PetscMalloc4(pipefcg->mmax+1,&(pipefcg->Qvecs),pipefcg->mmax+1,&(pipefcg->pQvecs),pipefcg->mmax+1,&(pipefcg->ZETAvecs),pipefcg->mmax+1,&(pipefcg->pZETAvecs));CHKERRQ(ierr);
  ierr = PetscMalloc4(pipefcg->mmax+1,&(pipefcg->Pold),pipefcg->mmax+1,&(pipefcg->Sold),pipefcg->mmax+1,&(pipefcg->Qold),pipefcg->mmax+1,&(pipefcg->ZETAold));CHKERRQ(ierr);
  ierr = PetscMalloc1(pipefcg->mmax+1,&(pipefcg->chunksizes));CHKERRQ(ierr);
  ierr = PetscMalloc3(pipefcg->mmax+2,&(pipefcg->dots),pipefcg->mmax+1,&(pipefcg->etas),pipefcg->mmax+2,&(pipefcg->redux));CHKERRQ(ierr);

  /* If the requested number of preallocated vectors is greater than mmax+1, reduce nprealloc */
  if (pipefcg->nprealloc > pipefcg->mmax+1) {
    ierr = PetscInfo2(NULL,"Requested nprealloc=%D is greater than m_max+1=%D. Resetting nprealloc = m_max+1.\n",pipefcg->nprealloc,pipefcg->mmax+1);CHKERRQ(ierr);
    pipefcg->nprealloc = pipefcg->mmax+1;
  }

  /* Preallocate additional work vectors */
  ierr = KSPAllocateVectors_PIPEFCG(ksp,pipefcg->nprealloc,pipefcg->nprealloc);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory((PetscObject)ksp,
                              (pipefcg->mmax+1)*4*sizeof(Vec*)+(pipefcg->mmax+1)*4*sizeof(Vec**)+(pipefcg->mmax+1)*4*sizeof(Vec*)+
                              (pipefcg->mmax+1)*sizeof(PetscInt)+(pipefcg->mmax+2)*sizeof(Vec*)+(pipefcg->mmax+2)*sizeof(PetscScalar)+(pipefcg->mmax+1)*sizeof(PetscReal));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@
   PetscDTReconstructPoly - create matrix representing polynomial reconstruction using cell intervals and evaluation at target intervals

   Not Collective

   Input Arguments:
+  degree - degree of reconstruction polynomial
.  nsource - number of source intervals
.  sourcex - sorted coordinates of source cell boundaries (length nsource+1)
.  ntarget - number of target intervals
-  targetx - sorted coordinates of target cell boundaries (length ntarget+1)

   Output Arguments:
.  R - reconstruction matrix, utarget = sum_s R[t*nsource+s] * usource[s]

   Level: advanced

.seealso: PetscDTLegendreEval()
@*/
PetscErrorCode PetscDTReconstructPoly(PetscInt degree,PetscInt nsource,const PetscReal *sourcex,PetscInt ntarget,const PetscReal *targetx,PetscReal *R)
{
  PetscErrorCode ierr;
  PetscInt       i,j,k,*bdegrees,worksize;
  PetscReal      xmin,xmax,center,hscale,*sourcey,*targety,*Bsource,*Bsinv,*Btarget;
  PetscScalar    *tau,*work;

  PetscFunctionBegin;
  PetscValidRealPointer(sourcex,3);
  PetscValidRealPointer(targetx,5);
  PetscValidRealPointer(R,6);
  if (degree >= nsource) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Reconstruction degree %D must be less than number of source intervals %D",degree,nsource);
#if defined(PETSC_USE_DEBUG)
  for (i=0; i<nsource; i++) {
    if (sourcex[i] >= sourcex[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Source interval %D has negative orientation (%G,%G)",i,sourcex[i],sourcex[i+1]);
  }
  for (i=0; i<ntarget; i++) {
    if (targetx[i] >= targetx[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Target interval %D has negative orientation (%G,%G)",i,targetx[i],targetx[i+1]);
  }
#endif
  xmin     = PetscMin(sourcex[0],targetx[0]);
  xmax     = PetscMax(sourcex[nsource],targetx[ntarget]);
  center   = (xmin + xmax)/2;
  hscale   = (xmax - xmin)/2;
  worksize = nsource;
  ierr = PetscMalloc4(degree+1,PetscInt,&bdegrees,nsource+1,PetscReal,&sourcey,nsource*(degree+1),PetscReal,&Bsource,worksize,PetscScalar,&work);CHKERRQ(ierr);
  ierr = PetscMalloc4(nsource,PetscScalar,&tau,nsource*(degree+1),PetscReal,&Bsinv,ntarget+1,PetscReal,&targety,ntarget*(degree+1),PetscReal,&Btarget);CHKERRQ(ierr);
  for (i=0; i<=nsource; i++) sourcey[i] = (sourcex[i]-center)/hscale;
  for (i=0; i<=degree; i++)  bdegrees[i] = i+1;
  ierr = PetscDTLegendreIntegrate(nsource,sourcey,degree+1,bdegrees,PETSC_TRUE,Bsource);CHKERRQ(ierr);
  ierr = PetscDTPseudoInverseQR(nsource,nsource,degree+1,Bsource,Bsinv,tau,nsource,work);CHKERRQ(ierr);
  for (i=0; i<=ntarget; i++) targety[i] = (targetx[i]-center)/hscale;
  ierr = PetscDTLegendreIntegrate(ntarget,targety,degree+1,bdegrees,PETSC_FALSE,Btarget);CHKERRQ(ierr);
  for (i=0; i<ntarget; i++) {
    PetscReal rowsum = 0;
    for (j=0; j<nsource; j++) {
      PetscReal sum = 0;
      for (k=0; k<degree+1; k++) {
        sum += Btarget[i*(degree+1)+k] * Bsinv[k*nsource+j];
      }
      R[i*nsource+j] = sum;
      rowsum        += sum;
    }
    for (j=0; j<nsource; j++) R[i*nsource+j] /= rowsum; /* normalize each row */
  }
  ierr = PetscFree4(bdegrees,sourcey,Bsource,work);CHKERRQ(ierr);
  ierr = PetscFree4(tau,Bsinv,targety,Btarget);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
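/*
   A minimal usage sketch (the helper name and data are hypothetical, not from the
   source): reconstruct with a degree-1 polynomial from three source cells onto two
   target cells, then apply R exactly as the man page above documents.
*/
static PetscErrorCode ReconstructExample(void)
{
  const PetscReal sourcex[] = {0.0,1.0,2.0,3.0}; /* 3 source intervals */
  const PetscReal targetx[] = {0.5,1.5,2.5};     /* 2 target intervals */
  PetscReal       R[2*3];                        /* ntarget x nsource */
  PetscReal       usource[3] = {1.0,2.0,3.0},utarget[2];
  PetscInt        t,s;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscDTReconstructPoly(1,3,sourcex,2,targetx,R);CHKERRQ(ierr);
  /* utarget = sum_s R[t*nsource+s] * usource[s] */
  for (t=0; t<2; t++) {
    utarget[t] = 0.0;
    for (s=0; s<3; s++) utarget[t] += R[t*3+s]*usource[s];
  }
  PetscFunctionReturn(0);
}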
EXTERN_C_BEGIN
/*
    MatOrdering_ND - Find the nested dissection ordering of a given matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "MatOrdering_ND"
PetscErrorCode PETSCMAT_DLLEXPORT MatOrdering_ND(Mat mat,const MatOrderingType type,IS *row,IS *col)
{
  PetscErrorCode ierr;
  PetscInt       i,*mask,*xls,*ls,nrow,*ia,*ja,*perm;
  PetscTruth     done;

  PetscFunctionBegin;
  ierr = MatGetRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr);
  if (!done) SETERRQ1(PETSC_ERR_SUP,"Cannot get rows for matrix type %s",((PetscObject)mat)->type_name);

  ierr = PetscMalloc4(nrow,PetscInt,&mask,nrow,PetscInt,&perm,nrow+1,PetscInt,&xls,nrow,PetscInt,&ls);CHKERRQ(ierr);
  SPARSEPACKgennd(&nrow,ia,ja,mask,perm,xls,ls);
  ierr = MatRestoreRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr);

  /* shift because Sparsepack indices start at one */
  for (i=0; i<nrow; i++) perm[i]--;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,row);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,col);CHKERRQ(ierr);
  ierr = PetscFree4(mask,perm,xls,ls);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatLUFactorSymbolic_Essl(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;
  Mat_Essl       *essl;
  PetscReal      f = 1.0;

  PetscFunctionBegin;
  essl = (Mat_Essl*)(B->spptr);

  /* allocate the work arrays required by ESSL */
  f    = info->fill;
  ierr = PetscBLASIntCast(a->nz,&essl->nz);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((PetscInt)(a->nz*f),&essl->lna);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(100 + 10*A->rmap->n,&essl->naux);CHKERRQ(ierr);

  /* since malloc is slow on IBM we try a single malloc */
  ierr = PetscMalloc4(essl->lna,&essl->a,essl->naux,&essl->aux,essl->lna,&essl->ia,essl->lna,&essl->ja);CHKERRQ(ierr);

  essl->CleanUpESSL = PETSC_TRUE;

  ierr = PetscLogObjectMemory((PetscObject)B,essl->lna*(2*sizeof(int)+sizeof(PetscScalar)) + essl->naux*sizeof(PetscScalar));CHKERRQ(ierr);

  B->ops->lufactornumeric = MatLUFactorNumeric_Essl;
  PetscFunctionReturn(0);
}
PetscErrorCode KSPSetUp_CG(KSP ksp)
{
  KSP_CG         *cgP = (KSP_CG*)ksp->data;
  PetscErrorCode ierr;
  PetscInt       maxit = ksp->max_it,nwork = 3;

  PetscFunctionBegin;
  /* This implementation of CG only handles left preconditioning
     so generate an error otherwise. */
  if (ksp->pc_side == PC_RIGHT) {
    SETERRQ(PETSC_ERR_SUP,"No right preconditioning for KSPCG");
  } else if (ksp->pc_side == PC_SYMMETRIC) {
    SETERRQ(PETSC_ERR_SUP,"No symmetric preconditioning for KSPCG");
  }

  /* get work vectors needed by CG */
  if (cgP->singlereduction) nwork += 2;
  ierr = KSPDefaultGetWork(ksp,nwork);CHKERRQ(ierr);

  /* If user requested computations of eigenvalues then allocate the work space needed */
  if (ksp->calc_sings) {
    /* get space to store tridiagonal matrix for Lanczos */
    ierr = PetscMalloc4(maxit+1,PetscScalar,&cgP->e,maxit+1,PetscScalar,&cgP->d,maxit+1,PetscReal,&cgP->ee,maxit+1,PetscReal,&cgP->dd);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(ksp,2*(maxit+1)*(sizeof(PetscScalar)+sizeof(PetscReal)));CHKERRQ(ierr);

    ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_CG;
    ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_CG;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode KSPSetUp_FCG(KSP ksp)
{
  PetscErrorCode ierr;
  KSP_FCG        *fcg = (KSP_FCG*)ksp->data;
  PetscInt       maxit = ksp->max_it;
  const PetscInt nworkstd = 2;

  PetscFunctionBegin;
  /* Allocate "standard" work vectors (not including the basis and transformed basis vectors) */
  ierr = KSPSetWorkVecs(ksp,nworkstd);CHKERRQ(ierr);

  /* Allocate space for pointers to additional work vectors; note that mmax is the number
     of previous directions, so we add 1 for the current direction, and an extra 1 for
     the prealloc (which might be empty) */
  ierr = PetscMalloc5(fcg->mmax+1,&fcg->Pvecs,fcg->mmax+1,&fcg->Cvecs,fcg->mmax+1,&fcg->pPvecs,fcg->mmax+1,&fcg->pCvecs,fcg->mmax+2,&fcg->chunksizes);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)ksp,2*(fcg->mmax+1)*sizeof(Vec*) + 2*(fcg->mmax+1)*sizeof(Vec**) + (fcg->mmax+2)*sizeof(PetscInt));CHKERRQ(ierr);

  /* Preallocate additional work vectors */
  ierr = KSPAllocateVectors_FCG(ksp,fcg->nprealloc,fcg->nprealloc);CHKERRQ(ierr);

  /* If user requested computations of eigenvalues then allocate the work space needed */
  if (ksp->calc_sings) {
    /* get space to store tridiagonal matrix for Lanczos */
    ierr = PetscMalloc4(maxit,&fcg->e,maxit,&fcg->d,maxit,&fcg->ee,maxit,&fcg->dd);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)ksp,2*maxit*(sizeof(PetscScalar)+sizeof(PetscReal)));CHKERRQ(ierr);

    ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_CG;
    ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_CG;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode KSPSetUp_CG(KSP ksp)
{
  KSP_CG         *cgP = (KSP_CG*)ksp->data;
  PetscErrorCode ierr;
  PetscInt       maxit = ksp->max_it,nwork = 3;

  PetscFunctionBegin;
  /* get work vectors needed by CG */
  if (cgP->singlereduction) nwork += 2;
  ierr = KSPSetWorkVecs(ksp,nwork);CHKERRQ(ierr);

  /* If user requested computations of eigenvalues then allocate the work space needed */
  if (ksp->calc_sings) {
    /* get space to store tridiagonal matrix for Lanczos */
    ierr = PetscMalloc4(maxit+1,&cgP->e,maxit+1,&cgP->d,maxit+1,&cgP->ee,maxit+1,&cgP->dd);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)ksp,2*(maxit+1)*(sizeof(PetscScalar)+sizeof(PetscReal)));CHKERRQ(ierr);

    ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_CG;
    ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_CG;
  }
  PetscFunctionReturn(0);
}
/*@C
   PetscDrawViewPortsCreate - Splits a window into smaller view ports. Each processor shares all the viewports.

   Collective on PetscDraw

   Input Parameters:
+  draw - the drawing context
-  nports - the number of ports

   Output Parameter:
.  ports - a PetscDrawViewPorts context (C structure)

   Options Database:
.  -draw_ports - display multiple fields in the same window with PetscDrawViewPorts instead of in separate windows

   Level: advanced

   Concepts: drawing^in subset of window

.seealso: PetscDrawSplitViewPort(), PetscDrawSetViewPort(), PetscDrawViewPortsSet(), PetscDrawViewPortsDestroy()
@*/
PetscErrorCode PetscDrawViewPortsCreate(PetscDraw draw,PetscInt nports,PetscDrawViewPorts **newports)
{
  PetscDrawViewPorts *ports;
  PetscInt           i,n;
  PetscBool          isnull;
  PetscMPIInt        rank;
  PetscReal          *xl,*xr,*yl,*yr,h;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(draw,PETSC_DRAW_CLASSID,1);
  if (nports < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of divisions must be positive: %d",nports);
  PetscValidPointer(newports,3);
  ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
  if (isnull) {*newports = NULL; PetscFunctionReturn(0);}
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)draw),&rank);CHKERRQ(ierr);

  ierr = PetscNew(&ports);CHKERRQ(ierr);
  *newports     = ports;
  ports->draw   = draw;
  ports->nports = nports;
  ierr = PetscObjectReference((PetscObject)draw);CHKERRQ(ierr);
  /* save previous drawport of window */
  ierr = PetscDrawGetViewPort(draw,&ports->port_xl,&ports->port_yl,&ports->port_xr,&ports->port_yr);CHKERRQ(ierr);

  n = (PetscInt)(.1 + PetscSqrtReal((PetscReal)nports));
  while (n*n < nports) n++;
  h = 1.0/n;

  ierr = PetscMalloc4(n*n,&xl,n*n,&xr,n*n,&yl,n*n,&yr);CHKERRQ(ierr);
  ports->xl = xl;
  ports->xr = xr;
  ports->yl = yl;
  ports->yr = yr;

  ierr = PetscDrawSetCoordinates(draw,0.0,0.0,1.0,1.0);CHKERRQ(ierr);
  ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
  for (i=0; i<n*n; i++) {
    xl[i] = (i % n)*h;
    xr[i] = xl[i] + h;
    yl[i] = (i / n)*h;
    yr[i] = yl[i] + h;

    if (!rank) {
      ierr = PetscDrawLine(draw,xl[i],yl[i],xl[i],yr[i],PETSC_DRAW_BLACK);CHKERRQ(ierr);
      ierr = PetscDrawLine(draw,xl[i],yr[i],xr[i],yr[i],PETSC_DRAW_BLACK);CHKERRQ(ierr);
      ierr = PetscDrawLine(draw,xr[i],yr[i],xr[i],yl[i],PETSC_DRAW_BLACK);CHKERRQ(ierr);
      ierr = PetscDrawLine(draw,xr[i],yl[i],xl[i],yl[i],PETSC_DRAW_BLACK);CHKERRQ(ierr);
    }

    xl[i] += .05*h;
    xr[i] -= .05*h;
    yl[i] += .05*h;
    yr[i] -= .05*h;
  }
  ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
  ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
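/*
   A minimal usage sketch (the helper name is hypothetical, not part of the source):
   split a window into four view ports and write a label into each, using only the
   routines named in the .seealso list above plus PetscDrawString().
*/
static PetscErrorCode LabelFourPorts(PetscDraw draw)
{
  PetscDrawViewPorts *ports;
  PetscInt           i;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = PetscDrawViewPortsCreate(draw,4,&ports);CHKERRQ(ierr);
  for (i=0; i<4; i++) {
    ierr = PetscDrawViewPortsSet(ports,i);CHKERRQ(ierr); /* make port i the active drawing region */
    ierr = PetscDrawString(draw,.25,.5,PETSC_DRAW_BLUE,"port");CHKERRQ(ierr);
  }
  ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
  ierr = PetscDrawViewPortsDestroy(ports);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}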
PetscErrorCode NEPSolve_Interpol(NEP nep)
{
  PetscErrorCode ierr;
  NEP_INTERPOL   *ctx = (NEP_INTERPOL*)nep->data;
  Mat            *A;  /* T=nep->function, Tp=nep->jacobian */
  PetscScalar    *x,*fx,t;
  PetscReal      *cs,a,b,s;
  PetscInt       i,j,k,deg=ctx->deg;

  PetscFunctionBegin;
  ierr = PetscMalloc4(deg+1,&A,(deg+1)*(deg+1),&cs,deg+1,&x,(deg+1)*nep->nt,&fx);CHKERRQ(ierr);
  ierr = RGIntervalGetEndpoints(nep->rg,&a,&b,NULL,NULL);CHKERRQ(ierr);
  ierr = ChebyshevNodes(deg,a,b,x,cs);CHKERRQ(ierr);
  for (j=0;j<nep->nt;j++) {
    for (i=0;i<=deg;i++) {
      ierr = FNEvaluateFunction(nep->f[j],x[i],&fx[i+j*(deg+1)]);CHKERRQ(ierr);
    }
  }

  /* Polynomial coefficients */
  for (k=0;k<=deg;k++) {
    ierr = MatDuplicate(nep->A[0],MAT_COPY_VALUES,&A[k]);CHKERRQ(ierr);
    t = 0.0;
    for (i=0;i<deg+1;i++) t += fx[i]*cs[i*(deg+1)+k];
    t *= 2.0/(deg+1);
    if (k==0) t /= 2.0;
    ierr = MatScale(A[k],t);CHKERRQ(ierr);
    for (j=1;j<nep->nt;j++) {
      t = 0.0;
      for (i=0;i<deg+1;i++) t += fx[i+j*(deg+1)]*cs[i*(deg+1)+k];
      t *= 2.0/(deg+1);
      if (k==0) t /= 2.0;
      ierr = MatAXPY(A[k],t,nep->A[j],SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  }
  ierr = PEPSetOperators(ctx->pep,deg+1,A);CHKERRQ(ierr);
  for (k=0;k<=deg;k++) {
    ierr = MatDestroy(&A[k]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(A,cs,x,fx);CHKERRQ(ierr);

  /* Solve polynomial eigenproblem */
  ierr = PEPSolve(ctx->pep);CHKERRQ(ierr);
  ierr = PEPGetConverged(ctx->pep,&nep->nconv);CHKERRQ(ierr);
  ierr = PEPGetIterationNumber(ctx->pep,&nep->its);CHKERRQ(ierr);
  ierr = PEPGetConvergedReason(ctx->pep,(PEPConvergedReason*)&nep->reason);CHKERRQ(ierr);
  s = 2.0/(b-a);
  for (i=0;i<nep->nconv;i++) {
    ierr = PEPGetEigenpair(ctx->pep,i,&nep->eigr[i],&nep->eigi[i],NULL,NULL);CHKERRQ(ierr);
    nep->eigr[i] /= s;
    nep->eigr[i] += (a+b)/2.0;
    nep->eigi[i] /= s;
  }
  nep->state = NEP_STATE_EIGENVECTORS;
  PetscFunctionReturn(0);
}
/*@C
  PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex

  Not Collective

  Input Arguments:
+ dim - The simplex dimension
. order - The number of points in one dimension
. a - left end of interval (often -1)
- b - right end of interval (often +1)

  Output Argument:
. q - A PetscQuadrature object

  Level: intermediate

  References: Karniadakis and Sherwin. FIAT

.seealso: PetscDTGaussTensorQuadrature(), PetscDTGaussQuadrature()
@*/
PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt order, PetscReal a, PetscReal b, PetscQuadrature *q)
{
  PetscInt       npoints = dim > 1 ? dim > 2 ? order*PetscSqr(order) : PetscSqr(order) : order;
  PetscReal      *px, *wx, *py, *wy, *pz, *wz, *x, *w;
  PetscInt       i, j, k;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
  ierr = PetscMalloc1(npoints*dim, &x);CHKERRQ(ierr);
  ierr = PetscMalloc1(npoints, &w);CHKERRQ(ierr);
  switch (dim) {
  case 0:
    ierr = PetscFree(x);CHKERRQ(ierr);
    ierr = PetscFree(w);CHKERRQ(ierr);
    ierr = PetscMalloc1(1, &x);CHKERRQ(ierr);
    ierr = PetscMalloc1(1, &w);CHKERRQ(ierr);
    x[0] = 0.0;
    w[0] = 1.0;
    break;
  case 1:
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, x, w);CHKERRQ(ierr);
    break;
  case 2:
    ierr = PetscMalloc4(order,&px,order,&wx,order,&py,order,&wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    for (i = 0; i < order; ++i) {
      for (j = 0; j < order; ++j) {
        ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*order+j)*2+0], &x[(i*order+j)*2+1]);CHKERRQ(ierr);
        w[i*order+j] = 0.5 * wx[i] * wy[j];
      }
    }
    ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr);
    break;
  case 3:
    ierr = PetscMalloc6(order,&px,order,&wx,order,&py,order,&wy,order,&pz,order,&wz);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 2.0, 0.0, pz, wz);CHKERRQ(ierr);
    for (i = 0; i < order; ++i) {
      for (j = 0; j < order; ++j) {
        for (k = 0; k < order; ++k) {
          ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*order+j)*order+k)*3+0], &x[((i*order+j)*order+k)*3+1], &x[((i*order+j)*order+k)*3+2]);CHKERRQ(ierr);
          w[(i*order+j)*order+k] = 0.125 * wx[i] * wy[j] * wz[k];
        }
      }
    }
    ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim);
  }
  ierr = PetscQuadratureCreate(PETSC_COMM_SELF, q);CHKERRQ(ierr);
  ierr = PetscQuadratureSetData(*q, dim, npoints, x, w);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
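/*
   A small usage sketch under stated assumptions (the helper name is hypothetical):
   build an order-2 rule on the reference triangle and read the data back.  The
   (dim,npoints,points,weights) form of PetscQuadratureGetData() is assumed for
   this era of the API.
*/
static PetscErrorCode TriangleQuadratureExample(void)
{
  PetscQuadrature q;
  const PetscReal *points,*weights;
  PetscInt        qdim,qnpoints;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscDTGaussJacobiQuadrature(2,2,-1.0,1.0,&q);CHKERRQ(ierr);
  ierr = PetscQuadratureGetData(q,&qdim,&qnpoints,&points,&weights);CHKERRQ(ierr);
  /* points holds qnpoints*qdim coordinates; weights holds qnpoints values */
  ierr = PetscQuadratureDestroy(&q);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}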
PetscErrorCode KSPSetUp_AGMRES(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       hes;
  PetscInt       nloc;
  KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
  PetscInt       neig    = agmres->neig;
  PetscInt       max_k   = agmres->max_k;
  PetscInt       N       = MAXKSPSIZE;
  PetscInt       lwork   = PetscMax(8 * N + 16, 4 * neig * (N - neig));

  PetscFunctionBegin;
  if (ksp->pc_side == PC_SYMMETRIC) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"no symmetric preconditioning for KSPAGMRES");
  max_k = agmres->max_k;
  N     = MAXKSPSIZE;
  /* Preallocate space during the call to KSPSetup_GMRES for the Krylov basis */
  agmres->q_preallocate = PETSC_TRUE; /* No allocation on the fly */
  /* Preallocate space to compute later the eigenvalues in GMRES */
  ksp->calc_sings = PETSC_TRUE;
  agmres->max_k   = N; /* Set the augmented size to be allocated in KSPSetup_GMRES */
  ierr = KSPSetUp_DGMRES(ksp);CHKERRQ(ierr);
  agmres->max_k = max_k;
  hes = (N + 1) * (N + 1);

  /* Data for the Newton basis GMRES */
  ierr = PetscMalloc4(max_k,PetscScalar,&agmres->Rshift,max_k,PetscScalar,&agmres->Ishift,hes,PetscScalar,&agmres->Rloc,((N+1)*4),PetscScalar,&agmres->wbufptr);CHKERRQ(ierr);
  ierr = PetscMalloc7((N+1),PetscScalar,&agmres->Scale,(N+1),PetscScalar,&agmres->sgn,(N+1),PetscScalar,&agmres->tloc,(N+1),PetscScalar,&agmres->temp,(N+1),PetscScalar,&agmres->tau,lwork,PetscScalar,&agmres->work,(N+1),PetscScalar,&agmres->nrs);CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->Rshift, max_k*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->Ishift, max_k*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->Scale,  (N+1)*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->Rloc,   (N+1)*(N+1)*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->sgn,    (N+1)*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->tloc,   (N+1)*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->temp,   (N+1)*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->wbufptr,(N+1)*4*sizeof(PetscScalar));CHKERRQ(ierr);

  /* Allocate space for the vectors in the orthogonalized basis */
  ierr = VecGetLocalSize(agmres->vecs[0], &nloc);CHKERRQ(ierr);
  ierr = PetscMalloc(nloc*(N+1)*sizeof(PetscScalar), &agmres->Qloc);CHKERRQ(ierr);

  /* Init the ring of processors for the roddec orthogonalization */
  ierr = KSPAGMRESRoddecInitNeighboor(ksp);CHKERRQ(ierr);

  if (agmres->neig < 1) PetscFunctionReturn(0);

  /* Allocate space for the deflation */
  ierr = PetscMalloc(N*sizeof(PetscScalar), &agmres->select);CHKERRQ(ierr);
  ierr = VecDuplicateVecs(VEC_V(0), N, &agmres->TmpU);CHKERRQ(ierr);
  ierr = PetscMalloc2(N*N, PetscScalar, &agmres->MatEigL, N*N, PetscScalar, &agmres->MatEigR);CHKERRQ(ierr);
  /* ierr = PetscMalloc6(N*N, PetscScalar, &agmres->Q, N*N, PetscScalar, &agmres->Z, N, PetscScalar, &agmres->wr, N, PetscScalar, &agmres->wi, N, PetscScalar, &agmres->beta, N, PetscScalar, &agmres->modul);CHKERRQ(ierr); */
  ierr = PetscMalloc3(N*N, PetscScalar, &agmres->Q, N*N, PetscScalar, &agmres->Z, N, PetscScalar, &agmres->beta);CHKERRQ(ierr);
  ierr = PetscMalloc2((N+1),PetscInt,&agmres->perm,(2*neig*N),PetscInt,&agmres->iwork);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C
  PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex

  Not Collective

  Input Arguments:
+ dim - The simplex dimension
. npoints - number of points
. a - left end of interval (often -1)
- b - right end of interval (often +1)

  Output Arguments:
+ points - quadrature points
- weights - quadrature weights

  Level: intermediate

  References: Karniadakis and Sherwin. FIAT

.seealso: PetscDTGaussQuadrature()
@*/
PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt npoints, PetscReal a, PetscReal b, PetscReal *points[], PetscReal *weights[])
{
  PetscReal      *px, *wx, *py, *wy, *pz, *wz, *x, *w;
  PetscInt       i, j, k;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
  switch (dim) {
  case 1:
    ierr = PetscMalloc(npoints * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, x, w);CHKERRQ(ierr);
    break;
  case 2:
    ierr = PetscMalloc(npoints*npoints*2 * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints*npoints * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscMalloc4(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    for (i = 0; i < npoints; ++i) {
      for (j = 0; j < npoints; ++j) {
        ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*npoints+j)*2+0], &x[(i*npoints+j)*2+1]);CHKERRQ(ierr);
        w[i*npoints+j] = 0.5 * wx[i] * wy[j];
      }
    }
    ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr);
    break;
  case 3:
    /* the tensor loops below fill npoints^3 tetrahedron points, so size x and w accordingly */
    ierr = PetscMalloc(npoints*npoints*npoints*3 * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints*npoints*npoints * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscMalloc6(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy,npoints,PetscReal,&pz,npoints,PetscReal,&wz);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 2.0, 0.0, pz, wz);CHKERRQ(ierr);
    for (i = 0; i < npoints; ++i) {
      for (j = 0; j < npoints; ++j) {
        for (k = 0; k < npoints; ++k) {
          ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*npoints+j)*npoints+k)*3+0], &x[((i*npoints+j)*npoints+k)*3+1], &x[((i*npoints+j)*npoints+k)*3+2]);CHKERRQ(ierr);
          w[(i*npoints+j)*npoints+k] = 0.125 * wx[i] * wy[j] * wz[k];
        }
      }
    }
    ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim);
  }
  if (points)  *points  = x;
  if (weights) *weights = w;
  PetscFunctionReturn(0);
}
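/*
   A minimal sketch for this (points,weights) variant (the helper name is
   hypothetical): a 3x3 tensor rule mapped to the reference triangle; per the man
   page, the caller owns and frees the returned arrays.
*/
static PetscErrorCode TriangleRuleExample(void)
{
  PetscReal      *points,*weights;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscDTGaussJacobiQuadrature(2,3,-1.0,1.0,&points,&weights);CHKERRQ(ierr);
  /* integrate f over the triangle: sum_i weights[i]*f(points[2*i+0],points[2*i+1]) over the 9 points */
  ierr = PetscFree(points);CHKERRQ(ierr);
  ierr = PetscFree(weights);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}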
/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
  except it takes TWO ilengths and outputs TWO olengths.

  Collective on MPI_Comm

  Input Parameters:
+ comm   - Communicator
. nsends - number of messages that are to be sent.
. nrecvs - number of messages being received
- ilengths1, ilengths2 - array of integers of length sizeof(comm); a nonzero ilengths[i] represents a message to i of length ilengths[i]

  Output Parameters:
+ onodes - list of node-ids from which messages are expected
- olengths1, olengths2 - corresponding message lengths

  Level: developer

  Concepts: mpi utility

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes and olengths.

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
@*/
PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  ierr    = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      ierr     = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr);
      j++;
    }
  }
  if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);}

  /* Pack up the received data */
  ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
    buf_j        = buf_r + (2*i);

    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }
  ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
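/*
   A hedged usage sketch (the helper name is hypothetical): the typical pairing with
   PetscGatherNumberOfMessages(), as suggested in the Notes above.  The
   ilengths1/ilengths2 arrays (one entry per rank) and nsends are assumed to have
   been computed by the caller.
*/
static PetscErrorCode GatherLengthsExample(MPI_Comm comm,PetscMPIInt nsends,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[])
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,*onodes,*olengths1,*olengths2;

  PetscFunctionBegin;
  ierr = PetscGatherNumberOfMessages(comm,NULL,ilengths1,&nrecvs);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,nsends,nrecvs,ilengths1,ilengths2,&onodes,&olengths1,&olengths2);CHKERRQ(ierr);
  /* ... post MPI_Irecv()s of sizes olengths1[i]/olengths2[i] from ranks onodes[i] ... */
  ierr = PetscFree(onodes);CHKERRQ(ierr);
  ierr = PetscFree(olengths1);CHKERRQ(ierr);
  ierr = PetscFree(olengths2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}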
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode     ierr;
  PetscMatStashSpace space;
  PetscInt           n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar        **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt      j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode SNESSetUp_QN(SNES snes)
{
  SNES_QN        *qn = (SNES_QN*)snes->data;
  PetscErrorCode ierr;
  DM             dm;

  PetscFunctionBegin;
  if (!snes->vec_sol) {
    ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr);
    ierr = DMCreateGlobalVector(dm,&snes->vec_sol);CHKERRQ(ierr);
  }
  ierr = VecDuplicateVecs(snes->vec_sol, qn->m, &qn->U);CHKERRQ(ierr);
  if (qn->type != SNES_QN_BROYDEN) {
    ierr = VecDuplicateVecs(snes->vec_sol, qn->m, &qn->V);CHKERRQ(ierr);
  }
  ierr = PetscMalloc4(qn->m,&qn->alpha,qn->m,&qn->beta,qn->m,&qn->dXtdF,qn->m,&qn->lambda);CHKERRQ(ierr);

  if (qn->singlereduction) {
    ierr = PetscMalloc3(qn->m*qn->m,&qn->dXdFmat,qn->m,&qn->dFtdX,qn->m,&qn->YtdX);CHKERRQ(ierr);
  }
  ierr = SNESSetWorkVecs(snes,4);CHKERRQ(ierr);

  /* set method defaults */
  if (qn->scale_type == SNES_QN_SCALE_DEFAULT) {
    if (qn->type == SNES_QN_BADBROYDEN) {
      qn->scale_type = SNES_QN_SCALE_NONE;
    } else {
      qn->scale_type = SNES_QN_SCALE_SHANNO;
    }
  }
  if (qn->restart_type == SNES_QN_RESTART_DEFAULT) {
    if (qn->type == SNES_QN_LBFGS) {
      qn->restart_type = SNES_QN_RESTART_POWELL;
    } else {
      qn->restart_type = SNES_QN_RESTART_PERIODIC;
    }
  }
  if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) {
    ierr = SNESSetUpMatrices(snes);CHKERRQ(ierr);
  }
  if (snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_DEFAULT) {snes->functype = SNES_FUNCTION_UNPRECONDITIONED;}
  PetscFunctionReturn(0);
}
/*@
   NEPAllocateSolution - Allocate memory storage for common variables such
   as eigenvalues and eigenvectors.

   Collective on NEP

   Input Parameters:
+  nep   - eigensolver context
-  extra - number of additional positions, used for methods that require a
           working basis slightly larger than ncv

   Developers Note:
   This is PETSC_EXTERN because it may be required by user plugin NEP
   implementations.

   Level: developer
@*/
PetscErrorCode NEPAllocateSolution(NEP nep,PetscInt extra)
{
  PetscErrorCode ierr;
  PetscInt       oldsize,newc,requested;
  PetscLogDouble cnt;
  Mat            T;
  Vec            t;

  PetscFunctionBegin;
  requested = nep->ncv + extra;

  /* oldsize is zero if this is the first time setup is called */
  ierr = BVGetSizes(nep->V,NULL,NULL,&oldsize);CHKERRQ(ierr);
  newc = PetscMax(0,requested-oldsize);

  /* allocate space for eigenvalues and friends */
  if (requested != oldsize) {
    if (oldsize) {
      ierr = PetscFree4(nep->eigr,nep->eigi,nep->errest,nep->perm);CHKERRQ(ierr);
    }
    ierr = PetscMalloc4(requested,&nep->eigr,requested,&nep->eigi,requested,&nep->errest,requested,&nep->perm);CHKERRQ(ierr);
    cnt  = 2*newc*sizeof(PetscScalar) + newc*sizeof(PetscReal) + newc*sizeof(PetscInt); /* eigr and eigi are both PetscScalar */
    ierr = PetscLogObjectMemory((PetscObject)nep,cnt);CHKERRQ(ierr);
  }

  /* allocate V */
  if (!nep->V) { ierr = NEPGetBV(nep,&nep->V);CHKERRQ(ierr); }
  if (!oldsize) {
    if (!((PetscObject)(nep->V))->type_name) {
      ierr = BVSetType(nep->V,BVSVEC);CHKERRQ(ierr);
    }
    if (nep->split) T = nep->A[0];
    else {
      ierr = NEPGetFunction(nep,&T,NULL,NULL,NULL);CHKERRQ(ierr);
    }
    ierr = MatGetVecs(T,&t,NULL);CHKERRQ(ierr);
    ierr = BVSetSizesFromVec(nep->V,t,requested);CHKERRQ(ierr);
    ierr = VecDestroy(&t);CHKERRQ(ierr);
  } else {
    ierr = BVResize(nep->V,requested,PETSC_FALSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
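/*
   Sketch of the intended call site (the solver name and ncv default are
   hypothetical): per the Developers Note above, a plugin's NEPSetUp_XXX calls
   NEPAllocateSolution() once the basis size ncv has been decided.
*/
PetscErrorCode NEPSetUp_MySolver(NEP nep)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!nep->ncv) nep->ncv = 20;                    /* assumed default basis size */
  ierr = NEPAllocateSolution(nep,0);CHKERRQ(ierr); /* no extra positions needed */
  PetscFunctionReturn(0);
}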
PetscErrorCode KSPSetUp_CGNE(KSP ksp)
{
  KSP_CG         *cgP = (KSP_CG*)ksp->data;
  PetscErrorCode ierr;
  PetscInt       maxit = ksp->max_it;

  PetscFunctionBegin;
  /* get work vectors needed by CGNE */
  ierr = KSPSetWorkVecs(ksp,4);CHKERRQ(ierr);

  /* If user requested computations of eigenvalues then allocate the work space needed */
  if (ksp->calc_sings) {
    /* get space to store tridiagonal matrix for Lanczos */
    ierr = PetscMalloc4(maxit+1,&cgP->e,maxit+1,&cgP->d,maxit+1,&cgP->ee,maxit+1,&cgP->dd);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)ksp,2*(maxit+1)*(sizeof(PetscScalar)+sizeof(PetscReal)));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
  MatGetOrdering_ND - Find the nested dissection ordering of a given matrix.
*/
PETSC_INTERN PetscErrorCode MatGetOrdering_ND(Mat mat,MatOrderingType type,IS *row,IS *col)
{
  PetscErrorCode ierr;
  PetscInt       i,*mask,*xls,*ls,nrow,*perm;
  const PetscInt *ia,*ja;
  PetscBool      done;

  PetscFunctionBegin;
  ierr = MatGetRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr);
  if (!done) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot get rows for matrix type %s",((PetscObject)mat)->type_name);

  ierr = PetscMalloc4(nrow,&mask,nrow,&perm,nrow+1,&xls,nrow,&ls);CHKERRQ(ierr);
  SPARSEPACKgennd(&nrow,ia,ja,mask,perm,xls,ls);
  ierr = MatRestoreRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,NULL,&ia,&ja,&done);CHKERRQ(ierr);

  /* shift because Sparsepack indices start at one */
  for (i=0; i<nrow; i++) perm[i]--;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_COPY_VALUES,row);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_COPY_VALUES,col);CHKERRQ(ierr);
  ierr = PetscFree4(mask,perm,xls,ls);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
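/*
   A caller-side sketch (the helper name is hypothetical): nested dissection is
   normally requested through the public MatGetOrdering() interface with type
   MATORDERINGND rather than by calling MatGetOrdering_ND() directly.
*/
static PetscErrorCode PermuteWithND(Mat A,Mat *Aperm)
{
  PetscErrorCode ierr;
  IS             rperm,cperm;

  PetscFunctionBegin;
  ierr = MatGetOrdering(A,MATORDERINGND,&rperm,&cperm);CHKERRQ(ierr);
  ierr = MatPermute(A,rperm,cperm,Aperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rperm);CHKERRQ(ierr);
  ierr = ISDestroy(&cperm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}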
PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *aij = (Mat_MPIAIJ*)A->data;
  PetscInt               nz   = aij->B->cmap->n;
  PetscContainer         cont;
  MPIAIJ_MPIDense        *contents;
  VecScatter             ctx   = aij->Mvctx;
  VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
  VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
  PetscInt               m     = A->rmap->n,n = B->cmap->n;

  PetscFunctionBegin;
  ierr = MatCreate(((PetscObject)B)->comm,C);CHKERRQ(ierr);
  ierr = MatSetSizes(*C,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm,&cont);CHKERRQ(ierr);
  ierr = PetscNew(MPIAIJ_MPIDense,&contents);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(cont,contents);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(cont,MPIAIJ_MPIDenseDestroy);CHKERRQ(ierr);

  /* Create work matrix used to store off-processor rows of B needed for local product */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,nz,B->cmap->N,PETSC_NULL,&contents->workB);CHKERRQ(ierr);

  /* Create work arrays needed */
  ierr = PetscMalloc4(B->cmap->N*from->starts[from->n],PetscScalar,&contents->rvalues,
                      B->cmap->N*to->starts[to->n],PetscScalar,&contents->svalues,
                      from->n,MPI_Request,&contents->rwaits,
                      to->n,MPI_Request,&contents->swaits);CHKERRQ(ierr);

  ierr = PetscObjectCompose((PetscObject)(*C),"workB",(PetscObject)cont);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(cont);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[])
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ*)C->data;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len;
  const PetscInt *idx_i;
  PetscInt       idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i,
                 Mbs,i,j,k,*odata1,*odata2,
                 proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est;
  PetscInt       proc_end=0,*iwork,len_unused,nodata2;
  PetscInt       ois_max; /* max number of is[] on each processor */
  char           *t_p;
  MPI_Comm       comm;
  MPI_Request    *s_waits1,*s_waits2,r_req;
  MPI_Status     *s_status,r_status;
  PetscBT        *table;  /* mark indices of this processor's is[] */
  PetscBT        table_i;
  PetscBT        otable;  /* mark indices of other processors' is[] */
  PetscInt       bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners;
  IS             garray_local,garray_gl;

  PetscFunctionBegin;
  comm = ((PetscObject)C)->comm;
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len  = PetscMax(is_max,size);
  ierr = PetscMalloc2(len,PetscBT,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,char,&t_p);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
  }

  ierr = MPI_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);

  /* 1. Send this processor's is[] to other processors */
  /*---------------------------------------------------*/
  /* allocate spaces */
  ierr = PetscMalloc(is_max*sizeof(PetscInt),&n);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<is_max; i++) {
    ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
    len += n[i];
  }
  if (!len) {
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

  ierr = PetscMalloc((size*len+1)*sizeof(PetscInt),&data1);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt*),&data1_start);CHKERRQ(ierr);
  for (i=0; i<size; i++) data1_start[i] = data1 + i*len;

  ierr = PetscMalloc4(size,PetscInt,&len_s,size,PetscInt,&btable,size,PetscInt,&iwork,size+1,PetscInt,&Bowners);CHKERRQ(ierr);

  /* gather c->garray from all processors */
  ierr = ISCreateGeneral(comm,Bnbs,c->garray,&garray_local);CHKERRQ(ierr);
  ierr = ISAllGather(garray_local,&garray_gl);CHKERRQ(ierr);
  ierr = ISDestroy(garray_local);CHKERRQ(ierr);
  ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  Bowners[0] = 0;
  for (i=0; i<size; i++) Bowners[i+1] += Bowners[i];

  if (is_max) {
    /* hash table ctable which maps c->row to proc_id */
    ierr = PetscMalloc(Mbs*sizeof(PetscInt),&ctable);CHKERRQ(ierr);
    for (proc_id=0,j=0; proc_id<size; proc_id++) {
      for (; j<C->rmap->range[proc_id+1]/bs; j++) {
        ctable[j] = proc_id;
      }
    }

    /* hash tables marking c->garray */
    ierr = ISGetIndices(garray_gl,&idx_i);CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      table_i = table[i];
      ierr    = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr);
      for (j = Bowners[i]; j<Bowners[i+1]; j++) { /* go through B cols of proc[i] */
        ierr = PetscBTSet(table_i,idx_i[j]);CHKERRQ(ierr);
      }
    }
    ierr = ISRestoreIndices(garray_gl,&idx_i);CHKERRQ(ierr);
  } /* if (is_max) */
  ierr = ISDestroy(garray_gl);CHKERRQ(ierr);

  /* evaluate communication - mesg to who, length, and buffer space */
  for (i=0; i<size; i++) len_s[i] = 0;

  /* header of data1 */
  for (proc_id=0; proc_id<size; proc_id++) {
    iwork[proc_id]        = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j=0; j<is_max; j++) {
      if (proc_id == rank) {
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i=0; i<is_max; i++) {
    ierr = ISGetIndices(is[i],&idx_i);CHKERRQ(ierr);
    for (j=0; j<n[i]; j++) {
      idx = idx_i[j];
      *data1_start[rank] = idx; data1_start[rank]++; /* for local processing */
      proc_end = ctable[idx];
      for (proc_id=0; proc_id<=proc_end; proc_id++) { /* for others to process */
        if (proc_id == rank) continue; /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) continue; /* no need for sending idx to [proc_id] */
        *data1_start[proc_id] = idx; data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data */
    for (proc_id=0; proc_id<size; proc_id++) {
      if (proc_id == rank) continue;
      *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id]                 = len_s[proc_id];
    }
    ierr = ISRestoreIndices(is[i],&idx_i);CHKERRQ(ierr);
  }

  nrqs = 0; nrqr = 0;
  for (i=0; i<size; i++) {
    data1_start[i] = data1 + i*len;
    if (len_s[i]) {
      nrqs++;
      len_s[i] += 1 + is_max; /* add number of header messages */
    }
  }

  for (i=0; i<is_max; i++) {
    ierr = ISDestroy(is[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(n);CHKERRQ(ierr);
  ierr = PetscFree(ctable);CHKERRQ(ierr);

  /* Determine the number of messages to expect, their lengths, and from-ids */
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&nrqr);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1);CHKERRQ(ierr);

  /* Now post the sends */
  ierr = PetscMalloc2(size,MPI_Request,&s_waits1,size,MPI_Request,&s_waits2);CHKERRQ(ierr);
  k    = 0;
  for (proc_id=0; proc_id<size; proc_id++) { /* send data1 to processor [proc_id] */
    if (len_s[proc_id]) {
      ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr);
      k++;
    }
  }

  /* 2. Receive other's is[] and process. Then send back */
  /*-----------------------------------------------------*/
  len = 0;
  for (i=0; i<nrqr; i++) {
    if (len_r1[i] > len) len = len_r1[i];
  }
  ierr = PetscFree(len_r1);CHKERRQ(ierr);
  ierr = PetscFree(id_r1);CHKERRQ(ierr);

  for (proc_id=0; proc_id<size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&odata1);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt**),&odata2_ptr);CHKERRQ(ierr);
  ierr = PetscBTCreate(Mbs,otable);CHKERRQ(ierr);

  len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */
  len_est = 2*len_max;       /* estimated space of storing is[] for all receiving messages */
  ierr    = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
  nodata2 = 0; /* nodata2+1: number of PetscMalloc(,&odata2_ptr[]) calls */

  odata2_ptr[nodata2] = odata2;

  len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]; needs to be >= len_max */

  k = 0;
  while (k < nrqr) {
    /* Receive messages */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr);
    if (flag) {
      ierr    = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr);
      proc_id = r_status.MPI_SOURCE;
      ierr    = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
      ierr    = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);

      /* Process messages */
      /* make sure there is enough unused space in odata2 array */
      if (len_unused < len_max) { /* allocate more space for odata2 */
        ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
        odata2_ptr[++nodata2] = odata2;
        len_unused = len_est;
      }

      ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);CHKERRQ(ierr);
      len  = 1 + odata2[0];
      for (i=0; i<odata2[0]; i++) {
        len += odata2[1 + i];
      }

      /* Send messages back */
      ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k);CHKERRQ(ierr);
      k++;
      odata2         += len;
      len_unused     -= len;
      len_s[proc_id]  = len; /* number of messages sent back to [proc_id] by this proc */
    }
  }
  ierr = PetscFree(odata1);CHKERRQ(ierr);
  ierr = PetscBTDestroy(otable);CHKERRQ(ierr);

  /* 3. Do local work on this processor's is[] */
  /*-------------------------------------------*/
  /* make sure there is enough unused space in odata2(=data) array */
  len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */
  if (len_unused < len_max) { /* allocate more space for odata2 */
    ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
    odata2_ptr[++nodata2] = odata2;
    len_unused = len_est;
  }

  data = odata2;
  ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table);CHKERRQ(ierr);
  ierr = PetscFree(data1_start);CHKERRQ(ierr);

  /* 4. Receive work done on other processors, then merge */
  /*------------------------------------------------------*/
  /* get max number of messages that this processor expects to recv */
  ierr = MPI_Allreduce(len_s,iwork,size,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
  ierr = PetscMalloc((iwork[rank]+1)*sizeof(PetscInt),&data2);CHKERRQ(ierr);
  ierr = PetscFree4(len_s,btable,iwork,Bowners);CHKERRQ(ierr);

  k = 0;
  while (k < nrqs) {
    /* Receive messages */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status);CHKERRQ(ierr);
    if (flag) {
      ierr    = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr);
      proc_id = r_status.MPI_SOURCE;
      ierr    = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
      ierr    = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);
      if (len > 1+is_max) { /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i=0; i<is_max; i++) {
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs*i;
          isz     = data[1+i];
          for (j=0; j<data2[1+i]; j++) {
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i,col)) {data_i[isz++] = col;}
          }
          data[1+i] = isz;
          if (i < is_max - 1) data2_i += data2[1+i];
        }
      }
      k++;
    }
  }
  ierr = PetscFree(data2);CHKERRQ(ierr);
  ierr = PetscFree2(table,t_p);CHKERRQ(ierr);

  /* phase 1 sends are complete */
  ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);}
  ierr = PetscFree(data1);CHKERRQ(ierr);

  /* phase 2 sends are complete */
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status);CHKERRQ(ierr);}
  ierr = PetscFree2(s_waits1,s_waits2);CHKERRQ(ierr);
  ierr = PetscFree(s_status);CHKERRQ(ierr);

  /* 5. Create new is[] */
  /*--------------------*/
  for (i=0; i<is_max; i++) {
    data_i = data + 1 + is_max + Mbs*i;
    ierr   = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,is+i);CHKERRQ(ierr);
  }
  for (k=0; k<=nodata2; k++) {
    ierr = PetscFree(odata2_ptr[k]);CHKERRQ(ierr);
  }
  ierr = PetscFree(odata2_ptr);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C
   PetscDrawViewPortsCreateRect - Splits a window into smaller view ports. Each processor shares all the viewports.
   The number of views in the x- and y-directions is specified.

   Collective on PetscDraw

   Input Parameters:
+  draw - the drawing context
.  nx - the number of x divisions
-  ny - the number of y divisions

   Output Parameter:
.  ports - a PetscDrawViewPorts context (C structure)

   Level: advanced

   Concepts: drawing^in subset of window

.seealso: PetscDrawSplitViewPort(), PetscDrawSetViewPort(), PetscDrawViewPortsSet(), PetscDrawViewPortsDestroy()
@*/
PetscErrorCode PetscDrawViewPortsCreateRect(PetscDraw draw,PetscInt nx,PetscInt ny,PetscDrawViewPorts **newports)
{
  PetscDrawViewPorts *ports;
  PetscReal          *xl,*xr,*yl,*yr,hx,hy;
  PetscInt           i,j,k,n;
  PetscBool          isnull;
  PetscMPIInt        rank;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(draw,PETSC_DRAW_CLASSID,1);
  if ((nx < 1) || (ny < 1)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of divisions must be positive: %d x %d",nx,ny);
  PetscValidPointer(newports,3);
  ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
  if (isnull) {*newports = NULL; PetscFunctionReturn(0);}
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)draw),&rank);CHKERRQ(ierr);

  n  = nx*ny;
  hx = 1.0/nx;
  hy = 1.0/ny;
  ierr = PetscNew(&ports);CHKERRQ(ierr);
  *newports     = ports;
  ports->draw   = draw;
  ports->nports = n;
  ierr = PetscObjectReference((PetscObject)draw);CHKERRQ(ierr);
  /* save previous drawport of window */
  ierr = PetscDrawGetViewPort(draw,&ports->port_xl,&ports->port_yl,&ports->port_xr,&ports->port_yr);CHKERRQ(ierr);

  ierr = PetscMalloc4(n,&xl,n,&xr,n,&yl,n,&yr);CHKERRQ(ierr);
  ports->xr = xr;
  ports->xl = xl;
  ports->yl = yl;
  ports->yr = yr;

  ierr = PetscDrawSetCoordinates(draw,0.0,0.0,1.0,1.0);CHKERRQ(ierr);
  ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
  for (i = 0; i < nx; i++) {
    for (j = 0; j < ny; j++) {
      k = j*nx+i;

      xl[k] = i*hx;
      xr[k] = xl[k] + hx;
      yl[k] = j*hy;
      yr[k] = yl[k] + hy;

      if (!rank) {
        ierr = PetscDrawLine(draw,xl[k],yl[k],xl[k],yr[k],PETSC_DRAW_BLACK);CHKERRQ(ierr);
        ierr = PetscDrawLine(draw,xl[k],yr[k],xr[k],yr[k],PETSC_DRAW_BLACK);CHKERRQ(ierr);
        ierr = PetscDrawLine(draw,xr[k],yr[k],xr[k],yl[k],PETSC_DRAW_BLACK);CHKERRQ(ierr);
        ierr = PetscDrawLine(draw,xr[k],yl[k],xl[k],yl[k],PETSC_DRAW_BLACK);CHKERRQ(ierr);
      }

      xl[k] += .05*hx;
      xr[k] -= .05*hx;
      yl[k] += .05*hy;
      yr[k] -= .05*hy;
    }
  }
  ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
  ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  FormFunctionLocal - Form the local residual F from the local input X

  Input Parameters:
+ dm - The mesh
. X  - Local input vector
- user - The user context

  Output Parameter:
. F  - Local output vector

  Note:
  We form the residual one batch of elements at a time. This allows us to offload work onto an
  accelerator, like a GPU, or vectorize on a multicore machine.

.seealso: FormJacobianLocal()
*/
PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user)
{
  const PetscInt debug     = user->debug;
  const PetscInt dim       = user->dim;
  const PetscInt numFields = user->numFields; /* assumed: the number of fields stored in the user context */
  PetscReal      *coords, *v0, *J, *invJ, *detJ;
  PetscScalar    *elemVec, *u;
  PetscInt       numCells;
  PetscInt       cellDof = 0;
  PetscInt       maxQuad = 0;
  PetscInt       jacSize = 1;
  PetscInt       cStart, cEnd, c, d, field;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(user->residualEvent,0,0,0,0);CHKERRQ(ierr);
  ierr = VecSet(F, 0.0);CHKERRQ(ierr);
  ierr = DMDAGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr);
  numCells = cEnd - cStart; /* must be computed after the height stratum is known */
  for (field = 0; field < numFields; ++field) {
    PetscInt dof = 1;
    for (d = 0; d < dim; ++d) {dof *= user->q[field].numBasisFuncs*user->q[field].numComponents;}
    cellDof += dof;
    maxQuad  = PetscMax(maxQuad, user->q[field].numQuadPoints);
  }
  for (d = 0; d < dim; ++d) {jacSize *= maxQuad;}
  ierr = PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,jacSize,PetscReal,&J);CHKERRQ(ierr);
  ierr = PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*jacSize,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);CHKERRQ(ierr);
  for (c = cStart; c < cEnd; ++c) {
    const PetscScalar *x;
    PetscInt          i;

    ierr = DMDAComputeCellGeometry(dm, c, v0, J, &invJ[c*jacSize], &detJ[c]);CHKERRQ(ierr);
    if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
    ierr = DMDAVecGetClosure(dm, PETSC_NULL, X, c, &x);CHKERRQ(ierr);

    for (i = 0; i < cellDof; ++i) {
      u[c*cellDof+i] = x[i];
    }
  }
  for (field = 0; field < numFields; ++field) {
    const PetscInt numQuadPoints = user->q[field].numQuadPoints;
    const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
    void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field];
    void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field];
    /* Conforming batches */
    PetscInt blockSize  = numBasisFuncs*numQuadPoints;
    PetscInt numBlocks  = 1;
    PetscInt batchSize  = numBlocks * blockSize;
    PetscInt numBatches = user->numBatches;
    PetscInt numChunks  = numCells / (numBatches*batchSize);
    ierr = IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);CHKERRQ(ierr);
    /* Remainder */
    PetscInt numRemainder = numCells % (numBatches * batchSize);
    PetscInt offset       = numCells - numRemainder;
    ierr = IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset], user->q, f0, f1, &elemVec[offset*cellDof], user);CHKERRQ(ierr);
  }
  for (c = cStart; c < cEnd; ++c) {
    if (debug) {ierr = DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);CHKERRQ(ierr);}
    ierr = DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscFree4(u,invJ,detJ,elemVec);CHKERRQ(ierr);
  ierr = PetscFree3(coords,v0,J);CHKERRQ(ierr);
  if (user->showResidual) {
    PetscInt p;

    ierr = PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");CHKERRQ(ierr);
    for (p = 0; p < user->numProcs; ++p) {
      if (p == user->rank) {
        Vec f;

        ierr = VecDuplicate(F, &f);CHKERRQ(ierr);
        ierr = VecCopy(F, f);CHKERRQ(ierr);
        ierr = VecChop(f, 1.0e-10);CHKERRQ(ierr);
        ierr = VecView(f, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
        ierr = VecDestroy(&f);CHKERRQ(ierr);
      }
      ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogEventEnd(user->residualEvent,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int main(int argc, char **args) { Mat A, L; AppCtx ctx; PetscViewer viewer; PetscErrorCode ierr; ierr = PetscInitialize(&argc, &args, (char *) 0, help);CHKERRQ(ierr); ierr = ProcessOptions(&ctx);CHKERRQ(ierr); /* Load matrix */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, ctx.matFilename, FILE_MODE_READ, &viewer);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); ierr = MatLoad(A, viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); /* Make graph Laplacian from matrix */ ierr = MatLaplacian(A, 1.0e-12, &L);CHKERRQ(ierr); /* Check Laplacian */ PetscReal norm; Vec x, y; ierr = MatGetVecs(L, &x, NULL);CHKERRQ(ierr); ierr = VecDuplicate(x, &y);CHKERRQ(ierr); ierr = VecSet(x, 1.0);CHKERRQ(ierr); ierr = MatMult(L, x, y);CHKERRQ(ierr); ierr = VecNorm(y, NORM_INFINITY, &norm);CHKERRQ(ierr); if (norm > 1.0e-10) SETERRQ(PetscObjectComm((PetscObject) y), PETSC_ERR_PLIB, "Invalid graph Laplacian"); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); /* Compute Fiedler vector, and perhaps more vectors */ Mat LD; PetscScalar *a, *realpart, *imagpart, *eigvec, *work, sdummy; PetscBLASInt bn, bN, lwork, lierr, idummy; PetscInt n, i; ierr = MatConvert(L, MATDENSE, MAT_INITIAL_MATRIX, &LD);CHKERRQ(ierr); ierr = MatGetLocalSize(LD, &n, NULL);CHKERRQ(ierr); ierr = MatDenseGetArray(LD, &a);CHKERRQ(ierr); ierr = PetscBLASIntCast(n, &bn);CHKERRQ(ierr); ierr = PetscBLASIntCast(n, &bN);CHKERRQ(ierr); ierr = PetscBLASIntCast(5*n,&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(1,&idummy);CHKERRQ(ierr); ierr = PetscMalloc4(n,PetscScalar,&realpart,n,PetscScalar,&imagpart,n*n,PetscScalar,&eigvec,lwork,PetscScalar,&work);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCall("LAPACKgeev", LAPACKgeev_("N","V",&bn,a,&bN,realpart,imagpart,&sdummy,&idummy,eigvec,&bN,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine %d", (int) lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); PetscReal *r, *c; PetscInt *perm; ierr = PetscMalloc3(n,PetscInt,&perm,n,PetscReal,&r,n,PetscReal,&c);CHKERRQ(ierr); for (i = 0; i < n; ++i) perm[i] = i; ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i = 0; i < n; ++i) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } for (i = 0; i < n; ++i) { realpart[i] = r[i]; imagpart[i] = c[i]; } /* Output spectrum */ if (ctx.showSpectrum) { ierr = PetscPrintf(PETSC_COMM_SELF, "Spectrum\n");CHKERRQ(ierr); for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: Real %g Imag %g\n", i, realpart[i], imagpart[i]);CHKERRQ(ierr);} } /* Check lowest eigenvalue and eigenvector */ PetscInt evInd = perm[0]; if ((realpart[0] > 1.0e-12) || (imagpart[0] > 1.0e-12)) SETERRQ(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have lowest eigenvalue 0"); for (i = 0; i < n; ++i) { if (fabs(eigvec[evInd*n+i] - eigvec[evInd*n+0]) > 1.0e-10) SETERRQ3(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have constant lowest eigenvector ev_%d %g != ev_0 %g", i, eigvec[evInd*n+i], eigvec[evInd*n+0]); } /* Output Fiedler vector */ evInd = perm[1]; if (ctx.showFiedler) { ierr = PetscPrintf(PETSC_COMM_SELF, "Fiedler vector, Re{ev} %g\n", realpart[1]);CHKERRQ(ierr); for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: %g\n", i, eigvec[evInd*n+i]);CHKERRQ(ierr);} } /* Construct Fiedler partition */ IS fIS, fIS2; PetscInt *fperm, *fperm2, pos, neg, posSize = 0; ierr = PetscMalloc(n * sizeof(PetscInt), 
&fperm);CHKERRQ(ierr); for (i = 0; i < n; ++i) { if (eigvec[evInd*n+i] > 0.0) ++posSize; } ierr = PetscMalloc(n * sizeof(PetscInt), &fperm2);CHKERRQ(ierr); for (i = 0; i < n; ++i) fperm[i] = i; ierr = PetscSortRealWithPermutation(n, &eigvec[evInd*n], fperm);CHKERRQ(ierr); for (i = 0; i < n; ++i) fperm2[n-1-i] = fperm[i]; for (i = 0, pos = 0, neg = posSize; i < n; ++i) { if (eigvec[evInd*n+i] > 0.0) fperm[pos++] = i; else fperm[neg++] = i; } ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm, PETSC_OWN_POINTER, &fIS);CHKERRQ(ierr); ierr = ISSetPermutation(fIS);CHKERRQ(ierr); ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm2, PETSC_OWN_POINTER, &fIS2);CHKERRQ(ierr); ierr = ISSetPermutation(fIS2);CHKERRQ(ierr); ierr = PetscFree3(perm,r,c);CHKERRQ(ierr); ierr = PetscFree4(realpart,imagpart,eigvec,work);CHKERRQ(ierr); ierr = MatDenseRestoreArray(LD, &a);CHKERRQ(ierr); ierr = MatDestroy(&LD);CHKERRQ(ierr); ierr = MatDestroy(&L);CHKERRQ(ierr); /* Permute matrix */ Mat AR, AR2; ierr = MatPermute(A, fIS, fIS, &AR);CHKERRQ(ierr); ierr = MatView(A, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(AR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&fIS);CHKERRQ(ierr); ierr = MatPermute(A, fIS2, fIS2, &AR2);CHKERRQ(ierr); ierr = MatView(AR2, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&fIS2);CHKERRQ(ierr); ierr = MatDestroy(&AR);CHKERRQ(ierr); AR = AR2; /* Extract blocks and reorder */ Mat AP, AN, APR, ANR; IS ispos, isneg, rpermpos, cpermpos, rpermneg, cpermneg; PetscInt bw, bwr; ierr = ISCreateStride(PETSC_COMM_SELF, posSize, 0, 1, &ispos);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF, n - posSize, posSize, 1, &isneg);CHKERRQ(ierr); ierr = MatGetSubMatrix(AR, ispos, ispos, MAT_INITIAL_MATRIX, &AP);CHKERRQ(ierr); ierr = MatGetSubMatrix(AR, isneg, isneg, MAT_INITIAL_MATRIX, &AN);CHKERRQ(ierr); ierr = ISDestroy(&ispos);CHKERRQ(ierr); ierr = ISDestroy(&isneg);CHKERRQ(ierr); ierr = MatGetOrdering(AP, ctx.matOrdtype, &rpermpos, &cpermpos);CHKERRQ(ierr); ierr = MatGetOrdering(AN, ctx.matOrdtype, &rpermneg, &cpermneg);CHKERRQ(ierr); ierr = MatPermute(AP, rpermpos, cpermpos, &APR);CHKERRQ(ierr); ierr = MatComputeBandwidth(AP, 0.0, &bw);CHKERRQ(ierr); ierr = MatComputeBandwidth(APR, 0.0, &bwr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced positive bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr); ierr = MatPermute(AN, rpermneg, cpermneg, &ANR);CHKERRQ(ierr); ierr = MatComputeBandwidth(AN, 0.0, &bw);CHKERRQ(ierr); ierr = MatComputeBandwidth(ANR, 0.0, &bwr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced negative bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr); ierr = MatView(AP, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(APR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(AN, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(ANR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); /* Reorder original matrix */ Mat ARR; IS rperm, cperm; PetscInt *idx; const PetscInt *cidx; ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr); ierr = ISGetIndices(rpermpos, &cidx);CHKERRQ(ierr); for (i = 0; i < posSize; ++i) idx[i] = cidx[i]; ierr = ISRestoreIndices(rpermpos, &cidx);CHKERRQ(ierr); ierr = ISGetIndices(rpermneg, &cidx);CHKERRQ(ierr); for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize; ierr = ISRestoreIndices(rpermneg, &cidx);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &rperm);CHKERRQ(ierr); ierr = ISSetPermutation(rperm);CHKERRQ(ierr); ierr = 
PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr); ierr = ISGetIndices(cpermpos, &cidx);CHKERRQ(ierr); for (i = 0; i < posSize; ++i) idx[i] = cidx[i]; ierr = ISRestoreIndices(cpermpos, &cidx);CHKERRQ(ierr); ierr = ISGetIndices(cpermneg, &cidx);CHKERRQ(ierr); for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize; ierr = ISRestoreIndices(cpermneg, &cidx);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &cperm);CHKERRQ(ierr); ierr = ISSetPermutation(cperm);CHKERRQ(ierr); ierr = MatPermute(AR, rperm, cperm, &ARR);CHKERRQ(ierr); ierr = MatView(ARR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&rperm);CHKERRQ(ierr); ierr = ISDestroy(&cperm);CHKERRQ(ierr); ierr = ISDestroy(&rpermpos);CHKERRQ(ierr); ierr = ISDestroy(&cpermpos);CHKERRQ(ierr); ierr = ISDestroy(&rpermneg);CHKERRQ(ierr); ierr = ISDestroy(&cpermneg);CHKERRQ(ierr); ierr = MatDestroy(&AP);CHKERRQ(ierr); ierr = MatDestroy(&AN);CHKERRQ(ierr); ierr = MatDestroy(&APR);CHKERRQ(ierr); ierr = MatDestroy(&ANR);CHKERRQ(ierr); /* Compare bands */ Mat B, BR; ierr = MatCreateSubMatrixBanded(A, 50, 0.95, &B);CHKERRQ(ierr); ierr = MatCreateSubMatrixBanded(ARR, 50, 0.95, &BR);CHKERRQ(ierr); ierr = MatView(B, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(BR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&BR);CHKERRQ(ierr); /* Cleanup */ ierr = MatDestroy(&ARR);CHKERRQ(ierr); ierr = MatDestroy(&AR);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
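/* The driver above relies on MatPermute() with a general index set declared as a permutation via ISSetPermutation(). A minimal sketch of that pattern in isolation; the helper name, the 4x4 diagonal matrix, and the reversal permutation are illustrative only: */
static PetscErrorCode PermuteExample(void)
{
  Mat            A,AP;
  IS             is;
  PetscInt       i,idx[4] = {3,2,1,0}; /* reversal permutation */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&A);CHKERRQ(ierr);
  for (i = 0; i < 4; ++i) {ierr = MatSetValue(A,i,i,(PetscScalar)(i+1),INSERT_VALUES);CHKERRQ(ierr);}
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,4,idx,PETSC_COPY_VALUES,&is);CHKERRQ(ierr);
  ierr = ISSetPermutation(is);CHKERRQ(ierr); /* MatPermute() requires permutation index sets */
  ierr = MatPermute(A,is,is,&AP);CHKERRQ(ierr);
  ierr = MatView(AP,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = MatDestroy(&AP);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}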
PetscErrorCode KSPComputeShifts_DGMRES(KSP ksp) { PetscErrorCode ierr; KSP_AGMRES *agmres = (KSP_AGMRES*)(ksp->data); PetscInt max_k = agmres->max_k; /* size of the (non augmented) Krylov subspace */ PetscInt Neig = 0; PetscInt max_it = ksp->max_it; /* Perform one cycle of dgmres to find the eigenvalues and compute the first approximations of the eigenvectors */ PetscFunctionBegin; ierr = PetscLogEventBegin(KSP_AGMRESComputeShifts, ksp, 0,0,0);CHKERRQ(ierr); /* Send the size of the augmented basis to DGMRES */ ksp->max_it = max_k; /* set this to have DGMRES performing only one cycle */ ksp->ops->buildsolution = KSPBuildSolution_DGMRES; ierr = KSPSolve_DGMRES(ksp);CHKERRQ(ierr); ksp->guess_zero = PETSC_FALSE; if (ksp->reason == KSP_CONVERGED_RTOL) { ierr = PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); } else ksp->reason = KSP_CONVERGED_ITERATING; if ((agmres->r == 0) && (agmres->neig > 0)) { /* Compute the eigenvalues for the shifts and the eigenvectors (to augment the Newton basis) */ agmres->HasSchur = PETSC_FALSE; ierr = KSPDGMRESComputeDeflationData_DGMRES(ksp, &Neig);CHKERRQ(ierr); Neig = max_k; } else { /* From DGMRES, compute only the eigenvalues needed as shifts for the Newton basis */ ierr = KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig);CHKERRQ(ierr); } /* It may happen that the Ritz values from one cycle of GMRES are not accurate enough to provide good stability. In this case, another cycle of GMRES is performed. The two sets of values thus generated are sorted and the most accurate are kept as shifts */ PetscBool flg; ierr = PetscOptionsHasName(NULL, "-ksp_agmres_ImproveShifts", &flg);CHKERRQ(ierr); if (!flg) { ierr = KSPAGMRESLejaOrdering(agmres->wr, agmres->wi, agmres->Rshift, agmres->Ishift, max_k);CHKERRQ(ierr); } else { /* Perform another cycle of DGMRES to find another set of eigenvalues */ PetscInt i; PetscScalar *wr, *wi, *Rshift, *Ishift; ierr = PetscMalloc4(2*max_k, &wr, 2*max_k, &wi, 2*max_k, &Rshift, 2*max_k, &Ishift);CHKERRQ(ierr); for (i = 0; i < max_k; i++) { wr[i] = agmres->wr[i]; wi[i] = agmres->wi[i]; } ierr = KSPSolve_DGMRES(ksp);CHKERRQ(ierr); ksp->guess_zero = PETSC_FALSE; if (ksp->reason == KSP_CONVERGED_RTOL) { ierr = PetscFree4(wr, wi, Rshift, Ishift);CHKERRQ(ierr); ierr = PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); } else ksp->reason = KSP_CONVERGED_ITERATING; if (agmres->neig > 0) { /* Compute the eigenvalues for the shifts and the eigenvectors (to augment the Newton basis) */ agmres->HasSchur = PETSC_FALSE; ierr = KSPDGMRESComputeDeflationData_DGMRES(ksp, &Neig);CHKERRQ(ierr); Neig = max_k; } else { /* From DGMRES, compute only the eigenvalues needed as shifts for the Newton basis */ ierr = KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig);CHKERRQ(ierr); } for (i = 0; i < max_k; i++) { wr[max_k+i] = agmres->wr[i]; wi[max_k+i] = agmres->wi[i]; } ierr = KSPAGMRESLejaOrdering(wr, wi, Rshift, Ishift, 2*max_k);CHKERRQ(ierr); for (i = 0; i < max_k; i++) { agmres->Rshift[i] = Rshift[i]; agmres->Ishift[i] = Ishift[i]; } /* wr, wi, Rshift, Ishift come from a single PetscMalloc4(), so they must be released with a single PetscFree4() */ ierr = PetscFree4(wr, wi, Rshift, Ishift);CHKERRQ(ierr); } agmres->HasShifts = PETSC_TRUE; ksp->max_it = max_it; ierr = PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
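/* KSPComputeShifts_DGMRES() is internal to the AGMRES implementation: it runs one DGMRES cycle to harvest Ritz values as shifts for the Newton basis. From user code it is reached only by selecting the AGMRES type; a minimal sketch, assuming A, b and x are an assembled Mat and Vecs (helper name illustrative): */
static PetscErrorCode SolveWithAGMRES(Mat A,Vec b,Vec x)
{
  KSP            ksp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = KSPCreate(PetscObjectComm((PetscObject)A),&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
  ierr = KSPSetType(ksp,KSPAGMRES);CHKERRQ(ierr); /* Newton-basis GMRES; shifts come from the routine above */
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);    /* e.g. -ksp_gmres_restart controls the basis size */
  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}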
/*@C AOCreateMapping - Creates a basic application mapping using two integer arrays. Input Parameters: + comm - MPI communicator that is to share AO . napp - size of integer arrays . myapp - integer array that defines an ordering - mypetsc - integer array that defines another ordering (may be NULL to indicate the identity ordering) Output Parameter: . aoout - the new application mapping Options Database Key: . -ao_view : call AOView() at the conclusion of AOCreateMapping() Level: beginner Notes: the arrays myapp and mypetsc need NOT contain all the integers 0 to napp-1; that is, there CAN be "holes" in the indices. Use AOCreateBasic() or AOCreateBasicIS() if they do not have holes, for better performance. .keywords: AO, create .seealso: AOCreateBasic(), AOCreateBasicIS(), AOCreateMappingIS(), AODestroy() @*/ PetscErrorCode AOCreateMapping(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout) { AO ao; AO_Mapping *aomap; PetscInt *allpetsc, *allapp; PetscInt *petscPerm, *appPerm; PetscInt *petsc; PetscMPIInt size, rank,*lens, *disp,nnapp; PetscInt N, start; PetscInt i; PetscErrorCode ierr; PetscFunctionBegin; PetscValidPointer(aoout,5); *aoout = 0; ierr = AOInitializePackage();CHKERRQ(ierr); ierr = PetscHeaderCreate(ao, AO_CLASSID, "AO", "Application Ordering", "AO", comm, AODestroy, AOView);CHKERRQ(ierr); ierr = PetscNewLog(ao,&aomap);CHKERRQ(ierr); ierr = PetscMemcpy(ao->ops, &AOps, sizeof(AOps));CHKERRQ(ierr); ao->data = (void*) aomap; /* transmit all lengths to all processors */ ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = PetscMalloc2(size, &lens,size,&disp);CHKERRQ(ierr); nnapp = napp; ierr = MPI_Allgather(&nnapp, 1, MPI_INT, lens, 1, MPI_INT, comm);CHKERRQ(ierr); N = 0; for (i = 0; i < size; i++) { disp[i] = N; N += lens[i]; } aomap->N = N; ao->N = N; ao->n = N; /* If mypetsc is 0 then use "natural" numbering */ if (!mypetsc) { start = disp[rank]; ierr = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr); for (i = 0; i < napp; i++) petsc[i] = start + i; } else { petsc = (PetscInt*)mypetsc; } /* get all indices on all processors */ ierr = PetscMalloc4(N, &allapp,N,&appPerm,N,&allpetsc,N,&petscPerm);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)myapp, napp, MPIU_INT, allapp, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)petsc, napp, MPIU_INT, allpetsc, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = PetscFree2(lens,disp);CHKERRQ(ierr); /* generate a list of application and PETSc node numbers */ ierr = PetscMalloc4(N, &aomap->app,N,&aomap->appPerm,N,&aomap->petsc,N,&aomap->petscPerm);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)ao, 4*N * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < N; i++) { appPerm[i] = i; petscPerm[i] = i; } ierr = PetscSortIntWithPermutation(N, allpetsc, petscPerm);CHKERRQ(ierr); ierr = PetscSortIntWithPermutation(N, allapp, appPerm);CHKERRQ(ierr); /* Form sorted arrays of indices */ for (i = 0; i < N; i++) { aomap->app[i] = allapp[appPerm[i]]; aomap->petsc[i] = allpetsc[petscPerm[i]]; } /* Invert petscPerm[] into aomap->petscPerm[] */ for (i = 0; i < N; i++) aomap->petscPerm[petscPerm[i]] = i; /* Form map between aomap->app[] and aomap->petsc[] */ for (i = 0; i < N; i++) aomap->appPerm[i] = aomap->petscPerm[appPerm[i]]; /* Invert appPerm[] into allapp[] */ for (i = 0; i < N; i++) allapp[appPerm[i]] = i; /* Form map between aomap->petsc[] and aomap->app[] */ for (i = 0; i < N; i++) aomap->petscPerm[i] = allapp[petscPerm[i]]; #if
defined(PETSC_USE_DEBUG) /* Check that the permutations are complementary */ for (i = 0; i < N; i++) { if (i != aomap->appPerm[aomap->petscPerm[i]]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Invalid ordering"); } #endif /* Cleanup */ if (!mypetsc) { ierr = PetscFree(petsc);CHKERRQ(ierr); } ierr = PetscFree4(allapp,appPerm,allpetsc,petscPerm);CHKERRQ(ierr); ierr = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr); *aoout = ao; PetscFunctionReturn(0); }
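/* A small usage sketch for AOCreateMapping(). The application numbering {33,7,68} has "holes", which AOCreateBasic() would not accept; passing NULL for mypetsc selects the natural numbering 0..napp-1. Values and helper name are illustrative: */
static PetscErrorCode MappingExample(void)
{
  AO             ao;
  PetscInt       app[3] = {33,7,68},idx[2] = {7,68};
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = AOCreateMapping(PETSC_COMM_SELF,3,app,NULL,&ao);CHKERRQ(ierr);
  ierr = AOApplicationToPetsc(ao,2,idx);CHKERRQ(ierr); /* in place: idx becomes {1,2} */
  ierr = AODestroy(&ao);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}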
PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ PetscErrorCode ierr; PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; PetscInt *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N; PetscInt i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; MatScalar *ca; PetscFunctionBegin; /* some error checking which could be moved into interface layer */ if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am); if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an); /* Set up timers */ ierr = PetscLogEventBegin(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr); /* Create ij structure of P^T */ ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); /* Allocate ci array, arrays for fill computation and */ /* free space for accumulating nonzero column info */ ierr = PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); ci[0] = 0; ierr = PetscMalloc4(an,PetscInt,&padenserow,an,PetscInt,&pasparserow,pm,PetscInt,&denserow,pm,PetscInt,&sparserow);CHKERRQ(ierr); ierr = PetscMemzero(padenserow,an*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(pasparserow,an*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(denserow,pm*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(sparserow,pm*sizeof(PetscInt));CHKERRQ(ierr); /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ /* This should be reasonable if sparsity of PAPt is similar to that of A. 
*/ ierr = PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);CHKERRQ(ierr); current_space = free_space; /* Determine fill for each row of C: */ for (i=0;i<pm;i++) { pnzi = pi[i+1] - pi[i]; panzi = 0; /* Get symbolic sparse row of PA: */ for (j=0;j<pnzi;j++) { arow = *pj++; anzj = ai[arow+1] - ai[arow]; ajj = aj + ai[arow]; for (k=0;k<anzj;k++) { if (!padenserow[ajj[k]]) { padenserow[ajj[k]] = -1; pasparserow[panzi++] = ajj[k]; } } } /* Using symbolic row of PA, determine symbolic row of C: */ paj = pasparserow; cnzi = 0; for (j=0;j<panzi;j++) { ptrow = *paj++; ptnzj = pti[ptrow+1] - pti[ptrow]; ptjj = ptj + pti[ptrow]; for (k=0;k<ptnzj;k++) { if (!denserow[ptjj[k]]) { denserow[ptjj[k]] = -1; sparserow[cnzi++] = ptjj[k]; } } } /* sort sparse representation */ ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); /* If free space is not available, make more free space */ /* Double the amount of total space in the list */ if (current_space->local_remaining<cnzi) { ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); } /* Copy data into free space, and zero out dense row */ ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); current_space->array += cnzi; current_space->local_used += cnzi; current_space->local_remaining -= cnzi; for (j=0;j<panzi;j++) { padenserow[pasparserow[j]] = 0; } for (j=0;j<cnzi;j++) { denserow[sparserow[j]] = 0; } ci[i+1] = ci[i] + cnzi; } /* column indices are in the list of free space */ /* Allocate space for cj, initialize cj, and */ /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); ierr = PetscFree4(padenserow,pasparserow,denserow,sparserow);CHKERRQ(ierr); /* Allocate space for ca */ ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); /* put together the new matrix */ ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); (*C)->rmap->bs = P->cmap->bs; (*C)->cmap->bs = P->cmap->bs; /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ /* Since these are PETSc arrays, change flags to free them as necessary. */ c = (Mat_SeqAIJ *)((*C)->data); c->free_a = PETSC_TRUE; c->free_ij = PETSC_TRUE; c->nonew = 0; /* Clean up. */ ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); ierr = PetscLogEventEnd(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
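/* MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ() only builds the nonzero structure of C = P*A*P^T; a separate numeric phase fills in the values. The same symbolic/numeric split backs the public MatPtAP() (which forms P^T*A*P). A sketch of that public entry point, with fill=1.0 as an assumed fill-ratio estimate: */
static PetscErrorCode PtAPExample(Mat A,Mat P)
{
  Mat            C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr); /* symbolic + numeric product */
  ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);   /* later products reuse the symbolic structure */
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}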
static PetscErrorCode GreedyColoringLocalDistanceTwo_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors) { MC_Greedy *gr = (MC_Greedy *) mc->data; PetscInt i,j,k,l,s,e,n,nd,nd_global,n_global,idx,ncols,maxcolors,mcol,mcol_global,nd1cols,*mask,masksize,*d1cols,*bad,*badnext,nbad,badsize,ccol,no,cbad; Mat m = mc->mat, mt; Mat_MPIAIJ *aij = (Mat_MPIAIJ*)m->data; Mat md=NULL,mo=NULL; const PetscInt *md_i,*mo_i,*md_j,*mo_j; const PetscInt *rmd_i,*rmo_i,*rmd_j,*rmo_j; PetscBool isMPIAIJ,isSEQAIJ; PetscInt pcol,*dcolors,*ocolors; ISColoringValue *badidx; const PetscInt *cidx; PetscReal *owts,*colorweights; PetscInt *oconf,*conf; PetscSF sf; PetscLayout layout; PetscErrorCode ierr; PetscFunctionBegin; ierr = MatGetSize(m,&n_global,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(m,&s,&e);CHKERRQ(ierr); n=e-s; nd_global = 0; /* get the matrix communication structures */ ierr = PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ); CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ); CHKERRQ(ierr); if (isMPIAIJ) { Mat_SeqAIJ *dseq; Mat_SeqAIJ *oseq; md=aij->A; dseq = (Mat_SeqAIJ*)md->data; mo=aij->B; oseq = (Mat_SeqAIJ*)mo->data; md_i = dseq->i; md_j = dseq->j; mo_i = oseq->i; mo_j = oseq->j; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = oseq->i; rmo_j = oseq->j; } else if (isSEQAIJ) { Mat_SeqAIJ *dseq; /* no off-processor nodes */ md=m; dseq = (Mat_SeqAIJ*)md->data; md_i = dseq->i; md_j = dseq->j; mo_i = NULL; mo_j = NULL; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring"); if (!gr->symmetric) { ierr = MatTranspose(m, MAT_INITIAL_MATRIX, &mt);CHKERRQ(ierr); if (isSEQAIJ) { Mat_SeqAIJ *dseq = (Mat_SeqAIJ*) mt->data; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject) mc), PETSC_ERR_SUP, "Nonsymmetric greedy coloring only works in serial"); } /* create the vectors and communication structures if necessary */ no=0; if (mo) { ierr = VecGetLocalSize(aij->lvec,&no);CHKERRQ(ierr); ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);CHKERRQ(ierr); ierr = MatGetLayouts(m,&layout,NULL);CHKERRQ(ierr); ierr = PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);CHKERRQ(ierr); } ierr = MatColoringGetMaxColors(mc,&maxcolors);CHKERRQ(ierr); masksize=n; nbad=0; badsize=n; ierr = PetscMalloc1(masksize,&mask);CHKERRQ(ierr); ierr = PetscMalloc4(n,&d1cols,n,&dcolors,n,&conf,n,&bad);CHKERRQ(ierr); ierr = PetscMalloc2(badsize,&badidx,badsize,&badnext);CHKERRQ(ierr); for(i=0;i<masksize;i++) { mask[i]=-1; } for (i=0;i<n;i++) { dcolors[i]=maxcolors; bad[i]=-1; } for (i=0;i<badsize;i++) { badnext[i]=-1; } if (mo) { ierr = PetscMalloc3(no,&owts,no,&oconf,no,&ocolors);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); for (i=0;i<no;i++) { ocolors[i]=maxcolors; } } else { /* Appease overzealous -Wmaybe-initialized */ owts = NULL; oconf = NULL; ocolors = NULL; } mcol=0; while (nd_global < n_global) { nd=n; /* assign lowest possible color to each local vertex */ mcol_global=0; ierr = PetscLogEventBegin(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); for (i=0;i<n;i++) { idx=lperm[i]; if (dcolors[idx] == maxcolors) { /* entries in bad */ cbad=bad[idx]; while (cbad>=0) { ccol=badidx[cbad]; if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); 
for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; cbad=badnext[cbad]; } /* diagonal distance-one rows */ nd1cols=0; ncols = rmd_i[idx+1]-rmd_i[idx]; cidx = &(rmd_j[rmd_i[idx]]); for (j=0;j<ncols;j++) { d1cols[nd1cols] = cidx[j]; nd1cols++; ccol=dcolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } /* off-diagonal distance-one rows */ if (mo) { ncols = rmo_i[idx+1]-rmo_i[idx]; cidx = &(rmo_j[rmo_i[idx]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* diagonal distance-two rows */ for (j=0;j<nd1cols;j++) { ncols = md_i[d1cols[j]+1]-md_i[d1cols[j]]; cidx = &(md_j[md_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=dcolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* off-diagonal distance-two rows */ if (mo) { for (j=0;j<nd1cols;j++) { ncols = mo_i[d1cols[j]+1]-mo_i[d1cols[j]]; cidx = &(mo_j[mo_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=ocolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } } /* assign this one the lowest color possible by seeing if there's a gap in the sequence of sorted neighbor colors */ for (j=0;j<masksize;j++) { if (mask[j]!=idx) { break; } } pcol=j; if (pcol>maxcolors) pcol=maxcolors; dcolors[idx]=pcol; if (pcol>mcol) mcol=pcol; } } ierr = PetscLogEventEnd(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); if (mo) { /* transfer neighbor colors */ ierr = PetscSFBcastBegin(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); /* find the maximum color assigned locally and allocate a mask */ ierr = MPIU_Allreduce(&mcol,&mcol_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); ierr = PetscMalloc1(mcol_global+1,&colorweights);CHKERRQ(ierr); /* check for conflicts */ for (i=0;i<n;i++) { conf[i]=PETSC_FALSE; } for (i=0;i<no;i++) { oconf[i]=PETSC_FALSE; } for (i=0;i<n;i++) { ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); if (ncols > 0) { /* fill in the mask */ for (j=0;j<mcol_global+1;j++) { colorweights[j]=0; } colorweights[dcolors[i]]=wts[i]; /* fill in the off-diagonal part of the mask */ for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] < owts[cidx[j]]) { colorweights[ccol] = owts[cidx[j]]; } } } /* fill in the on-diagonal part of the mask */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if 
(ccol < maxcolors) { if (colorweights[ccol] < wts[cidx[j]]) { colorweights[ccol] = wts[cidx[j]]; } } } /* go back through and set up on and off-diagonal conflict vectors */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > wts[cidx[j]]) { conf[cidx[j]]=PETSC_TRUE; } } } ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > owts[cidx[j]]) { oconf[cidx[j]]=PETSC_TRUE; } } } } } nd_global=0; ierr = PetscFree(colorweights);CHKERRQ(ierr); ierr = PetscLogEventBegin(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); ierr = PetscSFReduceBegin(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = PetscLogEventEnd(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); /* go through and unset local colors that have conflicts */ for (i=0;i<n;i++) { if (conf[i]>0) { /* push this color onto the bad stack */ badidx[nbad]=dcolors[i]; badnext[nbad]=bad[i]; bad[i]=nbad; nbad++; if (nbad>=badsize) { PetscInt *newbadnext; ISColoringValue *newbadidx; ierr = PetscMalloc2(badsize*2,&newbadidx,badsize*2,&newbadnext);CHKERRQ(ierr); for(k=0;k<2*badsize;k++) { newbadnext[k]=-1; } for(k=0;k<badsize;k++) { newbadidx[k]=badidx[k]; newbadnext[k]=badnext[k]; } ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); badidx=newbadidx; badnext=newbadnext; badsize*=2; } dcolors[i] = maxcolors; nd--; } } } ierr = MPIU_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); } if (mo) { ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscFree3(owts,oconf,ocolors);CHKERRQ(ierr); } for (i=0;i<n;i++) { colors[i]=dcolors[i]; } ierr = PetscFree(mask);CHKERRQ(ierr); ierr = PetscFree4(d1cols,dcolors,conf,bad);CHKERRQ(ierr); ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); if (!gr->symmetric) {ierr = MatDestroy(&mt);CHKERRQ(ierr);} PetscFunctionReturn(0); }
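/* GreedyColoringLocalDistanceTwo_Private() is dispatched through the MatColoring interface; a distance-2 coloring is the kind needed for finite-difference Jacobian compression. A minimal sketch for an assembled AIJ matrix A (helper name illustrative): */
static PetscErrorCode GreedyColoringExample(Mat A)
{
  MatColoring    mc;
  ISColoring     iscoloring;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatColoringCreate(A,&mc);CHKERRQ(ierr);
  ierr = MatColoringSetType(mc,MATCOLORINGGREEDY);CHKERRQ(ierr);
  ierr = MatColoringSetDistance(mc,2);CHKERRQ(ierr); /* distance-2, as implemented above */
  ierr = MatColoringSetFromOptions(mc);CHKERRQ(ierr);
  ierr = MatColoringApply(mc,&iscoloring);CHKERRQ(ierr);
  ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
  ierr = MatColoringDestroy(&mc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}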
/* PEPBuildDiagonalScaling - compute two diagonal matrices to be applied for balancing in polynomial eigenproblems. */ PetscErrorCode PEPBuildDiagonalScaling(PEP pep) { PetscErrorCode ierr; PetscInt it,i,j,k,nmat,nr,e,nz,lst,lend,nc=0,*cols,emax,emin,emaxl,eminl; const PetscInt *cidx,*ridx; Mat M,*T,A; PetscMPIInt n; PetscBool cont=PETSC_TRUE,flg=PETSC_FALSE; PetscScalar *array,*Dr,*Dl,t; PetscReal l2,d,*rsum,*aux,*csum,w=1.0; MatStructure str; MatInfo info; PetscFunctionBegin; l2 = 2*PetscLogReal(2.0); nmat = pep->nmat; ierr = PetscMPIIntCast(pep->n,&n);CHKERRQ(ierr); ierr = STGetMatStructure(pep->st,&str);CHKERRQ(ierr); ierr = PetscMalloc1(nmat,&T);CHKERRQ(ierr); for (k=0;k<nmat;k++) { ierr = STGetTOperators(pep->st,k,&T[k]);CHKERRQ(ierr); } /* Form local auxiliary matrix M */ ierr = PetscObjectTypeCompareAny((PetscObject)T[0],&cont,MATMPIAIJ,MATSEQAIJ,"");CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]),PETSC_ERR_SUP,"Only for MPIAIJ or SEQAIJ matrix types"); ierr = PetscObjectTypeCompare((PetscObject)T[0],MATMPIAIJ,&cont);CHKERRQ(ierr); if (cont) { ierr = MatMPIAIJGetLocalMat(T[0],MAT_INITIAL_MATRIX,&M);CHKERRQ(ierr); flg = PETSC_TRUE; } else { ierr = MatDuplicate(T[0],MAT_COPY_VALUES,&M);CHKERRQ(ierr); } ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); for (k=1;k<nmat;k++) { if (flg) { ierr = MatMPIAIJGetLocalMat(T[k],MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); } else { if (str==SAME_NONZERO_PATTERN) { ierr = MatCopy(T[k],A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } else { ierr = MatDuplicate(T[k],MAT_COPY_VALUES,&A);CHKERRQ(ierr); } } ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr); w *= pep->slambda*pep->slambda*pep->sfactor; ierr = MatAXPY(M,w,A,str);CHKERRQ(ierr); if (flg || str!=SAME_NONZERO_PATTERN || k==nmat-2) { ierr = MatDestroy(&A);CHKERRQ(ierr); } } ierr = MatGetRowIJ(M,0,PETSC_FALSE,PETSC_FALSE,&nr,&ridx,&cidx,&cont);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]), PETSC_ERR_SUP,"It is not possible to compute scaling diagonals for these PEP matrices"); ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = VecGetOwnershipRange(pep->Dl,&lst,&lend);CHKERRQ(ierr); ierr = PetscMalloc4(nr,&rsum,pep->n,&csum,pep->n,&aux,PetscMin(pep->n-lend+lst,nz),&cols);CHKERRQ(ierr); ierr = VecSet(pep->Dr,1.0);CHKERRQ(ierr); ierr = VecSet(pep->Dl,1.0);CHKERRQ(ierr); ierr = VecGetArray(pep->Dl,&Dl);CHKERRQ(ierr); ierr = VecGetArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) { /* Search non-zero columns outside [lst,lend) */ if (aux[cidx[j]]==0 && (cidx[j]<lst || lend<=cidx[j])) cols[nc++] = cidx[j]; /* Local column sums */ aux[cidx[j]] += PetscAbsScalar(array[j]); } for (it=0;it<pep->sits && cont;it++) { emaxl = 0; eminl = 0; /* Column sum */ if (it>0) { /* it=0 has already been done */ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) aux[cidx[j]] += PetscAbsScalar(array[j]); ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); } ierr =
MPI_Allreduce(aux,csum,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)pep->Dr));CHKERRQ(ierr); /* Update Dr */ for (j=lst;j<lend;j++) { d = PetscLogReal(csum[j])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dr[j-lst] *= d; aux[j] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } for (j=0;j<nc;j++) { d = PetscLogReal(csum[cols[j]])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); aux[cols[j]] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } /* Scale M */ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (j=0;j<nz;j++) { array[j] *= aux[cidx[j]]; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Row sum */ ierr = PetscMemzero(rsum,nr*sizeof(PetscReal));CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nr;i++) { for (j=ridx[i];j<ridx[i+1];j++) rsum[i] += PetscAbsScalar(array[j]); /* Update Dl */ d = PetscLogReal(rsum[i])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dl[i] *= d; /* Scale M */ for (j=ridx[i];j<ridx[i+1];j++) array[j] *= d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Compute global max and min */ ierr = MPI_Allreduce(&emaxl,&emax,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)pep->Dl));CHKERRQ(ierr); ierr = MPI_Allreduce(&eminl,&emin,1,MPIU_INT,MPIU_MIN,PetscObjectComm((PetscObject)pep->Dl));CHKERRQ(ierr); if (emax<=emin+2) cont = PETSC_FALSE; } ierr = VecRestoreArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = VecRestoreArray(pep->Dl,&Dl);CHKERRQ(ierr); /* Free memory */ ierr = MatDestroy(&M);CHKERRQ(ierr); ierr = PetscFree4(rsum,csum,aux,cols);CHKERRQ(ierr); ierr = PetscFree(T);CHKERRQ(ierr); PetscFunctionReturn(0); }
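/* PEPBuildDiagonalScaling() runs inside the PEP solver when diagonal balancing is requested; it is not called directly. A usage sketch under the assumption that A[] holds the nmat assembled coefficient matrices and that the installed SLEPc accepts -pep_scale diagonal (option name may differ between versions): */
static PetscErrorCode PEPScaledSolve(PetscInt nmat,Mat A[])
{
  PEP            pep;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PEPCreate(PetscObjectComm((PetscObject)A[0]),&pep);CHKERRQ(ierr);
  ierr = PEPSetOperators(pep,nmat,A);CHKERRQ(ierr);
  ierr = PEPSetFromOptions(pep);CHKERRQ(ierr); /* run with -pep_scale diagonal to trigger the balancing above */
  ierr = PEPSolve(pep);CHKERRQ(ierr);
  ierr = PEPDestroy(&pep);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}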
PetscErrorCode EPSSolve_Lanczos(EPS eps) { EPS_LANCZOS *lanczos = (EPS_LANCZOS*)eps->data; PetscErrorCode ierr; PetscInt nconv,i,j,k,l,x,n,*perm,restart,ncv=eps->ncv,r,ld; Vec vi,vj,w; Mat U; PetscScalar *Y,*ritz,stmp; PetscReal *d,*e,*bnd,anorm,beta,norm,rtmp,resnorm; PetscBool breakdown; char *conv,ctmp; PetscFunctionBegin; ierr = DSGetLeadingDimension(eps->ds,&ld);CHKERRQ(ierr); ierr = PetscMalloc4(ncv,&ritz,ncv,&bnd,ncv,&perm,ncv,&conv);CHKERRQ(ierr); /* The first Lanczos vector is the normalized initial vector */ ierr = EPSGetStartVector(eps,0,NULL);CHKERRQ(ierr); anorm = -1.0; nconv = 0; /* Restart loop */ while (eps->reason == EPS_CONVERGED_ITERATING) { eps->its++; /* Compute an ncv-step Lanczos factorization */ n = PetscMin(nconv+eps->mpd,ncv); ierr = DSGetArrayReal(eps->ds,DS_MAT_T,&d);CHKERRQ(ierr); e = d + ld; ierr = EPSBasicLanczos(eps,d,e,nconv,&n,&breakdown,anorm);CHKERRQ(ierr); beta = e[n-1]; ierr = DSRestoreArrayReal(eps->ds,DS_MAT_T,&d);CHKERRQ(ierr); ierr = DSSetDimensions(eps->ds,n,0,nconv,0);CHKERRQ(ierr); ierr = DSSetState(eps->ds,DS_STATE_INTERMEDIATE);CHKERRQ(ierr); ierr = BVSetActiveColumns(eps->V,nconv,n);CHKERRQ(ierr); /* Solve projected problem */ ierr = DSSolve(eps->ds,ritz,NULL);CHKERRQ(ierr); ierr = DSSort(eps->ds,ritz,NULL,NULL,NULL,NULL);CHKERRQ(ierr); /* Estimate ||A|| */ for (i=nconv;i<n;i++) anorm = PetscMax(anorm,PetscAbsReal(PetscRealPart(ritz[i]))); /* Compute residual norm estimates as beta*abs(Y(m,:)) + eps*||A|| */ ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); for (i=nconv;i<n;i++) { resnorm = beta*PetscAbsScalar(Y[n-1+i*ld]) + PETSC_MACHINE_EPSILON*anorm; ierr = (*eps->converged)(eps,ritz[i],eps->eigi[i],resnorm,&bnd[i],eps->convergedctx);CHKERRQ(ierr); if (bnd[i]<eps->tol) conv[i] = 'C'; else conv[i] = 'N'; } ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); /* purge repeated ritz values */ if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL) { for (i=nconv+1;i<n;i++) { if (conv[i] == 'C' && PetscAbsScalar((ritz[i]-ritz[i-1])/ritz[i]) < eps->tol) conv[i] = 'R'; } } /* Compute restart vector */ if (breakdown) { ierr = PetscInfo2(eps,"Breakdown in Lanczos method (it=%D norm=%g)\n",eps->its,(double)beta);CHKERRQ(ierr); } else { restart = nconv; while (restart<n && conv[restart] != 'N') restart++; if (restart >= n) { breakdown = PETSC_TRUE; } else { for (i=restart+1;i<n;i++) { if (conv[i] == 'N') { ierr = SlepcSCCompare(eps->sc,ritz[restart],0.0,ritz[i],0.0,&r);CHKERRQ(ierr); if (r>0) restart = i; } } ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); ierr = BVMultColumn(eps->V,1.0,0.0,n,Y+restart*ld+nconv);CHKERRQ(ierr); ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); } } /* Count and put converged eigenvalues first */ for (i=nconv;i<n;i++) perm[i] = i; for (k=nconv;k<n;k++) { if (conv[perm[k]] != 'C') { j = k + 1; while (j<n && conv[perm[j]] != 'C') j++; if (j>=n) break; l = perm[k]; perm[k] = perm[j]; perm[j] = l; } } /* Sort eigenvectors according to permutation */ ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); for (i=nconv;i<k;i++) { x = perm[i]; if (x != i) { j = i + 1; while (perm[j] != i) j++; /* swap eigenvalues i and j */ stmp = ritz[x]; ritz[x] = ritz[i]; ritz[i] = stmp; rtmp = bnd[x]; bnd[x] = bnd[i]; bnd[i] = rtmp; ctmp = conv[x]; conv[x] = conv[i]; conv[i] = ctmp; perm[j] = x; perm[i] = i; /* swap eigenvectors i and j */ for (l=0;l<n;l++) { stmp = Y[l+x*ld]; Y[l+x*ld] = Y[l+i*ld]; Y[l+i*ld] = stmp; } } } ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr); /* compute converged eigenvectors */ ierr 
= DSGetMat(eps->ds,DS_MAT_Q,&U);CHKERRQ(ierr); ierr = BVMultInPlace(eps->V,U,nconv,k);CHKERRQ(ierr); ierr = MatDestroy(&U);CHKERRQ(ierr); /* purge spurious ritz values */ if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL) { for (i=nconv;i<k;i++) { ierr = BVGetColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = VecNorm(vi,NORM_2,&norm);CHKERRQ(ierr); ierr = VecScale(vi,1.0/norm);CHKERRQ(ierr); w = eps->work[0]; ierr = STApply(eps->st,vi,w);CHKERRQ(ierr); ierr = VecAXPY(w,-ritz[i],vi);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr); ierr = (*eps->converged)(eps,ritz[i],eps->eigi[i],norm,&bnd[i],eps->convergedctx);CHKERRQ(ierr); if (bnd[i]>=eps->tol) conv[i] = 'S'; } for (i=nconv;i<k;i++) { if (conv[i] != 'C') { j = i + 1; while (j<k && conv[j] != 'C') j++; if (j>=k) break; /* swap eigenvalues i and j */ stmp = ritz[j]; ritz[j] = ritz[i]; ritz[i] = stmp; rtmp = bnd[j]; bnd[j] = bnd[i]; bnd[i] = rtmp; ctmp = conv[j]; conv[j] = conv[i]; conv[i] = ctmp; /* swap eigenvectors i and j */ ierr = BVGetColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = BVGetColumn(eps->V,j,&vj);CHKERRQ(ierr); ierr = VecSwap(vi,vj);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,i,&vi);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,j,&vj);CHKERRQ(ierr); } } k = i; } /* store ritz values and estimated errors */ for (i=nconv;i<n;i++) { eps->eigr[i] = ritz[i]; eps->errest[i] = bnd[i]; } ierr = EPSMonitor(eps,eps->its,nconv,eps->eigr,eps->eigi,eps->errest,n);CHKERRQ(ierr); nconv = k; if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS; if (nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL; if (eps->reason == EPS_CONVERGED_ITERATING) { /* copy restart vector */ ierr = BVCopyColumn(eps->V,n,nconv);CHKERRQ(ierr); if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL && !breakdown) { /* Reorthonormalize restart vector */ ierr = BVOrthogonalizeColumn(eps->V,nconv,NULL,&norm,&breakdown);CHKERRQ(ierr); ierr = BVScaleColumn(eps->V,nconv,1.0/norm);CHKERRQ(ierr); } if (breakdown) { /* Use random vector for restarting */ ierr = PetscInfo(eps,"Using random vector for restart\n");CHKERRQ(ierr); ierr = EPSGetStartVector(eps,nconv,&breakdown);CHKERRQ(ierr); } if (breakdown) { /* give up */ eps->reason = EPS_DIVERGED_BREAKDOWN; ierr = PetscInfo(eps,"Unable to generate more start vectors\n");CHKERRQ(ierr); } } } eps->nconv = nconv; ierr = PetscFree4(ritz,bnd,perm,conv);CHKERRQ(ierr); PetscFunctionReturn(0); }
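/* EPSSolve_Lanczos() is invoked by EPSSolve() once the Lanczos type is selected; it assumes a Hermitian problem. A minimal sketch for a symmetric assembled matrix A (helper name illustrative): */
static PetscErrorCode LanczosExample(Mat A)
{
  EPS            eps;
  PetscInt       i,nconv;
  PetscScalar    kr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = EPSCreate(PetscObjectComm((PetscObject)A),&eps);CHKERRQ(ierr);
  ierr = EPSSetOperators(eps,A,NULL);CHKERRQ(ierr);
  ierr = EPSSetProblemType(eps,EPS_HEP);CHKERRQ(ierr);
  ierr = EPSSetType(eps,EPSLANCZOS);CHKERRQ(ierr);
  ierr = EPSSolve(eps);CHKERRQ(ierr);
  ierr = EPSGetConverged(eps,&nconv);CHKERRQ(ierr);
  for (i=0;i<nconv;i++) {
    ierr = EPSGetEigenpair(eps,i,&kr,NULL,NULL,NULL);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"lambda_%D = %g\n",i,(double)PetscRealPart(kr));CHKERRQ(ierr);
  }
  ierr = EPSDestroy(&eps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}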
/*@C PetscSFSetGraph - Set a parallel star forest Collective Input Arguments: + sf - star forest . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves) . nleaves - number of leaf vertices on the current process, each of these references a root on any process . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage . localmode - copy mode for ilocal . iremote - remote locations of root vertices for each leaf on the current process - remotemode - copy mode for iremote Level: intermediate .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() @*/ PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) { PetscErrorCode ierr; PetscTable table; PetscTablePosition pos; PetscMPIInt size; PetscInt i,*rcount,*ranks; PetscFunctionBegin; PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); if (nleaves && ilocal) PetscValidIntPointer(ilocal,4); if (nleaves) PetscValidPointer(iremote,6); if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); ierr = PetscSFReset(sf);CHKERRQ(ierr); sf->nroots = nroots; sf->nleaves = nleaves; if (ilocal) { switch (localmode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); sf->mine = sf->mine_alloc; ierr = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr); sf->minleaf = PETSC_MAX_INT; sf->maxleaf = PETSC_MIN_INT; for (i=0; i<nleaves; i++) { sf->minleaf = PetscMin(sf->minleaf,ilocal[i]); sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]); } break; case PETSC_OWN_POINTER: sf->mine_alloc = (PetscInt*)ilocal; sf->mine = sf->mine_alloc; break; case PETSC_USE_POINTER: sf->mine = (PetscInt*)ilocal; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); } } if (!ilocal || !nleaves) { /* contiguous (or empty) leaf storage: the leaf range is 0..nleaves-1; do not clobber the range computed from ilocal above */ sf->minleaf = 0; sf->maxleaf = nleaves - 1; } switch (remotemode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); sf->remote = sf->remote_alloc; ierr = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr); break; case PETSC_OWN_POINTER: sf->remote_alloc = (PetscSFNode*)iremote; sf->remote = sf->remote_alloc; break; case PETSC_USE_POINTER: sf->remote = (PetscSFNode*)iremote; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); } ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); for (i=0; i<nleaves; i++) { /* Log 1-based rank */ ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); } ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr); ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); for (i=0; i<sf->nranks; i++) { ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); ranks[i]--; /* Convert back to 0-based */ } ierr = PetscTableDestroy(&table);CHKERRQ(ierr); ierr = PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr);
sf->roffset[0] = 0; for (i=0; i<sf->nranks; i++) { ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); sf->roffset[i+1] = sf->roffset[i] + rcount[i]; rcount[i] = 0; } for (i=0; i<nleaves; i++) { PetscInt lo,hi,irank; /* Search for index of iremote[i].rank in sf->ranks */ lo = 0; hi = sf->nranks; while (hi - lo > 1) { PetscInt mid = lo + (hi - lo)/2; if (iremote[i].rank < sf->ranks[mid]) hi = mid; else lo = mid; } if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo; else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank); sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i; sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index; rcount[irank]++; } ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (nroots == PETSC_DETERMINE) { /* Jed, if you have a better way to do this, put it in */ PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0; /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */ ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr); ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank]; ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Could set nroots to this maximum */ for (i = 0; i < size; ++i) maxRoots += numRankRoots[i]; /* Gather all indices */ ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr); leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index; leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; rootOff[0] = 0; for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i]; ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Sort and reduce */ ierr = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr); ierr = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr); ierr = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr); sf->nroots = maxRoots; } #endif sf->graphset = PETSC_TRUE; ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
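/* A small usage sketch for PetscSFSetGraph(): each process owns one root and one leaf, and the leaf references root 0 on the next process (cyclically), so a broadcast rotates the root data. The two-line graph and helper name are illustrative: */
static PetscErrorCode SFExample(MPI_Comm comm)
{
  PetscSF        sf;
  PetscSFNode    remote;
  PetscMPIInt    rank,size;
  PetscInt       rootdata,leafdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  remote.rank  = (rank+1)%size; /* this leaf points at root 0 of the next process */
  remote.index = 0;
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  rootdata = rank; leafdata = -1;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,&rootdata,&leafdata);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,&rootdata,&leafdata);CHKERRQ(ierr); /* leafdata now holds the neighbor's rank */
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}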