/*
   SNESMonitorJacUpdateSpectrum - SNES monitor that prints the eigenvalues of the
   change in the Jacobian.

   For it > 0 the matrix returned by SNESGetJacobian() is duplicated, the copy is
   refilled by SNESComputeJacobian() at the current solution, and the matrix
   returned by SNESGetJacobian() is subtracted from it.  The difference is
   converted to a sequential dense matrix and its eigenvalues are computed with
   LAPACK GEEV and printed on the communicator of snes.

   Input Parameters:
+  snes  - the nonlinear solver
.  it    - iteration number; nothing is done when it is 0
.  fnorm - function norm (not used)
-  ctx   - monitor context (not used)

   Notes:
   Errors with PETSC_ERR_SUP when GEEV is missing, when ESSL provides LAPACK, or
   when PETSc is configured with complex scalars.
*/
PetscErrorCode SNESMonitorJacUpdateSpectrum(SNES snes,PetscInt it,PetscReal fnorm,void *ctx)
{
#if defined(PETSC_MISSING_LAPACK_GEEV)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#elif defined(PETSC_HAVE_ESSL)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"GEEV - No support for ESSL Lapack Routines");
#else
  Vec            X;
  Mat            J,dJ,dJdense;
  PetscErrorCode ierr;
  PetscErrorCode (*func)(SNES,Vec,Mat*,Mat*,MatStructure*,void*);
  PetscInt       n,i;
  PetscBLASInt   nb,lwork;
  PetscReal      *eigr,*eigi;
  MatStructure   flg = DIFFERENT_NONZERO_PATTERN;
  PetscScalar    *work;
  PetscScalar    *a;

  PetscFunctionBegin;
  if (it == 0) PetscFunctionReturn(0);
  /* create the difference between the current update and the current jacobian */
  ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr);
  ierr = SNESGetJacobian(snes,&J,NULL,&func,NULL);CHKERRQ(ierr);
  ierr = MatDuplicate(J,MAT_COPY_VALUES,&dJ);CHKERRQ(ierr);
  ierr = SNESComputeJacobian(snes,X,&dJ,&dJ,&flg);CHKERRQ(ierr);
  ierr = MatAXPY(dJ,-1.0,J,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  /* compute the spectrum directly */
  ierr  = MatConvert(dJ,MATSEQDENSE,MAT_INITIAL_MATRIX,&dJdense);CHKERRQ(ierr);
  ierr  = MatGetSize(dJ,&n,NULL);CHKERRQ(ierr);
  ierr  = PetscBLASIntCast(n,&nb);CHKERRQ(ierr);
  lwork = 3*nb;
  ierr  = PetscMalloc(n*sizeof(PetscReal),&eigr);CHKERRQ(ierr);
  ierr  = PetscMalloc(n*sizeof(PetscReal),&eigi);CHKERRQ(ierr);
  ierr  = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(dJdense,&a);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  {
    PetscBLASInt lierr;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCall("LAPACKgeev",LAPACKgeev_("N","N",&nb,a,&nb,eigr,eigi,NULL,&nb,NULL,&nb,work,&lwork,&lierr));
    /* restore floating point trapping before reporting a LAPACK failure so the push/pop stay balanced */
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
    if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"geev() error %d",(int)lierr);
  }
#else
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex");
#endif
  /* capture the return code of each print so CHKERRQ() tests the call that was just made */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)snes),"Eigenvalues of J_%D - J_%D:\n",it,it-1);CHKERRQ(ierr);
  for (i=0;i<n;i++) {
    ierr = PetscPrintf(PetscObjectComm((PetscObject)snes),"%5D: %20.5g + %20.5gi\n",i,(double)eigr[i],(double)eigi[i]);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(dJdense,&a);CHKERRQ(ierr);
  ierr = MatDestroy(&dJ);CHKERRQ(ierr);
  ierr = MatDestroy(&dJdense);CHKERRQ(ierr);
  ierr = PetscFree(eigr);CHKERRQ(ierr);
  ierr = PetscFree(eigi);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#endif
}
/*@
   PetscDTGaussQuadrature - create Gauss quadrature

   Not Collective

   Input Arguments:
+  npoints - number of points
.  a - left end of interval (often -1)
-  b - right end of interval (often +1)

   Output Arguments:
+  x - quadrature points
-  w - quadrature weights

   Level: intermediate

   Notes:
   x and w must each have room for npoints entries; both are also used as
   scratch space (diagonal and off-diagonal of the tridiagonal Golub-Welsch
   matrix) before the final points and weights are written.

   References:
   Golub and Welsch, Calculation of Quadrature Rules, Math. Comp. 23(106), 221--230, 1969.

.seealso: PetscDTLegendreEval()
@*/
PetscErrorCode PetscDTGaussQuadrature(PetscInt npoints,PetscReal a,PetscReal b,PetscReal *x,PetscReal *w)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscReal      *work;
  PetscScalar    *Z;
  PetscBLASInt   N,LDZ,info;

  PetscFunctionBegin;
  /* Set up the Golub-Welsch system */
  for (i=0; i<npoints; i++) {
    x[i] = 0;                   /* diagonal is 0 */
    if (i) w[i-1] = 0.5 / PetscSqrtReal(1 - 1./PetscSqr(2*i));
  }
  ierr = PetscMalloc2(npoints*npoints,PetscScalar,&Z,PetscMax(1,2*npoints-2),PetscReal,&work);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(npoints,&N);CHKERRQ(ierr);
  LDZ  = N;
  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
  PetscStackCall("LAPACKsteqr",LAPACKsteqr_("I",&N,x,w,Z,&LDZ,work,&info));
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"xSTEQR error");

  for (i=0; i<(npoints+1)/2; i++) {
    PetscReal y = 0.5 * (-x[i] + x[npoints-i-1]); /* enforces symmetry */
    x[i]           = (a+b)/2 - y*(b-a)/2;
    x[npoints-i-1] = (a+b)/2 + y*(b-a)/2;

    /* Average the SQUARED first components of the two mirrored eigenvectors.  The sign of an
       eigenvector is arbitrary, so adding the components before squaring could cancel them. */
    w[i] = w[npoints-1-i] = 0.5*(b-a)*(PetscSqr(PetscAbsScalar(Z[i*npoints])) + PetscSqr(PetscAbsScalar(Z[(npoints-i-1)*npoints])));
  }
  ierr = PetscFree2(Z,work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/**************************************xyt.c***********************************/
/*
   do_xyt_solve - overwrite uc with X.(Y^T.uc) using the columns stored in the handle.

   The columns of Y and X are packed back to back in info->y and info->x.  Each
   column is described by an (offset,length) pair in ycol_indices/xcol_indices,
   and each index list is terminated by -1.
*/
static PetscErrorCode do_xyt_solve(xyt_ADT xyt_handle, PetscScalar *uc)
{
  PetscInt       nlevels  = xyt_handle->level;
  PetscInt       nrows    = xyt_handle->info->n;
  PetscInt       ncols    = xyt_handle->info->m;
  PetscInt       *stages  = xyt_handle->info->stages;
  PetscScalar    *coef    = xyt_handle->info->solve_uu;
  PetscScalar    *scratch = xyt_handle->info->solve_w;
  PetscScalar    *column;
  PetscInt       *pair;
  PetscInt       first,count,k;
  PetscBLASInt   unit = 1,bcount;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PCTFS_rvec_zero(coef,ncols);

  /* x  = X.Y^T.b */
  /* uu = Y^T.b : one dot product per stored column of Y */
  column = xyt_handle->info->y;
  pair   = xyt_handle->info->ycol_indices;
  k      = 0;
  while (*pair != -1) {
    first = pair[0];
    count = pair[1];
    pair += 2;
    ierr  = PetscBLASIntCast(count,&bcount);CHKERRQ(ierr);
    PetscStackCall("BLASdot",coef[k] = BLASdot_(&bcount,uc+first,&unit,column,&unit));
    k++;
    column += count;
  }

  /* communication of beta */
  if (nlevels) PCTFS_ssgl_radd(coef, scratch, nlevels, stages);

  PCTFS_rvec_zero(uc,nrows);

  /* x = X.uu : accumulate each stored column of X scaled by its coefficient */
  column = xyt_handle->info->x;
  pair   = xyt_handle->info->xcol_indices;
  k      = 0;
  while (*pair != -1) {
    first = pair[0];
    count = pair[1];
    pair += 2;
    ierr  = PetscBLASIntCast(count,&bcount);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&bcount,&coef[k],column,&unit,uc+first,&unit));
    k++;
    column += count;
  }
  PetscFunctionReturn(0);
}
/*
   MatLUFactorSymbolic_SuperLU_DIST - set up the SuperLU_DIST process grid and the
   ScalePermstruct/LUstruct for the global size of A, then install the numeric
   factorization and solve callbacks on F.

   The r, c and info arguments are not used.
*/
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu   = (Mat_SuperLU_DIST*)F->spptr;
  PetscInt         nrows = A->rmap->N;
  PetscInt         ncols = A->cmap->N;

  PetscFunctionBegin;
  /* process grid first; the two structures below are sized by the global dimensions */
  PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));

  PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(nrows, ncols, &lu->ScalePermstruct));
  PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(nrows, ncols, &lu->LUstruct));

  /* callbacks used once the symbolic phase is done */
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;

  /* tell the destroy routine that SuperLU_DIST data now exists */
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(0);
}
/*
   MatScale_MPIDense - multiply every locally stored entry of inA by alpha.

   The local block is treated as one contiguous array of
   (local rows) x (global columns) scalars and scaled with a single BLAS call.
*/
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *mpi    = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *local  = (Mat_SeqDense*)mpi->A->data;
  PetscScalar    factor  = alpha;
  PetscBLASInt   stride  = 1;
  PetscBLASInt   nvalues;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(inA->rmap->n*inA->cmap->N,&nvalues);CHKERRQ(ierr);
  PetscStackCall("BLASscal",BLASscal_(&nvalues,&factor,local->v,&stride));
  ierr = PetscLogFlops(nvalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatDestroy_SuperLU_DIST - release the SuperLU_DIST data hanging off A->spptr and
   then hand A to the AIJ destroy routine that matches its type.

   SuperLU_DIST storage is only torn down when the CleanUpSuperLU_Dist flag is set.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscBool        isseqaij;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  if (lu && lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage; the matrix copy depends on the input mode */
    if (lu->MatInputMode != GLOBAL) {
      PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
        PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
      }
    } else {
      PetscStackCall("SuperLU_DIST:Destroy_CompCol_Matrix_dist",Destroy_CompCol_Matrix_dist(&lu->A_sup));
    }

    PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct));

    /* Release the SuperLU_DIST process grid and the communicator it was built on. */
    PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid));
    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }
  ierr = PetscFree(A->spptr);CHKERRQ(ierr);

  /* finish with the destroy routine of the underlying AIJ format */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
  if (!isseqaij) {
    ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA) { SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; PetscInt i,j; Vec *Fdot = ngmres->Fdot; Vec *Xdot = ngmres->Xdot; PetscScalar *beta = ngmres->beta; PetscScalar *xi = ngmres->xi; PetscScalar alph_total = 0.; PetscErrorCode ierr; PetscReal nu; Vec Y = snes->work[2]; PetscBool changed_y,changed_w; PetscFunctionBegin; nu = fMnorm*fMnorm; /* construct the right hand side and xi factors */ ierr = VecMDot(FM,l,Fdot,xi);CHKERRQ(ierr); for (i = 0; i < l; i++) beta[i] = nu - xi[i]; /* construct h */ for (j = 0; j < l; j++) { for (i = 0; i < l; i++) { H(i,j) = Q(i,j)-xi[i]-xi[j]+nu; } } if (l == 1) { /* simply set alpha[0] = beta[0] / H[0, 0] */ if (H(0,0) != 0.) beta[0] = beta[0]/H(0,0); else beta[0] = 0.; } else { #if defined(PETSC_MISSING_LAPACK_GELSS) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine."); #else ierr = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr); ierr = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr); ngmres->info = 0; ngmres->rcond = -1.; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info)); #else PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info)); #endif ierr = PetscFPTrapPop();CHKERRQ(ierr); if (ngmres->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS"); if (ngmres->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge"); #endif } for (i=0; i<l; i++) { if 
(PetscIsInfOrNanScalar(beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output"); } alph_total = 0.; for (i = 0; i < l; i++) alph_total += beta[i]; ierr = VecCopy(XM,XA);CHKERRQ(ierr); ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr); /* check the validity of the step */ ierr = VecCopy(XA,Y);CHKERRQ(ierr); ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr); ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr); if (!ngmres->approxfunc) {ierr = SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr);} else { ierr = VecCopy(FM,FA);CHKERRQ(ierr); ierr = VecScale(FA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(FA,l,beta,Fdot);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/* approximately solve the overdetermined system: 2*F(x_i)\cdot F(\x_j)\alpha_i = 0 \alpha_i = 1 Which minimizes the L2 norm of the linearization of: ||F(\sum_i \alpha_i*x_i)||^2 With the constraint that \sum_i\alpha_i = 1 Where x_i is the solution from the ith subsolver. */ static PetscErrorCode SNESCompositeApply_AdditiveOptimal(SNES snes,Vec X,Vec B,Vec F,PetscReal *fnorm) { PetscErrorCode ierr; SNES_Composite *jac = (SNES_Composite*)snes->data; SNES_CompositeLink next = jac->head; Vec *Xes = jac->Xes,*Fes = jac->Fes; PetscInt i,j; PetscScalar tot,total,ftf; PetscReal min_fnorm; PetscInt min_i; SNESConvergedReason reason; PetscFunctionBegin; if (!next) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE,"No composite SNESes supplied via SNESCompositeAddSNES() or -snes_composite_sneses"); if (snes->normschedule == SNES_NORM_ALWAYS) { next = jac->head; ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr); while (next->next) { next = next->next; ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr); } } next = jac->head; i = 0; ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr); ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr); ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { jac->innerFailures++; if (jac->innerFailures >= snes->maxFailures) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } while (next->next) { i++; next = next->next; ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr); ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr); ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { jac->innerFailures++; if (jac->innerFailures >= snes->maxFailures) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } } /* all the solutions are collected; combine optimally */ for (i=0;i<jac->n;i++) { for (j=0;j<i+1;j++) { ierr = VecDotBegin(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr); } ierr = 
VecDotBegin(Fes[i],F,&jac->g[i]);CHKERRQ(ierr); } for (i=0;i<jac->n;i++) { for (j=0;j<i+1;j++) { ierr = VecDotEnd(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr); if (i == j) jac->fnorms[i] = PetscSqrtReal(PetscRealPart(jac->h[i + j*jac->n])); } ierr = VecDotEnd(Fes[i],F,&jac->g[i]);CHKERRQ(ierr); } ftf = (*fnorm)*(*fnorm); for (i=0; i<jac->n; i++) { for (j=i+1;j<jac->n;j++) { jac->h[i + j*jac->n] = jac->h[j + i*jac->n]; } } for (i=0; i<jac->n; i++) { for (j=0; j<jac->n; j++) { jac->h[i + j*jac->n] = jac->h[i + j*jac->n] - jac->g[j] - jac->g[i] + ftf; } jac->beta[i] = ftf - jac->g[i]; } #if defined(PETSC_MISSING_LAPACK_GELSS) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"SNESCOMPOSITE with ADDITIVEOPTIMAL requires the LAPACK GELSS routine."); #else jac->info = 0; jac->rcond = -1.; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,jac->rwork,&jac->info)); #else PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,&jac->info)); #endif ierr = PetscFPTrapPop();CHKERRQ(ierr); if (jac->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS"); if (jac->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge"); #endif tot = 0.; total = 0.; for (i=0; i<jac->n; i++) { if (snes->errorifnotconverged && PetscIsInfOrNanScalar(jac->beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output"); ierr = PetscInfo2(snes,"%D: %g\n",i,(double)PetscRealPart(jac->beta[i]));CHKERRQ(ierr); tot += jac->beta[i]; total += PetscAbsScalar(jac->beta[i]); } ierr = VecScale(X,(1. 
- tot));CHKERRQ(ierr); ierr = VecMAXPY(X,jac->n,jac->beta,Xes);CHKERRQ(ierr); ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); if (snes->xl && snes->xu) { ierr = SNESVIComputeInactiveSetFnorm(snes, F, X, fnorm);CHKERRQ(ierr); } else { ierr = VecNorm(F, NORM_2, fnorm);CHKERRQ(ierr); } /* take the minimum-normed candidate if it beats the combination by a factor of rtol or the combination has stagnated */ min_fnorm = jac->fnorms[0]; min_i = 0; for (i=0; i<jac->n; i++) { if (jac->fnorms[i] < min_fnorm) { min_fnorm = jac->fnorms[i]; min_i = i; } } /* stagnation or divergence restart to the solution of the solver that failed the least */ if (PetscRealPart(total) < jac->stol || min_fnorm*jac->rtol < *fnorm) { ierr = VecCopy(jac->Xes[min_i],X);CHKERRQ(ierr); ierr = VecCopy(jac->Fes[min_i],F);CHKERRQ(ierr); *fnorm = min_fnorm; } PetscFunctionReturn(0); }
/**************************************xyt.c***********************************/
/*
   xyt_generate - build the column storage for the X and Y factors and store it
   in xyt_handle->info.

   One column of X and one column of Y is generated per outer iteration.  Each
   iteration:
     1. picks the global column number to "fire" via a min-reduction over fo[],
     2. forms the unit vector v for that column and u = A.v,
     3. projects against the columns generated so far (dot products with the
        stored Y columns, a staged sum, then an accumulation of the stored X
        columns into z) and subtracts z from v,
     4. recomputes u = A.v, normalizes v and u by sqrt(u.u),
     5. appends the span between the first and last nonzero of v to x and of u
        to y, recording (offset,length) in xcol_indices / ycol_indices.

   Both index lists are terminated by -1 so that the solve routine can walk them.

   Returns 0 on success; failures are reported through SETERRQ/CHKERRQ.

   NOTE(review): all buffers come from plain malloc() and are not checked for
   NULL; none of them is released on the SETERRQ/CHKERRQ error paths.
*/
static PetscInt xyt_generate(xyt_ADT xyt_handle)
{
  PetscInt       i,j,k,idx;
  PetscInt       dim, col;
  PetscScalar    *u, *uu, *v, *z, *w, alpha, alpha_w;
  PetscInt       *segs;
  PetscInt       op[] = {GL_ADD,0};
  PetscInt       off, len;
  PetscScalar    *x_ptr, *y_ptr;
  PetscInt       *iptr, flag;
  PetscInt       start =0, end, work;
  PetscInt       op2[] = {GL_MIN,0};
  PCTFS_gs_ADT   PCTFS_gs_handle;
  PetscInt       *nsep, *lnsep, *fo;
  PetscInt       a_n =xyt_handle->mvi->n;
  PetscInt       a_m =xyt_handle->mvi->m;
  PetscInt       *a_local2global=xyt_handle->mvi->local2global;
  PetscInt       level;
  PetscInt       n, m;
  PetscInt       *xcol_sz, *xcol_indices, *stages;
  PetscScalar    **xcol_vals, *x;
  PetscInt       *ycol_sz, *ycol_indices;
  PetscScalar    **ycol_vals, *y;
  PetscInt       n_global;
  PetscInt       xt_nnz =0, xt_max_nnz=0;
  PetscInt       yt_nnz =0, yt_max_nnz=0;
  /* the four *_zero_nnz* counters below are incremented but never read in this routine */
  PetscInt       xt_zero_nnz =0;
  PetscInt       xt_zero_nnz_0=0;
  PetscInt       yt_zero_nnz =0;
  PetscInt       yt_zero_nnz_0=0;
  PetscBLASInt   i1 = 1,dlen;
  PetscScalar    dm1 = -1.0;
  PetscErrorCode ierr;

  n =xyt_handle->mvi->n;
  nsep =xyt_handle->info->nsep;
  lnsep =xyt_handle->info->lnsep;
  fo =xyt_handle->info->fo;
  end =lnsep[0];
  level =xyt_handle->level;
  PCTFS_gs_handle=xyt_handle->mvi->PCTFS_gs_handle;

  /* is there a null space? */
  /* LATER add in ability to detect null space by checking alpha */
  /* j = sum of nsep[0..level]; m = that sum minus the handle's ns value = number of columns to generate */
  for (i=0, j=0; i<=level; i++) j+=nsep[i];

  m = j-xyt_handle->ns;
  if (m!=j) {
    ierr = PetscPrintf(PETSC_COMM_WORLD,"xyt_generate() :: null space exists %D %D %D\n",m,j,xyt_handle->ns);CHKERRQ(ierr);
  }

  ierr = PetscInfo2(0,"xyt_generate() :: X(%D,%D)\n",n,m);CHKERRQ(ierr);

  /* get and initialize storage for x local */
  /* note that x local is nxm and stored by columns */
  /* xcol_indices holds an (offset,length) pair per column followed by a terminating -1 */
  xcol_sz = (PetscInt*) malloc(m*sizeof(PetscInt));
  xcol_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
  xcol_vals = (PetscScalar**) malloc(m*sizeof(PetscScalar*));
  for (i=j=0; i<m; i++, j+=2) {
    xcol_indices[j]=xcol_indices[j+1]=xcol_sz[i]=-1;
    xcol_vals[i] = NULL;
  }
  xcol_indices[j]=-1;

  /* get and initialize storage for y local */
  /* note that y local is nxm and stored by columns */
  ycol_sz = (PetscInt*) malloc(m*sizeof(PetscInt));
  ycol_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
  ycol_vals = (PetscScalar**) malloc(m*sizeof(PetscScalar*));
  for (i=j=0; i<m; i++, j+=2) {
    ycol_indices[j]=ycol_indices[j+1]=ycol_sz[i]=-1;
    ycol_vals[i] = NULL;
  }
  ycol_indices[j]=-1;

  /* size of separators for each sub-hc working from bottom of tree to top */
  /* this looks like nsep[]=segments */
  /* segs becomes the running (prefix) sum of nsep[]; stages is filled in as columns are generated */
  stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  segs = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  PCTFS_ivec_zero(stages,level+1);
  PCTFS_ivec_copy(segs,nsep,level+1);
  for (i=0; i<level; i++) segs[i+1] += segs[i];
  stages[0] = segs[0];

  /* temporary vectors */
  u = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  z = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  v = (PetscScalar*) malloc(a_m*sizeof(PetscScalar));
  uu = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  w = (PetscScalar*) malloc(m*sizeof(PetscScalar));

  /* extra nnz due to replication of vertices across separators */
  for (i=1, j=0; i<=level; i++) j+=nsep[i];

  /* storage for sparse x values */
  /* initial capacity estimate; both buffers are doubled on demand in the main loop */
  n_global = xyt_handle->info->n_global;
  xt_max_nnz = yt_max_nnz = (PetscInt)(2.5*PetscPowReal(1.0*n_global,1.6667) + j*n/2)/PCTFS_num_nodes;
  x = (PetscScalar*) malloc(xt_max_nnz*sizeof(PetscScalar));
  y = (PetscScalar*) malloc(yt_max_nnz*sizeof(PetscScalar));

  /* LATER - can embed next sep to fire in gs */
  /* time to make the donuts - generate X factor */
  for (dim=i=j=0; i<m; i++) {
    /* time to move to the next level? */
    while (i==segs[dim]) {
      if (dim==level) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim about to exceed level\n");
      stages[dim++]=i;
      end +=lnsep[dim];
    }
    stages[dim]=i;

    /* which column are we firing? */
    /* i.e. set v_l */
    /* use new seps and do global min across hc to determine which one to fire */
    (start<end) ? (col=fo[start]) : (col=INT_MAX);
    PCTFS_giop_hc(&col,&work,1,op2,dim);

    /* shouldn't need this */
    if (col==INT_MAX) {
      ierr = PetscInfo(0,"hey ... col==INT_MAX??\n");CHKERRQ(ierr);
      continue;
    }

    /* do I own it? I should */
    /* NOTE(review): fo[start] is read here even when start>=end; confirm fo[] is long enough */
    PCTFS_rvec_zero(v,a_m);
    if (col==fo[start]) {
      start++;
      idx=PCTFS_ivec_linear_search(col, a_local2global, a_n);
      if (idx!=-1) {
        v[idx] = 1.0;
        j++;
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NOT FOUND!\n");
    } else {
      idx=PCTFS_ivec_linear_search(col, a_local2global, a_m);
      if (idx!=-1) v[idx] = 1.0;
    }

    /* perform u = A.v_l */
    PCTFS_rvec_zero(u,n);
    do_matvec(xyt_handle->mvi,v,u);

    /* uu = X^T.u_l (local portion) */
    /* technically only need to zero out first i entries */
    /* later turn this into an XYT_solve call ? */
    /* note: the dot products below run over the stored Y columns (y, ycol_indices) */
    PCTFS_rvec_zero(uu,m);
    y_ptr=y;
    iptr = ycol_indices;
    for (k=0; k<i; k++) {
      off = *iptr++;
      len = *iptr++;
      ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCall("BLASdot",uu[k] = BLASdot_(&dlen,u+off,&i1,y_ptr,&i1));
      y_ptr+=len;
    }

    /* uu = X^T.u_l (comm portion) */
    PCTFS_ssgl_radd (uu, w, dim, stages);

    /* z = X.uu */
    /* after this loop x_ptr has been advanced past every column stored so far */
    PCTFS_rvec_zero(z,n);
    x_ptr=x;
    iptr = xcol_indices;
    for (k=0; k<i; k++) {
      off = *iptr++;
      len = *iptr++;
      ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCall("BLASaxpy",BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1));
      x_ptr+=len;
    }

    /* compute v_l = v_l - z */
    /* entries a_n..a_m-1 of v are cleared first; only the first n entries take part in the update */
    PCTFS_rvec_zero(v+a_n,a_m-a_n);
    ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1));

    /* compute u_l = A.v_l */
    if (a_n!=a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle,v,"+\0",dim);
    PCTFS_rvec_zero(u,n);
    do_matvec(xyt_handle->mvi,v,u);

    /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - local portion */
    ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCall("BLASdot",alpha = BLASdot_(&dlen,u,&i1,u,&i1));
    /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - comm portion */
    PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim);

    alpha = (PetscScalar) PetscSqrtReal((PetscReal)alpha);

    /* check for small alpha */
    /* LATER use this to detect and determine null space */
    if (fabs(alpha)<1.0e-14) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"bad alpha! %g\n",alpha);

    /* compute v_l = v_l/sqrt(alpha) */
    PCTFS_rvec_scale(v,1.0/alpha,n);
    PCTFS_rvec_scale(u,1.0/alpha,n);

    /* add newly generated column, v_l, to X */
    /* off = index of the first nonzero, len = index of the last nonzero (converted to a length below) */
    flag = 1;
    off =len=0;
    for (k=0; k<n; k++) {
      if (v[k]!=0.0) {
        len=k;
        if (flag) {off=k; flag=0;}
      }
    }

    len -= (off-1);

    if (len>0) {
      if ((xt_nnz+len)>xt_max_nnz) {
        /* grow by allocating a new buffer, copying, and freeing the old one */
        /* NOTE(review): xcol_vals[] entries stored for earlier columns still point into the freed buffer */
        ierr = PetscInfo(0,"increasing space for X by 2x!\n");CHKERRQ(ierr);
        xt_max_nnz *= 2;
        x_ptr = (PetscScalar*) malloc(xt_max_nnz*sizeof(PetscScalar));
        PCTFS_rvec_copy(x_ptr,x,xt_nnz);
        free(x);
        x = x_ptr;
        x_ptr+=xt_nnz;
      }
      xt_nnz += len;
      PCTFS_rvec_copy(x_ptr,v+off,len);

      /* keep track of number of zeros */
      if (dim) {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xt_zero_nnz++;
        }
      } else {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xt_zero_nnz_0++;
        }
      }
      xcol_indices[2*i] = off;
      xcol_sz[i] = xcol_indices[2*i+1] = len;
      xcol_vals[i] = x_ptr;
    } else {
      xcol_indices[2*i] = 0;
      xcol_sz[i] = xcol_indices[2*i+1] = 0;
      xcol_vals[i] = x_ptr;
    }

    /* add newly generated column, u_l, to Y */
    /* same trimming and storage scheme as for X above; y_ptr was advanced past the stored columns by the dot-product loop */
    flag = 1;
    off =len=0;
    for (k=0; k<n; k++) {
      if (u[k]!=0.0) {
        len=k;
        if (flag) { off=k; flag=0; }
      }
    }

    len -= (off-1);

    if (len>0) {
      if ((yt_nnz+len)>yt_max_nnz) {
        /* NOTE(review): as for X, earlier ycol_vals[] entries are left pointing into the freed buffer */
        ierr = PetscInfo(0,"increasing space for Y by 2x!\n");CHKERRQ(ierr);
        yt_max_nnz *= 2;
        y_ptr = (PetscScalar*) malloc(yt_max_nnz*sizeof(PetscScalar));
        PCTFS_rvec_copy(y_ptr,y,yt_nnz);
        free(y);
        y = y_ptr;
        y_ptr+=yt_nnz;
      }
      yt_nnz += len;
      PCTFS_rvec_copy(y_ptr,u+off,len);

      /* keep track of number of zeros */
      if (dim) {
        for (k=0; k<len; k++) {
          if (y_ptr[k]==0.0) yt_zero_nnz++;
        }
      } else {
        for (k=0; k<len; k++) {
          if (y_ptr[k]==0.0) yt_zero_nnz_0++;
        }
      }
      ycol_indices[2*i] = off;
      ycol_sz[i] = ycol_indices[2*i+1] = len;
      ycol_vals[i] = y_ptr;
    } else {
      ycol_indices[2*i] = 0;
      ycol_sz[i] = ycol_indices[2*i+1] = 0;
      ycol_vals[i] = y_ptr;
    }
  }

  /* close off stages for execution phase */
  while (dim!=level) {
    stages[dim++]=i;
    ierr = PetscInfo2(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);CHKERRQ(ierr);
  }
  stages[dim]=i;

  /* publish the results in the handle; the arrays assigned here are not freed by this routine */
  xyt_handle->info->n =xyt_handle->mvi->n;
  xyt_handle->info->m =m;
  xyt_handle->info->nnz =xt_nnz + yt_nnz;
  xyt_handle->info->max_nnz =xt_max_nnz + yt_max_nnz;
  xyt_handle->info->msg_buf_sz =stages[level]-stages[0];
  xyt_handle->info->solve_uu = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xyt_handle->info->solve_w = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xyt_handle->info->x =x;
  xyt_handle->info->xcol_vals =xcol_vals;
  xyt_handle->info->xcol_sz =xcol_sz;
  xyt_handle->info->xcol_indices=xcol_indices;
  xyt_handle->info->stages =stages;
  xyt_handle->info->y =y;
  xyt_handle->info->ycol_vals =ycol_vals;
  xyt_handle->info->ycol_sz =ycol_sz;
  xyt_handle->info->ycol_indices=ycol_indices;

  /* release the scratch arrays that are not handed to the handle */
  free(segs);
  free(u);
  free(v);
  free(uu);
  free(z);
  free(w);

  return(0);
}
/*
   KSPSolve_BCGSL - solve routine of the BCGSL Krylov method (presumably
   BiCGStab(ell); confirm against the type's documentation).

   Each outer iteration consists of
     - a "BiCG part" of ell steps that extends the residual block VVR[0..ell]
       and the search block VVU[0..ell] and updates the solution,
     - a "polynomial part" that forms the (ell+1)x(ell+1) matrix of residual
       inner products (MZa, copied to MZb) and solves a small system for the
       coefficients AY0c, either by Cholesky (POTRF/POTRS) or, when
       bcgsl->pinv is set, by an SVD based pseudo-inverse; when bcgsl->bConvex
       is set and ell > 1 a second vector AYlc is combined with AY0c,
     - an update of the solution, VVU[0] and VVR[0] with those coefficients,
     - optionally (bcgsl->delta > 0) an "accurate update" that recomputes the
       residual and may fold the solution accumulated so far into VXR.

   The work vectors ksp->work[] are used as: three single vectors, two blocks
   of ell+1 vectors, and one more single vector (2*ell+6 in total).

   Sets ksp->reason; returns early on breakdown or divergence.

   Note: VX, VB, VRT, VTM, VXR, VVR, VVU, MZa, MZb, AY0c, AYlc and AYtc are macros
   defined outside this routine.
*/
static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  /* vi walks through ksp->work, handing out single vectors and two blocks of ell+1 vectors */
  vi = 0;
  ell = bcgsl->ell;
  bcgsl->vB = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;

  /* leading dimension of the small (ell+1)x(ell+1) matrices */
  ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);

  ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true = zeta0;

  /* already converged (or failed) on the initial residual */
  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its = 0;
    ksp->rnorm = zeta0;
    ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0 = omega = 1;

  if (bcgsl->delta>0.0) {
    /* with accurate updates the current solution is parked in VXR and the iteration
       restarts from zero with the initial residual as right hand side */
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  /* the iteration counter advances by ell per outer pass */
  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      /* NOTE(review): only a negative reason returns here; with a positive reason the
         inner loop keeps going and the polynomial part still runs */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
        ksp->its = k+j;
        ksp->rnorm = nrm0;
        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    /* fill one triangle of MZa with inner products of the residual block, one VecMDot per vector */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    /* MZa is overwritten by the factorizations below; MZb keeps the original entries */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      /* solve with the trailing ell x ell block of MZa (starting at entry [1+ldMZ]) */
      AY0c[0] = -1;
      if (bcgsl->pinv) {
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
# if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
# else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
# endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        /* singular values below the tolerance are skipped */
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      PetscBLASInt ione = 1;
      PetscScalar aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

      /* factor the (ell-1)x(ell-1) block once and reuse it for two right hand sides */
#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0] = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0] = 0.;
      AYlc[bcgsl->ell] = -1;

      /* AYtc = MZb*AY0c, then kappa0 = sqrt(|AY0c . AYtc|) and kappaA = AYlc . AYtc */
      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      /* AYtc = MZb*AYlc, then kappa1 = sqrt(|AYlc . AYtc|) */
      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        /* ghat is kappaA/kappa1^2 unless kappaA is below 0.7*kappa0*kappa1, in which case
           its magnitude is set to 0.7*kappa0/kappa1 with the sign of kappaA */
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    /* omega is the last coefficient; if it is zero, fall back to the highest-index nonzero one */
    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }

    /* x gains +AY0c[1..ell] times the residual block; u_0 and r_0 gain the negated
       coefficients (AY0c is flipped in place for those two calls and then restored) */
    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          /* fold the current solution into VXR and restart from zero with the new residual as right hand side */
          ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  /* add back the part of the solution that was parked in VXR */
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
/*
   PCSetUp_SVD - Computes the dense singular value decomposition used by the SVD preconditioner.

   Input Parameter:
.  pc - the preconditioner context; pc->data is a PC_SVD and pc->pmat is the matrix to decompose

   What the routine does, in order:
   - converts pc->pmat to a MATSEQDENSE matrix stored in jac->A (on more than one process a
     redundant copy is obtained first with MatGetRedundantMatrix());
   - creates jac->diag/jac->work and jac->U/jac->Vt on first use;
   - calls LAPACK gesvd with jobu = jobvt = "A" on the n x n dense array (real scalars only);
   - sets jac->nzero to the number of trailing singular values that are not larger than jac->zerosing;
   - optionally reports the singular values on jac->monitor;
   - overwrites the first n - nzero entries of jac->diag with their reciprocals and zeroes the rest;
     when jac->essrank > 0 the leading n - nzero - essrank entries are zeroed as well.

   Returns 0 on success or a PETSc error code (PETSC_ERR_SUP without GESVD or with complex scalars,
   PETSC_ERR_LIB when gesvd reports a nonzero info).
*/
static PetscErrorCode PCSetUp_SVD(PC pc)
{
#if defined(PETSC_MISSING_LAPACK_GESVD)
  SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
#else
  PC_SVD         *jac = (PC_SVD*)pc->data;
  PetscErrorCode ierr;
  PetscScalar    *a,*u,*v,*d,*work;
  PetscBLASInt   nb,lwork;
  PetscInt       i,n;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Rebuild the dense copy of the operator from scratch on every setup */
  ierr = MatDestroy(&jac->A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc->pmat)->comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    Mat      redmat;
    PetscInt M;

    ierr = MatGetSize(pc->pmat,&M,NULL);CHKERRQ(ierr);
    ierr = MatGetRedundantMatrix(pc->pmat,size,PETSC_COMM_SELF,M,MAT_INITIAL_MATRIX,&redmat);CHKERRQ(ierr);
    ierr = MatConvert(redmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr);
    ierr = MatDestroy(&redmat);CHKERRQ(ierr);
  } else {
    ierr = MatConvert(pc->pmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr);
  }
  if (!jac->diag) {    /* assume square matrices */
    ierr = MatGetVecs(jac->A,&jac->diag,&jac->work);CHKERRQ(ierr);
  }
  if (!jac->U) {
    ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->U);CHKERRQ(ierr);
    ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->Vt);CHKERRQ(ierr);
  }
  ierr  = MatGetSize(pc->pmat,&n,NULL);CHKERRQ(ierr);
  ierr  = PetscBLASIntCast(n,&nb);CHKERRQ(ierr);
  lwork = 5*nb;
  ierr  = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->A,&a);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->U,&u);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->Vt,&v);CHKERRQ(ierr);
  ierr  = VecGetArray(jac->diag,&d);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  {
    PetscBLASInt lierr;

    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&nb,&nb,a,&nb,d,u,&nb,v,&nb,work,&lwork,&lierr));
    /* Pop the trap before testing lierr so the FP trap stack stays balanced on the error path */
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
    if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"gesvd() error %d",lierr);
  }
#else
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex");
#endif
  ierr = MatDenseRestoreArray(jac->A,&a);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(jac->U,&u);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(jac->Vt,&v);CHKERRQ(ierr);

  /* Scan from the end of d for the first value above the zero threshold */
  for (i=n-1; i>=0; i--) if (PetscRealPart(d[i]) > jac->zerosing) break;
  jac->nzero = n-1-i;

  if (jac->monitor) {
    ierr = PetscViewerASCIIAddTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: condition number %14.12e, %D of %D singular values are (nearly) zero\n",(double)PetscRealPart(d[0]/d[n-1]),jac->nzero,n);CHKERRQ(ierr);
    if (n >= 10) {              /* print 5 smallest and 5 largest */
      ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: smallest singular values: %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[n-1]),(double)PetscRealPart(d[n-2]),(double)PetscRealPart(d[n-3]),(double)PetscRealPart(d[n-4]),(double)PetscRealPart(d[n-5]));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: largest singular values : %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[4]),(double)PetscRealPart(d[3]),(double)PetscRealPart(d[2]),(double)PetscRealPart(d[1]),(double)PetscRealPart(d[0]));CHKERRQ(ierr);
    } else {                    /* print all singular values */
      char     buf[256],*p;
      size_t   left = sizeof(buf),used;
      PetscInt thisline;

      for (p=buf,i=n-1,thisline=1; i>=0; i--,thisline++) {
        ierr  = PetscSNPrintfCount(p,left," %14.12e",&used,(double)PetscRealPart(d[i]));CHKERRQ(ierr);
        left -= used;
        p    += used;
        if (thisline > 4 || i==0) {
          ierr     = PetscViewerASCIIPrintf(jac->monitor," SVD: singular values:%s\n",buf);CHKERRQ(ierr);
          /* Start a fresh line: rewind the write position AND the remaining-space counter together */
          p        = buf;
          left     = sizeof(buf);
          thisline = 0;
        }
      }
    }
    ierr = PetscViewerASCIISubtractTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr);
  }
  ierr = PetscInfo2(pc,"Largest and smallest singular values %14.12e %14.12e\n",(double)PetscRealPart(d[0]),(double)PetscRealPart(d[n-1]));CHKERRQ(ierr);

  /* Store the inverse of the retained singular values; the (nearly) zero ones are set to zero */
  for (i=0; i<n-jac->nzero; i++) d[i] = 1.0/d[i];
  for (; i<n; i++) d[i] = 0.0;
  if (jac->essrank > 0) for (i=0; i<n-jac->nzero-jac->essrank; i++) d[i] = 0.0; /* Skip all but essrank eigenvalues */
  ierr = PetscInfo1(pc,"Number of zero or nearly singular values %D\n",jac->nzero);CHKERRQ(ierr);
  ierr = VecRestoreArray(jac->diag,&d);CHKERRQ(ierr);
#if defined(foo)
  {
    /* Disabled debugging aid: dumps A, U, Vt and the scaled singular values to the binary file "joe" */
    PetscViewer viewer;

    ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"joe",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
    ierr = MatView(jac->A,viewer);CHKERRQ(ierr);
    ierr = MatView(jac->U,viewer);CHKERRQ(ierr);
    ierr = MatView(jac->Vt,viewer);CHKERRQ(ierr);
    ierr = VecView(jac->diag,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#endif
}
/*
   MatLUFactorNumeric_SuperLU_DIST - Numeric LU factorization of A through SuperLU_DIST.

   Input Parameters:
+  F    - the factor matrix; F->spptr holds the Mat_SuperLU_DIST context
.  A    - the matrix to factor
-  info - factorization options (not read by this routine)

   Two input modes are handled, selected by lu->MatInputMode:
   - GLOBAL: the whole matrix is handed to SuperLU_DIST in compressed-column form
     (on more than one process it is first gathered into a sequential matrix);
   - otherwise (distributed): the local rows are assembled in compressed-row form by merging
     the diagonal block (mat->A) and off-diagonal block (mat->B) of the MPIAIJ matrix with
     global column indices.

   The driver (p[dz]gssvx_ABglobal or p[dz]gssvx) is called with zero right-hand sides so that only
   the factorization is performed. A nonzero info from the driver is reported as PETSC_ERR_LIB in
   both input modes. On success lu->options.Fact is set to FACTORED.
*/
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo;   /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  Mat              F_diag=NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);

      /* Keep the single gathered matrix, release only the array that held it */
      A_seq = *tseq;
      ierr  = PetscFree(tseq);CHKERRQ(ierr);
      aa    = (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscBool flg;

      ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa =  (Mat_SeqAIJ*)A->data;
    }

    /* Convert Petsc NR matrix to SuperLU_DIST NC.
       Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
      PetscStackCall("SuperLU_DIST:Destroy_CompCol_Matrix_dist",Destroy_CompCol_Matrix_dist(&lu->A_sup));
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else { /* lu->FactPattern == SamePattern */
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct));
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCompRow_to_CompCol_dist",zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,(int_t*)aa->j,(int_t*)aa->i,&lu->val,&lu->col, &lu->row));
#else
    PetscStackCall("SuperLU_DIST:dCompRow_to_CompCol_dist",dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,(int_t*)aa->j,(int_t*)aa->i,&lu->val, &lu->col, &lu->row));
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompCol_Matrix_dist",zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompCol_Matrix_dist",dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE));
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;

    aa=(Mat_SeqAIJ*)(mat->A)->data;
    bb=(Mat_SeqAIJ*)(mat->B)->data;
    ai=aa->i; aj=aa->j;
    bi=bb->i; bj=bb->j;
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#else
      PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct)); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }

    /* Merge the diagonal (A) and off-diagonal (B) blocks row by row, in increasing global column order */
    nz = 0;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA     = ai[i+1] - ai[i];
      countB     = bi[i+1] - bi[i];
      ajj        = aj + ai[i]; /* ptr to the beginning of this row */
      bjj        = bj + bi[i];

      /* The smallest global col index of A. A row with no diagonal-block entries must not read
         ajj[0] (it belongs to the next row, or lies past the end of aj for the last row); any
         threshold gives the same output in that case because only B entries are copied. */
      colA_start = countA ? rstart + ajj[0] : rstart;

      /* B part, smaller col index */
      jB = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE));
#endif
  }

  /* Factor the matrix. */
  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));   /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    /* The driver's info is checked here exactly as in the distributed branch below */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx_ABglobal fails, info: %d\n",sinfo);
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx_ABglobal fails, info: %d\n",sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  /* The gathered sequential copy is only needed while building A_sup and factoring */
  if (lu->MatInputMode == GLOBAL && size > 1) {
    ierr = MatDestroy(&A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1) {
    F_diag            = ((Mat_MPIAIJ*)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}
/*
   MatMatSolve_SuperLU_DIST - Solves A X = B for several right-hand sides with a SuperLU_DIST factorization.

   Input Parameters:
+  A     - the factored matrix; A->spptr holds the Mat_SuperLU_DIST context
-  B_mpi - the right-hand sides, must be MATSEQDENSE or MATMPIDENSE

   Output Parameter:
.  X - the solutions, must be MATSEQDENSE or MATMPIDENSE; B_mpi is copied into X and the
       solve is performed in place on X's array

   Errors:
   PETSC_ERR_ARG_WRONG if the matrix has not been factored or B_mpi/X are not dense,
   PETSC_ERR_SUP for GLOBAL input mode on more than one process,
   PETSC_ERR_LIB if the SuperLU_DIST driver returns a nonzero info.
*/
PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         M=A->rmap->N,m=A->rmap->n,nrhs;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool        flg;

  PetscFunctionBegin;
  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
  /* size==1 or distributed mat input */
  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
       thus destroy it and create a new SOLVEstruct.
       Otherwise it may result in memory corruption or incorrect solution
       See src/mat/examples/tests/ex125.c */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
    PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
    lu->options.SolveInitialized = NO;
  }

  /* The driver overwrites the right-hand side with the solution, so work on a copy held in X */
  ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));        /* Initialize the statistics variables. */
  ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr);
  if (lu->MatInputMode == GLOBAL) { /* size == 1 */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,&lu->grid, &lu->LUstruct, berr, &stat, &info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs, &lu->grid, &lu->LUstruct, berr, &stat, &info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }

  /* Release the statistics and hand X's array back BEFORE acting on info, so that a failed
     solve neither leaks stat nor leaves the dense array checked out */
  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr);
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

  /* Remember which kind of solve SOLVEstruct was last initialized for */
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
/*
   MatSolve_SuperLU_DIST - Solves A x = b with a SuperLU_DIST factorization.

   Input Parameters:
+  A     - the factored matrix; A->spptr holds the Mat_SuperLU_DIST context
-  b_mpi - the right-hand side

   Output Parameter:
.  x - the solution

   With GLOBAL input mode on more than one process, b_mpi is scattered into a sequential
   vector of length N, the solve is done in place on that vector, and the result is scattered
   back into x. Otherwise b_mpi is copied into x and the solve is done in place on x.

   Errors:
   PETSC_ERR_ARG_WRONG if the matrix has not been factored,
   PETSC_ERR_LIB if the SuperLU_DIST driver returns a nonzero info.
*/
PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr);

  /* Validate the precondition before any vector, scatter or array is acquired, so that the
     error path has nothing to clean up (same ordering as MatMatSolve_SuperLU_DIST) */
  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* global mat input, convert b to x_seq */
    ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
    ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
    ierr = ISDestroy(&iden);CHKERRQ(ierr);

    ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
  } else { /* size==1 || distributed mat input */
    if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
      /* see comments in MatMatSolve() */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
      PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
      lu->options.SolveInitialized = NO;
    }
    /* The driver overwrites the right-hand side with the solution, so solve on a copy held in x */
    ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
  }

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));        /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));

  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* convert seq x to mpi x */
    ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
    ierr = VecDestroy(&x_seq);CHKERRQ(ierr);
  } else {
    ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);

    /* Remember which kind of solve SOLVEstruct was last initialized for */
    lu->matsolve_iscalled    = PETSC_TRUE;
    lu->matmatsolve_iscalled = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
int main(int argc, char **args) { Mat A, L; AppCtx ctx; PetscViewer viewer; PetscErrorCode ierr; ierr = PetscInitialize(&argc, &args, (char *) 0, help);CHKERRQ(ierr); ierr = ProcessOptions(&ctx);CHKERRQ(ierr); /* Load matrix */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, ctx.matFilename, FILE_MODE_READ, &viewer);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); ierr = MatLoad(A, viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); /* Make graph Laplacian from matrix */ ierr = MatLaplacian(A, 1.0e-12, &L);CHKERRQ(ierr); /* Check Laplacian */ PetscReal norm; Vec x, y; ierr = MatGetVecs(L, &x, NULL);CHKERRQ(ierr); ierr = VecDuplicate(x, &y);CHKERRQ(ierr); ierr = VecSet(x, 1.0);CHKERRQ(ierr); ierr = MatMult(L, x, y);CHKERRQ(ierr); ierr = VecNorm(y, NORM_INFINITY, &norm);CHKERRQ(ierr); if (norm > 1.0e-10) SETERRQ(PetscObjectComm((PetscObject) y), PETSC_ERR_PLIB, "Invalid graph Laplacian"); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); /* Compute Fiedler vector, and perhaps more vectors */ Mat LD; PetscScalar *a, *realpart, *imagpart, *eigvec, *work, sdummy; PetscBLASInt bn, bN, lwork, lierr, idummy; PetscInt n, i; ierr = MatConvert(L, MATDENSE, MAT_INITIAL_MATRIX, &LD);CHKERRQ(ierr); ierr = MatGetLocalSize(LD, &n, NULL);CHKERRQ(ierr); ierr = MatDenseGetArray(LD, &a);CHKERRQ(ierr); ierr = PetscBLASIntCast(n, &bn);CHKERRQ(ierr); ierr = PetscBLASIntCast(n, &bN);CHKERRQ(ierr); ierr = PetscBLASIntCast(5*n,&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(1,&idummy);CHKERRQ(ierr); ierr = PetscMalloc4(n,PetscScalar,&realpart,n,PetscScalar,&imagpart,n*n,PetscScalar,&eigvec,lwork,PetscScalar,&work);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCall("LAPACKgeev", LAPACKgeev_("N","V",&bn,a,&bN,realpart,imagpart,&sdummy,&idummy,eigvec,&bN,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine %d", (int) lierr); ierr = 
PetscFPTrapPop();CHKERRQ(ierr); PetscReal *r, *c; PetscInt *perm; ierr = PetscMalloc3(n,PetscInt,&perm,n,PetscReal,&r,n,PetscReal,&c);CHKERRQ(ierr); for (i = 0; i < n; ++i) perm[i] = i; ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i = 0; i < n; ++i) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } for (i = 0; i < n; ++i) { realpart[i] = r[i]; imagpart[i] = c[i]; } /* Output spectrum */ if (ctx.showSpectrum) { ierr = PetscPrintf(PETSC_COMM_SELF, "Spectrum\n");CHKERRQ(ierr); for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: Real %g Imag %g\n", i, realpart[i], imagpart[i]);CHKERRQ(ierr);} } /* Check lowest eigenvalue and eigenvector */ PetscInt evInd = perm[0]; if ((realpart[0] > 1.0e-12) || (imagpart[0] > 1.0e-12)) SETERRQ(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have lowest eigenvalue 0"); for (i = 0; i < n; ++i) { if (fabs(eigvec[evInd*n+i] - eigvec[evInd*n+0]) > 1.0e-10) SETERRQ3(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have constant lowest eigenvector ev_%d %g != ev_0 %g", i, eigvec[evInd*n+i], eigvec[evInd*n+0]); } /* Output Fiedler vector */ evInd = perm[1]; if (ctx.showFiedler) { ierr = PetscPrintf(PETSC_COMM_SELF, "Fiedler vector, Re{ev} %g\n", realpart[1]);CHKERRQ(ierr); for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: %g\n", i, eigvec[evInd*n+i]);CHKERRQ(ierr);} } /* Construct Fiedler partition */ IS fIS, fIS2; PetscInt *fperm, *fperm2, pos, neg, posSize = 0; ierr = PetscMalloc(n * sizeof(PetscInt), &fperm);CHKERRQ(ierr); for (i = 0; i < n; ++i) { if (eigvec[evInd*n+i] > 0.0) ++posSize; } ierr = PetscMalloc(n * sizeof(PetscInt), &fperm2);CHKERRQ(ierr); for (i = 0; i < n; ++i) fperm[i] = i; ierr = PetscSortRealWithPermutation(n, &eigvec[evInd*n], fperm);CHKERRQ(ierr); for (i = 0; i < n; ++i) fperm2[n-1-i] = fperm[i]; for (i = 0, pos = 0, neg = posSize; i < n; ++i) { if (eigvec[evInd*n+i] > 0.0) fperm[pos++] = i; else fperm[neg++] 
= i; } ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm, PETSC_OWN_POINTER, &fIS);CHKERRQ(ierr); ierr = ISSetPermutation(fIS);CHKERRQ(ierr); ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm2, PETSC_OWN_POINTER, &fIS2);CHKERRQ(ierr); ierr = ISSetPermutation(fIS2);CHKERRQ(ierr); ierr = PetscFree3(perm,r,c);CHKERRQ(ierr); ierr = PetscFree4(realpart,imagpart,eigvec,work);CHKERRQ(ierr); ierr = MatDenseRestoreArray(LD, &a);CHKERRQ(ierr); ierr = MatDestroy(&LD);CHKERRQ(ierr); ierr = MatDestroy(&L);CHKERRQ(ierr); /* Permute matrix */ Mat AR, AR2; ierr = MatPermute(A, fIS, fIS, &AR);CHKERRQ(ierr); ierr = MatView(A, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(AR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&fIS);CHKERRQ(ierr); ierr = MatPermute(A, fIS2, fIS2, &AR2);CHKERRQ(ierr); ierr = MatView(AR2, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&fIS2);CHKERRQ(ierr); ierr = MatDestroy(&AR);CHKERRQ(ierr); AR = AR2; /* Extract blocks and reorder */ Mat AP, AN, APR, ANR; IS ispos, isneg, rpermpos, cpermpos, rpermneg, cpermneg; PetscInt bw, bwr; ierr = ISCreateStride(PETSC_COMM_SELF, posSize, 0, 1, &ispos);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF, n - posSize, posSize, 1, &isneg);CHKERRQ(ierr); ierr = MatGetSubMatrix(AR, ispos, ispos, MAT_INITIAL_MATRIX, &AP);CHKERRQ(ierr); ierr = MatGetSubMatrix(AR, isneg, isneg, MAT_INITIAL_MATRIX, &AN);CHKERRQ(ierr); ierr = ISDestroy(&ispos);CHKERRQ(ierr); ierr = ISDestroy(&isneg);CHKERRQ(ierr); ierr = MatGetOrdering(AP, ctx.matOrdtype, &rpermpos, &cpermpos);CHKERRQ(ierr); ierr = MatGetOrdering(AN, ctx.matOrdtype, &rpermneg, &cpermneg);CHKERRQ(ierr); ierr = MatPermute(AP, rpermpos, cpermpos, &APR);CHKERRQ(ierr); ierr = MatComputeBandwidth(AP, 0.0, &bw);CHKERRQ(ierr); ierr = MatComputeBandwidth(APR, 0.0, &bwr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced positive bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr); ierr = MatPermute(AN, rpermneg, 
cpermneg, &ANR);CHKERRQ(ierr); ierr = MatComputeBandwidth(AN, 0.0, &bw);CHKERRQ(ierr); ierr = MatComputeBandwidth(ANR, 0.0, &bwr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced negative bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr); ierr = MatView(AP, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(APR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(AN, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(ANR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); /* Reorder original matrix */ Mat ARR; IS rperm, cperm; PetscInt *idx; const PetscInt *cidx; ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr); ierr = ISGetIndices(rpermpos, &cidx);CHKERRQ(ierr); for (i = 0; i < posSize; ++i) idx[i] = cidx[i]; ierr = ISRestoreIndices(rpermpos, &cidx);CHKERRQ(ierr); ierr = ISGetIndices(rpermneg, &cidx);CHKERRQ(ierr); for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize; ierr = ISRestoreIndices(rpermneg, &cidx);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &rperm);CHKERRQ(ierr); ierr = ISSetPermutation(rperm);CHKERRQ(ierr); ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr); ierr = ISGetIndices(cpermpos, &cidx);CHKERRQ(ierr); for (i = 0; i < posSize; ++i) idx[i] = cidx[i]; ierr = ISRestoreIndices(cpermpos, &cidx);CHKERRQ(ierr); ierr = ISGetIndices(cpermneg, &cidx);CHKERRQ(ierr); for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize; ierr = ISRestoreIndices(cpermneg, &cidx);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &cperm);CHKERRQ(ierr); ierr = ISSetPermutation(cperm);CHKERRQ(ierr); ierr = MatPermute(AR, rperm, cperm, &ARR);CHKERRQ(ierr); ierr = MatView(ARR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&rperm);CHKERRQ(ierr); ierr = ISDestroy(&cperm);CHKERRQ(ierr); ierr = ISDestroy(&rpermpos);CHKERRQ(ierr); ierr = ISDestroy(&cpermpos);CHKERRQ(ierr); ierr = ISDestroy(&rpermneg);CHKERRQ(ierr); ierr = 
ISDestroy(&cpermneg);CHKERRQ(ierr); ierr = MatDestroy(&AP);CHKERRQ(ierr); ierr = MatDestroy(&AN);CHKERRQ(ierr); ierr = MatDestroy(&APR);CHKERRQ(ierr); ierr = MatDestroy(&ANR);CHKERRQ(ierr); /* Compare bands */ Mat B, BR; ierr = MatCreateSubMatrixBanded(A, 50, 0.95, &B);CHKERRQ(ierr); ierr = MatCreateSubMatrixBanded(ARR, 50, 0.95, &BR);CHKERRQ(ierr); ierr = MatView(B, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(BR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&BR);CHKERRQ(ierr); /* Cleanup */ ierr = MatDestroy(&ARR);CHKERRQ(ierr); ierr = MatDestroy(&AR);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }