int SparseGp_logLikeGrad (SparseGp *gp, HyperParam hp, int lengthInd, double *logLikeGrad) { PetscErrorCode ierr; (void) ierr; /* compute t' inv(K) (dKdt inv(K) t) */ /* 1. solve inv(K) t */ PetscInt N = gp->trainLabels->size; Vec invKt; ierr = petsc_util_createVec (&invKt, gp->nlocal, N); SparseGp_solve (gp, gp->_trainLabels, &invKt); /* 2. multiply dKdt by invKt */ Vec dKdtInvKt; ierr = petsc_util_createVec (&dKdtInvKt, gp->nlocal, N); SparseGp_KGradient (gp, hp, lengthInd); MatMult (gp->_KGradient, invKt, dKdtInvKt); /* 3. solve invK (vector from step 2.) */ Vec invKDkdtInvKt; ierr = petsc_util_createVec (&invKDkdtInvKt, gp->nlocal, N); SparseGp_solve (gp, dKdtInvKt, &invKDkdtInvKt); /* 4. compute inner product */ double dotProd; VecDot (gp->_trainLabels, invKDkdtInvKt, &dotProd); /* compute trace (invK dKdt) */ /* 1. for each column in dKdt, solve */ double myTrace = 0; Vec col, solution; petsc_util_createVec (&col, gp->nlocal, N); petsc_util_createVec (&solution, gp->nlocal, N); for (int i=0; i<N; i++) { /* IOU_ROOT_PRINT ("%d --- %d\n", i, N); */ int row = i; ierr = MatGetColumnVector (gp->_KGradient, col, row); /* IOU_ROOT_PRINT ("solving...\n"); */ SparseGp_solve (gp, col, &solution); /* IOU_ROOT_PRINT ("solved\n"); */ if (row < gp->rend && row >= gp->rstart) { double ii; VecGetValues (solution, 1, &row, &ii); myTrace += ii; } } /* gather all trace terms */ int numProcs; MPI_Comm_size (PETSC_COMM_WORLD, &numProcs); double diag[numProcs]; int ret = MPI_Allgather (&myTrace, 1, MPI_DOUBLE, diag, 1, MPI_DOUBLE, PETSC_COMM_WORLD); (void) ret; double trace = 0; for (int i=0; i<numProcs; i++) trace += diag[i]; /* IOU_ROOT_PRINT ("Cleaning up...\n"); */ /* clean up */ VecDestroy (&invKt); VecDestroy (&dKdtInvKt); VecDestroy (&invKDkdtInvKt); VecDestroy (&col); VecDestroy (&solution); /* IOU_ROOT_PRINT ("Done cleaning up...\n"); */ *logLikeGrad = 0.5*dotProd + 0.5*trace; return EXIT_SUCCESS; }

static PetscErrorCode TaoSolve_BQPIP(Tao tao) { TAO_BQPIP *qp = (TAO_BQPIP*)tao->data; PetscErrorCode ierr; PetscInt iter=0,its; PetscReal d1,d2,ksptol,sigma; PetscReal sigmamu; PetscReal dstep,pstep,step=0; PetscReal gap[4]; TaoConvergedReason reason; PetscFunctionBegin; qp->dobj = 0.0; qp->pobj = 1.0; qp->gap = 10.0; qp->rgap = 1.0; qp->mu = 1.0; qp->sigma = 1.0; qp->dinfeas = 1.0; qp->psteplength = 0.0; qp->dsteplength = 0.0; /* Tighten infinite bounds, things break when we don't do this -- see test_bqpip.c */ ierr = VecSet(qp->XU,1.0e20);CHKERRQ(ierr); ierr = VecSet(qp->XL,-1.0e20);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->XL,qp->XL,tao->XL);CHKERRQ(ierr); ierr = VecPointwiseMin(qp->XU,qp->XU,tao->XU);CHKERRQ(ierr); ierr = TaoComputeObjectiveAndGradient(tao,tao->solution,&qp->c,qp->C0);CHKERRQ(ierr); ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr); ierr = MatMult(tao->hessian, tao->solution, qp->Work);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->Work, &d1);CHKERRQ(ierr); ierr = VecAXPY(qp->C0, -1.0, qp->Work);CHKERRQ(ierr); ierr = VecDot(qp->C0, tao->solution, &d2);CHKERRQ(ierr); qp->c -= (d1/2.0+d2); ierr = MatGetDiagonal(tao->hessian, qp->HDiag);CHKERRQ(ierr); ierr = QPIPSetInitialPoint(qp,tao);CHKERRQ(ierr); ierr = QPIPComputeResidual(qp,tao);CHKERRQ(ierr); /* Enter main loop */ while (1){ /* Check Stopping Condition */ ierr = TaoMonitor(tao,iter++,qp->pobj,PetscSqrtScalar(qp->gap + qp->dinfeas), qp->pinfeas, step, &reason);CHKERRQ(ierr); if (reason != TAO_CONTINUE_ITERATING) break; /* Dual Infeasibility Direction should already be in the right hand side from computing the residuals */ ierr = QPIPComputeNormFromCentralPath(qp,&d1);CHKERRQ(ierr); if (iter > 0 && (qp->rnorm>5*qp->mu || d1*d1>qp->m*qp->mu*qp->mu) ) { sigma=1.0;sigmamu=qp->mu; sigma=0.0;sigmamu=0; } else { sigma=0.0;sigmamu=0; } ierr = VecSet(qp->DZ, sigmamu);CHKERRQ(ierr); ierr = VecSet(qp->DS, sigmamu);CHKERRQ(ierr); if (sigmamu !=0){ ierr = VecPointwiseDivide(qp->DZ, qp->DZ, qp->G);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->DS, qp->DS, qp->T);CHKERRQ(ierr); ierr = VecCopy(qp->DZ,qp->RHS2);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->DS);CHKERRQ(ierr); } else { ierr = VecZeroEntries(qp->RHS2);CHKERRQ(ierr); } /* Compute the Primal Infeasiblitiy RHS and the Diagonal Matrix to be added to H and store in Work */ ierr = VecPointwiseDivide(qp->DiagAxpy, qp->Z, qp->G);CHKERRQ(ierr); ierr = VecPointwiseMult(qp->GZwork, qp->DiagAxpy, qp->R3);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS, -1.0, qp->GZwork);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->TSwork, qp->S, qp->T);CHKERRQ(ierr); ierr = VecAXPY(qp->DiagAxpy, 1.0, qp->TSwork);CHKERRQ(ierr); ierr = VecPointwiseMult(qp->TSwork, qp->TSwork, qp->R5);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS, -1.0, qp->TSwork);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->RHS);CHKERRQ(ierr); /* Determine the solving tolerance */ ksptol = qp->mu/10.0; ksptol = PetscMin(ksptol,0.001); ierr = MatDiagonalSet(tao->hessian, qp->DiagAxpy, ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = KSPSetOperators(tao->ksp, tao->hessian, tao->hessian_pre);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, qp->RHS, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = VecScale(qp->DiagAxpy, -1.0);CHKERRQ(ierr); ierr = MatDiagonalSet(tao->hessian, qp->DiagAxpy, ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = VecScale(qp->DiagAxpy, -1.0);CHKERRQ(ierr); ierr = QPComputeStepDirection(qp,tao);CHKERRQ(ierr); ierr = QPStepLength(qp); CHKERRQ(ierr); /* Calculate New Residual R1 in Work vector */ ierr = MatMult(tao->hessian, tao->stepdirection, qp->RHS2);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->DS);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, -1.0, qp->DZ);CHKERRQ(ierr); ierr = VecAYPX(qp->RHS2, qp->dsteplength, tao->gradient);CHKERRQ(ierr); ierr = VecNorm(qp->RHS2, NORM_2, &qp->dinfeas);CHKERRQ(ierr); ierr = VecDot(qp->DZ, qp->DG, gap);CHKERRQ(ierr); ierr = VecDot(qp->DS, qp->DT, gap+1);CHKERRQ(ierr); qp->rnorm=(qp->dinfeas+qp->psteplength*qp->pinfeas)/(qp->m+qp->n); pstep = qp->psteplength; dstep = qp->dsteplength; step = PetscMin(qp->psteplength,qp->dsteplength); sigmamu= ( pstep*pstep*(gap[0]+gap[1]) + (1 - pstep + pstep*sigma)*qp->gap )/qp->m; if (qp->predcorr && step < 0.9){ if (sigmamu < qp->mu){ sigmamu=sigmamu/qp->mu; sigmamu=sigmamu*sigmamu*sigmamu; } else {sigmamu = 1.0;} sigmamu = sigmamu*qp->mu; /* Compute Corrector Step */ ierr = VecPointwiseMult(qp->DZ, qp->DG, qp->DZ);CHKERRQ(ierr); ierr = VecScale(qp->DZ, -1.0);CHKERRQ(ierr); ierr = VecShift(qp->DZ, sigmamu);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->DZ, qp->DZ, qp->G);CHKERRQ(ierr); ierr = VecPointwiseMult(qp->DS, qp->DS, qp->DT);CHKERRQ(ierr); ierr = VecScale(qp->DS, -1.0);CHKERRQ(ierr); ierr = VecShift(qp->DS, sigmamu);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->DS, qp->DS, qp->T);CHKERRQ(ierr); ierr = VecCopy(qp->DZ, qp->RHS2);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, -1.0, qp->DS);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->RHS);CHKERRQ(ierr); /* Approximately solve the linear system */ ierr = MatDiagonalSet(tao->hessian, qp->DiagAxpy, ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, qp->RHS2, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = MatDiagonalSet(tao->hessian, qp->HDiag, INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = QPComputeStepDirection(qp,tao);CHKERRQ(ierr); ierr = QPStepLength(qp);CHKERRQ(ierr); } /* End Corrector step */ /* Take the step */ pstep = qp->psteplength; dstep = qp->dsteplength; ierr = VecAXPY(qp->Z, dstep, qp->DZ);CHKERRQ(ierr); ierr = VecAXPY(qp->S, dstep, qp->DS);CHKERRQ(ierr); ierr = VecAXPY(tao->solution, dstep, tao->stepdirection);CHKERRQ(ierr); ierr = VecAXPY(qp->G, dstep, qp->DG);CHKERRQ(ierr); ierr = VecAXPY(qp->T, dstep, qp->DT);CHKERRQ(ierr); /* Compute Residuals */ ierr = QPIPComputeResidual(qp,tao);CHKERRQ(ierr); /* Evaluate quadratic function */ ierr = MatMult(tao->hessian, tao->solution, qp->Work);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->Work, &d1);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->C0, &d2);CHKERRQ(ierr); ierr = VecDot(qp->G, qp->Z, gap);CHKERRQ(ierr); ierr = VecDot(qp->T, qp->S, gap+1);CHKERRQ(ierr); qp->pobj=d1/2.0 + d2+qp->c; /* Compute the duality gap */ qp->gap = (gap[0]+gap[1]); qp->dobj = qp->pobj - qp->gap; if (qp->m>0) qp->mu=qp->gap/(qp->m); qp->rgap=qp->gap/( PetscAbsReal(qp->dobj) + PetscAbsReal(qp->pobj) + 1.0 ); } /* END MAIN LOOP */ PetscFunctionReturn(0); }

int main(int argc,char **args) { Vec x,y,b,s1,s2; Mat A; /* linear system matrix */ Mat sA; /* symmetric part of the matrices */ PetscInt n,mbs=16,bs=1,nz=3,prob=2,i,j,col[3],row,Ii,J,n1; const PetscInt *ip_ptr; PetscScalar neg_one = -1.0,value[3],alpha=0.1; PetscMPIInt size; PetscErrorCode ierr; IS ip, isrow, iscol; PetscRandom rdm; PetscBool reorder=PETSC_FALSE; MatInfo minfo1,minfo2; PetscReal norm1,norm2,tol=1.e-10; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,1,"This is a uniprocessor example only!"); ierr = PetscOptionsGetInt(NULL,NULL,"-bs",&bs,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-mbs",&mbs,NULL);CHKERRQ(ierr); n = mbs*bs; ierr=MatCreateSeqBAIJ(PETSC_COMM_WORLD,bs,n,n,nz,NULL, &A);CHKERRQ(ierr); ierr=MatCreateSeqSBAIJ(PETSC_COMM_WORLD,bs,n,n,nz,NULL, &sA);CHKERRQ(ierr); /* Test MatGetOwnershipRange() */ ierr = MatGetOwnershipRange(A,&Ii,&J);CHKERRQ(ierr); ierr = MatGetOwnershipRange(sA,&i,&j);CHKERRQ(ierr); if (i-Ii || j-J) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatGetOwnershipRange() in MatSBAIJ format\n");CHKERRQ(ierr); } /* Assemble matrix */ if (bs == 1) { ierr = PetscOptionsGetInt(NULL,NULL,"-test_problem",&prob,NULL);CHKERRQ(ierr); if (prob == 1) { /* tridiagonal matrix */ value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=1; i<n-1; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = n - 1; col[0]=0; col[1] = n - 2; col[2] = n - 1; value[0]= 0.1; value[1]=-1; value[2]=2; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0; col[0] = 0; col[1] = 1; col[2]=n-1; value[0] = 2.0; value[1] = -1.0; value[2]=0.1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } else if (prob ==2) { /* matrix for the five point stencil */ n1 = (PetscInt) (PetscSqrtReal((PetscReal)n) + 0.001); if (n1*n1 - n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"sqrt(n) must be a positive interger!"); for (i=0; i<n1; i++) { for (j=0; j<n1; j++) { Ii = j + n1*i; if (i>0) { J = Ii - n1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } if (i<n1-1) { J = Ii + n1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } if (j>0) { J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } if (j<n1-1) { J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } /* ierr = MatSetValues(A,1,&I,1,&I,&four,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&I,1,&I,&four,INSERT_VALUES);CHKERRQ(ierr); */ } } } } else { /* bs > 1 */ #if defined(DIAGB) for (block=0; block<n/bs; block++) { /* diagonal blocks */ value[0] = -1.0; value[1] = 4.0; value[2] = -1.0; for (i=1+block*bs; i<bs-1+block*bs; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = bs - 1+block*bs; col[0] = bs - 2+block*bs; col[1] = bs - 1+block*bs; value[0]=-1.0; value[1]=4.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0+block*bs; col[0] = 0+block*bs; col[1] = 1+block*bs; value[0]=4.0; value[1] = -1.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } #endif /* off-diagonal blocks */ value[0]=-1.0; for (i=0; i<(n/bs-1)*bs; i++) { col[0]=i+bs; ierr = MatSetValues(A,1,&i,1,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,1,col,value,INSERT_VALUES);CHKERRQ(ierr); col[0]=i; row=i+bs; ierr = MatSetValues(A,1,&row,1,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&row,1,col,value,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* PetscPrintf(PETSC_COMM_SELF,"\n The Matrix: \n"); MatView(A, VIEWER_DRAW_WORLD); MatView(A, VIEWER_STDOUT_WORLD); */ ierr = MatAssemblyBegin(sA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(sA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* PetscPrintf(PETSC_COMM_SELF,"\n Symmetric Part of Matrix: \n"); MatView(sA, VIEWER_DRAW_WORLD); MatView(sA, VIEWER_STDOUT_WORLD); */ /* Test MatNorm() */ ierr = MatNorm(A,NORM_FROBENIUS,&norm1);CHKERRQ(ierr); ierr = MatNorm(sA,NORM_FROBENIUS,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm(), fnorm1-fnorm2=%16.14e\n",norm1);CHKERRQ(ierr); } ierr = MatNorm(A,NORM_INFINITY,&norm1);CHKERRQ(ierr); ierr = MatNorm(sA,NORM_INFINITY,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm(), inf_norm1-inf_norm2=%16.14e\n",norm1);CHKERRQ(ierr); } /* Test MatGetInfo(), MatGetSize(), MatGetBlockSize() */ ierr = MatGetInfo(A,MAT_LOCAL,&minfo1);CHKERRQ(ierr); ierr = MatGetInfo(sA,MAT_LOCAL,&minfo2);CHKERRQ(ierr); /* printf("matrix nonzeros (BAIJ format) = %d, allocated nonzeros= %d\n", (int)minfo1.nz_used,(int)minfo1.nz_allocated); printf("matrix nonzeros(SBAIJ format) = %d, allocated nonzeros= %d\n", (int)minfo2.nz_used,(int)minfo2.nz_allocated); */ i = (int) (minfo1.nz_used - minfo2.nz_used); j = (int) (minfo1.nz_allocated - minfo2.nz_allocated); if (i<0 || j<0) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatGetInfo()\n");CHKERRQ(ierr); } ierr = MatGetSize(A,&Ii,&J);CHKERRQ(ierr); ierr = MatGetSize(sA,&i,&j);CHKERRQ(ierr); if (i-Ii || j-J) { PetscPrintf(PETSC_COMM_SELF,"Error: MatGetSize()\n");CHKERRQ(ierr); } ierr = MatGetBlockSize(A, &Ii);CHKERRQ(ierr); ierr = MatGetBlockSize(sA, &i);CHKERRQ(ierr); if (i-Ii) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatGetBlockSize()\n");CHKERRQ(ierr); } /* Test MatDiagonalScale(), MatGetDiagonal(), MatScale() */ ierr = PetscRandomCreate(PETSC_COMM_SELF,&rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&s1);CHKERRQ(ierr); ierr = VecDuplicate(x,&s2);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); ierr = MatDiagonalScale(A,x,x);CHKERRQ(ierr); ierr = MatDiagonalScale(sA,x,x);CHKERRQ(ierr); ierr = MatGetDiagonal(A,s1);CHKERRQ(ierr); ierr = MatGetDiagonal(sA,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&norm1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatGetDiagonal() \n");CHKERRQ(ierr); } ierr = MatScale(A,alpha);CHKERRQ(ierr); ierr = MatScale(sA,alpha);CHKERRQ(ierr); /* Test MatMult(), MatMultAdd() */ for (i=0; i<40; i++) { ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); ierr = MatMult(A,x,s1);CHKERRQ(ierr); ierr = MatMult(sA,x,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&norm1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatMult(), MatDiagonalScale() or MatScale()\n");CHKERRQ(ierr); } } for (i=0; i<40; i++) { ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); ierr = VecSetRandom(y,rdm);CHKERRQ(ierr); ierr = MatMultAdd(A,x,y,s1);CHKERRQ(ierr); ierr = MatMultAdd(sA,x,y,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&norm1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatMultAdd(), MatDiagonalScale() or MatScale() \n");CHKERRQ(ierr); } } /* Test MatReordering() */ ierr = MatGetOrdering(A,MATORDERINGNATURAL,&isrow,&iscol);CHKERRQ(ierr); ip = isrow; if (reorder) { IS nip; PetscInt *nip_ptr; ierr = PetscMalloc1(mbs,&nip_ptr);CHKERRQ(ierr); ierr = ISGetIndices(ip,&ip_ptr);CHKERRQ(ierr); ierr = PetscMemcpy(nip_ptr,ip_ptr,mbs*sizeof(PetscInt));CHKERRQ(ierr); i = nip_ptr[1]; nip_ptr[1] = nip_ptr[mbs-2]; nip_ptr[mbs-2] = i; i = nip_ptr[0]; nip_ptr[0] = nip_ptr[mbs-1]; nip_ptr[mbs-1] = i; ierr = ISRestoreIndices(ip,&ip_ptr);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,mbs,nip_ptr,PETSC_COPY_VALUES,&nip);CHKERRQ(ierr); ierr = PetscFree(nip_ptr);CHKERRQ(ierr); ierr = MatReorderingSeqSBAIJ(sA, ip);CHKERRQ(ierr); ierr = ISDestroy(&nip);CHKERRQ(ierr); /* ierr = ISView(ip, VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = MatView(sA,VIEWER_DRAW_SELF);CHKERRQ(ierr); */ } ierr = ISDestroy(&iscol);CHKERRQ(ierr); /* ierr = ISDestroy(&isrow);CHKERRQ(ierr);*/ ierr = ISDestroy(&isrow);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&sA);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&s1);CHKERRQ(ierr); ierr = VecDestroy(&s2);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

int main(int argc,char **argv) { PetscErrorCode ierr; KSP ksp; PC pc; Vec x,b; DM da; Mat A,Atrans; PetscInt dof=1,M=-8; PetscBool flg,trans=PETSC_FALSE; PetscInitialize(&argc,&argv,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-dof",&dof,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-M",&M,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(PETSC_NULL,"-trans",&trans,PETSC_NULL);CHKERRQ(ierr); ierr = DMDACreate(PETSC_COMM_WORLD,&da);CHKERRQ(ierr); ierr = DMDASetDim(da,3);CHKERRQ(ierr); ierr = DMDASetBoundaryType(da,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE);CHKERRQ(ierr); ierr = DMDASetStencilType(da,DMDA_STENCIL_STAR);CHKERRQ(ierr); ierr = DMDASetSizes(da,M,M,M);CHKERRQ(ierr); ierr = DMDASetNumProcs(da,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = DMDASetDof(da,dof);CHKERRQ(ierr); ierr = DMDASetStencilWidth(da,1);CHKERRQ(ierr); ierr = DMDASetOwnershipRanges(da,PETSC_NULL,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); ierr = DMSetFromOptions(da);CHKERRQ(ierr); ierr = DMSetUp(da);CHKERRQ(ierr); ierr = DMCreateGlobalVector(da,&x);CHKERRQ(ierr); ierr = DMCreateGlobalVector(da,&b);CHKERRQ(ierr); ierr = ComputeRHS(da,b);CHKERRQ(ierr); ierr = DMCreateMatrix(da,MATBAIJ,&A);CHKERRQ(ierr); ierr = ComputeMatrix(da,A);CHKERRQ(ierr); /* A is non-symmetric. Make A = 0.5*(A + Atrans) symmetric for testing icc and cholesky */ ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&Atrans);CHKERRQ(ierr); ierr = MatAXPY(A,1.0,Atrans,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatScale(A,0.5);CHKERRQ(ierr); ierr = MatDestroy(&Atrans);CHKERRQ(ierr); /* Test sbaij matrix */ flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL, "-test_sbaij1", &flg,PETSC_NULL);CHKERRQ(ierr); if (flg){ Mat sA; PetscBool issymm; ierr = MatIsTranspose(A,A,0.0,&issymm);CHKERRQ(ierr); if (issymm) { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } else { printf("Warning: A is non-symmetric\n"); } ierr = MatConvert(A,MATSBAIJ,MAT_INITIAL_MATRIX,&sA);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); A = sA; } ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetDM(pc,(DM)da);CHKERRQ(ierr); if (trans) { ierr = KSPSolveTranspose(ksp,b,x);CHKERRQ(ierr); } else { ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); } /* check final residual */ flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL, "-check_final_residual", &flg,PETSC_NULL);CHKERRQ(ierr); if (flg){ Vec b1; PetscReal norm; ierr = KSPGetSolution(ksp,&x);CHKERRQ(ierr); ierr = VecDuplicate(b,&b1);CHKERRQ(ierr); ierr = MatMult(A,x,b1);CHKERRQ(ierr); ierr = VecAXPY(b1,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(b1,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Final residual %g\n",norm);CHKERRQ(ierr); ierr = VecDestroy(&b1);CHKERRQ(ierr); } ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

PetscInt main(PetscInt argc,char **args) { typedef enum {RANDOM, CONSTANT, TANH, NUM_FUNCS} FuncType; const char *funcNames[NUM_FUNCS] = {"random", "constant", "tanh"}; Mat A, AA; PetscMPIInt size; PetscInt N,i, stencil=1,dof=3; PetscInt dim[3] = {10,10,10}, ndim = 3; Vec coords,x,y,z,xx, yy, zz; Vec xxsplit[DOF], yysplit[DOF], zzsplit[DOF]; PetscReal h[3]; PetscScalar s; PetscRandom rdm; PetscReal norm, enorm; PetscInt func; FuncType function = TANH; DM da, da1, coordsda; PetscBool view_x = PETSC_FALSE, view_y = PETSC_FALSE, view_z = PETSC_FALSE; PetscErrorCode ierr; ierr = PetscInitialize(&argc,&args,(char *)0,help);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires complex numbers"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This is a uniprocessor example only!"); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, PETSC_NULL, "USFFT Options", "ex27");CHKERRQ(ierr); ierr = PetscOptionsEList("-function", "Function type", "ex27", funcNames, NUM_FUNCS, funcNames[function], &func, PETSC_NULL);CHKERRQ(ierr); function = (FuncType) func; ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscOptionsGetBool(PETSC_NULL,"-view_x",&view_x,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(PETSC_NULL,"-view_y",&view_y,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(PETSC_NULL,"-view_z",&view_z,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetIntArray(PETSC_NULL,"-dim",dim,&ndim,PETSC_NULL);CHKERRQ(ierr); // DMDA with the correct fiber dimension ierr = DMDACreate3d(PETSC_COMM_SELF,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_STAR, dim[0], dim[1], dim[2], PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, dof, stencil, PETSC_NULL, PETSC_NULL, PETSC_NULL, &da); CHKERRQ(ierr); // DMDA with fiber dimension 1 for split fields ierr = DMDACreate3d(PETSC_COMM_SELF,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_STAR, dim[0], dim[1], dim[2], PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, stencil, PETSC_NULL, PETSC_NULL, PETSC_NULL, &da1); CHKERRQ(ierr); // Coordinates ierr = DMDAGetCoordinateDA(da, &coordsda); ierr = DMGetGlobalVector(coordsda, &coords);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) coords, "Grid coordinates");CHKERRQ(ierr); for(i = 0, N = 1; i < 3; i++) { h[i] = 1.0/dim[i]; PetscScalar *a; ierr = VecGetArray(coords, &a);CHKERRQ(ierr); PetscInt j,k,n = 0; for(i = 0; i < 3; ++i) { for(j = 0; j < dim[i]; ++j){ for(k = 0; k < 3; ++k) { a[n] = j*h[i]; // coordinate along the j-th point in the i-th dimension ++n; } } } ierr = VecRestoreArray(coords, &a);CHKERRQ(ierr); } ierr = DMDASetCoordinates(da, coords);CHKERRQ(ierr); ierr = VecDestroy(&coords);CHKERRQ(ierr); // Work vectors ierr = DMGetGlobalVector(da, &x);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &xx);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) xx, "Real space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &y);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "USFFT frequency space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &yy);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) yy, "FFTW frequency space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "USFFT reconstructed vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &zz);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) zz, "FFTW reconstructed vector");CHKERRQ(ierr); // Split vectors for FFTW for(int ii = 0; ii < 3; ++ii) { ierr = DMGetGlobalVector(da1, &xxsplit[ii]);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) xxsplit[ii], "Real space split vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da1, &yysplit[ii]);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) yysplit[ii], "FFTW frequency space split vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da1, &zzsplit[ii]);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) zzsplit[ii], "FFTW reconstructed split vector");CHKERRQ(ierr); } ierr = PetscPrintf(PETSC_COMM_SELF, "%3-D: USFFT on vector of ");CHKERRQ(ierr); for(i = 0, N = 1; i < 3; i++) { ierr = PetscPrintf(PETSC_COMM_SELF, "dim[%d] = %d ",i,dim[i]);CHKERRQ(ierr); N *= dim[i]; } ierr = PetscPrintf(PETSC_COMM_SELF, "; total size %d \n",N);CHKERRQ(ierr); if (function == RANDOM) { ierr = PetscRandomCreate(PETSC_COMM_SELF, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); } else if (function == CONSTANT) { ierr = VecSet(x, 1.0);CHKERRQ(ierr); } else if (function == TANH) { PetscScalar *a; ierr = VecGetArray(x, &a);CHKERRQ(ierr); PetscInt j,k = 0; for(i = 0; i < 3; ++i) { for(j = 0; j < dim[i]; ++j) { a[k] = tanh((j - dim[i]/2.0)*(10.0/dim[i])); ++k; } } ierr = VecRestoreArray(x, &a);CHKERRQ(ierr); } if(view_x) { ierr = VecView(x, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } ierr = VecCopy(x,xx);CHKERRQ(ierr); // Split xx ierr = VecStrideGatherAll(xx,xxsplit, INSERT_VALUES);CHKERRQ(ierr); //YES! 'Gather' means 'split' (or maybe 'scatter'?)! ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|x|_2 = %g\n",norm);CHKERRQ(ierr); /* create USFFT object */ ierr = MatCreateSeqUSFFT(da,da,&A);CHKERRQ(ierr); /* create FFTW object */ ierr = MatCreateSeqFFTW(PETSC_COMM_SELF,3,dim,&AA);CHKERRQ(ierr); /* apply USFFT and FFTW FORWARD "preemptively", so the fftw_plans can be reused on different vectors */ ierr = MatMult(A,x,z);CHKERRQ(ierr); for(int ii = 0; ii < 3; ++ii) { ierr = MatMult(AA,xxsplit[ii],zzsplit[ii]);CHKERRQ(ierr); } // Now apply USFFT and FFTW forward several (3) times for (i=0; i<3; ++i){ ierr = MatMult(A,x,y);CHKERRQ(ierr); for(int ii = 0; ii < 3; ++ii) { ierr = MatMult(AA,xxsplit[ii],yysplit[ii]);CHKERRQ(ierr); } ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr); for(int ii = 0; ii < 3; ++ii) { ierr = MatMult(AA,yysplit[ii],zzsplit[ii]);CHKERRQ(ierr); } } // Unsplit yy ierr = VecStrideScatterAll(yysplit, yy, INSERT_VALUES);CHKERRQ(ierr); //YES! 'Scatter' means 'collect' (or maybe 'gather'?)! // Unsplit zz ierr = VecStrideScatterAll(zzsplit, zz, INSERT_VALUES);CHKERRQ(ierr); //YES! 'Scatter' means 'collect' (or maybe 'gather'?)! if(view_y) { ierr = PetscPrintf(PETSC_COMM_WORLD, "y = \n");CHKERRQ(ierr); ierr = VecView(y, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "yy = \n");CHKERRQ(ierr); ierr = VecView(yy, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } if(view_z) { ierr = PetscPrintf(PETSC_COMM_WORLD, "z = \n");CHKERRQ(ierr); ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "zz = \n");CHKERRQ(ierr); ierr = VecView(zz, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* compare x and z. USFFT computes an unnormalized DFT, thus z = N*x */ s = 1.0/(PetscReal)N; ierr = VecScale(z,s);CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,z);CHKERRQ(ierr); ierr = VecNorm(x,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|x-z| = %g\n",enorm);CHKERRQ(ierr); /* compare xx and zz. FFTW computes an unnormalized DFT, thus zz = N*x */ s = 1.0/(PetscReal)N; ierr = VecScale(zz,s);CHKERRQ(ierr); ierr = VecAXPY(xx,-1.0,zz);CHKERRQ(ierr); ierr = VecNorm(xx,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|xx-zz| = %g\n",enorm);CHKERRQ(ierr); /* compare y and yy: USFFT and FFTW results*/ ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); ierr = VecAXPY(y,-1.0,yy);CHKERRQ(ierr); ierr = VecNorm(y,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|y|_2 = %g\n",norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|y-yy| = %g\n",enorm);CHKERRQ(ierr); /* compare z and zz: USFFT and FFTW results*/ ierr = VecNorm(z,NORM_2,&norm);CHKERRQ(ierr); ierr = VecAXPY(z,-1.0,zz);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|z|_2 = %g\n",norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|z-zz| = %g\n",enorm);CHKERRQ(ierr); /* free spaces */ ierr = DMRestoreGlobalVector(da,&x);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&xx);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&y);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&yy);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&z);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&zz);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

/*@ MatNullSpaceTest - Tests if the claimed null space is really a null space of a matrix Collective on MatNullSpace Input Parameters: + sp - the null space context - mat - the matrix Output Parameters: . isNull - PETSC_TRUE if the nullspace is valid for this matrix Level: advanced .keywords: PC, null space, remove .seealso: MatNullSpaceCreate(), MatNullSpaceDestroy(), MatNullSpaceSetFunction() @*/ PetscErrorCode MatNullSpaceTest(MatNullSpace sp,Mat mat,PetscBool *isNull) { PetscScalar sum; PetscReal nrm,tol = 10. * PETSC_SQRT_MACHINE_EPSILON; PetscInt j,n,N; PetscErrorCode ierr; Vec l,r; PetscBool flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,consistent = PETSC_TRUE; PetscViewer viewer; PetscFunctionBegin; PetscValidHeaderSpecific(sp,MAT_NULLSPACE_CLASSID,1); PetscValidHeaderSpecific(mat,MAT_CLASSID,2); n = sp->n; ierr = PetscOptionsGetBool(((PetscObject)sp)->options,NULL,"-mat_null_space_test_view",&flg1,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(((PetscObject)sp)->options,NULL,"-mat_null_space_test_view_draw",&flg2,NULL);CHKERRQ(ierr); if (n) { ierr = VecDuplicate(sp->vecs[0],&l);CHKERRQ(ierr); } else { ierr = MatCreateVecs(mat,&l,NULL);CHKERRQ(ierr); } ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sp),&viewer);CHKERRQ(ierr); if (sp->has_cnst) { ierr = VecDuplicate(l,&r);CHKERRQ(ierr); ierr = VecGetSize(l,&N);CHKERRQ(ierr); sum = 1.0/N; ierr = VecSet(l,sum);CHKERRQ(ierr); ierr = MatMult(mat,l,r);CHKERRQ(ierr); ierr = VecNorm(r,NORM_2,&nrm);CHKERRQ(ierr); if (nrm >= tol) consistent = PETSC_FALSE; if (flg1) { if (consistent) { ierr = PetscPrintf(PetscObjectComm((PetscObject)sp),"Constants are likely null vector");CHKERRQ(ierr); } else { ierr = PetscPrintf(PetscObjectComm((PetscObject)sp),"Constants are unlikely null vector ");CHKERRQ(ierr); } ierr = PetscPrintf(PetscObjectComm((PetscObject)sp),"|| A * 1/N || = %g\n",(double)nrm);CHKERRQ(ierr); } if (!consistent && flg1) {ierr = VecView(r,viewer);CHKERRQ(ierr);} if (!consistent && flg2) {ierr = VecView(r,viewer);CHKERRQ(ierr);} ierr = VecDestroy(&r);CHKERRQ(ierr); } for (j=0; j<n; j++) { ierr = (*mat->ops->mult)(mat,sp->vecs[j],l);CHKERRQ(ierr); ierr = VecNorm(l,NORM_2,&nrm);CHKERRQ(ierr); if (nrm >= tol) consistent = PETSC_FALSE; if (flg1) { if (consistent) { ierr = PetscPrintf(PetscObjectComm((PetscObject)sp),"Null vector %D is likely null vector",j);CHKERRQ(ierr); } else { ierr = PetscPrintf(PetscObjectComm((PetscObject)sp),"Null vector %D unlikely null vector ",j);CHKERRQ(ierr); consistent = PETSC_FALSE; } ierr = PetscPrintf(PetscObjectComm((PetscObject)sp),"|| A * v[%D] || = %g\n",j,(double)nrm);CHKERRQ(ierr); } if (!consistent && flg1) {ierr = VecView(l,viewer);CHKERRQ(ierr);} if (!consistent && flg2) {ierr = VecView(l,viewer);CHKERRQ(ierr);} } if (sp->remove) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot test a null space provided as a function with MatNullSpaceSetFunction()"); ierr = VecDestroy(&l);CHKERRQ(ierr); if (isNull) *isNull = consistent; PetscFunctionReturn(0); }

void bsscr_summary(KSP_BSSCR * bsscrp_self, KSP ksp_S, KSP ksp_inner, Mat K,Mat K2,Mat D,Mat G,Mat C,Vec u,Vec p,Vec f,Vec h,Vec t, double penaltyNumber,PetscTruth KisJustK,double mgSetupTime,double scrSolveTime,double a11SingleSolveTime){ PetscTruth flg, found; PetscInt uSize, pSize, lmax, lmin, iterations; PetscReal rNorm, fNorm, uNorm, uNormInf, pNorm, pNormInf, p_sum, min, max; Vec q, qq, t2, t3; double solutionAnalysisTime; PetscPrintf( PETSC_COMM_WORLD, "\n\nSCR Solver Summary:\n\n"); if(bsscrp_self->mg) PetscPrintf( PETSC_COMM_WORLD, " Multigrid setup: = %.4g secs \n", mgSetupTime); KSPGetIterationNumber( ksp_S, &iterations); bsscrp_self->solver->stats.pressure_its = iterations; PetscPrintf( PETSC_COMM_WORLD, " Pressure Solve: = %.4g secs / %d its\n", scrSolveTime, iterations); KSPGetIterationNumber( ksp_inner, &iterations); bsscrp_self->solver->stats.velocity_backsolve_its = iterations; PetscPrintf( PETSC_COMM_WORLD, " Final V Solve: = %.4g secs / %d its\n\n", a11SingleSolveTime, iterations); /***************************************************************************************************************/ flg = PETSC_FALSE; /* Off by default */ PetscOptionsGetTruth( PETSC_NULL, "-scr_ksp_solution_summary", &flg, &found ); if(flg) { PetscScalar KuNorm; solutionAnalysisTime = MPI_Wtime(); VecGetSize( u, &uSize ); VecGetSize( p, &pSize ); VecDuplicate( u, &t2 ); VecDuplicate( u, &t3 ); MatMult( K, u, t3); VecNorm( t3, NORM_2, &KuNorm ); double angle, kdot; if(penaltyNumber > 1e-10){/* should change this to ifK2built maybe */ MatMult( K2, u, t2); VecNorm( t2, NORM_2, &rNorm ); VecDot(t2,t3,&kdot); angle = (kdot/(rNorm*KuNorm)); PetscPrintf( PETSC_COMM_WORLD, " <K u, K2 u>/(|K u| |K2 u|) = %.6e\n", angle); } VecNorm( t, NORM_2, &rNorm ); /* t = f- G p should be the formal residual vector, calculated on line 267 in auglag-driver-DGTGD.c */ VecDot(t3,t,&kdot); angle = (kdot/(rNorm*KuNorm)); PetscPrintf( PETSC_COMM_WORLD, " <K u, (f-G p)>/(|K u| |f- G p|) = %.6e\n\n", angle); MatMult( K, u, t2); VecNorm(t2, NORM_2, &KuNorm); VecAYPX( t2, -1.0, t ); /* t2 <- -t2 + t : t = f- G p should be the formal residual vector, calculated on line 267 in auglag-driver-DGTGD.c*/ VecNorm( t2, NORM_2, &rNorm ); VecNorm( f, NORM_2, &fNorm ); if(KisJustK){ PetscPrintf( PETSC_COMM_WORLD,"Velocity back-solve with original K matrix\n"); PetscPrintf( PETSC_COMM_WORLD,"Solved K u = G p -f\n"); PetscPrintf( PETSC_COMM_WORLD,"Residual with original K matrix\n"); PetscPrintf( PETSC_COMM_WORLD, " |f - K u - G p| = %.12e\n", rNorm); PetscPrintf( PETSC_COMM_WORLD, " |f - K u - G p|/|f| = %.12e\n", rNorm/fNorm); if(penaltyNumber > 1e-10){/* means the restore_K flag was used */ //if(K2 && f2){ MatAXPY(K,penaltyNumber,K2,DIFFERENT_NONZERO_PATTERN);/* Computes K = penaltyNumber*K2 + K */ //VecAXPY(f,penaltyNumber,f2); /* f = penaltyNumber*f2 + f */ KisJustK=PETSC_FALSE; MatMult( K, u, t2); MatMult( G, p, t); VecAYPX( t, -1.0, f ); /* t <- -t + f */ VecAYPX( t2, -1.0, t ); /* t2 <- -t2 + t */ VecNorm( t2, NORM_2, &rNorm ); PetscPrintf( PETSC_COMM_WORLD,"Residual with K+K2 matrix and f rhs vector\n"); PetscPrintf( PETSC_COMM_WORLD, " |(f) - (K + K2) u - G p| = %.12e\n", rNorm); //} } } else{ PetscPrintf( PETSC_COMM_WORLD,"Velocity back-solve with K+K2 matrix\n"); PetscPrintf( PETSC_COMM_WORLD,"Solved (K + K2) u = G p - (f)\n"); PetscPrintf( PETSC_COMM_WORLD,"Residual with K+K2 matrix and f rhs vector\n"); PetscPrintf( PETSC_COMM_WORLD, " |(f) - (K + K2) u - G p| = %.12e\n", rNorm); PetscReal KK2Norm,KK2Normf; MatNorm(K,NORM_1,&KK2Norm); MatNorm(K,NORM_FROBENIUS,&KK2Normf); penaltyNumber = -penaltyNumber; MatAXPY(K,penaltyNumber,K2,DIFFERENT_NONZERO_PATTERN);/* Computes K = penaltyNumber*K2 + K */ //VecAXPY(f,penaltyNumber,f2); /* f = penaltyNumber*f2 + f */ KisJustK=PETSC_FALSE; MatMult( K, u, t2); /* t2 = K*u */ MatMult( G, p, t); /* t = G*p */ VecAYPX( t, -1.0, f ); /* t <- f - t ; t = f - G*p */ VecAYPX( t2, -1.0, t ); /* t2 <- t - t2; t2 = f - G*p - K*u */ VecNorm( t2, NORM_2, &rNorm ); PetscPrintf( PETSC_COMM_WORLD,"Residual with original K matrix\n"); PetscPrintf( PETSC_COMM_WORLD, " |f - K u - G p| = %.12e\n", rNorm); PetscPrintf( PETSC_COMM_WORLD, " |f - K u - G p|/|f| = %.12e\n", rNorm/fNorm); PetscReal KNorm, K2Norm; MatNorm(K,NORM_1,&KNorm); MatNorm(K2,NORM_1,&K2Norm); PetscPrintf( PETSC_COMM_WORLD,"K and K2 norm_1 %.12e %.12e ratio %.12e\n",KNorm,K2Norm,K2Norm/KNorm); MatNorm(K,NORM_INFINITY,&KNorm); MatNorm(K2,NORM_INFINITY,&K2Norm); PetscPrintf( PETSC_COMM_WORLD,"K and K2 norm_inf %.12e %.12e ratio %.12e\n",KNorm,K2Norm,K2Norm/KNorm); MatNorm(K,NORM_FROBENIUS,&KNorm); MatNorm(K2,NORM_FROBENIUS,&K2Norm); PetscPrintf( PETSC_COMM_WORLD,"K and K2 norm_frob %.12e %.12e ratio %.12e\n",KNorm,K2Norm,K2Norm/KNorm); PetscPrintf( PETSC_COMM_WORLD,"K+r*K2 norm_1 %.12e\n",KK2Norm); PetscPrintf( PETSC_COMM_WORLD,"K+r*K2 norm_frob %.12e\n",KK2Normf); penaltyNumber = -penaltyNumber; MatAXPY(K,penaltyNumber,K2,DIFFERENT_NONZERO_PATTERN);/* Computes K = penaltyNumber*K2 + K */ } PetscPrintf( PETSC_COMM_WORLD,"\n"); PetscPrintf( PETSC_COMM_WORLD, " |K u| = %.12e\n", KuNorm); if(penaltyNumber > 1e-10){ MatMult( K2, u, t2); VecNorm( t2, NORM_2, &rNorm ); PetscPrintf( PETSC_COMM_WORLD, " |K2 u| = %.12e\n", rNorm); PetscPrintf( PETSC_COMM_WORLD,"\n"); } VecDuplicate( p, &q ); MatMult( D, u, q ); /* q = G'*u = D*u */ VecNorm( u, NORM_2, &uNorm ); VecNorm( q, NORM_2, &rNorm ); PetscPrintf( PETSC_COMM_WORLD, " |G^T u|_2 = %.6e\n", rNorm ); PetscPrintf( PETSC_COMM_WORLD, " |G^T u|_2/|u|_2 = %.6e\n", sqrt( (double) uSize / (double) pSize ) * rNorm / uNorm); VecDuplicate( p, &qq ); MatMultTranspose( G, u, qq ); VecNorm( qq, NORM_2, &rNorm ); PetscPrintf( PETSC_COMM_WORLD, " |G^T u|/|u| = %.8e\n", rNorm/uNorm ); /* to compare directly with Uzawa */ VecNorm( q, NORM_INFINITY, &rNorm ); PetscPrintf( PETSC_COMM_WORLD, " |G^T u|_infty/|u|_2 = %.6e\n", sqrt( (double) uSize ) * rNorm / uNorm); /* create G'*u+C*p-h to check on this constraint */ /* already have q = D*u */ VecZeroEntries(qq); if(C){ MatMult( C, p, qq ); } VecAYPX( q, 1.0, qq ); /* q = q+qq; G'*u + C*p*/ VecAXPY( q, -1.0, h ); /* q = q-h; G'*u + C*p - h */ VecNorm( q, NORM_2, &rNorm ); PetscPrintf( PETSC_COMM_WORLD, " |G^T u + C p - h| = %.8e :constraint\n", rNorm ); VecNorm( u, NORM_INFINITY, &uNormInf ); VecNorm( u, NORM_2, &uNorm ); VecGetSize( u, &uSize ); VecNorm( p, NORM_INFINITY, &pNormInf ); VecNorm( p, NORM_2, &pNorm ); PetscPrintf( PETSC_COMM_WORLD, " |u|_{\\infty} = %.6e , u_rms = %.6e\n", uNormInf, uNorm / sqrt( (double) uSize ) ); PetscPrintf( PETSC_COMM_WORLD, " |p|_{\\infty} = %.6e , p_rms = %.6e\n", pNormInf, pNorm / sqrt( (double) pSize ) ); VecMax( u, &lmax, &max ); VecMin( u, &lmin, &min ); PetscPrintf( PETSC_COMM_WORLD, " min/max(u) = %.6e [%d] / %.6e [%d]\n",min,lmin,max,lmax); bsscrp_self->solver->stats.vmin = min; bsscrp_self->solver->stats.vmax = max; VecMax( p, &lmax, &max ); VecMin( p, &lmin, &min ); PetscPrintf( PETSC_COMM_WORLD, " min/max(p) = %.6e [%d] / %.6e [%d]\n",min,lmin,max,lmax); bsscrp_self->solver->stats.pmin = min; bsscrp_self->solver->stats.pmax = max; VecSum( p, &p_sum ); PetscPrintf( PETSC_COMM_WORLD, " \\sum_i p_i = %.6e \n", p_sum ); bsscrp_self->solver->stats.p_sum=p_sum; solutionAnalysisTime = MPI_Wtime() - solutionAnalysisTime; PetscPrintf( PETSC_COMM_WORLD, "\n Time for this analysis = %.4g secs\n\n",solutionAnalysisTime); Stg_VecDestroy(&t2 ); Stg_VecDestroy(&t3 ); Stg_VecDestroy(&q ); Stg_VecDestroy(&qq ); } }

int main(int argc,char **args) { PetscMPIInt size; PetscErrorCode ierr; Vec x,y,b,s1,s2; Mat A; /* linear system matrix */ Mat sA,sB,sFactor; /* symmetric matrices */ PetscInt n,mbs=16,bs=1,nz=3,prob=1,i,j,k1,k2,col[3],lf,block, row,Ii,J,n1,inc; PetscReal norm1,norm2,rnorm,tol=PETSC_SMALL; PetscScalar neg_one = -1.0,four=4.0,value[3]; IS perm, iscol; PetscRandom rdm; PetscBool doIcc=PETSC_TRUE,equal; MatInfo minfo1,minfo2; MatFactorInfo factinfo; MatType type; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); ierr = PetscOptionsGetInt(NULL,NULL,"-bs",&bs,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-mbs",&mbs,NULL);CHKERRQ(ierr); n = mbs*bs; ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(A,bs,nz,NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_SELF,&sA);CHKERRQ(ierr); ierr = MatSetSizes(sA,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(sA,MATSEQSBAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(sA);CHKERRQ(ierr); ierr = MatGetType(sA,&type);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)sA,MATSEQSBAIJ,&doIcc);CHKERRQ(ierr); ierr = MatSeqSBAIJSetPreallocation(sA,bs,nz,NULL);CHKERRQ(ierr); ierr = MatSetOption(sA,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); /* Test MatGetOwnershipRange() */ ierr = MatGetOwnershipRange(A,&Ii,&J);CHKERRQ(ierr); ierr = MatGetOwnershipRange(sA,&i,&j);CHKERRQ(ierr); if (i-Ii || j-J) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatGetOwnershipRange() in MatSBAIJ format\n");CHKERRQ(ierr); } /* Assemble matrix */ if (bs == 1) { ierr = PetscOptionsGetInt(NULL,NULL,"-test_problem",&prob,NULL);CHKERRQ(ierr); if (prob == 1) { /* tridiagonal matrix */ value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=1; i<n-1; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = n - 1; col[0]=0; col[1] = n - 2; col[2] = n - 1; value[0]= 0.1; value[1]=-1; value[2]=2; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0; col[0] = n-1; col[1] = 1; col[2] = 0; value[0] = 0.1; value[1] = -1.0; value[2] = 2; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } else if (prob ==2) { /* matrix for the five point stencil */ n1 = (PetscInt) (PetscSqrtReal((PetscReal)n) + 0.001); if (n1*n1 - n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"sqrt(n) must be a positive interger!"); for (i=0; i<n1; i++) { for (j=0; j<n1; j++) { Ii = j + n1*i; if (i>0) { J = Ii - n1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } if (i<n1-1) { J = Ii + n1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } if (j>0) { J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } if (j<n1-1) { J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatSetValues(A,1,&Ii,1,&Ii,&four,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&Ii,1,&Ii,&four,INSERT_VALUES);CHKERRQ(ierr); } } } } else { /* bs > 1 */ for (block=0; block<n/bs; block++) { /* diagonal blocks */ value[0] = -1.0; value[1] = 4.0; value[2] = -1.0; for (i=1+block*bs; i<bs-1+block*bs; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = bs - 1+block*bs; col[0] = bs - 2+block*bs; col[1] = bs - 1+block*bs; value[0]=-1.0; value[1]=4.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0+block*bs; col[0] = 0+block*bs; col[1] = 1+block*bs; value[0]=4.0; value[1] = -1.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } /* off-diagonal blocks */ value[0]=-1.0; for (i=0; i<(n/bs-1)*bs; i++) { col[0]=i+bs; ierr = MatSetValues(A,1,&i,1,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&i,1,col,value,INSERT_VALUES);CHKERRQ(ierr); col[0]=i; row=i+bs; ierr = MatSetValues(A,1,&row,1,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(sA,1,&row,1,col,value,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(sA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(sA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Test MatGetInfo() of A and sA */ ierr = MatGetInfo(A,MAT_LOCAL,&minfo1);CHKERRQ(ierr); ierr = MatGetInfo(sA,MAT_LOCAL,&minfo2);CHKERRQ(ierr); /* printf("A matrix nonzeros (BAIJ format) = %d, allocated nonzeros= %d\n", (int)minfo1.nz_used,(int)minfo1.nz_allocated); printf("sA matrix nonzeros(SBAIJ format) = %d, allocated nonzeros= %d\n", (int)minfo2.nz_used,(int)minfo2.nz_allocated); */ i = (int) (minfo1.nz_used - minfo2.nz_used); j = (int) (minfo1.nz_allocated - minfo2.nz_allocated); k1 = (int) (minfo1.nz_allocated - minfo1.nz_used); k2 = (int) (minfo2.nz_allocated - minfo2.nz_used); if (i < 0 || j < 0 || k1 < 0 || k2 < 0) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error (compare A and sA): MatGetInfo()\n");CHKERRQ(ierr); } /* Test MatDuplicate() */ ierr = MatNorm(A,NORM_FROBENIUS,&norm1);CHKERRQ(ierr); ierr = MatDuplicate(sA,MAT_COPY_VALUES,&sB);CHKERRQ(ierr); ierr = MatEqual(sA,sB,&equal);CHKERRQ(ierr); if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatDuplicate()"); /* Test MatNorm() */ ierr = MatNorm(A,NORM_FROBENIUS,&norm1);CHKERRQ(ierr); ierr = MatNorm(sB,NORM_FROBENIUS,&norm2);CHKERRQ(ierr); rnorm = PetscAbsReal(norm1-norm2)/norm2; if (rnorm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm_FROBENIUS, NormA=%16.14e NormsB=%16.14e\n",norm1,norm2);CHKERRQ(ierr); } ierr = MatNorm(A,NORM_INFINITY,&norm1);CHKERRQ(ierr); ierr = MatNorm(sB,NORM_INFINITY,&norm2);CHKERRQ(ierr); rnorm = PetscAbsReal(norm1-norm2)/norm2; if (rnorm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm_INFINITY(), NormA=%16.14e NormsB=%16.14e\n",norm1,norm2);CHKERRQ(ierr); } ierr = MatNorm(A,NORM_1,&norm1);CHKERRQ(ierr); ierr = MatNorm(sB,NORM_1,&norm2);CHKERRQ(ierr); rnorm = PetscAbsReal(norm1-norm2)/norm2; if (rnorm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm_INFINITY(), NormA=%16.14e NormsB=%16.14e\n",norm1,norm2);CHKERRQ(ierr); } /* Test MatGetInfo(), MatGetSize(), MatGetBlockSize() */ ierr = MatGetInfo(A,MAT_LOCAL,&minfo1);CHKERRQ(ierr); ierr = MatGetInfo(sB,MAT_LOCAL,&minfo2);CHKERRQ(ierr); /* printf("matrix nonzeros (BAIJ format) = %d, allocated nonzeros= %d\n", (int)minfo1.nz_used,(int)minfo1.nz_allocated); printf("matrix nonzeros(SBAIJ format) = %d, allocated nonzeros= %d\n", (int)minfo2.nz_used,(int)minfo2.nz_allocated); */ i = (int) (minfo1.nz_used - minfo2.nz_used); j = (int) (minfo1.nz_allocated - minfo2.nz_allocated); k1 = (int) (minfo1.nz_allocated - minfo1.nz_used); k2 = (int) (minfo2.nz_allocated - minfo2.nz_used); if (i < 0 || j < 0 || k1 < 0 || k2 < 0) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error(compare A and sB): MatGetInfo()\n");CHKERRQ(ierr); } ierr = MatGetSize(A,&Ii,&J);CHKERRQ(ierr); ierr = MatGetSize(sB,&i,&j);CHKERRQ(ierr); if (i-Ii || j-J) { PetscPrintf(PETSC_COMM_SELF,"Error: MatGetSize()\n");CHKERRQ(ierr); } ierr = MatGetBlockSize(A, &Ii);CHKERRQ(ierr); ierr = MatGetBlockSize(sB, &i);CHKERRQ(ierr); if (i-Ii) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatGetBlockSize()\n");CHKERRQ(ierr); } ierr = PetscRandomCreate(PETSC_COMM_SELF,&rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&s1);CHKERRQ(ierr); ierr = VecDuplicate(x,&s2);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); /* Test MatDiagonalScale(), MatGetDiagonal(), MatScale() */ #if !defined(PETSC_USE_COMPLEX) /* Scaling matrix with complex numbers results non-spd matrix, causing crash of MatForwardSolve() and MatBackwardSolve() */ ierr = MatDiagonalScale(A,x,x);CHKERRQ(ierr); ierr = MatDiagonalScale(sB,x,x);CHKERRQ(ierr); ierr = MatMultEqual(A,sB,10,&equal);CHKERRQ(ierr); if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatDiagonalScale"); ierr = MatGetDiagonal(A,s1);CHKERRQ(ierr); ierr = MatGetDiagonal(sB,s2);CHKERRQ(ierr); ierr = VecAXPY(s2,neg_one,s1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&norm1);CHKERRQ(ierr); if (norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatGetDiagonal(), ||s1-s2||=%g\n",(double)norm1);CHKERRQ(ierr); } { PetscScalar alpha=0.1; ierr = MatScale(A,alpha);CHKERRQ(ierr); ierr = MatScale(sB,alpha);CHKERRQ(ierr); } #endif /* Test MatGetRowMaxAbs() */ ierr = MatGetRowMaxAbs(A,s1,NULL);CHKERRQ(ierr); ierr = MatGetRowMaxAbs(sB,s2,NULL);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&norm1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatGetRowMaxAbs() \n");CHKERRQ(ierr); } /* Test MatMult() */ for (i=0; i<40; i++) { ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); ierr = MatMult(A,x,s1);CHKERRQ(ierr); ierr = MatMult(sB,x,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&norm1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatMult(), norm1-norm2: %g\n",(double)norm1);CHKERRQ(ierr); } } /* MatMultAdd() */ for (i=0; i<40; i++) { ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); ierr = VecSetRandom(y,rdm);CHKERRQ(ierr); ierr = MatMultAdd(A,x,y,s1);CHKERRQ(ierr); ierr = MatMultAdd(sB,x,y,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&norm1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&norm2);CHKERRQ(ierr); norm1 -= norm2; if (norm1<-tol || norm1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatMultAdd(), norm1-norm2: %g\n",(double)norm1);CHKERRQ(ierr); } } /* Test MatCholeskyFactor(), MatICCFactor() with natural ordering */ ierr = MatGetOrdering(A,MATORDERINGNATURAL,&perm,&iscol);CHKERRQ(ierr); ierr = ISDestroy(&iscol);CHKERRQ(ierr); norm1 = tol; inc = bs; /* initialize factinfo */ ierr = PetscMemzero(&factinfo,sizeof(MatFactorInfo));CHKERRQ(ierr); for (lf=-1; lf<10; lf += inc) { if (lf==-1) { /* Cholesky factor of sB (duplicate sA) */ factinfo.fill = 5.0; ierr = MatGetFactor(sB,MATSOLVERPETSC,MAT_FACTOR_CHOLESKY,&sFactor);CHKERRQ(ierr); ierr = MatCholeskyFactorSymbolic(sFactor,sB,perm,&factinfo);CHKERRQ(ierr); } else if (!doIcc) break; else { /* incomplete Cholesky factor */ factinfo.fill = 5.0; factinfo.levels = lf; ierr = MatGetFactor(sB,MATSOLVERPETSC,MAT_FACTOR_ICC,&sFactor);CHKERRQ(ierr); ierr = MatICCFactorSymbolic(sFactor,sB,perm,&factinfo);CHKERRQ(ierr); } ierr = MatCholeskyFactorNumeric(sFactor,sB,&factinfo);CHKERRQ(ierr); /* MatView(sFactor, PETSC_VIEWER_DRAW_WORLD); */ /* test MatGetDiagonal on numeric factor */ /* if (lf == -1) { ierr = MatGetDiagonal(sFactor,s1);CHKERRQ(ierr); printf(" in ex74.c, diag: \n"); ierr = VecView(s1,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } */ ierr = MatMult(sB,x,b);CHKERRQ(ierr); /* test MatForwardSolve() and MatBackwardSolve() */ if (lf == -1) { ierr = MatForwardSolve(sFactor,b,s1);CHKERRQ(ierr); ierr = MatBackwardSolve(sFactor,s1,s2);CHKERRQ(ierr); ierr = VecAXPY(s2,neg_one,x);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_2,&norm2);CHKERRQ(ierr); if (10*norm1 < norm2) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatForwardSolve and BackwardSolve: Norm of error=%g, bs=%D\n",(double)norm2,bs);CHKERRQ(ierr); } } /* test MatSolve() */ ierr = MatSolve(sFactor,b,y);CHKERRQ(ierr); ierr = MatDestroy(&sFactor);CHKERRQ(ierr); /* Check the error */ ierr = VecAXPY(y,neg_one,x);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm2);CHKERRQ(ierr); if (10*norm1 < norm2 && lf-inc != -1) { ierr = PetscPrintf(PETSC_COMM_SELF,"lf=%D, %D, Norm of error=%g, %g\n",lf-inc,lf,(double)norm1,(double)norm2);CHKERRQ(ierr); } norm1 = norm2; if (norm2 < tol && lf != -1) break; } ierr = ISDestroy(&perm);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&sB);CHKERRQ(ierr); ierr = MatDestroy(&sA);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&s1);CHKERRQ(ierr); ierr = VecDestroy(&s2);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

int main(int argc,char **args) { Mat mat; /* matrix */ Vec b,ustar,u; /* vectors (RHS, exact solution, approx solution) */ PC pc; /* PC context */ KSP ksp; /* KSP context */ PetscErrorCode ierr; PetscInt n = 10,i,its,col[3]; PetscScalar value[3]; PCType pcname; KSPType kspname; PetscReal norm,tol=1.e-14; PetscInitialize(&argc,&args,(char*)0,help); /* Create and initialize vectors */ ierr = VecCreateSeq(PETSC_COMM_SELF,n,&b); CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n,&ustar); CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n,&u); CHKERRQ(ierr); ierr = VecSet(ustar,1.0); CHKERRQ(ierr); ierr = VecSet(u,0.0); CHKERRQ(ierr); /* Create and assemble matrix */ ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,3,NULL,&mat); CHKERRQ(ierr); value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=1; i<n-1; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(mat,1,&i,3,col,value,INSERT_VALUES); CHKERRQ(ierr); } i = n - 1; col[0] = n - 2; col[1] = n - 1; ierr = MatSetValues(mat,1,&i,2,col,value,INSERT_VALUES); CHKERRQ(ierr); i = 0; col[0] = 0; col[1] = 1; value[0] = 2.0; value[1] = -1.0; ierr = MatSetValues(mat,1,&i,2,col,value,INSERT_VALUES); CHKERRQ(ierr); ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); /* Compute right-hand-side vector */ ierr = MatMult(mat,ustar,b); CHKERRQ(ierr); /* Create PC context and set up data structures */ ierr = PCCreate(PETSC_COMM_WORLD,&pc); CHKERRQ(ierr); ierr = PCSetType(pc,PCNONE); CHKERRQ(ierr); ierr = PCSetFromOptions(pc); CHKERRQ(ierr); ierr = PCSetOperators(pc,mat,mat); CHKERRQ(ierr); ierr = PCSetUp(pc); CHKERRQ(ierr); /* Create KSP context and set up data structures */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp); CHKERRQ(ierr); ierr = KSPSetType(ksp,KSPRICHARDSON); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); ierr = PCSetOperators(pc,mat,mat); CHKERRQ(ierr); ierr = KSPSetPC(ksp,pc); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); /* Solve the problem */ ierr = KSPGetType(ksp,&kspname); CHKERRQ(ierr); ierr = PCGetType(pc,&pcname); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Running %s with %s preconditioning\n",kspname,pcname); CHKERRQ(ierr); ierr = KSPSolve(ksp,b,u); CHKERRQ(ierr); ierr = VecAXPY(u,-1.0,ustar); CHKERRQ(ierr); ierr = VecNorm(u,NORM_2,&norm); ierr = KSPGetIterationNumber(ksp,&its); CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"2 norm of error %g Number of iterations %D\n",(double)norm,its); CHKERRQ(ierr); } /* Free data structures */ ierr = KSPDestroy(&ksp); CHKERRQ(ierr); ierr = VecDestroy(&u); CHKERRQ(ierr); ierr = VecDestroy(&ustar); CHKERRQ(ierr); ierr = VecDestroy(&b); CHKERRQ(ierr); ierr = MatDestroy(&mat); CHKERRQ(ierr); ierr = PCDestroy(&pc); CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

int main(int argc,char **args) { Vec x,b; /* approx solution, RHS */ Mat A; /* linear system matrix */ KSP ksp; /* linear solver context */ PetscInt Ii,Istart,Iend,m = 11; PetscErrorCode ierr; PetscScalar v; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); /* Create parallel diagonal matrix */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,m);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(A,1,NULL,1,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = (PetscReal)Ii+1; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } /* Make A sigular */ Ii = m - 1; /* last diagonal entry */ v = 0.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* A is symmetric. Set symmetric flag to enable KSP_type = minres */ ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&b);CHKERRQ(ierr); ierr = VecSetSizes(b,PETSC_DECIDE,m);CHKERRQ(ierr); ierr = VecSetFromOptions(b);CHKERRQ(ierr); ierr = VecDuplicate(b,&x);CHKERRQ(ierr); ierr = VecSet(x,1.0);CHKERRQ(ierr); ierr = MatMult(A,x,b);CHKERRQ(ierr); ierr = VecSet(x,0.0);CHKERRQ(ierr); /* Create linear solver context */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Check solution and clean up - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Free work space. */ ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

int main(int argc,char **args) { Mat A,RHS,C,F,X,S; Vec u,x,b; Vec xschur,bschur,uschur; IS is_schur; PetscErrorCode ierr; PetscMPIInt size; PetscInt isolver=0,size_schur,m,n,nfact,nsolve,nrhs; PetscReal norm,tol=PETSC_SQRT_MACHINE_EPSILON; PetscRandom rand; PetscBool data_provided,herm,symm,use_lu; PetscReal sratio = 5.1/12.; PetscViewer fd; /* viewer */ char solver[256]; char file[PETSC_MAX_PATH_LEN]; /* input file name */ PetscInitialize(&argc,&args,(char*)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); if (size > 1) SETERRQ(PETSC_COMM_WORLD,1,"This is a uniprocessor test"); /* Determine which type of solver we want to test for */ herm = PETSC_FALSE; symm = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-symmetric_solve",&symm,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-hermitian_solve",&herm,NULL);CHKERRQ(ierr); if (herm) symm = PETSC_TRUE; /* Determine file from which we read the matrix A */ ierr = PetscOptionsGetString(NULL,NULL,"-f",file,PETSC_MAX_PATH_LEN,&data_provided);CHKERRQ(ierr); if (!data_provided) { /* get matrices from PETSc distribution */ sprintf(file,PETSC_DIR); ierr = PetscStrcat(file,"/share/petsc/datafiles/matrices/");CHKERRQ(ierr); if (symm) { #if defined (PETSC_USE_COMPLEX) ierr = PetscStrcat(file,"hpd-complex-");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"spd-real-");CHKERRQ(ierr); #endif } else { #if defined (PETSC_USE_COMPLEX) ierr = PetscStrcat(file,"nh-complex-");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"ns-real-");CHKERRQ(ierr); #endif } #if defined(PETSC_USE_64BIT_INDICES) ierr = PetscStrcat(file,"int64-");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"int32-");CHKERRQ(ierr); #endif #if defined (PETSC_USE_REAL_SINGLE) ierr = PetscStrcat(file,"float32");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"float64");CHKERRQ(ierr); #endif } /* Load matrix A */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n); /* Create dense matrix C and X; C holds true solution with identical colums */ nrhs = 2; ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,m,PETSC_DECIDE,PETSC_DECIDE,nrhs);CHKERRQ(ierr); ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatSetRandom(C,rand);CHKERRQ(ierr); ierr = MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&X);CHKERRQ(ierr); /* Create vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,n,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* save the true solution */ ierr = PetscOptionsGetInt(NULL,NULL,"-solver",&isolver,NULL);CHKERRQ(ierr); switch (isolver) { #if defined(PETSC_HAVE_MUMPS) case 0: ierr = PetscStrcpy(solver,MATSOLVERMUMPS);CHKERRQ(ierr); break; #endif #if defined(PETSC_HAVE_MKL_PARDISO) case 1: ierr = PetscStrcpy(solver,MATSOLVERMKL_PARDISO);CHKERRQ(ierr); break; #endif default: ierr = PetscStrcpy(solver,MATSOLVERPETSC);CHKERRQ(ierr); break; } #if defined (PETSC_USE_COMPLEX) if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */ PetscScalar im = PetscSqrtScalar((PetscScalar)-1.); PetscScalar val = -1.0; val = val + im; ierr = MatSetValue(A,1,0,val,INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } #endif ierr = PetscOptionsGetReal(NULL,NULL,"-schur_ratio",&sratio,NULL);CHKERRQ(ierr); if (sratio < 0. || sratio > 1.) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %f", sratio); } size_schur = (PetscInt)(sratio*m); ierr = PetscPrintf(PETSC_COMM_SELF,"Solving with %s: nrhs %d, sym %d, herm %d, size schur %d, size mat %d\n",solver,nrhs,symm,herm,size_schur,m);CHKERRQ(ierr); /* Test LU/Cholesky Factorization */ use_lu = PETSC_FALSE; if (!symm) use_lu = PETSC_TRUE; #if defined (PETSC_USE_COMPLEX) if (isolver == 1) use_lu = PETSC_TRUE; #endif if (herm && !use_lu) { /* test also conversion routines inside the solver packages */ ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatConvert(A,MATSEQSBAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); } if (use_lu) { ierr = MatGetFactor(A,solver,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } else { if (herm) { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } else { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_SPD,PETSC_FALSE);CHKERRQ(ierr); } ierr = MatGetFactor(A,solver,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); } ierr = ISCreateStride(PETSC_COMM_SELF,size_schur,m-size_schur,1,&is_schur);CHKERRQ(ierr); ierr = MatFactorSetSchurIS(F,is_schur);CHKERRQ(ierr); ierr = ISDestroy(&is_schur);CHKERRQ(ierr); if (use_lu) { ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr); } else { ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr); } for (nfact = 0; nfact < 3; nfact++) { Mat AD; if (!nfact) { ierr = VecSetRandom(x,rand);CHKERRQ(ierr); if (symm && herm) { ierr = VecAbs(x);CHKERRQ(ierr); } ierr = MatDiagonalSet(A,x,ADD_VALUES);CHKERRQ(ierr); } if (use_lu) { ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr); } else { ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr); } ierr = MatFactorCreateSchurComplement(F,&S);CHKERRQ(ierr); ierr = MatCreateVecs(S,&xschur,&bschur);CHKERRQ(ierr); ierr = VecDuplicate(xschur,&uschur);CHKERRQ(ierr); if (nfact == 1) { ierr = MatFactorInvertSchurComplement(F);CHKERRQ(ierr); } for (nsolve = 0; nsolve < 2; nsolve++) { ierr = VecSetRandom(x,rand);CHKERRQ(ierr); ierr = VecCopy(x,u);CHKERRQ(ierr); if (nsolve) { ierr = MatMult(A,x,b);CHKERRQ(ierr); ierr = MatSolve(F,b,x);CHKERRQ(ierr); } else { ierr = MatMultTranspose(A,x,b);CHKERRQ(ierr); ierr = MatSolveTranspose(F,b,x);CHKERRQ(ierr); } /* Check the error */ ierr = VecAXPY(u,-1.0,x);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { PetscReal resi; if (nsolve) { ierr = MatMult(A,x,u);CHKERRQ(ierr); /* u = A*x */ } else { ierr = MatMultTranspose(A,x,u);CHKERRQ(ierr); /* u = A*x */ } ierr = VecAXPY(u,-1.0,b);CHKERRQ(ierr); /* u <- (-1.0)b + u */ ierr = VecNorm(u,NORM_2,&resi);CHKERRQ(ierr); if (nsolve) { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatSolve error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatSolveTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } } ierr = VecSetRandom(xschur,rand);CHKERRQ(ierr); ierr = VecCopy(xschur,uschur);CHKERRQ(ierr); if (nsolve) { ierr = MatMult(S,xschur,bschur);CHKERRQ(ierr); ierr = MatFactorSolveSchurComplement(F,bschur,xschur);CHKERRQ(ierr); } else { ierr = MatMultTranspose(S,xschur,bschur);CHKERRQ(ierr); ierr = MatFactorSolveSchurComplementTranspose(F,bschur,xschur);CHKERRQ(ierr); } /* Check the error */ ierr = VecAXPY(uschur,-1.0,xschur);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(uschur,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { PetscReal resi; if (nsolve) { ierr = MatMult(S,xschur,uschur);CHKERRQ(ierr); /* u = A*x */ } else { ierr = MatMultTranspose(S,xschur,uschur);CHKERRQ(ierr); /* u = A*x */ } ierr = VecAXPY(uschur,-1.0,bschur);CHKERRQ(ierr); /* u <- (-1.0)b + u */ ierr = VecNorm(uschur,NORM_2,&resi);CHKERRQ(ierr); if (nsolve) { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatFactorSolveSchurComplement error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } } } ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&AD); if (!nfact) { ierr = MatMatMult(AD,C,MAT_INITIAL_MATRIX,2.0,&RHS);CHKERRQ(ierr); } else { ierr = MatMatMult(AD,C,MAT_REUSE_MATRIX,2.0,&RHS);CHKERRQ(ierr); } ierr = MatDestroy(&AD);CHKERRQ(ierr); for (nsolve = 0; nsolve < 2; nsolve++) { ierr = MatMatSolve(F,RHS,X);CHKERRQ(ierr); /* Check the error */ ierr = MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatNorm(X,NORM_FROBENIUS,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);CHKERRQ(ierr); } } ierr = MatDestroy(&S);CHKERRQ(ierr); ierr = VecDestroy(&xschur);CHKERRQ(ierr); ierr = VecDestroy(&bschur);CHKERRQ(ierr); ierr = VecDestroy(&uschur);CHKERRQ(ierr); } /* Free data structures */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); ierr = MatDestroy(&X);CHKERRQ(ierr); ierr = MatDestroy(&RHS);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

PetscErrorCode ResidualFunction(SNES snes,Vec X, Vec F, Userctx *user) { PetscErrorCode ierr; Vec Xgen,Xnet,Fgen,Fnet; PetscScalar *xgen,*xnet,*fgen,*fnet; PetscInt i,idx=0; PetscScalar Vr,Vi,Vm,Vm2; PetscScalar Eqp,Edp,delta,w; /* Generator variables */ PetscScalar Efd,RF,VR; /* Exciter variables */ PetscScalar Id,Iq; /* Generator dq axis currents */ PetscScalar Vd,Vq,SE; PetscScalar IGr,IGi,IDr,IDi; PetscScalar Zdq_inv[4],det; PetscScalar PD,QD,Vm0,*v0; PetscInt k; PetscFunctionBegin; ierr = VecZeroEntries(F);CHKERRQ(ierr); ierr = DMCompositeGetLocalVectors(user->dmpgrid,&Xgen,&Xnet);CHKERRQ(ierr); ierr = DMCompositeGetLocalVectors(user->dmpgrid,&Fgen,&Fnet);CHKERRQ(ierr); ierr = DMCompositeScatter(user->dmpgrid,X,Xgen,Xnet);CHKERRQ(ierr); ierr = DMCompositeScatter(user->dmpgrid,F,Fgen,Fnet);CHKERRQ(ierr); /* Network current balance residual IG + Y*V + IL = 0. Only YV is added here. The generator current injection, IG, and load current injection, ID are added later */ /* Note that the values in Ybus are stored assuming the imaginary current balance equation is ordered first followed by real current balance equation for each bus. Thus imaginary current contribution goes in location 2*i, and real current contribution in 2*i+1 */ ierr = MatMult(user->Ybus,Xnet,Fnet);CHKERRQ(ierr); ierr = VecGetArray(Xgen,&xgen);CHKERRQ(ierr); ierr = VecGetArray(Xnet,&xnet);CHKERRQ(ierr); ierr = VecGetArray(Fgen,&fgen);CHKERRQ(ierr); ierr = VecGetArray(Fnet,&fnet);CHKERRQ(ierr); /* Generator subsystem */ for (i=0; i < ngen; i++) { Eqp = xgen[idx]; Edp = xgen[idx+1]; delta = xgen[idx+2]; w = xgen[idx+3]; Id = xgen[idx+4]; Iq = xgen[idx+5]; Efd = xgen[idx+6]; RF = xgen[idx+7]; VR = xgen[idx+8]; /* Generator differential equations */ fgen[idx] = (Eqp + (Xd[i] - Xdp[i])*Id - Efd)/Td0p[i]; fgen[idx+1] = (Edp - (Xq[i] - Xqp[i])*Iq)/Tq0p[i]; fgen[idx+2] = -w + w_s; fgen[idx+3] = (-TM[i] + Edp*Id + Eqp*Iq + (Xqp[i] - Xdp[i])*Id*Iq + D[i]*(w - w_s))/M[i]; Vr = xnet[2*gbus[i]]; /* Real part of generator terminal voltage */ Vi = xnet[2*gbus[i]+1]; /* Imaginary part of the generator terminal voltage */ ierr = ri2dq(Vr,Vi,delta,&Vd,&Vq);CHKERRQ(ierr); /* Algebraic equations for stator currents */ det = Rs[i]*Rs[i] + Xdp[i]*Xqp[i]; Zdq_inv[0] = Rs[i]/det; Zdq_inv[1] = Xqp[i]/det; Zdq_inv[2] = -Xdp[i]/det; Zdq_inv[3] = Rs[i]/det; fgen[idx+4] = Zdq_inv[0]*(-Edp + Vd) + Zdq_inv[1]*(-Eqp + Vq) + Id; fgen[idx+5] = Zdq_inv[2]*(-Edp + Vd) + Zdq_inv[3]*(-Eqp + Vq) + Iq; /* Add generator current injection to network */ ierr = dq2ri(Id,Iq,delta,&IGr,&IGi);CHKERRQ(ierr); fnet[2*gbus[i]] -= IGi; fnet[2*gbus[i]+1] -= IGr; Vm = PetscSqrtScalar(Vd*Vd + Vq*Vq); SE = k1[i]*PetscExpScalar(k2[i]*Efd); /* Exciter differential equations */ fgen[idx+6] = (KE[i]*Efd + SE - VR)/TE[i]; fgen[idx+7] = (RF - KF[i]*Efd/TF[i])/TF[i]; fgen[idx+8] = (VR - KA[i]*RF + KA[i]*KF[i]*Efd/TF[i] - KA[i]*(Vref[i] - Vm))/TA[i]; idx = idx + 9; } ierr = VecGetArray(user->V0,&v0);CHKERRQ(ierr); for (i=0; i < nload; i++) { Vr = xnet[2*lbus[i]]; /* Real part of load bus voltage */ Vi = xnet[2*lbus[i]+1]; /* Imaginary part of the load bus voltage */ Vm = PetscSqrtScalar(Vr*Vr + Vi*Vi); Vm2 = Vm*Vm; Vm0 = PetscSqrtScalar(v0[2*lbus[i]]*v0[2*lbus[i]] + v0[2*lbus[i]+1]*v0[2*lbus[i]+1]); PD = QD = 0.0; for (k=0; k < ld_nsegsp[i]; k++) PD += ld_alphap[k]*PD0[i]*PetscPowScalar((Vm/Vm0),ld_betap[k]); for (k=0; k < ld_nsegsq[i]; k++) QD += ld_alphaq[k]*QD0[i]*PetscPowScalar((Vm/Vm0),ld_betaq[k]); /* Load currents */ IDr = (PD*Vr + QD*Vi)/Vm2; IDi = (-QD*Vr + PD*Vi)/Vm2; fnet[2*lbus[i]] += IDi; fnet[2*lbus[i]+1] += IDr; } ierr = VecRestoreArray(user->V0,&v0);CHKERRQ(ierr); ierr = VecRestoreArray(Xgen,&xgen);CHKERRQ(ierr); ierr = VecRestoreArray(Xnet,&xnet);CHKERRQ(ierr); ierr = VecRestoreArray(Fgen,&fgen);CHKERRQ(ierr); ierr = VecRestoreArray(Fnet,&fnet);CHKERRQ(ierr); ierr = DMCompositeGather(user->dmpgrid,F,INSERT_VALUES,Fgen,Fnet);CHKERRQ(ierr); ierr = DMCompositeRestoreLocalVectors(user->dmpgrid,&Xgen,&Xnet);CHKERRQ(ierr); ierr = DMCompositeRestoreLocalVectors(user->dmpgrid,&Fgen,&Fnet);CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode CkEigenSolutions(PetscInt cklvl,Mat A,PetscInt il,PetscInt iu,PetscScalar *eval,Vec *evec,PetscReal *tols) { PetscInt ierr,i,j,nev; Vec vt1,vt2; /* tmp vectors */ PetscReal norm,norm_max; PetscScalar dot,tmp; PetscReal dot_max; PetscFunctionBegin; nev = iu - il; if (nev <= 0) PetscFunctionReturn(0); ierr = VecDuplicate(evec[0],&vt1);CHKERRQ(ierr); ierr = VecDuplicate(evec[0],&vt2);CHKERRQ(ierr); switch (cklvl) { case 2: dot_max = 0.0; for (i = il; i<iu; i++) { ierr = VecCopy(evec[i], vt1);CHKERRQ(ierr); for (j=il; j<iu; j++) { ierr = VecDot(evec[j],vt1,&dot);CHKERRQ(ierr); if (j == i) { dot = PetscAbsScalar(dot - 1.0); } else { dot = PetscAbsScalar(dot); } if (PetscAbsScalar(dot) > dot_max) dot_max = PetscAbsScalar(dot); #if defined(DEBUG_CkEigenSolutions) if (dot > tols[1]) { ierr = VecNorm(evec[i],NORM_INFINITY,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"|delta(%d,%d)|: %g, norm: %d\n",i,j,(double)dot,(double)norm);CHKERRQ(ierr); } #endif } } ierr = PetscPrintf(PETSC_COMM_SELF," max|(x_j^T*x_i) - delta_ji|: %g\n",(double)dot_max);CHKERRQ(ierr); case 1: norm_max = 0.0; for (i = il; i< iu; i++) { ierr = MatMult(A, evec[i], vt1);CHKERRQ(ierr); ierr = VecCopy(evec[i], vt2);CHKERRQ(ierr); tmp = -eval[i]; ierr = VecAXPY(vt1,tmp,vt2);CHKERRQ(ierr); ierr = VecNorm(vt1, NORM_INFINITY, &norm);CHKERRQ(ierr); norm = PetscAbsReal(norm); if (norm > norm_max) norm_max = norm; #if defined(DEBUG_CkEigenSolutions) if (norm > tols[0]) { ierr = PetscPrintf(PETSC_COMM_SELF," residual violation: %d, resi: %g\n",i, norm);CHKERRQ(ierr); } #endif } ierr = PetscPrintf(PETSC_COMM_SELF," max_resi: %g\n", (double)norm_max);CHKERRQ(ierr); break; default: ierr = PetscPrintf(PETSC_COMM_SELF,"Error: cklvl=%d is not supported \n",cklvl);CHKERRQ(ierr); } ierr = VecDestroy(&vt2);CHKERRQ(ierr); ierr = VecDestroy(&vt1);CHKERRQ(ierr); PetscFunctionReturn(0); }

int main(int argc,char **argv) { DM da; /* distributed array */ Vec x,b,u; /* approx solution, RHS, exact solution */ Mat A; /* linear system matrix */ KSP ksp; /* linear solver context */ PetscRandom rctx; /* random number generator context */ PetscReal norm; /* norm of solution error */ PetscInt i,j,its; PetscErrorCode ierr; PetscBool flg = PETSC_FALSE; PetscLogStage stage; DMDALocalInfo info; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; /* Create distributed array to handle parallel distribution. The problem size will default to 8 by 7, but this can be changed using -da_grid_x M -da_grid_y N */ ierr = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,-8,-7,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&da);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Compute the matrix and right-hand-side vector that define the linear system, Ax = b. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create parallel matrix preallocated according to the DMDA, format AIJ by default. To use symmetric storage, run with -dm_mat_type sbaij -mat_ignore_lower_triangular */ ierr = DMCreateMatrix(da,&A);CHKERRQ(ierr); /* Set matrix elements for the 2-D, five-point stencil in parallel. - Each processor needs to insert only elements that it owns locally (but any non-local elements will be sent to the appropriate processor during matrix assembly). - Rows and columns are specified by the stencil - Entries are normalized for a domain [0,1]x[0,1] */ ierr = PetscLogStageRegister("Assembly", &stage);CHKERRQ(ierr); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr); for (j=info.ys; j<info.ys+info.ym; j++) { for (i=info.xs; i<info.xs+info.xm; i++) { PetscReal hx = 1./info.mx,hy = 1./info.my; MatStencil row = {0},col[5] = {{0}}; PetscScalar v[5]; PetscInt ncols = 0; row.j = j; row.i = i; col[ncols].j = j; col[ncols].i = i; v[ncols++] = 2*(hx/hy + hy/hx); /* boundaries */ if (i>0) {col[ncols].j = j; col[ncols].i = i-1; v[ncols++] = -hy/hx;} if (i<info.mx-1) {col[ncols].j = j; col[ncols].i = i+1; v[ncols++] = -hy/hx;} if (j>0) {col[ncols].j = j-1; col[ncols].i = i; v[ncols++] = -hx/hy;} if (j<info.my-1) {col[ncols].j = j+1; col[ncols].i = i; v[ncols++] = -hx/hy;} ierr = MatSetValuesStencil(A,1,&row,ncols,col,v,INSERT_VALUES);CHKERRQ(ierr); } } /* Assemble matrix, using the 2-step process: MatAssemblyBegin(), MatAssemblyEnd() Computations can be done while messages are in transition by placing code between these two statements. */ ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscLogStagePop();CHKERRQ(ierr); /* Create parallel vectors compatible with the DMDA. */ ierr = DMCreateGlobalVector(da,&u);CHKERRQ(ierr); ierr = VecDuplicate(u,&b);CHKERRQ(ierr); ierr = VecDuplicate(u,&x);CHKERRQ(ierr); /* Set exact solution; then compute right-hand-side vector. By default we use an exact solution of a vector with all elements of 1.0; Alternatively, using the runtime option -random_sol forms a solution vector with random components. */ ierr = PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);CHKERRQ(ierr); if (flg) { ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr); ierr = VecSetRandom(u,rctx);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr); } else { ierr = VecSet(u,1.);CHKERRQ(ierr); } ierr = MatMult(A,u,b);CHKERRQ(ierr); /* View the exact solution vector if desired */ flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);CHKERRQ(ierr); if (flg) {ierr = VecView(u,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the linear solver and set various options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create linear solver context */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); /* Set operators. Here the matrix that defines the linear system also serves as the preconditioning matrix. */ ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr); /* Set runtime options, e.g., -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol> These options will override those specified above as long as KSPSetFromOptions() is called _after_ any other customization routines. */ ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve the linear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Check solution and clean up - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Check the error */ ierr = VecAXPY(x,-1.,u);CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); /* Print convergence information. PetscPrintf() produces a single print statement from all processes that share a communicator. An alternative is PetscFPrintf(), which prints to a file. */ ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr); /* Free work space. All PETSc objects should be destroyed when they are no longer needed. */ ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); /* Always call PetscFinalize() before exiting a program. This routine - finalizes the PETSc libraries as well as MPI - provides summary and diagnostic information if certain runtime options are chosen (e.g., -log_summary). */ ierr = PetscFinalize(); return ierr; }

PetscErrorCode KSPFGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_FGMRES *fgmres = (KSP_FGMRES *)(ksp->data); PetscReal res_norm; PetscReal hapbnd,tt; PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ PetscErrorCode ierr; PetscInt loc_it; /* local count of # of dir. in Krylov space */ PetscInt max_k = fgmres->max_k; /* max # of directions Krylov space */ Mat Amat,Pmat; MatStructure pflag; PetscFunctionBegin; /* Number of pseudo iterations since last restart is the number of prestart directions */ loc_it = 0; /* note: (fgmres->it) is always set one less than (loc_it) It is used in KSPBUILDSolution_FGMRES, where it is passed to KSPFGMRESBuildSoln. Note that when KSPFGMRESBuildSoln is called from this function, (loc_it -1) is passed, so the two are equivalent */ fgmres->it = (loc_it - 1); /* initial residual is in VEC_VV(0) - compute its norm*/ ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); /* first entry in right-hand-side of hessenberg system is just the initial residual norm */ *RS(0) = res_norm; ksp->rnorm = res_norm; KSPLogResidualHistory(ksp,res_norm); /* check for the convergence - maybe the current guess is good enough */ ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { if (itcount) *itcount = 0; PetscFunctionReturn(0); } /* scale VEC_VV (the initial residual) */ ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); /* MAIN ITERATION LOOP BEGINNING*/ /* keep iterating until we have converged OR generated the max number of directions OR reached the max number of iterations for the method */ while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { if (loc_it) KSPLogResidualHistory(ksp,res_norm); fgmres->it = (loc_it - 1); ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); /* see if more space is needed for work vectors */ if (fgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { ierr = KSPFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); /* (loc_it+1) is passed in as number of the first vector that should be allocated */ } /* CHANGE THE PRECONDITIONER? */ /* ModifyPC is the callback function that can be used to change the PC or its attributes before its applied */ (*fgmres->modifypc)(ksp,ksp->its,loc_it,res_norm,fgmres->modifyctx); /* apply PRECONDITIONER to direction vector and store with preconditioned vectors in prevec */ ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr); /* Multiply preconditioned vector by operator - put in VEC_VV(loc_it+1) */ ierr = MatMult(Amat,PREVEC(loc_it),VEC_VV(1+loc_it));CHKERRQ(ierr); /* update hessenberg matrix and do Gram-Schmidt - new direction is in VEC_VV(1+loc_it)*/ ierr = (*fgmres->orthog)(ksp,loc_it);CHKERRQ(ierr); /* new entry in hessenburg is the 2-norm of our new direction */ ierr = VecNorm(VEC_VV(loc_it+1),NORM_2,&tt);CHKERRQ(ierr); *HH(loc_it+1,loc_it) = tt; *HES(loc_it+1,loc_it) = tt; /* Happy Breakdown Check */ hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); /* RS(loc_it) contains the res_norm from the last iteration */ hapbnd = PetscMin(fgmres->haptol,hapbnd); if (tt > hapbnd) { /* scale new direction by its norm */ ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); } else { /* This happens when the solution is exactly reached. */ /* So there is no new direction... */ ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ hapend = PETSC_TRUE; } /* note that for FGMRES we could get HES(loc_it+1, loc_it) = 0 and the current solution would not be exact if HES was singular. Note that HH non-singular implies that HES is no singular, and HES is guaranteed to be nonsingular when PREVECS are linearly independent and A is nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity of HES). So we should really add a check to verify that HES is nonsingular.*/ /* Now apply rotations to new col of hessenberg (and right side of system), calculate new rotation, and get new residual norm at the same time*/ ierr = KSPFGMRESUpdateHessenberg(ksp,loc_it,hapend,&res_norm);CHKERRQ(ierr); if (ksp->reason) break; loc_it++; fgmres->it = (loc_it-1); /* Add this here in case it has converged */ ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = res_norm; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (!ksp->reason) { SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"You reached the happy break down,but convergence was not indicated."); } break; } } /* END OF ITERATION LOOP */ KSPLogResidualHistory(ksp,res_norm); /* Monitor if we know that we will not return for a restart */ if (ksp->reason || ksp->its >= ksp->max_it) { ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); } if (itcount) *itcount = loc_it; /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ /* Note: must pass in (loc_it-1) for iteration count so that KSPFGMRESBuildSoln properly navigates */ ierr = KSPFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode PCBDDCNullSpaceAssembleCorrection(PC pc,IS local_dofs) { PC_BDDC *pcbddc = (PC_BDDC*)pc->data; PC_IS *pcis = (PC_IS*)pc->data; Mat_IS* matis = (Mat_IS*)pc->pmat->data; KSP *local_ksp; PC newpc; NullSpaceCorrection_ctx shell_ctx; Mat local_mat,local_pmat,small_mat,inv_small_mat; MatStructure local_mat_struct; Vec work1,work2; const Vec *nullvecs; VecScatter scatter_ctx; IS is_aux; MatFactorInfo matinfo; PetscScalar *basis_mat,*Kbasis_mat,*array,*array_mat; PetscScalar one = 1.0,zero = 0.0, m_one = -1.0; PetscInt basis_dofs,basis_size,nnsp_size,i,k,n_I,n_R; PetscBool nnsp_has_cnst; PetscErrorCode ierr; PetscFunctionBegin; /* Infer the local solver */ ierr = ISGetSize(local_dofs,&basis_dofs);CHKERRQ(ierr); ierr = VecGetSize(pcis->vec1_D,&n_I);CHKERRQ(ierr); ierr = VecGetSize(pcbddc->vec1_R,&n_R);CHKERRQ(ierr); if (basis_dofs == n_I) { /* Dirichlet solver */ local_ksp = &pcbddc->ksp_D; } else if (basis_dofs == n_R) { /* Neumann solver */ local_ksp = &pcbddc->ksp_R; } else { SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in %s: unknown local IS size %d. n_I=%d, n_R=%d)\n",__FUNCT__,basis_dofs,n_I,n_R); } ierr = KSPGetOperators(*local_ksp,&local_mat,&local_pmat,&local_mat_struct);CHKERRQ(ierr); /* Get null space vecs */ ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);CHKERRQ(ierr); basis_size = nnsp_size; if (nnsp_has_cnst) { basis_size++; } /* Create shell ctx */ ierr = PetscMalloc(sizeof(*shell_ctx),&shell_ctx);CHKERRQ(ierr); /* Create work vectors in shell context */ ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);CHKERRQ(ierr); ierr = VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);CHKERRQ(ierr); ierr = VecSetType(shell_ctx->work_small_1,VECSEQ);CHKERRQ(ierr); ierr = VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);CHKERRQ(ierr); ierr = VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);CHKERRQ(ierr); ierr = VecSetType(shell_ctx->work_full_1,VECSEQ);CHKERRQ(ierr); ierr = VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);CHKERRQ(ierr); /* Allocate workspace */ ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat );CHKERRQ(ierr); ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);CHKERRQ(ierr); ierr = MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); ierr = MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); /* Restrict local null space on selected dofs (Dirichlet or Neumann) and compute matrices N and K*N */ ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); ierr = VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);CHKERRQ(ierr); for (k=0;k<nnsp_size;k++) { ierr = VecScatterBegin(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); ierr = VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); ierr = VecResetArray(work1);CHKERRQ(ierr); ierr = VecResetArray(work2);CHKERRQ(ierr); } if (nnsp_has_cnst) { ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); ierr = VecSet(work1,one);CHKERRQ(ierr); ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); ierr = VecResetArray(work1);CHKERRQ(ierr); ierr = VecResetArray(work2);CHKERRQ(ierr); } ierr = VecDestroy(&work1);CHKERRQ(ierr); ierr = VecDestroy(&work2);CHKERRQ(ierr); ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); ierr = MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); ierr = MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); /* Assemble another Mat object in shell context */ ierr = MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);CHKERRQ(ierr); ierr = MatLUFactor(small_mat,is_aux,is_aux,&matinfo);CHKERRQ(ierr); ierr = ISDestroy(&is_aux);CHKERRQ(ierr); ierr = PetscMalloc(basis_size*basis_size*sizeof(PetscScalar),&array_mat);CHKERRQ(ierr); for (k=0;k<basis_size;k++) { ierr = VecSet(shell_ctx->work_small_1,zero);CHKERRQ(ierr); ierr = VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(shell_ctx->work_small_1);CHKERRQ(ierr); ierr = VecAssemblyEnd(shell_ctx->work_small_1);CHKERRQ(ierr); ierr = MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);CHKERRQ(ierr); ierr = VecGetArrayRead(shell_ctx->work_small_2,(const PetscScalar**)&array);CHKERRQ(ierr); for (i=0;i<basis_size;i++) { array_mat[i*basis_size+k]=array[i]; } ierr = VecRestoreArrayRead(shell_ctx->work_small_2,(const PetscScalar**)&array);CHKERRQ(ierr); } ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);CHKERRQ(ierr); ierr = MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);CHKERRQ(ierr); ierr = PetscFree(array_mat);CHKERRQ(ierr); ierr = MatDestroy(&inv_small_mat);CHKERRQ(ierr); ierr = MatDestroy(&small_mat);CHKERRQ(ierr); ierr = MatScale(shell_ctx->Kbasis_mat,m_one);CHKERRQ(ierr); /* Rebuild local PC */ ierr = KSPGetPC(*local_ksp,&shell_ctx->local_pc);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)shell_ctx->local_pc);CHKERRQ(ierr); ierr = PCCreate(PETSC_COMM_SELF,&newpc);CHKERRQ(ierr); ierr = PCSetOperators(newpc,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); ierr = PCShellSetContext(newpc,shell_ctx);CHKERRQ(ierr); ierr = PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);CHKERRQ(ierr); ierr = PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);CHKERRQ(ierr); ierr = PCSetUp(newpc);CHKERRQ(ierr); ierr = KSPSetPC(*local_ksp,newpc);CHKERRQ(ierr); ierr = PCDestroy(&newpc);CHKERRQ(ierr); ierr = KSPSetUp(*local_ksp);CHKERRQ(ierr); /* test */ /* TODO: this cause a deadlock when doing multilevel */ #if 0 if (pcbddc->dbg_flag) { KSP check_ksp; PC check_pc; Mat test_mat; Vec work3; PetscViewer viewer=pcbddc->dbg_viewer; PetscReal test_err,lambda_min,lambda_max; PetscBool setsym,issym=PETSC_FALSE; ierr = KSPGetPC(*local_ksp,&check_pc);CHKERRQ(ierr); ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); ierr = VecDuplicate(shell_ctx->work_full_1,&work3);CHKERRQ(ierr); ierr = VecSetRandom(shell_ctx->work_small_1,NULL);CHKERRQ(ierr); ierr = MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);CHKERRQ(ierr); ierr = VecCopy(work1,work2);CHKERRQ(ierr); ierr = MatMult(local_mat,work1,work3);CHKERRQ(ierr); ierr = PCApply(check_pc,work3,work1);CHKERRQ(ierr); ierr = VecAXPY(work1,m_one,work2);CHKERRQ(ierr); ierr = VecNorm(work1,NORM_INFINITY,&test_err);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank); if (basis_dofs == n_I) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Dirichlet "); } else { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Neumann "); } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"solver is :%1.14e\n",test_err); ierr = MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);CHKERRQ(ierr); ierr = MatShift(test_mat,one);CHKERRQ(ierr); ierr = MatNorm(test_mat,NORM_INFINITY,&test_err);CHKERRQ(ierr); ierr = MatDestroy(&test_mat);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace matrices is :%1.14e\n",PetscGlobalRank,test_err); /* Create ksp object suitable for extreme eigenvalues' estimation */ ierr = KSPCreate(PETSC_COMM_SELF,&check_ksp);CHKERRQ(ierr); ierr = KSPSetOperators(check_ksp,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);CHKERRQ(ierr); ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); if (issym) { ierr = KSPSetType(check_ksp,KSPCG);CHKERRQ(ierr); } ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); ierr = VecSetRandom(work1,NULL);CHKERRQ(ierr); ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); ierr = KSPSolve(check_ksp,work2,work2);CHKERRQ(ierr); ierr = VecAXPY(work2,m_one,work1);CHKERRQ(ierr); ierr = VecNorm(work2,NORM_INFINITY,&test_err);CHKERRQ(ierr); ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for adapted KSP %1.14e (it %d, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max); ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); ierr = VecDestroy(&work1);CHKERRQ(ierr); ierr = VecDestroy(&work2);CHKERRQ(ierr); ierr = VecDestroy(&work3);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }

int main(int argc,char **args) { Mat A,B; Vec xx,s1,s2,yy; PetscErrorCode ierr; PetscInt m=45,rows[2],cols[2],bs=1,i,row,col,*idx,M; PetscScalar rval,vals1[4],vals2[4]; PetscRandom rdm; IS is1,is2; PetscReal s1norm,s2norm,rnorm,tol = 1.e-4; PetscBool flg; MatFactorInfo info; PetscInitialize(&argc,&args,(char *)0,help); /* Test MatSetValues() and MatGetValues() */ ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_size",&m,PETSC_NULL);CHKERRQ(ierr); M = m*bs; ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,M,M,1,PETSC_NULL,&A);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,15,PETSC_NULL,&B);CHKERRQ(ierr); ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_SELF,&rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,M,&xx);CHKERRQ(ierr); ierr = VecDuplicate(xx,&s1);CHKERRQ(ierr); ierr = VecDuplicate(xx,&s2);CHKERRQ(ierr); ierr = VecDuplicate(xx,&yy);CHKERRQ(ierr); /* For each row add atleast 15 elements */ for (row=0; row<M; row++) { for (i=0; i<25*bs; i++) { ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); col = (PetscInt)(PetscRealPart(rval)*M); ierr = MatSetValues(A,1,&row,1,&col,&rval,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B,1,&row,1,&col,&rval,INSERT_VALUES);CHKERRQ(ierr); } } /* Now set blocks of values */ for (i=0; i<20*bs; i++) { ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); cols[0] = (PetscInt)(PetscRealPart(rval)*M); vals1[0] = rval; ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); cols[1] = (PetscInt)(PetscRealPart(rval)*M); vals1[1] = rval; ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); rows[0] = (PetscInt)(PetscRealPart(rval)*M); vals1[2] = rval; ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); rows[1] = (PetscInt)(PetscRealPart(rval)*M); vals1[3] = rval; ierr = MatSetValues(A,2,rows,2,cols,vals1,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B,2,rows,2,cols,vals1,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Test MatNorm() */ ierr = MatNorm(A,NORM_FROBENIUS,&s1norm);CHKERRQ(ierr); ierr = MatNorm(B,NORM_FROBENIUS,&s2norm);CHKERRQ(ierr); rnorm = PetscAbsScalar(s2norm-s1norm)/s2norm; if ( rnorm>tol ) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm_FROBENIUS()- NormA=%16.14e NormB=%16.14e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } ierr = MatNorm(A,NORM_INFINITY,&s1norm);CHKERRQ(ierr); ierr = MatNorm(B,NORM_INFINITY,&s2norm);CHKERRQ(ierr); rnorm = PetscAbsScalar(s2norm-s1norm)/s2norm; if ( rnorm>tol ) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm_INFINITY()- NormA=%16.14e NormB=%16.14e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } ierr = MatNorm(A,NORM_1,&s1norm);CHKERRQ(ierr); ierr = MatNorm(B,NORM_1,&s2norm);CHKERRQ(ierr); rnorm = PetscAbsScalar(s2norm-s1norm)/s2norm; if ( rnorm>tol ) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm_NORM_1()- NormA=%16.14e NormB=%16.14e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } /* MatShift() */ rval = 10*s1norm; ierr = MatShift(A,rval);CHKERRQ(ierr); ierr = MatShift(B,rval);CHKERRQ(ierr); /* Test MatTranspose() */ ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); ierr = MatTranspose(B,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); /* Now do MatGetValues() */ for (i=0; i<30; i++) { ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); cols[0] = (PetscInt)(PetscRealPart(rval)*M); ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); cols[1] = (PetscInt)(PetscRealPart(rval)*M); ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); rows[0] = (PetscInt)(PetscRealPart(rval)*M); ierr = PetscRandomGetValue(rdm,&rval);CHKERRQ(ierr); rows[1] = (PetscInt)(PetscRealPart(rval)*M); ierr = MatGetValues(A,2,rows,2,cols,vals1);CHKERRQ(ierr); ierr = MatGetValues(B,2,rows,2,cols,vals2);CHKERRQ(ierr); ierr = PetscMemcmp(vals1,vals2,4*sizeof(PetscScalar),&flg);CHKERRQ(ierr); if (!flg) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatGetValues bs = %D\n",bs);CHKERRQ(ierr); } } /* Test MatMult(), MatMultAdd() */ for (i=0; i<40; i++) { ierr = VecSetRandom(xx,rdm);CHKERRQ(ierr); ierr = VecSet(s2,0.0);CHKERRQ(ierr); ierr = MatMult(A,xx,s1);CHKERRQ(ierr); ierr = MatMultAdd(A,xx,s2,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_2,&s1norm);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_2,&s2norm);CHKERRQ(ierr); rnorm = s2norm-s1norm; if (rnorm<-tol || rnorm>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatMult not equal to MatMultAdd Norm1=%e Norm2=%e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } } /* Test MatMult() */ ierr = MatMultEqual(A,B,10,&flg);CHKERRQ(ierr); if (!flg){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatMult()\n");CHKERRQ(ierr); } /* Test MatMultAdd() */ ierr = MatMultAddEqual(A,B,10,&flg);CHKERRQ(ierr); if (!flg){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatMultAdd()\n");CHKERRQ(ierr); } /* Test MatMultTranspose() */ ierr = MatMultTransposeEqual(A,B,10,&flg);CHKERRQ(ierr); if (!flg){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatMultTranspose()\n");CHKERRQ(ierr); } /* Test MatMultTransposeAdd() */ ierr = MatMultTransposeAddEqual(A,B,10,&flg);CHKERRQ(ierr); if (!flg){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatMultTransposeAdd()\n");CHKERRQ(ierr); } /* Do LUFactor() on both the matrices */ ierr = PetscMalloc(M*sizeof(PetscInt),&idx);CHKERRQ(ierr); for (i=0; i<M; i++) idx[i] = i; ierr = ISCreateGeneral(PETSC_COMM_SELF,M,idx,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,M,idx,PETSC_COPY_VALUES,&is2);CHKERRQ(ierr); ierr = PetscFree(idx);CHKERRQ(ierr); ierr = ISSetPermutation(is1);CHKERRQ(ierr); ierr = ISSetPermutation(is2);CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); info.fill = 2.0; info.dtcol = 0.0; info.zeropivot = 1.e-14; info.pivotinblocks = 1.0; ierr = MatLUFactor(B,is1,is2,&info);CHKERRQ(ierr); ierr = MatLUFactor(A,is1,is2,&info);CHKERRQ(ierr); /* Test MatSolveAdd() */ for (i=0; i<10; i++) { ierr = VecSetRandom(xx,rdm);CHKERRQ(ierr); ierr = VecSetRandom(yy,rdm);CHKERRQ(ierr); ierr = MatSolveAdd(B,xx,yy,s2);CHKERRQ(ierr); ierr = MatSolveAdd(A,xx,yy,s1);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_2,&s1norm);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_2,&s2norm);CHKERRQ(ierr); rnorm = s2norm-s1norm; if (rnorm<-tol || rnorm>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatSolveAdd - Norm1=%16.14e Norm2=%16.14e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } } /* Test MatSolveAdd() when x = A'b +x */ for (i=0; i<10; i++) { ierr = VecSetRandom(xx,rdm);CHKERRQ(ierr); ierr = VecSetRandom(s1,rdm);CHKERRQ(ierr); ierr = VecCopy(s2,s1);CHKERRQ(ierr); ierr = MatSolveAdd(B,xx,s2,s2);CHKERRQ(ierr); ierr = MatSolveAdd(A,xx,s1,s1);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_2,&s1norm);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_2,&s2norm);CHKERRQ(ierr); rnorm = s2norm-s1norm; if (rnorm<-tol || rnorm>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatSolveAdd(same) - Norm1=%16.14e Norm2=%16.14e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } } /* Test MatSolve() */ for (i=0; i<10; i++) { ierr = VecSetRandom(xx,rdm);CHKERRQ(ierr); ierr = MatSolve(B,xx,s2);CHKERRQ(ierr); ierr = MatSolve(A,xx,s1);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_2,&s1norm);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_2,&s2norm);CHKERRQ(ierr); rnorm = s2norm-s1norm; if (rnorm<-tol || rnorm>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatSolve - Norm1=%16.14e Norm2=%16.14e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } } /* Test MatSolveTranspose() */ if (bs < 8) { for (i=0; i<10; i++) { ierr = VecSetRandom(xx,rdm);CHKERRQ(ierr); ierr = MatSolveTranspose(B,xx,s2);CHKERRQ(ierr); ierr = MatSolveTranspose(A,xx,s1);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_2,&s1norm);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_2,&s2norm);CHKERRQ(ierr); rnorm = s2norm-s1norm; if (rnorm<-tol || rnorm>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error:MatSolveTranspose - Norm1=%16.14e Norm2=%16.14e bs = %D\n",s1norm,s2norm,bs);CHKERRQ(ierr); } } } ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = VecDestroy(&xx);CHKERRQ(ierr); ierr = VecDestroy(&s1);CHKERRQ(ierr); ierr = VecDestroy(&s2);CHKERRQ(ierr); ierr = VecDestroy(&yy);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

PetscErrorCode PCBDDCNullSpaceAdaptGlobal(PC pc) { PC_IS* pcis = (PC_IS*) (pc->data); PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); KSP inv_change; PC pc_change; const Vec *nsp_vecs; Vec *new_nsp_vecs; PetscInt i,nsp_size,new_nsp_size,start_new; PetscBool nsp_has_cnst; MatNullSpace new_nsp; PetscErrorCode ierr; MPI_Comm comm; PetscFunctionBegin; /* create KSP for change of basis */ ierr = KSPCreate(PETSC_COMM_SELF,&inv_change);CHKERRQ(ierr); ierr = KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);CHKERRQ(ierr); ierr = KSPSetType(inv_change,KSPPREONLY);CHKERRQ(ierr); ierr = KSPGetPC(inv_change,&pc_change);CHKERRQ(ierr); ierr = PCSetType(pc_change,PCLU);CHKERRQ(ierr); ierr = KSPSetUp(inv_change);CHKERRQ(ierr); /* get nullspace and transform it */ ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); new_nsp_size = nsp_size; if (nsp_has_cnst) { new_nsp_size++; } ierr = PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);CHKERRQ(ierr); for (i=0;i<new_nsp_size;i++) { ierr = VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]);CHKERRQ(ierr); } start_new = 0; if (nsp_has_cnst) { start_new = 1; ierr = VecSet(new_nsp_vecs[0],1.0);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_B,1.0);CHKERRQ(ierr); ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); } for (i=0;i<nsp_size;i++) { ierr = VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); } ierr = PCBDDCOrthonormalizeVecs(new_nsp_size,new_nsp_vecs);CHKERRQ(ierr); #if 0 PetscBool nsp_t=PETSC_FALSE; ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); printf("Original Null Space test: %d\n",nsp_t); Mat temp_mat; Mat_IS* matis = (Mat_IS*)pc->pmat->data; temp_mat = matis->A; matis->A = pcbddc->local_mat; pcbddc->local_mat = temp_mat; ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); printf("Original Null Space, mat changed test: %d\n",nsp_t); { PetscReal test_norm; for (i=0;i<new_nsp_size;i++) { ierr = MatMult(pc->pmat,new_nsp_vecs[i],pcis->vec1_global);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_global,NORM_2,&test_norm);CHKERRQ(ierr); if (test_norm > 1.e-12) { printf("------------ERROR VEC %d------------------\n",i); ierr = VecView(pcis->vec1_global,PETSC_VIEWER_STDOUT_WORLD); printf("------------------------------------------\n"); } } } #endif ierr = KSPDestroy(&inv_change);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); ierr = MatNullSpaceCreate(comm,PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);CHKERRQ(ierr); ierr = PCBDDCSetNullSpace(pc,new_nsp);CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&new_nsp);CHKERRQ(ierr); #if 0 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); printf("New Null Space, mat changed: %d\n",nsp_t); temp_mat = matis->A; matis->A = pcbddc->local_mat; pcbddc->local_mat = temp_mat; ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); printf("New Null Space, mat original: %d\n",nsp_t); #endif for (i=0;i<new_nsp_size;i++) { ierr = VecDestroy(&new_nsp_vecs[i]);CHKERRQ(ierr); } ierr = PetscFree(new_nsp_vecs);CHKERRQ(ierr); PetscFunctionReturn(0); }

int main(int argc,char **args) { Mat C; PetscInt i,j,m = 5,n = 2,Ii,J; PetscErrorCode ierr; PetscMPIInt rank,size; PetscScalar v; Vec x,y; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); n = 2*size; /* Create the matrix for the five point stencil, YET AGAIN */ ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); for (i=0; i<m; i++) { for (j=2*rank; j<2*rank+2; j++) { v = -1.0; Ii = j + n*i; if (i>0) {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 4.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); for (i=0; i<m; i++) { for (j=2*rank; j<2*rank+2; j++) { v = 1.0; Ii = j + n*i; if (i>0) {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = -4.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } /* Introduce new nonzero that requires new construction for matrix-vector product */ if (rank) { Ii = rank-1; J = m*n-1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Form a couple of vectors to test matrix-vector product */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,m*n);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); v = 1.0; ierr = VecSet(x,v);CHKERRQ(ierr); ierr = MatMult(C,x,y);CHKERRQ(ierr); ierr = MatDestroy(C);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); ierr = VecDestroy(y);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }

PetscErrorCode PCBDDCNullSpaceAssembleCoarse(PC pc, MatNullSpace* CoarseNullSpace) { PC_BDDC *pcbddc = (PC_BDDC*)pc->data; Mat_IS *matis = (Mat_IS*)pc->pmat->data; MatNullSpace tempCoarseNullSpace; const Vec *nsp_vecs; Vec *coarse_nsp_vecs,local_vec,local_primal_vec; PetscInt nsp_size,coarse_nsp_size,i; PetscBool nsp_has_cnst; PetscReal test_null; PetscErrorCode ierr; PetscFunctionBegin; tempCoarseNullSpace = 0; coarse_nsp_size = 0; coarse_nsp_vecs = 0; ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); if (pcbddc->coarse_mat) { ierr = PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);CHKERRQ(ierr); for (i=0;i<nsp_size+1;i++) { ierr = VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);CHKERRQ(ierr); } } ierr = MatGetVecs(pcbddc->ConstraintMatrix,&local_vec,&local_primal_vec);CHKERRQ(ierr); if (nsp_has_cnst) { ierr = VecSet(local_vec,1.0);CHKERRQ(ierr); ierr = MatMult(pcbddc->ConstraintMatrix,local_vec,local_primal_vec);CHKERRQ(ierr); ierr = PCBDDCScatterCoarseDataBegin(pc,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PCBDDCScatterCoarseDataEnd(pc,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); if (pcbddc->coarse_mat) { if (pcbddc->dbg_flag) { ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);CHKERRQ(ierr); } ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); coarse_nsp_size++; } } for (i=0;i<nsp_size;i++) { ierr = VecScatterBegin(matis->ctx,nsp_vecs[i],local_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(matis->ctx,nsp_vecs[i],local_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = MatMult(pcbddc->ConstraintMatrix,local_vec,local_primal_vec);CHKERRQ(ierr); ierr = PCBDDCScatterCoarseDataBegin(pc,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PCBDDCScatterCoarseDataEnd(pc,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); if (pcbddc->coarse_mat) { if (pcbddc->dbg_flag) { ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); ierr = VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);CHKERRQ(ierr); } ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); coarse_nsp_size++; } } if (coarse_nsp_size > 0) { ierr = PCBDDCOrthonormalizeVecs(coarse_nsp_size,coarse_nsp_vecs);CHKERRQ(ierr); ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)(pcbddc->coarse_mat)),PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&tempCoarseNullSpace);CHKERRQ(ierr); for (i=0;i<nsp_size+1;i++) { ierr = VecDestroy(&coarse_nsp_vecs[i]);CHKERRQ(ierr); } } ierr = PetscFree(coarse_nsp_vecs);CHKERRQ(ierr); ierr = VecDestroy(&local_vec);CHKERRQ(ierr); ierr = VecDestroy(&local_primal_vec);CHKERRQ(ierr); *CoarseNullSpace = tempCoarseNullSpace; PetscFunctionReturn(0); }

Example: mpiexec -n <np> ./ex130 -f <matrix binary file> -mat_solver_type 1 -mat_superlu_equil \n\n"; #include <petscmat.h> int main(int argc,char **args) { Mat A,F; Vec u,x,b; PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt m,n,nfact,ipack=0; PetscReal norm,tol=1.e-12,Anorm; IS perm,iperm; MatFactorInfo info; PetscBool flg,testMatSolve=PETSC_TRUE; PetscViewer fd; /* viewer */ char file[PETSC_MAX_PATH_LEN]; /* input file name */ ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); /* Determine file from which we read the matrix A */ ierr = PetscOptionsGetString(NULL,NULL,"-f",file,PETSC_MAX_PATH_LEN,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,1,"Must indicate binary file with the -f option"); /* Load matrix A */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&b);CHKERRQ(ierr); ierr = VecLoad(b,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n); ierr = MatNorm(A,NORM_INFINITY,&Anorm);CHKERRQ(ierr); /* Create vectors */ ierr = VecDuplicate(b,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* save the true solution */ /* Test LU Factorization */ ierr = MatGetOrdering(A,MATORDERINGNATURAL,&perm,&iperm);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-mat_solver_type",&ipack,NULL);CHKERRQ(ierr); switch (ipack) { case 1: #if defined(PETSC_HAVE_SUPERLU) if (!rank) printf(" SUPERLU LU:\n"); ierr = MatGetFactor(A,MATSOLVERSUPERLU,MAT_FACTOR_LU,&F);CHKERRQ(ierr); break; #endif case 2: #if defined(PETSC_HAVE_MUMPS) if (!rank) printf(" MUMPS LU:\n"); ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr); { /* test mumps options */ PetscInt icntl_7 = 5; ierr = MatMumpsSetIcntl(F,7,icntl_7);CHKERRQ(ierr); } break; #endif default: if (!rank) printf(" PETSC LU:\n"); ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } info.fill = 5.0; ierr = MatLUFactorSymbolic(F,A,perm,iperm,&info);CHKERRQ(ierr); for (nfact = 0; nfact < 1; nfact++) { if (!rank) printf(" %d-the LU numfactorization \n",nfact); ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr); /* Test MatSolve() */ if (testMatSolve) { ierr = MatSolve(F,b,x);CHKERRQ(ierr); /* Check the residual */ ierr = MatMult(A,x,u);CHKERRQ(ierr); ierr = VecAXPY(u,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(u,NORM_INFINITY,&norm);CHKERRQ(ierr); if (norm > tol) { if (!rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve: rel residual %g/%g = %g, LU numfact %d\n",norm,Anorm,norm/Anorm,nfact);CHKERRQ(ierr); } } } } /* Free data structures */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); ierr = ISDestroy(&perm);CHKERRQ(ierr); ierr = ISDestroy(&iperm);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

int main(int argc,char *argv[]) { char mat_type[256] = "aij"; /* default matrix type */ PetscErrorCode ierr; MPI_Comm comm; PetscMPIInt rank,size; Sliced slice; PetscInt i,bs=1,N=5,n,m,rstart,ghosts[2],*d_nnz,*o_nnz,dfill[4]={1,0,0,1},ofill[4]={1,1,1,1}; PetscReal alpha=1,K=1,rho0=1,u0=0,sigma=0.2; PetscTruth useblock=PETSC_TRUE; PetscScalar *xx; Mat A; Vec x,b,lf; ierr = PetscInitialize(&argc,&argv,0,help);CHKERRQ(ierr); comm = PETSC_COMM_WORLD; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = PetscOptionsBegin(comm,0,"Options for Sliced test",0);CHKERRQ(ierr); { ierr = PetscOptionsInt("-n","Global number of nodes","",N,&N,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-bs","Block size (1 or 2)","",bs,&bs,PETSC_NULL);CHKERRQ(ierr); if (bs != 1) { if (bs != 2) SETERRQ(1,"Block size must be 1 or 2"); ierr = PetscOptionsReal("-alpha","Inverse time step for wave operator","",alpha,&alpha,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-K","Bulk modulus of compressibility","",K,&K,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-rho0","Reference density","",rho0,&rho0,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-u0","Reference velocity","",u0,&u0,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-sigma","Width of Gaussian density perturbation","",sigma,&sigma,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsTruth("-block","Use block matrix assembly","",useblock,&useblock,PETSC_NULL);CHKERRQ(ierr); } ierr = PetscOptionsString("-sliced_mat_type","Matrix type to use (aij or baij)","",mat_type,mat_type,sizeof mat_type,PETSC_NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); /* Split ownership, set up periodic grid in 1D */ n = PETSC_DECIDE; ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); rstart = 0; ierr = MPI_Scan(&n,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); rstart -= n; ghosts[0] = (N+rstart-1)%N; ghosts[1] = (rstart+n)%N; ierr = SlicedCreate(comm,&slice);CHKERRQ(ierr); ierr = SlicedSetGhosts(slice,bs,n,2,ghosts);CHKERRQ(ierr); ierr = PetscMalloc2(n,PetscInt,&d_nnz,n,PetscInt,&o_nnz);CHKERRQ(ierr); for (i=0; i<n; i++) { if (size > 1 && (i==0 || i==n-1)) { d_nnz[i] = 2; o_nnz[i] = 1; } else { d_nnz[i] = 3; o_nnz[i] = 0; } } ierr = SlicedSetPreallocation(slice,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* Currently does not copy X_nnz so we can't free them until after SlicedGetMatrix */ if (!useblock) {ierr = SlicedSetBlockFills(slice,dfill,ofill);CHKERRQ(ierr);} /* Irrelevant for baij formats */ ierr = SlicedGetMatrix(slice,mat_type,&A);CHKERRQ(ierr); ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); ierr = SlicedCreateGlobalVector(slice,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecGhostGetLocalForm(x,&lf);CHKERRQ(ierr); ierr = VecGetSize(lf,&m);CHKERRQ(ierr); if (m != (n+2)*bs) SETERRQ2(1,"size of local form %D, expected %D",m,(n+2)*bs); ierr = VecGetArray(lf,&xx);CHKERRQ(ierr); for (i=0; i<n; i++) { PetscInt row[2],col[9],im,ip; PetscScalar v[12]; const PetscReal xref = 2.0*(rstart+i)/N - 1; /* [-1,1] */ const PetscReal h = 1.0/N; /* grid spacing */ im = (i==0) ? n : i-1; ip = (i==n-1) ? n+1 : i+1; switch (bs) { case 1: /* Laplacian with periodic boundaries */ col[0] = im; col[1] = i; col[2] = ip; v[0] = -h; v[1] = 2*h; v[2] = -h; ierr = MatSetValuesLocal(A,1,&i,3,col,v,INSERT_VALUES);CHKERRQ(ierr); xx[i] = sin(xref*PETSC_PI); break; case 2: /* Linear acoustic wave operator in variables [rho, u], central differences, periodic, timestep 1/alpha */ v[0] = -0.5*u0; v[1] = -0.5*K; v[2] = alpha; v[3] = 0; v[4] = 0.5*u0; v[5] = 0.5*K; v[6] = -0.5/rho0; v[7] = -0.5*u0; v[8] = 0; v[9] = alpha; v[10] = 0.5/rho0; v[11] = 0.5*u0; if (useblock) { row[0] = i; col[0] = im; col[1] = i; col[2] = ip; ierr = MatSetValuesBlockedLocal(A,1,row,3,col,v,INSERT_VALUES);CHKERRQ(ierr); } else { row[0] = 2*i; row[1] = 2*i+1; col[0] = 2*im; col[1] = 2*im+1; col[2] = 2*i; col[3] = 2*ip; col[4] = 2*ip+1; v[3] = v[4]; v[4] = v[5]; /* pack values in first row */ ierr = MatSetValuesLocal(A,1,row,5,col,v,INSERT_VALUES);CHKERRQ(ierr); col[2] = 2*i+1; v[8] = v[9]; v[9] = v[10]; v[10] = v[11]; /* pack values in second row */ ierr = MatSetValuesLocal(A,1,row+1,5,col,v+6,INSERT_VALUES);CHKERRQ(ierr); } /* Set current state (gaussian density perturbation) */ xx[2*i] = 0.2*exp(-PetscSqr(xref)/(2*PetscSqr(sigma))); xx[2*i+1] = 0; break; default: SETERRQ1(1,"not implemented for block size %D",bs); } } ierr = VecRestoreArray(lf,&xx);CHKERRQ(ierr); ierr = VecGhostRestoreLocalForm(x,&lf);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMult(A,x,b);CHKERRQ(ierr); ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecView(b,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Update the ghosted values, view the result on rank 0. */ ierr = VecGhostUpdateBegin(b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGhostUpdateEnd(b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); if (!rank) { ierr = VecGhostGetLocalForm(b,&lf);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_SELF,"Local form of b on rank 0, last two nodes are ghost nodes\n");CHKERRQ(ierr); ierr = VecView(lf,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecGhostRestoreLocalForm(b,&lf);CHKERRQ(ierr); } ierr = SlicedDestroy(slice);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); ierr = VecDestroy(b);CHKERRQ(ierr); ierr = MatDestroy(A);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }

PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx fetidpmat_ctx ) { PetscErrorCode ierr; PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; PCBDDCGraph mat_graph=pcbddc->mat_graph; Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; MPI_Comm comm; Mat ScalingMat; Vec lambda_global; IS IS_l2g_lambda; PetscBool skip_node,fully_redundant; PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; PetscMPIInt rank,size,buf_size,neigh; PetscScalar scalar_value; PetscInt *vertex_indices; PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; PetscInt *aux_sums,*cols_B_delta,*l2g_indices; PetscScalar *array,*scaling_factors,*vals_B_delta; PetscInt *aux_local_numbering_2; /* For communication of scaling factors */ PetscInt *ptrs_buffer,neigh_position; PetscScalar **all_factors,*send_buffer,*recv_buffer; MPI_Request *send_reqs,*recv_reqs; /* tests */ Vec test_vec; PetscBool test_fetidp; PetscViewer viewer; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)(fetidpmat_ctx->pc),&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); /* Default type of lagrange multipliers is non-redundant */ fully_redundant = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);CHKERRQ(ierr); /* Evaluate local and global number of lagrange multipliers */ ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); n_local_lambda = 0; partial_sum = 0; n_boundary_dofs = 0; s = 0; /* Get Vertices used to define the BDDC */ ierr = PCBDDCGetPrimalVerticesLocalIdx(fetidpmat_ctx->pc,&n_vertices,&vertex_indices);CHKERRQ(ierr); dual_size = pcis->n_B-n_vertices; ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_2);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<pcis->n;i++){ j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ if ( j > 0 ) { n_boundary_dofs++; } skip_node = PETSC_FALSE; if ( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ skip_node = PETSC_TRUE; s++; } if (j < 1) { skip_node = PETSC_TRUE; } if ( !skip_node ) { if (fully_redundant) { /* fully redundant set of lagrange multipliers */ n_lambda_for_dof = (j*(j+1))/2; } else { n_lambda_for_dof = j; } n_local_lambda += j; /* needed to evaluate global number of lagrange multipliers */ array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ /* store some data needed */ dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; aux_local_numbering_1[partial_sum] = i; aux_local_numbering_2[partial_sum] = n_lambda_for_dof; partial_sum++; } } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); fetidpmat_ctx->n_lambda = (PetscInt)PetscRealPart(scalar_value); /* compute global ordering of lagrange multipliers and associate l2g map */ ierr = PCBDDCSubsetNumbering(comm,matis->mapping,partial_sum,aux_local_numbering_1,aux_local_numbering_2,&i,&aux_global_numbering);CHKERRQ(ierr); if (i != fetidpmat_ctx->n_lambda) { SETERRQ3(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Error in %s: global number of multipliers mismatch! (%d!=%d)\n",__FUNCT__,fetidpmat_ctx->n_lambda,i); } ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); /* init data for scaling factors exchange */ partial_sum = 0; j = 0; ierr = PetscMalloc1(pcis->n_neigh,&ptrs_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&send_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&recv_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n,&all_factors);CHKERRQ(ierr); ptrs_buffer[0]=0; for (i=1;i<pcis->n_neigh;i++) { partial_sum += pcis->n_shared[i]; ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; } ierr = PetscMalloc1(partial_sum,&send_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&recv_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&all_factors[0]);CHKERRQ(ierr); for (i=0;i<pcis->n-1;i++) { j = mat_graph->count[i]; all_factors[i+1]=all_factors[i]+j; } /* scatter B scaling to N vec */ ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); /* communications */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; } ierr = PetscMPIIntCast(ptrs_buffer[i]-ptrs_buffer[i-1],&buf_size);CHKERRQ(ierr); ierr = PetscMPIIntCast(pcis->neigh[i],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&send_reqs[i-1]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* put values in correct places */ for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { k = pcis->shared[i][j]; neigh_position = 0; while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; } } ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree(send_reqs);CHKERRQ(ierr); ierr = PetscFree(recv_reqs);CHKERRQ(ierr); ierr = PetscFree(send_buffer);CHKERRQ(ierr); ierr = PetscFree(recv_buffer);CHKERRQ(ierr); ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); /* Compute B and B_delta (local actions) */ ierr = PetscMalloc1(pcis->n_neigh,&aux_sums);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&l2g_indices);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&vals_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&cols_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&scaling_factors);CHKERRQ(ierr); n_global_lambda=0; partial_sum=0; for (i=0;i<dual_size;i++) { n_global_lambda = aux_global_numbering[i]; j = mat_graph->count[aux_local_numbering_1[i]]; aux_sums[0]=0; for (s=1;s<j;s++) { aux_sums[s]=aux_sums[s-1]+j-s+1; } array = all_factors[aux_local_numbering_1[i]]; n_neg_values = 0; while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values] < rank) {n_neg_values++;} n_pos_values = j - n_neg_values; if (fully_redundant) { for (s=0;s<n_neg_values;s++) { l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=-1.0; scaling_factors[partial_sum+s]=array[s]; } for (s=0;s<n_pos_values;s++) { l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s+n_neg_values]=1.0; scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; } partial_sum += j; } else { /* l2g_indices and default cols and vals of B_delta */ for (s=0;s<j;s++) { l2g_indices [partial_sum+s]=n_global_lambda+s; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=0.0; } /* B_delta */ if ( n_neg_values > 0 ) { /* there's a rank next to me to the left */ vals_B_delta [partial_sum+n_neg_values-1]=-1.0; } if ( n_neg_values < j ) { /* there's a rank next to me to the right */ vals_B_delta [partial_sum+n_neg_values]=1.0; } /* scaling as in Klawonn-Widlund 1999*/ for (s=0;s<n_neg_values;s++) { scalar_value = 0.0; for (k=0;k<s+1;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s] = -scalar_value; } for (s=0;s<n_pos_values;s++) { scalar_value = 0.0; for (k=s+n_neg_values;k<j;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s+n_neg_values] = scalar_value; } partial_sum += j; } } ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); ierr = PetscFree(aux_sums);CHKERRQ(ierr); ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); ierr = PetscFree(all_factors);CHKERRQ(ierr); /* Local to global mapping of fetidpmat */ ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); /* Create local part of B_delta */ ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (fully_redundant) { ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); } else { ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } ierr = PetscFree(scaling_factors);CHKERRQ(ierr); ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); /* Create some vectors needed by fetidp */ ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); test_fetidp = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); if (test_fetidp && !pcbddc->use_deluxe_scaling) { PetscReal real_value; ierr = PetscViewerASCIIGetStdout(comm,&viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); if (fully_redundant) { ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST A/B: Test numbering of global lambda dofs */ /******************************************************************/ ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecNorm(test_vec,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (fully_redundant) { ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,PetscRealPart(scalar_value)-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } /******************************************************************/ /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ /* This is the meaning of the B matrix */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ /* E_D = R_D^TR */ /* P_D = B_{D,delta}^T B_{delta} */ /* eq.44 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ /* compute a random vector in \widetilde{W} */ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* store w for final comparison */ ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Average operator E_D : results stored in pcis->vec2_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec2_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* test E_D=I-P_D */ scalar_value = 1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ /* eq.48 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* scaling */ ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (!fully_redundant) { /******************************************************************/ /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); } } /* final cleanup */ ierr = PetscFree(vertex_indices);CHKERRQ(ierr); ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); PetscFunctionReturn(0); }

int main(int argc,char **args) { Vec x1,b1,x2,b2; /* solution and RHS vectors for systems #1 and #2 */ Vec u; /* exact solution vector */ Mat C1,C2; /* matrices for systems #1 and #2 */ KSP ksp1,ksp2; /* KSP contexts for systems #1 and #2 */ PetscInt ntimes = 3; /* number of times to solve the linear systems */ PetscLogEvent CHECK_ERROR; /* event number for error checking */ PetscInt ldim,low,high,iglobal,Istart,Iend,Istart2,Iend2; PetscInt Ii,J,i,j,m = 3,n = 2,its,t; PetscErrorCode ierr; PetscBool flg = PETSC_FALSE; PetscScalar v; PetscMPIInt rank,size; #if defined(PETSC_USE_LOG) PetscLogStage stages[3]; #endif PetscInitialize(&argc,&args,(char*)0,help); ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-t",&ntimes,NULL);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); n = 2*size; /* Register various stages for profiling */ ierr = PetscLogStageRegister("Prelim setup",&stages[0]);CHKERRQ(ierr); ierr = PetscLogStageRegister("Linear System 1",&stages[1]);CHKERRQ(ierr); ierr = PetscLogStageRegister("Linear System 2",&stages[2]);CHKERRQ(ierr); /* Register a user-defined event for profiling (error checking). */ CHECK_ERROR = 0; ierr = PetscLogEventRegister("Check Error",KSP_CLASSID,&CHECK_ERROR);CHKERRQ(ierr); /* - - - - - - - - - - - - Stage 0: - - - - - - - - - - - - - - Preliminary Setup - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); /* Create data structures for first linear system. - Create parallel matrix, specifying only its global dimensions. When using MatCreate(), the matrix format can be specified at runtime. Also, the parallel partitioning of the matrix is determined by PETSc at runtime. - Create parallel vectors. - When using VecSetSizes(), we specify only the vector's global dimension; the parallel partitioning is determined at runtime. - Note: We form 1 vector from scratch and then duplicate as needed. */ ierr = MatCreate(PETSC_COMM_WORLD,&C1);CHKERRQ(ierr); ierr = MatSetSizes(C1,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); ierr = MatSetFromOptions(C1);CHKERRQ(ierr); ierr = MatSetUp(C1);CHKERRQ(ierr); ierr = MatGetOwnershipRange(C1,&Istart,&Iend);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr); ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr); ierr = VecSetFromOptions(u);CHKERRQ(ierr); ierr = VecDuplicate(u,&b1);CHKERRQ(ierr); ierr = VecDuplicate(u,&x1);CHKERRQ(ierr); /* Create first linear solver context. Set runtime options (e.g., -pc_type <type>). Note that the first linear system uses the default option names, while the second linear systme uses a different options prefix. */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp1);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp1);CHKERRQ(ierr); /* Set user-defined monitoring routine for first linear system. */ ierr = PetscOptionsGetBool(NULL,"-my_ksp_monitor",&flg,NULL);CHKERRQ(ierr); if (flg) {ierr = KSPMonitorSet(ksp1,MyKSPMonitor,NULL,0);CHKERRQ(ierr);} /* Create data structures for second linear system. */ ierr = MatCreate(PETSC_COMM_WORLD,&C2);CHKERRQ(ierr); ierr = MatSetSizes(C2,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); ierr = MatSetFromOptions(C2);CHKERRQ(ierr); ierr = MatSetUp(C2);CHKERRQ(ierr); ierr = MatGetOwnershipRange(C2,&Istart2,&Iend2);CHKERRQ(ierr); ierr = VecDuplicate(u,&b2);CHKERRQ(ierr); ierr = VecDuplicate(u,&x2);CHKERRQ(ierr); /* Create second linear solver context */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp2);CHKERRQ(ierr); /* Set different options prefix for second linear system. Set runtime options (e.g., -s2_pc_type <type>) */ ierr = KSPAppendOptionsPrefix(ksp2,"s2_");CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp2);CHKERRQ(ierr); /* Assemble exact solution vector in parallel. Note that each processor needs to set only its local part of the vector. */ ierr = VecGetLocalSize(u,&ldim);CHKERRQ(ierr); ierr = VecGetOwnershipRange(u,&low,&high);CHKERRQ(ierr); for (i=0; i<ldim; i++) { iglobal = i + low; v = (PetscScalar)(i + 100*rank); ierr = VecSetValues(u,1,&iglobal,&v,ADD_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(u);CHKERRQ(ierr); ierr = VecAssemblyEnd(u);CHKERRQ(ierr); /* Log the number of flops for computing vector entries */ ierr = PetscLogFlops(2.0*ldim);CHKERRQ(ierr); /* End curent profiling stage */ ierr = PetscLogStagePop();CHKERRQ(ierr); /* -------------------------------------------------------------- Linear solver loop: Solve 2 different linear systems several times in succession -------------------------------------------------------------- */ for (t=0; t<ntimes; t++) { /* - - - - - - - - - - - - Stage 1: - - - - - - - - - - - - - - Assemble and solve first linear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Begin profiling stage #1 */ ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr); /* Initialize all matrix entries to zero. MatZeroEntries() retains the nonzero structure of the matrix for sparse formats. */ if (t > 0) {ierr = MatZeroEntries(C1);CHKERRQ(ierr);} /* Set matrix entries in parallel. Also, log the number of flops for computing matrix entries. - Each processor needs to insert only elements that it owns locally (but any non-local elements will be sent to the appropriate processor during matrix assembly). - Always specify global row and columns of matrix entries. */ for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; i = Ii/n; j = Ii - i*n; if (i>0) {J = Ii - n; ierr = MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} v = 4.0; ierr = MatSetValues(C1,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } for (Ii=Istart; Ii<Iend; Ii++) { /* Make matrix nonsymmetric */ v = -1.0*(t+0.5); i = Ii/n; if (i>0) {J = Ii - n; ierr = MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} } ierr = PetscLogFlops(2.0*(Iend-Istart));CHKERRQ(ierr); /* Assemble matrix, using the 2-step process: MatAssemblyBegin(), MatAssemblyEnd() Computations can be done while messages are in transition by placing code between these two statements. */ ierr = MatAssemblyBegin(C1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Indicate same nonzero structure of successive linear system matrices */ ierr = MatSetOption(C1,MAT_NEW_NONZERO_LOCATIONS,PETSC_TRUE);CHKERRQ(ierr); /* Compute right-hand-side vector */ ierr = MatMult(C1,u,b1);CHKERRQ(ierr); /* Set operators. Here the matrix that defines the linear system also serves as the preconditioning matrix. - The flag SAME_NONZERO_PATTERN indicates that the preconditioning matrix has identical nonzero structure as during the last linear solve (although the values of the entries have changed). Thus, we can save some work in setting up the preconditioner (e.g., no need to redo symbolic factorization for ILU/ICC preconditioners). - If the nonzero structure of the matrix is different during the second linear solve, then the flag DIFFERENT_NONZERO_PATTERN must be used instead. If you are unsure whether the matrix structure has changed or not, use the flag DIFFERENT_NONZERO_PATTERN. - Caution: If you specify SAME_NONZERO_PATTERN, PETSc believes your assertion and does not check the structure of the matrix. If you erroneously claim that the structure is the same when it actually is not, the new preconditioner will not function correctly. Thus, use this optimization feature with caution! */ ierr = KSPSetOperators(ksp1,C1,C1,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Use the previous solution of linear system #1 as the initial guess for the next solve of linear system #1. The user MUST call KSPSetInitialGuessNonzero() in indicate use of an initial guess vector; otherwise, an initial guess of zero is used. */ if (t>0) { ierr = KSPSetInitialGuessNonzero(ksp1,PETSC_TRUE);CHKERRQ(ierr); } /* Solve the first linear system. Here we explicitly call KSPSetUp() for more detailed performance monitoring of certain preconditioners, such as ICC and ILU. This call is optional, ase KSPSetUp() will automatically be called within KSPSolve() if it hasn't been called already. */ ierr = KSPSetUp(ksp1);CHKERRQ(ierr); ierr = KSPSolve(ksp1,b1,x1);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp1,&its);CHKERRQ(ierr); /* Check error of solution to first linear system */ ierr = CheckError(u,x1,b1,its,1.e-4,CHECK_ERROR);CHKERRQ(ierr); /* - - - - - - - - - - - - Stage 2: - - - - - - - - - - - - - - Assemble and solve second linear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Conclude profiling stage #1; begin profiling stage #2 */ ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = PetscLogStagePush(stages[2]);CHKERRQ(ierr); /* Initialize all matrix entries to zero */ if (t > 0) {ierr = MatZeroEntries(C2);CHKERRQ(ierr);} /* Assemble matrix in parallel. Also, log the number of flops for computing matrix entries. - To illustrate the features of parallel matrix assembly, we intentionally set the values differently from the way in which the matrix is distributed across the processors. Each entry that is not owned locally will be sent to the appropriate processor during MatAssemblyBegin() and MatAssemblyEnd(). - For best efficiency the user should strive to set as many entries locally as possible. */ for (i=0; i<m; i++) { for (j=2*rank; j<2*rank+2; j++) { v = -1.0; Ii = j + n*i; if (i>0) {J = Ii - n; ierr = MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} v = 6.0 + t*0.5; ierr = MatSetValues(C2,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } } for (Ii=Istart2; Ii<Iend2; Ii++) { /* Make matrix nonsymmetric */ v = -1.0*(t+0.5); i = Ii/n; if (i>0) {J = Ii - n; ierr = MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} } ierr = MatAssemblyBegin(C2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*(Iend-Istart));CHKERRQ(ierr); /* Indicate same nonzero structure of successive linear system matrices */ ierr = MatSetOption(C2,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr); /* Compute right-hand-side vector */ ierr = MatMult(C2,u,b2);CHKERRQ(ierr); /* Set operators. Here the matrix that defines the linear system also serves as the preconditioning matrix. Indicate same nonzero structure of successive preconditioner matrices by setting flag SAME_NONZERO_PATTERN. */ ierr = KSPSetOperators(ksp2,C2,C2,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Solve the second linear system */ ierr = KSPSetUp(ksp2);CHKERRQ(ierr); ierr = KSPSolve(ksp2,b2,x2);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp2,&its);CHKERRQ(ierr); /* Check error of solution to second linear system */ ierr = CheckError(u,x2,b2,its,1.e-4,CHECK_ERROR);CHKERRQ(ierr); /* Conclude profiling stage #2 */ ierr = PetscLogStagePop();CHKERRQ(ierr); } /* -------------------------------------------------------------- End of linear solver loop -------------------------------------------------------------- */ /* Free work space. All PETSc objects should be destroyed when they are no longer needed. */ ierr = KSPDestroy(&ksp1);CHKERRQ(ierr); ierr = KSPDestroy(&ksp2);CHKERRQ(ierr); ierr = VecDestroy(&x1);CHKERRQ(ierr); ierr = VecDestroy(&x2);CHKERRQ(ierr); ierr = VecDestroy(&b1);CHKERRQ(ierr); ierr = VecDestroy(&b2);CHKERRQ(ierr); ierr = MatDestroy(&C1);CHKERRQ(ierr); ierr = MatDestroy(&C2);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

PetscInt main(PetscInt argc,char **args) { PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt N0=2048,N1=2048,N2=3,N3=5,N4=5,N=N0*N1; PetscRandom rdm; PetscReal enorm; Vec x,y,z,input,output; Mat A; PetscInt DIM, dim[5],vsize; PetscReal fac; PetscScalar one=1,two=2,three=3; ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&input);CHKERRQ(ierr); ierr = VecSetSizes(input,PETSC_DECIDE,N);CHKERRQ(ierr); ierr = VecSetFromOptions(input);CHKERRQ(ierr); /* ierr = VecSet(input,one);CHKERRQ(ierr); */ /* ierr = VecSetValue(input,1,two,INSERT_VALUES);CHKERRQ(ierr); */ /* ierr = VecSetValue(input,2,three,INSERT_VALUES);CHKERRQ(ierr); */ /* ierr = VecSetValue(input,3,three,INSERT_VALUES);CHKERRQ(ierr); */ ierr = VecSetRandom(input,rdm);CHKERRQ(ierr); /* ierr = VecSetRandom(input,rdm);CHKERRQ(ierr); */ /* ierr = VecSetRandom(input,rdm);CHKERRQ(ierr); */ ierr = VecDuplicate(input,&output); DIM = 2; dim[0] = N0; dim[1] = N1; dim[2] = N2; dim[3] = N3; dim[4] = N4; ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr); ierr = MatGetVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr); /* ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr); */ /* ierr = MatGetVecs(A,&z,NULL);CHKERRQ(ierr); */ ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); printf("The vector size of input from the main routine is %d\n",vsize); ierr = VecGetSize(z,&vsize);CHKERRQ(ierr); printf("The vector size of output from the main routine is %d\n",vsize); ierr = InputTransformFFT(A,input,x);CHKERRQ(ierr); ierr = MatMult(A,x,y);CHKERRQ(ierr); ierr = VecAssemblyBegin(y);CHKERRQ(ierr); ierr = VecAssemblyEnd(y);CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr); ierr = OutputTransformFFT(A,z,output);CHKERRQ(ierr); fac = 1.0/(PetscReal)N; ierr = VecScale(output,fac);CHKERRQ(ierr); ierr = VecAssemblyBegin(input);CHKERRQ(ierr); ierr = VecAssemblyEnd(input);CHKERRQ(ierr); ierr = VecAssemblyBegin(output);CHKERRQ(ierr); ierr = VecAssemblyEnd(output);CHKERRQ(ierr); /* ierr = VecView(input,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ /* ierr = VecView(output,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ ierr = VecAXPY(output,-1.0,input);CHKERRQ(ierr); ierr = VecNorm(output,NORM_1,&enorm);CHKERRQ(ierr); /* if (enorm > 1.e-14) { */ ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr); /* } */ ierr = VecDestroy(&output);CHKERRQ(ierr); ierr = VecDestroy(&input);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); PetscFinalize(); return 0; }

int main(int argc,char **args) { Mat C; PetscInt i,j,m = 3,n = 3,Ii,J; PetscErrorCode ierr; PetscTruth flg; PetscScalar v; IS perm,iperm; Vec x,u,b,y; PetscReal norm; MatFactorInfo info; PetscMPIInt size; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(1,"This is a uniprocessor example only!"); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL,"-symmetric",&flg);CHKERRQ(ierr); if (flg) { /* Treat matrix as symmetric only if we set this flag */ ierr = MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr); } /* Create the matrix for the five point stencil, YET AGAIN */ for (i=0; i<m; i++) { for (j=0; j<n; j++) { v = -1.0; Ii = j + n*i; if (i>0) {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 4.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatGetOrdering(C,MATORDERING_RCM,&perm,&iperm);CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = ISView(perm,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,m*n,&u);CHKERRQ(ierr); ierr = VecSet(u,1.0);CHKERRQ(ierr); ierr = VecDuplicate(u,&x);CHKERRQ(ierr); ierr = VecDuplicate(u,&b);CHKERRQ(ierr); ierr = VecDuplicate(u,&y);CHKERRQ(ierr); ierr = MatMult(C,u,b);CHKERRQ(ierr); ierr = VecCopy(b,y);CHKERRQ(ierr); ierr = VecScale(y,2.0);CHKERRQ(ierr); ierr = MatNorm(C,NORM_FROBENIUS,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Frobenius norm of matrix %G\n",norm);CHKERRQ(ierr); ierr = MatNorm(C,NORM_1,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"One norm of matrix %G\n",norm);CHKERRQ(ierr); ierr = MatNorm(C,NORM_INFINITY,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Infinity norm of matrix %G\n",norm);CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); info.fill = 2.0; info.dtcol = 0.0; info.zeropivot = 1.e-14; info.pivotinblocks = 1.0; ierr = MatLUFactor(C,perm,iperm,&info);CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Test MatSolve */ ierr = MatSolve(C,b,x);CHKERRQ(ierr); ierr = VecView(b,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,u);CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Norm of error %A\n",norm);CHKERRQ(ierr); /* Test MatSolveAdd */ ierr = MatSolveAdd(C,b,y,x);CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,y);CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,u);CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Norm of error %A\n",norm);CHKERRQ(ierr); ierr = ISDestroy(perm);CHKERRQ(ierr); ierr = ISDestroy(iperm);CHKERRQ(ierr); ierr = VecDestroy(u);CHKERRQ(ierr); ierr = VecDestroy(y);CHKERRQ(ierr); ierr = VecDestroy(b);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); ierr = MatDestroy(C);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }

static PetscErrorCode QPIPSetInitialPoint(TAO_BQPIP *qp, Tao tao) { PetscErrorCode ierr; PetscReal two=2.0,p01=1; PetscReal gap1,gap2,fff,mu; PetscFunctionBegin; /* Compute function, Gradient R=Hx+b, and Hessian */ ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); ierr = VecMedian(qp->XL, tao->solution, qp->XU, tao->solution);CHKERRQ(ierr); ierr = MatMult(tao->hessian, tao->solution, tao->gradient);CHKERRQ(ierr); ierr = VecCopy(qp->C0, qp->Work);CHKERRQ(ierr); ierr = VecAXPY(qp->Work, 0.5, tao->gradient);CHKERRQ(ierr); ierr = VecAXPY(tao->gradient, 1.0, qp->C0);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->Work, &fff);CHKERRQ(ierr); qp->pobj = fff + qp->c; /* Initialize Primal Vectors */ /* T = XU - X; G = X - XL */ ierr = VecCopy(qp->XU, qp->T);CHKERRQ(ierr); ierr = VecAXPY(qp->T, -1.0, tao->solution);CHKERRQ(ierr); ierr = VecCopy(tao->solution, qp->G);CHKERRQ(ierr); ierr = VecAXPY(qp->G, -1.0, qp->XL);CHKERRQ(ierr); ierr = VecSet(qp->GZwork, p01);CHKERRQ(ierr); ierr = VecSet(qp->TSwork, p01);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->G, qp->G, qp->GZwork);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->T, qp->T, qp->TSwork);CHKERRQ(ierr); /* Initialize Dual Variable Vectors */ ierr = VecCopy(qp->G, qp->Z);CHKERRQ(ierr); ierr = VecReciprocal(qp->Z);CHKERRQ(ierr); ierr = VecCopy(qp->T, qp->S);CHKERRQ(ierr); ierr = VecReciprocal(qp->S);CHKERRQ(ierr); ierr = MatMult(tao->hessian, qp->Work, qp->RHS);CHKERRQ(ierr); ierr = VecAbs(qp->RHS);CHKERRQ(ierr); ierr = VecSet(qp->Work, p01);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->RHS, qp->RHS, qp->Work);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->RHS, tao->gradient, qp->RHS);CHKERRQ(ierr); ierr = VecNorm(qp->RHS, NORM_1, &gap1);CHKERRQ(ierr); mu = PetscMin(10.0,(gap1+10.0)/qp->m); ierr = VecScale(qp->S, mu);CHKERRQ(ierr); ierr = VecScale(qp->Z, mu);CHKERRQ(ierr); ierr = VecSet(qp->TSwork, p01);CHKERRQ(ierr); ierr = VecSet(qp->GZwork, p01);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->S, qp->S, qp->TSwork);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->Z, qp->Z, qp->GZwork);CHKERRQ(ierr); qp->mu=0;qp->dinfeas=1.0;qp->pinfeas=1.0; while ( (qp->dinfeas+qp->pinfeas)/(qp->m+qp->n) >= qp->mu ){ ierr = VecScale(qp->G, two);CHKERRQ(ierr); ierr = VecScale(qp->Z, two);CHKERRQ(ierr); ierr = VecScale(qp->S, two);CHKERRQ(ierr); ierr = VecScale(qp->T, two);CHKERRQ(ierr); ierr = QPIPComputeResidual(qp,tao);CHKERRQ(ierr); ierr = VecCopy(tao->solution, qp->R3);CHKERRQ(ierr); ierr = VecAXPY(qp->R3, -1.0, qp->G);CHKERRQ(ierr); ierr = VecAXPY(qp->R3, -1.0, qp->XL);CHKERRQ(ierr); ierr = VecCopy(tao->solution, qp->R5);CHKERRQ(ierr); ierr = VecAXPY(qp->R5, 1.0, qp->T);CHKERRQ(ierr); ierr = VecAXPY(qp->R5, -1.0, qp->XU);CHKERRQ(ierr); ierr = VecNorm(qp->R3, NORM_INFINITY, &gap1);CHKERRQ(ierr); ierr = VecNorm(qp->R5, NORM_INFINITY, &gap2);CHKERRQ(ierr); qp->pinfeas=PetscMax(gap1,gap2); /* Compute the duality gap */ ierr = VecDot(qp->G, qp->Z, &gap1);CHKERRQ(ierr); ierr = VecDot(qp->T, qp->S, &gap2);CHKERRQ(ierr); qp->gap = (gap1+gap2); qp->dobj = qp->pobj - qp->gap; if (qp->m>0) qp->mu=qp->gap/(qp->m); else qp->mu=0.0; qp->rgap=qp->gap/( PetscAbsReal(qp->dobj) + PetscAbsReal(qp->pobj) + 1.0 ); } PetscFunctionReturn(0); }

int main(int argc,char **args) { PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt N0=50,N1=20,N=N0*N1,DIM; PetscRandom rdm; PetscScalar a; PetscReal enorm; Vec x,y,z; PetscBool view=PETSC_FALSE,use_interface=PETSC_TRUE; ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires complex numbers"); #endif ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex143");CHKERRQ(ierr); ierr = PetscOptionsBool("-vec_view draw", "View the vectors", "ex143", view, &view, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex143",use_interface, &use_interface, NULL);CHKERRQ(ierr); ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-use_FFTW_interface",&use_interface,NULL);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); if (!use_interface) { /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */ /*---------------------------------------------------------*/ fftw_plan fplan,bplan; fftw_complex *data_in,*data_out,*data_out2; ptrdiff_t alloc_local,local_n0,local_0_start; DIM = 2; if (!rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"Use FFTW without PETSc-FFTW interface, DIM %D\n",DIM);CHKERRQ(ierr); } fftw_mpi_init(); N = N0*N1; alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start); data_in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); data_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real Space vector");CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE); bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE); ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} fftw_execute(fplan); if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} fftw_execute(bplan); /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ a = 1.0/(PetscReal)N; ierr = VecScale(z,a);CHKERRQ(ierr); if (view) {ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-11 && !rank) { ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr); } /* Free spaces */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(data_in); ierr = VecDestroy(&x);CHKERRQ(ierr); fftw_free(data_out); ierr = VecDestroy(&y);CHKERRQ(ierr); fftw_free(data_out2);ierr = VecDestroy(&z);CHKERRQ(ierr); } else { /* Use PETSc-FFTW interface */ /*-------------------------------------------*/ PetscInt i,*dim,k; Mat A; N=1; for (i=1; i<5; i++) { DIM = i; ierr = PetscMalloc1(i,&dim);CHKERRQ(ierr); for (k=0; k<i; k++) { dim[k]=30; } N *= dim[i-1]; /* Create FFTW object */ if (!rank) printf("Use PETSc-FFTW interface...%d-DIM: %d\n",(int)DIM,(int)N); ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr); /* Create vectors that are compatible with parallel layout of A - must call MatGetVecs()! */ ierr = MatGetVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); /* Set values of space vector x */ ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} /* Apply FFTW_FORWARD and FFTW_BACKWARD */ ierr = MatMult(A,x,y);CHKERRQ(ierr); if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr); /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ a = 1.0/(PetscReal)N; ierr = VecScale(z,a);CHKERRQ(ierr); if (view) {ierr = VecView(z,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-9 && !rank) { ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr); } ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFree(dim);CHKERRQ(ierr); } } ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

int main(int argc,char **args) { Mat C,C1,F; Vec u,x,b; PetscErrorCode ierr; PetscMPIInt rank,nproc; PetscInt i,M = 10,m,n,nfact,nsolve; PetscScalar *array,rval; PetscReal norm,tol=1.e-12; IS perm,iperm; MatFactorInfo info; PetscRandom rand; PetscTruth flg; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &nproc);CHKERRQ(ierr); /* Create matrix and vectors */ ierr = PetscOptionsGetInt(PETSC_NULL,"-M",&M,PETSC_NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr); ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatGetLocalSize(C,&m,&n);CHKERRQ(ierr); if (m != n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Matrix local size m %d must equal n %d",m,n); ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,n,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* save the true solution */ /* Assembly */ ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatGetArray(C,&array);CHKERRQ(ierr); for (i=0; i<m*M; i++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; } ierr = MatRestoreArray(C,&array);CHKERRQ(ierr); ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /*if (!rank) {printf("main, C: \n");} ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ /* Test MatDuplicate() */ ierr = MatDuplicate(C,MAT_COPY_VALUES,&C1);CHKERRQ(ierr); ierr = MatEqual(C,C1,&flg);CHKERRQ(ierr); if (!flg){ SETERRQ(PETSC_ERR_ARG_WRONG,"Duplicate C1 != C"); } /* Test LU Factorization */ ierr = MatGetOrdering(C1,MATORDERING_NATURAL,&perm,&iperm);CHKERRQ(ierr); if (nproc == 1){ ierr = MatGetFactor(C1,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } else { ierr = MatGetFactor(C1,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } ierr = MatLUFactorSymbolic(F,C1,perm,iperm,&info);CHKERRQ(ierr); for (nfact = 0; nfact < 2; nfact++){ if (!rank) printf(" LU nfact %d\n",nfact); ierr = MatLUFactorNumeric(F,C1,&info);CHKERRQ(ierr); /* Test MatSolve() */ for (nsolve = 0; nsolve < 5; nsolve++){ ierr = VecGetArray(x,&array);CHKERRQ(ierr); for (i=0; i<m; i++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; } ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); ierr = VecCopy(x,u);CHKERRQ(ierr); ierr = MatMult(C,x,b);CHKERRQ(ierr); ierr = MatSolve(F,b,x);CHKERRQ(ierr); /* Check the error */ ierr = VecAXPY(u,-1.0,x);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol){ if (!rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: Norm of error %g, LU nfact %d\n",norm,nfact);CHKERRQ(ierr); } } } } ierr = MatDestroy(C1);CHKERRQ(ierr); ierr = MatDestroy(F);CHKERRQ(ierr); /* Test Cholesky Factorization */ ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&C1);CHKERRQ(ierr); /* C1 = C^T */ ierr = MatAXPY(C,1.0,C1,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* make C symmetric: C <- C + C^T */ ierr = MatShift(C,M);CHKERRQ(ierr); /* make C positive definite */ ierr = MatDestroy(C1);CHKERRQ(ierr); ierr = MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr); if (nproc == 1){ ierr = MatGetFactor(C,MAT_SOLVER_PETSC,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); } else { ierr = MatGetFactor(C,MAT_SOLVER_PLAPACK,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); } ierr = MatCholeskyFactorSymbolic(F,C,perm,&info);CHKERRQ(ierr); for (nfact = 0; nfact < 2; nfact++){ if (!rank) printf(" Cholesky nfact %d\n",nfact); ierr = MatCholeskyFactorNumeric(F,C,&info);CHKERRQ(ierr); /* Test MatSolve() */ for (nsolve = 0; nsolve < 5; nsolve++){ ierr = VecGetArray(x,&array);CHKERRQ(ierr); for (i=0; i<m; i++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; } ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); ierr = VecCopy(x,u);CHKERRQ(ierr); ierr = MatMult(C,x,b);CHKERRQ(ierr); ierr = MatSolve(F,b,x);CHKERRQ(ierr); /* Check the error */ ierr = VecAXPY(u,-1.0,x);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol){ if (!rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: Norm of error %g, Cholesky nfact %d\n",norm,nfact);CHKERRQ(ierr); } } } } ierr = MatDestroy(F);CHKERRQ(ierr); /* Free data structures */ ierr = PetscRandomDestroy(rand);CHKERRQ(ierr); ierr = ISDestroy(perm);CHKERRQ(ierr); ierr = ISDestroy(iperm);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); ierr = VecDestroy(b);CHKERRQ(ierr); ierr = VecDestroy(u);CHKERRQ(ierr); ierr = MatDestroy(C);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }

static PetscErrorCode SNESSolve_QN(SNES snes) { PetscErrorCode ierr; SNES_QN *qn = (SNES_QN*) snes->data; Vec X,Xold; Vec F,W; Vec Y,D,Dold; PetscInt i, i_r; PetscReal fnorm,xnorm,ynorm,gnorm; PetscBool lssucceed,powell,periodic; PetscScalar DolddotD,DolddotDold; SNESConvergedReason reason; /* basically just a regular newton's method except for the application of the jacobian */ PetscFunctionBegin; ierr = PetscCitationsRegister(SNESCitation,&SNEScite);CHKERRQ(ierr); F = snes->vec_func; /* residual vector */ Y = snes->vec_sol_update; /* search direction generated by J^-1D*/ W = snes->work[3]; X = snes->vec_sol; /* solution vector */ Xold = snes->work[0]; /* directions generated by the preconditioned problem with F_pre = F or x - M(x, b) */ D = snes->work[1]; Dold = snes->work[2]; snes->reason = SNES_CONVERGED_ITERATING; ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->iter = 0; snes->norm = 0.; ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr); if (snes->pc && snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_PRECONDITIONED) { ierr = SNESApplyNPC(snes,X,NULL,F);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr); } else { if (!snes->vec_func_init_set) { ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } } else snes->vec_func_init_set = PETSC_FALSE; ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(fnorm)) { snes->reason = SNES_DIVERGED_FNORM_NAN; PetscFunctionReturn(0); } } if (snes->pc && snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_UNPRECONDITIONED) { ierr = SNESApplyNPC(snes,X,F,D);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } else { ierr = VecCopy(F,D);CHKERRQ(ierr); } ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->norm = fnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr); ierr = SNESLogConvergenceHistory(snes,fnorm,0);CHKERRQ(ierr); ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr); /* test convergence */ ierr = (*snes->ops->converged)(snes,0,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); if (snes->pc && snes->pcside == PC_RIGHT) { ierr = PetscLogEventBegin(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESSolve(snes->pc,snes->vec_rhs,X);CHKERRQ(ierr); ierr = PetscLogEventEnd(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = SNESGetNPCFunction(snes,F,&fnorm);CHKERRQ(ierr); ierr = VecCopy(F,D);CHKERRQ(ierr); } /* scale the initial update */ if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) { ierr = SNESComputeJacobian(snes,X,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr); } for (i = 0, i_r = 0; i < snes->max_its; i++, i_r++) { if (qn->scale_type == SNES_QN_SCALE_SHANNO && i_r > 0) { PetscScalar ff,xf; ierr = VecCopy(Dold,Y);CHKERRQ(ierr); ierr = VecCopy(Xold,W);CHKERRQ(ierr); ierr = VecAXPY(Y,-1.0,D);CHKERRQ(ierr); ierr = VecAXPY(W,-1.0,X);CHKERRQ(ierr); ierr = VecDotBegin(Y,Y,&ff);CHKERRQ(ierr); ierr = VecDotBegin(W,Y,&xf);CHKERRQ(ierr); ierr = VecDotEnd(Y,Y,&ff);CHKERRQ(ierr); ierr = VecDotEnd(W,Y,&xf);CHKERRQ(ierr); qn->scaling = PetscRealPart(xf)/PetscRealPart(ff); } switch (qn->type) { case SNES_QN_BADBROYDEN: ierr = SNESQNApply_BadBroyden(snes,i_r,Y,X,Xold,D,Dold);CHKERRQ(ierr); break; case SNES_QN_BROYDEN: ierr = SNESQNApply_Broyden(snes,i_r,Y,X,Xold,D);CHKERRQ(ierr); break; case SNES_QN_LBFGS: SNESQNApply_LBFGS(snes,i_r,Y,X,Xold,D,Dold);CHKERRQ(ierr); break; } /* line search for lambda */ ynorm = 1; gnorm = fnorm; ierr = VecCopy(D, Dold);CHKERRQ(ierr); ierr = VecCopy(X, Xold);CHKERRQ(ierr); ierr = SNESLineSearchApply(snes->linesearch, X, F, &fnorm, Y);CHKERRQ(ierr); if (snes->reason == SNES_DIVERGED_FUNCTION_COUNT) break; if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } ierr = SNESLineSearchGetSuccess(snes->linesearch, &lssucceed);CHKERRQ(ierr); if (!lssucceed) { if (++snes->numFailures >= snes->maxFailures) { snes->reason = SNES_DIVERGED_LINE_SEARCH; break; } } ierr = SNESLineSearchGetNorms(snes->linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr); if (qn->scale_type == SNES_QN_SCALE_LINESEARCH) { ierr = SNESLineSearchGetLambda(snes->linesearch, &qn->scaling);CHKERRQ(ierr); } /* convergence monitoring */ ierr = PetscInfo4(snes,"fnorm=%18.16e, gnorm=%18.16e, ynorm=%18.16e, lssucceed=%d\n",(double)fnorm,(double)gnorm,(double)ynorm,(int)lssucceed);CHKERRQ(ierr); if (snes->pc && snes->pcside == PC_RIGHT) { ierr = PetscLogEventBegin(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESSolve(snes->pc,snes->vec_rhs,X);CHKERRQ(ierr); ierr = PetscLogEventEnd(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = SNESGetNPCFunction(snes,F,&fnorm);CHKERRQ(ierr); } ierr = SNESSetIterationNumber(snes, i+1);CHKERRQ(ierr); ierr = SNESSetFunctionNorm(snes, fnorm);CHKERRQ(ierr); ierr = SNESLogConvergenceHistory(snes,snes->norm,snes->iter);CHKERRQ(ierr); ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr); /* set parameter for default relative tolerance convergence test */ ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); if (snes->pc && snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_UNPRECONDITIONED) { ierr = SNESApplyNPC(snes,X,F,D);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } else { ierr = VecCopy(F, D);CHKERRQ(ierr); } powell = PETSC_FALSE; if (qn->restart_type == SNES_QN_RESTART_POWELL) { /* check restart by Powell's Criterion: |F^T H_0 Fold| > 0.2 * |Fold^T H_0 Fold| */ if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) { ierr = MatMult(snes->jacobian_pre,Dold,W);CHKERRQ(ierr); } else { ierr = VecCopy(Dold,W);CHKERRQ(ierr); } ierr = VecDotBegin(W, Dold, &DolddotDold);CHKERRQ(ierr); ierr = VecDotBegin(W, D, &DolddotD);CHKERRQ(ierr); ierr = VecDotEnd(W, Dold, &DolddotDold);CHKERRQ(ierr); ierr = VecDotEnd(W, D, &DolddotD);CHKERRQ(ierr); if (PetscAbs(PetscRealPart(DolddotD)) > qn->powell_gamma*PetscAbs(PetscRealPart(DolddotDold))) powell = PETSC_TRUE; } periodic = PETSC_FALSE; if (qn->restart_type == SNES_QN_RESTART_PERIODIC) { if (i_r>qn->m-1) periodic = PETSC_TRUE; } /* restart if either powell or periodic restart is satisfied. */ if (powell || periodic) { if (qn->monitor) { ierr = PetscViewerASCIIAddTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(qn->monitor, "restart! |%14.12e| > %4.2f*|%14.12e| or i_r = %d\n", PetscRealPart(DolddotD), qn->powell_gamma, PetscRealPart(DolddotDold), i_r);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr); } i_r = -1; /* general purpose update */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) { ierr = SNESComputeJacobian(snes,X,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr); } } /* general purpose update */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } } if (i == snes->max_its) { ierr = PetscInfo1(snes, "Maximum number of iterations has been reached: %D\n", snes->max_its);CHKERRQ(ierr); if (!snes->reason) snes->reason = SNES_DIVERGED_MAX_IT; } PetscFunctionReturn(0); }