PetscErrorCode DMCoarsen_Plex(DM dm, MPI_Comm comm, DM *dmCoarsened) { DM_Plex *mesh = (DM_Plex*) dm->data; #ifdef PETSC_HAVE_PRAGMATIC DM udm, coordDM; DMLabel bd; Mat A; Vec coordinates, mb, mx; PetscSection coordSection; const PetscScalar *coords; double *coarseCoords; IS bdIS; PetscReal *x, *y, *z, *eqns, *metric; PetscReal coarseRatio = PetscSqr(0.5); const PetscInt *faces; PetscInt *cells, *bdFaces, *bdFaceIds; PetscInt dim, numCorners, cStart, cEnd, numCells, numCoarseCells, c, vStart, vEnd, numVertices, numCoarseVertices, v, numBdFaces, f, maxConeSize, size, bdSize, coff; #endif PetscErrorCode ierr; PetscFunctionBegin; #ifdef PETSC_HAVE_PRAGMATIC if (!mesh->coarseMesh) { ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = DMGetCoordinateDM(dm, &coordDM);CHKERRQ(ierr); ierr = DMGetDefaultSection(coordDM, &coordSection);CHKERRQ(ierr); ierr = DMGetCoordinatesLocal(dm, &coordinates);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr); ierr = DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd);CHKERRQ(ierr); ierr = DMPlexUninterpolate(dm, &udm);CHKERRQ(ierr); ierr = DMPlexGetMaxSizes(udm, &maxConeSize, NULL);CHKERRQ(ierr); numCells = cEnd - cStart; numVertices = vEnd - vStart; ierr = PetscCalloc5(numVertices, &x, numVertices, &y, numVertices, &z, numVertices*PetscSqr(dim), &metric, numCells*maxConeSize, &cells);CHKERRQ(ierr); ierr = VecGetArrayRead(coordinates, &coords);CHKERRQ(ierr); for (v = vStart; v < vEnd; ++v) { PetscInt off; ierr = PetscSectionGetOffset(coordSection, v, &off);CHKERRQ(ierr); x[v-vStart] = coords[off+0]; y[v-vStart] = coords[off+1]; if (dim > 2) z[v-vStart] = coords[off+2]; } ierr = VecRestoreArrayRead(coordinates, &coords);CHKERRQ(ierr); for (c = 0, coff = 0; c < numCells; ++c) { const PetscInt *cone; PetscInt coneSize, cl; ierr = DMPlexGetConeSize(udm, c, &coneSize);CHKERRQ(ierr); ierr = DMPlexGetCone(udm, c, &cone);CHKERRQ(ierr); for (cl = 0; cl < coneSize; ++cl) cells[coff++] = cone[cl] - vStart; } switch (dim) { case 2: pragmatic_2d_init(&numVertices, &numCells, cells, x, y); break; case 3: pragmatic_3d_init(&numVertices, &numCells, cells, x, y, z); break; default: SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_OUTOFRANGE, "No Pragmatic coarsening defined for dimension %d", dim); } /* Create boundary mesh */ ierr = DMLabelCreate("boundary", &bd);CHKERRQ(ierr); ierr = DMPlexMarkBoundaryFaces(dm, bd);CHKERRQ(ierr); ierr = DMLabelGetStratumIS(bd, 1, &bdIS);CHKERRQ(ierr); ierr = DMLabelGetStratumSize(bd, 1, &numBdFaces);CHKERRQ(ierr); ierr = ISGetIndices(bdIS, &faces);CHKERRQ(ierr); for (f = 0, bdSize = 0; f < numBdFaces; ++f) { PetscInt *closure = NULL; PetscInt closureSize, cl; ierr = DMPlexGetTransitiveClosure(dm, faces[f], PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); for (cl = 0; cl < closureSize*2; cl += 2) { if ((closure[cl] >= vStart) && (closure[cl] < vEnd)) ++bdSize; } ierr = DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); } ierr = PetscMalloc2(bdSize, &bdFaces, numBdFaces, &bdFaceIds);CHKERRQ(ierr); for (f = 0, bdSize = 0; f < numBdFaces; ++f) { PetscInt *closure = NULL; PetscInt closureSize, cl; ierr = DMPlexGetTransitiveClosure(dm, faces[f], PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); for (cl = 0; cl < closureSize*2; cl += 2) { if ((closure[cl] >= vStart) && (closure[cl] < vEnd)) bdFaces[bdSize++] = closure[cl] - vStart; } /* TODO Fix */ bdFaceIds[f] = 1; ierr = DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr); } ierr = ISDestroy(&bdIS);CHKERRQ(ierr); ierr = DMLabelDestroy(&bd);CHKERRQ(ierr); pragmatic_set_boundary(&numBdFaces, bdFaces, bdFaceIds); /* Create metric */ size = (dim*(dim+1))/2; ierr = PetscMalloc1(PetscSqr(size), &eqns);CHKERRQ(ierr); ierr = MatCreateSeqDense(PETSC_COMM_SELF, size, size, eqns, &A);CHKERRQ(ierr); ierr = MatCreateVecs(A, &mx, &mb);CHKERRQ(ierr); ierr = VecSet(mb, 1.0);CHKERRQ(ierr); for (c = 0; c < numCells; ++c) { const PetscScalar *sol; PetscScalar *cellCoords = NULL; PetscReal e[3], vol; const PetscInt *cone; PetscInt coneSize, cl, i, j, d, r; ierr = DMPlexVecGetClosure(dm, coordSection, coordinates, c, NULL, &cellCoords);CHKERRQ(ierr); /* Only works for simplices */ for (i = 0, r = 0; i < dim+1; ++i) { for (j = 0; j < i; ++j, ++r) { for (d = 0; d < dim; ++d) e[d] = cellCoords[i*dim+d] - cellCoords[j*dim+d]; /* FORTRAN ORDERING */ if (dim == 2) { eqns[0*size+r] = PetscSqr(e[0]); eqns[1*size+r] = 2.0*e[0]*e[1]; eqns[2*size+r] = PetscSqr(e[1]); } else { eqns[0*size+r] = PetscSqr(e[0]); eqns[1*size+r] = 2.0*e[0]*e[1]; eqns[2*size+r] = 2.0*e[0]*e[2]; eqns[3*size+r] = PetscSqr(e[1]); eqns[4*size+r] = 2.0*e[1]*e[2]; eqns[5*size+r] = PetscSqr(e[2]); } } } ierr = MatSetUnfactored(A);CHKERRQ(ierr); ierr = DMPlexVecRestoreClosure(dm, coordSection, coordinates, c, NULL, &cellCoords);CHKERRQ(ierr); ierr = MatLUFactor(A, NULL, NULL, NULL);CHKERRQ(ierr); ierr = MatSolve(A, mb, mx);CHKERRQ(ierr); ierr = VecGetArrayRead(mx, &sol);CHKERRQ(ierr); ierr = DMPlexComputeCellGeometryFVM(dm, c, &vol, NULL, NULL);CHKERRQ(ierr); ierr = DMPlexGetCone(udm, c, &cone);CHKERRQ(ierr); ierr = DMPlexGetConeSize(udm, c, &coneSize);CHKERRQ(ierr); for (cl = 0; cl < coneSize; ++cl) { const PetscInt v = cone[cl] - vStart; if (dim == 2) { metric[v*4+0] += vol*coarseRatio*sol[0]; metric[v*4+1] += vol*coarseRatio*sol[1]; metric[v*4+2] += vol*coarseRatio*sol[1]; metric[v*4+3] += vol*coarseRatio*sol[2]; } else { metric[v*9+0] += vol*coarseRatio*sol[0]; metric[v*9+1] += vol*coarseRatio*sol[1]; metric[v*9+3] += vol*coarseRatio*sol[1]; metric[v*9+2] += vol*coarseRatio*sol[2]; metric[v*9+6] += vol*coarseRatio*sol[2]; metric[v*9+4] += vol*coarseRatio*sol[3]; metric[v*9+5] += vol*coarseRatio*sol[4]; metric[v*9+7] += vol*coarseRatio*sol[4]; metric[v*9+8] += vol*coarseRatio*sol[5]; } } ierr = VecRestoreArrayRead(mx, &sol);CHKERRQ(ierr); } for (v = 0; v < numVertices; ++v) { const PetscInt *support; PetscInt supportSize, s; PetscReal vol, totVol = 0.0; ierr = DMPlexGetSupport(udm, v+vStart, &support);CHKERRQ(ierr); ierr = DMPlexGetSupportSize(udm, v+vStart, &supportSize);CHKERRQ(ierr); for (s = 0; s < supportSize; ++s) {ierr = DMPlexComputeCellGeometryFVM(dm, support[s], &vol, NULL, NULL);CHKERRQ(ierr); totVol += vol;} for (s = 0; s < PetscSqr(dim); ++s) metric[v*PetscSqr(dim)+s] /= totVol; } ierr = VecDestroy(&mx);CHKERRQ(ierr); ierr = VecDestroy(&mb);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = DMDestroy(&udm);CHKERRQ(ierr); ierr = PetscFree(eqns);CHKERRQ(ierr); pragmatic_set_metric(metric); pragmatic_adapt(); /* Read out mesh */ pragmatic_get_info(&numCoarseVertices, &numCoarseCells); ierr = PetscMalloc1(numCoarseVertices*dim, &coarseCoords);CHKERRQ(ierr); switch (dim) { case 2: pragmatic_get_coords_2d(x, y); numCorners = 3; for (v = 0; v < numCoarseVertices; ++v) {coarseCoords[v*2+0] = x[v]; coarseCoords[v*2+1] = y[v];} break; case 3: pragmatic_get_coords_3d(x, y, z); numCorners = 4; for (v = 0; v < numCoarseVertices; ++v) {coarseCoords[v*3+0] = x[v]; coarseCoords[v*3+1] = y[v]; coarseCoords[v*3+2] = z[v];} break; default: SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_OUTOFRANGE, "No Pragmatic coarsening defined for dimension %d", dim); } pragmatic_get_elements(cells); /* TODO Read out markers for boundary */ ierr = DMPlexCreateFromCellList(PetscObjectComm((PetscObject) dm), dim, numCoarseCells, numCoarseVertices, numCorners, PETSC_TRUE, cells, dim, coarseCoords, &mesh->coarseMesh);CHKERRQ(ierr); pragmatic_finalize(); ierr = PetscFree5(x, y, z, metric, cells);CHKERRQ(ierr); ierr = PetscFree2(bdFaces, bdFaceIds);CHKERRQ(ierr); ierr = PetscFree(coarseCoords);CHKERRQ(ierr); } #endif ierr = PetscObjectReference((PetscObject) mesh->coarseMesh);CHKERRQ(ierr); *dmCoarsened = mesh->coarseMesh; PetscFunctionReturn(0); }

int main(int argc,char **args) { Mat A,RHS,C,F,X,S; Vec u,x,b; Vec xschur,bschur,uschur; IS is_schur; PetscErrorCode ierr; PetscMPIInt size; PetscInt isolver=0,size_schur,m,n,nfact,nsolve,nrhs; PetscReal norm,tol=PETSC_SQRT_MACHINE_EPSILON; PetscRandom rand; PetscBool data_provided,herm,symm,use_lu; PetscReal sratio = 5.1/12.; PetscViewer fd; /* viewer */ char solver[256]; char file[PETSC_MAX_PATH_LEN]; /* input file name */ PetscInitialize(&argc,&args,(char*)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); if (size > 1) SETERRQ(PETSC_COMM_WORLD,1,"This is a uniprocessor test"); /* Determine which type of solver we want to test for */ herm = PETSC_FALSE; symm = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-symmetric_solve",&symm,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-hermitian_solve",&herm,NULL);CHKERRQ(ierr); if (herm) symm = PETSC_TRUE; /* Determine file from which we read the matrix A */ ierr = PetscOptionsGetString(NULL,NULL,"-f",file,PETSC_MAX_PATH_LEN,&data_provided);CHKERRQ(ierr); if (!data_provided) { /* get matrices from PETSc distribution */ sprintf(file,PETSC_DIR); ierr = PetscStrcat(file,"/share/petsc/datafiles/matrices/");CHKERRQ(ierr); if (symm) { #if defined (PETSC_USE_COMPLEX) ierr = PetscStrcat(file,"hpd-complex-");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"spd-real-");CHKERRQ(ierr); #endif } else { #if defined (PETSC_USE_COMPLEX) ierr = PetscStrcat(file,"nh-complex-");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"ns-real-");CHKERRQ(ierr); #endif } #if defined(PETSC_USE_64BIT_INDICES) ierr = PetscStrcat(file,"int64-");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"int32-");CHKERRQ(ierr); #endif #if defined (PETSC_USE_REAL_SINGLE) ierr = PetscStrcat(file,"float32");CHKERRQ(ierr); #else ierr = PetscStrcat(file,"float64");CHKERRQ(ierr); #endif } /* Load matrix A */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n); /* Create dense matrix C and X; C holds true solution with identical colums */ nrhs = 2; ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,m,PETSC_DECIDE,PETSC_DECIDE,nrhs);CHKERRQ(ierr); ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatSetRandom(C,rand);CHKERRQ(ierr); ierr = MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&X);CHKERRQ(ierr); /* Create vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,n,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* save the true solution */ ierr = PetscOptionsGetInt(NULL,NULL,"-solver",&isolver,NULL);CHKERRQ(ierr); switch (isolver) { #if defined(PETSC_HAVE_MUMPS) case 0: ierr = PetscStrcpy(solver,MATSOLVERMUMPS);CHKERRQ(ierr); break; #endif #if defined(PETSC_HAVE_MKL_PARDISO) case 1: ierr = PetscStrcpy(solver,MATSOLVERMKL_PARDISO);CHKERRQ(ierr); break; #endif default: ierr = PetscStrcpy(solver,MATSOLVERPETSC);CHKERRQ(ierr); break; } #if defined (PETSC_USE_COMPLEX) if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */ PetscScalar im = PetscSqrtScalar((PetscScalar)-1.); PetscScalar val = -1.0; val = val + im; ierr = MatSetValue(A,1,0,val,INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } #endif ierr = PetscOptionsGetReal(NULL,NULL,"-schur_ratio",&sratio,NULL);CHKERRQ(ierr); if (sratio < 0. || sratio > 1.) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %f", sratio); } size_schur = (PetscInt)(sratio*m); ierr = PetscPrintf(PETSC_COMM_SELF,"Solving with %s: nrhs %d, sym %d, herm %d, size schur %d, size mat %d\n",solver,nrhs,symm,herm,size_schur,m);CHKERRQ(ierr); /* Test LU/Cholesky Factorization */ use_lu = PETSC_FALSE; if (!symm) use_lu = PETSC_TRUE; #if defined (PETSC_USE_COMPLEX) if (isolver == 1) use_lu = PETSC_TRUE; #endif if (herm && !use_lu) { /* test also conversion routines inside the solver packages */ ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatConvert(A,MATSEQSBAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); } if (use_lu) { ierr = MatGetFactor(A,solver,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } else { if (herm) { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } else { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_SPD,PETSC_FALSE);CHKERRQ(ierr); } ierr = MatGetFactor(A,solver,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); } ierr = ISCreateStride(PETSC_COMM_SELF,size_schur,m-size_schur,1,&is_schur);CHKERRQ(ierr); ierr = MatFactorSetSchurIS(F,is_schur);CHKERRQ(ierr); ierr = ISDestroy(&is_schur);CHKERRQ(ierr); if (use_lu) { ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr); } else { ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr); } for (nfact = 0; nfact < 3; nfact++) { Mat AD; if (!nfact) { ierr = VecSetRandom(x,rand);CHKERRQ(ierr); if (symm && herm) { ierr = VecAbs(x);CHKERRQ(ierr); } ierr = MatDiagonalSet(A,x,ADD_VALUES);CHKERRQ(ierr); } if (use_lu) { ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr); } else { ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr); } ierr = MatFactorCreateSchurComplement(F,&S);CHKERRQ(ierr); ierr = MatCreateVecs(S,&xschur,&bschur);CHKERRQ(ierr); ierr = VecDuplicate(xschur,&uschur);CHKERRQ(ierr); if (nfact == 1) { ierr = MatFactorInvertSchurComplement(F);CHKERRQ(ierr); } for (nsolve = 0; nsolve < 2; nsolve++) { ierr = VecSetRandom(x,rand);CHKERRQ(ierr); ierr = VecCopy(x,u);CHKERRQ(ierr); if (nsolve) { ierr = MatMult(A,x,b);CHKERRQ(ierr); ierr = MatSolve(F,b,x);CHKERRQ(ierr); } else { ierr = MatMultTranspose(A,x,b);CHKERRQ(ierr); ierr = MatSolveTranspose(F,b,x);CHKERRQ(ierr); } /* Check the error */ ierr = VecAXPY(u,-1.0,x);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { PetscReal resi; if (nsolve) { ierr = MatMult(A,x,u);CHKERRQ(ierr); /* u = A*x */ } else { ierr = MatMultTranspose(A,x,u);CHKERRQ(ierr); /* u = A*x */ } ierr = VecAXPY(u,-1.0,b);CHKERRQ(ierr); /* u <- (-1.0)b + u */ ierr = VecNorm(u,NORM_2,&resi);CHKERRQ(ierr); if (nsolve) { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatSolve error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatSolveTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } } ierr = VecSetRandom(xschur,rand);CHKERRQ(ierr); ierr = VecCopy(xschur,uschur);CHKERRQ(ierr); if (nsolve) { ierr = MatMult(S,xschur,bschur);CHKERRQ(ierr); ierr = MatFactorSolveSchurComplement(F,bschur,xschur);CHKERRQ(ierr); } else { ierr = MatMultTranspose(S,xschur,bschur);CHKERRQ(ierr); ierr = MatFactorSolveSchurComplementTranspose(F,bschur,xschur);CHKERRQ(ierr); } /* Check the error */ ierr = VecAXPY(uschur,-1.0,xschur);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(uschur,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { PetscReal resi; if (nsolve) { ierr = MatMult(S,xschur,uschur);CHKERRQ(ierr); /* u = A*x */ } else { ierr = MatMultTranspose(S,xschur,uschur);CHKERRQ(ierr); /* u = A*x */ } ierr = VecAXPY(uschur,-1.0,bschur);CHKERRQ(ierr); /* u <- (-1.0)b + u */ ierr = VecNorm(uschur,NORM_2,&resi);CHKERRQ(ierr); if (nsolve) { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatFactorSolveSchurComplement error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %d, s %d) MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);CHKERRQ(ierr); } } } ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&AD); if (!nfact) { ierr = MatMatMult(AD,C,MAT_INITIAL_MATRIX,2.0,&RHS);CHKERRQ(ierr); } else { ierr = MatMatMult(AD,C,MAT_REUSE_MATRIX,2.0,&RHS);CHKERRQ(ierr); } ierr = MatDestroy(&AD);CHKERRQ(ierr); for (nsolve = 0; nsolve < 2; nsolve++) { ierr = MatMatSolve(F,RHS,X);CHKERRQ(ierr); /* Check the error */ ierr = MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatNorm(X,NORM_FROBENIUS,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);CHKERRQ(ierr); } } ierr = MatDestroy(&S);CHKERRQ(ierr); ierr = VecDestroy(&xschur);CHKERRQ(ierr); ierr = VecDestroy(&bschur);CHKERRQ(ierr); ierr = VecDestroy(&uschur);CHKERRQ(ierr); } /* Free data structures */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); ierr = MatDestroy(&X);CHKERRQ(ierr); ierr = MatDestroy(&RHS);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

PetscErrorCode CreateMesh(MPI_Comm comm, PetscInt testNum, AppCtx *user, DM *dm) { PetscInt dim = user->dim; PetscBool cellSimplex = user->cellSimplex; PetscBool useGenerator = user->useGenerator; const char *filename = user->filename; const char *partitioner = "chaco"; size_t len; PetscMPIInt rank; PetscErrorCode ierr; PetscFunctionBegin; ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = PetscStrlen(filename, &len);CHKERRQ(ierr); if (len) { #if defined(PETSC_HAVE_EXODUSII) int CPU_word_size = 0, IO_word_size = 0, exoid = -1; float version; if (!rank) { exoid = ex_open(filename, EX_READ, &CPU_word_size, &IO_word_size, &version); if (exoid <= 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ex_open(\"%s\",...) did not return a valid file ID", filename); } ierr = DMPlexCreateExodus(comm, exoid, PETSC_FALSE, dm);CHKERRQ(ierr); if (!rank) {ierr = ex_close(exoid);CHKERRQ(ierr);} ierr = DMPlexGetDimension(*dm, &dim);CHKERRQ(ierr); #else SETERRQ(comm, PETSC_ERR_SUP, "Loading meshes requires ExodusII support. Reconfigure using --download-exodusii"); #endif } else if (useGenerator) { if (cellSimplex) { ierr = DMPlexCreateBoxMesh(comm, dim, PETSC_FALSE, dm);CHKERRQ(ierr); } else { ierr = DMPlexCreateHexBoxMesh(comm, dim, PETSC_FALSE, dm);CHKERRQ(ierr); } } else { ierr = DMCreate(comm, dm);CHKERRQ(ierr); ierr = DMSetType(*dm, DMPLEX);CHKERRQ(ierr); ierr = DMPlexSetDimension(*dm, dim);CHKERRQ(ierr); switch (dim) { case 2: if (cellSimplex) { ierr = CreateSimplex_2D(comm, *dm);CHKERRQ(ierr); } else { ierr = CreateQuad_2D(comm, testNum, *dm);CHKERRQ(ierr); } break; case 3: if (cellSimplex) { ierr = CreateSimplex_3D(comm, *dm);CHKERRQ(ierr); } else { ierr = CreateHex_3D(comm, *dm);CHKERRQ(ierr); } break; default: SETERRQ1(comm, PETSC_ERR_ARG_OUTOFRANGE, "Cannot make meshes for dimension %d", dim); } } { DM interpolatedMesh = NULL; ierr = CheckMesh(*dm);CHKERRQ(ierr); ierr = DMPlexInterpolate(*dm, &interpolatedMesh);CHKERRQ(ierr); ierr = DMPlexCopyCoordinates(*dm, interpolatedMesh);CHKERRQ(ierr); ierr = CompareCones(*dm, interpolatedMesh);CHKERRQ(ierr); ierr = DMDestroy(dm);CHKERRQ(ierr); *dm = interpolatedMesh; } { DM distributedMesh = NULL; /* Distribute mesh over processes */ ierr = DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);CHKERRQ(ierr); if (distributedMesh) { ierr = DMDestroy(dm);CHKERRQ(ierr); *dm = distributedMesh; } } ierr = PetscObjectSetName((PetscObject) *dm, "Interpolated Mesh");CHKERRQ(ierr); ierr = DMSetFromOptions(*dm);CHKERRQ(ierr); user->dm = *dm; PetscFunctionReturn(0); }

PetscErrorCode MatFactorNumeric_PaStiX(Mat F,Mat A,const MatFactorInfo *info) { Mat_Pastix *lu =(Mat_Pastix*)(F)->spptr; Mat *tseq; PetscErrorCode ierr = 0; PetscInt icntl; PetscInt M=A->rmap->N; PetscBool valOnly,flg, isSym; Mat F_diag; IS is_iden; Vec b; IS isrow; PetscBool isSeqAIJ,isSeqSBAIJ,isMPIAIJ; PetscFunctionBegin; ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); if (lu->matstruc == DIFFERENT_NONZERO_PATTERN) { (F)->ops->solve = MatSolve_PaStiX; /* Initialize a PASTIX instance */ ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(lu->pastix_comm));CHKERRQ(ierr); ierr = MPI_Comm_rank(lu->pastix_comm, &lu->commRank);CHKERRQ(ierr); ierr = MPI_Comm_size(lu->pastix_comm, &lu->commSize);CHKERRQ(ierr); /* Set pastix options */ lu->iparm[IPARM_MODIFY_PARAMETER] = API_NO; lu->iparm[IPARM_START_TASK] = API_TASK_INIT; lu->iparm[IPARM_END_TASK] = API_TASK_INIT; lu->rhsnbr = 1; /* Call to set default pastix options */ PASTIX_CALL(&(lu->pastix_data), lu->pastix_comm, lu->n, lu->colptr, lu->row, (PastixScalar*)lu->val, lu->perm, lu->invp, (PastixScalar*)lu->rhs, lu->rhsnbr, lu->iparm, lu->dparm); ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"PaStiX Options","Mat");CHKERRQ(ierr); icntl = -1; lu->iparm[IPARM_VERBOSE] = API_VERBOSE_NOT; ierr = PetscOptionsInt("-mat_pastix_verbose","iparm[IPARM_VERBOSE] : level of printing (0 to 2)","None",lu->iparm[IPARM_VERBOSE],&icntl,&flg);CHKERRQ(ierr); if ((flg && icntl >= 0) || PetscLogPrintInfo) { lu->iparm[IPARM_VERBOSE] = icntl; } icntl=-1; ierr = PetscOptionsInt("-mat_pastix_threadnbr","iparm[IPARM_THREAD_NBR] : Number of thread by MPI node","None",lu->iparm[IPARM_THREAD_NBR],&icntl,&flg);CHKERRQ(ierr); if ((flg && icntl > 0)) { lu->iparm[IPARM_THREAD_NBR] = icntl; } PetscOptionsEnd(); valOnly = PETSC_FALSE; } else { if (isSeqAIJ || isMPIAIJ) { ierr = PetscFree(lu->colptr);CHKERRQ(ierr); ierr = PetscFree(lu->row);CHKERRQ(ierr); ierr = PetscFree(lu->val);CHKERRQ(ierr); valOnly = PETSC_FALSE; } else valOnly = PETSC_TRUE; } lu->iparm[IPARM_MATRIX_VERIFICATION] = API_YES; /* convert mpi A to seq mat A */ ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); ierr = ISDestroy(&isrow);CHKERRQ(ierr); ierr = MatConvertToCSC(*tseq,valOnly, &lu->n, &lu->colptr, &lu->row, &lu->val);CHKERRQ(ierr); ierr = MatIsSymmetric(*tseq,0.0,&isSym);CHKERRQ(ierr); ierr = MatDestroyMatrices(1,&tseq);CHKERRQ(ierr); if (!lu->perm) { ierr = PetscMalloc1((lu->n),&(lu->perm));CHKERRQ(ierr); ierr = PetscMalloc1((lu->n),&(lu->invp));CHKERRQ(ierr); } if (isSym) { /* On symmetric matrix, LLT */ lu->iparm[IPARM_SYM] = API_SYM_YES; lu->iparm[IPARM_FACTORIZATION] = API_FACT_LDLT; } else { /* On unsymmetric matrix, LU */ lu->iparm[IPARM_SYM] = API_SYM_NO; lu->iparm[IPARM_FACTORIZATION] = API_FACT_LU; } /*----------------*/ if (lu->matstruc == DIFFERENT_NONZERO_PATTERN) { if (!(isSeqAIJ || isSeqSBAIJ)) { /* PaStiX only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&lu->b_seq);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr); ierr = VecScatterCreate(lu->b_seq,is_iden,b,is_iden,&lu->scat_sol);CHKERRQ(ierr); ierr = ISDestroy(&is_iden);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); } lu->iparm[IPARM_START_TASK] = API_TASK_ORDERING; lu->iparm[IPARM_END_TASK] = API_TASK_NUMFACT; PASTIX_CALL(&(lu->pastix_data), lu->pastix_comm, lu->n, lu->colptr, lu->row, (PastixScalar*)lu->val, lu->perm, lu->invp, (PastixScalar*)lu->rhs, lu->rhsnbr, lu->iparm, lu->dparm); if (lu->iparm[IPARM_ERROR_NUMBER] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by PaStiX in analysis phase: iparm(IPARM_ERROR_NUMBER)=%d\n",lu->iparm[IPARM_ERROR_NUMBER]); } else { lu->iparm[IPARM_START_TASK] = API_TASK_NUMFACT; lu->iparm[IPARM_END_TASK] = API_TASK_NUMFACT; PASTIX_CALL(&(lu->pastix_data), lu->pastix_comm, lu->n, lu->colptr, lu->row, (PastixScalar*)lu->val, lu->perm, lu->invp, (PastixScalar*)lu->rhs, lu->rhsnbr, lu->iparm, lu->dparm); if (lu->iparm[IPARM_ERROR_NUMBER] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by PaStiX in analysis phase: iparm(IPARM_ERROR_NUMBER)=%d\n",lu->iparm[IPARM_ERROR_NUMBER]); } if (lu->commSize > 1) { if ((F)->factortype == MAT_FACTOR_LU) { F_diag = ((Mat_MPIAIJ*)(F)->data)->A; } else { F_diag = ((Mat_MPISBAIJ*)(F)->data)->A; } F_diag->assembled = PETSC_TRUE; } (F)->assembled = PETSC_TRUE; lu->matstruc = SAME_NONZERO_PATTERN; lu->CleanUpPastix = PETSC_TRUE; PetscFunctionReturn(0); }

PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info) { Mat_SuperLU *lu = (Mat_SuperLU*)F->spptr; Mat_SeqAIJ *aa; PetscErrorCode ierr; PetscInt sinfo; PetscReal ferr, berr; NCformat *Ustore; SCformat *Lstore; PetscFunctionBegin; if (lu->flg == SAME_NONZERO_PATTERN){ /* successing numerical factorization */ lu->options.Fact = SamePattern; /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */ Destroy_SuperMatrix_Store(&lu->A); if (lu->options.Equil){ ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } if ( lu->lwork >= 0 ) { Destroy_SuperNode_Matrix(&lu->L); Destroy_CompCol_Matrix(&lu->U); lu->options.Fact = SamePattern; } } /* Create the SuperMatrix for lu->A=A^T: Since SuperLU likes column-oriented matrices,we pass it the transpose, and then solve A^T X = B in MatSolve(). */ if (lu->options.Equil){ aa = (Mat_SeqAIJ*)(lu->A_dup)->data; } else { aa = (Mat_SeqAIJ*)(A)->data; } #if defined(PETSC_USE_COMPLEX) zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i, SLU_NC,SLU_Z,SLU_GE); #else dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i, SLU_NC,SLU_D,SLU_GE); #endif /* Numerical factorization */ lu->B.ncol = 0; /* Indicate not to solve the system */ if (F->factortype == MAT_FACTOR_LU){ #if defined(PETSC_USE_COMPLEX) zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &sinfo); #else dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &sinfo); #endif } else if (F->factortype == MAT_FACTOR_ILU){ /* Compute the incomplete factorization, condition number and pivot growth */ #if defined(PETSC_USE_COMPLEX) zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &sinfo); #else dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &sinfo); #endif } else { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); } if ( !sinfo || sinfo == lu->A.ncol+1 ) { if ( lu->options.PivotGrowth ) ierr = PetscPrintf(PETSC_COMM_SELF," Recip. pivot growth = %e\n", lu->rpg); if ( lu->options.ConditionNumber ) ierr = PetscPrintf(PETSC_COMM_SELF," Recip. condition number = %e\n", lu->rcond); } else if ( sinfo > 0 ){ if ( lu->lwork == -1 ) { ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol); } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo); } else { /* sinfo < 0 */ SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo); } if ( lu->options.PrintStat ) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n"); StatPrint(&lu->stat); Lstore = (SCformat *) lu->L.Store; Ustore = (NCformat *) lu->U.Store; ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor L = %D\n", Lstore->nnz); ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor U = %D\n", Ustore->nnz); ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol); ierr = PetscPrintf(PETSC_COMM_SELF," L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6); } lu->flg = SAME_NONZERO_PATTERN; F->ops->solve = MatSolve_SuperLU; F->ops->solvetranspose = MatSolveTranspose_SuperLU; F->ops->matsolve = MatMatSolve_SuperLU; PetscFunctionReturn(0); }

PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) { Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; PetscErrorCode ierr; PetscMPIInt size; PetscInt M=A->rmap->N,m=A->rmap->n,nrhs; SuperLUStat_t stat; double berr[1]; PetscScalar *bptr; int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ PetscBool flg; PetscFunctionBegin; if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr); if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr); if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size); /* size==1 or distributed mat input */ if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled){ /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, thus destroy it and create a new SOLVEstruct. Otherwise it may result in memory corruption or incorrect solution See src/mat/examples/tests/ex125.c */ #if defined(PETSC_USE_COMPLEX) zSolveFinalize(&lu->options, &lu->SOLVEstruct); #else dSolveFinalize(&lu->options, &lu->SOLVEstruct); #endif lu->options.SolveInitialized = NO; } ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatGetSize(B_mpi,PETSC_NULL,&nrhs);CHKERRQ(ierr); PStatInit(&stat); /* Initialize the statistics variables. */ ierr = MatGetArray(X,&bptr);CHKERRQ(ierr); if (lu->MatInputMode == GLOBAL) { /* size == 1 */ #if defined(PETSC_USE_COMPLEX) pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs, &lu->grid, &lu->LUstruct, berr, &stat, &info); #else pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs, &lu->grid, &lu->LUstruct, berr, &stat, &info); #endif } else { /* distributed mat input */ #if defined(PETSC_USE_COMPLEX) pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info); #else pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info); #endif } if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); ierr = MatRestoreArray(X,&bptr);CHKERRQ(ierr); if (lu->options.PrintStat) { PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ } PStatFree(&stat); lu->matsolve_iscalled = PETSC_FALSE; lu->matmatsolve_iscalled = PETSC_TRUE; PetscFunctionReturn(0); }

PetscErrorCode KSPSolve_MINRES(KSP ksp) { PetscErrorCode ierr; PetscInt i; PetscScalar alpha,beta,ibeta,betaold,eta,c=1.0,ceta,cold=1.0,coold,s=0.0,sold=0.0,soold; PetscScalar rho0,rho1,irho1,rho2,mrho2,rho3,mrho3,dp = 0.0; PetscReal np; Vec X,B,R,Z,U,V,W,UOLD,VOLD,WOLD,WOOLD; Mat Amat,Pmat; MatStructure pflag; KSP_MINRES *minres = (KSP_MINRES*)ksp->data; PetscBool diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; Z = ksp->work[1]; U = ksp->work[2]; V = ksp->work[3]; W = ksp->work[4]; UOLD = ksp->work[5]; VOLD = ksp->work[6]; WOLD = ksp->work[7]; WOOLD = ksp->work[8]; ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr); ksp->its = 0; ierr = VecSet(UOLD,0.0);CHKERRQ(ierr); /* u_old <- 0 */ ierr = VecCopy(UOLD,VOLD);CHKERRQ(ierr); /* v_old <- 0 */ ierr = VecCopy(UOLD,W);CHKERRQ(ierr); /* w <- 0 */ ierr = VecCopy(UOLD,WOLD);CHKERRQ(ierr); /* w_old <- 0 */ if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); /* r <- b - A*x */ ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); } else { ierr = VecCopy(B,R);CHKERRQ(ierr); /* r <- b (x is 0) */ } ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- B*r */ ierr = VecDot(R,Z,&dp);CHKERRQ(ierr); if (PetscRealPart(dp) < minres->haptol) { ierr = PetscInfo2(ksp,"Detected indefinite operator %G tolerance %G\n",PetscRealPart(dp),minres->haptol);CHKERRQ(ierr); ksp->reason = KSP_DIVERGED_INDEFINITE_MAT; PetscFunctionReturn(0); } dp = PetscAbsScalar(dp); dp = PetscSqrtScalar(dp); beta = dp; /* beta <- sqrt(r'*z */ eta = beta; ierr = VecCopy(R,V);CHKERRQ(ierr); ierr = VecCopy(Z,U);CHKERRQ(ierr); ibeta = 1.0 / beta; ierr = VecScale(V,ibeta);CHKERRQ(ierr); /* v <- r / beta */ ierr = VecScale(U,ibeta);CHKERRQ(ierr); /* u <- z / beta */ ierr = VecNorm(Z,NORM_2,&np);CHKERRQ(ierr); /* np <- ||z|| */ ierr = KSPLogResidualHistory(ksp,np);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,np);CHKERRQ(ierr); ksp->rnorm = np; ierr = (*ksp->converged)(ksp,0,np,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* test for convergence */ if (ksp->reason) PetscFunctionReturn(0); i = 0; do { ksp->its = i+1; /* Lanczos */ ierr = KSP_MatMult(ksp,Amat,U,R);CHKERRQ(ierr); /* r <- A*u */ ierr = VecDot(U,R,&alpha);CHKERRQ(ierr); /* alpha <- r'*u */ ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- B*r */ ierr = VecAXPY(R,-alpha,V);CHKERRQ(ierr); /* r <- r - alpha v */ ierr = VecAXPY(Z,-alpha,U);CHKERRQ(ierr); /* z <- z - alpha u */ ierr = VecAXPY(R,-beta,VOLD);CHKERRQ(ierr); /* r <- r - beta v_old */ ierr = VecAXPY(Z,-beta,UOLD);CHKERRQ(ierr); /* z <- z - beta u_old */ betaold = beta; ierr = VecDot(R,Z,&dp);CHKERRQ(ierr); if ( PetscRealPart(dp) < minres->haptol) { ierr = PetscInfo2(ksp,"Detected indefinite operator %G tolerance %G\n",PetscRealPart(dp),minres->haptol);CHKERRQ(ierr); ksp->reason = KSP_DIVERGED_INDEFINITE_MAT; break; } dp = PetscAbsScalar(dp); beta = PetscSqrtScalar(dp); /* beta <- sqrt(r'*z) */ /* QR factorisation */ coold = cold; cold = c; soold = sold; sold = s; rho0 = cold * alpha - coold * sold * betaold; rho1 = PetscSqrtScalar(rho0*rho0 + beta*beta); rho2 = sold * alpha + coold * cold * betaold; rho3 = soold * betaold; /* Givens rotation */ c = rho0 / rho1; s = beta / rho1; /* Update */ ierr = VecCopy(WOLD,WOOLD);CHKERRQ(ierr); /* w_oold <- w_old */ ierr = VecCopy(W,WOLD);CHKERRQ(ierr); /* w_old <- w */ ierr = VecCopy(U,W);CHKERRQ(ierr); /* w <- u */ mrho2 = -rho2; ierr = VecAXPY(W,mrho2,WOLD);CHKERRQ(ierr); /* w <- w - rho2 w_old */ mrho3 = -rho3; ierr = VecAXPY(W,mrho3,WOOLD);CHKERRQ(ierr); /* w <- w - rho3 w_oold */ irho1 = 1.0 / rho1; ierr = VecScale(W,irho1);CHKERRQ(ierr); /* w <- w / rho1 */ ceta = c * eta; ierr = VecAXPY(X,ceta,W);CHKERRQ(ierr); /* x <- x + c eta w */ eta = -s * eta; ierr = VecCopy(V,VOLD);CHKERRQ(ierr); ierr = VecCopy(U,UOLD);CHKERRQ(ierr); ierr = VecCopy(R,V);CHKERRQ(ierr); ierr = VecCopy(Z,U);CHKERRQ(ierr); ibeta = 1.0 / beta; ierr = VecScale(V,ibeta);CHKERRQ(ierr); /* v <- r / beta */ ierr = VecScale(U,ibeta);CHKERRQ(ierr); /* u <- z / beta */ np = ksp->rnorm * PetscAbsScalar(s); ksp->rnorm = np; ierr = KSPLogResidualHistory(ksp,np);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,np);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,np,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* test for convergence */ if (ksp->reason) break; i++; } while (i<ksp->max_it); if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }

/*@C PetscSFSetGraph - Set a parallel star forest Collective Input Arguments: + sf - star forest . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) . nleaves - number of leaf vertices on the current process, each of these references a root on any process . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage . localmode - copy mode for ilocal . iremote - remote locations of root vertices for each leaf on the current process - remotemode - copy mode for iremote Level: intermediate .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() @*/ PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) { PetscErrorCode ierr; PetscTable table; PetscTablePosition pos; PetscMPIInt size; PetscInt i,*rcount,*ranks; PetscFunctionBegin; PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); if (nleaves && ilocal) PetscValidIntPointer(ilocal,4); if (nleaves) PetscValidPointer(iremote,6); if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots); if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); ierr = PetscSFReset(sf);CHKERRQ(ierr); sf->nroots = nroots; sf->nleaves = nleaves; if (ilocal) { switch (localmode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); sf->mine = sf->mine_alloc; ierr = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr); sf->minleaf = PETSC_MAX_INT; sf->maxleaf = PETSC_MIN_INT; for (i=0; i<nleaves; i++) { sf->minleaf = PetscMin(sf->minleaf,ilocal[i]); sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]); } break; case PETSC_OWN_POINTER: sf->mine_alloc = (PetscInt*)ilocal; sf->mine = sf->mine_alloc; break; case PETSC_USE_POINTER: sf->mine = (PetscInt*)ilocal; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); } } if (!ilocal || nleaves > 0) { sf->minleaf = 0; sf->maxleaf = nleaves - 1; } switch (remotemode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); sf->remote = sf->remote_alloc; ierr = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr); break; case PETSC_OWN_POINTER: sf->remote_alloc = (PetscSFNode*)iremote; sf->remote = sf->remote_alloc; break; case PETSC_USE_POINTER: sf->remote = (PetscSFNode*)iremote; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); } ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); for (i=0; i<nleaves; i++) { /* Log 1-based rank */ ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); } ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr); ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); for (i=0; i<sf->nranks; i++) { ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); ranks[i]--; /* Convert back to 0-based */ } ierr = PetscTableDestroy(&table);CHKERRQ(ierr); ierr = PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr); sf->roffset[0] = 0; for (i=0; i<sf->nranks; i++) { ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); sf->roffset[i+1] = sf->roffset[i] + rcount[i]; rcount[i] = 0; } for (i=0; i<nleaves; i++) { PetscInt lo,hi,irank; /* Search for index of iremote[i].rank in sf->ranks */ lo = 0; hi = sf->nranks; while (hi - lo > 1) { PetscInt mid = lo + (hi - lo)/2; if (iremote[i].rank < sf->ranks[mid]) hi = mid; else lo = mid; } if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo; else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank); sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i; sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index; rcount[irank]++; } ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (nroots == PETSC_DETERMINE) { /* Jed, if you have a better way to do this, put it in */ PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0; /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */ ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr); ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank]; ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Could set nroots to this maximum */ for (i = 0; i < size; ++i) maxRoots += numRankRoots[i]; /* Gather all indices */ ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr); leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index; leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; rootOff[0] = 0; for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i]; ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Sort and reduce */ ierr = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr); ierr = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr); ierr = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr); sf->nroots = maxRoots; } #endif sf->graphset = PETSC_TRUE; ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode KSPSolve_CG(KSP ksp) { PetscErrorCode ierr; PetscInt i,stored_max_it,eigs; PetscScalar dpi = 0.0,a = 1.0,beta,betaold = 1.0,b = 0,*e = 0,*d = 0,delta,dpiold; PetscReal dp = 0.0; Vec X,B,Z,R,P,S,W; KSP_CG *cg; Mat Amat,Pmat; MatStructure pflag; PetscTruth diagonalscale; PetscFunctionBegin; ierr = PCDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); cg = (KSP_CG*)ksp->data; eigs = ksp->calc_sings; stored_max_it = ksp->max_it; X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; Z = ksp->work[1]; P = ksp->work[2]; if (cg->singlereduction) { S = ksp->work[3]; W = ksp->work[4]; } else { S = 0; /* unused */ W = Z; } #if !defined(PETSC_USE_COMPLEX) #define VecXDot(x,y,a) VecDot(x,y,a) #else #define VecXDot(x,y,a) (((cg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x,y,a) : VecTDot(x,y,a)) #endif if (eigs) {e = cg->e; d = cg->d; e[0] = 0.0; } ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr); ksp->its = 0; if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); /* r <- b - Ax */ ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); } else { ierr = VecCopy(B,R);CHKERRQ(ierr); /* r <- b (x is 0) */ } if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr); /* dp <- z'*z = e'*A'*B'*B*A'*e' */ } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- r'*r = e'*A'*A*e */ } else if (ksp->normtype == KSP_NORM_NATURAL) { ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr); ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr); } ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); /* beta <- z'*r */ if PetscIsInfOrNanScalar(beta) SETERRQ(PETSC_ERR_FP,"Infinite or not-a-number generated in dot product"); dp = sqrt(PetscAbsScalar(beta)); /* dp <- r'*z = r'*B*r = e'*A'*B*A*e */ } else dp = 0.0;

PetscErrorCode TSStep_Sundials(TS ts) { TS_Sundials *cvode = (TS_Sundials*)ts->data; PetscErrorCode ierr; PetscInt flag; long int its,nsteps; realtype t,tout; PetscScalar *y_data; void *mem; PetscFunctionBegin; mem = cvode->mem; tout = ts->max_time; ierr = VecGetArray(ts->vec_sol,&y_data);CHKERRQ(ierr); N_VSetArrayPointer((realtype*)y_data,cvode->y); ierr = VecRestoreArray(ts->vec_sol,NULL);CHKERRQ(ierr); ierr = TSPreStep(ts);CHKERRQ(ierr); /* We would like to call TSPreStep() when starting each step (including rejections), TSPreStage(), * and TSPostStage() before each stage solve, but CVode does not appear to support this. */ if (cvode->monitorstep) flag = CVode(mem,tout,cvode->y,&t,CV_ONE_STEP); else flag = CVode(mem,tout,cvode->y,&t,CV_NORMAL); if (flag) { /* display error message */ switch (flag) { case CV_ILL_INPUT: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_ILL_INPUT"); break; case CV_TOO_CLOSE: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_TOO_CLOSE"); break; case CV_TOO_MUCH_WORK: { PetscReal tcur; ierr = CVodeGetNumSteps(mem,&nsteps);CHKERRQ(ierr); ierr = CVodeGetCurrentTime(mem,&tcur);CHKERRQ(ierr); SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_TOO_MUCH_WORK. At t=%G, nsteps %D exceeds mxstep %D. Increase '-ts_max_steps <>' or modify TSSetDuration()",tcur,nsteps,ts->max_steps); } break; case CV_TOO_MUCH_ACC: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_TOO_MUCH_ACC"); break; case CV_ERR_FAILURE: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_ERR_FAILURE"); break; case CV_CONV_FAILURE: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_CONV_FAILURE"); break; case CV_LINIT_FAIL: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_LINIT_FAIL"); break; case CV_LSETUP_FAIL: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_LSETUP_FAIL"); break; case CV_LSOLVE_FAIL: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_LSOLVE_FAIL"); break; case CV_RHSFUNC_FAIL: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_RHSFUNC_FAIL"); break; case CV_FIRST_RHSFUNC_ERR: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_FIRST_RHSFUNC_ERR"); break; case CV_REPTD_RHSFUNC_ERR: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_REPTD_RHSFUNC_ERR"); break; case CV_UNREC_RHSFUNC_ERR: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_UNREC_RHSFUNC_ERR"); break; case CV_RTFUNC_FAIL: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, CV_RTFUNC_FAIL"); break; default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVode() fails, flag %d",flag); } } /* copy the solution from cvode->y to cvode->update and sol */ ierr = VecPlaceArray(cvode->w1,y_data);CHKERRQ(ierr); ierr = VecCopy(cvode->w1,cvode->update);CHKERRQ(ierr); ierr = VecResetArray(cvode->w1);CHKERRQ(ierr); ierr = VecCopy(cvode->update,ts->vec_sol);CHKERRQ(ierr); ierr = CVodeGetNumNonlinSolvIters(mem,&its);CHKERRQ(ierr); ierr = CVSpilsGetNumLinIters(mem, &its); ts->snes_its = its; ts->ksp_its = its; ts->time_step = t - ts->ptime; ts->ptime = t; ts->steps++; ierr = CVodeGetNumSteps(mem,&nsteps);CHKERRQ(ierr); if (!cvode->monitorstep) ts->steps = nsteps; PetscFunctionReturn(0); }

PetscErrorCode TSSetUp_Sundials(TS ts) { TS_Sundials *cvode = (TS_Sundials*)ts->data; PetscErrorCode ierr; PetscInt glosize,locsize,i,flag; PetscScalar *y_data,*parray; void *mem; PC pc; PCType pctype; PetscBool pcnone; PetscFunctionBegin; /* get the vector size */ ierr = VecGetSize(ts->vec_sol,&glosize);CHKERRQ(ierr); ierr = VecGetLocalSize(ts->vec_sol,&locsize);CHKERRQ(ierr); /* allocate the memory for N_Vec y */ cvode->y = N_VNew_Parallel(cvode->comm_sundials,locsize,glosize); if (!cvode->y) SETERRQ(PETSC_COMM_SELF,1,"cvode->y is not allocated"); /* initialize N_Vec y: copy ts->vec_sol to cvode->y */ ierr = VecGetArray(ts->vec_sol,&parray);CHKERRQ(ierr); y_data = (PetscScalar*) N_VGetArrayPointer(cvode->y); for (i = 0; i < locsize; i++) y_data[i] = parray[i]; ierr = VecRestoreArray(ts->vec_sol,NULL);CHKERRQ(ierr); ierr = VecDuplicate(ts->vec_sol,&cvode->update);CHKERRQ(ierr); ierr = VecDuplicate(ts->vec_sol,&cvode->ydot);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->update);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->ydot);CHKERRQ(ierr); /* Create work vectors for the TSPSolve_Sundials() routine. Note these are allocated with zero space arrays because the actual array space is provided by Sundials and set using VecPlaceArray(). */ ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)ts),1,locsize,PETSC_DECIDE,0,&cvode->w1);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)ts),1,locsize,PETSC_DECIDE,0,&cvode->w2);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->w1);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->w2);CHKERRQ(ierr); /* Call CVodeCreate to create the solver memory and the use of a Newton iteration */ mem = CVodeCreate(cvode->cvode_type, CV_NEWTON); if (!mem) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"CVodeCreate() fails"); cvode->mem = mem; /* Set the pointer to user-defined data */ flag = CVodeSetUserData(mem, ts); if (flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVodeSetUserData() fails"); /* Sundials may choose to use a smaller initial step, but will never use a larger step. */ flag = CVodeSetInitStep(mem,(realtype)ts->time_step); if (flag) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetInitStep() failed"); if (cvode->mindt > 0) { flag = CVodeSetMinStep(mem,(realtype)cvode->mindt); if (flag) { if (flag == CV_MEM_NULL) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMinStep() failed, cvode_mem pointer is NULL"); else if (flag == CV_ILL_INPUT) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMinStep() failed, hmin is nonpositive or it exceeds the maximum allowable step size"); else SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMinStep() failed"); } } if (cvode->maxdt > 0) { flag = CVodeSetMaxStep(mem,(realtype)cvode->maxdt); if (flag) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMaxStep() failed"); } /* Call CVodeInit to initialize the integrator memory and specify the * user's right hand side function in u'=f(t,u), the inital time T0, and * the initial dependent variable vector cvode->y */ flag = CVodeInit(mem,TSFunction_Sundials,ts->ptime,cvode->y); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVodeInit() fails, flag %d",flag); /* specifies scalar relative and absolute tolerances */ flag = CVodeSStolerances(mem,cvode->reltol,cvode->abstol); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVodeSStolerances() fails, flag %d",flag); /* Specify max num of steps to be taken by cvode in its attempt to reach the next output time */ flag = CVodeSetMaxNumSteps(mem,ts->max_steps); /* call CVSpgmr to use GMRES as the linear solver. */ /* setup the ode integrator with the given preconditioner */ ierr = TSSundialsGetPC(ts,&pc);CHKERRQ(ierr); ierr = PCGetType(pc,&pctype);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)pc,PCNONE,&pcnone);CHKERRQ(ierr); if (pcnone) { flag = CVSpgmr(mem,PREC_NONE,0); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpgmr() fails, flag %d",flag); } else { flag = CVSpgmr(mem,PREC_LEFT,cvode->maxl); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpgmr() fails, flag %d",flag); /* Set preconditioner and solve routines Precond and PSolve, and the pointer to the user-defined block data */ flag = CVSpilsSetPreconditioner(mem,TSPrecond_Sundials,TSPSolve_Sundials); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpilsSetPreconditioner() fails, flag %d", flag); } flag = CVSpilsSetGSType(mem, MODIFIED_GS); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpgmrSetGSType() fails, flag %d",flag); PetscFunctionReturn(0); }

/* KSPPGMRESCycle - Run pgmres, possibly with restart. Return residual history if requested. input parameters: . pgmres - structure containing parameters and work areas output parameters: . itcount - number of iterations used. If null, ignored. . converged - 0 if not converged Notes: On entry, the value in vector VEC_VV(0) should be the initial residual. */ static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_PGMRES *pgmres = (KSP_PGMRES*)(ksp->data); PetscReal res_norm,res,newnorm; PetscErrorCode ierr; PetscInt it = 0,j,k; PetscBool hapend = PETSC_FALSE; PetscFunctionBegin; if (itcount) *itcount = 0; ierr = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr); KSPCheckNorm(ksp,res_norm); res = res_norm; *RS(0) = res_norm; /* check for the convergence */ ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = res; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); pgmres->it = it-2; ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); if (!res) { ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); for (; !ksp->reason; it++) { Vec Zcur,Znext; if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) { ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr); } /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */ Zcur = VEC_VV(it); /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */ Znext = VEC_VV(it+1); /* This iteration will compute Znext, update with a deferred correction once we know how * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */ if (it < pgmres->max_k+1 && ksp->its+1 < PetscMax(2,ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */ ierr = KSP_PCApplyBAorAB(ksp,Zcur,Znext,VEC_TEMP_MATOP);CHKERRQ(ierr); } if (it > 1) { /* Complete the pending reduction */ ierr = VecNormEnd(VEC_VV(it-1),NORM_2,&newnorm);CHKERRQ(ierr); *HH(it-1,it-2) = newnorm; } if (it > 0) { /* Finish the reduction computing the latest column of H */ ierr = VecMDotEnd(Zcur,it,&(VEC_VV(0)),HH(0,it-1));CHKERRQ(ierr); } if (it > 1) { /* normalize the base vector from two iterations ago, basis is complete up to here */ ierr = VecScale(VEC_VV(it-1),1./ *HH(it-1,it-2));CHKERRQ(ierr); ierr = KSPPGMRESUpdateHessenberg(ksp,it-2,&hapend,&res);CHKERRQ(ierr); pgmres->it = it-2; ksp->its++; ksp->rnorm = res; ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (it < pgmres->max_k+1 || ksp->reason || ksp->its == ksp->max_it) { /* Monitor if we are done or still iterating, but not before a restart. */ ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } if (ksp->reason) break; /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } if (!(it < pgmres->max_k+1 && ksp->its < ksp->max_it)) break; /* The it-2 column of H was not scaled when we computed Zcur, apply correction */ ierr = VecScale(Zcur,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* And Znext computed in this iteration was computed using the under-scaled Zcur */ ierr = VecScale(Znext,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */ for (k=0; k<it; k++) *HH(k,it-1) /= *HH(it-1,it-2); /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This * column is complete except for HH(it,it-1) which we won't know until the next iteration. */ *HH(it-1,it-1) /= *HH(it-1,it-2); } if (it > 0) { PetscScalar *work; if (!pgmres->orthogwork) {ierr = PetscMalloc1(pgmres->max_k + 2,&pgmres->orthogwork);CHKERRQ(ierr);} work = pgmres->orthogwork; /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is * * Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1] * * where * * Z[j] = sum_{k=0}^j V[k] * H[k,j-1] * * substituting * * Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1] * * rearranging the iteration space from row-column to column-row * * Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1] * * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already * been transformed to upper triangular form. */ for (k=0; k<it+1; k++) { work[k] = 0; for (j=PetscMax(0,k-1); j<it-1; j++) work[k] -= *HES(k,j) * *HH(j,it-1); } ierr = VecMAXPY(Znext,it+1,work,&VEC_VV(0));CHKERRQ(ierr); ierr = VecAXPY(Znext,-*HH(it-1,it-1),Zcur);CHKERRQ(ierr); /* Orthogonalize Zcur against existing basis vectors. */ for (k=0; k<it; k++) work[k] = -*HH(k,it-1); ierr = VecMAXPY(Zcur,it,work,&VEC_VV(0));CHKERRQ(ierr); /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */ /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */ ierr = VecNormBegin(VEC_VV(it),NORM_2,&newnorm);CHKERRQ(ierr); } /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */ ierr = VecMDotBegin(Znext,it+1,&VEC_VV(0),HH(0,it));CHKERRQ(ierr); /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */ ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));CHKERRQ(ierr); } if (itcount) *itcount = it-1; /* Number of iterations actually completed. */ /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ ierr = KSPPGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-2);CHKERRQ(ierr); PetscFunctionReturn(0); }

/*@C PetscScalarView - Prints an array of scalars; useful for debugging. Collective on PetscViewer Input Parameters: + N - number of scalars in array . idx - array of scalars - viewer - location to print array, PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_STDOUT_SELF or 0 Level: intermediate Developer Notes: idx cannot be const because may be passed to binary viewer where byte swappping is done .seealso: PetscIntView(), PetscRealView() @*/ PetscErrorCode PetscScalarView(PetscInt N,const PetscScalar idx[],PetscViewer viewer) { PetscErrorCode ierr; PetscInt j,i,n = N/3,p = N % 3; PetscBool iascii,isbinary; MPI_Comm comm; PetscFunctionBegin; if (!viewer) viewer = PETSC_VIEWER_STDOUT_SELF; PetscValidHeader(viewer,3); PetscValidScalarPointer(idx,2); ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); if (iascii) { ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%2d:",3*i);CHKERRQ(ierr); for (j=0; j<3; j++) { #if defined(PETSC_USE_COMPLEX) ierr = PetscViewerASCIISynchronizedPrintf(viewer," (%12.4e,%12.4e)", PetscRealPart(idx[i*3+j]),PetscImaginaryPart(idx[i*3+j]));CHKERRQ(ierr); #else ierr = PetscViewerASCIISynchronizedPrintf(viewer," %12.4e",idx[i*3+j]);CHKERRQ(ierr); #endif } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } if (p) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%2d:",3*n);CHKERRQ(ierr); for (i=0; i<p; i++) { #if defined(PETSC_USE_COMPLEX) ierr = PetscViewerASCIISynchronizedPrintf(viewer," (%12.4e,%12.4e)", PetscRealPart(idx[n*3+i]),PetscImaginaryPart(idx[n*3+i]));CHKERRQ(ierr); #else ierr = PetscViewerASCIISynchronizedPrintf(viewer," %12.4e",idx[3*n+i]);CHKERRQ(ierr); #endif } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); } else if (isbinary) { PetscMPIInt size,rank,*sizes,Ntotal,*displs,NN; PetscScalar *array; ierr = PetscMPIIntCast(N,&NN);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size > 1) { if (rank) { ierr = MPI_Gather(&NN,1,MPI_INT,0,0,MPI_INT,0,comm);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_SCALAR,0,0,0,MPIU_SCALAR,0,comm);CHKERRQ(ierr); } else { ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sizes);CHKERRQ(ierr); ierr = MPI_Gather(&NN,1,MPI_INT,sizes,1,MPI_INT,0,comm);CHKERRQ(ierr); Ntotal = sizes[0]; ierr = PetscMalloc(size*sizeof(PetscMPIInt),&displs);CHKERRQ(ierr); displs[0] = 0; for (i=1; i<size; i++) { Ntotal += sizes[i]; displs[i] = displs[i-1] + sizes[i-1]; } ierr = PetscMalloc(Ntotal*sizeof(PetscScalar),&array);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_SCALAR,array,sizes,displs,MPIU_SCALAR,0,comm);CHKERRQ(ierr); ierr = PetscViewerBinaryWrite(viewer,array,Ntotal,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscFree(sizes);CHKERRQ(ierr); ierr = PetscFree(displs);CHKERRQ(ierr); ierr = PetscFree(array);CHKERRQ(ierr); } } else { ierr = PetscViewerBinaryWrite(viewer,(void*) idx,N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); } } else { const char *tname; ierr = PetscObjectGetName((PetscObject)viewer,&tname);CHKERRQ(ierr); SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot handle that PetscViewer of type %s",tname); } PetscFunctionReturn(0); }

/*@C PetscIntView - Prints an array of integers; useful for debugging. Collective on PetscViewer Input Parameters: + N - number of integers in array . idx - array of integers - viewer - location to print array, PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_STDOUT_SELF or 0 Level: intermediate Developer Notes: idx cannot be const because may be passed to binary viewer where byte swappping is done .seealso: PetscRealView() @*/ PetscErrorCode PetscIntView(PetscInt N,const PetscInt idx[],PetscViewer viewer) { PetscErrorCode ierr; PetscInt j,i,n = N/20,p = N % 20; PetscBool iascii,isbinary; MPI_Comm comm; PetscFunctionBegin; if (!viewer) viewer = PETSC_VIEWER_STDOUT_SELF; PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,3); if (N) PetscValidIntPointer(idx,2); ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); if (iascii) { ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D:",20*i);CHKERRQ(ierr); for (j=0; j<20; j++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer," %D",idx[i*20+j]);CHKERRQ(ierr); } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } if (p) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D:",20*n);CHKERRQ(ierr); for (i=0; i<p; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer," %D",idx[20*n+i]);CHKERRQ(ierr);} ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); } else if (isbinary) { PetscMPIInt rank,size,*sizes,Ntotal,*displs,NN; PetscInt *array; ierr = PetscMPIIntCast(N,&NN);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size > 1) { if (rank) { ierr = MPI_Gather(&NN,1,MPI_INT,0,0,MPI_INT,0,comm);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_INT,0,0,0,MPIU_INT,0,comm);CHKERRQ(ierr); } else { ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sizes);CHKERRQ(ierr); ierr = MPI_Gather(&NN,1,MPI_INT,sizes,1,MPI_INT,0,comm);CHKERRQ(ierr); Ntotal = sizes[0]; ierr = PetscMalloc(size*sizeof(PetscMPIInt),&displs);CHKERRQ(ierr); displs[0] = 0; for (i=1; i<size; i++) { Ntotal += sizes[i]; displs[i] = displs[i-1] + sizes[i-1]; } ierr = PetscMalloc(Ntotal*sizeof(PetscInt),&array);CHKERRQ(ierr); ierr = MPI_Gatherv((void*)idx,NN,MPIU_INT,array,sizes,displs,MPIU_INT,0,comm);CHKERRQ(ierr); ierr = PetscViewerBinaryWrite(viewer,array,Ntotal,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscFree(sizes);CHKERRQ(ierr); ierr = PetscFree(displs);CHKERRQ(ierr); ierr = PetscFree(array);CHKERRQ(ierr); } } else { ierr = PetscViewerBinaryWrite(viewer,(void*) idx,N,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); } } else { const char *tname; ierr = PetscObjectGetName((PetscObject)viewer,&tname);CHKERRQ(ierr); SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot handle that PetscViewer of type %s",tname); } PetscFunctionReturn(0); }

/*@ KSPComputeEigenvaluesExplicitly - Computes all of the eigenvalues of the preconditioned operator using LAPACK. Collective on KSP Input Parameter: + ksp - iterative context obtained from KSPCreate() - n - size of arrays r and c Output Parameters: + r - real part of computed eigenvalues, provided by user with a dimension at least of n - c - complex part of computed eigenvalues, provided by user with a dimension at least of n Notes: This approach is very slow but will generally provide accurate eigenvalue estimates. This routine explicitly forms a dense matrix representing the preconditioned operator, and thus will run only for relatively small problems, say n < 500. Many users may just want to use the monitoring routine KSPMonitorSingularValue() (which can be set with option -ksp_monitor_singular_value) to print the singular values at each iteration of the linear solve. The preconditoner operator, rhs vector, solution vectors should be set before this routine is called. i.e use KSPSetOperators(),KSPSolve() or KSPSetOperators() Level: advanced .keywords: KSP, compute, eigenvalues, explicitly .seealso: KSPComputeEigenvalues(), KSPMonitorSingularValue(), KSPComputeExtremeSingularValues(), KSPSetOperators(), KSPSolve() @*/ PetscErrorCode KSPComputeEigenvaluesExplicitly(KSP ksp,PetscInt nmax,PetscReal r[],PetscReal c[]) { Mat BA; PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm; PetscScalar *array; Mat A; PetscInt m,row,nz,i,n,dummy; const PetscInt *cols; const PetscScalar *vals; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr); ierr = KSPComputeExplicitOperator(ksp,&BA);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MatGetSize(BA,&n,&n);CHKERRQ(ierr); if (size > 1) { /* assemble matrix on first processor */ ierr = MatCreate(PetscObjectComm((PetscObject)ksp),&A);CHKERRQ(ierr); if (!rank) { ierr = MatSetSizes(A,n,n,n,n);CHKERRQ(ierr); } else { ierr = MatSetSizes(A,0,0,n,n);CHKERRQ(ierr); } ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)BA,(PetscObject)A);CHKERRQ(ierr); ierr = MatGetOwnershipRange(BA,&row,&dummy);CHKERRQ(ierr); ierr = MatGetLocalSize(BA,&m,&dummy);CHKERRQ(ierr); for (i=0; i<m; i++) { ierr = MatGetRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); row++; } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr); } else { ierr = MatDenseGetArray(BA,&array);CHKERRQ(ierr); } #if defined(PETSC_HAVE_ESSL) /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ if (!rank) { PetscScalar sdummy,*cwork; PetscReal *work,*realpart; PetscBLASInt clen,idummy,lwork,bn,zero = 0; PetscInt *perm; #if !defined(PETSC_USE_COMPLEX) clen = n; #else clen = 2*n; #endif ierr = PetscMalloc1(clen,&cwork);CHKERRQ(ierr); idummy = -1; /* unused */ ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); lwork = 5*n; ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); ierr = PetscMalloc1(n,&realpart);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,array,&bn,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork)); ierr = PetscFPTrapPop();CHKERRQ(ierr); ierr = PetscFree(work);CHKERRQ(ierr); /* For now we stick with the convention of storing the real and imaginary components of evalues separately. But is this what we really want? */ ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) for (i=0; i<n; i++) { realpart[i] = cwork[2*i]; perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = cwork[2*perm[i]]; c[i] = cwork[2*perm[i]+1]; } #else for (i=0; i<n; i++) { realpart[i] = PetscRealPart(cwork[i]); perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(cwork[perm[i]]); c[i] = PetscImaginaryPart(cwork[perm[i]]); } #endif ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(realpart);CHKERRQ(ierr); ierr = PetscFree(cwork);CHKERRQ(ierr); } #elif !defined(PETSC_USE_COMPLEX) if (!rank) { PetscScalar *work; PetscReal *realpart,*imagpart; PetscBLASInt idummy,lwork; PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc2(n,&realpart,n,&imagpart);CHKERRQ(ierr); ierr = PetscMalloc1(5*n,&work);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt bn; ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,array,&bn,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); for (i=0; i<n; i++) perm[i] = i; ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree2(realpart,imagpart);CHKERRQ(ierr); } #else if (!rank) { PetscScalar *work,*eigs; PetscReal *rwork; PetscBLASInt idummy,lwork; PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc1(5*n,&work);CHKERRQ(ierr); ierr = PetscMalloc1(2*n,&rwork);CHKERRQ(ierr); ierr = PetscMalloc1(n,&eigs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt nb; ierr = PetscBLASIntCast(n,&nb);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&nb,array,&nb,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,rwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); for (i=0; i<n; i++) perm[i] = i; for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]); ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(eigs[perm[i]]); c[i] = PetscImaginaryPart(eigs[perm[i]]); } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(eigs);CHKERRQ(ierr); } #endif if (size > 1) { ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); } else { ierr = MatDenseRestoreArray(BA,&array);CHKERRQ(ierr); } ierr = MatDestroy(&BA);CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode KSPSolve_FCG(KSP ksp) { PetscErrorCode ierr; PetscInt i,k,idx,mi; KSP_FCG *fcg = (KSP_FCG*)ksp->data; PetscScalar alpha=0.0,beta = 0.0,dpi,s; PetscReal dp=0.0; Vec B,R,Z,X,Pcurr,Ccurr; Mat Amat,Pmat; PetscInt eigs = ksp->calc_sings; /* Variables for eigen estimation - START*/ PetscInt stored_max_it = ksp->max_it; PetscScalar alphaold = 0,betaold = 1.0,*e = 0,*d = 0;/* Variables for eigen estimation - FINISH */ PetscFunctionBegin; #define VecXDot(x,y,a) (((fcg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x,y,a) : VecTDot(x,y,a)) #define VecXMDot(a,b,c,d) (((fcg->type) == (KSP_CG_HERMITIAN)) ? VecMDot(a,b,c,d) : VecMTDot(a,b,c,d)) X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; Z = ksp->work[1]; ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); if (eigs) {e = fcg->e; d = fcg->d; e[0] = 0.0; } /* Compute initial residual needed for convergence check*/ ksp->its = 0; if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); /* r <- b - Ax */ } else { ierr = VecCopy(B,R);CHKERRQ(ierr); /* r <- b (x is 0) */ } switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr); /* dp <- dqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e) */ break; case KSP_NORM_NATURAL: ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ ierr = VecXDot(R,Z,&s);CHKERRQ(ierr); dp = PetscSqrtReal(PetscAbsScalar(s)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e) */ break; case KSP_NORM_NONE: dp = 0.0; break; default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]); } /* Initial Convergence Check */ ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ksp->rnorm = dp; if (ksp->normtype == KSP_NORM_NONE) { ierr = KSPConvergedSkip(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); } else { ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); } if (ksp->reason) PetscFunctionReturn(0); /* Apply PC if not already done for convergence check */ if(ksp->normtype == KSP_NORM_UNPRECONDITIONED || ksp->normtype == KSP_NORM_NONE){ ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ } i = 0; do { ksp->its = i+1; /* If needbe, allocate a new chunk of vectors in P and C */ ierr = KSPAllocateVectors_FCG(ksp,i+1,fcg->vecb);CHKERRQ(ierr); /* Note that we wrap around and start clobbering old vectors */ idx = i % (fcg->mmax+1); Pcurr = fcg->Pvecs[idx]; Ccurr = fcg->Cvecs[idx]; /* Compute a new column of P (Currently does not support modified G-S or iterative refinement)*/ switch(fcg->truncstrat){ case KSP_FCG_TRUNC_TYPE_NOTAY : mi = PetscMax(1,i%(fcg->mmax+1)); break; case KSP_FCG_TRUNC_TYPE_STANDARD : mi = fcg->mmax; break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unrecognized FCG Truncation Strategy");CHKERRQ(ierr); } ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr); { PetscInt l,ndots; l = PetscMax(0,i-mi); ndots = i-l; if (ndots){ PetscInt j; Vec *Pold, *Cold; PetscScalar *dots; ierr = PetscMalloc3(ndots,&dots,ndots,&Cold,ndots,&Pold);CHKERRQ(ierr); for(k=l,j=0;j<ndots;++k,++j){ idx = k % (fcg->mmax+1); Cold[j] = fcg->Cvecs[idx]; Pold[j] = fcg->Pvecs[idx]; } ierr = VecXMDot(Z,ndots,Cold,dots);CHKERRQ(ierr); for(k=0;k<ndots;++k){ dots[k] = -dots[k]; } ierr = VecMAXPY(Pcurr,ndots,dots,Pold);CHKERRQ(ierr); ierr = PetscFree3(dots,Cold,Pold);CHKERRQ(ierr); } } /* Update X and R */ betaold = beta; ierr = VecXDot(Pcurr,R,&beta);CHKERRQ(ierr); /* beta <- pi'*r */ ierr = KSP_MatMult(ksp,Amat,Pcurr,Ccurr);CHKERRQ(ierr); /* w <- A*pi (stored in ci) */ ierr = VecXDot(Pcurr,Ccurr,&dpi);CHKERRQ(ierr); /* dpi <- pi'*w */ alphaold = alpha; alpha = beta / dpi; /* alpha <- beta/dpi */ ierr = VecAXPY(X,alpha,Pcurr);CHKERRQ(ierr); /* x <- x + alpha * pi */ ierr = VecAXPY(R,-alpha,Ccurr);CHKERRQ(ierr); /* r <- r - alpha * wi */ /* Compute norm for convergence check */ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr); /* dp <- sqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e) */ break; case KSP_NORM_NATURAL: ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ ierr = VecXDot(R,Z,&s);CHKERRQ(ierr); dp = PetscSqrtReal(PetscAbsScalar(s)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e) */ break; case KSP_NORM_NONE: dp = 0.0; break; default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]); } /* Check for convergence */ ksp->rnorm = dp; KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* Apply PC if not already done for convergence check */ if(ksp->normtype == KSP_NORM_UNPRECONDITIONED || ksp->normtype == KSP_NORM_NONE){ ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ } /* Compute current C (which is W/dpi) */ ierr = VecScale(Ccurr,1.0/dpi);CHKERRQ(ierr); /* w <- ci/dpi */ if (eigs) { if (i > 0) { if (ksp->max_it != stored_max_it) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Can not change maxit AND calculate eigenvalues"); e[i] = PetscSqrtReal(PetscAbsScalar(beta/betaold))/alphaold; d[i] = PetscSqrtReal(PetscAbsScalar(beta/betaold))*e[i] + 1.0/alpha; } else { d[i] = PetscSqrtReal(PetscAbsScalar(beta))*e[i] + 1.0/alpha; } } ++i; } while (i<ksp->max_it); if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; if (eigs) fcg->ned = ksp->its-1; PetscFunctionReturn(0); }

PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) { Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; PetscErrorCode ierr; PetscMPIInt size; PetscInt m=A->rmap->n,M=A->rmap->N,N=A->cmap->N; SuperLUStat_t stat; double berr[1]; PetscScalar *bptr; PetscInt nrhs=1; Vec x_seq; IS iden; VecScatter scat; int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ PetscFunctionBegin; ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); if (size > 1 && lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); ierr = ISDestroy(&iden);CHKERRQ(ierr); ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); } else { /* size==1 || distributed mat input */ if (lu->options.SolveInitialized && !lu->matsolve_iscalled){ /* see comments in MatMatSolve() */ #if defined(PETSC_USE_COMPLEX) zSolveFinalize(&lu->options, &lu->SOLVEstruct); #else dSolveFinalize(&lu->options, &lu->SOLVEstruct); #endif lu->options.SolveInitialized = NO; } ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); } if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); PStatInit(&stat); /* Initialize the statistics variables. */ if (lu->MatInputMode == GLOBAL) { #if defined(PETSC_USE_COMPLEX) pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs, &lu->grid,&lu->LUstruct,berr,&stat,&info); #else pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs, &lu->grid,&lu->LUstruct,berr,&stat,&info); #endif } else { /* distributed mat input */ #if defined(PETSC_USE_COMPLEX) pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info); #else pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info); #endif } if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); if (lu->options.PrintStat) { PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ } PStatFree(&stat); if (size > 1 && lu->MatInputMode == GLOBAL) { /* convert seq x to mpi x */ ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterDestroy(&scat);CHKERRQ(ierr); ierr = VecDestroy(&x_seq);CHKERRQ(ierr); } else { ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); lu->matsolve_iscalled = PETSC_TRUE; lu->matmatsolve_iscalled = PETSC_FALSE; } PetscFunctionReturn(0); }

static PetscErrorCode PetscPythonFindLibrary(char pythonexe[PETSC_MAX_PATH_LEN],char pythonlib[PETSC_MAX_PATH_LEN]) { const char cmdline[] = "-c 'import sys; print(sys.exec_prefix); print(sys.version[:3])'"; char command[PETSC_MAX_PATH_LEN+1+sizeof(cmdline)+1]; char prefix[PETSC_MAX_PATH_LEN],version[8],sep[2]= {PETSC_DIR_SEPARATOR, 0},*eol; FILE *fp = NULL; char path[PETSC_MAX_PATH_LEN+1]; PetscBool found = PETSC_FALSE; PetscErrorCode ierr; PetscFunctionBegin; #if defined(PETSC_PYTHON_LIB) ierr = PetscStrcpy(pythonlib,PETSC_PYTHON_LIB); CHKERRQ(ierr); PetscFunctionReturn(0); #endif /* call Python to find out the name of the Python dynamic library */ ierr = PetscStrncpy(command,pythonexe,PETSC_MAX_PATH_LEN); CHKERRQ(ierr); ierr = PetscStrcat(command," "); CHKERRQ(ierr); ierr = PetscStrcat(command,cmdline); CHKERRQ(ierr); #if defined(PETSC_HAVE_POPEN) ierr = PetscPOpen(PETSC_COMM_SELF,NULL,command,"r",&fp); CHKERRQ(ierr); if (!fgets(prefix,sizeof(prefix),fp)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Python: bad output from executable: %s",pythonexe); if (!fgets(version,sizeof(version),fp)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Python: bad output from executable: %s",pythonexe); ierr = PetscPClose(PETSC_COMM_SELF,fp,NULL); CHKERRQ(ierr); #else SETERRQ(PETSC_COMM_SELF,1,"Python: Aborted due to missing popen()"); #endif /* remove newlines */ ierr = PetscStrchr(prefix,'\n',&eol); CHKERRQ(ierr); if (eol) eol[0] = 0; ierr = PetscStrchr(version,'\n',&eol); CHKERRQ(ierr); if (eol) eol[0] = 0; /* test for $prefix/lib64/libpythonX.X[.so]*/ ierr = PetscStrcpy(pythonlib,prefix); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,sep); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,"lib64"); CHKERRQ(ierr); ierr = PetscTestDirectory(pythonlib,'r',&found); CHKERRQ(ierr); if (found) { ierr = PetscStrcat(pythonlib,sep); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,"libpython"); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,version); CHKERRQ(ierr); ierr = PetscDLLibraryRetrieve(PETSC_COMM_SELF,pythonlib,path,PETSC_MAX_PATH_LEN,&found); CHKERRQ(ierr); if (found) PetscFunctionReturn(0); } /* test for $prefix/lib/libpythonX.X[.so]*/ ierr = PetscStrcpy(pythonlib,prefix); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,sep); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,"lib"); CHKERRQ(ierr); ierr = PetscTestDirectory(pythonlib,'r',&found); CHKERRQ(ierr); if (found) { ierr = PetscStrcat(pythonlib,sep); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,"libpython"); CHKERRQ(ierr); ierr = PetscStrcat(pythonlib,version); CHKERRQ(ierr); ierr = PetscDLLibraryRetrieve(PETSC_COMM_SELF,pythonlib,path,PETSC_MAX_PATH_LEN,&found); CHKERRQ(ierr); if (found) PetscFunctionReturn(0); } /* nothing good found */ ierr = PetscMemzero(pythonlib,PETSC_MAX_PATH_LEN); CHKERRQ(ierr); ierr = PetscInfo(0,"Python dynamic library not found\n"); CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) { Mat *tseq,A_seq = PETSC_NULL; Mat_SeqAIJ *aa,*bb; Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr; PetscErrorCode ierr; PetscInt M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ PetscMPIInt size; SuperLUStat_t stat; double *berr=0; IS isrow; PetscLogDouble time0,time,time_min,time_max; Mat F_diag=PETSC_NULL; #if defined(PETSC_USE_COMPLEX) doublecomplex *av, *bv; #else double *av, *bv; #endif PetscFunctionBegin; ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); if (lu->options.PrintStat) { /* collect time for mat conversion */ ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); ierr = PetscGetTime(&time0);CHKERRQ(ierr); } if (lu->MatInputMode == GLOBAL) { /* global mat input */ if (size > 1) { /* convert mpi A to seq mat A */ ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); ierr = ISDestroy(&isrow);CHKERRQ(ierr); A_seq = *tseq; ierr = PetscFree(tseq);CHKERRQ(ierr); aa = (Mat_SeqAIJ*)A_seq->data; } else { PetscBool flg; ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); if (flg) { Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data; A = At->A; } aa = (Mat_SeqAIJ*)A->data; } /* Convert Petsc NR matrix to SuperLU_DIST NC. Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ Destroy_CompCol_Matrix_dist(&lu->A_sup); if (lu->FactPattern == SamePattern_SameRowPerm){ lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ } else { /* lu->FactPattern == SamePattern */ Destroy_LU(N, &lu->grid, &lu->LUstruct); lu->options.Fact = SamePattern; } } #if defined(PETSC_USE_COMPLEX) zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); #else dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); #endif /* Create compressed column matrix A_sup. */ #if defined(PETSC_USE_COMPLEX) zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); #else dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); #endif } else { /* distributed mat input */ Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; aa=(Mat_SeqAIJ*)(mat->A)->data; bb=(Mat_SeqAIJ*)(mat->B)->data; ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; #if defined(PETSC_USE_COMPLEX) av=(doublecomplex*)aa->a; bv=(doublecomplex*)bb->a; #else av=aa->a; bv=bb->a; #endif rstart = A->rmap->rstart; nz = aa->nz + bb->nz; garray = mat->garray; if (lu->options.Fact == DOFACT) {/* first numeric factorization */ #if defined(PETSC_USE_COMPLEX) zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); #else dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); #endif } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */ if (lu->FactPattern == SamePattern_SameRowPerm){ lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ } else { Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ lu->options.Fact = SamePattern; } } nz = 0; for ( i=0; i<m; i++ ) { lu->row[i] = nz; countA = ai[i+1] - ai[i]; countB = bi[i+1] - bi[i]; ajj = aj + ai[i]; /* ptr to the beginning of this row */ bjj = bj + bi[i]; /* B part, smaller col index */ colA_start = rstart + ajj[0]; /* the smallest global col index of A */ jB = 0; for (j=0; j<countB; j++){ jcol = garray[bjj[j]]; if (jcol > colA_start) { jB = j; break; } lu->col[nz] = jcol; lu->val[nz++] = *bv++; if (j==countB-1) jB = countB; } /* A part */ for (j=0; j<countA; j++){ lu->col[nz] = rstart + ajj[j]; lu->val[nz++] = *av++; } /* B part, larger col index */ for (j=jB; j<countB; j++){ lu->col[nz] = garray[bjj[j]]; lu->val[nz++] = *bv++; } } lu->row[m] = nz; #if defined(PETSC_USE_COMPLEX) zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); #else dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); #endif } if (lu->options.PrintStat) { ierr = PetscGetTime(&time);CHKERRQ(ierr); time0 = time - time0; } /* Factor the matrix. */ PStatInit(&stat); /* Initialize the statistics variables. */ if (lu->MatInputMode == GLOBAL) { /* global mat input */ #if defined(PETSC_USE_COMPLEX) pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo); #else pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo); #endif } else { /* distributed mat input */ #if defined(PETSC_USE_COMPLEX) pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); #else pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); #endif } if (lu->MatInputMode == GLOBAL && size > 1){ ierr = MatDestroy(&A_seq);CHKERRQ(ierr); } if (lu->options.PrintStat) { ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); time = time/size; /* average time */ ierr = PetscPrintf(((PetscObject)A)->comm, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ } PStatFree(&stat); if (size > 1){ F_diag = ((Mat_MPIAIJ *)(F)->data)->A; F_diag->assembled = PETSC_TRUE; } (F)->assembled = PETSC_TRUE; (F)->preallocated = PETSC_TRUE; lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ PetscFunctionReturn(0); }

PETSC_EXTERN PetscErrorCode MatGetOrdering_AWBM(Mat A, MatOrderingType type, IS *permR, IS *permC) { Vec *scalR, *scalC, scalRVec, scalCVec; scalR = &scalRVec; scalC = &scalCVec; /* EVERYTHING IS WRITTEN AS IF THE MATRIX WERE COLUMN-MAJOR */ Mat_SeqAIJ *aij = (Mat_SeqAIJ *) A->data; PetscInt n = A->rmap->n; /* Number of local columns */ PetscInt m = A->cmap->n; /* Number of local rows */ PetscInt *match; /* The row matched to each column, and inverse column permutation */ PetscInt *matchR; /* The column matched to each row */ PetscInt *p; /* The column permutation */ const PetscInt *ia = aij->i; const PetscInt *ja = aij->j; const MatScalar *a = aij->a; Vec colMax; PetscScalar *a_j, *sr, *sc; PetscReal *weights /* c_ij */, *u /* u_i */, *v /* v_j */, eps = PETSC_SQRT_MACHINE_EPSILON; PetscInt debug = 0, r, c, r1, c1; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscOptionsGetInt(NULL, "-debug", &debug, NULL);CHKERRQ(ierr); ierr = MatGetVecs(A, NULL, &colMax);CHKERRQ(ierr); ierr = MatGetRowMaxAbs(A, colMax, NULL);CHKERRQ(ierr); ierr = PetscMalloc2(n, &match, m, &matchR);CHKERRQ(ierr); ierr = PetscMalloc1(n, &p);CHKERRQ(ierr); ierr = PetscCalloc3(m, &u, n, &v, ia[n], &weights);CHKERRQ(ierr); for (c = 0; c < n; ++c) match[c] = -1; /* Compute weights */ ierr = VecGetArray(colMax, &a_j);CHKERRQ(ierr); for (c = 0; c < n; ++c) { for (r = ia[c]; r < ia[c+1]; ++r) { PetscReal ar = PetscAbsScalar(a[r]); if (ar == 0.0) weights[r] = PETSC_MAX_REAL; else weights[r] = log(a_j[c]/ar); } } /* Compute local row weights */ for (r = 0; r < m; ++r) u[r] = PETSC_MAX_REAL; for (c = 0; c < n; ++c) { for (r = ia[c]; r < ia[c+1]; ++r) { u[ja[r]] = PetscMin(u[ja[r]], weights[r]); } } /* Compute local column weights */ for (c = 0; c < n; ++c) { v[c] = PETSC_MAX_REAL; for (r = ia[c]; r < ia[c+1]; ++r) { v[c] = PetscMin(v[c], weights[r] - u[ja[r]]); } } for (r = 0; r < m; ++r) matchR[r] = -1; /* Match columns */ ierr = CheckUnmatched(n, match, matchR);CHKERRQ(ierr); for (c = 0; c < n; ++c) { /* if (match[c] >= 0) continue; */ if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, "Row %d\n Weights:", c);CHKERRQ(ierr);} for (r = ia[c]; r < ia[c+1]; ++r) { PetscReal weight = weights[r] - u[ja[r]] - v[c]; if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, " %g", weight);CHKERRQ(ierr);} if ((weight <= eps) && (matchR[ja[r]] < 0)) { if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, "Matched %d -- %d\n", c, ja[r]);CHKERRQ(ierr);} match[c] = ja[r]; matchR[ja[r]] = c; break; } } if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, "\n");CHKERRQ(ierr);} } /* Deal with unmatched columns */ ierr = CheckUnmatched(n, match, matchR);CHKERRQ(ierr); for (c = 0; c < n; ++c) { if (match[c] >= 0) continue; for (r = ia[c]; r < ia[c+1]; ++r) { PetscReal weight = weights[r] - u[ja[r]] - v[c]; if (weight > eps) continue; /* \bar c_ij = 0 and (r, j1) \in M */ c1 = matchR[ja[r]]; for (r1 = ia[c1]; r1 < ia[c1+1]; ++r1) { PetscReal weight1 = weights[r1] - u[ja[r1]] - v[c1]; if ((matchR[ja[r1]] < 0) && (weight1 <= eps)) { /* (r, c1) in M is replaced by (r, c) and (r1, c1) */ if (debug) { ierr = PetscPrintf(PETSC_COMM_SELF, "Replaced match %d -- %d\n", c1, ja[r]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, " Added match %d -- %d\n", c, ja[r]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, " Added match %d -- %d\n", c1, ja[r1]);CHKERRQ(ierr); } match[c] = ja[r]; matchR[ja[r]] = c; match[c1] = ja[r1]; matchR[ja[r1]] = c1; break; } } if (match[c] >= 0) break; } } /* Allow matching with non-optimal rows */ ierr = CheckUnmatched(n, match, matchR);CHKERRQ(ierr); for (c = 0; c < n; ++c) { if (match[c] >= 0) continue; for (r = ia[c]; r < ia[c+1]; ++r) { if (matchR[ja[r]] < 0) { if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, "Matched non-opt %d -- %d\n", c, ja[r]);CHKERRQ(ierr);} match[c] = ja[r]; matchR[ja[r]] = c; break; } } } /* Deal with non-optimal unmatched columns */ ierr = CheckUnmatched(n, match, matchR);CHKERRQ(ierr); for (c = 0; c < n; ++c) { if (match[c] >= 0) continue; for (r = ia[c]; r < ia[c+1]; ++r) { /* \bar c_ij = 0 and (r, j1) \in M */ c1 = matchR[ja[r]]; for (r1 = ia[c1]; r1 < ia[c1+1]; ++r1) { if (matchR[ja[r1]] < 0) { /* (r, c1) in M is replaced by (r, c) and (r1, c1) */ if (debug) { ierr = PetscPrintf(PETSC_COMM_SELF, "Replaced match %d -- %d\n", c1, ja[r]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, " Added match %d -- %d\n", c, ja[r]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, " Added match %d -- %d\n", c1, ja[r1]);CHKERRQ(ierr); } match[c] = ja[r]; matchR[ja[r]] = c; match[c1] = ja[r1]; matchR[ja[r1]] = c1; break; } } if (match[c] >= 0) break; } } /* Complete matching */ ierr = CheckUnmatched(n, match, matchR);CHKERRQ(ierr); for (c = 0, r = 0; c < n; ++c) { if (match[c] >= n) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Column %d matched to invalid row %d", c, match[c]); if (match[c] < 0) { for (; r < n; ++r) { if (matchR[r] < 0) { if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, "Matched default %d -- %d\n", c, r);CHKERRQ(ierr);} match[c] = r; matchR[r] = c; break; } } } } /* Check matching */ ierr = CheckUnmatched(n, match, matchR);CHKERRQ(ierr); for (c = 0; c < n; ++c) { if (match[c] < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Column %d unmatched", c); if (match[c] >= n) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Column %d matched to invalid row %d", c, match[c]); } /* Make permutation */ for (c = 0; c < n; ++c) {p[match[c]] = c;} ierr = ISCreateGeneral(PETSC_COMM_SELF, n, p, PETSC_OWN_POINTER, permR);CHKERRQ(ierr); ierr = ISSetPermutation(*permR);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF, n, 0, 1, permC);CHKERRQ(ierr); ierr = ISSetPermutation(*permC);CHKERRQ(ierr); ierr = PetscFree2(match, matchR);CHKERRQ(ierr); /* Make scaling */ ierr = VecCreateSeq(PETSC_COMM_SELF, n, scalR);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF, n, scalC);CHKERRQ(ierr); ierr = VecGetArray(*scalR, &sr);CHKERRQ(ierr); ierr = VecGetArray(*scalC, &sc);CHKERRQ(ierr); for (c = 0; c < n; ++c) { sr[c] = PetscExpReal(v[c])/a_j[c]; sc[c] = PetscExpReal(u[c]); } ierr = VecRestoreArray(*scalR, &sr);CHKERRQ(ierr); ierr = VecRestoreArray(*scalC, &sc);CHKERRQ(ierr); ierr = VecRestoreArray(colMax, &a_j);CHKERRQ(ierr); ierr = VecDestroy(&colMax);CHKERRQ(ierr); ierr = PetscFree3(u,v,weights);CHKERRQ(ierr); ierr = VecDestroy(scalR);CHKERRQ(ierr); ierr = VecDestroy(scalC);CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode KSPSolve_CG(KSP ksp) { PetscErrorCode ierr; PetscInt i,stored_max_it,eigs; PetscScalar dpi = 0.0,a = 1.0,beta,betaold = 1.0,b = 0,*e = 0,*d = 0,delta,dpiold; PetscReal dp = 0.0; Vec X,B,Z,R,P,S,W; KSP_CG *cg; Mat Amat,Pmat; PetscBool diagonalscale; /* Dingwen */ PetscInt itv_d, itv_c; PetscScalar CKSX1,CKSZ1,CKSR1,CKSP1,CKSS1,CKSW1; PetscScalar CKSX2,CKSZ2,CKSR2,CKSP2,CKSS2,CKSW2; Vec CKSAmat1; Vec CKSAmat2; Vec C1,C2; PetscScalar d1,d2; PetscScalar sumX1,sumR1; PetscScalar sumX2,sumR2; Vec CKPX,CKPP; PetscScalar CKPbetaold; PetscInt CKPi; PetscBool flag1 = PETSC_TRUE, flag2 = PETSC_TRUE; PetscInt pos; PetscScalar v; VecScatter ctx; Vec W_SEQ; PetscScalar *_W; /* Dingwen */ PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); cg = (KSP_CG*)ksp->data; eigs = ksp->calc_sings; stored_max_it = ksp->max_it; X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; Z = ksp->work[1]; P = ksp->work[2]; /* Dingwen */ CKPX = ksp->work[3]; CKPP = ksp->work[4]; CKSAmat1 = ksp->work[5]; CKSAmat2 = ksp->work[6]; C1 = ksp->work[7]; C2 = ksp->work[8]; /* Dingwen */ /* Dingwen */ int rank; /* Get MPI variables */ MPI_Comm_rank (MPI_COMM_WORLD,&rank); /* Dingwen */ #define VecXDot(x,y,a) (((cg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x,y,a) : VecTDot(x,y,a)) if (cg->singlereduction) { S = ksp->work[9]; W = ksp->work[10]; } else { S = 0; /* unused */ W = Z; } if (eigs) {e = cg->e; d = cg->d; e[0] = 0.0; } ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); ksp->its = 0; if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); /* r <- b - Ax */ ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); } else { ierr = VecCopy(B,R);CHKERRQ(ierr); /* r <- b (x is 0) */ } /* Dingwen */ /* checksum coefficients initialization */ PetscInt size; ierr = VecGetSize(B,&size); for (i=0; i<size; i++) { v = 1.0; ierr = VecSetValues(C1,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); v = i; ierr = VecSetValues(C2,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); } d1 = 1.0; d2 = 2.0; /* Dingwen */ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr); /* dp <- z'*z = e'*A'*B'*B*A'*e' */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- r'*r = e'*A'*A*e */ break; case KSP_NORM_NATURAL: ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr); ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr); /* Dingwen */ ierr = VecXDot(C1,S,&CKSS1);CHKERRQ(ierr); /* Compute the initial checksum1(S) */ ierr = VecXDot(C2,S,&CKSS2);CHKERRQ(ierr); /* Compute the initial checksum2(S) */ /* Dingwen */ } ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); /* beta <- z'*r */ KSPCheckDot(ksp,beta); dp = PetscSqrtReal(PetscAbsScalar(beta)); /* dp <- r'*z = r'*B*r = e'*A'*B*A*e */ break; case KSP_NORM_NONE: dp = 0.0; break; default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]); } ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ksp->rnorm = dp; ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* test for convergence */ if (ksp->reason) PetscFunctionReturn(0); if (ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) { ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ } if (ksp->normtype != KSP_NORM_NATURAL) { if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr); ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr); } ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); /* beta <- z'*r */ KSPCheckDot(ksp,beta); } /* Dingwen */ /* Checksum Initialization */ ierr = VecXDot(C1,X,&CKSX1);CHKERRQ(ierr); /* Compute the initial checksum1(X) */ ierr = VecXDot(C1,W,&CKSW1);CHKERRQ(ierr); /* Compute the initial checksum1(W) */ ierr = VecXDot(C1,R,&CKSR1);CHKERRQ(ierr); /* Compute the initial checksum1(R) */ ierr = VecXDot(C1,Z,&CKSZ1);CHKERRQ(ierr); /* Compute the initial checksum1(Z) */ ierr = VecXDot(C2,X,&CKSX2);CHKERRQ(ierr); /* Compute the initial checksum2(X) */ ierr = VecXDot(C2,W,&CKSW2);CHKERRQ(ierr); /* Compute the initial checksum2(W) */ ierr = VecXDot(C2,R,&CKSR2);CHKERRQ(ierr); /* Compute the initial checksum2(R) */ ierr = VecXDot(C2,Z,&CKSZ2);CHKERRQ(ierr); /* Compute the initial checksum2(Z) */ ierr = KSP_MatMultTranspose(ksp,Amat,C1,CKSAmat1);CHKERRQ(ierr); ierr = VecAXPY(CKSAmat1,-d1,C1);CHKERRQ(ierr); ierr = VecAXPY(CKSAmat1,-d2,C2);CHKERRQ(ierr); /* Compute the initial checksum1(A) */ ierr = KSP_MatMultTranspose(ksp,Amat,C2,CKSAmat2);CHKERRQ(ierr); ierr = VecAXPY(CKSAmat2,-d2,C1);CHKERRQ(ierr); ierr = VecAXPY(CKSAmat2,-d1,C2);CHKERRQ(ierr); /* Compute the initial checksum2(A) */ itv_c = 2; itv_d = 10; /* Dingwen */ i = 0; do { /* Dingwen */ if ((i>0) && (i%itv_d == 0)) { ierr = VecXDot(C1,X,&sumX1);CHKERRQ(ierr); ierr = VecXDot(C1,R,&sumR1);CHKERRQ(ierr); if ((PetscAbsScalar(sumX1-CKSX1) > 1.0e-6) || (PetscAbsScalar(sumR1-CKSR1) > 1.0e-6)) { /* Rollback and Recovery */ if (rank==0) printf ("Recovery start...\n"); if (rank==0) printf ("Rollback from iteration-%d to iteration-%d\n",i,CKPi); betaold = CKPbetaold; /* Recovery scalar betaold by checkpoint*/ i = CKPi; /* Recovery integer i by checkpoint */ ierr = VecCopy(CKPP,P);CHKERRQ(ierr); /* Recovery vector P from checkpoint */ ierr = VecXDot(C1,P,&CKSP1);CHKERRQ(ierr); /* Recovery checksum1(P) by P */ ierr = VecXDot(C2,P,&CKSP2);CHKERRQ(ierr); /* Recovery checksum2(P) by P */ ierr = KSP_MatMult(ksp,Amat,P,W);CHKERRQ(ierr); /* Recovery vector W by P */ ierr = VecXDot(P,W,&dpi);CHKERRQ(ierr); /* Recovery scalar dpi by P and W */ ierr = VecCopy(CKPX,X);CHKERRQ(ierr); /* Recovery vector X from checkpoint */ ierr = VecXDot(C1,X,&CKSX1);CHKERRQ(ierr); /* Recovery checksum1(X) by X */ ierr = VecXDot(C2,X,&CKSX2);CHKERRQ(ierr); /* Recovery checksum2(X) by X */ ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); /* Recovery vector R by X */ ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); ierr = VecXDot(C1,R,&CKSR1);CHKERRQ(ierr); /* Recovery checksum1(R) by R */ ierr = VecXDot(C2,R,&CKSR2);CHKERRQ(ierr); /* Recovery checksum2(R) by R */ ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* Recovery vector Z by R */ ierr = VecXDot(C1,Z,&CKSZ1);CHKERRQ(ierr); /* Recovery checksum1(Z) by Z */ ierr = VecXDot(C2,Z,&CKSZ2);CHKERRQ(ierr); /* Recovery checksum2(Z) by Z */ ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); /* Recovery scalar beta by Z and R */ if (rank==0) printf ("Recovery end.\n"); } else if (i%(itv_c*itv_d) == 0) { if (rank==0) printf ("Checkpoint iteration-%d\n",i); ierr = VecCopy(X,CKPX);CHKERRQ(ierr); ierr = VecCopy(P,CKPP);CHKERRQ(ierr); CKPbetaold = betaold; CKPi = i; } } ksp->its = i+1; if (beta == 0.0) { ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"converged due to beta = 0\n");CHKERRQ(ierr); break; #if !defined(PETSC_USE_COMPLEX) } else if ((i > 0) && (beta*betaold < 0.0)) { ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo(ksp,"diverging due to indefinite preconditioner\n");CHKERRQ(ierr); break; #endif } if (!i) { ierr = VecCopy(Z,P);CHKERRQ(ierr); /* p <- z */ b = 0.0; /* Dingwen */ ierr = VecXDot(C1,P, &CKSP1);CHKERRQ(ierr); /* Compute the initial checksum1(P) */ ierr = VecXDot(C2,P, &CKSP2);CHKERRQ(ierr); /* Compute the initial checksum2(P) */ /* Dingwen */ } else { b = beta/betaold; if (eigs) { if (ksp->max_it != stored_max_it) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Can not change maxit AND calculate eigenvalues"); e[i] = PetscSqrtReal(PetscAbsScalar(b))/a; } ierr = VecAYPX(P,b,Z);CHKERRQ(ierr); /* p <- z + b* p */ /* Dingwen */ CKSP1 = CKSZ1 + b*CKSP1; /* Update checksum1(P) = checksum1(Z) + b*checksum1(P); */ CKSP2 = CKSZ2 + b*CKSP2; /* Update checksum2(P) = checksum2(Z) + b*checksum2(P); */ /* Dingwen */ } dpiold = dpi; if (!cg->singlereduction || !i) { ierr = KSP_MatMult(ksp,Amat,P,W);CHKERRQ(ierr); /* w <- Ap */ /* MVM */ ierr = VecXDot(P,W,&dpi);CHKERRQ(ierr); /* dpi <- p'w */ /* Dingwen */ ierr = VecXDot(CKSAmat1, P, &CKSW1);CHKERRQ(ierr); CKSW1 = CKSW1 + d1*CKSP1 + d2*CKSP2; /* Update checksum1(W) = checksum1(A)P + d1*checksum1(P) + d2*checksum2(P); */ ierr = VecXDot(CKSAmat2, P, &CKSW2);CHKERRQ(ierr); CKSW2 = CKSW2 + d2*CKSP1 + d1*CKSP2; /* Update checksum2(W) = checksum2(A)P + d2*checksum1(P) + d1*checksum2(P); */ if((i==41)&&(flag2)) { pos = 100; v = 1000; ierr = VecSetValue(W,pos,v,INSERT_VALUES);CHKERRQ(ierr); VecAssemblyBegin(W); VecAssemblyEnd(W); if (rank==0) printf ("Inject an error in %d-th element of vector W after MVM W=AP at iteration-%d\n", pos,i); flag2 = PETSC_FALSE; } PetscScalar delta1,delta2; PetscScalar sumW1,sumW2; ierr = VecXDot(C1,W,&sumW1);CHKERRQ(ierr); ierr = VecXDot(C2,W,&sumW2);CHKERRQ(ierr); delta1 = sumW1 - CKSW1; delta2 = sumW2 - CKSW2; if (PetscAbsScalar(delta1) > 1.0e-6) { VecScatterCreateToAll(W,&ctx,&W_SEQ); VecScatterBegin(ctx,W,W_SEQ,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(ctx,W,W_SEQ,INSERT_VALUES,SCATTER_FORWARD); VecGetArray(W_SEQ,&_W); pos = rint(delta2/delta1); v = _W[pos]; v = v - delta1; ierr = VecSetValues(W,1,&pos,&v,INSERT_VALUES);CHKERRQ(ierr); if (rank==0) printf ("Correct an error of %d-th elements of vector W after MVM W=AP at iteration-%d\n", pos, i); } } else { ierr = VecAYPX(W,beta/betaold,S);CHKERRQ(ierr); /* w <- Ap */ dpi = delta - beta*beta*dpiold/(betaold*betaold); /* dpi <- p'w */ /* Dingwen */ CKSW1 = beta/betaold*CKSW1 + CKSS1; /* Update checksum1(W) = checksum1(S) + beta/betaold*checksum1(W); */ CKSW2 = beta/betaold*CKSW2 + CKSS2; /* Update checksum2(W) = checksum2(S) + beta/betaold*checksum2(W); */ /* Dingwen */ } betaold = beta; KSPCheckDot(ksp,beta); if ((dpi == 0.0) || ((i > 0) && (PetscRealPart(dpi*dpiold) <= 0.0))) { ksp->reason = KSP_DIVERGED_INDEFINITE_MAT; ierr = PetscInfo(ksp,"diverging due to indefinite or negative definite matrix\n");CHKERRQ(ierr); break; } a = beta/dpi; /* a = beta/p'w */ if (eigs) d[i] = PetscSqrtReal(PetscAbsScalar(b))*e[i] + 1.0/a; ierr = VecAXPY(X,a,P);CHKERRQ(ierr); /* x <- x + ap */ /* Dingwen */ CKSX1 = CKSX1 + a*CKSP1; /* Update checksum1(X) = checksum1(X) + a*checksum1(P); */ CKSX2 = CKSX2 + a*CKSP2; /* Update checksum2(X) = checksum2(X) + a*checksum2(P); */ /* Dingwen */ ierr = VecAXPY(R,-a,W);CHKERRQ(ierr); /* r <- r - aw */ /* Dingwen */ CKSR1 = CKSR1 - a*CKSW1; /* Update checksum1(R) = checksum1(R) - a*checksum1(W); */ CKSR2 = CKSR2 - a*CKSW2; /* Update checksum2(R) = checksum2(R) - a*checksum2(W); */ /* Dingwen */ if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->chknorm < i+2) { ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ /* Dingwen */ ierr = VecXDot(C1,Z, &CKSZ1);CHKERRQ(ierr); /* Update checksum1(Z) */ ierr = VecXDot(C2,Z, &CKSZ2);CHKERRQ(ierr); /* Update checksum2(Z) */ /* Dingwen */ if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr); /* MVM */ /* Dingwen */ ierr = VecXDot(CKSAmat1, Z, &CKSS1);CHKERRQ(ierr); CKSS1 = CKSS1 + d1*CKSZ1 + d2*CKSZ2; /* Update checksum1(S) = checksum1(A)Z + d1*chekcsum1(Z) + d2*checksum2(Z); */ ierr = VecXDot(CKSAmat2, Z, &CKSS2);CHKERRQ(ierr); CKSS2 = CKSS2 + d2*CKSZ1 + d1*CKSZ2; /* Update checksum2(S) = checksum2(A)Z + d2*chekcsum1(Z) + d1*checksum2(Z); */ /* Dingwen */ } ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr); /* dp <- z'*z */ } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->chknorm < i+2) { ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- r'*r */ } else if (ksp->normtype == KSP_NORM_NATURAL) { ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ /* Dingwen */ ierr = VecXDot(C1,Z, &CKSZ1);CHKERRQ(ierr); /* Update checksum1(Z) */ ierr = VecXDot(C2,Z, &CKSZ2);CHKERRQ(ierr); /* Update checksum2(Z) */ /* Dingwen */ if (cg->singlereduction) { PetscScalar tmp[2]; Vec vecs[2]; vecs[0] = S; vecs[1] = R; ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr); ierr = VecMDot(Z,2,vecs,tmp);CHKERRQ(ierr); delta = tmp[0]; beta = tmp[1]; } else { ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); /* beta <- r'*z */ } KSPCheckDot(ksp,beta); dp = PetscSqrtReal(PetscAbsScalar(beta)); } else { dp = 0.0; } ksp->rnorm = dp; CHKERRQ(ierr);KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; if ((ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) || (ksp->chknorm >= i+2)) { ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ /* Dingwen */ ierr = VecXDot(C1,Z, &CKSZ1);CHKERRQ(ierr); /* Update checksum1(Z) */ ierr = VecXDot(C2,Z, &CKSZ2);CHKERRQ(ierr); /* Update checksum2(Z) */ /* Dingwen */ if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr); } } if ((ksp->normtype != KSP_NORM_NATURAL) || (ksp->chknorm >= i+2)) { if (cg->singlereduction) { PetscScalar tmp[2]; Vec vecs[2]; vecs[0] = S; vecs[1] = R; ierr = VecMDot(Z,2,vecs,tmp);CHKERRQ(ierr); delta = tmp[0]; beta = tmp[1]; } else { ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); /* beta <- z'*r */ } KSPCheckDot(ksp,beta); } i++; /* Dingwen */ /* Inject error */ if ((i==50)&&(flag1)) { pos = 1000; v = -1; ierr = VecSetValues(X,1,&pos,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(X);CHKERRQ(ierr); ierr = VecAssemblyEnd(X);CHKERRQ(ierr); flag1 = PETSC_FALSE; if (rank==0)printf ("Inject an error in vector X at the end of iteration-%d\n", i-1); } /* Dingwen */ } while (i<ksp->max_it); /* Dingwen */ ierr = VecXDot(C1,X,&sumX1);CHKERRQ(ierr); ierr = VecXDot(C1,R,&sumR1);CHKERRQ(ierr); ierr = VecXDot(C2,X,&sumX2);CHKERRQ(ierr); ierr = VecXDot(C2,R,&sumR2);CHKERRQ(ierr); if (rank==0) { printf ("sum1 of X = %f\n", sumX1); printf ("checksum1(X) = %f\n", CKSX1); printf ("sum2 of X = %f\n", sumX2); printf ("checksum2(X) = %f\n", CKSX2); printf ("sum1 of R = %f\n", sumR1); printf ("checksum1(R) = %f\n", CKSR1); printf ("sum2 of R = %f\n", sumR2); printf ("checksum2(R) = %f\n", CKSR2); } VecDestroy(&W_SEQ); VecScatterDestroy(&ctx); /* Dingwen */ if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; if (eigs) cg->ned = ksp->its; PetscFunctionReturn(0); }

static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,*tags,done,i; MPI_Aint lb,unitbytes; char *tdata; MPI_Request *sendreqs,*usendreqs,*req,barrier; PetscSegBuffer segrank,segdata,segreq; PetscBool barrier_started; PetscFunctionBegin; ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); for (i=0; i<ntags; i++) { ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); } ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); tdata = (char*)todata; ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); /* Post synchronous sends */ for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the * synchronous messages above. */ for (i=0; i<nto; i++) { PetscMPIInt k; for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, * but we need to work around it. */ barrier_started = PETSC_FALSE; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank,k; void *buf; ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); nrecvs++; } if (!barrier_started) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { #if defined(PETSC_HAVE_MPI_IBARRIER) ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); #elif defined(PETSC_HAVE_MPIX_IBARRIER) ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); #endif barrier_started = PETSC_TRUE; } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); *toreqs = usendreqs; ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); ierr = PetscFree(sendreqs);CHKERRQ(ierr); ierr = PetscFree(tags);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); PetscFunctionReturn(0); }

/* convert Petsc seqaij matrix to CSC: colptr[n], row[nz], val[nz] input: A - matrix in seqaij or mpisbaij (bs=1) format valOnly - FALSE: spaces are allocated and values are set for the CSC TRUE: Only fill values output: n - Size of the matrix colptr - Index of first element of each column in row and val row - Row of each element of the matrix values - Value of each element of the matrix */ PetscErrorCode MatConvertToCSC(Mat A,PetscBool valOnly,PetscInt *n,PetscInt **colptr,PetscInt **row,PetscScalar **values) { Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; PetscInt *rowptr = aa->i; PetscInt *col = aa->j; PetscScalar *rvalues = aa->a; PetscInt m = A->rmap->N; PetscInt nnz; PetscInt i,j, k; PetscInt base = 1; PetscInt idx; PetscErrorCode ierr; PetscInt colidx; PetscInt *colcount; PetscBool isSBAIJ; PetscBool isSeqSBAIJ; PetscBool isMpiSBAIJ; PetscBool isSym; PetscBool flg; PetscInt icntl; PetscInt verb; PetscInt check; PetscFunctionBegin; ierr = MatIsSymmetric(A,0.0,&isSym);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATSBAIJ,&isSBAIJ);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATMPISBAIJ,&isMpiSBAIJ);CHKERRQ(ierr); *n = A->cmap->N; /* PaStiX only needs triangular matrix if matrix is symmetric */ if (isSym && !(isSBAIJ || isSeqSBAIJ || isMpiSBAIJ)) nnz = (aa->nz - *n)/2 + *n; else nnz = aa->nz; if (!valOnly) { ierr = PetscMalloc(((*n)+1) *sizeof(PetscInt) ,colptr);CHKERRQ(ierr); ierr = PetscMalloc(nnz *sizeof(PetscInt) ,row);CHKERRQ(ierr); ierr = PetscMalloc1(nnz ,values);CHKERRQ(ierr); if (isSBAIJ || isSeqSBAIJ || isMpiSBAIJ) { ierr = PetscMemcpy (*colptr, rowptr, ((*n)+1)*sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < *n+1; i++) (*colptr)[i] += base; ierr = PetscMemcpy (*row, col, (nnz)*sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < nnz; i++) (*row)[i] += base; ierr = PetscMemcpy (*values, rvalues, (nnz)*sizeof(PetscScalar));CHKERRQ(ierr); } else { ierr = PetscMalloc1((*n),&colcount);CHKERRQ(ierr); for (i = 0; i < m; i++) colcount[i] = 0; /* Fill-in colptr */ for (i = 0; i < m; i++) { for (j = rowptr[i]; j < rowptr[i+1]; j++) { if (!isSym || col[j] <= i) colcount[col[j]]++; } } (*colptr)[0] = base; for (j = 0; j < *n; j++) { (*colptr)[j+1] = (*colptr)[j] + colcount[j]; /* in next loop we fill starting from (*colptr)[colidx] - base */ colcount[j] = -base; } /* Fill-in rows and values */ for (i = 0; i < m; i++) { for (j = rowptr[i]; j < rowptr[i+1]; j++) { if (!isSym || col[j] <= i) { colidx = col[j]; idx = (*colptr)[colidx] + colcount[colidx]; (*row)[idx] = i + base; (*values)[idx] = rvalues[j]; colcount[colidx]++; } } } ierr = PetscFree(colcount);CHKERRQ(ierr); } } else { /* Fill-in only values */ for (i = 0; i < m; i++) { for (j = rowptr[i]; j < rowptr[i+1]; j++) { colidx = col[j]; if ((isSBAIJ || isSeqSBAIJ || isMpiSBAIJ) ||!isSym || col[j] <= i) { /* look for the value to fill */ for (k = (*colptr)[colidx] - base; k < (*colptr)[colidx + 1] - base; k++) { if (((*row)[k]-base) == i) { (*values)[k] = rvalues[j]; break; } } /* data structure of sparse matrix has changed */ if (k == (*colptr)[colidx + 1] - base) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"overflow on k %D",k); } } } } icntl =-1; check = 0; ierr = PetscOptionsGetInt(((PetscObject) A)->prefix, "-mat_pastix_check", &icntl, &flg);CHKERRQ(ierr); if ((flg && icntl >= 0) || PetscLogPrintInfo) check = icntl; if (check == 1) { PetscScalar *tmpvalues; PetscInt *tmprows,*tmpcolptr; tmpvalues = (PetscScalar*)malloc(nnz*sizeof(PetscScalar)); if (!tmpvalues) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Unable to allocate memory"); tmprows = (PetscInt*) malloc(nnz*sizeof(PetscInt)); if (!tmprows) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Unable to allocate memory"); tmpcolptr = (PetscInt*) malloc((*n+1)*sizeof(PetscInt)); if (!tmpcolptr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Unable to allocate memory"); ierr = PetscMemcpy(tmpcolptr,*colptr,(*n+1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(tmprows,*row,nnz*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(tmpvalues,*values,nnz*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscFree(*row);CHKERRQ(ierr); ierr = PetscFree(*values);CHKERRQ(ierr); icntl=-1; verb = API_VERBOSE_NOT; /* "iparm[IPARM_VERBOSE] : level of printing (0 to 2)" */ ierr = PetscOptionsGetInt(((PetscObject) A)->prefix, "-mat_pastix_verbose", &icntl, &flg);CHKERRQ(ierr); if ((flg && icntl >= 0) || PetscLogPrintInfo) verb = icntl; PASTIX_CHECKMATRIX(MPI_COMM_WORLD,verb,((isSym != 0) ? API_SYM_YES : API_SYM_NO),API_YES,*n,&tmpcolptr,&tmprows,(PastixScalar**)&tmpvalues,NULL,1); ierr = PetscMemcpy(*colptr,tmpcolptr,(*n+1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMalloc1(((*colptr)[*n]-1),row);CHKERRQ(ierr); ierr = PetscMemcpy(*row,tmprows,((*colptr)[*n]-1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMalloc1(((*colptr)[*n]-1),values);CHKERRQ(ierr); ierr = PetscMemcpy(*values,tmpvalues,((*colptr)[*n]-1)*sizeof(PetscScalar));CHKERRQ(ierr); free(tmpvalues); free(tmprows); free(tmpcolptr); } PetscFunctionReturn(0); }

static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,done,i; MPI_Aint lb,unitbytes; char *tdata; MPI_Request *sendreqs,barrier; PetscSegBuffer segrank,segdata; PetscBool barrier_started; PetscFunctionBegin; ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); tdata = (char*)todata; ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, * but we need to work around it. */ barrier_started = PETSC_FALSE; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank; void *buf; ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); nrecvs++; } if (!barrier_started) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { #if defined(PETSC_HAVE_MPI_IBARRIER) ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); #elif defined(PETSC_HAVE_MPIX_IBARRIER) ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); #endif barrier_started = PETSC_TRUE; ierr = PetscFree(sendreqs);CHKERRQ(ierr); } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); PetscFunctionReturn(0); }

/* This should handle properly the cases where PetscInt is 32 or 64 and hsize_t is 32 or 64. These means properly casting with checks back and forth between the two types of variables. */ PetscErrorCode VecLoad_HDF5(Vec xin, PetscViewer viewer) { hid_t file_id, group, dset_id, filespace, memspace, plist_id; hsize_t rdim, dim; hsize_t dims[4], count[4], offset[4]; herr_t status; PetscInt n, N, bs = 1, bsInd, lenInd, low, timestep; PetscScalar *x; const char *vecname; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscViewerHDF5OpenGroup(viewer, &file_id, &group);CHKERRQ(ierr); ierr = PetscViewerHDF5GetTimestep(viewer, ×tep);CHKERRQ(ierr); ierr = VecGetBlockSize(xin,&bs);CHKERRQ(ierr); /* Create the dataset with default properties and close filespace */ ierr = PetscObjectGetName((PetscObject)xin,&vecname);CHKERRQ(ierr); #if (H5_VERS_MAJOR * 10000 + H5_VERS_MINOR * 100 + H5_VERS_RELEASE >= 10800) dset_id = H5Dopen2(group, vecname, H5P_DEFAULT); #else dset_id = H5Dopen(group, vecname); #endif if (dset_id == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not H5Dopen() with Vec named %s",vecname); /* Retrieve the dataspace for the dataset */ filespace = H5Dget_space(dset_id); if (filespace == -1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not H5Dget_space()"); dim = 0; if (timestep >= 0) ++dim; ++dim; if (bs >= 1) ++dim; #if defined(PETSC_USE_COMPLEX) ++dim; #endif rdim = H5Sget_simple_extent_dims(filespace, dims, NULL); #if defined(PETSC_USE_COMPLEX) bsInd = rdim-2; #else bsInd = rdim-1; #endif lenInd = timestep >= 0 ? 1 : 0; if (rdim != dim) { if (rdim == dim+1 && bs == -1) bs = dims[bsInd]; else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Dimension of array in file %d not %d as expected",rdim,dim); } else if (bs >= 1 && bs != (PetscInt) dims[bsInd]) { ierr = VecSetBlockSize(xin, dims[bsInd]);CHKERRQ(ierr); if (ierr) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Block size %d specified for vector does not match blocksize in file %d",bs,dims[bsInd]); bs = dims[bsInd]; } /* Set Vec sizes,blocksize,and type if not already set */ if ((xin)->map->n < 0 && (xin)->map->N < 0) { ierr = VecSetSizes(xin, PETSC_DECIDE, dims[lenInd]*bs);CHKERRQ(ierr); } /* If sizes and type already set,check if the vector global size is correct */ ierr = VecGetSize(xin, &N);CHKERRQ(ierr); if (N/bs != (PetscInt) dims[lenInd]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Vector in file different length (%d) then input vector (%d)", (PetscInt) dims[lenInd], N/bs); /* Each process defines a dataset and reads it from the hyperslab in the file */ ierr = VecGetLocalSize(xin, &n);CHKERRQ(ierr); dim = 0; if (timestep >= 0) { count[dim] = 1; ++dim; } ierr = PetscHDF5IntCast(n/bs,count + dim);CHKERRQ(ierr); ++dim; if (bs >= 1) { count[dim] = bs; ++dim; } #if defined(PETSC_USE_COMPLEX) count[dim] = 2; ++dim; #endif memspace = H5Screate_simple(dim, count, NULL); if (memspace == -1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not H5Screate_simple()"); /* Select hyperslab in the file */ ierr = VecGetOwnershipRange(xin, &low, NULL);CHKERRQ(ierr); dim = 0; if (timestep >= 0) { offset[dim] = timestep; ++dim; } ierr = PetscHDF5IntCast(low/bs,offset + dim);CHKERRQ(ierr); ++dim; if (bs >= 1) { offset[dim] = 0; ++dim; } #if defined(PETSC_USE_COMPLEX) offset[dim] = 0; ++dim; #endif status = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);CHKERRQ(status); /* Create property list for collective dataset read */ plist_id = H5Pcreate(H5P_DATASET_XFER); if (plist_id == -1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not H5Pcreate()"); #if defined(PETSC_HAVE_H5PSET_FAPL_MPIO) status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);CHKERRQ(status); #endif /* To write dataset independently use H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_INDEPENDENT) */ ierr = VecGetArray(xin, &x);CHKERRQ(ierr); status = H5Dread(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, plist_id, x);CHKERRQ(status); ierr = VecRestoreArray(xin, &x);CHKERRQ(ierr); /* Close/release resources */ if (group != file_id) { status = H5Gclose(group);CHKERRQ(status); } status = H5Pclose(plist_id);CHKERRQ(status); status = H5Sclose(filespace);CHKERRQ(status); status = H5Sclose(memspace);CHKERRQ(status); status = H5Dclose(dset_id);CHKERRQ(status); ierr = VecAssemblyBegin(xin);CHKERRQ(ierr); ierr = VecAssemblyEnd(xin);CHKERRQ(ierr); PetscFunctionReturn(0); }

int main(int argc,char **args) { PetscErrorCode ierr; #if defined(PETSC_USE_COMPLEX) || defined(PETSC_MISSING_LAPACK_DSTEBZ) || defined(PETSC_MISSING_LAPACK_STEIN) ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; SETERRQ(PETSC_COMM_WORLD,1,"This example requires LAPACK routines dstebz and stien and real numbers"); #else PetscReal *work,tols[2]; PetscInt i,j; PetscBLASInt n,il=1,iu=5,*iblock,*isplit,*iwork,nevs,*ifail,cklvl=2; PetscMPIInt size; PetscBool flg; Vec *evecs; PetscScalar *evecs_array,*D,*E,*evals; Mat T; PetscReal vl=0.0,vu=4.0,tol= 1000*PETSC_MACHINE_EPSILON; PetscBLASInt nsplit,info; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); n = 100; nevs = iu - il; ierr = PetscMalloc1(3*n+1,&D);CHKERRQ(ierr); E = D + n; evals = E + n; ierr = PetscMalloc1(5*n+1,&work);CHKERRQ(ierr); ierr = PetscMalloc1(3*n+1,&iwork);CHKERRQ(ierr); ierr = PetscMalloc1(3*n+1,&iblock);CHKERRQ(ierr); isplit = iblock + n; /* Set symmetric tridiagonal matrix */ for (i=0; i<n; i++) { D[i] = 2.0; E[i] = 1.0; } /* Solve eigenvalue problem: A*evec = eval*evec */ ierr = PetscPrintf(PETSC_COMM_SELF," LAPACKstebz_: compute %d eigenvalues...\n",nevs);CHKERRQ(ierr); LAPACKstebz_("I","E",&n,&vl,&vu,&il,&iu,&tol,(PetscReal*)D,(PetscReal*)E,&nevs,&nsplit,(PetscReal*)evals,iblock,isplit,work,iwork,&info); if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"LAPACKstebz_ fails. info %d",info); ierr = PetscPrintf(PETSC_COMM_SELF," LAPACKstein_: compute %d found eigenvectors...\n",nevs);CHKERRQ(ierr); ierr = PetscMalloc1(n*nevs,&evecs_array);CHKERRQ(ierr); ierr = PetscMalloc1(nevs,&ifail);CHKERRQ(ierr); LAPACKstein_(&n,(PetscReal*)D,(PetscReal*)E,&nevs,(PetscReal*)evals,iblock,isplit,evecs_array,&n,work,iwork,ifail,&info); if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"LAPACKstein_ fails. info %d",info); /* View evals */ ierr = PetscOptionsHasName(NULL,NULL, "-eig_view", &flg);CHKERRQ(ierr); if (flg) { ierr = PetscPrintf(PETSC_COMM_SELF," %d evals: \n",nevs);CHKERRQ(ierr); for (i=0; i<nevs; i++) {ierr = PetscPrintf(PETSC_COMM_SELF,"%D %g\n",i,(double)evals[i]);CHKERRQ(ierr);} } /* Check residuals and orthogonality */ ierr = MatCreate(PETSC_COMM_SELF,&T);CHKERRQ(ierr); ierr = MatSetSizes(T,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetType(T,MATSBAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(T);CHKERRQ(ierr); ierr = MatSetUp(T);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = MatSetValues(T,1,&i,1,&i,&D[i],INSERT_VALUES);CHKERRQ(ierr); if (i != n-1) { j = i+1; ierr = MatSetValues(T,1,&i,1,&j,&E[i],INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscMalloc1(nevs+1,&evecs);CHKERRQ(ierr); for (i=0; i<nevs; i++) { ierr = VecCreate(PETSC_COMM_SELF,&evecs[i]);CHKERRQ(ierr); ierr = VecSetSizes(evecs[i],PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(evecs[i]);CHKERRQ(ierr); ierr = VecPlaceArray(evecs[i],evecs_array+i*n);CHKERRQ(ierr); } tols[0] = 1.e-8; tols[1] = 1.e-8; ierr = CkEigenSolutions(cklvl,T,il-1,iu-1,evals,evecs,tols);CHKERRQ(ierr); for (i=0; i<nevs; i++) { ierr = VecResetArray(evecs[i]);CHKERRQ(ierr); } /* free space */ ierr = MatDestroy(&T);CHKERRQ(ierr); for (i=0; i<nevs; i++) { ierr = VecDestroy(&evecs[i]);CHKERRQ(ierr);} ierr = PetscFree(evecs);CHKERRQ(ierr); ierr = PetscFree(D);CHKERRQ(ierr); ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscFree(iwork);CHKERRQ(ierr); ierr = PetscFree(iblock);CHKERRQ(ierr); ierr = PetscFree(evecs_array);CHKERRQ(ierr); ierr = PetscFree(ifail);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; #endif }

PetscErrorCode PetscViewerFileSetName_Binary(PetscViewer viewer,const char name[]) { PetscMPIInt rank; PetscErrorCode ierr; size_t len; PetscViewer_Binary *vbinary = (PetscViewer_Binary*)viewer->data; const char *fname; char bname[PETSC_MAX_PATH_LEN],*gz; PetscBool found; PetscFileMode type = vbinary->btype; PetscFunctionBegin; if (type == (PetscFileMode) -1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call PetscViewerFileSetMode() before PetscViewerFileSetName()"); ierr = PetscViewerFileClose_Binary(viewer);CHKERRQ(ierr); ierr = PetscOptionsGetBool(((PetscObject)viewer)->prefix,"-viewer_binary_skip_info",&vbinary->skipinfo,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(((PetscObject)viewer)->prefix,"-viewer_binary_skip_options",&vbinary->skipoptions,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(((PetscObject)viewer)->prefix,"-viewer_binary_skip_header",&vbinary->skipheader,NULL);CHKERRQ(ierr); ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)viewer),&rank);CHKERRQ(ierr); /* copy name so we can edit it */ ierr = PetscStrallocpy(name,&vbinary->filename);CHKERRQ(ierr); /* if ends in .gz strip that off and note user wants file compressed */ vbinary->storecompressed = PETSC_FALSE; if (!rank && type == FILE_MODE_WRITE) { /* remove .gz if it ends library name */ ierr = PetscStrstr(vbinary->filename,".gz",&gz);CHKERRQ(ierr); if (gz) { ierr = PetscStrlen(gz,&len);CHKERRQ(ierr); if (len == 3) { *gz = 0; vbinary->storecompressed = PETSC_TRUE; } } } /* only first processor opens file if writeable */ if (!rank || type == FILE_MODE_READ) { if (type == FILE_MODE_READ) { /* possibly get the file from remote site or compressed file */ ierr = PetscFileRetrieve(PetscObjectComm((PetscObject)viewer),vbinary->filename,bname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); fname = bname; if (!rank && !found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot locate file: %s on node zero",vbinary->filename); else if (!found) { ierr = PetscInfo(viewer,"Nonzero processor did not locate readonly file\n");CHKERRQ(ierr); fname = 0; } } else fname = vbinary->filename; #if defined(PETSC_HAVE_O_BINARY) if (type == FILE_MODE_WRITE) { if ((vbinary->fdes = open(fname,O_WRONLY|O_CREAT|O_TRUNC|O_BINARY,0666)) == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot create file %s for writing",fname); } else if (type == FILE_MODE_READ && fname) { if ((vbinary->fdes = open(fname,O_RDONLY|O_BINARY,0)) == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file %s for reading",fname); } else if (type == FILE_MODE_APPEND) { if ((vbinary->fdes = open(fname,O_WRONLY|O_APPEND|O_BINARY,0)) == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file %s for writing",fname); } else if (fname) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Unknown file type"); #else if (type == FILE_MODE_WRITE) { if ((vbinary->fdes = creat(fname,0666)) == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot create file %s for writing",fname); } else if (type == FILE_MODE_READ && fname) { if ((vbinary->fdes = open(fname,O_RDONLY,0)) == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file %s for reading",fname); } else if (type == FILE_MODE_APPEND) { if ((vbinary->fdes = open(fname,O_WRONLY|O_APPEND,0)) == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file %s for writing",fname); } else if (fname) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Unknown file type"); #endif } else vbinary->fdes = -1; /* try to open info file: all processors open this file if read only */ if (!vbinary->skipinfo && (!rank || type == FILE_MODE_READ)) { char infoname[PETSC_MAX_PATH_LEN],iname[PETSC_MAX_PATH_LEN]; ierr = PetscStrcpy(infoname,name);CHKERRQ(ierr); /* remove .gz if it ends library name */ ierr = PetscStrstr(infoname,".gz",&gz);CHKERRQ(ierr); if (gz) { ierr = PetscStrlen(gz,&len);CHKERRQ(ierr); if (len == 3) *gz = 0; } ierr = PetscStrcat(infoname,".info");CHKERRQ(ierr); ierr = PetscFixFilename(infoname,iname);CHKERRQ(ierr); if (type == FILE_MODE_READ) { ierr = PetscFileRetrieve(PetscObjectComm((PetscObject)viewer),iname,infoname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); ierr = PetscOptionsInsertFile(PetscObjectComm((PetscObject)viewer),infoname,PETSC_FALSE);CHKERRQ(ierr); } else { vbinary->fdes_info = fopen(infoname,"w"); if (!vbinary->fdes_info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open .info file %s for writing",infoname); } } #if defined(PETSC_USE_LOG) PetscLogObjectState((PetscObject)viewer,"File: %s",name); #endif PetscFunctionReturn(0); }

static PetscErrorCode MatPartitioningApply_Parmetis(MatPartitioning part,IS *partitioning) { MatPartitioning_Parmetis *parmetis = (MatPartitioning_Parmetis*)part->data; PetscErrorCode ierr; PetscInt *locals = PETSC_NULL; Mat mat = part->adj,amat,pmat; PetscBool flg; PetscInt bs = 1; PetscFunctionBegin; ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIADJ,&flg);CHKERRQ(ierr); if (flg) { amat = mat; PetscObjectReference((PetscObject)amat);CHKERRQ(ierr); } else { /* bs indicates if the converted matrix is "reduced" from the original and hence the resulting partition results need to be stretched to match the original matrix */ ierr = MatConvert(mat,MATMPIADJ,MAT_INITIAL_MATRIX,&amat);CHKERRQ(ierr); if (amat->rmap->n > 0) bs = mat->rmap->n/amat->rmap->n; } ierr = MatMPIAdjCreateNonemptySubcommMat(amat,&pmat);CHKERRQ(ierr); ierr = MPI_Barrier(((PetscObject)part)->comm);CHKERRQ(ierr); if (pmat) { MPI_Comm pcomm = ((PetscObject)pmat)->comm,comm_pmetis; Mat_MPIAdj *adj = (Mat_MPIAdj*)pmat->data; PetscInt *vtxdist = pmat->rmap->range; PetscInt *xadj = adj->i; PetscInt *adjncy = adj->j; PetscInt itmp = 0,wgtflag=0, numflag=0, ncon=1, nparts=part->n, options[24], i, j; real_t *tpwgts,*ubvec; int status; #if defined(PETSC_USE_DEBUG) /* check that matrix has no diagonal entries */ { PetscInt rstart; ierr = MatGetOwnershipRange(pmat,&rstart,PETSC_NULL);CHKERRQ(ierr); for (i=0; i<pmat->rmap->n; i++) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (adjncy[j] == i+rstart) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row %d has diagonal entry; Parmetis forbids diagonal entry",i+rstart); } } } #endif ierr = PetscMalloc(amat->rmap->n*sizeof(PetscInt),&locals);CHKERRQ(ierr); if (PetscLogPrintInfo) {itmp = parmetis->printout; parmetis->printout = 127;} ierr = PetscMalloc(ncon*nparts*sizeof(real_t),&tpwgts);CHKERRQ(ierr); for (i=0; i<ncon; i++) { for (j=0; j<nparts; j++) { if (part->part_weights) { tpwgts[i*nparts+j] = part->part_weights[i*nparts+j]; } else { tpwgts[i*nparts+j] = 1./nparts; } } } ierr = PetscMalloc(ncon*sizeof(real_t),&ubvec);CHKERRQ(ierr); for (i=0; i<ncon; i++) { ubvec[i] = 1.05; } /* This sets the defaults */ options[0] = 0; for (i=1; i<24; i++) { options[i] = -1; } /* Duplicate the communicator to be sure that ParMETIS attribute caching does not interfere with PETSc. */ ierr = MPI_Comm_dup(pcomm,&comm_pmetis);CHKERRQ(ierr); status = ParMETIS_V3_PartKway(vtxdist,xadj,adjncy,part->vertex_weights,adj->values,&wgtflag,&numflag,&ncon,&nparts,tpwgts,ubvec,options,&parmetis->cuts,locals,&comm_pmetis);CHKERRQPARMETIS(status); ierr = MPI_Comm_free(&comm_pmetis);CHKERRQ(ierr); ierr = PetscFree(tpwgts);CHKERRQ(ierr); ierr = PetscFree(ubvec);CHKERRQ(ierr); if (PetscLogPrintInfo) {parmetis->printout = itmp;} } if (bs > 1) { PetscInt i,j,*newlocals; ierr = PetscMalloc(bs*amat->rmap->n*sizeof(PetscInt),&newlocals);CHKERRQ(ierr); for (i=0; i<amat->rmap->n; i++) { for (j=0; j<bs; j++) { newlocals[bs*i + j] = locals[i]; } } ierr = PetscFree(locals);CHKERRQ(ierr); ierr = ISCreateGeneral(((PetscObject)part)->comm,bs*amat->rmap->n,newlocals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr); } else { ierr = ISCreateGeneral(((PetscObject)part)->comm,amat->rmap->n,locals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr); } ierr = MatDestroy(&pmat);CHKERRQ(ierr); ierr = MatDestroy(&amat);CHKERRQ(ierr); PetscFunctionReturn(0); }

EXTERN_C_BEGIN /* MatGetOrdering_AMD - Find the Approximate Minimum Degree ordering This provides an interface to Tim Davis' AMD package (used by UMFPACK, CHOLMOD, MATLAB, etc). */ #undef __FUNCT__ #define __FUNCT__ "MatGetOrdering_AMD" PetscErrorCode MatGetOrdering_AMD(Mat mat,const MatOrderingType type,IS *row,IS *col) { PetscErrorCode ierr; PetscInt nrow,*ia,*ja,*perm; int status; PetscReal val; double Control[AMD_CONTROL],Info[AMD_INFO]; PetscBool tval,done; PetscFunctionBegin; /* AMD does not require that the matrix be symmetric (it does so internally, at least in so far as computing orderings for A+A^T. */ ierr = MatGetRowIJ(mat,0,PETSC_FALSE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr); if (!done) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot get rows for matrix type %s",((PetscObject)mat)->type_name); amd_AMD_defaults(Control); ierr = PetscOptionsBegin(((PetscObject)mat)->comm,((PetscObject)mat)->prefix,"AMD Options","Mat");CHKERRQ(ierr); /* We have to use temporary values here because AMD always uses double, even though PetscReal may be single */ val = (PetscReal)Control[AMD_DENSE]; ierr = PetscOptionsReal("-mat_ordering_amd_dense","threshold for \"dense\" rows/columns","None",val,&val,PETSC_NULL);CHKERRQ(ierr); Control[AMD_DENSE] = (double)val; tval = (PetscBool)Control[AMD_AGGRESSIVE]; ierr = PetscOptionsBool("-mat_ordering_amd_aggressive","use aggressive absorption","None",tval,&tval,PETSC_NULL);CHKERRQ(ierr); Control[AMD_AGGRESSIVE] = (double)tval; ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscMalloc(nrow*sizeof(PetscInt),&perm);CHKERRQ(ierr); status = amd_AMD_order(nrow,ia,ja,perm,Control,Info); switch (status) { case AMD_OK: break; case AMD_OK_BUT_JUMBLED: /* The result is fine, but PETSc matrices are supposed to satisfy stricter preconditions, so PETSc considers a * matrix that triggers this error condition to be invalid. */ SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_PLIB,"According to AMD, the matrix has unsorted and/or duplicate row indices"); case AMD_INVALID: amd_info(Info); SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_PLIB,"According to AMD, the matrix is invalid"); case AMD_OUT_OF_MEMORY: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_MEM,"AMD could not compute ordering"); default: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_LIB,"Unexpected return value"); } ierr = MatRestoreRowIJ(mat,0,PETSC_FALSE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_COPY_VALUES,row);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_OWN_POINTER,col);CHKERRQ(ierr); PetscFunctionReturn(0); }

static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp,PetscInt it) { KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscErrorCode ierr; PetscInt max_k = agmres->max_k; /* Size of the non-augmented Krylov basis */ PetscInt i, j; PetscInt r = agmres->r; /* current number of augmented eigenvectors */ PetscBLASInt KspSize; PetscBLASInt lC; PetscBLASInt N; PetscBLASInt ldH = N + 1; PetscBLASInt lwork; PetscBLASInt info, nrhs = 1; PetscFunctionBegin; ierr = PetscBLASIntCast(KSPSIZE,&KspSize);CHKERRQ(ierr); ierr = PetscBLASIntCast(4 * (KspSize+1),&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(KspSize+1,&lC);CHKERRQ(ierr); ierr = PetscBLASIntCast(MAXKSPSIZE + 1,&N);CHKERRQ(ierr); ierr = PetscBLASIntCast(N + 1,&ldH);CHKERRQ(ierr); /* Save a copy of the Hessenberg matrix */ for (j = 0; j < N-1; j++) { for (i = 0; i < N; i++) { *HS(i,j) = *H(i,j); } } /* QR factorize the Hessenberg matrix */ #if defined(PETSC_MISSING_LAPACK_GEQRF) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGEQRF INFO=%d", info); #endif /* Update the right hand side of the least square problem */ ierr = PetscMemzero(agmres->nrs, N*sizeof(PetscScalar));CHKERRQ(ierr); agmres->nrs[0] = ksp->rnorm; #if defined(PETSC_MISSING_LAPACK_ORMQR) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKormqr",LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XORMQR INFO=%d",info); #endif ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]); /* solve the least-square problem */ #if defined(PETSC_MISSING_LAPACK_TRTRS) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRTRS - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XTRTRS INFO=%d",info); #endif /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */ ierr = VecZeroEntries(VEC_TMP);CHKERRQ(ierr); ierr = VecMAXPY(VEC_TMP, max_k, agmres->nrs, &VEC_V(0));CHKERRQ(ierr); if (!agmres->DeflPrecond) { ierr = VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U);CHKERRQ(ierr); } if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); ierr = VecCopy(VEC_TMP_MATOP, VEC_TMP);CHKERRQ(ierr); } ierr = KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* add the solution to the previous one */ ierr = VecAXPY(ksp->vec_sol, 1.0, VEC_TMP);CHKERRQ(ierr); PetscFunctionReturn(0); }