int main(int argc, char** argv) { PC pc; PetscErrorCode ierr; PetscInt m, nn, M, j, k, ne = 4; PetscReal* coords; Vec x, rhs; Mat A; KSP ksp; PetscMPIInt npe, rank; PetscInitialize(&argc, &argv, NULL, NULL); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank); CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &npe); CHKERRQ(ierr); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Linear elasticity in 3D", ""); { char nestring[256]; ierr = PetscSNPrintf(nestring, sizeof nestring, "number of elements in each direction, ne+1 must be a multiple of %D (sizes^{1/3})", (PetscInt)(PetscPowReal((PetscReal)npe, 1.0 / 3.0) + 0.5)); ierr = PetscOptionsInt("-ne", nestring, "", ne, &ne, NULL); } ierr = PetscOptionsEnd(); CHKERRQ(ierr); const HpddmOption* const opt = HpddmOptionGet(); { HpddmOptionParse(opt, argc, argv, rank == 0); if (rank) HpddmOptionRemove(opt, "verbosity"); } nn = ne + 1; M = 3 * nn * nn * nn; if (npe == 2) { if (rank == 1) m = 0; else m = nn * nn * nn; npe = 1; } else { m = nn * nn * nn / npe; if (rank == npe - 1) m = nn * nn * nn - (npe - 1) * m; } m *= 3; ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); int i; { PetscInt Istart, Iend, jj, ic; const PetscInt NP = (PetscInt)(PetscPowReal((PetscReal)npe, 1.0 / 3.0) + 0.5); const PetscInt ipx = rank % NP, ipy = (rank % (NP * NP)) / NP, ipz = rank / (NP * NP); const PetscInt Ni0 = ipx * (nn / NP), Nj0 = ipy * (nn / NP), Nk0 = ipz * (nn / NP); const PetscInt Ni1 = Ni0 + (m > 0 ? (nn / NP) : 0), Nj1 = Nj0 + (nn / NP), Nk1 = Nk0 + (nn / NP); PetscInt *d_nnz, *o_nnz, osz[4] = {0, 9, 15, 19}, nbc; if (npe != NP * NP * NP) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "npe=%d: npe^{1/3} must be integer", npe); if (nn != NP * (nn / NP)) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "-ne %d: (ne+1)%(npe^{1/3}) must equal zero", ne); ierr = PetscMalloc1(m + 1, &d_nnz); CHKERRQ(ierr); ierr = PetscMalloc1(m + 1, &o_nnz); CHKERRQ(ierr); for (i = Ni0, ic = 0; i < Ni1; i++) { for (j = Nj0; j < Nj1; j++) { for (k = Nk0; k < Nk1; k++) { nbc = 0; if (i == Ni0 || i == Ni1 - 1) nbc++; if (j == Nj0 || j == Nj1 - 1) nbc++; if (k == Nk0 || k == Nk1 - 1) nbc++; for (jj = 0; jj < 3; jj++, ic++) { d_nnz[ic] = 3 * (27 - osz[nbc]); o_nnz[ic] = 3 * osz[nbc]; } } } } if (ic != m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "ic %D does not equal m %D", ic, m); ierr = MatCreate(PETSC_COMM_WORLD, &A); CHKERRQ(ierr); ierr = MatSetSizes(A, m, m, M, M); CHKERRQ(ierr); ierr = MatSetBlockSize(A, 3); CHKERRQ(ierr); ierr = MatSetType(A, MATAIJ); CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A, 0, d_nnz); CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz); CHKERRQ(ierr); ierr = PetscFree(d_nnz); CHKERRQ(ierr); ierr = PetscFree(o_nnz); CHKERRQ(ierr); ierr = MatGetOwnershipRange(A, &Istart, &Iend); CHKERRQ(ierr); if (m != Iend - Istart) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "m %D does not equal Iend %D - Istart %D", m, Iend, Istart); ierr = VecCreate(PETSC_COMM_WORLD, &x); CHKERRQ(ierr); ierr = VecSetSizes(x, m, M); CHKERRQ(ierr); ierr = VecSetBlockSize(x, 3); CHKERRQ(ierr); ierr = VecSetFromOptions(x); CHKERRQ(ierr); ierr = VecDuplicate(x, &rhs); CHKERRQ(ierr); ierr = PetscMalloc1(m + 1, &coords); CHKERRQ(ierr); coords[m] = -99.0; PetscReal h = 1.0 / ne; for (i = Ni0, ic = 0; i < Ni1; i++) { for (j = Nj0; j < Nj1; j++) { for (k = Nk0; k < Nk1; k++, ic++) { coords[3 * ic] = h * (PetscReal)i; coords[3 * ic + 1] = h * (PetscReal)j; coords[3 * ic + 2] = h * (PetscReal)k; } } } } PetscReal s_r[SIZE_ARRAY_R] = {30, 0.1, 20, 10}; PetscReal x_r[SIZE_ARRAY_R] = {0.5, 0.4, 0.4, 0.4}; PetscReal y_r[SIZE_ARRAY_R] = {0.5, 0.5, 0.4, 0.4}; PetscReal z_r[SIZE_ARRAY_R] = {0.5, 0.45, 0.4, 0.35}; PetscReal r[SIZE_ARRAY_R] = {0.5, 0.5, 0.4, 0.4}; AssembleSystem(A, rhs, s_r[0], x_r[0], y_r[0], z_r[0], r[0], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); MatNullSpace matnull; Vec vec_coords; PetscScalar* c; ierr = VecCreate(MPI_COMM_WORLD, &vec_coords); CHKERRQ(ierr); ierr = VecSetBlockSize(vec_coords, 3); CHKERRQ(ierr); ierr = VecSetSizes(vec_coords, m, PETSC_DECIDE); CHKERRQ(ierr); ierr = VecSetUp(vec_coords); CHKERRQ(ierr); ierr = VecGetArray(vec_coords, &c); CHKERRQ(ierr); for (i = 0; i < m; i++) c[i] = coords[i]; ierr = VecRestoreArray(vec_coords, &c); CHKERRQ(ierr); ierr = MatNullSpaceCreateRigidBody(vec_coords, &matnull); CHKERRQ(ierr); ierr = MatSetNearNullSpace(A, matnull); CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&matnull); CHKERRQ(ierr); ierr = VecDestroy(&vec_coords); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); double time = MPI_Wtime(); ierr = KSPSetUp(ksp); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime() - time; ierr = PetscPrintf(PETSC_COMM_WORLD, "--- PC setup = %f\n", time); CHKERRQ(ierr); float t_time[SIZE_ARRAY_R]; int t_its[SIZE_ARRAY_R]; { { ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); ierr = KSPReset(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; PetscInt its; ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); t_its[i] = its; ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); if (i == (SIZE_ARRAY_R - 1)) AssembleSystem(A, rhs, s_r[0], x_r[0], y_r[0], z_r[0], r[0], ne, npe, rank, nn, m); else AssembleSystem(A, rhs, s_r[i + 1], x_r[i + 1], y_r[i + 1], z_r[i + 1], r[i + 1], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_R > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } { ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); HpddmCustomOperator H; H._A = A; H._M = pc; H._mv = mv; H._precond = precond; H._b = rhs; H._x = x; int n; MatGetLocalSize(A, &n, NULL); { ierr = VecZeroEntries(x); K* pt_rhs; K* pt_x; VecGetArray(rhs, &pt_rhs); VecGetArray(x, &pt_x); int previous = HpddmOptionVal(opt, "verbosity"); if (previous > 0) HpddmOptionRemove(opt, "verbosity"); HpddmCustomOperatorSolve(&H, n, H._mv, H._precond, pt_rhs, pt_x, 1, &PETSC_COMM_WORLD); if (previous > 0) { char buffer[20]; snprintf(buffer, 20, "%d", previous); char* concat = malloc(strlen("-hpddm_verbosity ") + strlen(buffer) + 1); strcpy(concat, "-hpddm_verbosity "); strcat(concat, buffer); HpddmOptionParseString(opt, concat); free(concat); } VecRestoreArray(x, &pt_x); VecRestoreArray(rhs, &pt_rhs); previous = HpddmOptionVal(opt, "krylov_method"); if(previous == 4 || previous == 5) HpddmDestroyRecycling(); ierr = KSPReset(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); K* pt_rhs; K* pt_x; VecGetArray(rhs, &pt_rhs); VecGetArray(x, &pt_x); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); t_its[i] = HpddmCustomOperatorSolve(&H, n, H._mv, H._precond, pt_rhs, pt_x, 1, &PETSC_COMM_WORLD); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; VecRestoreArray(x, &pt_x); VecRestoreArray(rhs, &pt_rhs); ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); if (i != (SIZE_ARRAY_R - 1)) { AssembleSystem(A, rhs, s_r[i + 1], x_r[i + 1], y_r[i + 1], z_r[i + 1], r[i + 1], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_R > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } ierr = KSPDestroy(&ksp); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&rhs); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = PetscFree(coords); CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc, char** argv) { DM da; PetscErrorCode ierr; Vec x, rhs; Mat A, jac; ierr = PetscInitialize(&argc, &argv, NULL, NULL); CHKERRQ(ierr); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Laplacian in 2D", ""); CHKERRQ(ierr); ierr = PetscOptionsEnd(); CHKERRQ(ierr); ierr = HpddmRegisterKSP(); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); double time = MPI_Wtime(); ierr = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 10, 10, PETSC_DECIDE, PETSC_DECIDE, 1, 1, 0, 0, &da); CHKERRQ(ierr); ierr = DMSetFromOptions(da); CHKERRQ(ierr); ierr = DMSetUp(da); CHKERRQ(ierr); ierr = DMCreateGlobalVector(da, &rhs); CHKERRQ(ierr); ierr = DMCreateGlobalVector(da, &x); CHKERRQ(ierr); ierr = DMCreateMatrix(da, &A); CHKERRQ(ierr); ierr = DMCreateMatrix(da, &jac); CHKERRQ(ierr); ierr = ComputeMatrix(da, jac, A); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime() - time; ierr = PetscPrintf(PETSC_COMM_WORLD, "--- Mat assembly = %f\n", time); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); KSP ksp; ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); CHKERRQ(ierr); ierr = KSPSetDM(ksp, da); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetDMActive(ksp, PETSC_FALSE); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime() - time; ierr = PetscPrintf(PETSC_COMM_WORLD, "--- PC setup = %f\n", time); CHKERRQ(ierr); PetscScalar nus[SIZE_ARRAY_NU] = {0.1, 10.0, 0.001, 100.0}; float t_time[SIZE_ARRAY_NU]; int t_its[SIZE_ARRAY_NU]; int i, j; for (j = 0; j < 2; ++j) { { if (j == 1) { ierr = KSPSetType(ksp, "hpddm"); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); ierr = VecZeroEntries(x); CHKERRQ(ierr); } ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); if (j == 1) { const HpddmOption* const opt = HpddmOptionGet(); int previous = HpddmOptionVal(opt, "krylov_method"); if (previous == HPDDM_KRYLOV_METHOD_GCRODR || previous == HPDDM_KRYLOV_METHOD_BGCRODR) HpddmDestroyRecycling(); } } for (i = 0; i < SIZE_ARRAY_NU; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); ierr = ComputeRHS(da, rhs, nus[i]); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; PetscInt its; ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); t_its[i] = its; ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_NU; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_NU > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } ierr = KSPDestroy(&ksp); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&rhs); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = MatDestroy(&jac); CHKERRQ(ierr); ierr = DMDestroy(&da); CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
int main(int argc, char** argv) { MPI_Init(&argc, &argv); /*# Init #*/ int rankWorld, sizeWorld; MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld); MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); const HpddmOption* const opt = HpddmOptionGet(); HpddmOptionParse(opt, argc, argv, rankWorld == 0); { char* val[4] = { "Nx=<100>", "Ny=<100>", "overlap=<1>", "generate_random_rhs=<0>" }; char* desc[4] = { "Number of grid points in the x-direction.", "Number of grid points in the y-direction.", "Number of grid points in the overlap.", "Number of generated random right-hand sides." }; HpddmOptionParseInts(opt, argc, argv, 4, val, desc); val[0] = "symmetric_csr=(0|1)"; desc[0] = "Assemble symmetric matrices."; val[1] = "nonuniform=(0|1)"; desc[1] = "Use a different number of eigenpairs to compute on each subdomain."; HpddmOptionParseArgs(opt, argc, argv, 2, val, desc); } int sizes[8]; int* connectivity[8]; int o[8]; int neighbors = 0; HpddmMatrixCSR* Mat, *MatNeumann = NULL; K* f, *sol; underlying_type* d; int ndof; generate(rankWorld, sizeWorld, &neighbors, o, sizes, connectivity, &ndof, &Mat, &MatNeumann, &d, &f, &sol); unsigned short mu = HpddmOptionApp(opt, "generate_random_rhs"); int status = 0; if(sizeWorld > 1) { HpddmSchwarz* A = HpddmSchwarzCreate(Mat, neighbors, o, sizes, connectivity); for(int i = 0; i < neighbors; ++i) free(connectivity[i]); HpddmSchwarzMultiplicityScaling(A, d); HpddmSchwarzInitialize(A, d); if(mu != 0) HpddmSchwarzScaledExchange(A, f, mu); else mu = 1; if(HpddmOptionSet(opt, "schwarz_coarse_correction")) { double* addr = HpddmOptionAddr(opt, "geneo_nu"); unsigned short nu = *addr; if(nu > 0) { if(HpddmOptionApp(opt, "nonuniform")) *addr += MAX((int)(-*addr + 1), pow(-1, rankWorld) * rankWorld); HpddmSchwarzSolveGEVP(A, MatNeumann); nu = HpddmOptionVal(opt, "geneo_nu"); } else { nu = 1; K** deflation = malloc(sizeof(K*)); *deflation = malloc(sizeof(K) * ndof); for(int i = 0; i < ndof; ++i) deflation[0][i] = 1.0; HpddmSetVectors(HpddmSchwarzPreconditioner(A), deflation); } HpddmInitializeCoarseOperator(HpddmSchwarzPreconditioner(A), nu); HpddmSchwarzBuildCoarseOperator(A, MPI_COMM_WORLD); /*# FactorizationEnd #*/ } HpddmSchwarzCallNumfact(A); if(rankWorld != 0) HpddmOptionRemove(opt, "verbosity"); const MPI_Comm* comm = HpddmGetCommunicator(HpddmSchwarzPreconditioner(A)); /*# Solution #*/ int it = HpddmSolve(A, f, sol, mu, comm); /*# SolutionEnd #*/ underlying_type* storage = malloc(sizeof(underlying_type) * 2 * mu); HpddmSchwarzComputeResidual(A, sol, f, storage, mu); if(rankWorld == 0) for(unsigned short nu = 0; nu < mu; ++nu) { if(nu == 0) printf(" --- residual = "); else printf(" "); printf("%e / %e", storage[1 + 2 * nu], storage[2 * nu]); if(mu > 1) printf(" (rhs #%d)", nu + 1); printf("\n"); } if(it > ((int)HpddmOptionVal(opt, "krylov_method") == 6 ? 60 : 45)) status = 1; else { for(unsigned short nu = 0; nu < mu; ++nu) if(storage[1 + 2 * nu] / storage[2 * nu] > 1.0e-2) status = 1; } free(storage); if(HpddmOptionVal(opt, "geneo_nu") == 0) HpddmDestroyVectors(HpddmSchwarzPreconditioner(A)); HpddmSchwarzDestroy(A); } else { HpddmSubdomain* S = NULL; HpddmSubdomainNumfact(&S, Mat); mu = MAX(1, mu); HpddmSubdomainSolve(S, f, sol, mu); int one = 1; underlying_type* nrmb = malloc(sizeof(underlying_type) * 2 * mu); for(unsigned short nu = 0; nu < mu; ++nu) nrmb[nu] = nrm2(&ndof, f + nu * ndof, &one); K* tmp = malloc(sizeof(K) * mu * ndof); HpddmCSRMM(Mat, sol, tmp, mu); K minus = -1; ndof *= mu; axpy(&ndof, &minus, f, &one, tmp, &one); ndof /= mu; underlying_type* nrmAx = nrmb + mu; for(unsigned short nu = 0; nu < mu; ++nu) { nrmAx[nu] = nrm2(&ndof, tmp + nu * ndof, &one); if(nu == 0) printf(" --- residual = "); else printf(" "); printf("%e / %e", nrmAx[nu], nrmb[nu]); if(mu > 1) printf(" (rhs #%d)", nu + 1); printf("\n"); if(nrmAx[nu] / nrmb[nu] > (sizeof(underlying_type) == sizeof(double) ? 1.0e-6 : 1.0e-2)) status = 1; } free(tmp); free(nrmb); HpddmSubdomainDestroy(S); HpddmMatrixCSRDestroy(Mat); } free(d); if(HpddmOptionSet(opt, "schwarz_coarse_correction") && HpddmOptionVal(opt, "geneo_nu") > 0) HpddmMatrixCSRDestroy(MatNeumann); free(sol); free(f); MPI_Finalize(); return status; }