static void do_test(MAP *rmap, MAP *cmap) { MAT *A; VEC *x, *y; INT i, j, m0, n0; INT *cols = phgAlloc(cmap->nglobal * sizeof(*cols)); FLOAT *data = phgAlloc(cmap->nglobal * sizeof(*data)); A = phgMapCreateMat(rmap, cmap); m0 = A->rmap->partition[A->rmap->rank]; n0 = A->cmap->partition[A->cmap->rank]; /* Matrix entries: A(I,J) = 1 + (I-1) + (J-1) */ for (i = m0; i < m0 + A->rmap->nlocal; i++) { #if 0 /* Test MatAddEntry */ for (j = 0; j < A->cmap->nglobal; j++) phgMatAddGlobalEntry(A, i, j, 1.0 + i + j); #else /* Test MatAddEntries */ for (j = 0; j < A->cmap->nglobal; j++) { cols[j] = j; data[j] = 1.0 + i + j; } phgMatAddGlobalEntries(A, 1, &i, A->cmap->nglobal, cols, data); #endif } phgFree(cols); phgFree(data); phgMatAssemble(A); phgInfo(-1, "y = A * x\n"); x = phgMapCreateVec(A->cmap, 1); for (i = 0; i < x->map->nlocal; i++) x->data[i] = 1.0 + i + n0; phgVecAssemble(x); y = phgMatVec(MAT_OP_N, 1.0, A, x, 0.0, NULL); for (i = 0; i < y->map->nlocal; i++) phgInfo(-1, " y->data[%d] = %lg\n", i + m0, (double)y->data[i]); phgVecDestroy(&x); phgVecDestroy(&y); phgInfo(-1, "y = A' * x\n"); x = phgMapCreateVec(A->rmap, 1); for (i = 0; i < x->map->nlocal; i++) x->data[i] = 1.0 + i + m0; phgVecAssemble(x); y = phgMatVec(MAT_OP_T, 1.0, A, x, 0.0, NULL); for (i = 0; i < y->map->nlocal; i++) phgInfo(-1, " y->data[%d] = %lg\n", i + n0, (double)y->data[i]); phgVecDestroy(&x); phgVecDestroy(&y); phgMatDestroy(&A); }
/* PC_PROC function which applies the Sherman-Morrison-Woodbury formula */ static void sherman(void *ctx, VEC *b0, VEC **x0) { SOLVER *solver = ctx; VEC *U = ((void **)solver->mat->mv_data)[1]; SOLVER *solver1 = ((void **)solver->mat->mv_data)[2]; FLOAT *C = ((void **)solver->mat->mv_data)[3]; INT *pvt = ((void **)solver->mat->mv_data)[4]; VEC *x = *x0, *y; INT i, j, n = U->map->nlocal, K = U->nvec; FLOAT *z; phgPrintf("Note: applying the Sherman-Morrison-Woodbury formula.\n"); /* x := inv(A) * b */ #if 0 memset(x->data, 0, n * sizeof(*x->data)); #endif z = solver1->rhs->data; /* save solver1->rhs->data */ solver1->rhs->data = b0->data; solver1->rhs->assembled = TRUE; phgSolverVecSolve(solver1, FALSE, x); solver1->rhs->data = z; /* restore solver1->rhs->data */ if (K == 0) return; /* z := U^t * x */ z = phgCalloc(K, sizeof(*z)); for (j = 0; j < K; j++) for (i = 0; i < n; i++) z[j] += U->data[j * n + i] * x->data[i]; #if USE_MPI if (U->map->nprocs > 1) { FLOAT *tmp = phgAlloc(K * sizeof(*tmp)); MPI_Allreduce(z, tmp, K, PHG_MPI_FLOAT, MPI_SUM, U->map->comm); phgFree(z); z = tmp; } #endif /* USE_MPI */ /* z := C*z */ phgSolverDenseSV(K, C, pvt, 1, z); /* y := inv(A) * (U*z) */ memset(solver1->rhs->data, 0, n * sizeof(*solver1->rhs->data)); for (i = 0; i < n; i++) for (j = 0; j < K; j++) solver1->rhs->data[i] += U->data[j * n + i] * z[j]; y = phgMapCreateVec(U->map, 1); phgSolverVecSolve(solver1, FALSE, y); phgFree(z); /* x -= y */ for (i = 0; i < n; i++) x->data[i] -= y->data[i]; phgVecDestroy(&y); }
/* Preconditioning procedure using diag(matFu, matFu, matFu) */ static void pc_proc1(void *ctx, VEC *b0, VEC **x0) { SOLVER *pc_solver = (SOLVER *) ctx; int Nu = matF->rmap->nlocal; int k, nits; FLOAT res; INT i; VEC *tmp = phgMapCreateVec(pc_solver->rhs->map, 1); INT Nu0 = tmp->map->nlocal; assert(pc_solver->mat == matFu); assert(Nu == Nu0 * 3); nits = 0; res = 0.; for (k = 0; k < 3; k++) { for (i = 0; i < Nu0; i++) pc_solver->rhs->data[i] = b0->data[3 * i + k]; pc_solver->rhs->assembled = TRUE; pc_solver->rhs_updated = TRUE; bzero(tmp->data, sizeof(*tmp->data) * Nu0); phgSolverVecSolve(pc_solver, FALSE, tmp); for (i = 0; i < Nu0; i++) (*x0)->data[3 * i + k] = tmp->data[i]; if (nits < pc_solver->nits) nits = pc_solver->nits; if (res < pc_solver->residual) res = pc_solver->residual; } phgVecDestroy(&tmp); pc_solver->nits = nits; pc_solver->residual = res; return; }
/* * Jacobi smoother. * * Implement using phgMatvec * ref: phgSolverJacobi, solver-phg.c * */ void mg_Jacobi(MAT *A, VEC *x, VEC *b, int nsmooth, void *ctx) { VEC *r; INT i, k; FLOAT dx, omega = _p->smooth_damp; if (A->diag == NULL) phgMatSetupDiagonal(A); r = phgMapCreateVec(b->map, 1); for (k = 0; k < nsmooth; k++) { phgVecCopy(b, &r); phgMatVec(MAT_OP_N, -1.0, A, x, 1.0, &r); for (i = 0; i < A->cmap->nlocal; i++) { assert(fabs(A->diag[i]) > 1e-12); dx = r->data[i] / A->diag[i]; x->data[i] += omega * dx; } } phgVecDestroy(&r); return; }
/* * Jacobi smoother2: * Implement using details of matvec * ref: matvec in matvec.c * * */ void mg_Jacobi2(MAT *A, VEC *x, VEC *b, int nsmooth, void *ctx) { INT i, j, k, n, *pc, *pc_offp; FLOAT *pd, *pd_offp, *vx, *vx0, *vb; FLOAT sum, omega = _p->smooth_damp;; VEC *x0; #if USE_MPI FLOAT *offp_data = NULL; #endif /* USE_MPI */ MagicCheck(VEC, x); MagicCheck(VEC, b); assert(x == NULL || x->nvec == 1); assert(b == NULL || b->nvec == 1); if (x != NULL && !x->assembled) phgVecAssemble(x); if (b != NULL && !b->assembled) phgVecAssemble(b); x0 = phgMapCreateVec(x->map, 1); assert(A->type != PHG_DESTROYED); if (!A->assembled) phgMatAssemble(A); assert(A->type != PHG_MATRIX_FREE); phgMatPack(A); #if USE_MPI if (A->cmap->nprocs > 1) { offp_data = phgAlloc(A->cinfo->rsize * sizeof(*offp_data)); } #endif /* USE_MPI */ if (A->cmap->nlocal != A->rmap->nlocal || A->cmap->nlocal != x->map->nlocal || A->cmap->nlocal != b->map->nlocal) phgError(1, "%s:%d: inconsistent matrix-vector.", __FILE__, __LINE__); #if USE_MPI if (A->cmap->nprocs > 1) { phgMapScatterBegin(A->cinfo, x->nvec, x->data, offp_data); phgMapScatterEnd(A->cinfo, x->nvec, x->data, offp_data); } #endif /* USE_MPI */ /* iteration */ for (k = 0; k < nsmooth; k++) { phgVecCopy(x, &x0); /* multiply with local data */ vx = x->data; vx0 = x0->data; vb = b->data; pc = A->packed_cols; pd = A->packed_data; if (A->cmap->nprocs > 1) { pc_offp = A->packed_cols + A->rmap->nlocal + A->nnz_d; pd_offp = A->packed_data + A->nnz_d; } else { pc_offp = NULL; pd_offp = NULL; } for (i = 0; i < A->rmap->nlocal; i++) { INT jcol; FLOAT aa = 0., dx; /* x_i = (b_i - \sum_{j ~= i} a_ij * x_j) / a_ii */ sum = vb[i]; /* local data */ if ((n = *(pc++)) != 0) { for (j = 0; j < n; j++) { jcol = *(pc++); if (jcol != i) { sum -= *(pd++) * vx0[jcol]; } else { aa = *(pd++); assert(fabs(aa) > 1e-14); } } } /* remote data */ if (pc_offp != NULL && (n = *(pc_offp++)) != 0) { for (j = 0; j < n; j++) { jcol = *(pc_offp++); sum -= *(pd_offp++) * offp_data[jcol]; } } dx = sum / aa - vx[i]; vx[i] += omega * dx; } #if USE_MPI if (A->cmap->nprocs > 1) { phgMapScatterBegin(A->cinfo, x->nvec, x->data, offp_data); phgMapScatterEnd(A->cinfo, x->nvec, x->data, offp_data); } #endif /* USE_MPI */ } phgVecDestroy(&x0); #if USE_MPI phgFree(offp_data); #endif /* USE_MPI */ return; }
/**************************************************************** * Build RHS which is the residual of the nonlinear system. ***************************************************************/ void phgNSBuildSolverURHS(NSSolver *ns) { GRID *g = ns->g; SIMPLEX *e; SOLVER *solver_u = ns->solver_u; int i, k, l, q, s; FLOAT *dt = ns->dt; BOOLEAN tstep_minus = (ns->u[-1] != NULL); VEC *vec_rhs = phgMapCreateVec(solver_u->rhs->map, 1); FLOAT Theta = _nsp->Theta, nu = _nsp->nu, Thet1; int viscosity_type = ns->viscosity_type; SURF_BAS *surf_bas = ns->surf_bas; DOF *surf_dof = surf_bas->dof; BOOLEAN *rotated = surf_bas->rotated; const FLOAT *Trans = DofData(surf_dof); #if STEADY_STATE assert(fabs(Theta - 1) < 1e-12); Thet1 = 0; Unused(Thet1); Unused(dt); #else Thet1 = 1 - Theta; Unused(dt); #endif /* STEADY_STATE */ phgPrintf(" DB_mask: ["); for (k = 0; k < Dim; k++) phgPrintf("%d ", ns->u[1]->DB_masks[k]); phgPrintf("] "); nu_max = -1e10; nu_min = +1e10; phgVecDisassemble(vec_rhs); ForAllElements(g, e) { int M = ns->u[1]->type->nbas; /* num of bases of Velocity */ int N = ns->p[1]->type->nbas; /* num of bases of Pressure */ int order = DofTypeOrder(ns->u[1], e) * 3 - 1; /* Note: * quad order is really high here, * highest order term (u \nabla u, phi) */ FLOAT bufu[M], bufp[N], rhsu[M][Dim], rhsp[N]; INT Iu[M][Dim], Ip[N]; QUAD *quad; FLOAT vol, area, det; const FLOAT *w, *p, *normal, **vu, *vu_queue[3], *vf[2], *gu[2], *vp[2], *vw, *vT; FLOAT *vf_cache[2]; vu = vu_queue + 1; quad = phgQuadGetQuad3D(order); vu[0] = phgQuadGetDofValues(e, ns->u[0], quad); /* u^{n} */ vp[0] = phgQuadGetDofValues(e, ns->p[0], quad); /* p^{n} */ gu[0] = phgQuadGetDofValues(e, ns->gradu[0], quad); /* grad u^{n} */ vw = phgQuadGetDofValues(e, ns->wind, quad); /* wind */ vT = phgQuadGetDofValues(e, ns->T[1], quad); /* T^{n} */ if (tstep_minus) { vu[-1] = phgQuadGetDofValues(e, ns->u[-1], quad); /* u^{n-1} */ } else { vu[-1] = vu[0]; } #if STEADY_STATE || TIME_DEP_NON vu[1] = phgQuadGetDofValues(e, ns->u[1], quad); /* u^{n+1} */ gu[1] = phgQuadGetDofValues(e, ns->gradu[1], quad); /* grad u^{n} */ vp[1] = phgQuadGetDofValues(e, ns->p[1], quad); /* p^{n+1} */ #else TIME_DEP_LINEAR_ENTRY; /* Unavailable */ #endif /* STEADY_STATE || TIME_DEP_NON */ Unused(l); Unused(vf); Unused(vf_cache); if (!_nsp->no_extern_source) { /* cache f values */ for (l = 0; l < 2; l++) { const FLOAT *cache; size_t cache_size; setFuncTime(ns->time[l]); /* set static time in ins-test.c */ /* cache f */ cache_size = Dim * quad->npoints * sizeof(FLOAT); cache = phgQuadGetFuncValues(g, e, Dim, func_f, quad); vf[l] = vf_cache[l] = phgAlloc(cache_size); memcpy(vf_cache[l], cache, cache_size); phgQuadGetFuncValues(NULL, NULL, 0, NULL, NULL); /* clear cache */ } } /* Global Matrix */ Bzero(rhsu); Bzero(rhsp); p = quad->points; w = quad->weights; for (q = 0; q < quad->npoints; q++) { phgGeomGetCurvedJacobianAtLambda(g, e, p, &det); vol = fabs(det / 6.); /* rhs u */ for (i = 0; i < M; i++) { /* interior node or Neumann */ const FLOAT *gi_u = phgQuadGetBasisValues(e, ns->u[1], i, quad) + q; /* phi_i */ const FLOAT *ggi_u = phgQuadGetBasisCurvedGradient(e, ns->u[1], i, quad, q); /* grad phi_i */ for (k = 0; k < Dim; k++) { #if ICE_BENCH_TEST nu = get_effective_viscosity(gu[1], 0, 0, viscosity_type); FLOAT eu[DDim]; MAT3_SYM(gu[1], eu); rhsu[i][k] += vol*(*w) * EQU_SCALING * (- nu * INNER_PRODUCT(eu+k*Dim, ggi_u) + (*vp[1]) * *(ggi_u+k) * LEN_SCALING * PRES_SCALING ); /* left */ if (k == Z_DIR) { const FLOAT rho = RHO_ICE; const FLOAT grav = GRAVITY; const FLOAT a = SEC_PER_YEAR; const FLOAT f = rho*grav * EQU_SCALING * LEN_SCALING2; Unused(a); rhsu[i][k] += vol*(*w) * (-f * (*gi_u) ); /* right */ } #elif ESIMINT_TEST || \ HEINO_TEST || \ TEST_CASE == ICE_GREEN_LAND nu = get_effective_viscosity(gu[1], *vT, 0, viscosity_type); FLOAT eu[DDim]; MAT3_SYM(gu[1], eu); rhsu[i][k] += vol*(*w) * EQU_SCALING * (- nu * INNER_PRODUCT(eu+k*Dim, ggi_u) + (*vp[1]) * *(ggi_u+k) * LEN_SCALING * PRES_SCALING ); /* left */ if (k == Z_DIR) { const FLOAT rho = RHO_ICE; const FLOAT grav = GRAVITY; const FLOAT a = SEC_PER_YEAR; const FLOAT f = rho*grav * EQU_SCALING * LEN_SCALING2; Unused(a); rhsu[i][k] += vol*(*w) * (-f * (*gi_u) ); /* right */ } #elif STEADY_STATE rhsu[i][k] += vol*(*w) * (- nu * INNER_PRODUCT(gu[1]+k*Dim, ggi_u) + (*vp[1]) * *(ggi_u+k) ); /* left */ if (!_nsp->no_extern_source) rhsu[i][k] += vol*(*w) * (*(vf[1]+k) * (*gi_u) ); /* right */ #elif TIME_DEP_NON rhsu[i][k] -= vol*(*w) * ((vu[1][k] - vu[0][k]) * (*gi_u) / dt[0] + Theta * (nu * INNER_PRODUCT(gu[1]+k*Dim, ggi_u) ) - (*vp[1]) * *(ggi_u+k) + Thet1 * (nu * INNER_PRODUCT(gu[0]+k*Dim, ggi_u) ) ); /* left */ if (!_nsp->no_extern_source) rhsu[i][k] += vol*(*w) * (Theta * *(vf[1]+k) * (*gi_u) + Thet1 * *(vf[0]+k) * (*gi_u) ); /* right */ #else TIME_DEP_LINEAR_ENTRY; /* Unavailable */ #endif /* STEADY_STATE */ } } /* rhs p */ for (i = 0; i < N; i++) { const FLOAT *gi_p = phgQuadGetBasisValues(e, ns->p[1], i, quad) + q; /* psi_i */ FLOAT divu1 = gu[1][0] + gu[1][4] + gu[1][8]; //FLOAT divu0 = gu[0][0] + gu[0][4] + gu[0][8]; rhsp[i] += vol*(*w) * (divu1 * (*gi_p) ); } if (tstep_minus) vu[-1] += Dim; #if STEADY_STATE || TIME_DEP_NON vu[1] += Dim; gu[1] += Dim*Dim; vp[1]++; #else TIME_DEP_LINEAR; /* Unavailable */ #endif /* STEADY_STATE || TIME_DEP_NON */ vu[0] += Dim; gu[0] += Dim * Dim; vp[0]++; vw += Dim; if (!_nsp->no_extern_source) { vf[0] += Dim; vf[1] += Dim; } vT++; w++; p += Dim + 1; } if (!_nsp->no_extern_source) { phgFree(vf_cache[0]); phgFree(vf_cache[1]); } normal = NULL; Unused(normal); area = 0; Unused(area); if (!_nsp->enclosed_flow) { /* slip boundary */ for (s = 0; s < NFace; s++) { if (e->bound_type[s] & INFLOW) { int v0, v1, v2; int nbas_face = NbasFace(ns->u[1]); SHORT bases[nbas_face]; FLOAT lambda[Dim + 1], x,y,z, beta; order = DofTypeOrder(ns->u[1], e) * 3 - 1; phgDofGetBasesOnFace(ns->u[1], e, s, bases); v0 = GetFaceVertex(s, 0); v1 = GetFaceVertex(s, 1); v2 = GetFaceVertex(s, 2); lambda[s] = 0.; area = phgGeomGetFaceArea(g, e, s); normal = phgGeomGetFaceOutNormal(g, e, s); quad = phgQuadGetQuad2D(order); p = quad->points; w = quad->weights; for (q = 0; q < quad->npoints; q++) { FLOAT vu[Dim]; lambda[v0] = *(p++); lambda[v1] = *(p++); lambda[v2] = *(p++); phgGeomLambda2XYZ(g, e, lambda, &x, &y, &z); func_beta(x, y, z, &beta); phgDofEval(ns->u[1], e, lambda, vu); for (i = 0; i < nbas_face; i++) { int ii = bases[i]; FLOAT gi_u = *ns->u[1]->type->BasFuncs(ns->u[1], e, ii, ii + 1, lambda); for (k = 0; k < Dim; k++) { #if STEADY_STATE rhus[ii][k] += 0.; #elif TIME_DEP_NON # if USE_SLIDING_BC abort(); rhsu[ii][k] += SIGN_FRICTION * area*(*w) * beta * vu[k] * (gi_u) * EQU_SCALING * LEN_SCALING; # else Unused(gi_u); # endif #else TIME_DEP_LINEAR_ENTRY; /* Unavailable */ #endif /* STEADY_STATE */ } } /* end of bas_i */ w++; } /* end of quad point */ } /* end of face outflow */ } /* end of all outflow face in element */ } /* end out flow boundary */ #if USE_SLIDING_BC /* Rotate bases */ for (i = 0; i < M; i++) { INT id = phgDofMapE2D(surf_dof, e, i * (Dim*Dim)) / (Dim*Dim); if (!rotated[id]) continue; const FLOAT *trans = Trans + id*(Dim*Dim); trans_left(&rhsu[i][0], 1, 1, trans); } #else Unused(Trans); Unused(rotated); #endif /* Map: Element -> system */ for (i = 0; i < M; i++) for (k = 0; k < Dim; k++) Iu[i][k] = phgMapE2L(solver_u->rhs->map, 0, e, i * Dim + k); for (i = 0; i < N; i++) Ip[i] = phgMapE2L(solver_u->rhs->map, 1, e, i); /* set velocity dirichlet bdry */ FLOAT tmp[Dim]; for (i = 0; i < M; i++) for (k = 0; k < Dim; k++) if (phgDofDirichletBC_(ns->u[1], e, i*Dim+k, NULL, bufu, tmp, DOF_PROJ_NONE)) { rhsu[i][k] = 0.; } #if STEADY_STATE || TIME_DEP_NON /* set pressure dirichlet bdry for pinned point */ for (i = 0; i < N; i++) if (phgDofDirichletBC(ns->p[1], e, i, NULL, bufp, &rhsp[i], DOF_PROJ_NONE)) { if (!_nsp->enclosed_flow) phgError(1, "No dirichlet bc for Unenclosed flow!\n"); if (_nsp->pin_node) { # if PIN_AT_ROOT if (g->rank != 0) phgError(1, "Pinned node only on rank 0!\n"); if (g, e->verts[i] != ns->pinned_node_id) phgError(1, "Build rhs: pinned node e:%d, bas:%d, [%d] and [%d] " "doesn't coincide when build RHS!\n", e->index, i, e->verts[i], ns->pinned_node_id); # else if (GlobalVertex(g, e->verts[i]) != ns->pinned_node_id) phgError(1, "Build rhs: pinned node e:%d, bas:%d, [%d] and [%d] " "doesn't coincide when build RHS!\n", e->index, i, e->verts[i], ns->pinned_node_id); # endif /* PIN_AT_ROOT */ } } #else TIME_DEP_LINEAR; /* Unavailable */ #endif /* STEADY_STATE || TIME_DEP_NON */ /* Global res */ phgVecAddEntries(vec_rhs, 0, M * Dim, Iu[0], &rhsu[0][0]); phgVecAddEntries(vec_rhs, 0, N, Ip, rhsp); } /* end element */
int main(int argc, char *argv[]) { MAT *A, *B; VEC *U = NULL, *x; FLOAT *C; SOLVER *solver, *solver1 = NULL; INT i, j, k, n, *pvt, N = 1000, K = 2; char *main_opts = NULL, *sub_opts = NULL; phgOptionsRegisterInt("-n", "N value", &N); phgOptionsRegisterInt("-k", "K value", &K); phgOptionsRegisterString("-main_solver_opts", "Options for the main solver", &main_opts); phgOptionsRegisterString("-sub_solver_opts", "Options for the subsolver", &sub_opts); /* a direct solver is preferable for the sparse matrix */ phgOptionsPreset("-solver mumps"); phgInit(&argc, &argv); phgPrintf( "----------------------------------------------------------------------------\n" "This code solves (A+UU^t)x=b using the Sherman-Morrison-Woodbury formula.\n" "Note: may use the following to disable use of the Sherman-Morrison-Woodbury\n" "algorithm and change to the default solver instead:\n" " -preonly_pc_type solver -preonly_pc_opts \"-solver_maxit 2000\"\n" "----------------------------------------------------------------------------\n" ); phgPrintf("Generating the linear system: N = %"dFMT", K = %"dFMT"\n", N, K); /* A is a distributed NxN SPD tridiagonal matrix (A = [-1, 2, -1]) */ n = N / phgNProcs + (phgRank < (N % phgNProcs) ? 1 : 0); A = phgMatCreate(phgComm, n, N); phgPrintf(" Generating matrix A.\n"); for (i = 0; i < n; i++) { /* diagonal */ phgMatAddEntry(A, i, i, 2.0); /* diagonal - 1 */ if (i > 0) phgMatAddEntry(A, i, i - 1, -1.0); else if (phgRank > 0) phgMatAddLGEntry(A, i, A->rmap->partition[phgRank] - 1, -1.0); /* diagonal + 1 */ if (i < n - 1) phgMatAddEntry(A, i, i + 1, -1.0); else if (phgRank < phgNProcs - 1) phgMatAddLGEntry(A, i, A->rmap->partition[phgRank] + n, -1.0); } phgMatAssemble(A); /* U is a K-component vector */ U = phgMapCreateVec(A->rmap, K); phgVecRandomize(U, 123); /* solver1 is the solver for A */ phgOptionsPush(); phgOptionsSetOptions(sub_opts); solver1 = phgMat2Solver(SOLVER_DEFAULT, A); phgOptionsPop(); /* x is a scratch vector */ x = phgMapCreateVec(A->rmap, 1); /* C is a KxK dense matrix, pvt is an integer array, they store the LU * factorization of (I + U^t*inv(A)*U) */ phgPrintf(" Generating the dense matrix I+U^t*inv(A)*U.\n"); C = phgCalloc(K * K, sizeof(*C)); pvt = phgAlloc(K * sizeof(*pvt)); for (i = 0; i < K; i++) { for (j = 0; j < n; j++) { solver1->rhs->data[j] = U->data[i * n + j]; x->data[j] = 0.0; } solver1->rhs->assembled = TRUE; phgSolverVecSolve(solver1, FALSE, x); for (j = 0; j < K; j++) for (k = 0; k < n; k++) C[i * K + j] += U->data[j * n + k] * x->data[k]; } #if USE_MPI if (U->map->nprocs > 1) { FLOAT *tmp = phgAlloc(K * K * sizeof(*tmp)); MPI_Allreduce(C, tmp, K * K, PHG_MPI_FLOAT, MPI_SUM, U->map->comm); phgFree(C); C = tmp; } #endif /* USE_MPI */ for (i = 0; i < K; i++) C[i * K + i] += 1.0; phgPrintf(" Factorizing the dense matrix I+U^t*inv(A)*U.\n"); phgSolverDenseLU(K, C, pvt); /* B is a matrix-free matrix representing A + U*U^t, B->mv_data is used * to pass A, U, solver1, C and pvt to callback functions */ B = phgMapCreateMatrixFreeMat(A->rmap, A->cmap, funcB, /* arguments carried over to CB functions */ A, U, solver1, C, pvt, NULL); /* solver is a PreOnly solver for B whose pc_proc is set to sherman(). * * Note: can also use pcg, gmres, or petsc for this solver, in this case * the solution obtained with the Sherman-Morisson formula is iteratively * refined. */ phgOptionsPush(); phgOptionsSetOptions("-solver preonly"); phgOptionsSetOptions(main_opts); solver = phgMat2Solver(SOLVER_DEFAULT, B); phgSolverSetPC(solver, solver, sherman); phgOptionsPop(); for (i = 0; i < n; i++) x->data[i] = 1.0; phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, &solver->rhs); phgPrintf("Solving the linear system.\n"); /* reset initial solution to zero */ memset(x->data, 0, n * sizeof(*x->data)); phgSolverVecSolve(solver, TRUE, x); for (i = 0; i < n; i++) solver->rhs->data[i] = 1.0; phgVecAXPBY(-1.0, solver->rhs, 1.0, &x); phgPrintf("Checking the result: |x - x_exact| / |x_exact| = %lg\n", (double)phgVecNorm2(x, 0, NULL) / sqrt((double)N)); phgSolverDestroy(&solver); phgSolverDestroy(&solver1); phgMatDestroy(&A); phgMatDestroy(&B); phgVecDestroy(&U); phgVecDestroy(&x); phgFree(C); phgFree(pvt); phgFinalize(); return 0; }
/*************************************************************************** * Build matrices *F, *Fu, *B, *Bt, *Fp, *Ap, and *Qp used * by the solvers. **************************************************************************/ static void build_matrices(SOLVER *solver, SOLVER *pc, DOF **dofs, MAT **mats, LTYPE type) { DOF *u = dofs[0], *p = dofs[1]; DOF *f, *pbc, *gn[3], *gradu, *divu; int M = u->type->nbas; /* num of bases of Velocity */ int N = p->type->nbas; /* num of bases of Pressure */ int i, j, k, l; GRID *g = u->g; ELEMENT *e; MAT *matB, *matC, *matAp, *matQp; FLOAT F[M][Dim][M][Dim], Fu[M][M], B[N][M][Dim], Bt[M][Dim][N], Ap[N][N], Fp[N][N], Qp[N][N], bufu[M], bufp[N], tmp[9]; INT Iu[M][Dim], Ju[Dim][M], Iu0[M], Ip[N]; /* Unpack Dofs */ unpackDof(dofs, 9, &u, &p, &gradu, &divu, &f, &pbc, &gn[0], &gn[1], &gn[2]); unpackMat(mats, 4, &matB, &matC, &matAp, &matQp); #if 0 #warning remove me! SOLVER *s = phgSolverCreate(SOLVER_PCG, p, NULL); #endif ForAllElements(g, e) { /* Map: Element -> system */ for (i = 0; i < M; i++) { for (k = 0; k < Dim; k++) Ju[k][i] = Iu[i][k] = phgMapE2L(matF->cmap, 0, e, i * Dim + k); Iu0[i] = phgMapE2L(matFu->cmap, 0, e, i); } for (i = 0; i < N; i++) { Ip[i] = phgMapE2L(matFp->cmap, 0, e, i); } /* A V W */ for (i = 0; i < M; i++) { for (j = 0; j < M; j++) { FLOAT m, a, v, w[9]; /* \phi_j \dot \phi_i */ m = phgQuadBasDotBas(e, u, j, u, i, QUAD_DEFAULT); /* (\grad \phi_j) \cdot (\grad \phi_i) */ a = nu * phgQuadGradBasDotGradBas(e, u, j, u, i, QUAD_DEFAULT); /* (u \cdot \grad) \phi_j \times \phi_i, 1 item */ v = phgQuadDofDotGradBasBas(e, u, u, j, i, QUAD_DEFAULT); Fu[i][j] = a + v; if (type == PICARD) { memset(w, 0, sizeof(w)); } else { /* \phi_j (\grad u) \times \phi_i, 9 items */ phgQuadGradDofBasDotBas(e, gradu, u, j, i, QUAD_DEFAULT, w); } for (k = 0; k < Dim; k++) { for (l = 0; l < Dim; l++) { F[i][k][j][l] = Theta * dt * *(w + k * Dim + l); if (k == l) F[i][k][j][k] += m + Theta * dt * (a + v); } } } } /* B Bt */ for (i = 0; i < M; i++) { for (j = 0; j < N; j++) { FLOAT bxyz[3]; phgQuadGradBasDotBas3(e, u, i, p, j, bxyz, QUAD_DEFAULT); for (k = 0; k < Dim; k++) Bt[i][k][j] = B[j][i][k] = -Theta * dt * bxyz[k]; } } /* Ap Qp; Fp(update) */ for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { FLOAT ap, qp, cp; ap = phgQuadGradBasDotGradBas(e, p, j, p, i, QUAD_DEFAULT); Ap[i][j] = Theta * dt * ap; qp = phgQuadBasDotBas(e, p, j, p, i, QUAD_DEFAULT); Qp[i][j] = Theta * dt * qp; cp = phgQuadDofDotGradBasBas(e, u, p, j, i, QUAD_DEFAULT); Fp[i][j] = qp + Theta * dt * (nu * ap + cp); } } /* Global Matrix */ for (i = 0; i < M; i++) { /* Dirichle Boundary for velocity. */ if (phgDofDirichletBC(u, e, i, NULL, bufu, NULL, DOF_PROJ_NONE)) { for (k = 0; k < Dim; k++) phgMatAddEntries(matF, 1, Iu[i] + k, M, Ju[k], bufu); phgMatAddEntries(matFu, 1, Iu0 + i, M, Iu0, bufu); } else { /* interior node Or Neumann */ /* Matrix F */ phgMatAddEntries(matF, Dim, Iu[i], M * Dim, Iu[0], &(F[i][0][0][0])); /* Matrix Fu */ phgMatAddEntries(matFu, 1, Iu0 + i, M, Iu0, &(Fu[i][0])); /* Matrix Bt */ for (k = 0; k < Dim; k++) phgMatAddEntries(matBt, 1, &Iu[i][k], N, Ip, &Bt[i][k][0]); } } /* end of Block (1,1), (1,2) */ for (i = 0; i < N; i++) phgMatAddEntries(matB, 1, Ip + i, M * Dim, Iu[0], &B[i][0][0]); /* Matrix Ap Qp Fp */ for (i = 0; i < N; i++) { if (phgDofDirichletBC(pbc, e, i, NULL, bufp, tmp, DOF_PROJ_NONE)) { /* Dirichle Boundary for pressure PC. */ phgMatAddEntries(matAp, 1, Ip + i, N, Ip, bufp); phgMatAddEntries(matFp, 1, Ip + i, N, Ip, bufp); } else { /* interior node Or Neumann */ phgMatAddEntries(matAp, 1, Ip + i, N, Ip, Ap[i]); phgMatAddEntries(matFp, 1, Ip + i, N, Ip, Fp[i]); } #if 0 /* use only diagonal of the mass matrix in the preconditioner */ phgMatAddEntries(matQp, 1, Ip + i, 1, Ip + i, &Qp[i][i]); #else /* use full mass matrix in the preconditioner */ phgMatAddEntries(matQp, 1, Ip + i, N, Ip, Qp[i]); #endif #if 0 phgMatAddEntries(s->mat, 1, Ip + i, N, Ip, Qp[i]); #endif } } /* end element */ #if 0 VEC *v = phgMapCreateVec(s->rhs->map, 1); memcpy(s->rhs->data, solver->rhs->data + DofGetDataCount(u), s->rhs->map->nlocal * sizeof(FLOAT)); s->rhs_updated = TRUE; s->rhs->assembled = TRUE; s->rtol = 1e-10; s->maxit = 10000; phgSolverVecSolve(s, TRUE, v); phgPrintf("v = %e\n", phgVecNormInfty(v, 0, NULL)); phgVecDestroy(&v); phgSolverDestroy(&s); #endif return; }
/* * Preconditioning procedure: * p = Qp^-1 Fp Ap^-1 * q * u = F^-1 ( bu - Bt * p ) * */ static void pc_proc(void *ctx, VEC *b0, VEC **x0) { SOLVER *pc_solver = (SOLVER *) ctx; GRID *g = matF->rmap->dofs[0]->g; VEC *xu, *xp, *xu2, *xp2; int Nu = matF->rmap->nlocal; int Np = solver_Ap->mat->rmap->nlocal; INT verb = phgVerbosity; FLOAT *rhsF, *rhsAp, *rhsQp; double t; if (phgVerbosity > 0) phgVerbosity--; /* save old rhs */ rhsF = solver_F->rhs->data; rhsAp = solver_Ap->rhs->data; rhsQp = solver_Qp->rhs->data; xu = phgMapCreateVec(matF->rmap, 1); xu2 = phgMapCreateVec(matF->rmap, 1); xp = phgMapCreateVec(solver_Ap->mat->rmap, 1); xp2 = phgMapCreateVec(solver_Ap->mat->rmap, 1); /* Ap^-1 * q */ t = phgGetTime(NULL); solver_Ap->rhs->data = xp->data; solver_Ap->rhs->assembled = TRUE; solver_Ap->rhs_updated = TRUE; memcpy(xp->data, b0->data + Nu, sizeof(*xp->data) * Np); bzero(xp2->data, sizeof(*xp->data) * Np); phgSolverVecSolve(solver_Ap, FALSE, xp2); memcpy(xp->data, xp2->data, sizeof(*xp->data) * Np); if (verb > 0) phgPrintf("\t Ap: nits = %d, residual = %0.4le [%0.4lgMB %0.4lfs]\n", solver_Ap->nits, (double)solver_Ap->residual, phgMemoryUsage(g, NULL) / (1024.0 * 1024.0), phgGetTime(NULL) - t); /* Fp Ap^-1 * q */ phgMatVec(MAT_OP_N, 1.0, matFp, xp, 0., &xp2); memcpy(xp->data, xp2->data, sizeof(*xp->data) * Np); /* xp = Qp^-1 Fp Ap^-1 * q */ t = phgGetTime(NULL); solver_Qp->rhs->data = xp->data; solver_Qp->rhs->assembled = TRUE; solver_Qp->rhs_updated = TRUE; bzero(xp2->data, sizeof(*xp->data) * Np); phgSolverVecSolve(solver_Qp, FALSE, xp2); if (verb > 0) phgPrintf("\t Qp: nits = %d, residual = %0.4le [%0.4lgMB %0.4lfs]\n", solver_Qp->nits, (double)solver_Qp->residual, phgMemoryUsage(g, NULL) / (1024.0 * 1024.0), phgGetTime(NULL) - t); /* xu = F^-1 ( bu - Bt * xp ) */ t = phgGetTime(NULL); memcpy(xu->data, b0->data, sizeof(*xu->data) * Nu); phgMatVec(MAT_OP_N, -1.0, matBt, xp2, 1., &xu); if (solver_F->mat->cmap->nlocal == Nu) { /* use F in the preconditioning matrix */ solver_F->rhs->data = xu->data; solver_F->rhs->assembled = TRUE; solver_F->rhs_updated = TRUE; bzero(xu2->data, sizeof(*xu->data) * Nu); phgSolverVecSolve(solver_F, FALSE, xu2); } else { pc_proc1(solver_F, xu, &xu2); } if (verb > 0) phgPrintf("\t F: nits = %d, residual = %0.4le [%0.4lgMB %0.4lfs]\n", solver_F->nits, (double)solver_F->residual, phgMemoryUsage(g, NULL) / (1024.0 * 1024.0), phgGetTime(NULL) - t); /* Copy xu, xp to x */ memcpy((*x0)->data, xu2->data, sizeof(*xu->data) * Nu); memcpy((*x0)->data + Nu, xp2->data, sizeof(*xp->data) * Np); /* restore rhs */ solver_F->rhs->data = rhsF; solver_Ap->rhs->data = rhsAp; solver_Qp->rhs->data = rhsQp; phgVecDestroy(&xu); phgVecDestroy(&xu2); phgVecDestroy(&xp); phgVecDestroy(&xp2); phgVerbosity = verb; return; }
static void build_matrices(MAT *matA, MAT *matM, MAT *matC, MAT *S, FLOAT s, DOF *u_h, DOF *p_h) /* S is used to store s*diag(M(p_h))^(-1) */ { int N = u_h->type->nbas; /* number of basis functions in an element */ int M = p_h->type->nbas; int i, j; GRID *g = u_h->g; ELEMENT *e; FLOAT A[N][N], B[N][N], C[N][M]; INT I[N], Ip[M]; INT k, n0; VEC *V = phgMapCreateVec(S->rmap, 1); phgVecDisassemble(V); /* for phgVecAddEntry */ ForAllElements(g, e) { for (i = 0; i < N; i++) { I[i] = phgMapE2L(matA->rmap, 0, e, i); for (k = 0; k < M; k++) { /* \int \grad\psi_k\cdot\phi_i */ C[i][k] = phgQuadGradBasDotBas(e, p_h, k, u_h, i, QUAD_DEFAULT); } for (j = 0; j <= i; j++) { /* \int \phi_i\cdot\phi_j */ B[j][i] = B[i][j] = phgQuadBasDotBas(e, u_h, j, u_h, i, QUAD_DEFAULT); /* \int \curl\phi_i\cdot\curl\phi_j */ A[j][i] = A[i][j] = phgQuadCurlBasDotCurlBas(e, u_h, j, u_h, i, QUAD_DEFAULT); } } for (i = 0; i < M; i++) { Ip[i] = phgMapE2L(matC->cmap, 0, e, i); if (Ip[i] < 0) /* boundary entry */ continue; phgVecAddEntry(V, 0, Ip[i], phgQuadBasDotBas(e, p_h, i, p_h, i, QUAD_DEFAULT)); } /* loop on basis functions */ for (i = 0; i < N; i++) { if (phgDofDirichletBC(u_h, e, i, NULL, NULL, NULL, DOF_PROJ_CROSS)) continue; phgMatAddEntries(matA, 1, I + i, N, I, A[i]); phgMatAddEntries(matM, 1, I + i, N, I, B[i]); phgMatAddEntries(matC, 1, I + i, M, Ip, C[i]); } } phgVecAssemble(V); n0 = V->map->partition[V->map->rank]; for (k = 0; k < V->map->nlocal; k++) phgMatAddGlobalEntry(S, k + n0, k + n0, s / V->data[k]); phgVecDestroy(&V); phgMatAssemble(S); phgMatSetupDiagonal(S); phgMatAssemble(matA); phgMatAssemble(matM); phgMatAssemble(matC); }
int main(int argc, char *argv[]) { GRID *g; DOF *u_h; MAT *A, *A0, *B; MAP *map; INT i; size_t nnz, mem, mem_peak; VEC *x, *y0, *y1, *y2; double t0, t1, dnz, dnz1, mflops, mop; char *fn = "../test/cube.dat"; FLOAT mem_max = 300; INT refine = 0; phgOptionsRegisterFilename("-mesh_file", "Mesh file", (char **)&fn); phgOptionsRegisterInt("-loop_count", "Loop count", &loop_count); phgOptionsRegisterInt("-refine", "Refinement level", &refine); phgOptionsRegisterFloat("-mem_max", "Maximum memory", &mem_max); phgInit(&argc, &argv); g = phgNewGrid(-1); if (!phgImport(g, fn, FALSE)) phgError(1, "can't read file \"%s\".\n", fn); phgRefineAllElements(g, refine); u_h = phgDofNew(g, DOF_DEFAULT, 1, "u_h", DofNoAction); while (TRUE) { phgPrintf("\n"); if (phgBalanceGrid(g, 1.2, 1, NULL, 0.)) phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n", g->nprocs, (double)g->lif); map = phgMapCreate(u_h, NULL); A = phgMapCreateMat(map, map); A->handle_bdry_eqns = TRUE; build_matrix(A, u_h); phgMatAssemble(A); /* Note: A is unsymmetric (A' != A) if boundary entries not removed */ phgMatRemoveBoundaryEntries(A); #if 0 /* test block matrix operation */ A0 = phgMatCreateBlockMatrix(g->comm, 1, 1, &A, NULL); #else A0 = A; #endif phgPrintf("%d DOF, %d elems, %d submeshes, matrix size: %d, LIF: %lg\n", DofGetDataCountGlobal(u_h), g->nleaf_global, g->nprocs, A->rmap->nglobal, (double)g->lif); /* test PHG mat-vec multiply */ x = phgMapCreateVec(A->cmap, 1); y1 = phgMapCreateVec(A->rmap, 1); phgVecRandomize(x, 123); phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); y0 = phgVecCopy(y1, NULL); nnz = A->nnz_d + A->nnz_o; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif mop = loop_count * (dnz + dnz - A->rmap->nlocal) * 1e-6; phgPrintf("\n"); t1 -= t0; phgPrintf(" PHG: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF)\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops); /* test trans(A)*x */ phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { phgMatVec(MAT_OP_T, 1.0, A0, x, 0.0, &y1); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); /* time A * trans(A) */ phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); B = phgMatMat(MAT_OP_N, MAT_OP_N, 1.0, A, A, 0.0, NULL); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); nnz = B->nnz_d + B->nnz_o; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif /* compare B*x <--> A*A*x */ y2 = phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, NULL); phgMatVec(MAT_OP_N, 1.0, A0, y0, 0.0, &y1); phgMatDestroy(&B); t1 -= t0; phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); #if USE_PETSC { Mat ma, mb; MatInfo info; Vec va, vb, vc; PetscScalar *vec; ma = phgPetscCreateMatAIJ(A); MatGetVecs(ma, PETSC_NULL, &va); VecDuplicate(va, &vb); VecGetArray(va, &vec); memcpy(vec, x->data, x->map->nlocal * sizeof(*vec)); VecRestoreArray(va, &vec); MatMult(ma, va, vb); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { MatMult(ma, va, vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); MatGetInfo(ma, MAT_GLOBAL_SUM, &info); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); t1 -= t0; dnz = info.nz_used; phgPrintf(" PETSc: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { MatMultTranspose(ma, va, vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); MatMatMult(ma, ma, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mb); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); t1 -= t0; MatGetInfo(mb, MAT_GLOBAL_SUM, &info); dnz = info.nz_used; VecDuplicate(va, &vc); /* compare B*x <--> A*A*x */ MatMult(ma, vb, vc); MatMult(mb, va, vb); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); VecGetArray(vc, &vec); memcpy(y2->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vc, &vec); phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); phgPetscMatDestroy(&mb); phgPetscMatDestroy(&ma); phgPetscVecDestroy(&va); phgPetscVecDestroy(&vb); phgPetscVecDestroy(&vc); } #endif /* USE_PETSC */ #if USE_HYPRE { HYPRE_IJMatrix ma; HYPRE_IJVector va, vb, vc; HYPRE_ParCSRMatrix par_ma; hypre_ParCSRMatrix *par_mb; HYPRE_ParVector par_va, par_vb, par_vc; HYPRE_Int offset, *ni, start, end; assert(sizeof(INT)==sizeof(int) && sizeof(FLOAT)==sizeof(double)); setup_hypre_mat(A, &ma); ni = phgAlloc(2 * A->rmap->nlocal * sizeof(*ni)); offset = A->cmap->partition[A->cmap->rank]; for (i = 0; i < A->rmap->nlocal; i++) ni[i] = i + offset; HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &va); HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vb); HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vc); HYPRE_IJVectorSetObjectType(va, HYPRE_PARCSR); HYPRE_IJVectorSetObjectType(vb, HYPRE_PARCSR); HYPRE_IJVectorSetObjectType(vc, HYPRE_PARCSR); HYPRE_IJVectorSetMaxOffProcElmts(va, 0); HYPRE_IJVectorSetMaxOffProcElmts(vb, 0); HYPRE_IJVectorSetMaxOffProcElmts(vc, 0); HYPRE_IJVectorInitialize(va); HYPRE_IJVectorInitialize(vb); HYPRE_IJVectorInitialize(vc); HYPRE_IJMatrixGetObject(ma, (void **)(void *)&par_ma); HYPRE_IJVectorGetObject(va, (void **)(void *)&par_va); HYPRE_IJVectorGetObject(vb, (void **)(void *)&par_vb); HYPRE_IJVectorGetObject(vc, (void **)(void *)&par_vc); HYPRE_IJVectorSetValues(va, A->cmap->nlocal, ni, (double *)x->data); HYPRE_IJVectorAssemble(va); HYPRE_IJVectorAssemble(vb); HYPRE_IJVectorAssemble(vc); HYPRE_IJMatrixGetRowCounts(ma, A->cmap->nlocal, ni, ni + A->rmap->nlocal); for (i = 0, nnz = 0; i < A->rmap->nlocal; i++) nnz += ni[A->rmap->nlocal + i]; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); t1 -= t0; phgPrintf(" HYPRE: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { HYPRE_ParCSRMatrixMatvecT(1.0, par_ma, par_va, 0.0, par_vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); /* Note: 'HYPRE_ParCSRMatrix' is currently typedef'ed to * 'hypre_ParCSRMatrix *' */ par_mb = hypre_ParMatmul((hypre_ParCSRMatrix *)par_ma, (hypre_ParCSRMatrix *)par_ma); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); start = hypre_ParCSRMatrixFirstRowIndex(par_mb); end = hypre_ParCSRMatrixLastRowIndex(par_mb) + 1; for (i = start, nnz = 0; i < end; i++) { HYPRE_Int ncols; hypre_ParCSRMatrixGetRow(par_mb, i, &ncols, NULL, NULL); hypre_ParCSRMatrixRestoreRow(par_mb, i, &ncols, NULL, NULL); nnz += ncols; } #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif /* compare B*x <--> A*A*x */ HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_vb, 0.0, par_vc); HYPRE_ParCSRMatrixMatvec(1.0, (void *)par_mb, par_va, 0.0, par_vb); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); HYPRE_IJVectorGetValues(vc, A->rmap->nlocal, ni, (double*)y2->data); hypre_ParCSRMatrixDestroy((par_mb)); t1 -= t0; phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); phgFree(ni); HYPRE_IJMatrixDestroy(ma); HYPRE_IJVectorDestroy(va); HYPRE_IJVectorDestroy(vb); HYPRE_IJVectorDestroy(vc); } #endif /* USE_HYPRE */ if (A0 != A) phgMatDestroy(&A0); #if 0 if (A->rmap->nglobal > 1000) { VEC *v = phgMapCreateVec(A->rmap, 3); for (i = 0; i < v->map->nlocal; i++) { v->data[i + 0 * v->map->nlocal] = 1 * (i + v->map->partition[g->rank]); v->data[i + 1 * v->map->nlocal] = 2 * (i + v->map->partition[g->rank]); v->data[i + 2 * v->map->nlocal] = 3 * (i + v->map->partition[g->rank]); } phgMatDumpMATLAB(A, "A", "A.m"); phgVecDumpMATLAB(v, "v", "v.m"); phgFinalize(); exit(0); } #endif phgMatDestroy(&A); phgVecDestroy(&x); phgVecDestroy(&y0); phgVecDestroy(&y1); phgVecDestroy(&y2); phgMapDestroy(&map); mem = phgMemoryUsage(g, &mem_peak); dnz = mem / (1024.0 * 1024.0); dnz1 = mem_peak / (1024.0 * 1024.0); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); phgPrintf(" Memory: current %0.4lgMB, peak %0.4lgMB\n", dnz, dnz1); #if 0 { static int loop_count = 0; if (++loop_count == 4) break; } #endif if (mem_peak > 1024 * (size_t)1024 * mem_max) break; phgRefineAllElements(g, 1); } phgDofFree(&u_h); phgFreeGrid(&g); phgFinalize(); return 0; }
void phgNSInitPc(NSSolver *ns) { GRID *g = ns->g; MAP *Pmap = ns->Pmap, *Pbcmap; BOOLEAN use_Fu = _nsp->use_Fu; int verb; /* pcd boundary type test */ _pcd->dof_inflow = phgDofNew(g, _nsp->ptype, 1, "dof inflow", DofNoAction); _pcd->dof_outflow = phgDofNew(g, _nsp->ptype, 1, "dof outflow", DofNoAction); _pcd->dof_nobdry = phgDofNew(g, _nsp->ptype, 1, "dof nobdry", DofNoAction); phgDofSetDirichletBoundaryMask(_pcd->dof_inflow, INFLOW); phgDofSetDirichletBoundaryMask(_pcd->dof_outflow, OUTFLOW); phgDofSetDirichletBoundaryMask(_pcd->dof_nobdry, 0); _pcd->map_inflow = phgMapCreate(_pcd->dof_inflow, NULL); _pcd->map_outflow = phgMapCreate(_pcd->dof_outflow, NULL); _pcd->map_nobdry = phgMapCreate(_pcd->dof_nobdry, NULL); _pcd->Pbcmap = phgMapCreate(_pcd->pbc, NULL); Pbcmap = _pcd->Pbcmap; Unused(Pmap); #warning PCD B.C.: step 1. build mat using map... /* * PCD boundary setup: should be consistent with code above */ if (_nsp->pin_node) { _pcd->matFp = phgMapCreateMat(Pbcmap, Pbcmap); _pcd->matAp = phgMapCreateMat(Pbcmap, Pbcmap); _pcd->matQp = phgMapCreateMat(Pbcmap, Pbcmap); } else { //_pcd->matAp = phgMapCreateMat(_pcd->map_inflow, _pcd->map_inflow); //_pcd->matFp = phgMapCreateMat(_pcd->map_inflow, _pcd->map_inflow); _pcd->matFp = phgMapCreateMat(_pcd->map_outflow, _pcd->map_outflow); _pcd->matAp = phgMapCreateMat(_pcd->map_outflow, _pcd->map_outflow); //_pcd->matQp = phgMapCreateMat(_pcd->map_outflow, _pcd->map_outflow); //_pcd->matQp = phgMapCreateMat(_pcd->map_inflow, _pcd->map_inflow); //_pcd->matFp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry); //_pcd->matAp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry); //_pcd->matFp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry); _pcd->matQp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry); } /* stokes problem: get SYMETRIC mat when assemble. * Handle_bdry_eqns means mat is composed with row of boundary row * and non-bdry row, and eliminating mat columes of dirichlet dof. */ if (_nsp->use_symetric) { _pcd->matFp->handle_bdry_eqns = TRUE; _pcd->matAp->handle_bdry_eqns = TRUE; _pcd->matQp->handle_bdry_eqns = TRUE; } /* genearl NS: no need to eliminate mat columes of dirichlet dof */ else { _pcd->matFp->handle_bdry_eqns = FALSE; _pcd->matAp->handle_bdry_eqns = FALSE; _pcd->matQp->handle_bdry_eqns = FALSE; } _pcd->rhsScale = phgMapCreateVec(_pcd->matQp->rmap, 1); phgVecDisassemble(_pcd->rhsScale); ns->pc = phgMat2Solver(SOLVER_PreOnly, ns->solver_u->mat); if (_nsp->use_PCD) phgSolverSetPC(ns->solver_u, ns->pc, pc_proc); /* solver F */ phgOptionsPush(); phgOptionsSetOptions("-solver hypre " "-hypre_solver pcg " "-hypre_pc boomeramg " "-solver_maxit 10 " "-solver_rtol 1e-4"); phgOptionsSetOptions(_nsp->F_opts); /* use matF in the preconditioning matrix */ _pcd->solver_F = phgMat2Solver(SOLVER_DEFAULT, ns->matF); _pcd->solver_F->verb = SUB_SOLVER_VERB; /* Set user options. */ _pcd->pc_F = NULL; #if USE_MG if (ns_params->use_mg_F) { MAT *matF = ns->matF; assert(ns_params->use_PCD && !ns_params->use_Fu); _pcd->pc_F = phgMat2Solver(SOLVER_PreOnly, matF); phgOptionsSetOptions("-solver petsc "); matF->mv_data = phgAlloc(sizeof(*matF->mv_data)); matF->mv_data[0] = (void *) ns->mg; phgSolverSetPC(_pcd->solver_F, _pcd->pc_F, mg_pc_proc); } #endif /* USE_MG */ _pcd->solver_F->warn_maxit = FALSE; phgOptionsPop(); /* solver Ap */ phgOptionsPush(); phgOptionsSetOptions("-solver hypre " "-hypre_solver gmres " "-hypre_pc boomeramg " "-solver_maxit 10 " "-solver_rtol 1e-3"); phgOptionsSetOptions(_nsp->Ap_opts); _pcd->solver_Ap = phgMat2Solver(SOLVER_DEFAULT, _pcd->matAp); _pcd->solver_Ap->warn_maxit = FALSE; _pcd->solver_Ap->verb = SUB_SOLVER_VERB; phgOptionsPop(); _pcd->pc_Ap = NULL; #if USE_MG if (ns_params->use_mg_Ap) { MAT *matAp = _pcd->matAp; assert(ns_params->use_PCD); _pcd->pc_Ap = phgMat2Solver(SOLVER_PreOnly, matAp); phgOptionsSetOptions("-solver petsc "); matAp->mv_data = phgAlloc(sizeof(*matAp->mv_data)); matAp->mv_data[0] = (void *) ns->mg; phgSolverSetPC(_pcd->solver_Ap, _pcd->pc_Ap, mg_pc_proc); } #endif /* USE_MG */ /* solver Qp */ phgOptionsPush(); phgOptionsSetOptions("-solver hypre " "-hypre_solver pcg " "-hypre_pc boomeramg " "-solver_maxit 10 " "-solver_rtol 1e-3"); phgOptionsSetOptions(_nsp->Qp_opts); _pcd->solver_Qp = phgMat2Solver(SOLVER_DEFAULT, _pcd->matQp); _pcd->solver_Qp->warn_maxit = FALSE; _pcd->solver_Qp->verb = SUB_SOLVER_VERB; phgOptionsPop(); _pcd->pc_Qp = NULL; #if USE_MG if (ns_params->use_mg_Qp) { MAT *matQp = _pcd->matQp; assert(ns_params->use_PCD); _pcd->pc_Qp = phgMat2Solver(SOLVER_PreOnly, matQp); phgOptionsSetOptions("-solver petsc "); matQp->mv_data = phgAlloc(sizeof(*matQp->mv_data)); matQp->mv_data[0] = (void *) ns->mg; phgSolverSetPC(_pcd->solver_Qp, _pcd->pc_Qp, mg_pc_proc); } #endif /* USE_MG */ /* Fu for solve F^{-1} */ if (use_Fu) { /* _nsp->implicit_centrip && */ DOF *u1; MAP *u1map; MAT *matFu; u1 = _pcd->u1 = phgDofNew(g, _nsp->utype, 1, "velocity component u", DofNoAction); phgDofSetDirichletBoundaryMask(u1, SETFLOW); u1map = _pcd->u1map = phgMapCreate(_pcd->u1, NULL); matFu = _pcd->matFu = phgMapCreateMat(u1map, u1map); if (_nsp->use_symetric) matFu->handle_bdry_eqns = TRUE; /* solver Fu */ phgOptionsPush(); phgOptionsSetOptions("-solver hypre " "-hypre_solver pcg " "-hypre_pc boomeramg " "-solver_maxit 10 " "-solver_rtol 1e-4"); phgOptionsSetOptions(_nsp->Fu_opts); _pcd->solver_Fu = phgMat2Solver(SOLVER_DEFAULT, _pcd->matFu); _pcd->solver_Fu->warn_maxit = FALSE; _pcd->solver_Fu->verb = SUB_SOLVER_VERB; phgOptionsPop(); } return; }