inline int Normalize(double v[]) { double len = sqrt(InnerProd(v, v)); if (len > 0.0) len = 1.0 / len; else return 1; for (int i = 0; i < 3; i++) v[i] *= len; return 0; }
void bicgstab_euclid(Mat_dh A, Euclid_dh ctx, double *x, double *b, HYPRE_Int *itsOUT) { START_FUNC_DH HYPRE_Int its, m = ctx->m; bool monitor; HYPRE_Int maxIts = ctx->maxIts; double atol = ctx->atol, rtol = ctx->rtol; /* scalars */ double alpha, alpha_1, beta_1, widget, widget_1, rho_1, rho_2, s_norm, eps, exit_a, b_iprod, r_iprod; /* vectors */ double *t, *s, *s_hat, *v, *p, *p_hat, *r, *r_hat; monitor = Parser_dhHasSwitch(parser_dh, "-monitor"); /* allocate working space */ t = (double*)MALLOC_DH(m*sizeof(double)); s = (double*)MALLOC_DH(m*sizeof(double)); s_hat = (double*)MALLOC_DH(m*sizeof(double)); v = (double*)MALLOC_DH(m*sizeof(double)); p = (double*)MALLOC_DH(m*sizeof(double)); p_hat = (double*)MALLOC_DH(m*sizeof(double)); r = (double*)MALLOC_DH(m*sizeof(double)); r_hat = (double*)MALLOC_DH(m*sizeof(double)); /* r = b - Ax */ Mat_dhMatVec(A, x, s); /* s = Ax */ CopyVec(m, b, r); /* r = b */ Axpy(m, -1.0, s, r); /* r = b-Ax */ CopyVec(m, r, r_hat); /* r_hat = r */ /* compute stopping criteria */ b_iprod = InnerProd(m, b, b); CHECK_V_ERROR; exit_a = atol*atol*b_iprod; CHECK_V_ERROR; /* absolute stopping criteria */ eps = rtol*rtol*b_iprod; /* relative stoping criteria (residual reduction) */ its = 0; while(1) { ++its; rho_1 = InnerProd(m, r_hat, r); if (rho_1 == 0) { SET_V_ERROR("(r_hat . r) = 0; method fails"); } if (its == 1) { CopyVec(m, r, p); /* p = r_0 */ CHECK_V_ERROR; } else { beta_1 = (rho_1/rho_2)*(alpha_1/widget_1); /* p_i = r_(i-1) + beta_(i-1)*( p_(i-1) - w_(i-1)*v_(i-1) ) */ Axpy(m, -widget_1, v, p); CHECK_V_ERROR; ScaleVec(m, beta_1, p); CHECK_V_ERROR; Axpy(m, 1.0, r, p); CHECK_V_ERROR; } /* solve M*p_hat = p_i */ Euclid_dhApply(ctx, p, p_hat); CHECK_V_ERROR; /* v_i = A*p_hat */ Mat_dhMatVec(A, p_hat, v); CHECK_V_ERROR; /* alpha_i = rho_(i-1) / (r_hat^T . v_i ) */ { double tmp = InnerProd(m, r_hat, v); CHECK_V_ERROR; alpha = rho_1/tmp; } /* s = r_(i-1) - alpha_i*v_i */ CopyVec(m, r, s); CHECK_V_ERROR; Axpy(m, -alpha, v, s); CHECK_V_ERROR; /* check norm of s; if small enough: * set x_i = x_(i-1) + alpha_i*p_i and stop. * (Actually, we use the square of the norm) */ s_norm = InnerProd(m, s, s); if (s_norm < exit_a) { SET_INFO("reached absolute stopping criteria"); break; } /* solve M*s_hat = s */ Euclid_dhApply(ctx, s, s_hat); CHECK_V_ERROR; /* t = A*s_hat */ Mat_dhMatVec(A, s_hat, t); CHECK_V_ERROR; /* w_i = (t . s)/(t . t) */ { double tmp1, tmp2; tmp1 = InnerProd(m, t, s); CHECK_V_ERROR; tmp2 = InnerProd(m, t, t); CHECK_V_ERROR; widget = tmp1/tmp2; } /* x_i = x_(i-1) + alpha_i*p_hat + w_i*s_hat */ Axpy(m, alpha, p_hat, x); CHECK_V_ERROR; Axpy(m, widget, s_hat, x); CHECK_V_ERROR; /* r_i = s - w_i*t */ CopyVec(m, s, r); CHECK_V_ERROR; Axpy(m, -widget, t, r); CHECK_V_ERROR; /* check convergence; continue if necessary; * for continuation it is necessary thea w != 0. */ r_iprod = InnerProd(m, r, r); CHECK_V_ERROR; if (r_iprod < eps) { SET_INFO("stipulated residual reduction achieved"); break; } /* monitor convergence */ if (monitor && myid_dh == 0) { hypre_fprintf(stderr, "[it = %i] %e\n", its, sqrt(r_iprod/b_iprod)); } /* prepare for next iteration */ rho_2 = rho_1; widget_1 = widget; alpha_1 = alpha; if (its >= maxIts) { its = -its; break; } } *itsOUT = its; FREE_DH(t); FREE_DH(s); FREE_DH(s_hat); FREE_DH(v); FREE_DH(p); FREE_DH(p_hat); FREE_DH(r); FREE_DH(r_hat); END_FUNC_DH }
void cg_euclid(Mat_dh A, Euclid_dh ctx, double *x, double *b, HYPRE_Int *itsOUT) { START_FUNC_DH HYPRE_Int its, m = A->m; double *p, *r, *s; double alpha, beta, gamma, gamma_old, eps, bi_prod, i_prod; bool monitor; HYPRE_Int maxIts = ctx->maxIts; /* double atol = ctx->atol */ double rtol = ctx->rtol; monitor = Parser_dhHasSwitch(parser_dh, "-monitor"); /* compute square of absolute stopping threshold */ /* bi_prod = <b,b> */ bi_prod = InnerProd(m, b, b); CHECK_V_ERROR; eps = (rtol*rtol)*bi_prod; p = (double *) MALLOC_DH(m * sizeof(double)); s = (double *) MALLOC_DH(m * sizeof(double)); r = (double *) MALLOC_DH(m * sizeof(double)); /* r = b - Ax */ Mat_dhMatVec(A, x, r); /* r = Ax */ CHECK_V_ERROR; ScaleVec(m, -1.0, r); /* r = b */ CHECK_V_ERROR; Axpy(m, 1.0, b, r); /* r = r + b */ CHECK_V_ERROR; /* solve Mp = r */ Euclid_dhApply(ctx, r, p); CHECK_V_ERROR; /* gamma = <r,p> */ gamma = InnerProd(m, r, p); CHECK_V_ERROR; its = 0; while (1) { ++its; /* s = A*p */ Mat_dhMatVec(A, p, s); CHECK_V_ERROR; /* alpha = gamma / <s,p> */ { double tmp = InnerProd(m, s, p); CHECK_V_ERROR; alpha = gamma / tmp; gamma_old = gamma; } /* x = x + alpha*p */ Axpy(m, alpha, p, x); CHECK_V_ERROR; /* r = r - alpha*s */ Axpy(m, -alpha, s, r); CHECK_V_ERROR; /* solve Ms = r */ Euclid_dhApply(ctx, r, s); CHECK_V_ERROR; /* gamma = <r,s> */ gamma = InnerProd(m, r, s); CHECK_V_ERROR; /* set i_prod for convergence test */ i_prod = InnerProd(m, r, r); CHECK_V_ERROR; if (monitor && myid_dh == 0) { hypre_fprintf(stderr, "iter = %i rel. resid. norm: %e\n", its, sqrt(i_prod/bi_prod)); } /* check for convergence */ if (i_prod < eps) break; /* beta = gamma / gamma_old */ beta = gamma / gamma_old; /* p = s + beta p */ ScaleVec(m, beta, p); CHECK_V_ERROR; Axpy(m, 1.0, s, p); CHECK_V_ERROR; if (its >= maxIts) { its = -its; break; } } *itsOUT = its; FREE_DH(p); FREE_DH(s); FREE_DH(r); END_FUNC_DH }
void PCG_ParaSails(Matrix *mat, ParaSails *ps, double *b, double *x, double tol, HYPRE_Int max_iter) { double *p, *s, *r; double alpha, beta; double gamma, gamma_old; double bi_prod, i_prod, eps; HYPRE_Int i = 0; HYPRE_Int mype; /* local problem size */ HYPRE_Int n = mat->end_row - mat->beg_row + 1; MPI_Comm comm = mat->comm; hypre_MPI_Comm_rank(comm, &mype); /* compute square of absolute stopping threshold */ /* bi_prod = <b,b> */ bi_prod = InnerProd(n, b, b, comm); eps = (tol*tol)*bi_prod; /* Check to see if the rhs vector b is zero */ if (bi_prod == 0.0) { /* Set x equal to zero and return */ CopyVector(n, b, x); return; } p = (double *) malloc(n * sizeof(double)); s = (double *) malloc(n * sizeof(double)); r = (double *) malloc(n * sizeof(double)); /* r = b - Ax */ MatrixMatvec(mat, x, r); /* r = Ax */ ScaleVector(n, -1.0, r); /* r = -r */ Axpy(n, 1.0, b, r); /* r = r + b */ /* p = C*r */ if (ps != NULL) ParaSailsApply(ps, r, p); else CopyVector(n, r, p); /* gamma = <r,p> */ gamma = InnerProd(n, r, p, comm); while ((i+1) <= max_iter) { i++; /* s = A*p */ MatrixMatvec(mat, p, s); /* alpha = gamma / <s,p> */ alpha = gamma / InnerProd(n, s, p, comm); gamma_old = gamma; /* x = x + alpha*p */ Axpy(n, alpha, p, x); /* r = r - alpha*s */ Axpy(n, -alpha, s, r); /* s = C*r */ if (ps != NULL) ParaSailsApply(ps, r, s); else CopyVector(n, r, s); /* gamma = <r,s> */ gamma = InnerProd(n, r, s, comm); /* set i_prod for convergence test */ i_prod = InnerProd(n, r, r, comm); #ifdef PARASAILS_CG_PRINT if (mype == 0 && i % 100 == 0) hypre_printf("Iter (%d): rel. resid. norm: %e\n", i, sqrt(i_prod/bi_prod)); #endif /* check for convergence */ if (i_prod < eps) break; /* non-convergence test */ if (i >= 1000 && i_prod/bi_prod > 0.01) { if (mype == 0) hypre_printf("Aborting solve due to slow or no convergence.\n"); break; } /* beta = gamma / gamma_old */ beta = gamma / gamma_old; /* p = s + beta p */ ScaleVector(n, beta, p); Axpy(n, 1.0, s, p); } free(p); free(s); /* compute exact relative residual norm */ MatrixMatvec(mat, x, r); /* r = Ax */ ScaleVector(n, -1.0, r); /* r = -r */ Axpy(n, 1.0, b, r); /* r = r + b */ i_prod = InnerProd(n, r, r, comm); free(r); if (mype == 0) hypre_printf("Iter (%4d): computed rrn : %e\n", i, sqrt(i_prod/bi_prod)); }
//STARTBDRYRESIDUALS PetscErrorCode FormFunction(SNES snes, Vec u, Vec F, void *ctx) { PetscErrorCode ierr; unfemCtx *user = (unfemCtx*)ctx; const int *abfn, *ae, *as, *abfs, *en, deg = user->quaddeg - 1; const Node *aloc; const double *au; double *aF, unode[3], gradu[2], gradpsi[3][2], uquad[4], aquad[4], fquad[4], dx, dy, dx1, dx2, dy1, dy2, detJ, ls, xmid, ymid, sint, xx, yy, sum; int n, p, na, nb, k, l, q; PetscLogStagePush(user->resstage); //STRIP ierr = VecGetArrayRead(u,&au); CHKERRQ(ierr); ierr = VecSet(F,0.0); CHKERRQ(ierr); ierr = VecGetArray(F,&aF); CHKERRQ(ierr); ierr = UMGetNodeCoordArrayRead(user->mesh,&aloc); CHKERRQ(ierr); // Dirichlet node residuals ierr = ISGetIndices(user->mesh->bfn,&abfn); CHKERRQ(ierr); for (n = 0; n < user->mesh->N; n++) { if (abfn[n] == 2) // node is Dirichlet aF[n] = au[n] - user->gD_fcn(aloc[n].x,aloc[n].y); } // Neumann segment contributions ierr = ISGetIndices(user->mesh->s,&as); CHKERRQ(ierr); ierr = ISGetIndices(user->mesh->bfs,&abfs); CHKERRQ(ierr); for (p = 0; p < user->mesh->P; p++) { if (abfs[p] == 1) { // segment is Neumann na = as[2*p+0]; // nodes at end of segment nb = as[2*p+1]; // length of segment dx = aloc[na].x-aloc[nb].x; dy = aloc[na].y-aloc[nb].y; ls = sqrt(dx * dx + dy * dy); // midpoint rule; psi_na=psi_nb=0.5 at midpoint of segment xmid = 0.5*(aloc[na].x+aloc[nb].x); ymid = 0.5*(aloc[na].y+aloc[nb].y); sint = 0.5 * user->gN_fcn(xmid,ymid) * ls; // nodes at end of segment could be Dirichlet if (abfn[na] != 2) aF[na] -= sint; if (abfn[nb] != 2) aF[nb] -= sint; } } ierr = ISRestoreIndices(user->mesh->s,&as); CHKERRQ(ierr); ierr = ISRestoreIndices(user->mesh->bfs,&abfs); CHKERRQ(ierr); //ENDBDRYRESIDUALS //STARTELEMENTRESIDUALS // element contributions ierr = ISGetIndices(user->mesh->e,&ae); CHKERRQ(ierr); for (k = 0; k < user->mesh->K; k++) { en = ae + 3*k; // en[0], en[1], en[2] are nodes of element k // geometry of element dx1 = aloc[en[1]].x - aloc[en[0]].x; dx2 = aloc[en[2]].x - aloc[en[0]].x; dy1 = aloc[en[1]].y - aloc[en[0]].y; dy2 = aloc[en[2]].y - aloc[en[0]].y; detJ = dx1 * dy2 - dx2 * dy1; // gradients of hat functions for (l = 0; l < 3; l++) { gradpsi[l][0] = ( dy2 * dchi[l][0] - dy1 * dchi[l][1]) / detJ; gradpsi[l][1] = (-dx2 * dchi[l][0] + dx1 * dchi[l][1]) / detJ; } // u and grad u on element gradu[0] = 0.0; gradu[1] = 0.0; for (l = 0; l < 3; l++) { if (abfn[en[l]] == 2) unode[l] = user->gD_fcn(aloc[en[l]].x,aloc[en[l]].y); else unode[l] = au[en[l]]; gradu[0] += unode[l] * gradpsi[l][0]; gradu[1] += unode[l] * gradpsi[l][1]; } // function values at quadrature points on element for (q = 0; q < Q[deg]; q++) { uquad[q] = eval(unode,xi[deg][q],eta[deg][q]); xx = aloc[en[0]].x + dx1 * xi[deg][q] + dx2 * eta[deg][q]; yy = aloc[en[0]].y + dy1 * xi[deg][q] + dy2 * eta[deg][q]; aquad[q] = user->a_fcn(uquad[q],xx,yy); fquad[q] = user->f_fcn(uquad[q],xx,yy); } // residual contribution for each node of element for (l = 0; l < 3; l++) { if (abfn[en[l]] < 2) { // if NOT a Dirichlet node sum = 0.0; for (q = 0; q < Q[deg]; q++) sum += w[deg][q] * ( aquad[q] * InnerProd(gradu,gradpsi[l]) - fquad[q] * chi(l,xi[deg][q],eta[deg][q]) ); aF[en[l]] += fabs(detJ) * sum; } } } ierr = ISRestoreIndices(user->mesh->e,&ae); CHKERRQ(ierr); ierr = ISRestoreIndices(user->mesh->bfn,&abfn); CHKERRQ(ierr); ierr = UMRestoreNodeCoordArrayRead(user->mesh,&aloc); CHKERRQ(ierr); ierr = VecRestoreArrayRead(u,&au); CHKERRQ(ierr); ierr = VecRestoreArray(F,&aF); CHKERRQ(ierr); PetscLogStagePop(); //STRIP return 0; }
PetscErrorCode FormPicard(SNES snes, Vec u, Mat A, Mat P, void *ctx) { PetscErrorCode ierr; unfemCtx *user = (unfemCtx*)ctx; const int *abfn, *ae, *en, deg = user->quaddeg - 1; const Node *aloc; const double *au; double unode[3], gradpsi[3][2], uquad[4], aquad[4], v[9], dx1, dx2, dy1, dy2, detJ, xx, yy, sum; int n, k, l, m, q, cr, cv, row[3]; PetscLogStagePush(user->jacstage); //STRIP ierr = MatZeroEntries(P); CHKERRQ(ierr); ierr = ISGetIndices(user->mesh->bfn,&abfn); CHKERRQ(ierr); for (n = 0; n < user->mesh->N; n++) { if (abfn[n] == 2) { v[0] = 1.0; ierr = MatSetValues(P,1,&n,1,&n,v,ADD_VALUES); CHKERRQ(ierr); } } ierr = ISGetIndices(user->mesh->e,&ae); CHKERRQ(ierr); ierr = VecGetArrayRead(u,&au); CHKERRQ(ierr); ierr = UMGetNodeCoordArrayRead(user->mesh,&aloc); CHKERRQ(ierr); for (k = 0; k < user->mesh->K; k++) { en = ae + 3*k; // en[0], en[1], en[2] are nodes of element k // geometry of element dx1 = aloc[en[1]].x - aloc[en[0]].x; dx2 = aloc[en[2]].x - aloc[en[0]].x; dy1 = aloc[en[1]].y - aloc[en[0]].y; dy2 = aloc[en[2]].y - aloc[en[0]].y; detJ = dx1 * dy2 - dx2 * dy1; // gradients of hat functions and u on element for (l = 0; l < 3; l++) { gradpsi[l][0] = ( dy2 * dchi[l][0] - dy1 * dchi[l][1]) / detJ; gradpsi[l][1] = (-dx2 * dchi[l][0] + dx1 * dchi[l][1]) / detJ; if (abfn[en[l]] == 2) unode[l] = user->gD_fcn(aloc[en[l]].x,aloc[en[l]].y); else unode[l] = au[en[l]]; } // function values at quadrature points on element for (q = 0; q < Q[deg]; q++) { uquad[q] = eval(unode,xi[deg][q],eta[deg][q]); xx = aloc[en[0]].x + dx1 * xi[deg][q] + dx2 * eta[deg][q]; yy = aloc[en[0]].y + dy1 * xi[deg][q] + dy2 * eta[deg][q]; aquad[q] = user->a_fcn(uquad[q],xx,yy); } // generate 3x3 element stiffness matrix cr = 0; // count rows cv = 0; // count values for (l = 0; l < 3; l++) { if (abfn[en[l]] != 2) { row[cr] = en[l]; cr++; for (m = 0; m < 3; m++) { if (abfn[en[m]] != 2) { sum = 0.0; for (q = 0; q < Q[deg]; q++) { sum += w[deg][q] * aquad[q] * InnerProd(gradpsi[l],gradpsi[m]); } v[cv] = fabs(detJ) * sum; cv++; } } } } // insert element stiffness matrix ierr = MatSetValues(P,cr,row,cr,row,v,ADD_VALUES); CHKERRQ(ierr); } ierr = ISRestoreIndices(user->mesh->e,&ae); CHKERRQ(ierr); ierr = ISRestoreIndices(user->mesh->bfn,&abfn); CHKERRQ(ierr); ierr = VecRestoreArrayRead(u,&au); CHKERRQ(ierr); ierr = UMRestoreNodeCoordArrayRead(user->mesh,&aloc); CHKERRQ(ierr); ierr = MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); if (A != P) { ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); } ierr = MatSetOption(P,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE); CHKERRQ(ierr); PetscLogStagePop(); //STRIP return 0; }
inline int ProjectVector(double v[], const double n[]) { // project 'v' on the plane with normal given by 'n' and then normalize 'v' LinearCombination(InnerProd(n, n), v, -InnerProd(v, n), n, v); return Normalize(v); }