double *RefMap::get_phys_z(const int np, const QuadPt3D *pt) { _F_ // transform all z coordinates of the integration points double *z = new double[np]; MEM_CHECK(z); memset(z, 0, np * sizeof(double)); pss->force_transform(sub_idx, ctm); for (int i = 0; i < n_coefs; i++) { pss->set_active_shape(indices[i]); pss->precalculate(np, pt, FN_DEFAULT); blas_axpy(np, coefs[i].z, pss->get_fn_values(), 1, z, 1); } return z; }
void H1ProjectionIpol::calc_bubble_proj(int split, int son, const Ord3 &order) { _F_ int bubble_fns = (order.x - 1) * (order.y - 1) * (order.z - 1); if (bubble_fns <= 0) return; scalar *proj_rhs = new scalar[bubble_fns]; MEM_CHECK(proj_rhs); memset(proj_rhs, 0, sizeof(scalar) * bubble_fns); double **proj_mat = new_matrix<double>(bubble_fns, bubble_fns); MEM_CHECK(proj_mat); // get total number of functions (vertex + edge + face) int ipol_fns = Hex::NUM_VERTICES; for (int iedge = 0; iedge < Hex::NUM_EDGES; iedge++) { ipol_fns += order.get_edge_order(iedge) - 1; } for (int iface = 0; iface < Hex::NUM_FACES; iface++) { Ord2 face_order = order.get_face_order(iface); ipol_fns += (face_order.x - 1) * (face_order.y - 1); } ProjItem * ipol = new ProjItem[ipol_fns]; int mm = 0; // vertex projection coefficients for (int vtx = 0; vtx < Hex::NUM_VERTICES; vtx++, mm++) ipol[mm] = vertex_proj[vtx]; // edge projection coefficients for (int iedge = 0; iedge < Hex::NUM_EDGES; iedge++) { Ord1 edge_order = order.get_edge_order(iedge); int edge_fns = edge_order - 1; for (int i = 0; i < edge_fns; i++, mm++) ipol[mm] = edge_proj[iedge][i]; } // face projection coefficients for (int iface = 0; iface < Hex::NUM_FACES; iface++) { Ord2 face_order = order.get_face_order(iface); int face_fns = (face_order.x - 1) * (face_order.y - 1); for (int i = 0; i < face_fns; i++, mm++) ipol[mm] = face_proj[iface][i]; } // do it // int *bubble_fn_idx = ss->get_bubble_indices(order); for (int i = 0; i < bubble_fns; i++) { int iidx = bubble_fn_idx[i]; Ord3 oi = ss->get_dcmp(iidx); for (int j = 0; j < bubble_fns; j++) { int jidx = bubble_fn_idx[j]; Ord3 oj = ss->get_dcmp(jidx); double val = prod_fn[oi.x][oj.x] * prod_fn[oi.y][oj.y] * prod_fn[oi.z][oj.z] + prod_dx[oi.x][oj.x] * prod_fn[oi.y][oj.y] * prod_fn[oi.z][oj.z] + prod_fn[oi.x][oj.x] * prod_dx[oi.y][oj.y] * prod_fn[oi.z][oj.z] + prod_fn[oi.x][oj.x] * prod_fn[oi.y][oj.y] * prod_dx[oi.z][oj.z]; proj_mat[i][j] += val; } } for (int e = 0; e < 
int_ns[split]; e++) { unsigned int son_idx = base_elem->get_son(int_son[son][e]); sln->set_active_element(mesh->elements[son_idx]); Trf *tr = get_trf(int_trf[split][e]); for (int i = 0; i < bubble_fns; i++) { int iidx = bubble_fn_idx[i]; fu->set_active_shape(iidx); Ord3 order_rhs = ss->get_order(iidx) + order; QuadPt3D *pt = quad->get_points(order_rhs); int np = quad->get_num_points(order_rhs); if (int_trf[split][e] != -1) fu->push_transform(int_trf[split][e]); fu->precalculate(np, pt, FN_DEFAULT); sln->precalculate(np, pt, FN_DEFAULT); double *uval = fu->get_fn_values(); scalar *rval = sln->get_fn_values(); double *dudx, *dudy, *dudz; scalar *drdx, *drdy, *drdz; fu->get_dx_dy_dz_values(dudx, dudy, dudz); sln->get_dx_dy_dz_values(drdx, drdy, drdz); QuadPt3D *tpt = new QuadPt3D[np]; transform_points(np, pt, tr, tpt); scalar *g = new scalar[np]; scalar *dgdx = new scalar[np]; scalar *dgdy = new scalar[np]; scalar *dgdz = new scalar[np]; memset(g, 0, np * sizeof(scalar)); memset(dgdx, 0, np * sizeof(scalar)); memset(dgdy, 0, np * sizeof(scalar)); memset(dgdz, 0, np * sizeof(scalar)); for (int l = 0; l < ipol_fns; l++) { double *h = new double[np]; scalar *sch = new scalar[np]; ss->get_fn_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, g, 1); ss->get_dx_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1); ss->get_dy_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1); ss->get_dz_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdz, 1); delete [] h; delete [] sch; } delete [] tpt; scalar value = 0.0; for (int k = 0; k < quad->get_num_points(order_rhs); k++) { value += pt[k].w * (uval[k] * (rval[k] - g[k]) + dudx[k] * ((drdx[k] * mdx[split]) - dgdx[k]) + dudy[k] * ((drdy[k] 
* mdy[split]) - dgdy[k]) + dudz[k] * ((drdz[k] * mdz[split]) - dgdz[k])); } delete [] g; delete [] dgdx; delete [] dgdy; delete [] dgdz; proj_rhs[i] += value * (1 / (double) int_ns[split]); if (int_trf[split][e] != -1) fu->pop_transform(); } } delete [] ipol; double d; int *iperm = new int[bubble_fns]; ludcmp(proj_mat, bubble_fns, iperm, &d); lubksb(proj_mat, bubble_fns, iperm, proj_rhs); delete iperm; bubble_proj = new ProjItem [bubble_fns]; for (int i = 0; i < bubble_fns; i++) { bubble_proj[i].coef = proj_rhs[i]; bubble_proj[i].idx = bubble_fn_idx[i]; } delete [] proj_mat; delete [] proj_rhs; }
double H1ProjectionIpol::get_error(int split, int son, const Ord3 &order) { _F_ sln->enable_transform(false); Ord3 order_rhs = order; calc_projection(split, son, order_rhs); // error QuadPt3D *pt = quad->get_points(order_rhs); int np = quad->get_num_points(order_rhs); double error = 0.0; for (int i = 0; i < int_ns[split]; i++) { Trf *tr = get_trf(int_trf[split][i]); unsigned int son_idx = base_elem->get_son(int_son[son + 1][i]); sln->set_active_element(mesh->elements[son_idx]); sln->precalculate(np, pt, FN_DEFAULT); scalar *rval = sln->get_fn_values(); scalar *rdx, *rdy, *rdz; sln->get_dx_dy_dz_values(rdx, rdy, rdz); QuadPt3D * tpt = new QuadPt3D[np]; transform_points(np, pt, tr, tpt); scalar * prfn = new scalar[np]; scalar * prdx = new scalar[np]; scalar * prdy = new scalar[np]; scalar * prdz = new scalar[np]; memset(prfn, 0, np * sizeof(double)); memset(prdx, 0, np * sizeof(double)); memset(prdy, 0, np * sizeof(double)); memset(prdz, 0, np * sizeof(double)); for (int i = 0; i < proj_fns; i++) { #ifndef H3D_COMPLEX double * tmp = new double[np]; ss->get_fn_values(proj[i]->idx, np, tpt, 0, tmp); blas_axpy(np, proj[i]->coef, tmp, 1, prfn, 1); ss->get_dx_values(proj[i]->idx, np, tpt, 0, tmp); blas_axpy(np, proj[i]->coef, tmp, 1, prdx, 1); ss->get_dy_values(proj[i]->idx, np, tpt, 0, tmp); blas_axpy(np, proj[i]->coef, tmp, 1, prdy, 1); ss->get_dz_values(proj[i]->idx, np, tpt, 0, tmp); blas_axpy(np, proj[i]->coef, tmp, 1, prdz, 1); delete[] tmp; #else double * tmp = new double[np]; scalar * sctmp = new scalar[np]; ss->get_fn_values(proj[i]->idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj[i]->coef, sctmp, 1, prfn, 1); ss->get_dx_values(proj[i]->idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj[i]->coef, sctmp, 1, prdx, 1); ss->get_dy_values(proj[i]->idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj[i]->coef, sctmp, 1, prdy, 1); 
ss->get_dz_values(proj[i]->idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj[i]->coef, sctmp, 1, prdz, 1); delete[] tmp; delete[] sctmp; #endif } for (int k = 0; k < np; k++) error += pt[k].w * (sqr(rval[k] - prfn[k]) + sqr(rdx[k] * mdx[split] - prdx[k]) + sqr(rdy[k] * mdy[split] - prdy[k]) + sqr(rdz[k] * mdz[split] - prdz[k])); delete[] tpt; delete[] prfn; delete[] prdx; delete[] prdy; delete[] prdz; } sln->enable_transform(true); return error; }
void H1ProjectionIpol::calc_face_proj(int iface, int split, int son, const Ord3 &order) { _F_ Ord2 face_order = order.get_face_order(iface); int face_fns = (face_order.x - 1) * (face_order.y - 1); if (face_fns <= 0) return; scalar *proj_rhs = new scalar[face_fns]; MEM_CHECK(proj_rhs); memset(proj_rhs, 0, sizeof(scalar) * face_fns); double **proj_mat = new_matrix<double>(face_fns, face_fns); MEM_CHECK(proj_mat); const int *face_vertex = RefHex::get_face_vertices(iface); const int *face_edge = RefHex::get_face_edges(iface); // get total number of functions for interpolant (vertex + edge functions) int ipol_fns = RefHex::get_num_face_vertices(iface); for (int iedge = 0; iedge < RefHex::get_num_face_edges(iface); iedge++) ipol_fns += order.get_edge_order(face_edge[iedge]) - 1; // interpolant ProjItem * ipol = new ProjItem[ipol_fns]; int mm = 0; for (int vtx = 0; vtx < RefHex::get_num_face_vertices(iface); vtx++, mm++) ipol[mm] = vertex_proj[face_vertex[vtx]]; for (int iedge = 0; iedge < RefHex::get_num_face_edges(iface); iedge++) { Ord1 edge_order = order.get_edge_order(face_edge[iedge]); int edge_fns = edge_order - 1; for (int i = 0; i < edge_fns; i++, mm++) ipol[mm] = edge_proj[face_edge[iedge]][i]; } int face_ori = 0; int *face_fn_idx = ss->get_face_indices(iface, face_ori, face_order); for (int i = 0; i < face_fns; i++) { int iidx = face_fn_idx[i]; Ord3 oi = ss->get_dcmp(iidx); for (int j = 0; j < face_fns; j++) { int jidx = face_fn_idx[j]; Ord3 oj = ss->get_dcmp(jidx); double val = 0.0; if (iface == 0 || iface == 1) { val = prod_fn[oi.y][oj.y] * prod_fn[oi.z][oj.z] + prod_dx[oi.y][oj.y] * prod_fn[oi.z][oj.z] + prod_fn[oi.y][oj.y] * prod_dx[oi.z][oj.z]; } else if (iface == 2 || iface == 3) { val = prod_fn[oi.x][oj.x] * prod_fn[oi.z][oj.z] + prod_dx[oi.x][oj.x] * prod_fn[oi.z][oj.z] + prod_fn[oi.x][oj.x] * prod_dx[oi.z][oj.z]; } else if (iface == 4 || iface == 5) { val = prod_fn[oi.x][oj.x] * prod_fn[oi.y][oj.y] + prod_dx[oi.x][oj.x] * prod_fn[oi.y][oj.y] + 
prod_fn[oi.x][oj.x] * prod_dx[oi.y][oj.y]; } else EXIT("Local face number out of range."); proj_mat[i][j] += val; } } for (int e = 0; e < face_ns[split][iface]; e++) { unsigned int son_idx = base_elem->get_son(face_son[son][iface][e]); sln->set_active_element(mesh->elements[son_idx]); Trf *tr = get_trf(face_trf[split][iface][e]); for (int i = 0; i < face_fns; i++) { int iidx = face_fn_idx[i]; fu->set_active_shape(iidx); Ord2 ord = (ss->get_order(iidx) + order).get_face_order(iface); QuadPt3D *pt = quad->get_face_points(iface, ord); int np = quad->get_face_num_points(iface, ord); if (face_trf[split][iface][e] != -1) fu->push_transform(face_trf[split][iface][e]); fu->precalculate(np, pt, FN_DEFAULT); sln->precalculate(np, pt, FN_DEFAULT); double *uval = fu->get_fn_values(); scalar *rval = sln->get_fn_values(); double *dudx, *dudy; scalar *drdx, *drdy; double md, me; if (iface == 0 || iface == 1) { dudx = fu->get_dy_values(); drdx = sln->get_dy_values(); dudy = fu->get_dz_values(); drdy = sln->get_dz_values(); md = mdy[split]; me = mdz[split]; } else if (iface == 2 || iface == 3) { dudx = fu->get_dx_values(); drdx = sln->get_dx_values(); dudy = fu->get_dz_values(); drdy = sln->get_dz_values(); md = mdx[split]; me = mdz[split]; } else if (iface == 4 || iface == 5) { dudx = fu->get_dx_values(); drdx = sln->get_dx_values(); dudy = fu->get_dy_values(); drdy = sln->get_dy_values(); md = mdx[split]; me = mdy[split]; } else EXIT("Local face number out of range."); QuadPt3D *tpt = new QuadPt3D[np]; transform_points(np, pt, tr, tpt); scalar * g = new scalar[np]; scalar * dgdx = new scalar[np]; scalar * dgdy = new scalar[np]; memset(g, 0, np * sizeof(scalar)); memset(dgdx, 0, np * sizeof(scalar)); memset(dgdy, 0, np * sizeof(scalar)); for (int l = 0; l < ipol_fns; l++) { double * h = new double[np]; scalar * sch = new scalar[np]; ss->get_fn_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, g, 1); if (iface 
== 0 || iface == 1) { ss->get_dy_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1); ss->get_dz_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1); } else if (iface == 2 || iface == 3) { ss->get_dx_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1); ss->get_dz_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1); } else if (iface == 4 || iface == 5) { ss->get_dx_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1); ss->get_dy_values(ipol[l].idx, np, tpt, 0, h); for (int ii = 0; ii < np; ii++) sch[ii] = h[ii]; blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1); } else EXIT("Local face number out of range."); delete [] h; delete [] sch; } delete tpt; scalar value = 0.0; for (int k = 0; k < np; k++) value += pt[k].w * (uval[k] * (rval[k] - g[k]) + dudx[k] * ((drdx[k] * md) - dgdx[k]) + dudy[k] * ((drdy[k] * me) - dgdy[k])); proj_rhs[i] += value * (1 / (double) face_ns[split][iface]); delete [] g; delete [] dgdx; delete [] dgdy; if (face_trf[split][iface][e] != -1) fu->pop_transform(); } } delete [] ipol; double d; int * iperm = new int[face_fns]; ludcmp(proj_mat, face_fns, iperm, &d); lubksb(proj_mat, face_fns, iperm, proj_rhs); delete [] iperm; face_proj[iface] = new ProjItem [face_fns]; for (int i = 0; i < face_fns; i++) { face_proj[iface][i].coef = proj_rhs[i]; face_proj[iface][i].idx = face_fn_idx[i]; } delete [] proj_mat; delete [] proj_rhs; }
void H1ProjectionIpol::calc_edge_proj(int iedge, int split, int son, const Ord3 &order) { _F_ Ord1 edge_order = order.get_edge_order(iedge); int edge_fns = edge_order - 1; if (edge_fns <= 0) return; scalar *proj_rhs = new scalar[edge_fns]; MEM_CHECK(proj_rhs); memset(proj_rhs, 0, sizeof(scalar) * edge_fns); double **proj_mat = new_matrix<double>(edge_fns, edge_fns); MEM_CHECK(proj_rhs); // local edge vertex numbers const int *edge_vtx = RefHex::get_edge_vertices(iedge); ProjItem vtxp[] = { vertex_proj[edge_vtx[0]], vertex_proj[edge_vtx[1]] }; int *edge_fn_idx = ss->get_edge_indices(iedge, 0, edge_order); // indices of edge functions for (int i = 0; i < edge_fns; i++) { int iidx = edge_fn_idx[i]; Ord3 oi = ss->get_dcmp(iidx); for (int j = 0; j < edge_fns; j++) { int jidx = edge_fn_idx[j]; Ord3 oj = ss->get_dcmp(jidx); double val = 0.0; if (iedge == 0 || iedge == 2 || iedge == 8 || iedge == 10) { val = prod_fn[oi.x][oj.x] + prod_dx[oi.x][oj.x]; } else if (iedge == 1 || iedge == 3 || iedge == 9 || iedge == 11) { val = prod_fn[oi.y][oj.y] + prod_dx[oi.y][oj.y]; } else if (iedge == 4 || iedge == 5 || iedge == 6 || iedge == 7) { val = prod_fn[oi.z][oj.z] + prod_dx[oi.z][oj.z]; } else EXIT("Local edge number out of range."); proj_mat[i][j] += val; } } for (int e = 0; e < edge_ns[split][iedge]; e++) { edge_fn_idx = ss->get_edge_indices(iedge, 0, edge_order); // indices of edge functions unsigned int son_idx = base_elem->get_son(edge_son[son][iedge][e]); sln->set_active_element(mesh->elements[son_idx]); Trf *tr = get_trf(edge_trf[split][iedge][e]); for (int i = 0; i < edge_fns; i++) { int iidx = edge_fn_idx[i]; fu->set_active_shape(iidx); Ord1 ord = (ss->get_order(iidx) + order).get_edge_order(iedge); QuadPt3D *pt = quad->get_edge_points(iedge, ord); int np = quad->get_edge_num_points(iedge, ord); if (edge_trf[split][iedge][e] != -1) fu->push_transform(edge_trf[split][iedge][e]); fu->precalculate(np, pt, FN_DEFAULT); sln->precalculate(np, pt, FN_DEFAULT); double *uval = 
fu->get_fn_values(); scalar *rval = sln->get_fn_values(); double *du, md; scalar *dr; if (iedge == 0 || iedge == 2 || iedge == 8 || iedge == 10) { du = fu->get_dx_values(); dr = sln->get_dx_values(); md = mdx[split]; } else if (iedge == 1 || iedge == 3 || iedge == 9 || iedge == 11) { du = fu->get_dy_values(); dr = sln->get_dy_values(); md = mdy[split]; } else if (iedge == 4 || iedge == 5 || iedge == 6 || iedge == 7) { du = fu->get_dz_values(); dr = sln->get_dz_values(); md = mdz[split]; } else EXIT("Local edge number out of range."); QuadPt3D *tpt = new QuadPt3D[np]; transform_points(np, pt, tr, tpt); double *tmp = new double[np]; scalar *sctmp = new scalar[np]; scalar *g = new scalar[np]; // interpolant memset(g, 0, np * sizeof(scalar)); #ifndef H3D_COMPLEX ss->get_fn_values(vtxp[0].idx, np, tpt, 0, tmp); blas_axpy(np, vtxp[0].coef, tmp, 1, g, 1); ss->get_fn_values(vtxp[1].idx, np, tpt, 0, tmp); blas_axpy(np, vtxp[1].coef, tmp, 1, g, 1); #else ss->get_fn_values(vtxp[0].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, vtxp[0].coef, sctmp, 1, g, 1); ss->get_fn_values(vtxp[1].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, vtxp[1].coef, sctmp, 1, g, 1); #endif scalar *dg = new scalar[np]; memset(dg, 0, np * sizeof(scalar)); if (iedge == 0 || iedge == 2 || iedge == 8 || iedge == 10) { ss->get_dx_values(vtxp[0].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, vtxp[0].coef, sctmp, 1, dg, 1); ss->get_dx_values(vtxp[1].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, vtxp[1].coef, sctmp, 1, dg, 1); } else if (iedge == 1 || iedge == 3 || iedge == 9 || iedge == 11) { ss->get_dy_values(vtxp[0].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, vtxp[0].coef, sctmp, 1, dg, 1); ss->get_dy_values(vtxp[1].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; 
blas_axpy(np, vtxp[1].coef, sctmp, 1, dg, 1); } else if (iedge == 4 || iedge == 5 || iedge == 6 || iedge == 7) { ss->get_dz_values(vtxp[0].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, vtxp[0].coef, sctmp, 1, dg, 1); ss->get_dz_values(vtxp[1].idx, np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, vtxp[1].coef, sctmp, 1, dg, 1); } else EXIT("Local edge number out of range."); delete [] tmp; delete [] sctmp; delete [] tpt; scalar value = 0.0; for (int k = 0; k < np; k++) value += pt[k].w * (uval[k] * (rval[k] - g[k]) + du[k] * ((dr[k] * md) - dg[k])); proj_rhs[i] += value * (1 / (double) edge_ns[split][iedge]); delete [] g; delete [] dg; if (edge_trf[split][iedge][e] != -1) fu->pop_transform(); } } double d; int * iperm = new int[edge_fns]; ludcmp(proj_mat, edge_fns, iperm, &d); lubksb(proj_mat, edge_fns, iperm, proj_rhs); delete [] iperm; // copy functions and coefficients to the basis edge_proj[iedge] = new ProjItem[edge_fns]; for (int i = 0; i < edge_fns; i++) { edge_proj[iedge][i].coef = proj_rhs[i]; edge_proj[iedge][i].idx = edge_fn_idx[i]; } delete [] proj_mat; delete [] proj_rhs; }
double H1Projection::get_error(int split, int son, const order3_t &order) { _F_ sln->enable_transform(false); calc_projection(split, son + 1, order); order3_t order_rhs = order; QuadPt3D *pt = quad->get_points(order_rhs); int np = quad->get_num_points(order_rhs); double error = 0.0; for (int i = 0; i < int_ns[split]; i++) { Trf *tr = get_trf(int_trf[split][i]); Word_t son_idx = base_elem->get_son(int_son[son + 1][i]); sln->set_active_element(mesh->elements[son_idx]); sln->precalculate(np, pt, FN_DEFAULT); scalar *rval = sln->get_fn_values(); scalar *rdx, *rdy, *rdz; sln->get_dx_dy_dz_values(rdx, rdy, rdz); QuadPt3D tpt[np]; transform_points(np, pt, tr, tpt); scalar prfn[np], prdx[np], prdy[np], prdz[np]; memset(prfn, 0, np * sizeof(double)); memset(prdx, 0, np * sizeof(double)); memset(prdy, 0, np * sizeof(double)); memset(prdz, 0, np * sizeof(double)); for (int i = 0; i < n_fns; i++) { #ifndef H3D_COMPLEX double tmp[np]; ss->get_fn_values(fn_idx[i], np, tpt, 0, tmp); blas_axpy(np, proj_coef[i], tmp, 1, prfn, 1); ss->get_dx_values(fn_idx[i], np, tpt, 0, tmp); blas_axpy(np, proj_coef[i], tmp, 1, prdx, 1); ss->get_dy_values(fn_idx[i], np, tpt, 0, tmp); blas_axpy(np, proj_coef[i], tmp, 1, prdy, 1); ss->get_dz_values(fn_idx[i], np, tpt, 0, tmp); blas_axpy(np, proj_coef[i], tmp, 1, prdz, 1); #else double tmp[np]; scalar sctmp[np]; ss->get_fn_values(fn_idx[i], np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj_coef[i], sctmp, 1, prfn, 1); ss->get_dx_values(fn_idx[i], np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj_coef[i], sctmp, 1, prdx, 1); ss->get_dy_values(fn_idx[i], np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj_coef[i], sctmp, 1, prdy, 1); ss->get_dz_values(fn_idx[i], np, tpt, 0, tmp); for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii]; blas_axpy(np, proj_coef[i], sctmp, 1, prdz, 1); #endif } for (int k = 0; k < np; k++) error += pt[k].w * 
(sqr(rval[k] - prfn[k]) + sqr(rdx[k] * mdx[split] - prdx[k]) + sqr(rdy[k] * mdy[split] - prdy[k]) + sqr(rdz[k] * mdz[split] - prdz[k])); } sln->enable_transform(true); return error; }
int ParpackSolver::Solve(int nev) { /* Get MPI info */ int nprocs, me; MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Fint fcomm = MPI_Comm_c2f(MPI_COMM_WORLD); /* Select number of working Ritz vectors */ if(ncv == -1) ncv = 2*nev; ncv = std::min(ncv, n-1); /* Initialize matrix descriptors */ xdesc = pcontext->new_descriptor(n, 1, divup(n,nprocs), 1); Bdesc = pcontext->new_descriptor(n, ncv, divup(n,nprocs), ncv); assert(nloc == Bdesc->num_local_rows() && nloc == xdesc->num_local_rows()); assert(ncv == Bdesc->num_local_cols() && 1 == xdesc->num_local_cols()); /* Allocate local memory for eigenvector matrix $B$ */ Bvalues = (real*) opsec_malloc(Bdesc->local_size() * sizeof(real)); real sigma; int iparam[11], ipntr[11]; /* Set PARPACK parameters */ char bmat[] = "I"; char which[] = "LA"; char howmny[] = "All"; iparam[0] = 1; // ishfts iparam[2] = maxitr; // maxitr iparam[6] = 1; // mode /* Allocate working memory */ int lworkl = ncv*(ncv + 8); real* workl = (real*) opsec_calloc(lworkl, sizeof(real)); real* workd = (real*) opsec_calloc(3*nloc, sizeof(real)); real* resid = (real*) opsec_calloc(nloc, sizeof(real)); int* select = (int*) opsec_calloc(ncv, sizeof(int)); /* Begin reverse communication loop */ int itr = 0; int info = 0; int ido = 0; while(ido != 99) { parpack_psaupd(&fcomm, &ido, bmat, &nloc, which, &nev, &tol, resid, &ncv, Bvalues, &nloc, iparam, ipntr, workd, workl, &lworkl, &info); if(ido == 1 || ido == -1) { /* Compute y = A*x (don't forget Fortran indexing conventions!) 
*/ slp::Matrix<real> A(Adesc, Avalues); slp::Matrix<real> x(xdesc, &workd[ipntr[0] - 1]); slp::Matrix<real> y(xdesc, &workd[ipntr[1] - 1]); slp::multiply(A, x, y); } } if(me == 0) { opsec_info("Number of Implicit Arnoldi update iterations taken is %d\n", iparam[2]); opsec_info(" info = %d\n", info); opsec_info(" nconv = %d, nev = %d\n", iparam[4], nev); time_t t = time(NULL); opsec_info("Time: %s\n", ctime(&t)); opsec_info("Post-processing Ritz values and vectors\n"); } /* Check return code */ if(info < 0) { /* Error encountered. Abort. */ if(me == 0) opsec_error("parpack_psaupd returned error: info = %d\n", info); return info; } else { /* Save number of successfully computed eigenvalues */ nconv = iparam[4]; evals.resize(nconv); /* Retrieve eigenvalues and eigenvectors */ int rvec = 1; int ierr; parpack_pseupd(&fcomm, &rvec, howmny, select, &evals[0], Bvalues, &nloc, &sigma, bmat, &nloc, which, &nev, &tol, resid, &ncv, Bvalues, &nloc, iparam, ipntr, workd, workl, &lworkl, &ierr); if(ierr != 0) { if(me == 0) opsec_error("parpack_pseupd returned error: ierr = %d\n", ierr); } } if(me == 0) { time_t t = time(NULL); opsec_info("Time: %s\n", ctime(&t)); } #if 0 { int i; /* Debugging: check residuals || A*x - lambda*x || */ y = (real*) opsec_calloc(nloc, sizeof(real)); for(i = iparam[4]-1; i >= 0; i--) { static char trans = 'T'; static int incx = 1; static int incy = 1; static real alpha = 1.0; static real beta = 0.0; real a = -evals[i]; ierr = MPI_Allgatherv(&evecs[i*nloc], nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD); blas_gemv(&trans, &n, &nloc, &alpha, A, &n, xfull, &incx, &beta, y, &incy); blas_axpy(&nloc, &a, &evecs[i*nloc], &incx, y, &incy); real d = parpack_pnorm2(&fcomm, &nloc, y, &incy); if(myid == 0) printf("Eigenvalue %d: lambda = %16.16f, |A*x - lambda*x| = %16.16f\n", iparam[4]-i, evals[i], d); ierr = MPI_Allgatherv(y, nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD); } free(y); } #endif #if 0 
/* Sort from largest to smallest eigenvalue */ for(int j = 0; j < nconv/2; j++) { std::swap(evals[j], evals[nconv-j-1]); memcpy(workd, &B(0,j), nloc*sizeof(real)); memcpy(&B(0,j), &B(0,nconv-j-1), nloc*sizeof(real)); memcpy(&B(0,nconv-j-1), workd, nloc*sizeof(real)); } #endif /* Clean up */ free(workl); free(workd); free(resid); free(select); return nconv; }
/*
 * Regularized Sinkhorn-Knopp scaling.
 *
 * Iteratively updates the vectors d and e from the entries of A_in (after
 * matrix_abs() has been applied to the working copy in A_out), then rebuilds
 * A_out from A_in with every row multiplied by e and every column multiplied
 * by d (via vector_mul()).
 *
 * Each sweep performs
 *     e <- size(d) / (A^T d + c / size(e))
 *     d <- size(e) / (A   e + c / size(d))
 * with c = 1e-4. Iteration stops after 300 sweeps, or as soon as the 2-norms
 * of the changes in both d and e drop below 1e-2.
 *
 * Arguments:
 *   linalg_handle  handle forwarded to the BLAS wrappers
 *   A_in           raw input matrix data, interpreted according to `ord`
 *   A_out          output matrix; size1 must equal d->size, size2 must equal
 *                  e->size
 *   d, e           output scaling vectors
 *   ord            storage order of A_in
 *
 * Returns OPTKIT_ERROR_DIMENSION_MISMATCH (through OK_SCAN_ERR) when the
 * sizes disagree, otherwise the status accumulated by OK_CHECK_ERR.
 */
ok_status regularized_sinkhorn_knopp(void * linalg_handle, ok_float * A_in,
    matrix * A_out, vector * d, vector * e, enum CBLAS_ORDER ord)
{
    OK_CHECK_PTR(A_in);
    OK_CHECK_MATRIX(A_out);
    OK_CHECK_VECTOR(d);
    OK_CHECK_VECTOR(e);

    ok_status err = OPTKIT_SUCCESS;
    const ok_float kRegularization = (ok_float) 1e-4;
    const ok_float kTolerance = (ok_float) 1e-2;
    const size_t kSweepLimit = 300;
    ok_float d_change, e_change;
    size_t sweep, row, col;
    vector slice, d_delta, e_delta;

    slice.data = OK_NULL;
    d_delta.data = OK_NULL;
    e_delta.data = OK_NULL;

    if (A_out->size1 != d->size || A_out->size2 != e->size)
        return OK_SCAN_ERR( OPTKIT_ERROR_DIMENSION_MISMATCH );

    /* d_delta / e_delta hold the previous iterate between sweeps */
    vector_calloc(&d_delta, A_out->size1);
    vector_calloc(&e_delta, A_out->size2);

    d_change = 1;
    e_change = 1;

    OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) );
    OK_CHECK_ERR( err, matrix_abs(A_out) );
    OK_CHECK_ERR( err, vector_set_all(d, kOne) );
    OK_CHECK_ERR( err, vector_scale(e, kZero) );

    /* optional argument ok_float pnorm? */
    /*
    if (pnorm != 1) {
        matrix_pow(A, pnorm)
    }
    */

    sweep = 0;
    while (sweep < kSweepLimit && !err) {
        /* e <- size(d) / (A^T d + c / size(e)) */
        blas_gemv(linalg_handle, CblasTrans, kOne, A_out, d, kZero, e);
        vector_add_constant(e, kRegularization / (ok_float) e->size);
        vector_recip(e);
        vector_scale(e, (ok_float) d->size);

        /* d <- size(e) / (A e + c / size(d)) */
        blas_gemv(linalg_handle, CblasNoTrans, kOne, A_out, e, kZero, d);
        vector_add_constant(d, kRegularization / (ok_float) d->size);
        vector_recip(d);
        vector_scale(d, (ok_float) e->size);

        /* previous iterate minus current iterate, then its 2-norm */
        blas_axpy(linalg_handle, -kOne, d, &d_delta);
        blas_axpy(linalg_handle, -kOne, e, &e_delta);
        blas_nrm2(linalg_handle, &d_delta, &d_change);
        blas_nrm2(linalg_handle, &e_delta, &e_change);

        if ((d_change < kTolerance) && (e_change < kTolerance))
            break;

        /* remember the current iterate for the next sweep */
        vector_memcpy_vv(&d_delta, d);
        vector_memcpy_vv(&e_delta, e);

        ++sweep;
    }

    /* optional argument ok_float pnorm? */
    /*
    if (pnorm != 1) {
        vector_pow(d, kOne / pnorm)
        vector_pow(e, kOne / pnorm)
    }
    */

    /* start again from the original entries and apply the scalings */
    OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) );

    if (!err) {
        row = 0;
        while (row < A_out->size1) {
            matrix_row(&slice, A_out, row);
            vector_mul(&slice, e);
            ++row;
        }

        col = 0;
        while (col < A_out->size2) {
            matrix_column(&slice, A_out, col);
            vector_mul(&slice, d);
            ++col;
        }
    }

    vector_free(&d_delta);
    vector_free(&e_delta);
    return err;
}