Пример #1
0
double *RefMap::get_phys_z(const int np, const QuadPt3D *pt) {
	_F_
	// transform all z coordinates of the integration points
	double *z = new double[np];
	MEM_CHECK(z);
	memset(z, 0, np * sizeof(double));
	pss->force_transform(sub_idx, ctm);
	for (int i = 0; i < n_coefs; i++) {
		pss->set_active_shape(indices[i]);
		pss->precalculate(np, pt, FN_DEFAULT);
		blas_axpy(np, coefs[i].z, pss->get_fn_values(), 1, z, 1);
	}

	return z;
}
Пример #2
0
// Computes the coefficients of the bubble shape functions for the given order
// and stores them in bubble_proj.
//
// Steps:
//  1. gather the vertex, edge and face items (vertex_proj, edge_proj,
//     face_proj) into one flat array `ipol`,
//  2. assemble a bubble_fns x bubble_fns matrix from the prod_fn / prod_dx tables,
//  3. build the right-hand side by integrating the difference between sln and
//     the combination of the `ipol` items against every bubble function over
//     the int_ns[split] sub-elements,
//  4. solve the system with ludcmp / lubksb and copy the result to bubble_proj.
//
// @param split  index into int_ns, int_trf and the mdx/mdy/mdz tables
// @param son    index into int_son selecting the son elements to integrate on
// @param order  order whose x/y/z components determine the number of bubble
//               functions
//
// Returns immediately, leaving bubble_proj untouched, when the order admits
// no bubble functions.
void H1ProjectionIpol::calc_bubble_proj(int split, int son, const Ord3 &order) {
	_F_
	int bubble_fns = (order.x - 1) * (order.y - 1) * (order.z - 1);
	if (bubble_fns <= 0) return;

	scalar *proj_rhs = new scalar[bubble_fns];
	MEM_CHECK(proj_rhs);
	memset(proj_rhs, 0, sizeof(scalar) * bubble_fns);
	double **proj_mat = new_matrix<double>(bubble_fns, bubble_fns);
	MEM_CHECK(proj_mat);

	// get total number of functions (vertex + edge + face)
	int ipol_fns = Hex::NUM_VERTICES;
	for (int iedge = 0; iedge < Hex::NUM_EDGES; iedge++) {
		ipol_fns += order.get_edge_order(iedge) - 1;
	}
	for (int iface = 0; iface < Hex::NUM_FACES; iface++) {
		Ord2 face_order = order.get_face_order(iface);
		ipol_fns += (face_order.x - 1) * (face_order.y - 1);
	}

	ProjItem *ipol = new ProjItem[ipol_fns];
	int mm = 0;
	// vertex projection coefficients
	for (int vtx = 0; vtx < Hex::NUM_VERTICES; vtx++, mm++)
		ipol[mm] = vertex_proj[vtx];
	// edge projection coefficients
	for (int iedge = 0; iedge < Hex::NUM_EDGES; iedge++) {
		Ord1 edge_order = order.get_edge_order(iedge);
		int edge_fns = edge_order - 1;
		for (int i = 0; i < edge_fns; i++, mm++)
			ipol[mm] = edge_proj[iedge][i];
	}
	// face projection coefficients
	for (int iface = 0; iface < Hex::NUM_FACES; iface++) {
		Ord2 face_order = order.get_face_order(iface);
		int face_fns = (face_order.x - 1) * (face_order.y - 1);
		for (int i = 0; i < face_fns; i++, mm++)
			ipol[mm] = face_proj[iface][i];
	}

	// assemble the matrix from the 1D product tables
	int *bubble_fn_idx = ss->get_bubble_indices(order);
	for (int i = 0; i < bubble_fns; i++) {
		int iidx = bubble_fn_idx[i];
		Ord3 oi = ss->get_dcmp(iidx);
		for (int j = 0; j < bubble_fns; j++) {
			int jidx = bubble_fn_idx[j];
			Ord3 oj = ss->get_dcmp(jidx);
			double val =
				prod_fn[oi.x][oj.x] * prod_fn[oi.y][oj.y] * prod_fn[oi.z][oj.z] +
				prod_dx[oi.x][oj.x] * prod_fn[oi.y][oj.y] * prod_fn[oi.z][oj.z] +
				prod_fn[oi.x][oj.x] * prod_dx[oi.y][oj.y] * prod_fn[oi.z][oj.z] +
				prod_fn[oi.x][oj.x] * prod_fn[oi.y][oj.y] * prod_dx[oi.z][oj.z];
			proj_mat[i][j] += val;
		}
	}

	// assemble the right-hand side, one sub-element at a time
	for (int e = 0; e < int_ns[split]; e++) {
		unsigned int son_idx = base_elem->get_son(int_son[son][e]);
		sln->set_active_element(mesh->elements[son_idx]);

		Trf *tr = get_trf(int_trf[split][e]);
		for (int i = 0; i < bubble_fns; i++) {
			int iidx = bubble_fn_idx[i];
			fu->set_active_shape(iidx);

			Ord3 order_rhs = ss->get_order(iidx) + order;
			QuadPt3D *pt = quad->get_points(order_rhs);
			int np = quad->get_num_points(order_rhs);

			// -1 marks "no transformation"; the matching pop is at the end of the loop body
			if (int_trf[split][e] != -1) fu->push_transform(int_trf[split][e]);
			fu->precalculate(np, pt, FN_DEFAULT);
			sln->precalculate(np, pt, FN_DEFAULT);

			double *uval = fu->get_fn_values();
			scalar *rval = sln->get_fn_values();

			double *dudx, *dudy, *dudz;
			scalar *drdx, *drdy, *drdz;

			fu->get_dx_dy_dz_values(dudx, dudy, dudz);
			sln->get_dx_dy_dz_values(drdx, drdy, drdz);

			QuadPt3D *tpt = new QuadPt3D[np];
			transform_points(np, pt, tr, tpt);

			// values and derivatives of the combined vertex/edge/face items at tpt
			scalar *g = new scalar[np];
			scalar *dgdx = new scalar[np];
			scalar *dgdy = new scalar[np];
			scalar *dgdz = new scalar[np];
			memset(g, 0, np * sizeof(scalar));
			memset(dgdx, 0, np * sizeof(scalar));
			memset(dgdy, 0, np * sizeof(scalar));
			memset(dgdz, 0, np * sizeof(scalar));

			// scratch buffers, allocated once and reused for every item
			double *h = new double[np];
			scalar *sch = new scalar[np];
			for (int l = 0; l < ipol_fns; l++) {
				// the shapeset yields doubles; copy into a scalar buffer for blas_axpy
				ss->get_fn_values(ipol[l].idx, np, tpt, 0, h);
				for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
				blas_axpy(np, ipol[l].coef, sch, 1, g, 1);
				ss->get_dx_values(ipol[l].idx, np, tpt, 0, h);
				for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
				blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1);
				ss->get_dy_values(ipol[l].idx, np, tpt, 0, h);
				for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
				blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1);
				ss->get_dz_values(ipol[l].idx, np, tpt, 0, h);
				for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
				blas_axpy(np, ipol[l].coef, sch, 1, dgdz, 1);
			}
			delete [] h;
			delete [] sch;
			delete [] tpt;

			scalar value = 0.0;
			for (int k = 0; k < np; k++) {
				value += pt[k].w * (uval[k] * (rval[k] - g[k]) +
					dudx[k] * ((drdx[k] * mdx[split]) - dgdx[k]) +
					dudy[k] * ((drdy[k] * mdy[split]) - dgdy[k]) +
					dudz[k] * ((drdz[k] * mdz[split]) - dgdz[k]));
			}
			delete [] g;
			delete [] dgdx;
			delete [] dgdy;
			delete [] dgdz;
			proj_rhs[i] += value * (1 / (double) int_ns[split]);

			if (int_trf[split][e] != -1) fu->pop_transform();
		}
	}
	delete [] ipol;

	// solve the system; lubksb leaves the solution in proj_rhs
	double d;
	int *iperm = new int[bubble_fns];
	ludcmp(proj_mat, bubble_fns, iperm, &d);
	lubksb(proj_mat, bubble_fns, iperm, proj_rhs);

	// iperm comes from new[], so it must be released with delete []
	delete [] iperm;

	bubble_proj = new ProjItem [bubble_fns];
	for (int i = 0; i < bubble_fns; i++) {
		bubble_proj[i].coef = proj_rhs[i];
		bubble_proj[i].idx = bubble_fn_idx[i];
	}

	delete [] proj_mat;
	delete [] proj_rhs;
}
Пример #3
0
// Computes the projection for the given split/son/order via calc_projection
// and returns the accumulated squared difference between sln and that
// projection.
//
// For each of the int_ns[split] sub-elements the function values and the
// x/y/z derivatives of sln (derivatives scaled by mdx/mdy/mdz[split]) are
// compared with the projection evaluated at the transformed quadrature
// points; the weighted squared differences are summed.
//
// sln's transform is disabled for the duration of the call and enabled again
// before returning.
//
// @param split  index into int_ns, int_trf and the mdx/mdy/mdz tables
// @param son    son selector; int_son is indexed with son + 1 below
// @param order  order passed to calc_projection and used to pick the quadrature
// @return the summed weighted squared error (not square-rooted)
double H1ProjectionIpol::get_error(int split, int son, const Ord3 &order)
{
	_F_
	sln->enable_transform(false);

	Ord3 order_rhs = order;

	// NOTE(review): calc_projection receives `son` while the loop below indexes
	// int_son with `son + 1` -- confirm that this difference is intended.
	calc_projection(split, son, order_rhs);

	// error
	QuadPt3D *pt = quad->get_points(order_rhs);
	int np = quad->get_num_points(order_rhs);

	double error = 0.0;
	for (int e = 0; e < int_ns[split]; e++) {
		Trf *tr = get_trf(int_trf[split][e]);

		unsigned int son_idx = base_elem->get_son(int_son[son + 1][e]);
		sln->set_active_element(mesh->elements[son_idx]);
		sln->precalculate(np, pt, FN_DEFAULT);
		scalar *rval = sln->get_fn_values();
		scalar *rdx, *rdy, *rdz;
		sln->get_dx_dy_dz_values(rdx, rdy, rdz);

		QuadPt3D *tpt = new QuadPt3D[np];
		transform_points(np, pt, tr, tpt);

		// projection values and derivatives at the transformed points
		scalar *prfn = new scalar[np];
		scalar *prdx = new scalar[np];
		scalar *prdy = new scalar[np];
		scalar *prdz = new scalar[np];
		// clear using the element type of the buffers: sizeof(scalar), not
		// sizeof(double), so the whole array is zeroed whatever scalar is
		memset(prfn, 0, np * sizeof(scalar));
		memset(prdx, 0, np * sizeof(scalar));
		memset(prdy, 0, np * sizeof(scalar));
		memset(prdz, 0, np * sizeof(scalar));

		for (int f = 0; f < proj_fns; f++) {
#ifndef H3D_COMPLEX
			double *tmp = new double[np];
			ss->get_fn_values(proj[f]->idx, np, tpt, 0, tmp);
			blas_axpy(np, proj[f]->coef, tmp, 1, prfn, 1);
			ss->get_dx_values(proj[f]->idx, np, tpt, 0, tmp);
			blas_axpy(np, proj[f]->coef, tmp, 1, prdx, 1);
			ss->get_dy_values(proj[f]->idx, np, tpt, 0, tmp);
			blas_axpy(np, proj[f]->coef, tmp, 1, prdy, 1);
			ss->get_dz_values(proj[f]->idx, np, tpt, 0, tmp);
			blas_axpy(np, proj[f]->coef, tmp, 1, prdz, 1);
			delete [] tmp;
#else
			// the shapeset yields doubles; copy into a scalar buffer for blas_axpy
			double *tmp = new double[np];
			scalar *sctmp = new scalar[np];
			ss->get_fn_values(proj[f]->idx, np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj[f]->coef, sctmp, 1, prfn, 1);
			ss->get_dx_values(proj[f]->idx, np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj[f]->coef, sctmp, 1, prdx, 1);
			ss->get_dy_values(proj[f]->idx, np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj[f]->coef, sctmp, 1, prdy, 1);
			ss->get_dz_values(proj[f]->idx, np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj[f]->coef, sctmp, 1, prdz, 1);
			delete [] tmp;
			delete [] sctmp;
#endif
		}

		for (int k = 0; k < np; k++)
			error += pt[k].w *
				(sqr(rval[k] - prfn[k]) +
				 sqr(rdx[k] * mdx[split] - prdx[k]) +
				 sqr(rdy[k] * mdy[split] - prdy[k]) +
				 sqr(rdz[k] * mdz[split] - prdz[k]));

		delete [] tpt;
		delete [] prfn;
		delete [] prdx;
		delete [] prdy;
		delete [] prdz;
	}

	sln->enable_transform(true);

	return error;
}
Пример #4
0
// Computes the coefficients of the face shape functions of local face `iface`
// and stores them in face_proj[iface].
//
// The vertex and edge items belonging to the face (vertex_proj, edge_proj)
// are combined first; the remaining difference to sln is then projected onto
// the face functions by assembling a face_fns x face_fns matrix from the
// prod_fn / prod_dx tables, integrating the right-hand side over the
// face_ns[split][iface] sub-elements and solving with ludcmp / lubksb.
//
// Only the two derivative directions selected for the face are used:
// faces 0,1 use (y, z), faces 2,3 use (x, z), faces 4,5 use (x, y).
//
// @param iface  local face number, expected in 0..5 (anything else hits EXIT)
// @param split  index into face_ns, face_trf and the mdx/mdy/mdz tables
// @param son    index into face_son selecting the son elements
// @param order  order from which the face and edge orders are derived
//
// Returns immediately, leaving face_proj[iface] untouched, when the face
// order admits no face functions.
void H1ProjectionIpol::calc_face_proj(int iface, int split, int son, const Ord3 &order)
{
	_F_
	Ord2 face_order = order.get_face_order(iface);
	int face_fns = (face_order.x - 1) * (face_order.y - 1);
	if (face_fns <= 0) return;

	scalar *proj_rhs = new scalar[face_fns];
	MEM_CHECK(proj_rhs);
	memset(proj_rhs, 0, sizeof(scalar) * face_fns);
	double **proj_mat = new_matrix<double>(face_fns, face_fns);
	MEM_CHECK(proj_mat);

	const int *face_vertex = RefHex::get_face_vertices(iface);
	const int *face_edge = RefHex::get_face_edges(iface);

	// get total number of functions for interpolant (vertex + edge functions)
	int ipol_fns = RefHex::get_num_face_vertices(iface);
	for (int iedge = 0; iedge < RefHex::get_num_face_edges(iface); iedge++)
		ipol_fns += order.get_edge_order(face_edge[iedge]) - 1;

	// interpolant
	ProjItem *ipol = new ProjItem[ipol_fns];
	int mm = 0;
	for (int vtx = 0; vtx < RefHex::get_num_face_vertices(iface); vtx++, mm++)
		ipol[mm] = vertex_proj[face_vertex[vtx]];
	for (int iedge = 0; iedge < RefHex::get_num_face_edges(iface); iedge++) {
		Ord1 edge_order = order.get_edge_order(face_edge[iedge]);
		int edge_fns = edge_order - 1;
		for (int i = 0; i < edge_fns; i++, mm++)
			ipol[mm] = edge_proj[face_edge[iedge]][i];
	}

	// assemble the matrix from the 1D product tables of the two face directions
	int face_ori = 0;
	int *face_fn_idx = ss->get_face_indices(iface, face_ori, face_order);
	for (int i = 0; i < face_fns; i++) {
		int iidx = face_fn_idx[i];
		Ord3 oi = ss->get_dcmp(iidx);
		for (int j = 0; j < face_fns; j++) {
			int jidx = face_fn_idx[j];
			Ord3 oj = ss->get_dcmp(jidx);
			double val = 0.0;
			if (iface == 0 || iface == 1) {
				val =
					prod_fn[oi.y][oj.y] * prod_fn[oi.z][oj.z] +
					prod_dx[oi.y][oj.y] * prod_fn[oi.z][oj.z] +
					prod_fn[oi.y][oj.y] * prod_dx[oi.z][oj.z];
			}
			else if (iface == 2 || iface == 3) {
				val =
					prod_fn[oi.x][oj.x] * prod_fn[oi.z][oj.z] +
					prod_dx[oi.x][oj.x] * prod_fn[oi.z][oj.z] +
					prod_fn[oi.x][oj.x] * prod_dx[oi.z][oj.z];
			}
			else if (iface == 4 || iface == 5) {
				val =
					prod_fn[oi.x][oj.x] * prod_fn[oi.y][oj.y] +
					prod_dx[oi.x][oj.x] * prod_fn[oi.y][oj.y] +
					prod_fn[oi.x][oj.x] * prod_dx[oi.y][oj.y];
			}
			else
				EXIT("Local face number out of range.");
			proj_mat[i][j] += val;
		}
	}

	// assemble the right-hand side, one sub-element at a time
	for (int e = 0; e < face_ns[split][iface]; e++) {
		unsigned int son_idx = base_elem->get_son(face_son[son][iface][e]);
		sln->set_active_element(mesh->elements[son_idx]);

		Trf *tr = get_trf(face_trf[split][iface][e]);
		for (int i = 0; i < face_fns; i++) {
			int iidx = face_fn_idx[i];
			fu->set_active_shape(iidx);

			Ord2 ord = (ss->get_order(iidx) + order).get_face_order(iface);
			QuadPt3D *pt = quad->get_face_points(iface, ord);
			int np = quad->get_face_num_points(iface, ord);

			// -1 marks "no transformation"; the matching pop is at the end of the loop body
			if (face_trf[split][iface][e] != -1) fu->push_transform(face_trf[split][iface][e]);
			fu->precalculate(np, pt, FN_DEFAULT);
			sln->precalculate(np, pt, FN_DEFAULT);

			double *uval = fu->get_fn_values();
			scalar *rval = sln->get_fn_values();

			// dudx/dudy (and drdx/drdy, md/me) refer to the two directions of
			// this face, not necessarily to the global x and y axes
			double *dudx, *dudy;
			scalar *drdx, *drdy;
			double md, me;

			if (iface == 0 || iface == 1) {
				dudx = fu->get_dy_values();
				drdx = sln->get_dy_values();
				dudy = fu->get_dz_values();
				drdy = sln->get_dz_values();
				md = mdy[split];
				me = mdz[split];
			}
			else if (iface == 2 || iface == 3) {
				dudx = fu->get_dx_values();
				drdx = sln->get_dx_values();
				dudy = fu->get_dz_values();
				drdy = sln->get_dz_values();
				md = mdx[split];
				me = mdz[split];
			}
			else if (iface == 4 || iface == 5) {
				dudx = fu->get_dx_values();
				drdx = sln->get_dx_values();
				dudy = fu->get_dy_values();
				drdy = sln->get_dy_values();
				md = mdx[split];
				me = mdy[split];
			}
			else
				EXIT("Local face number out of range.");

			QuadPt3D *tpt = new QuadPt3D[np];
			transform_points(np, pt, tr, tpt);

			// values and derivatives of the combined vertex/edge items at tpt
			scalar *g = new scalar[np];
			scalar *dgdx = new scalar[np];
			scalar *dgdy = new scalar[np];
			memset(g, 0, np * sizeof(scalar));
			memset(dgdx, 0, np * sizeof(scalar));
			memset(dgdy, 0, np * sizeof(scalar));

			// scratch buffers, allocated once and reused for every item
			double *h = new double[np];
			scalar *sch = new scalar[np];
			for (int l = 0; l < ipol_fns; l++) {
				// the shapeset yields doubles; copy into a scalar buffer for blas_axpy
				ss->get_fn_values(ipol[l].idx, np, tpt, 0, h);
				for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
				blas_axpy(np, ipol[l].coef, sch, 1, g, 1);

				if (iface == 0 || iface == 1) {
					ss->get_dy_values(ipol[l].idx, np, tpt, 0, h);
					for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
					blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1);
					ss->get_dz_values(ipol[l].idx, np, tpt, 0, h);
					for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
					blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1);
				}
				else if (iface == 2 || iface == 3) {
					ss->get_dx_values(ipol[l].idx, np, tpt, 0, h);
					for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
					blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1);
					ss->get_dz_values(ipol[l].idx, np, tpt, 0, h);
					for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
					blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1);
				}
				else if (iface == 4 || iface == 5) {
					ss->get_dx_values(ipol[l].idx, np, tpt, 0, h);
					for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
					blas_axpy(np, ipol[l].coef, sch, 1, dgdx, 1);
					ss->get_dy_values(ipol[l].idx, np, tpt, 0, h);
					for (int ii = 0; ii < np; ii++) sch[ii] = h[ii];
					blas_axpy(np, ipol[l].coef, sch, 1, dgdy, 1);
				}
				else
					EXIT("Local face number out of range.");
			}
			delete [] h;
			delete [] sch;

			// tpt comes from new[], so it must be released with delete []
			delete [] tpt;

			scalar value = 0.0;
			for (int k = 0; k < np; k++)
				value += pt[k].w * (uval[k] * (rval[k] - g[k]) + dudx[k] * ((drdx[k] * md) - dgdx[k]) + dudy[k] * ((drdy[k] * me) - dgdy[k]));
			proj_rhs[i] += value * (1 / (double) face_ns[split][iface]);

			delete [] g;
			delete [] dgdx;
			delete [] dgdy;
			if (face_trf[split][iface][e] != -1) fu->pop_transform();
		}
	}
	delete [] ipol;

	// solve the system; lubksb leaves the solution in proj_rhs
	double d;
	int *iperm = new int[face_fns];
	ludcmp(proj_mat, face_fns, iperm, &d);
	lubksb(proj_mat, face_fns, iperm, proj_rhs);

	delete [] iperm;

	face_proj[iface] = new ProjItem [face_fns];
	for (int i = 0; i < face_fns; i++) {
		face_proj[iface][i].coef = proj_rhs[i];
		face_proj[iface][i].idx = face_fn_idx[i];
	}

	delete [] proj_mat;
	delete [] proj_rhs;
}
Пример #5
0
// Computes the coefficients of the edge shape functions of local edge `iedge`
// and stores them in edge_proj[iedge].
//
// The two vertex items of the edge (taken from vertex_proj) are combined
// first; the remaining difference to sln is projected onto the edge functions
// by assembling an edge_fns x edge_fns matrix from the prod_fn / prod_dx
// tables, integrating the right-hand side over the edge_ns[split][iedge]
// sub-elements and solving with ludcmp / lubksb.
//
// Only the derivative along the edge is used: edges 0,2,8,10 use x,
// edges 1,3,9,11 use y and edges 4,5,6,7 use z.
//
// @param iedge  local edge number, expected in 0..11 (anything else hits EXIT)
// @param split  index into edge_ns, edge_trf and the mdx/mdy/mdz tables
// @param son    index into edge_son selecting the son elements
// @param order  order from which the edge order is derived
//
// Returns immediately, leaving edge_proj[iedge] untouched, when the edge
// order admits no edge functions.
void H1ProjectionIpol::calc_edge_proj(int iedge, int split, int son, const Ord3 &order)
{
	_F_
	Ord1 edge_order = order.get_edge_order(iedge);
	int edge_fns = edge_order - 1;
	if (edge_fns <= 0) return;

	scalar *proj_rhs = new scalar[edge_fns];
	MEM_CHECK(proj_rhs);
	memset(proj_rhs, 0, sizeof(scalar) * edge_fns);
	double **proj_mat = new_matrix<double>(edge_fns, edge_fns);
	// check the matrix that was just allocated (the right-hand side was checked above)
	MEM_CHECK(proj_mat);

	// local edge vertex numbers
	const int *edge_vtx = RefHex::get_edge_vertices(iedge);
	ProjItem vtxp[] = { vertex_proj[edge_vtx[0]], vertex_proj[edge_vtx[1]] };

	// assemble the matrix from the 1D product table of the edge direction
	int *edge_fn_idx = ss->get_edge_indices(iedge, 0, edge_order);	// indices of edge functions
	for (int i = 0; i < edge_fns; i++) {
		int iidx = edge_fn_idx[i];
		Ord3 oi = ss->get_dcmp(iidx);
		for (int j = 0; j < edge_fns; j++) {
			int jidx = edge_fn_idx[j];
			Ord3 oj = ss->get_dcmp(jidx);
			double val = 0.0;
			if (iedge == 0 || iedge == 2 || iedge == 8 || iedge == 10) {
				val = prod_fn[oi.x][oj.x] + prod_dx[oi.x][oj.x];
			}
			else if (iedge == 1 || iedge == 3 || iedge == 9 || iedge == 11) {
				val = prod_fn[oi.y][oj.y] + prod_dx[oi.y][oj.y];
			}
			else if (iedge == 4 || iedge == 5 || iedge == 6 || iedge == 7) {
				val = prod_fn[oi.z][oj.z] + prod_dx[oi.z][oj.z];
			}
			else
				EXIT("Local edge number out of range.");
			proj_mat[i][j] += val;
		}
	}

	// assemble the right-hand side, one sub-element at a time
	for (int e = 0; e < edge_ns[split][iedge]; e++) {
		edge_fn_idx = ss->get_edge_indices(iedge, 0, edge_order);	// indices of edge functions

		unsigned int son_idx = base_elem->get_son(edge_son[son][iedge][e]);
		sln->set_active_element(mesh->elements[son_idx]);

		Trf *tr = get_trf(edge_trf[split][iedge][e]);
		for (int i = 0; i < edge_fns; i++) {
			int iidx = edge_fn_idx[i];
			fu->set_active_shape(iidx);

			Ord1 ord = (ss->get_order(iidx) + order).get_edge_order(iedge);
			QuadPt3D *pt = quad->get_edge_points(iedge, ord);
			int np = quad->get_edge_num_points(iedge, ord);

			// -1 marks "no transformation"; the matching pop is at the end of the loop body
			if (edge_trf[split][iedge][e] != -1) fu->push_transform(edge_trf[split][iedge][e]);
			fu->precalculate(np, pt, FN_DEFAULT);
			sln->precalculate(np, pt, FN_DEFAULT);

			double *uval = fu->get_fn_values();
			scalar *rval = sln->get_fn_values();

			// derivative along the edge and the matching scaling factor
			double *du, md;
			scalar *dr;
			if (iedge == 0 || iedge == 2 || iedge == 8 || iedge == 10) {
				du = fu->get_dx_values();
				dr = sln->get_dx_values();
				md = mdx[split];
			}
			else if (iedge == 1 || iedge == 3 || iedge == 9 || iedge == 11) {
				du = fu->get_dy_values();
				dr = sln->get_dy_values();
				md = mdy[split];
			}
			else if (iedge == 4 || iedge == 5 || iedge == 6 || iedge == 7) {
				du = fu->get_dz_values();
				dr = sln->get_dz_values();
				md = mdz[split];
			}
			else
				EXIT("Local edge number out of range.");

			QuadPt3D *tpt = new QuadPt3D[np];
			transform_points(np, pt, tr, tpt);

			double *tmp = new double[np];
			scalar *sctmp = new scalar[np];
			scalar *g = new scalar[np];						// interpolant
			memset(g, 0, np * sizeof(scalar));
#ifndef H3D_COMPLEX
			ss->get_fn_values(vtxp[0].idx, np, tpt, 0, tmp);
			blas_axpy(np, vtxp[0].coef, tmp, 1, g, 1);
			ss->get_fn_values(vtxp[1].idx, np, tpt, 0, tmp);
			blas_axpy(np, vtxp[1].coef, tmp, 1, g, 1);
#else
			// the shapeset yields doubles; copy into a scalar buffer for blas_axpy
			ss->get_fn_values(vtxp[0].idx, np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, vtxp[0].coef, sctmp, 1, g, 1);
			ss->get_fn_values(vtxp[1].idx, np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, vtxp[1].coef, sctmp, 1, g, 1);
#endif

			// derivative of the interpolant along the edge
			scalar *dg = new scalar[np];
			memset(dg, 0, np * sizeof(scalar));
			if (iedge == 0 || iedge == 2 || iedge == 8 || iedge == 10) {
				ss->get_dx_values(vtxp[0].idx, np, tpt, 0, tmp);
				for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
				blas_axpy(np, vtxp[0].coef, sctmp, 1, dg, 1);
				ss->get_dx_values(vtxp[1].idx, np, tpt, 0, tmp);
				for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
				blas_axpy(np, vtxp[1].coef, sctmp, 1, dg, 1);
			}
			else if (iedge == 1 || iedge == 3 || iedge == 9 || iedge == 11) {
				ss->get_dy_values(vtxp[0].idx, np, tpt, 0, tmp);
				for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
				blas_axpy(np, vtxp[0].coef, sctmp, 1, dg, 1);
				ss->get_dy_values(vtxp[1].idx, np, tpt, 0, tmp);
				for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
				blas_axpy(np, vtxp[1].coef, sctmp, 1, dg, 1);
			}
			else if (iedge == 4 || iedge == 5 || iedge == 6 || iedge == 7) {
				ss->get_dz_values(vtxp[0].idx, np, tpt, 0, tmp);
				for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
				blas_axpy(np, vtxp[0].coef, sctmp, 1, dg, 1);
				ss->get_dz_values(vtxp[1].idx, np, tpt, 0, tmp);
				for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
				blas_axpy(np, vtxp[1].coef, sctmp, 1, dg, 1);
			}
			else
				EXIT("Local edge number out of range.");

			delete [] tmp;
			delete [] sctmp;
			delete [] tpt;

			scalar value = 0.0;
			for (int k = 0; k < np; k++)
				value += pt[k].w * (uval[k] * (rval[k] - g[k]) + du[k] * ((dr[k] * md) - dg[k]));
			proj_rhs[i] += value * (1 / (double) edge_ns[split][iedge]);

			delete [] g;
			delete [] dg;
			if (edge_trf[split][iedge][e] != -1) fu->pop_transform();
		}
	}

	// solve the system; lubksb leaves the solution in proj_rhs
	double d;
	int *iperm = new int[edge_fns];
	ludcmp(proj_mat, edge_fns, iperm, &d);
	lubksb(proj_mat, edge_fns, iperm, proj_rhs);

	delete [] iperm;
	// copy functions and coefficients to the basis
	edge_proj[iedge] = new ProjItem[edge_fns];
	for (int i = 0; i < edge_fns; i++) {
		edge_proj[iedge][i].coef = proj_rhs[i];
		edge_proj[iedge][i].idx = edge_fn_idx[i];
	}
	delete [] proj_mat;
	delete [] proj_rhs;
}
Пример #6
0
// Computes the projection for the given split/son/order via calc_projection
// and returns the accumulated squared difference between sln and that
// projection.
//
// For each of the int_ns[split] sub-elements the function values and the
// x/y/z derivatives of sln (derivatives scaled by mdx/mdy/mdz[split]) are
// compared with the projection (sum of proj_coef[f] times shape function
// fn_idx[f]) evaluated at the transformed quadrature points; the weighted
// squared differences are summed.
//
// sln's transform is disabled for the duration of the call and enabled again
// before returning.
//
// @param split  index into int_ns, int_trf and the mdx/mdy/mdz tables
// @param son    son selector; son + 1 is passed to calc_projection and used
//               to index int_son
// @param order  order passed to calc_projection and used to pick the quadrature
// @return the summed weighted squared error (not square-rooted)
double H1Projection::get_error(int split, int son, const order3_t &order)
{
	_F_
	sln->enable_transform(false);

	calc_projection(split, son + 1, order);

	order3_t order_rhs = order;
	QuadPt3D *pt = quad->get_points(order_rhs);
	int np = quad->get_num_points(order_rhs);

	double error = 0.0;
	for (int e = 0; e < int_ns[split]; e++) {
		Trf *tr = get_trf(int_trf[split][e]);

		Word_t son_idx = base_elem->get_son(int_son[son + 1][e]);
		sln->set_active_element(mesh->elements[son_idx]);
		sln->precalculate(np, pt, FN_DEFAULT);
		scalar *rval = sln->get_fn_values();
		scalar *rdx, *rdy, *rdz;
		sln->get_dx_dy_dz_values(rdx, rdy, rdz);

		// NOTE: runtime-sized arrays are a compiler extension in C++; they are
		// kept as in the original code
		QuadPt3D tpt[np];
		transform_points(np, pt, tr, tpt);

		// projection values and derivatives at the transformed points
		scalar prfn[np], prdx[np], prdy[np], prdz[np];
		// clear using the element type of the buffers: sizeof(scalar), not
		// sizeof(double), so the whole array is zeroed whatever scalar is
		memset(prfn, 0, np * sizeof(scalar));
		memset(prdx, 0, np * sizeof(scalar));
		memset(prdy, 0, np * sizeof(scalar));
		memset(prdz, 0, np * sizeof(scalar));

		for (int f = 0; f < n_fns; f++) {
#ifndef H3D_COMPLEX
			double tmp[np];
			ss->get_fn_values(fn_idx[f], np, tpt, 0, tmp);
			blas_axpy(np, proj_coef[f], tmp, 1, prfn, 1);
			ss->get_dx_values(fn_idx[f], np, tpt, 0, tmp);
			blas_axpy(np, proj_coef[f], tmp, 1, prdx, 1);
			ss->get_dy_values(fn_idx[f], np, tpt, 0, tmp);
			blas_axpy(np, proj_coef[f], tmp, 1, prdy, 1);
			ss->get_dz_values(fn_idx[f], np, tpt, 0, tmp);
			blas_axpy(np, proj_coef[f], tmp, 1, prdz, 1);
#else
			// the shapeset yields doubles; copy into a scalar buffer for blas_axpy
			double tmp[np];
			scalar sctmp[np];
			ss->get_fn_values(fn_idx[f], np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj_coef[f], sctmp, 1, prfn, 1);
			ss->get_dx_values(fn_idx[f], np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj_coef[f], sctmp, 1, prdx, 1);
			ss->get_dy_values(fn_idx[f], np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj_coef[f], sctmp, 1, prdy, 1);
			ss->get_dz_values(fn_idx[f], np, tpt, 0, tmp);
			for (int ii = 0; ii < np; ii++) sctmp[ii] = tmp[ii];
			blas_axpy(np, proj_coef[f], sctmp, 1, prdz, 1);
#endif
		}

		for (int k = 0; k < np; k++)
			error += pt[k].w *
				(sqr(rval[k] - prfn[k]) +
				 sqr(rdx[k] * mdx[split] - prdx[k]) +
				 sqr(rdy[k] * mdy[split] - prdy[k]) +
				 sqr(rdz[k] * mdz[split] - prdz[k]));
	}

	sln->enable_transform(true);

	return error;
}
Пример #7
0
/**
 * Run the PARPACK reverse-communication iteration (parpack_psaupd) followed by
 * the post-processing step (parpack_pseupd) to obtain eigenvalues in `evals`
 * and the corresponding vectors in `Bvalues`.
 *
 * If `ncv` is -1 it is set to 2*nev; in every case it is capped at n-1.
 * Each time psaupd requests a product (ido == 1 or -1), y = A*x is computed
 * with slp::multiply on the slices of `workd` indicated by `ipntr`.
 *
 * @param nev  number of eigenvalues requested from PARPACK
 * @return `nconv` (taken from iparam[4]) when psaupd finishes with info >= 0,
 *         otherwise the negative `info` code reported by psaupd. A non-zero
 *         status from pseupd is reported on rank 0 but does not change the
 *         return value.
 */
int ParpackSolver::Solve(int nev) {
    /* Get MPI info */
    int nprocs, me;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Fint fcomm = MPI_Comm_c2f(MPI_COMM_WORLD);

    /* Select number of working Ritz vectors */
    if(ncv == -1)
        ncv = 2*nev;
    ncv = std::min(ncv, n-1);

    /* Initialize matrix descriptors */
    xdesc = pcontext->new_descriptor(n, 1, divup(n,nprocs), 1);
    Bdesc = pcontext->new_descriptor(n, ncv, divup(n,nprocs), ncv);
    assert(nloc == Bdesc->num_local_rows() && nloc == xdesc->num_local_rows());
    assert(ncv == Bdesc->num_local_cols() && 1 == xdesc->num_local_cols());

    /* Allocate local memory for eigenvector matrix $B$ */
    Bvalues = (real*) opsec_malloc(Bdesc->local_size() * sizeof(real));

    /* Zero-initialise everything handed to the Fortran routines so that no
     * indeterminate values cross the language boundary */
    real sigma = 0;
    int iparam[11] = {0}, ipntr[11] = {0};

    /* Set PARPACK parameters */
    char bmat[] = "I";
    char which[] = "LA";
    char howmny[] = "All";
    iparam[0] = 1;      // ishfts
    iparam[2] = maxitr; // maxitr
    iparam[6] = 1;      // mode

    /* Allocate working memory */
    int lworkl = ncv*(ncv + 8);
    real* workl = (real*) opsec_calloc(lworkl, sizeof(real));
    real* workd = (real*) opsec_calloc(3*nloc, sizeof(real));
    real* resid = (real*) opsec_calloc(nloc, sizeof(real));
    int* select = (int*) opsec_calloc(ncv, sizeof(int));

    /* Begin reverse communication loop */
    int info = 0;
    int ido = 0;
    while(ido != 99) {
        parpack_psaupd(&fcomm, &ido, bmat, &nloc, which, &nev,
                       &tol, resid, &ncv, Bvalues, &nloc, iparam, ipntr,
                       workd, workl, &lworkl, &info);

        if(ido == 1 || ido == -1) {
            /* Compute y = A*x (don't forget Fortran indexing conventions!) */
            slp::Matrix<real> A(Adesc, Avalues);
            slp::Matrix<real> x(xdesc, &workd[ipntr[0] - 1]);
            slp::Matrix<real> y(xdesc, &workd[ipntr[1] - 1]);
            slp::multiply(A, x, y);
        }
    }

    if(me == 0) {
        opsec_info("Number of Implicit Arnoldi update iterations taken is %d\n", iparam[2]);
        opsec_info("  info = %d\n", info);
        opsec_info("  nconv = %d, nev = %d\n", iparam[4], nev);

        time_t t = time(NULL);
        opsec_info("Time: %s\n", ctime(&t));
        opsec_info("Post-processing Ritz values and vectors\n");
    }

    /* Check return code */
    if(info < 0) {
        /* Error encountered.  Abort. */
        if(me == 0)
            opsec_error("parpack_psaupd returned error: info = %d\n", info);

        /* Release the work arrays before bailing out so the error path does
         * not leak them */
        free(workl);
        free(workd);
        free(resid);
        free(select);
        return info;
    }

    /* Save number of successfully computed eigenvalues */
    nconv = iparam[4];
    evals.resize(nconv);

    /* Retrieve eigenvalues and eigenvectors */
    int rvec = 1;
    int ierr;
    parpack_pseupd(&fcomm, &rvec, howmny, select, &evals[0], Bvalues, &nloc, &sigma,
                   bmat, &nloc, which, &nev, &tol, resid, &ncv, Bvalues, &nloc,
                   iparam, ipntr, workd, workl, &lworkl, &ierr);

    if(ierr != 0) {
        if(me == 0)
            opsec_error("parpack_pseupd returned error: ierr = %d\n", ierr);
    }

    if(me == 0) {
        time_t t = time(NULL);
        opsec_info("Time: %s\n", ctime(&t));
    }

#if 0
    {
        int i;
        /* Debugging: check residuals  || A*x - lambda*x || */
        y = (real*) opsec_calloc(nloc, sizeof(real));
        for(i = iparam[4]-1; i >= 0; i--) { 
            static char trans = 'T';
            static int incx = 1;
            static int incy = 1;
            static real alpha = 1.0;
            static real beta = 0.0;
            real a = -evals[i];
            ierr = MPI_Allgatherv(&evecs[i*nloc], nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD);
            blas_gemv(&trans, &n, &nloc, &alpha, A, &n, xfull, &incx, &beta, y, &incy);
            blas_axpy(&nloc, &a, &evecs[i*nloc], &incx, y, &incy);
            real d = parpack_pnorm2(&fcomm, &nloc, y, &incy);
            if(myid == 0)
                printf("Eigenvalue %d: lambda = %16.16f, |A*x - lambda*x| = %16.16f\n", iparam[4]-i, evals[i], d);
            ierr = MPI_Allgatherv(y, nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD);
        }
        free(y);
    }
#endif

#if 0
    /* Sort from largest to smallest eigenvalue */
    for(int j = 0; j < nconv/2; j++) {
        std::swap(evals[j], evals[nconv-j-1]);
        memcpy(workd, &B(0,j), nloc*sizeof(real));
        memcpy(&B(0,j), &B(0,nconv-j-1), nloc*sizeof(real));
        memcpy(&B(0,nconv-j-1), workd, nloc*sizeof(real));
    }
#endif

    /* Clean up */
    free(workl);
    free(workd);
    free(resid);
    free(select);

    return nconv;
}
Пример #8
0
/*
 * Iteratively computes scaling vectors d (length A_out->size1) and e (length
 * A_out->size2) from the element-wise absolute value of A_in, then writes A_in
 * into A_out with every row multiplied element-wise by e and every column
 * multiplied element-wise by d.
 *
 * Each iteration performs
 *     e = size(d) / (|A|' * d + kSinkhornConst / size(e))
 *     d = size(e) / (|A|  * e + kSinkhornConst / size(d))
 * and stops once the 2-norms of the change in both d and e drop below kEps,
 * or after kMaxIter iterations.
 *
 * Returns OPTKIT_ERROR_DIMENSION_MISMATCH (through OK_SCAN_ERR) when the sizes
 * of d / e do not match A_out; otherwise the status accumulated by the
 * OK_CHECK_ERR calls.
 *
 * NOTE(review): `err` is never assigned inside the iteration loop, so the
 * `!err` part of the loop condition can only reflect the setup calls made
 * before it; the results of the calls inside the loop (and of the two
 * vector_calloc calls) are not inspected -- confirm whether they should be.
 */
ok_status regularized_sinkhorn_knopp(void * linalg_handle, ok_float * A_in,
	matrix * A_out, vector * d, vector * e, enum CBLAS_ORDER ord)
{
	OK_CHECK_PTR(A_in);
	OK_CHECK_MATRIX(A_out);
	OK_CHECK_VECTOR(d);
	OK_CHECK_VECTOR(e);

	ok_status err = OPTKIT_SUCCESS;
	const ok_float kSinkhornConst = (ok_float) 1e-4;
	const ok_float kEps = (ok_float) 1e-2;
	const size_t kMaxIter = 300;
	ok_float d_change = 1, e_change = 1;
	size_t iter, row, col;

	/* `slice` is a view into A_out; the *_prev vectors hold the previous iterate */
	vector slice, d_prev, e_prev;
	slice.data = OK_NULL;
	d_prev.data = OK_NULL;
	e_prev.data = OK_NULL;

	if (A_out->size1 != d->size || A_out->size2 != e->size)
		return OK_SCAN_ERR( OPTKIT_ERROR_DIMENSION_MISMATCH );

	vector_calloc(&d_prev, A_out->size1);
	vector_calloc(&e_prev, A_out->size2);

	/* work on |A_in|, starting from d = 1 and e = 0 */
	OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) );
	OK_CHECK_ERR( err, matrix_abs(A_out) );
	OK_CHECK_ERR( err, vector_set_all(d, kOne) );
	OK_CHECK_ERR( err, vector_scale(e, kZero) );

	/* optional argument ok_float pnorm? */
	/*
	if (pnorm != 1) {
		matrix_pow(A, pnorm)
	}
	*/

	iter = 0;
	while (iter < kMaxIter && !err) {
		/* e = size(d) / (A' * d + regularization) */
		blas_gemv(linalg_handle, CblasTrans, kOne, A_out, d, kZero, e);
		vector_add_constant(e, kSinkhornConst / (ok_float) e->size);
		vector_recip(e);
		vector_scale(e, (ok_float) d->size);

		/* d = size(e) / (A * e + regularization) */
		blas_gemv(linalg_handle, CblasNoTrans, kOne, A_out, e, kZero,
			d);
		vector_add_constant(d, kSinkhornConst / (ok_float) d->size);
		vector_recip(d);
		vector_scale(d, (ok_float) e->size);

		/* previous iterate minus current iterate */
		blas_axpy(linalg_handle, -kOne, d, &d_prev);
		blas_axpy(linalg_handle, -kOne, e, &e_prev);

		blas_nrm2(linalg_handle, &d_prev, &d_change);
		blas_nrm2(linalg_handle, &e_prev, &e_change);

		if (d_change < kEps && e_change < kEps)
			break;

		/* remember the current iterate for the next convergence check */
		vector_memcpy_vv(&d_prev, d);
		vector_memcpy_vv(&e_prev, e);

		++iter;
	}

	/* optional argument ok_float pnorm? */
	/*
	if (pnorm != 1) {
		vector_pow(d, kOne / pnorm)
		vector_pow(e, kOne / pnorm)
	}
	*/

	/* restore the signed entries, then apply the scalings */
	OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) );
	if (!err) {
		for (row = 0; row < A_out->size1; ++row) {
			matrix_row(&slice, A_out, row);
			vector_mul(&slice, e);
		}
		for (col = 0; col < A_out->size2; ++col) {
			matrix_column(&slice, A_out, col);
			vector_mul(&slice, d);
		}
	}

	vector_free(&d_prev);
	vector_free(&e_prev);

	return err;
}