Beispiel #1
0
static void
do_test(MAP *rmap, MAP *cmap)
{
    MAT *A;
    VEC *x, *y;
    INT i, j, m0, n0;
    INT *cols = phgAlloc(cmap->nglobal * sizeof(*cols));
    FLOAT *data = phgAlloc(cmap->nglobal * sizeof(*data));

    A = phgMapCreateMat(rmap, cmap);
    m0 = A->rmap->partition[A->rmap->rank];
    n0 = A->cmap->partition[A->cmap->rank];

    /* Matrix entries: A(I,J) = 1 + (I-1) + (J-1) */
    for (i = m0; i < m0 + A->rmap->nlocal; i++) {
#if 0
	/* Test MatAddEntry */
	for (j = 0; j < A->cmap->nglobal; j++)
	    phgMatAddGlobalEntry(A, i, j, 1.0 + i + j);
#else
	/* Test MatAddEntries */
	for (j = 0; j < A->cmap->nglobal; j++) {
	    cols[j] = j;
	    data[j] = 1.0 + i + j;
	}
	phgMatAddGlobalEntries(A, 1, &i, A->cmap->nglobal, cols, data);
#endif
    }
    phgFree(cols);
    phgFree(data);
    phgMatAssemble(A);

    phgInfo(-1, "y = A * x\n");
    x = phgMapCreateVec(A->cmap, 1);
    for (i = 0; i < x->map->nlocal; i++)
	x->data[i] = 1.0 + i + n0;
    phgVecAssemble(x);
    y = phgMatVec(MAT_OP_N, 1.0, A, x, 0.0, NULL);
    for (i = 0; i < y->map->nlocal; i++)
	phgInfo(-1, "    y->data[%d] = %lg\n", i + m0, (double)y->data[i]);
    phgVecDestroy(&x);
    phgVecDestroy(&y);

    phgInfo(-1, "y = A' * x\n");
    x = phgMapCreateVec(A->rmap, 1);
    for (i = 0; i < x->map->nlocal; i++)
	x->data[i] = 1.0 + i + m0;
    phgVecAssemble(x);
    y = phgMatVec(MAT_OP_T, 1.0, A, x, 0.0, NULL);
    for (i = 0; i < y->map->nlocal; i++)
	phgInfo(-1, "    y->data[%d] = %lg\n", i + n0, (double)y->data[i]);
    phgVecDestroy(&x);
    phgVecDestroy(&y);

    phgMatDestroy(&A);
}
/* PC_PROC function which applies the Sherman-Morrison-Woodbury formula */
static void
sherman(void *ctx, VEC *b0, VEC **x0)
{
    SOLVER *solver = ctx;
    VEC *U = ((void **)solver->mat->mv_data)[1];
    SOLVER *solver1 = ((void **)solver->mat->mv_data)[2];
    FLOAT *C = ((void **)solver->mat->mv_data)[3];
    INT *pvt = ((void **)solver->mat->mv_data)[4];

    VEC *x = *x0, *y;
    INT i, j, n = U->map->nlocal, K = U->nvec;
    FLOAT *z;

    phgPrintf("Note: applying the Sherman-Morrison-Woodbury formula.\n");

    /* x := inv(A) * b */
#if 0
    memset(x->data, 0, n * sizeof(*x->data));
#endif
    z = solver1->rhs->data; 	/* save solver1->rhs->data */
    solver1->rhs->data = b0->data;
    solver1->rhs->assembled = TRUE;
    phgSolverVecSolve(solver1, FALSE, x);
    solver1->rhs->data = z; 	/* restore solver1->rhs->data */

    if (K == 0)
	return;

    /* z := U^t * x */
    z = phgCalloc(K, sizeof(*z));
    for (j = 0; j < K; j++)
	for (i = 0; i < n; i++)
	    z[j] += U->data[j * n + i] * x->data[i];
#if USE_MPI
    if (U->map->nprocs > 1) {
	FLOAT *tmp = phgAlloc(K * sizeof(*tmp));
	MPI_Allreduce(z, tmp, K, PHG_MPI_FLOAT, MPI_SUM, U->map->comm);
	phgFree(z);
	z = tmp;
    }
#endif	/* USE_MPI */

    /* z := C*z */
    phgSolverDenseSV(K, C, pvt, 1, z);

    /* y := inv(A) * (U*z) */
    memset(solver1->rhs->data, 0, n * sizeof(*solver1->rhs->data));
    for (i = 0; i < n; i++)
	for (j = 0; j < K; j++)
	    solver1->rhs->data[i] += U->data[j * n + i] * z[j];
    y = phgMapCreateVec(U->map, 1);
    phgSolverVecSolve(solver1, FALSE, y);
    phgFree(z);

    /* x -= y */
    for (i = 0; i < n; i++)
	x->data[i] -= y->data[i];

    phgVecDestroy(&y);
}
/* Preconditioning procedure using diag(matFu, matFu, matFu) */
static void
pc_proc1(void *ctx, VEC *b0, VEC **x0)
{
    SOLVER *pc_solver = (SOLVER *) ctx;
    int Nu = matF->rmap->nlocal;
    int k, nits;
    FLOAT res;
    INT i;
    VEC *tmp = phgMapCreateVec(pc_solver->rhs->map, 1);
    INT Nu0 = tmp->map->nlocal;

    assert(pc_solver->mat == matFu);
    assert(Nu == Nu0 * 3);
    nits = 0;
    res = 0.;
    for (k = 0; k < 3; k++) {
	for (i = 0; i < Nu0; i++)
	    pc_solver->rhs->data[i] = b0->data[3 * i + k];
	pc_solver->rhs->assembled = TRUE;
	pc_solver->rhs_updated = TRUE;
	bzero(tmp->data, sizeof(*tmp->data) * Nu0);
	phgSolverVecSolve(pc_solver, FALSE, tmp);
	for (i = 0; i < Nu0; i++)
	    (*x0)->data[3 * i + k] = tmp->data[i];
	if (nits < pc_solver->nits)
	    nits = pc_solver->nits;
	if (res < pc_solver->residual)
	    res = pc_solver->residual;
    }
    phgVecDestroy(&tmp);
    pc_solver->nits = nits;
    pc_solver->residual = res;

    return;
}
/*
 * Jacobi smoother.
 *
 *   Implement using phgMatvec
 *   ref: phgSolverJacobi, solver-phg.c
 * */
void 
mg_Jacobi(MAT *A, VEC *x, VEC *b, int nsmooth, void *ctx)
{
    VEC *r;
    INT i, k; 
    FLOAT dx, omega = _p->smooth_damp;

    if (A->diag == NULL)
	phgMatSetupDiagonal(A);

    r = phgMapCreateVec(b->map, 1);
    for (k = 0; k < nsmooth; k++) {
	phgVecCopy(b, &r);
	phgMatVec(MAT_OP_N, -1.0, A, x, 1.0, &r);
	for (i = 0; i < A->cmap->nlocal; i++) {
	    assert(fabs(A->diag[i]) > 1e-12);
	    dx = r->data[i] / A->diag[i]; 
	    x->data[i] += omega * dx;
	}
    }
    phgVecDestroy(&r);
    
    return;
}
/*
 * Jacobi smoother2:
 *   Implement using details of matvec
 *   ref: matvec in matvec.c
 * 
 *  */
void 
mg_Jacobi2(MAT *A, VEC *x, VEC *b, int nsmooth, void *ctx)
{
    INT i, j, k, n, *pc, *pc_offp;
    FLOAT *pd, *pd_offp, *vx, *vx0, *vb;
    FLOAT sum, omega = _p->smooth_damp;;
    VEC *x0;
#if USE_MPI
    FLOAT *offp_data = NULL;
#endif	/* USE_MPI */

    MagicCheck(VEC, x);
    MagicCheck(VEC, b);
    assert(x == NULL || x->nvec == 1);
    assert(b == NULL || b->nvec == 1);
    if (x != NULL && !x->assembled)
	phgVecAssemble(x);
    if (b != NULL && !b->assembled)
	phgVecAssemble(b);
    x0 = phgMapCreateVec(x->map, 1);
    
    assert(A->type != PHG_DESTROYED);
    if (!A->assembled)
	phgMatAssemble(A);
    assert(A->type != PHG_MATRIX_FREE);
    phgMatPack(A);

#if USE_MPI
    if (A->cmap->nprocs > 1) {
	offp_data = phgAlloc(A->cinfo->rsize * sizeof(*offp_data));
    }
#endif	/* USE_MPI */

    if (A->cmap->nlocal != A->rmap->nlocal ||
	A->cmap->nlocal != x->map->nlocal  ||
	A->cmap->nlocal != b->map->nlocal)
	phgError(1, "%s:%d: inconsistent matrix-vector.", __FILE__,
		 __LINE__);

#if USE_MPI
    if (A->cmap->nprocs > 1) {
	phgMapScatterBegin(A->cinfo, x->nvec, x->data, offp_data);
	phgMapScatterEnd(A->cinfo, x->nvec, x->data, offp_data);
    }
#endif	/* USE_MPI */

    /* iteration */
    for (k = 0; k < nsmooth; k++) {
	phgVecCopy(x, &x0);

	/* multiply with local data */
	vx = x->data;
	vx0 = x0->data;
	vb = b->data;
	pc = A->packed_cols;
	pd = A->packed_data;
	if (A->cmap->nprocs > 1) {
	    pc_offp = A->packed_cols + A->rmap->nlocal + A->nnz_d;
	    pd_offp = A->packed_data + A->nnz_d;
	} else {
	    pc_offp = NULL;
	    pd_offp = NULL;
	}

	for (i = 0; i < A->rmap->nlocal;  i++) {
	    INT jcol;
	    FLOAT aa = 0., dx;
	    /* x_i = (b_i - \sum_{j ~= i} a_ij * x_j) / a_ii */

	    sum = vb[i];
	    /* local data */
	    if ((n = *(pc++)) != 0) {
		for (j = 0; j < n; j++) {
		    jcol = *(pc++);
		    if (jcol != i) {
			sum -= *(pd++) * vx0[jcol];
		    } else {
			aa = *(pd++);
			assert(fabs(aa) > 1e-14);
		    }
		}
	    }
	    
	    /* remote data */
	    if (pc_offp != NULL && (n = *(pc_offp++)) != 0) {
		for (j = 0; j < n; j++) {
		    jcol = *(pc_offp++);
		    sum -= *(pd_offp++) * offp_data[jcol];
		}
	    }
	    
	    dx = sum / aa - vx[i];
	    vx[i] += omega * dx;
	}

#if USE_MPI
	if (A->cmap->nprocs > 1) {
	    phgMapScatterBegin(A->cinfo, x->nvec, x->data, offp_data);
	    phgMapScatterEnd(A->cinfo, x->nvec, x->data, offp_data);
	}
#endif	/* USE_MPI */
    }


    phgVecDestroy(&x0);
#if USE_MPI
    phgFree(offp_data);
#endif	/* USE_MPI */

    return;
}
Beispiel #6
0
/****************************************************************
 * Build RHS which is the residual of the nonlinear system.
 ***************************************************************/
void 
phgNSBuildSolverURHS(NSSolver *ns)
{
    GRID *g = ns->g;
    SIMPLEX *e;
    SOLVER *solver_u = ns->solver_u;
    int i, k, l, q, s;
    FLOAT *dt = ns->dt;
    BOOLEAN tstep_minus = (ns->u[-1] != NULL);
    VEC *vec_rhs = phgMapCreateVec(solver_u->rhs->map, 1);
    FLOAT Theta = _nsp->Theta, nu = _nsp->nu, Thet1;
    int viscosity_type = ns->viscosity_type;

    SURF_BAS *surf_bas = ns->surf_bas;
    DOF *surf_dof = surf_bas->dof;
    BOOLEAN *rotated = surf_bas->rotated;
    const FLOAT *Trans = DofData(surf_dof);


#if STEADY_STATE
    assert(fabs(Theta - 1) < 1e-12);
    Thet1 = 0; Unused(Thet1);
    Unused(dt);
#else
    Thet1 = 1 - Theta;
    Unused(dt);
#endif /* STEADY_STATE */

    phgPrintf("   DB_mask: [");
    for (k = 0; k < Dim; k++)
	phgPrintf("%d ", ns->u[1]->DB_masks[k]);
    phgPrintf("]   ");

    nu_max = -1e10;
    nu_min = +1e10;

    phgVecDisassemble(vec_rhs);
    ForAllElements(g, e) {
	int M = ns->u[1]->type->nbas;	/* num of bases of Velocity */
	int N = ns->p[1]->type->nbas;	/* num of bases of Pressure */
	int order = DofTypeOrder(ns->u[1], e) * 3 - 1; /* Note:
							*   quad order is really high here,
							*   highest order term (u \nabla u, phi)  */
	FLOAT bufu[M], bufp[N], rhsu[M][Dim], rhsp[N];
	INT Iu[M][Dim], Ip[N];
	QUAD *quad;
	FLOAT vol, area, det;
	const FLOAT *w, *p, *normal,
	    **vu, *vu_queue[3],
	    *vf[2], *gu[2], *vp[2], *vw, *vT;
	FLOAT *vf_cache[2];

	vu = vu_queue + 1;

	quad = phgQuadGetQuad3D(order);
	vu[0] = phgQuadGetDofValues(e, ns->u[0], quad);	           /* u^{n} */
	vp[0] = phgQuadGetDofValues(e, ns->p[0], quad);	           /* p^{n} */
	gu[0] = phgQuadGetDofValues(e, ns->gradu[0], quad);        /* grad u^{n} */
	vw = phgQuadGetDofValues(e, ns->wind, quad);               /* wind */
	vT = phgQuadGetDofValues(e, ns->T[1], quad);	           /* T^{n} */


	if (tstep_minus) { 
	    vu[-1] = phgQuadGetDofValues(e, ns->u[-1], quad);      /* u^{n-1} */
	} else {
	    vu[-1] = vu[0];
	}

#if STEADY_STATE || TIME_DEP_NON
	vu[1] = phgQuadGetDofValues(e, ns->u[1], quad);          /* u^{n+1} */
	gu[1] = phgQuadGetDofValues(e, ns->gradu[1], quad);      /* grad u^{n} */
	vp[1] = phgQuadGetDofValues(e, ns->p[1], quad);          /* p^{n+1} */
#else
	TIME_DEP_LINEAR_ENTRY; /* Unavailable */
#endif /* STEADY_STATE || TIME_DEP_NON */

	Unused(l);
	Unused(vf); Unused(vf_cache); 

	if (!_nsp->no_extern_source) {
	    /* cache f values */
	    for (l = 0; l < 2; l++) {
		const FLOAT *cache;
		size_t cache_size;
		setFuncTime(ns->time[l]); /* set static time in ins-test.c */

		/* cache f */
		cache_size = Dim * quad->npoints * sizeof(FLOAT);
		cache = phgQuadGetFuncValues(g, e, Dim, func_f, quad);
		vf[l] = vf_cache[l] = phgAlloc(cache_size);
		memcpy(vf_cache[l], cache, cache_size);

		phgQuadGetFuncValues(NULL, NULL, 0, NULL, NULL); /* clear cache */
	    }
	}

	/* Global Matrix */
	Bzero(rhsu); Bzero(rhsp);
    
	p = quad->points;
	w = quad->weights;
	for (q = 0; q < quad->npoints; q++) {
	    phgGeomGetCurvedJacobianAtLambda(g, e, p, &det);
	    vol = fabs(det / 6.);

	    /* rhs u */
	    for (i = 0; i < M; i++) {
		/* interior node or Neumann */
		const FLOAT *gi_u = phgQuadGetBasisValues(e, ns->u[1], i, quad) + q;       /* phi_i */
		const FLOAT *ggi_u = phgQuadGetBasisCurvedGradient(e, ns->u[1], i, quad, q);    /* grad phi_i */

		for (k = 0; k < Dim; k++) {
#if ICE_BENCH_TEST
		    nu = get_effective_viscosity(gu[1], 0, 0, viscosity_type);
		    FLOAT eu[DDim];

		    MAT3_SYM(gu[1], eu);
		    rhsu[i][k] += vol*(*w) * EQU_SCALING * (- nu * INNER_PRODUCT(eu+k*Dim, ggi_u) 
							    + (*vp[1]) * *(ggi_u+k) * LEN_SCALING * PRES_SCALING
							    );     /* left */

		    if (k == Z_DIR) { 
			const FLOAT rho = RHO_ICE;
			const FLOAT grav = GRAVITY;
			const FLOAT a = SEC_PER_YEAR;
			const FLOAT f = rho*grav * EQU_SCALING * LEN_SCALING2; 

			Unused(a);
			rhsu[i][k] += vol*(*w) * (-f * (*gi_u) 
						  ); /* right */
		    }


#elif ESIMINT_TEST ||				\
    HEINO_TEST ||				\
    TEST_CASE == ICE_GREEN_LAND
		    nu = get_effective_viscosity(gu[1], *vT, 0, viscosity_type);
		    FLOAT eu[DDim];

		    MAT3_SYM(gu[1], eu);
		    rhsu[i][k] += vol*(*w) * EQU_SCALING * (- nu * INNER_PRODUCT(eu+k*Dim, ggi_u) 
							    + (*vp[1]) * *(ggi_u+k) * LEN_SCALING * PRES_SCALING
							    );     /* left */

		    if (k == Z_DIR) { 
			const FLOAT rho = RHO_ICE;
			const FLOAT grav = GRAVITY;
			const FLOAT a = SEC_PER_YEAR;
			const FLOAT f = rho*grav * EQU_SCALING * LEN_SCALING2; 

			Unused(a);
			rhsu[i][k] += vol*(*w) * (-f * (*gi_u) 
						  ); /* right */
		    }

#elif STEADY_STATE
		    rhsu[i][k] += vol*(*w) * (- nu * INNER_PRODUCT(gu[1]+k*Dim, ggi_u)
					      + (*vp[1]) * *(ggi_u+k)
					      );     /* left */
		    if (!_nsp->no_extern_source)
			rhsu[i][k] += vol*(*w) * (*(vf[1]+k) * (*gi_u)
						  ); /* right */
#elif TIME_DEP_NON
		    rhsu[i][k] -= vol*(*w) * ((vu[1][k] - vu[0][k]) * (*gi_u) / dt[0]
					      + Theta * (nu * INNER_PRODUCT(gu[1]+k*Dim, ggi_u)
							 )
					      - (*vp[1]) * *(ggi_u+k)
					      + Thet1 * (nu * INNER_PRODUCT(gu[0]+k*Dim, ggi_u)
							 )
					      );     /* left */
		    if (!_nsp->no_extern_source)
			rhsu[i][k] += vol*(*w) * (Theta * *(vf[1]+k) * (*gi_u)
						  + Thet1 * *(vf[0]+k) * (*gi_u)
						  ); /* right */
#else
		    TIME_DEP_LINEAR_ENTRY; /* Unavailable */
#endif /* STEADY_STATE */
		}
	    }

	    /* rhs p */
	    for (i = 0; i < N; i++) {
		const FLOAT *gi_p = phgQuadGetBasisValues(e, ns->p[1], i, quad) + q;       /* psi_i */
		FLOAT divu1 = gu[1][0] + gu[1][4] + gu[1][8];
		//FLOAT divu0 = gu[0][0] + gu[0][4] + gu[0][8];
		rhsp[i] += vol*(*w) * (divu1 * (*gi_p)
				       );
	    }
	    
	    if (tstep_minus) 
		vu[-1] += Dim;

#if STEADY_STATE || TIME_DEP_NON
	    vu[1] += Dim;
	    gu[1] += Dim*Dim;
	    vp[1]++;
#else
	    TIME_DEP_LINEAR; /* Unavailable */
#endif /* STEADY_STATE || TIME_DEP_NON */
	    vu[0] += Dim;
	    gu[0] += Dim * Dim;
	    vp[0]++; 
	    vw += Dim;
	    if (!_nsp->no_extern_source) {
		vf[0] += Dim; vf[1] += Dim;
	    }
	    vT++;
	    w++; p += Dim + 1;
	}

	if (!_nsp->no_extern_source) {
	    phgFree(vf_cache[0]);
	    phgFree(vf_cache[1]);
	}

	normal = NULL; Unused(normal);
	area = 0; Unused(area);

	if (!_nsp->enclosed_flow) {
	    /* slip boundary */
	    for (s = 0; s < NFace; s++) {
		if (e->bound_type[s] & INFLOW) {
		    int v0, v1, v2;
		    int nbas_face = NbasFace(ns->u[1]);
		    SHORT bases[nbas_face];
		    FLOAT lambda[Dim + 1], x,y,z, beta;
		    order = DofTypeOrder(ns->u[1], e) * 3 - 1;

		    phgDofGetBasesOnFace(ns->u[1], e, s, bases);
		    v0 = GetFaceVertex(s, 0);
		    v1 = GetFaceVertex(s, 1);
		    v2 = GetFaceVertex(s, 2);
		    lambda[s] = 0.;
		    
		    area = phgGeomGetFaceArea(g, e, s);
		    normal = phgGeomGetFaceOutNormal(g, e, s);
		    quad = phgQuadGetQuad2D(order);
		
		    p = quad->points;
		    w = quad->weights;
		    for (q = 0; q < quad->npoints; q++) {
			FLOAT vu[Dim];
			lambda[v0] = *(p++);
			lambda[v1] = *(p++);
			lambda[v2] = *(p++);
			
			phgGeomLambda2XYZ(g, e, lambda, &x, &y, &z);
			func_beta(x, y, z, &beta);
			phgDofEval(ns->u[1], e, lambda, vu);

			for (i = 0; i < nbas_face; i++) {
			    int ii = bases[i];
			    FLOAT gi_u = 
				*ns->u[1]->type->BasFuncs(ns->u[1], e, ii, ii + 1, lambda);

			    for (k = 0; k < Dim; k++) {
#if STEADY_STATE
				rhus[ii][k] += 0.;
#elif TIME_DEP_NON
#  if USE_SLIDING_BC
				abort();
				rhsu[ii][k] += SIGN_FRICTION * area*(*w) * beta * vu[k] * (gi_u)
				    * EQU_SCALING * LEN_SCALING;
#  else
				Unused(gi_u);
#  endif
#else
				TIME_DEP_LINEAR_ENTRY; /* Unavailable */
#endif /* STEADY_STATE */
			    }
			}     /* end of bas_i */
			w++;
		    }		/* end of quad point */
		}		/* end of face outflow */
	    }			/* end of all outflow face in element */
	}                       /* end out flow boundary */


#if USE_SLIDING_BC
	/* Rotate bases */
	for (i = 0; i < M; i++) {
	    INT id = phgDofMapE2D(surf_dof, e, i * (Dim*Dim)) / (Dim*Dim);
	    if (!rotated[id])
		continue;	
	    const FLOAT *trans = Trans + id*(Dim*Dim);

	    trans_left(&rhsu[i][0], 1, 1, trans);
	}
#else
	Unused(Trans);
	Unused(rotated);
#endif


	/* Map: Element -> system */
	for (i = 0; i < M; i++)
	    for (k = 0; k < Dim; k++)
		Iu[i][k] = phgMapE2L(solver_u->rhs->map, 0, e, i * Dim + k);
	for (i = 0; i < N; i++)
	    Ip[i] = phgMapE2L(solver_u->rhs->map, 1, e, i);

	/* set velocity dirichlet bdry */
	FLOAT tmp[Dim];
	for (i = 0; i < M; i++)
	    for (k = 0; k < Dim; k++)
		if (phgDofDirichletBC_(ns->u[1], e, i*Dim+k, NULL, bufu, tmp,
				       DOF_PROJ_NONE)) {
		    rhsu[i][k] = 0.;
		}

#if STEADY_STATE || TIME_DEP_NON
	/* set pressure dirichlet bdry for pinned point */
	for (i = 0; i < N; i++)
	    if (phgDofDirichletBC(ns->p[1], e, i, NULL, bufp, &rhsp[i],
				  DOF_PROJ_NONE)) {
		if (!_nsp->enclosed_flow)
		    phgError(1, "No dirichlet bc for Unenclosed flow!\n");
		if (_nsp->pin_node) {
# if PIN_AT_ROOT 
		    if (g->rank != 0)
		    	phgError(1, "Pinned node only on rank 0!\n");
		    if (g, e->verts[i] != ns->pinned_node_id)
			phgError(1, "Build rhs: pinned node e:%d, bas:%d, [%d] and [%d] "
				 "doesn't coincide when build RHS!\n", 
				 e->index, i, e->verts[i], ns->pinned_node_id);
# else
		    if (GlobalVertex(g, e->verts[i]) != ns->pinned_node_id)
			phgError(1, "Build rhs: pinned node e:%d, bas:%d, [%d] and [%d] "
				 "doesn't coincide when build RHS!\n", 
				 e->index, i, e->verts[i], ns->pinned_node_id);
# endif /* PIN_AT_ROOT */
		}
	    } 
#else
	TIME_DEP_LINEAR; /* Unavailable */
#endif /* STEADY_STATE || TIME_DEP_NON */


	/* Global res */
	phgVecAddEntries(vec_rhs, 0, M * Dim, Iu[0], &rhsu[0][0]);
	phgVecAddEntries(vec_rhs, 0, N, Ip, rhsp);
    }				/* end element */
int
main(int argc, char *argv[])
{
    MAT *A, *B;
    VEC *U = NULL, *x;
    FLOAT *C;
    SOLVER *solver, *solver1 = NULL;
    INT i, j, k, n, *pvt, N = 1000, K = 2;
    char *main_opts = NULL, *sub_opts = NULL;

    phgOptionsRegisterInt("-n", "N value", &N);
    phgOptionsRegisterInt("-k", "K value", &K);
    phgOptionsRegisterString("-main_solver_opts", "Options for the main solver",
				&main_opts);
    phgOptionsRegisterString("-sub_solver_opts", "Options for the subsolver",
				&sub_opts);

    /* a direct solver is preferable for the sparse matrix */
    phgOptionsPreset("-solver mumps");
    phgInit(&argc, &argv);

    phgPrintf(
"----------------------------------------------------------------------------\n"
"This code solves (A+UU^t)x=b using the Sherman-Morrison-Woodbury formula.\n"
"Note: may use the following to disable use of the Sherman-Morrison-Woodbury\n"
"algorithm and change to the default solver instead:\n"
"	-preonly_pc_type solver -preonly_pc_opts \"-solver_maxit 2000\"\n"
"----------------------------------------------------------------------------\n"
    );

   phgPrintf("Generating the linear system: N = %"dFMT", K = %"dFMT"\n", N, K);

    /* A is a distributed NxN SPD tridiagonal matrix (A = [-1, 2, -1]) */
    n = N / phgNProcs + (phgRank < (N % phgNProcs) ? 1 : 0);
    A = phgMatCreate(phgComm, n, N);
    phgPrintf("  Generating matrix A.\n");
    for (i = 0; i < n; i++) {
	/* diagonal */
	phgMatAddEntry(A, i, i, 2.0);
	/* diagonal - 1 */
	if (i > 0)
	    phgMatAddEntry(A, i, i - 1, -1.0);
	else if (phgRank > 0)
	    phgMatAddLGEntry(A, i, A->rmap->partition[phgRank] - 1, -1.0);
	/* diagonal + 1 */
	if (i < n - 1)
	    phgMatAddEntry(A, i, i + 1, -1.0);
	else if (phgRank < phgNProcs - 1)
	    phgMatAddLGEntry(A, i, A->rmap->partition[phgRank] + n, -1.0);
    }
    phgMatAssemble(A);

    /* U is a K-component vector */
    U = phgMapCreateVec(A->rmap, K);
    phgVecRandomize(U, 123);

    /* solver1 is the solver for A */
    phgOptionsPush();
    phgOptionsSetOptions(sub_opts);
    solver1 = phgMat2Solver(SOLVER_DEFAULT, A);
    phgOptionsPop();

    /* x is a scratch vector */
    x = phgMapCreateVec(A->rmap, 1);

    /* C is a KxK dense matrix, pvt is an integer array, they store the LU
     * factorization of (I + U^t*inv(A)*U) */
    phgPrintf("  Generating the dense matrix I+U^t*inv(A)*U.\n");
    C = phgCalloc(K * K, sizeof(*C));
    pvt = phgAlloc(K * sizeof(*pvt));
    for (i = 0; i < K; i++) {
	for (j = 0; j < n; j++) {
	    solver1->rhs->data[j] = U->data[i * n + j];
	    x->data[j] = 0.0;
	}
	solver1->rhs->assembled = TRUE;
	phgSolverVecSolve(solver1, FALSE, x);
	for (j = 0; j < K; j++)
	    for (k = 0; k < n; k++)
		C[i * K + j] += U->data[j * n + k] * x->data[k];
    }
#if USE_MPI
    if (U->map->nprocs > 1) {
	FLOAT *tmp = phgAlloc(K * K * sizeof(*tmp));
	MPI_Allreduce(C, tmp, K * K, PHG_MPI_FLOAT, MPI_SUM, U->map->comm);
	phgFree(C);
	C = tmp;
    }
#endif	/* USE_MPI */
    for (i = 0; i < K; i++)
	C[i * K + i] += 1.0;
    phgPrintf("  Factorizing the dense matrix I+U^t*inv(A)*U.\n");
    phgSolverDenseLU(K, C, pvt);

    /* B is a matrix-free matrix representing A + U*U^t, B->mv_data is used
     * to pass A, U, solver1, C and pvt to callback functions */
    B = phgMapCreateMatrixFreeMat(A->rmap, A->cmap, funcB,
				  /* arguments carried over to CB functions */
				  A, U, solver1, C, pvt, NULL);

    /* solver is a PreOnly solver for B whose pc_proc is set to sherman().
     * 
     * Note: can also use pcg, gmres, or petsc for this solver, in this case
     * the solution obtained with the Sherman-Morisson formula is iteratively
     * refined. */
    phgOptionsPush();
    phgOptionsSetOptions("-solver preonly");
    phgOptionsSetOptions(main_opts);
    solver = phgMat2Solver(SOLVER_DEFAULT, B);
    phgSolverSetPC(solver, solver, sherman);
    phgOptionsPop();

    for (i = 0; i < n; i++)
	x->data[i] = 1.0;
    phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, &solver->rhs);

    phgPrintf("Solving the linear system.\n");

    /* reset initial solution to zero */
    memset(x->data, 0, n * sizeof(*x->data));
    phgSolverVecSolve(solver, TRUE, x);
    for (i = 0; i < n; i++)
	solver->rhs->data[i] = 1.0;
    phgVecAXPBY(-1.0, solver->rhs, 1.0, &x);
    phgPrintf("Checking the result: |x - x_exact| / |x_exact| = %lg\n",
			(double)phgVecNorm2(x, 0, NULL) / sqrt((double)N));

    phgSolverDestroy(&solver);
    phgSolverDestroy(&solver1);
    phgMatDestroy(&A);
    phgMatDestroy(&B);
    phgVecDestroy(&U);
    phgVecDestroy(&x);
    phgFree(C);
    phgFree(pvt);

    phgFinalize();

    return 0;
}
/***************************************************************************
 * Build matrices *F, *Fu, *B, *Bt, *Fp, *Ap, and *Qp used * by the solvers.
 **************************************************************************/
static void
build_matrices(SOLVER *solver, SOLVER *pc, DOF **dofs, MAT **mats, LTYPE type)
{
    DOF *u = dofs[0], *p = dofs[1];
    DOF *f, *pbc, *gn[3], *gradu, *divu;
    int M = u->type->nbas;	/* num of bases of Velocity */
    int N = p->type->nbas;	/* num of bases of Pressure */
    int i, j, k, l;
    GRID *g = u->g;
    ELEMENT *e;
    MAT *matB, *matC, *matAp, *matQp;
    FLOAT F[M][Dim][M][Dim], Fu[M][M],
	B[N][M][Dim], Bt[M][Dim][N],
	Ap[N][N], Fp[N][N], Qp[N][N], bufu[M], bufp[N], tmp[9];
    INT Iu[M][Dim], Ju[Dim][M], Iu0[M], Ip[N];

    /* Unpack Dofs */
    unpackDof(dofs, 9, &u, &p, &gradu, &divu, &f, &pbc, &gn[0], &gn[1], &gn[2]);
    unpackMat(mats, 4, &matB, &matC, &matAp, &matQp);

#if 0
#warning remove me!
SOLVER *s = phgSolverCreate(SOLVER_PCG, p, NULL);
#endif

    ForAllElements(g, e) {
	/* Map: Element -> system */
	for (i = 0; i < M; i++) {
	    for (k = 0; k < Dim; k++)
		Ju[k][i] = Iu[i][k] = phgMapE2L(matF->cmap, 0, e, i * Dim + k);
	    Iu0[i] = phgMapE2L(matFu->cmap, 0, e, i);
	}
	for (i = 0; i < N; i++) {
	    Ip[i] = phgMapE2L(matFp->cmap, 0, e, i);
	}

	/* A V W */
	for (i = 0; i < M; i++) {
	    for (j = 0; j < M; j++) {
		FLOAT m, a, v, w[9];
		/* \phi_j \dot \phi_i */
		m = phgQuadBasDotBas(e, u, j, u, i, QUAD_DEFAULT);

		/* (\grad \phi_j) \cdot (\grad \phi_i) */
		a = nu * phgQuadGradBasDotGradBas(e, u, j, u, i,
						  QUAD_DEFAULT);
		/* (u \cdot \grad) \phi_j \times \phi_i, 1 item */
		v = phgQuadDofDotGradBasBas(e, u, u, j, i, QUAD_DEFAULT);
		Fu[i][j] = a + v;
		if (type == PICARD) {
		    memset(w, 0, sizeof(w));
		}
		else {
		    /* \phi_j (\grad u) \times \phi_i, 9 items */
		    phgQuadGradDofBasDotBas(e, gradu, u, j, i, QUAD_DEFAULT, w);
		}
		for (k = 0; k < Dim; k++) {
		    for (l = 0; l < Dim; l++) {
			F[i][k][j][l] = Theta * dt * *(w + k * Dim + l);
			if (k == l)
			    F[i][k][j][k] += m + Theta * dt * (a + v);
		    }
		}
	    }
	}

	/* B Bt */
	for (i = 0; i < M; i++) {
	    for (j = 0; j < N; j++) {
		FLOAT bxyz[3];
		phgQuadGradBasDotBas3(e, u, i, p, j, bxyz, QUAD_DEFAULT);
		for (k = 0; k < Dim; k++)
		    Bt[i][k][j] = B[j][i][k] = -Theta * dt * bxyz[k];
	    }
	}

	/* Ap Qp;  Fp(update) */
	for (i = 0; i < N; i++) {
	    for (j = 0; j < N; j++) {
		FLOAT ap, qp, cp;
		ap = phgQuadGradBasDotGradBas(e, p, j, p, i,
						   QUAD_DEFAULT);
		Ap[i][j] = Theta * dt * ap;

		qp = phgQuadBasDotBas(e, p, j, p, i, QUAD_DEFAULT);
		Qp[i][j] = Theta * dt * qp;

		cp = phgQuadDofDotGradBasBas(e, u, p, j, i, QUAD_DEFAULT);
		Fp[i][j] = qp + Theta * dt * (nu * ap + cp);
	    }
	}

	/* Global Matrix */
	for (i = 0; i < M; i++) {
	    /* Dirichle Boundary for velocity. */
	    if (phgDofDirichletBC(u, e, i, NULL, bufu, NULL, DOF_PROJ_NONE)) {
		for (k = 0; k < Dim; k++)
		    phgMatAddEntries(matF, 1, Iu[i] + k, M, Ju[k], bufu);
		phgMatAddEntries(matFu, 1, Iu0 + i, M, Iu0, bufu);
	    }
	    else {		/* interior node Or Neumann */
		/* Matrix F */
		phgMatAddEntries(matF, Dim, Iu[i], M * Dim, Iu[0],
				 &(F[i][0][0][0]));
		/* Matrix Fu */
		phgMatAddEntries(matFu, 1, Iu0 + i, M, Iu0, &(Fu[i][0]));
		/* Matrix Bt */
		for (k = 0; k < Dim; k++)
		    phgMatAddEntries(matBt, 1, &Iu[i][k], N, Ip, &Bt[i][k][0]);
	    }
	}			/* end of Block (1,1), (1,2) */

	for (i = 0; i < N; i++)
	    phgMatAddEntries(matB, 1, Ip + i, M * Dim, Iu[0], &B[i][0][0]);

	/* Matrix Ap Qp Fp */
	for (i = 0; i < N; i++) {
	    if (phgDofDirichletBC(pbc, e, i, NULL, bufp, tmp, DOF_PROJ_NONE)) {
		/* Dirichle Boundary for pressure PC. */
		phgMatAddEntries(matAp, 1, Ip + i, N, Ip, bufp);
		phgMatAddEntries(matFp, 1, Ip + i, N, Ip, bufp);
	    }
	    else {
		/* interior node Or Neumann */
		phgMatAddEntries(matAp, 1, Ip + i, N, Ip, Ap[i]);
		phgMatAddEntries(matFp, 1, Ip + i, N, Ip, Fp[i]);
	    }
#if 0
	    /* use only diagonal of the mass matrix in the preconditioner */
	    phgMatAddEntries(matQp, 1, Ip + i, 1, Ip + i, &Qp[i][i]);
#else
	    /* use full mass matrix in the preconditioner */
	    phgMatAddEntries(matQp, 1, Ip + i, N, Ip, Qp[i]);
#endif
#if 0
phgMatAddEntries(s->mat, 1, Ip + i, N, Ip, Qp[i]);
#endif
	}
    }				/* end element */

#if 0
VEC *v = phgMapCreateVec(s->rhs->map, 1);
memcpy(s->rhs->data, solver->rhs->data + DofGetDataCount(u), s->rhs->map->nlocal * sizeof(FLOAT));
s->rhs_updated = TRUE;
s->rhs->assembled = TRUE;
s->rtol = 1e-10;
s->maxit = 10000;
phgSolverVecSolve(s, TRUE, v);
phgPrintf("v = %e\n", phgVecNormInfty(v, 0, NULL));
phgVecDestroy(&v);
phgSolverDestroy(&s);
#endif

    return;
}
/*
 *  Preconditioning procedure:
 *     p = Qp^-1 Fp Ap^-1 * q
 *     u = F^-1 ( bu - Bt * p )
 *  */
static void
pc_proc(void *ctx, VEC *b0, VEC **x0)
{
    SOLVER *pc_solver = (SOLVER *) ctx;
    GRID *g = matF->rmap->dofs[0]->g;
    VEC *xu, *xp, *xu2, *xp2;
    int Nu = matF->rmap->nlocal;
    int Np = solver_Ap->mat->rmap->nlocal;
    INT verb = phgVerbosity;
    FLOAT *rhsF, *rhsAp, *rhsQp;
    double t;

    if (phgVerbosity > 0)
	phgVerbosity--;

    /* save old rhs */
    rhsF = solver_F->rhs->data;
    rhsAp = solver_Ap->rhs->data;
    rhsQp = solver_Qp->rhs->data;

    xu = phgMapCreateVec(matF->rmap, 1);
    xu2 = phgMapCreateVec(matF->rmap, 1);
    xp = phgMapCreateVec(solver_Ap->mat->rmap, 1);
    xp2 = phgMapCreateVec(solver_Ap->mat->rmap, 1);

    /* Ap^-1 * q */
    t = phgGetTime(NULL);
    solver_Ap->rhs->data = xp->data;
    solver_Ap->rhs->assembled = TRUE;
    solver_Ap->rhs_updated = TRUE;
    memcpy(xp->data, b0->data + Nu, sizeof(*xp->data) * Np);
    bzero(xp2->data, sizeof(*xp->data) * Np);
    phgSolverVecSolve(solver_Ap, FALSE, xp2);
    memcpy(xp->data, xp2->data, sizeof(*xp->data) * Np);
    if (verb > 0)
	phgPrintf("\t    Ap: nits = %d, residual = %0.4le [%0.4lgMB %0.4lfs]\n",
		  solver_Ap->nits, (double)solver_Ap->residual,
		  phgMemoryUsage(g, NULL) / (1024.0 * 1024.0),
		  phgGetTime(NULL) - t);

    /* Fp Ap^-1 * q */
    phgMatVec(MAT_OP_N, 1.0, matFp, xp, 0., &xp2);
    memcpy(xp->data, xp2->data, sizeof(*xp->data) * Np);

    /* xp = Qp^-1 Fp Ap^-1 * q */
    t = phgGetTime(NULL);
    solver_Qp->rhs->data = xp->data;
    solver_Qp->rhs->assembled = TRUE;
    solver_Qp->rhs_updated = TRUE;
    bzero(xp2->data, sizeof(*xp->data) * Np);
    phgSolverVecSolve(solver_Qp, FALSE, xp2);
    if (verb > 0)
	phgPrintf("\t    Qp: nits = %d, residual = %0.4le [%0.4lgMB %0.4lfs]\n",
		  solver_Qp->nits, (double)solver_Qp->residual,
		  phgMemoryUsage(g, NULL) / (1024.0 * 1024.0),
		  phgGetTime(NULL) - t);

    /* xu = F^-1 ( bu - Bt * xp ) */
    t = phgGetTime(NULL);
    memcpy(xu->data, b0->data, sizeof(*xu->data) * Nu);
    phgMatVec(MAT_OP_N, -1.0, matBt, xp2, 1., &xu);

    if (solver_F->mat->cmap->nlocal == Nu) {
	/* use F in the preconditioning matrix */
	solver_F->rhs->data = xu->data;
	solver_F->rhs->assembled = TRUE;
	solver_F->rhs_updated = TRUE;
	bzero(xu2->data, sizeof(*xu->data) * Nu);
	phgSolverVecSolve(solver_F, FALSE, xu2);
    }
    else {
	pc_proc1(solver_F, xu, &xu2);
    }

    if (verb > 0)
	phgPrintf("\t     F: nits = %d, residual = %0.4le [%0.4lgMB %0.4lfs]\n",
		  solver_F->nits, (double)solver_F->residual,
		  phgMemoryUsage(g, NULL) / (1024.0 * 1024.0),
		  phgGetTime(NULL) - t);

    /* Copy xu, xp to x */
    memcpy((*x0)->data, xu2->data, sizeof(*xu->data) * Nu);
    memcpy((*x0)->data + Nu, xp2->data, sizeof(*xp->data) * Np);

    /* restore rhs */
    solver_F->rhs->data = rhsF;
    solver_Ap->rhs->data = rhsAp;
    solver_Qp->rhs->data = rhsQp;

    phgVecDestroy(&xu);
    phgVecDestroy(&xu2);
    phgVecDestroy(&xp);
    phgVecDestroy(&xp2);

    phgVerbosity = verb;

    return;
}
Beispiel #10
0
static void
build_matrices(MAT *matA, MAT *matM, MAT *matC, MAT *S, FLOAT s,
		DOF *u_h, DOF *p_h)
/* S is used to store s*diag(M(p_h))^(-1) */
{
    int N = u_h->type->nbas;	/* number of basis functions in an element */
    int M = p_h->type->nbas;
    int i, j;
    GRID *g = u_h->g;
    ELEMENT *e;
    FLOAT A[N][N], B[N][N], C[N][M];
    INT I[N], Ip[M];
    INT k, n0;
    VEC *V = phgMapCreateVec(S->rmap, 1);

    phgVecDisassemble(V);	/* for phgVecAddEntry */
    ForAllElements(g, e) {
	for (i = 0; i < N; i++) {
	    I[i] = phgMapE2L(matA->rmap, 0, e, i);
	    for (k = 0; k < M; k++) {
		/* \int \grad\psi_k\cdot\phi_i */
		C[i][k] = phgQuadGradBasDotBas(e, p_h, k, u_h, i, QUAD_DEFAULT);
	    }
	    for (j = 0; j <= i; j++) {
		/* \int \phi_i\cdot\phi_j */
		B[j][i] = B[i][j] = 
		    phgQuadBasDotBas(e, u_h, j, u_h, i, QUAD_DEFAULT);
		/* \int \curl\phi_i\cdot\curl\phi_j */
		A[j][i] = A[i][j] =
		    phgQuadCurlBasDotCurlBas(e, u_h, j, u_h, i, QUAD_DEFAULT);
	    }
	}

	for (i = 0; i < M; i++) {
	    Ip[i] = phgMapE2L(matC->cmap, 0, e, i);
	    if (Ip[i] < 0)	/* boundary entry */
		continue;
	    phgVecAddEntry(V, 0, Ip[i],
			phgQuadBasDotBas(e, p_h, i, p_h, i, QUAD_DEFAULT));
	}

	/* loop on basis functions */
	for (i = 0; i < N; i++) {
	    if (phgDofDirichletBC(u_h, e, i, NULL, NULL, NULL, DOF_PROJ_CROSS))
		continue;
	    phgMatAddEntries(matA, 1, I + i, N, I, A[i]); 
	    phgMatAddEntries(matM, 1, I + i, N, I, B[i]); 
	    phgMatAddEntries(matC, 1, I + i, M, Ip, C[i]); 
	}
    }

    phgVecAssemble(V);
    n0 = V->map->partition[V->map->rank];
    for (k = 0; k < V->map->nlocal; k++)
	phgMatAddGlobalEntry(S, k + n0, k + n0, s / V->data[k]);
    phgVecDestroy(&V);
    phgMatAssemble(S);
    phgMatSetupDiagonal(S);
    phgMatAssemble(matA);
    phgMatAssemble(matM);
    phgMatAssemble(matC);
}
Beispiel #11
0
int
main(int argc, char *argv[])
{
    GRID *g;
    DOF *u_h;
    MAT *A, *A0, *B;
    MAP *map;
    INT i;
    size_t nnz, mem, mem_peak;
    VEC *x, *y0, *y1, *y2;
    double t0, t1, dnz, dnz1, mflops, mop;
    char *fn = "../test/cube.dat";
    FLOAT mem_max = 300;
    INT refine = 0;

    phgOptionsRegisterFilename("-mesh_file", "Mesh file", (char **)&fn);
    phgOptionsRegisterInt("-loop_count", "Loop count", &loop_count);
    phgOptionsRegisterInt("-refine", "Refinement level", &refine);
    phgOptionsRegisterFloat("-mem_max", "Maximum memory", &mem_max);

    phgInit(&argc, &argv);
    g = phgNewGrid(-1);
    if (!phgImport(g, fn, FALSE))
	phgError(1, "can't read file \"%s\".\n", fn);
    phgRefineAllElements(g, refine);
    u_h = phgDofNew(g, DOF_DEFAULT, 1, "u_h", DofNoAction);

    while (TRUE) {
	phgPrintf("\n");
	if (phgBalanceGrid(g, 1.2, 1, NULL, 0.))
	    phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n",
			g->nprocs, (double)g->lif);
	map = phgMapCreate(u_h, NULL);
	A = phgMapCreateMat(map, map);
	A->handle_bdry_eqns = TRUE;
	build_matrix(A, u_h);
	phgMatAssemble(A);

	/* Note: A is unsymmetric (A' != A) if boundary entries not removed */
	phgMatRemoveBoundaryEntries(A);

#if 0
	/* test block matrix operation */
	A0 = phgMatCreateBlockMatrix(g->comm, 1, 1, &A, NULL);
#else
	A0 = A;
#endif

	phgPrintf("%d DOF, %d elems, %d submeshes, matrix size: %d, LIF: %lg\n",
			DofGetDataCountGlobal(u_h), g->nleaf_global,
			g->nprocs, A->rmap->nglobal, (double)g->lif);

	/* test PHG mat-vec multiply */
	x = phgMapCreateVec(A->cmap, 1);
	y1 = phgMapCreateVec(A->rmap, 1);
	phgVecRandomize(x, 123);
	phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);

	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	for (i = 0; i < loop_count; i++) {
	    phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);
	}
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	y0 = phgVecCopy(y1, NULL);
	nnz = A->nnz_d + A->nnz_o;
#if USE_MPI
	dnz1 = nnz;
	MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	dnz = nnz;
#endif
	mop = loop_count * (dnz + dnz - A->rmap->nlocal) * 1e-6;

	phgPrintf("\n");
	t1 -= t0;
	phgPrintf("   PHG:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF)\n",
			t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops);

	/* test trans(A)*x */
	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	for (i = 0; i < loop_count; i++) {
	    phgMatVec(MAT_OP_T, 1.0, A0, x, 0.0, &y1);
	}
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	t1 -= t0;
	phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		  "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	/* time A * trans(A) */
	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	B = phgMatMat(MAT_OP_N, MAT_OP_N, 1.0, A, A, 0.0, NULL);
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	nnz = B->nnz_d + B->nnz_o;
#if USE_MPI
	dnz1 = nnz;
	MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	dnz = nnz;
#endif
	/* compare B*x <--> A*A*x */
	y2 = phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, NULL);
	phgMatVec(MAT_OP_N, 1.0, A0, y0, 0.0, &y1);
	phgMatDestroy(&B);
	t1 -= t0;
	phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

#if USE_PETSC
	{
	    Mat ma, mb;
	    MatInfo info;
	    Vec va, vb, vc;
	    PetscScalar *vec;

	    ma = phgPetscCreateMatAIJ(A);
	    MatGetVecs(ma, PETSC_NULL, &va);
	    VecDuplicate(va, &vb);
	    VecGetArray(va, &vec);
	    memcpy(vec, x->data, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(va, &vec);
	    MatMult(ma, va, vb);
	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		MatMult(ma, va, vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);

	    MatGetInfo(ma, MAT_GLOBAL_SUM, &info);
	    /*phgPrintf("    --------------------------------------------"
		      "-------------------------\n");*/
	    phgPrintf("\n");
	    t1 -= t0;
	    dnz = info.nz_used;
	    phgPrintf(" PETSc:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		MatMultTranspose(ma, va, vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);
	    t1 -= t0;
	    phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    MatMatMult(ma, ma, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mb);
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    t1 -= t0;
	    MatGetInfo(mb, MAT_GLOBAL_SUM, &info);
	    dnz = info.nz_used;
	    VecDuplicate(va, &vc);
	    /* compare B*x <--> A*A*x */
	    MatMult(ma, vb, vc);
	    MatMult(mb, va, vb);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);
	    VecGetArray(vc, &vec);
	    memcpy(y2->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vc, &vec);
	    phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

	    phgPetscMatDestroy(&mb);
	    phgPetscMatDestroy(&ma);
	    phgPetscVecDestroy(&va);
	    phgPetscVecDestroy(&vb);
	    phgPetscVecDestroy(&vc);
	}
#endif	/* USE_PETSC */

#if USE_HYPRE
	{
	    HYPRE_IJMatrix ma;
	    HYPRE_IJVector va, vb, vc;
	    HYPRE_ParCSRMatrix  par_ma;
	    hypre_ParCSRMatrix  *par_mb;
	    HYPRE_ParVector	par_va, par_vb, par_vc;
	    HYPRE_Int offset, *ni, start, end;
	    assert(sizeof(INT)==sizeof(int) && sizeof(FLOAT)==sizeof(double));
	    setup_hypre_mat(A, &ma);
	    ni = phgAlloc(2 * A->rmap->nlocal * sizeof(*ni));
	    offset = A->cmap->partition[A->cmap->rank];
	    for (i = 0; i < A->rmap->nlocal; i++)
		ni[i] = i + offset;
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &va);
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &vb);
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &vc);
	    HYPRE_IJVectorSetObjectType(va, HYPRE_PARCSR);
	    HYPRE_IJVectorSetObjectType(vb, HYPRE_PARCSR);
	    HYPRE_IJVectorSetObjectType(vc, HYPRE_PARCSR);
	    HYPRE_IJVectorSetMaxOffProcElmts(va, 0);
	    HYPRE_IJVectorSetMaxOffProcElmts(vb, 0);
	    HYPRE_IJVectorSetMaxOffProcElmts(vc, 0);
	    HYPRE_IJVectorInitialize(va);
	    HYPRE_IJVectorInitialize(vb);
	    HYPRE_IJVectorInitialize(vc);
	    HYPRE_IJMatrixGetObject(ma, (void **)(void *)&par_ma);
	    HYPRE_IJVectorGetObject(va, (void **)(void *)&par_va);
	    HYPRE_IJVectorGetObject(vb, (void **)(void *)&par_vb);
	    HYPRE_IJVectorGetObject(vc, (void **)(void *)&par_vc);
	    HYPRE_IJVectorSetValues(va, A->cmap->nlocal, ni, (double *)x->data);
	    HYPRE_IJVectorAssemble(va);
	    HYPRE_IJVectorAssemble(vb);
	    HYPRE_IJVectorAssemble(vc);

	    HYPRE_IJMatrixGetRowCounts(ma, A->cmap->nlocal,
					ni, ni + A->rmap->nlocal);
	    for (i = 0, nnz = 0; i < A->rmap->nlocal; i++)
		nnz += ni[A->rmap->nlocal + i];
#if USE_MPI
	    dnz1 = nnz;
	    MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	    dnz = nnz;
#endif

	    HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    /*phgPrintf("    --------------------------------------------"
		      "-------------------------\n");*/
	    phgPrintf("\n");
	    t1 -= t0;
	    phgPrintf(" HYPRE:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		HYPRE_ParCSRMatrixMatvecT(1.0, par_ma, par_va, 0.0, par_vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    t1 -= t0;
	    phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    /* Note: 'HYPRE_ParCSRMatrix' is currently typedef'ed to
	     *	     'hypre_ParCSRMatrix *' */
	    par_mb = hypre_ParMatmul((hypre_ParCSRMatrix *)par_ma,
					(hypre_ParCSRMatrix *)par_ma);
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    start = hypre_ParCSRMatrixFirstRowIndex(par_mb);
	    end = hypre_ParCSRMatrixLastRowIndex(par_mb) + 1;
	    for (i = start, nnz = 0; i < end; i++) {
		HYPRE_Int ncols;
		hypre_ParCSRMatrixGetRow(par_mb, i, &ncols, NULL, NULL);
		hypre_ParCSRMatrixRestoreRow(par_mb, i, &ncols, NULL, NULL);
		nnz += ncols;
	    }
#if USE_MPI
	    dnz1 = nnz;
	    MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	    dnz = nnz;
#endif
	    /* compare B*x <--> A*A*x */
	    HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_vb, 0.0, par_vc);
	    HYPRE_ParCSRMatrixMatvec(1.0, (void *)par_mb, par_va, 0.0, par_vb);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    HYPRE_IJVectorGetValues(vc, A->rmap->nlocal, ni, (double*)y2->data);
	    hypre_ParCSRMatrixDestroy((par_mb));
	    t1 -= t0;
	    phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

	    phgFree(ni);
	    HYPRE_IJMatrixDestroy(ma);
	    HYPRE_IJVectorDestroy(va);
	    HYPRE_IJVectorDestroy(vb);
	    HYPRE_IJVectorDestroy(vc);
	}
#endif	/* USE_HYPRE */

	if (A0 != A)
	    phgMatDestroy(&A0);
#if 0
if (A->rmap->nglobal > 1000) {
    VEC *v = phgMapCreateVec(A->rmap, 3);
    for (i = 0; i < v->map->nlocal; i++) {
	v->data[i + 0 * v->map->nlocal] = 1 * (i + v->map->partition[g->rank]);
	v->data[i + 1 * v->map->nlocal] = 2 * (i + v->map->partition[g->rank]);
	v->data[i + 2 * v->map->nlocal] = 3 * (i + v->map->partition[g->rank]);
    }
    phgMatDumpMATLAB(A, "A", "A.m");
    phgVecDumpMATLAB(v, "v", "v.m");
    phgFinalize();
    exit(0);
}
#endif
	phgMatDestroy(&A);
	phgVecDestroy(&x);
	phgVecDestroy(&y0);
	phgVecDestroy(&y1);
	phgVecDestroy(&y2);
	phgMapDestroy(&map);
	mem = phgMemoryUsage(g, &mem_peak);
	dnz = mem / (1024.0 * 1024.0);
	dnz1 = mem_peak / (1024.0 * 1024.0);
	/*phgPrintf("    --------------------------------------------"
		  "-------------------------\n");*/
	phgPrintf("\n");
	phgPrintf("  Memory: current %0.4lgMB, peak %0.4lgMB\n", dnz, dnz1);
#if 0
{
    static int loop_count = 0;
    if (++loop_count == 4)
	break;
}
#endif
	if (mem_peak > 1024 * (size_t)1024 * mem_max)
	    break;
	phgRefineAllElements(g, 1);
    }
    phgDofFree(&u_h);
    phgFreeGrid(&g);
    phgFinalize();

    return 0;
}
Beispiel #12
0
void phgNSInitPc(NSSolver *ns)
{
    GRID *g = ns->g;
    MAP *Pmap = ns->Pmap, *Pbcmap;
    BOOLEAN use_Fu = _nsp->use_Fu;
    int verb;

    /* pcd boundary type test */
    _pcd->dof_inflow = phgDofNew(g, _nsp->ptype, 1, "dof inflow", DofNoAction);
    _pcd->dof_outflow = phgDofNew(g, _nsp->ptype, 1, "dof outflow", DofNoAction);
    _pcd->dof_nobdry = phgDofNew(g, _nsp->ptype, 1, "dof nobdry", DofNoAction);

    phgDofSetDirichletBoundaryMask(_pcd->dof_inflow, INFLOW);
    phgDofSetDirichletBoundaryMask(_pcd->dof_outflow, OUTFLOW);
    phgDofSetDirichletBoundaryMask(_pcd->dof_nobdry, 0);

    _pcd->map_inflow = phgMapCreate(_pcd->dof_inflow, NULL);
    _pcd->map_outflow = phgMapCreate(_pcd->dof_outflow, NULL);
    _pcd->map_nobdry = phgMapCreate(_pcd->dof_nobdry, NULL);

    _pcd->Pbcmap = phgMapCreate(_pcd->pbc, NULL);
    Pbcmap = _pcd->Pbcmap;

    Unused(Pmap);
#warning PCD B.C.: step 1. build mat using map... 
    /*
     * PCD boundary setup: should be consistent with code above
     */
    if (_nsp->pin_node) {
	_pcd->matFp = phgMapCreateMat(Pbcmap, Pbcmap);
	_pcd->matAp = phgMapCreateMat(Pbcmap, Pbcmap);
	_pcd->matQp = phgMapCreateMat(Pbcmap, Pbcmap);
    } else {
	//_pcd->matAp = phgMapCreateMat(_pcd->map_inflow, _pcd->map_inflow);
	//_pcd->matFp = phgMapCreateMat(_pcd->map_inflow, _pcd->map_inflow);
	_pcd->matFp = phgMapCreateMat(_pcd->map_outflow, _pcd->map_outflow);
	_pcd->matAp = phgMapCreateMat(_pcd->map_outflow, _pcd->map_outflow);
	//_pcd->matQp = phgMapCreateMat(_pcd->map_outflow, _pcd->map_outflow);
	//_pcd->matQp = phgMapCreateMat(_pcd->map_inflow, _pcd->map_inflow);
	//_pcd->matFp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry);
	//_pcd->matAp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry);
	//_pcd->matFp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry);
	_pcd->matQp = phgMapCreateMat(_pcd->map_nobdry, _pcd->map_nobdry);
    }

    /* stokes problem: get SYMETRIC mat when assemble.
     * Handle_bdry_eqns means mat is composed with row of boundary row
     * and non-bdry row, and eliminating mat columes of dirichlet dof. 
     */
    if (_nsp->use_symetric) {
	_pcd->matFp->handle_bdry_eqns = TRUE;
	_pcd->matAp->handle_bdry_eqns = TRUE;
	_pcd->matQp->handle_bdry_eqns = TRUE;
    } 
    /* genearl NS: no need to eliminate mat columes of dirichlet dof */
    else {
	_pcd->matFp->handle_bdry_eqns = FALSE;
	_pcd->matAp->handle_bdry_eqns = FALSE;
	_pcd->matQp->handle_bdry_eqns = FALSE;
    }	
    
    _pcd->rhsScale = phgMapCreateVec(_pcd->matQp->rmap, 1);
    phgVecDisassemble(_pcd->rhsScale);

    ns->pc = phgMat2Solver(SOLVER_PreOnly, ns->solver_u->mat);
    if (_nsp->use_PCD)
	phgSolverSetPC(ns->solver_u, ns->pc, pc_proc);

    /* solver F */
    phgOptionsPush();
    phgOptionsSetOptions("-solver hypre "
			 "-hypre_solver pcg "
			 "-hypre_pc boomeramg "
			 "-solver_maxit 10 "
			 "-solver_rtol 1e-4");
    phgOptionsSetOptions(_nsp->F_opts);
    /* use matF in the preconditioning matrix */
    _pcd->solver_F = phgMat2Solver(SOLVER_DEFAULT, ns->matF);
    _pcd->solver_F->verb = SUB_SOLVER_VERB; /* Set user options. */
    _pcd->pc_F = NULL;
#if USE_MG 
    if (ns_params->use_mg_F) {
	MAT *matF = ns->matF;

	assert(ns_params->use_PCD && !ns_params->use_Fu);
	_pcd->pc_F = phgMat2Solver(SOLVER_PreOnly, matF);
	phgOptionsSetOptions("-solver petsc ");
	matF->mv_data = phgAlloc(sizeof(*matF->mv_data));
	matF->mv_data[0] = (void *) ns->mg;
	phgSolverSetPC(_pcd->solver_F, _pcd->pc_F, mg_pc_proc);
    }
#endif /* USE_MG */
    _pcd->solver_F->warn_maxit = FALSE;
    phgOptionsPop();

    /* solver Ap */
    phgOptionsPush();
    phgOptionsSetOptions("-solver hypre "
			 "-hypre_solver gmres "
			 "-hypre_pc boomeramg "
			 "-solver_maxit 10 "
			 "-solver_rtol 1e-3");
    phgOptionsSetOptions(_nsp->Ap_opts);
    _pcd->solver_Ap = phgMat2Solver(SOLVER_DEFAULT, _pcd->matAp);
    _pcd->solver_Ap->warn_maxit = FALSE;
    _pcd->solver_Ap->verb = SUB_SOLVER_VERB;
    phgOptionsPop();
    _pcd->pc_Ap = NULL;
#if USE_MG 
    if (ns_params->use_mg_Ap) {
	MAT *matAp = _pcd->matAp;

	assert(ns_params->use_PCD);
	_pcd->pc_Ap = phgMat2Solver(SOLVER_PreOnly, matAp);
	phgOptionsSetOptions("-solver petsc ");
	matAp->mv_data = phgAlloc(sizeof(*matAp->mv_data));
	matAp->mv_data[0] = (void *) ns->mg;
	phgSolverSetPC(_pcd->solver_Ap, _pcd->pc_Ap, mg_pc_proc);
    }
#endif /* USE_MG */


    /* solver Qp */
    phgOptionsPush();
    phgOptionsSetOptions("-solver hypre "
			 "-hypre_solver pcg "
			 "-hypre_pc boomeramg "
			 "-solver_maxit 10 "
			 "-solver_rtol 1e-3");
    phgOptionsSetOptions(_nsp->Qp_opts);
    _pcd->solver_Qp = phgMat2Solver(SOLVER_DEFAULT, _pcd->matQp);
    _pcd->solver_Qp->warn_maxit = FALSE;
    _pcd->solver_Qp->verb = SUB_SOLVER_VERB;
    phgOptionsPop();
    _pcd->pc_Qp = NULL;
#if USE_MG 
    if (ns_params->use_mg_Qp) {
	MAT *matQp = _pcd->matQp;

	assert(ns_params->use_PCD);
	_pcd->pc_Qp = phgMat2Solver(SOLVER_PreOnly, matQp);
	phgOptionsSetOptions("-solver petsc ");
	matQp->mv_data = phgAlloc(sizeof(*matQp->mv_data));
	matQp->mv_data[0] = (void *) ns->mg;
	phgSolverSetPC(_pcd->solver_Qp, _pcd->pc_Qp, mg_pc_proc);
    }
#endif /* USE_MG */

    /* Fu for solve F^{-1} */
    if (use_Fu) { /* _nsp->implicit_centrip &&  */
	DOF *u1;
	MAP *u1map;
	MAT *matFu;
	u1 = _pcd->u1 = 
	    phgDofNew(g, _nsp->utype, 1, "velocity component u", DofNoAction);
	phgDofSetDirichletBoundaryMask(u1, SETFLOW);
	
	u1map = _pcd->u1map
	    = phgMapCreate(_pcd->u1, NULL);
	matFu = _pcd->matFu 
	    = phgMapCreateMat(u1map, u1map);
	if (_nsp->use_symetric)
	    matFu->handle_bdry_eqns = TRUE;

	/* solver Fu */
	phgOptionsPush();
	phgOptionsSetOptions("-solver hypre "
			     "-hypre_solver pcg "
			     "-hypre_pc boomeramg "
			     "-solver_maxit 10 "
			     "-solver_rtol 1e-4");
	phgOptionsSetOptions(_nsp->Fu_opts);
	_pcd->solver_Fu = phgMat2Solver(SOLVER_DEFAULT, _pcd->matFu);
	_pcd->solver_Fu->warn_maxit = FALSE;
	_pcd->solver_Fu->verb = SUB_SOLVER_VERB;
	phgOptionsPop();

    }

    return;
}