Beispiel #1
0
PARAMS 
*phgParametersCreate()
{
    PARAMS *params = (PARAMS *) phgAlloc(sizeof(*params));
    
    /* default settings */
    _p->fn = "cube.bdface.mesh";
    _p->fn_bdry = "cube.bdry.dat";

    _p->fix_edge = FALSE;
    _p->fix_bdry = FALSE;

    _p->pre_refines = 0;
    _p->tol = 1e-2;
    _p->n_move_substep = 1;
    _p->n_smooth_monitor = 1;

    _p->move_opts = NULL;
    _p->dof_opts = NULL;

    _p->verb = 0;
    _p->viz_move = FALSE;

    /* get user defined parameter from file*/
    phgOptionsRegisterFilename("mesh_file", "Mesh file", (char **)&_p->fn);
    phgOptionsRegisterFilename("bdry_face_file", "Boundary face file", (char **)&_p->fn_bdry);
    phgOptionsRegisterInt("pre_refines", "Pre-refines", &_p->pre_refines);
    
    phgOptionsRegisterInt("mm_n_move_substep", "Moving Mesh: num of substep in one step", &_p->n_move_substep);
    phgOptionsRegisterInt("mm_n_smooth_monitor", "Moving Mesh: num of smoothing mointor", &_p->n_smooth_monitor);
    phgOptionsRegisterInt("mm_verbosity", "Moving Mesh: Verbosity", &_p->verb);
    phgOptionsRegisterFloat("mm_tol", "Moving Mesh: tolerence of error", &_p->tol);
    phgOptionsRegisterNoArg("mm_fix_edge", "Moving Mesh: fix edge boundary nodes", &_p->fix_edge);
    phgOptionsRegisterNoArg("mm_fix_bdry", "Moving Mesh: fix all boundary nodes", &_p->fix_bdry);
    phgOptionsRegisterNoArg("mm_viz_move", "Moving Mesh: visualize nodes move", &_p->viz_move);
    phgOptionsRegisterString("mm_move_opts", "Moving Mesh Solver of moving options", &_p->move_opts);
    phgOptionsRegisterString("mm_dof_opts", "Moving Mesh Solver of dof updating options", &_p->dof_opts);
    
    _p->fix_edge |= _p->fix_bdry;
    return params;
}
Beispiel #2
0
int
main(int argc, char *argv[])
{
    GRID *g;
    MAP *map, *map0;
    INT n = 4;
    DOF *u;
    static INT pre_refines = 0;
    static char *fn = "../test/cube.dat";
    /*static char *fn = "../albert-files/cube5.dat";*/

    phgOptionsPreset("-dof_type P1");
    phgOptionsRegisterInt("-pre_refines", "Pre-refinements", &pre_refines);
    phgInit(&argc, &argv);
    g = phgNewGrid(-1);
    if (!phgImport(g, fn, FALSE))
	phgError(1, "can't read file \"%s\".\n", fn);
    phgRefineAllElements(g, pre_refines);
    phgPartitionGrid(g);

    u = phgDofNew(g, DOF_DEFAULT, 1, "u", DofNoAction);
    map0 = phgMapCreate(u, NULL);

    phgInfo(-1, "\n");
    phgInfo(-1, "Test special map with size = 0 on all but one process:\n");
    map = phgMapCreateSimpleMap(g->comm, g->rank == g->nprocs - 1 ? n : 0, n);
    phgInfo(-1, "---- DOF rows, special columns:\n");
    do_test(map0, map);
    phgInfo(-1, "---- Special rows, DOF columns:\n");
    do_test(map, map0);
    phgInfo(-1, "---- Special rows, special columns:\n");
    do_test(map, map);
    phgMapDestroy(&map);

    phgInfo(-1, "\n");
    phgInfo(-1, "Test serial map:\n");
    map = phgMapCreateSimpleMap(g->comm, n, n);
    phgInfo(-1, "---- DOF rows, serial columns:\n");
    do_test(map0, map);
    phgInfo(-1, "---- serial rows, DOF columns:\n");
    do_test(map, map0);
    phgInfo(-1, "---- Serial rows, serial columns:\n");
    do_test(map, map);
    phgMapDestroy(&map);

    phgMapDestroy(&map0);
    phgDofFree(&u);
    phgFreeGrid(&g);
    phgFinalize();

    return 0;
}
int
main(int argc, char *argv[])
{
    MAT *A, *B;
    VEC *U = NULL, *x;
    FLOAT *C;
    SOLVER *solver, *solver1 = NULL;
    INT i, j, k, n, *pvt, N = 1000, K = 2;
    char *main_opts = NULL, *sub_opts = NULL;

    phgOptionsRegisterInt("-n", "N value", &N);
    phgOptionsRegisterInt("-k", "K value", &K);
    phgOptionsRegisterString("-main_solver_opts", "Options for the main solver",
				&main_opts);
    phgOptionsRegisterString("-sub_solver_opts", "Options for the subsolver",
				&sub_opts);

    /* a direct solver is preferable for the sparse matrix */
    phgOptionsPreset("-solver mumps");
    phgInit(&argc, &argv);

    phgPrintf(
"----------------------------------------------------------------------------\n"
"This code solves (A+UU^t)x=b using the Sherman-Morrison-Woodbury formula.\n"
"Note: may use the following to disable use of the Sherman-Morrison-Woodbury\n"
"algorithm and change to the default solver instead:\n"
"	-preonly_pc_type solver -preonly_pc_opts \"-solver_maxit 2000\"\n"
"----------------------------------------------------------------------------\n"
    );

   phgPrintf("Generating the linear system: N = %"dFMT", K = %"dFMT"\n", N, K);

    /* A is a distributed NxN SPD tridiagonal matrix (A = [-1, 2, -1]) */
    n = N / phgNProcs + (phgRank < (N % phgNProcs) ? 1 : 0);
    A = phgMatCreate(phgComm, n, N);
    phgPrintf("  Generating matrix A.\n");
    for (i = 0; i < n; i++) {
	/* diagonal */
	phgMatAddEntry(A, i, i, 2.0);
	/* diagonal - 1 */
	if (i > 0)
	    phgMatAddEntry(A, i, i - 1, -1.0);
	else if (phgRank > 0)
	    phgMatAddLGEntry(A, i, A->rmap->partition[phgRank] - 1, -1.0);
	/* diagonal + 1 */
	if (i < n - 1)
	    phgMatAddEntry(A, i, i + 1, -1.0);
	else if (phgRank < phgNProcs - 1)
	    phgMatAddLGEntry(A, i, A->rmap->partition[phgRank] + n, -1.0);
    }
    phgMatAssemble(A);

    /* U is a K-component vector */
    U = phgMapCreateVec(A->rmap, K);
    phgVecRandomize(U, 123);

    /* solver1 is the solver for A */
    phgOptionsPush();
    phgOptionsSetOptions(sub_opts);
    solver1 = phgMat2Solver(SOLVER_DEFAULT, A);
    phgOptionsPop();

    /* x is a scratch vector */
    x = phgMapCreateVec(A->rmap, 1);

    /* C is a KxK dense matrix, pvt is an integer array, they store the LU
     * factorization of (I + U^t*inv(A)*U) */
    phgPrintf("  Generating the dense matrix I+U^t*inv(A)*U.\n");
    C = phgCalloc(K * K, sizeof(*C));
    pvt = phgAlloc(K * sizeof(*pvt));
    for (i = 0; i < K; i++) {
	for (j = 0; j < n; j++) {
	    solver1->rhs->data[j] = U->data[i * n + j];
	    x->data[j] = 0.0;
	}
	solver1->rhs->assembled = TRUE;
	phgSolverVecSolve(solver1, FALSE, x);
	for (j = 0; j < K; j++)
	    for (k = 0; k < n; k++)
		C[i * K + j] += U->data[j * n + k] * x->data[k];
    }
#if USE_MPI
    if (U->map->nprocs > 1) {
	FLOAT *tmp = phgAlloc(K * K * sizeof(*tmp));
	MPI_Allreduce(C, tmp, K * K, PHG_MPI_FLOAT, MPI_SUM, U->map->comm);
	phgFree(C);
	C = tmp;
    }
#endif	/* USE_MPI */
    for (i = 0; i < K; i++)
	C[i * K + i] += 1.0;
    phgPrintf("  Factorizing the dense matrix I+U^t*inv(A)*U.\n");
    phgSolverDenseLU(K, C, pvt);

    /* B is a matrix-free matrix representing A + U*U^t, B->mv_data is used
     * to pass A, U, solver1, C and pvt to callback functions */
    B = phgMapCreateMatrixFreeMat(A->rmap, A->cmap, funcB,
				  /* arguments carried over to CB functions */
				  A, U, solver1, C, pvt, NULL);

    /* solver is a PreOnly solver for B whose pc_proc is set to sherman().
     * 
     * Note: can also use pcg, gmres, or petsc for this solver, in this case
     * the solution obtained with the Sherman-Morisson formula is iteratively
     * refined. */
    phgOptionsPush();
    phgOptionsSetOptions("-solver preonly");
    phgOptionsSetOptions(main_opts);
    solver = phgMat2Solver(SOLVER_DEFAULT, B);
    phgSolverSetPC(solver, solver, sherman);
    phgOptionsPop();

    for (i = 0; i < n; i++)
	x->data[i] = 1.0;
    phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, &solver->rhs);

    phgPrintf("Solving the linear system.\n");

    /* reset initial solution to zero */
    memset(x->data, 0, n * sizeof(*x->data));
    phgSolverVecSolve(solver, TRUE, x);
    for (i = 0; i < n; i++)
	solver->rhs->data[i] = 1.0;
    phgVecAXPBY(-1.0, solver->rhs, 1.0, &x);
    phgPrintf("Checking the result: |x - x_exact| / |x_exact| = %lg\n",
			(double)phgVecNorm2(x, 0, NULL) / sqrt((double)N));

    phgSolverDestroy(&solver);
    phgSolverDestroy(&solver1);
    phgMatDestroy(&A);
    phgMatDestroy(&B);
    phgVecDestroy(&U);
    phgVecDestroy(&x);
    phgFree(C);
    phgFree(pvt);

    phgFinalize();

    return 0;
}
Beispiel #4
0
int
main(int argc, char *argv[])
{
  ELEMENT *e;
  GRID *g;
  DOF *u, *v, *u_hp, *v_hp;
  HP_TYPE *hp;
  MAP *map;
  char *fn = "cube.dat";
  char *dof_u = "P2", *dof_v = "P1";
  INT step = 0, pre_refines = 0;

  phgOptionsRegisterFilename("-mesh_file", "Mesh file", &fn);
  phgOptionsRegisterInt("-pre_refines", "Pre-refinements", &pre_refines);
  phgOptionsRegisterString("-dof_u", "DOF type for u", &dof_u);
  phgOptionsRegisterString("-dof_v", "DOF type for v", &dof_v);

  phgInit(&argc, &argv);
  g = phgNewGrid(-1);
  if (!phgImport(g, fn, FALSE))
    phgError(1, "can't read file \"%s\".\n", fn);
  phgRefineAllElements(g, pre_refines);

  phgOptionsSetHandler("-dof_type", dof_u);
  u = phgDofNew(g, DOF_DEFAULT, 1, "u", DofInterpolation);
  phgOptionsSetHandler("-dof_type", dof_v);
  v = phgDofNew(g, DOF_DEFAULT, 1, "v", DofInterpolation);
  phgPrintf("u->type = %s, v->type = %s\n", u->type->name, v->type->name);

  hp = phgHPNew(g, HP_HB);
  u_hp = phgHPDofNew(g, hp, 1, "u_hp", DofInterpolation);
  phgHPFree(&hp);
  hp = phgHPNew(g, HP_HC);
  v_hp = phgHPDofNew(g, hp, 1, "v_hp", DofInterpolation);
  phgHPFree(&hp);

  while (TRUE) {
    if (phgBalanceGrid(g, 1.1, 1, NULL, 0.))
      phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n",
		g->nprocs, (double)g->lif);

    phgPrintf("Testing map with non HP DOFs:\n");
    map = phgMapCreate(u, v, NULL);
    phgPrintf("    nlocal = %d, nglobal = %d\n", map->nlocal, map->nglobal);
    phgMapDestroy(&map);

    phgPrintf("Testing map with HP DOFs:\n");
    ForAllElements(g, e)
	e->hp_order = 1 + GlobalElement(g, e->index) % 4;
    phgHPSetup(u_hp->hp, FALSE);

    ForAllElements(g, e)
	e->hp_order = 1 + (3 - GlobalElement(g, e->index) % 4);
    phgHPSetup(v_hp->hp, FALSE);

    map = phgMapCreate(u_hp, v_hp, NULL);
    phgPrintf("    nlocal = %d, nglobal = %d\n", map->nlocal, map->nglobal);
    phgMapDestroy(&map);

    phgPrintf("Testing map with HP and non HP DOFs:\n");
    map = phgMapCreate(u, u_hp, v, v_hp, NULL);
    phgPrintf("    nlocal = %d, nglobal = %d\n", map->nlocal, map->nglobal);
    phgMapDestroy(&map);

    if (++step >= 1)
	break;
    phgRefineAllElements(g, 1);
  }

  phgDofFree(&u_hp);
  phgDofFree(&v_hp);
  phgDofFree(&u);
  phgDofFree(&v);

  phgFreeGrid(&g);
  phgFinalize();

  return 0;
}
Beispiel #5
0
int
main(int argc, char *argv[])
{
    GRID *g;
    DOF *u_h;
    MAT *A, *A0, *B;
    MAP *map;
    INT i;
    size_t nnz, mem, mem_peak;
    VEC *x, *y0, *y1, *y2;
    double t0, t1, dnz, dnz1, mflops, mop;
    char *fn = "../test/cube.dat";
    FLOAT mem_max = 300;
    INT refine = 0;

    phgOptionsRegisterFilename("-mesh_file", "Mesh file", (char **)&fn);
    phgOptionsRegisterInt("-loop_count", "Loop count", &loop_count);
    phgOptionsRegisterInt("-refine", "Refinement level", &refine);
    phgOptionsRegisterFloat("-mem_max", "Maximum memory", &mem_max);

    phgInit(&argc, &argv);
    g = phgNewGrid(-1);
    if (!phgImport(g, fn, FALSE))
	phgError(1, "can't read file \"%s\".\n", fn);
    phgRefineAllElements(g, refine);
    u_h = phgDofNew(g, DOF_DEFAULT, 1, "u_h", DofNoAction);

    while (TRUE) {
	phgPrintf("\n");
	if (phgBalanceGrid(g, 1.2, 1, NULL, 0.))
	    phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n",
			g->nprocs, (double)g->lif);
	map = phgMapCreate(u_h, NULL);
	A = phgMapCreateMat(map, map);
	A->handle_bdry_eqns = TRUE;
	build_matrix(A, u_h);
	phgMatAssemble(A);

	/* Note: A is unsymmetric (A' != A) if boundary entries not removed */
	phgMatRemoveBoundaryEntries(A);

#if 0
	/* test block matrix operation */
	A0 = phgMatCreateBlockMatrix(g->comm, 1, 1, &A, NULL);
#else
	A0 = A;
#endif

	phgPrintf("%d DOF, %d elems, %d submeshes, matrix size: %d, LIF: %lg\n",
			DofGetDataCountGlobal(u_h), g->nleaf_global,
			g->nprocs, A->rmap->nglobal, (double)g->lif);

	/* test PHG mat-vec multiply */
	x = phgMapCreateVec(A->cmap, 1);
	y1 = phgMapCreateVec(A->rmap, 1);
	phgVecRandomize(x, 123);
	phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);

	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	for (i = 0; i < loop_count; i++) {
	    phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);
	}
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	y0 = phgVecCopy(y1, NULL);
	nnz = A->nnz_d + A->nnz_o;
#if USE_MPI
	dnz1 = nnz;
	MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	dnz = nnz;
#endif
	mop = loop_count * (dnz + dnz - A->rmap->nlocal) * 1e-6;

	phgPrintf("\n");
	t1 -= t0;
	phgPrintf("   PHG:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF)\n",
			t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops);

	/* test trans(A)*x */
	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	for (i = 0; i < loop_count; i++) {
	    phgMatVec(MAT_OP_T, 1.0, A0, x, 0.0, &y1);
	}
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	t1 -= t0;
	phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		  "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	/* time A * trans(A) */
	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	B = phgMatMat(MAT_OP_N, MAT_OP_N, 1.0, A, A, 0.0, NULL);
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	nnz = B->nnz_d + B->nnz_o;
#if USE_MPI
	dnz1 = nnz;
	MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	dnz = nnz;
#endif
	/* compare B*x <--> A*A*x */
	y2 = phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, NULL);
	phgMatVec(MAT_OP_N, 1.0, A0, y0, 0.0, &y1);
	phgMatDestroy(&B);
	t1 -= t0;
	phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

#if USE_PETSC
	{
	    Mat ma, mb;
	    MatInfo info;
	    Vec va, vb, vc;
	    PetscScalar *vec;

	    ma = phgPetscCreateMatAIJ(A);
	    MatGetVecs(ma, PETSC_NULL, &va);
	    VecDuplicate(va, &vb);
	    VecGetArray(va, &vec);
	    memcpy(vec, x->data, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(va, &vec);
	    MatMult(ma, va, vb);
	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		MatMult(ma, va, vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);

	    MatGetInfo(ma, MAT_GLOBAL_SUM, &info);
	    /*phgPrintf("    --------------------------------------------"
		      "-------------------------\n");*/
	    phgPrintf("\n");
	    t1 -= t0;
	    dnz = info.nz_used;
	    phgPrintf(" PETSc:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		MatMultTranspose(ma, va, vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);
	    t1 -= t0;
	    phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    MatMatMult(ma, ma, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mb);
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    t1 -= t0;
	    MatGetInfo(mb, MAT_GLOBAL_SUM, &info);
	    dnz = info.nz_used;
	    VecDuplicate(va, &vc);
	    /* compare B*x <--> A*A*x */
	    MatMult(ma, vb, vc);
	    MatMult(mb, va, vb);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);
	    VecGetArray(vc, &vec);
	    memcpy(y2->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vc, &vec);
	    phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

	    phgPetscMatDestroy(&mb);
	    phgPetscMatDestroy(&ma);
	    phgPetscVecDestroy(&va);
	    phgPetscVecDestroy(&vb);
	    phgPetscVecDestroy(&vc);
	}
#endif	/* USE_PETSC */

#if USE_HYPRE
	{
	    HYPRE_IJMatrix ma;
	    HYPRE_IJVector va, vb, vc;
	    HYPRE_ParCSRMatrix  par_ma;
	    hypre_ParCSRMatrix  *par_mb;
	    HYPRE_ParVector	par_va, par_vb, par_vc;
	    HYPRE_Int offset, *ni, start, end;
	    assert(sizeof(INT)==sizeof(int) && sizeof(FLOAT)==sizeof(double));
	    setup_hypre_mat(A, &ma);
	    ni = phgAlloc(2 * A->rmap->nlocal * sizeof(*ni));
	    offset = A->cmap->partition[A->cmap->rank];
	    for (i = 0; i < A->rmap->nlocal; i++)
		ni[i] = i + offset;
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &va);
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &vb);
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &vc);
	    HYPRE_IJVectorSetObjectType(va, HYPRE_PARCSR);
	    HYPRE_IJVectorSetObjectType(vb, HYPRE_PARCSR);
	    HYPRE_IJVectorSetObjectType(vc, HYPRE_PARCSR);
	    HYPRE_IJVectorSetMaxOffProcElmts(va, 0);
	    HYPRE_IJVectorSetMaxOffProcElmts(vb, 0);
	    HYPRE_IJVectorSetMaxOffProcElmts(vc, 0);
	    HYPRE_IJVectorInitialize(va);
	    HYPRE_IJVectorInitialize(vb);
	    HYPRE_IJVectorInitialize(vc);
	    HYPRE_IJMatrixGetObject(ma, (void **)(void *)&par_ma);
	    HYPRE_IJVectorGetObject(va, (void **)(void *)&par_va);
	    HYPRE_IJVectorGetObject(vb, (void **)(void *)&par_vb);
	    HYPRE_IJVectorGetObject(vc, (void **)(void *)&par_vc);
	    HYPRE_IJVectorSetValues(va, A->cmap->nlocal, ni, (double *)x->data);
	    HYPRE_IJVectorAssemble(va);
	    HYPRE_IJVectorAssemble(vb);
	    HYPRE_IJVectorAssemble(vc);

	    HYPRE_IJMatrixGetRowCounts(ma, A->cmap->nlocal,
					ni, ni + A->rmap->nlocal);
	    for (i = 0, nnz = 0; i < A->rmap->nlocal; i++)
		nnz += ni[A->rmap->nlocal + i];
#if USE_MPI
	    dnz1 = nnz;
	    MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	    dnz = nnz;
#endif

	    HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    /*phgPrintf("    --------------------------------------------"
		      "-------------------------\n");*/
	    phgPrintf("\n");
	    t1 -= t0;
	    phgPrintf(" HYPRE:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		HYPRE_ParCSRMatrixMatvecT(1.0, par_ma, par_va, 0.0, par_vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    t1 -= t0;
	    phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    /* Note: 'HYPRE_ParCSRMatrix' is currently typedef'ed to
	     *	     'hypre_ParCSRMatrix *' */
	    par_mb = hypre_ParMatmul((hypre_ParCSRMatrix *)par_ma,
					(hypre_ParCSRMatrix *)par_ma);
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    start = hypre_ParCSRMatrixFirstRowIndex(par_mb);
	    end = hypre_ParCSRMatrixLastRowIndex(par_mb) + 1;
	    for (i = start, nnz = 0; i < end; i++) {
		HYPRE_Int ncols;
		hypre_ParCSRMatrixGetRow(par_mb, i, &ncols, NULL, NULL);
		hypre_ParCSRMatrixRestoreRow(par_mb, i, &ncols, NULL, NULL);
		nnz += ncols;
	    }
#if USE_MPI
	    dnz1 = nnz;
	    MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	    dnz = nnz;
#endif
	    /* compare B*x <--> A*A*x */
	    HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_vb, 0.0, par_vc);
	    HYPRE_ParCSRMatrixMatvec(1.0, (void *)par_mb, par_va, 0.0, par_vb);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    HYPRE_IJVectorGetValues(vc, A->rmap->nlocal, ni, (double*)y2->data);
	    hypre_ParCSRMatrixDestroy((par_mb));
	    t1 -= t0;
	    phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

	    phgFree(ni);
	    HYPRE_IJMatrixDestroy(ma);
	    HYPRE_IJVectorDestroy(va);
	    HYPRE_IJVectorDestroy(vb);
	    HYPRE_IJVectorDestroy(vc);
	}
#endif	/* USE_HYPRE */

	if (A0 != A)
	    phgMatDestroy(&A0);
#if 0
if (A->rmap->nglobal > 1000) {
    VEC *v = phgMapCreateVec(A->rmap, 3);
    for (i = 0; i < v->map->nlocal; i++) {
	v->data[i + 0 * v->map->nlocal] = 1 * (i + v->map->partition[g->rank]);
	v->data[i + 1 * v->map->nlocal] = 2 * (i + v->map->partition[g->rank]);
	v->data[i + 2 * v->map->nlocal] = 3 * (i + v->map->partition[g->rank]);
    }
    phgMatDumpMATLAB(A, "A", "A.m");
    phgVecDumpMATLAB(v, "v", "v.m");
    phgFinalize();
    exit(0);
}
#endif
	phgMatDestroy(&A);
	phgVecDestroy(&x);
	phgVecDestroy(&y0);
	phgVecDestroy(&y1);
	phgVecDestroy(&y2);
	phgMapDestroy(&map);
	mem = phgMemoryUsage(g, &mem_peak);
	dnz = mem / (1024.0 * 1024.0);
	dnz1 = mem_peak / (1024.0 * 1024.0);
	/*phgPrintf("    --------------------------------------------"
		  "-------------------------\n");*/
	phgPrintf("\n");
	phgPrintf("  Memory: current %0.4lgMB, peak %0.4lgMB\n", dnz, dnz1);
#if 0
{
    static int loop_count = 0;
    if (++loop_count == 4)
	break;
}
#endif
	if (mem_peak > 1024 * (size_t)1024 * mem_max)
	    break;
	phgRefineAllElements(g, 1);
    }
    phgDofFree(&u_h);
    phgFreeGrid(&g);
    phgFinalize();

    return 0;
}