HYPRE_Int hypre_AMESetup(void *esolver) { HYPRE_Int ne, *edge_bc; hypre_AMEData *ame_data = esolver; hypre_AMSData *ams_data = ame_data -> precond; if (ams_data -> beta_is_zero) { ame_data -> t1 = hypre_ParVectorInDomainOf(ams_data -> G); ame_data -> t2 = hypre_ParVectorInDomainOf(ams_data -> G); } else { ame_data -> t1 = ams_data -> r1; ame_data -> t2 = ams_data -> g1; } ame_data -> t3 = ams_data -> r0; /* Eliminate boundary conditions in G = [Gii, Gib; 0, Gbb], i.e., compute [Gii, 0; 0, 0] */ { HYPRE_Int i, j, k, nv; HYPRE_Int *offd_edge_bc; hypre_ParCSRMatrix *Gt; nv = hypre_ParCSRMatrixNumCols(ams_data -> G); ne = hypre_ParCSRMatrixNumRows(ams_data -> G); edge_bc = hypre_TAlloc(HYPRE_Int, ne); for (i = 0; i < ne; i++) edge_bc[i] = 0; /* Find boundary (eliminated) edges */ { hypre_CSRMatrix *Ad = hypre_ParCSRMatrixDiag(ams_data -> A); HYPRE_Int *AdI = hypre_CSRMatrixI(Ad); HYPRE_Int *AdJ = hypre_CSRMatrixJ(Ad); HYPRE_Real *AdA = hypre_CSRMatrixData(Ad); hypre_CSRMatrix *Ao = hypre_ParCSRMatrixOffd(ams_data -> A); HYPRE_Int *AoI = hypre_CSRMatrixI(Ao); HYPRE_Real *AoA = hypre_CSRMatrixData(Ao); HYPRE_Real l1_norm; /* A row (edge) is boundary if its off-diag l1 norm is less than eps */ HYPRE_Real eps = DBL_EPSILON * 1e+4; for (i = 0; i < ne; i++) { l1_norm = 0.0; for (j = AdI[i]; j < AdI[i+1]; j++) if (AdJ[j] != i) l1_norm += fabs(AdA[j]); if (AoI) for (j = AoI[i]; j < AoI[i+1]; j++) l1_norm += fabs(AoA[j]); if (l1_norm < eps) edge_bc[i] = 1; } } hypre_ParCSRMatrixTranspose(ams_data -> G, &Gt, 1); /* Use a Matvec communication to find which of the edges connected to local vertices are on the boundary */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; offd_edge_bc = hypre_CTAlloc(HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(Gt))); hypre_MatvecCommPkgCreate(Gt); comm_pkg = hypre_ParCSRMatrixCommPkg(Gt); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j); int_buf_data[index++] = edge_bc[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_edge_bc); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); } /* Eliminate boundary vertex entries in G^t */ { hypre_CSRMatrix *Gtd = hypre_ParCSRMatrixDiag(Gt); HYPRE_Int *GtdI = hypre_CSRMatrixI(Gtd); HYPRE_Int *GtdJ = hypre_CSRMatrixJ(Gtd); HYPRE_Real *GtdA = hypre_CSRMatrixData(Gtd); hypre_CSRMatrix *Gto = hypre_ParCSRMatrixOffd(Gt); HYPRE_Int *GtoI = hypre_CSRMatrixI(Gto); HYPRE_Int *GtoJ = hypre_CSRMatrixJ(Gto); HYPRE_Real *GtoA = hypre_CSRMatrixData(Gto); HYPRE_Int bdr; for (i = 0; i < nv; i++) { bdr = 0; /* A vertex is boundary if it belongs to a boundary edge */ for (j = GtdI[i]; j < GtdI[i+1]; j++) if (edge_bc[GtdJ[j]]) { bdr = 1; break; } if (!bdr && GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) if (offd_edge_bc[GtoJ[j]]) { bdr = 1; break; } if (bdr) { for (j = GtdI[i]; j < GtdI[i+1]; j++) /* if (!edge_bc[GtdJ[j]]) */ GtdA[j] = 0.0; if (GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) /* if (!offd_edge_bc[GtoJ[j]]) */ GtoA[j] = 0.0; } } } hypre_ParCSRMatrixTranspose(Gt, &ame_data -> G, 1); hypre_ParCSRMatrixDestroy(Gt); hypre_TFree(offd_edge_bc); } /* Compute G^t M G */ { if (!hypre_ParCSRMatrixCommPkg(ame_data -> G)) hypre_MatvecCommPkgCreate(ame_data -> G); if (!hypre_ParCSRMatrixCommPkg(ame_data -> M)) hypre_MatvecCommPkgCreate(ame_data -> M); hypre_BoomerAMGBuildCoarseOperator(ame_data -> G, ame_data -> M, ame_data -> G, &ame_data -> A_G); hypre_ParCSRMatrixFixZeroRows(ame_data -> A_G); } /* Create AMG preconditioner and PCG-AMG solver for G^tMG */ { HYPRE_BoomerAMGCreate(&ame_data -> B1_G); HYPRE_BoomerAMGSetCoarsenType(ame_data -> B1_G, ams_data -> B_G_coarsen_type); HYPRE_BoomerAMGSetAggNumLevels(ame_data -> B1_G, ams_data -> B_G_agg_levels); HYPRE_BoomerAMGSetRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type); HYPRE_BoomerAMGSetNumSweeps(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetMaxLevels(ame_data -> B1_G, 25); HYPRE_BoomerAMGSetTol(ame_data -> B1_G, 0.0); HYPRE_BoomerAMGSetMaxIter(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetStrongThreshold(ame_data -> B1_G, ams_data -> B_G_theta); /* don't use exact solve on the coarsest level (matrix may be singular) */ HYPRE_BoomerAMGSetCycleRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type, 3); HYPRE_ParCSRPCGCreate(hypre_ParCSRMatrixComm(ame_data->A_G), &ame_data -> B2_G); HYPRE_PCGSetPrintLevel(ame_data -> B2_G, 0); HYPRE_PCGSetTol(ame_data -> B2_G, 1e-12); HYPRE_PCGSetMaxIter(ame_data -> B2_G, 20); HYPRE_PCGSetPrecond(ame_data -> B2_G, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, ame_data -> B1_G); HYPRE_ParCSRPCGSetup(ame_data -> B2_G, (HYPRE_ParCSRMatrix)ame_data->A_G, (HYPRE_ParVector)ame_data->t1, (HYPRE_ParVector)ame_data->t2); } /* Setup LOBPCG */ { HYPRE_Int seed = 75; mv_InterfaceInterpreter* interpreter; mv_MultiVectorPtr eigenvectors; ame_data -> interpreter = hypre_CTAlloc(mv_InterfaceInterpreter,1); interpreter = (mv_InterfaceInterpreter*) ame_data -> interpreter; HYPRE_ParCSRSetupInterpreter(interpreter); ame_data -> eigenvalues = hypre_CTAlloc(HYPRE_Real, ame_data -> block_size); ame_data -> eigenvectors = mv_MultiVectorCreateFromSampleVector(interpreter, ame_data -> block_size, ame_data -> t3); eigenvectors = (mv_MultiVectorPtr) ame_data -> eigenvectors; mv_MultiVectorSetRandom (eigenvectors, seed); /* Make the initial vectors discretely divergence free */ { HYPRE_Int i, j; HYPRE_Real *data; mv_TempMultiVector* tmp = mv_MultiVectorGetData(eigenvectors); HYPRE_ParVector *v = (HYPRE_ParVector*)(tmp -> vector); hypre_ParVector *vi; for (i = 0; i < ame_data -> block_size; i++) { vi = (hypre_ParVector*) v[i]; data = hypre_VectorData(hypre_ParVectorLocalVector(vi)); for (j = 0; j < ne; j++) if (edge_bc[j]) data[j] = 0.0; hypre_AMEDiscrDivFreeComponent(esolver, vi); } } } hypre_TFree(edge_bc); return hypre_error_flag; }
int main(int argc,char **args) { Vec u; Mat A; PetscErrorCode ierr; mv_MultiVectorPtr eigenvectors; PetscScalar * eigs; PetscScalar * eigs_hist; double * resid; double * resid_hist; int iterations; PetscMPIInt rank; int n_eigs = 1; int seed = 1; int i,j; PetscLogDouble t1,t2,elapsed_time; DA da; double tol=1e-08; PetscTruth option_present; PetscTruth freepart=PETSC_FALSE; PetscTruth full_output=PETSC_FALSE; PetscInt m,n,p; KSP ksp; lobpcg_Tolerance lobpcg_tol; int maxIt = 100; mv_InterfaceInterpreter ii; lobpcg_BLASLAPACKFunctions blap_fn; aux_data_struct aux_data; /* PetscViewer viewer; */ PetscInt tmp_int; mv_TempMultiVector * xe; PetscInt N; PetscScalar * xx; PetscInitialize(&argc,&args,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-n_eigs",&tmp_int,&option_present);CHKERRQ(ierr); if (option_present) n_eigs = tmp_int; ierr = PetscOptionsGetReal(PETSC_NULL,"-tol", &tol,PETSC_NULL); CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL,"-freepart",&freepart); CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL,"-full_out",&full_output); CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-seed",&tmp_int,&option_present);CHKERRQ(ierr); if (option_present) seed = tmp_int; ierr = PetscOptionsGetInt(PETSC_NULL,"-itr",&tmp_int,&option_present);CHKERRQ(ierr); if (option_present) maxIt = tmp_int; if (seed<1) seed=1; /* we actually run our code twice: first time we solve small problem just to make sure that all program code is actually loaded into memory; then we solve the problem we are interested in; this trick is done for accurate timing */ PreLoadBegin(PETSC_TRUE,"grid and matrix assembly"); /* "create" the grid and stencil data; on first run we form small problem */ if (PreLoadIt==0) { /* small problem */ ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,10,10,10, 1,PETSC_DECIDE,1,1,1,0,0,0,&da); CHKERRQ(ierr); } else { /* actual problem */ if (freepart) /* petsc determines partitioning */ { ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,-10,-10,-10, PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0,0,&da); CHKERRQ(ierr); } else /* (1,NP,1) partitioning */ { ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,-10,-10,-10, 1,PETSC_DECIDE,1,1,1,0,0,0,&da); CHKERRQ(ierr); } /* now we print what partitioning is chosen */ ierr=DAGetInfo(da,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL,&m, &n,&p,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD,"Partitioning: %u %u %u\n",m,n,p); } /* create matrix, whose nonzero structure and probably partitioning corresponds to grid and stencil structure */ ierr=DAGetMatrix(da,MATMPIAIJ,&A); CHKERRQ(ierr); /* fill the matrix with values. I intend to build 7-pt Laplas */ /* this procedure includes matrix assembly */ ierr=FillMatrix(da,A); CHKERRQ(ierr); /* PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer); MatView(A,PETSC_VIEWER_STDOUT_WORLD); PetscViewerDestroy(viewer); */ /* Create parallel vectors. - We form 1 vector from scratch and then duplicate as needed. */ ierr = DACreateGlobalVector(da,&u); CHKERRQ(ierr); /* ierr = VecSetFromOptions(u);CHKERRQ(ierr); */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the linear solver and set various options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Here we START measuring time for preconditioner setup */ PreLoadStage("preconditioner setup"); ierr = PetscGetTime(&t1);CHKERRQ(ierr); /* Create linear solver context */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); /* Set operators. Here the matrix that defines the linear system also serves as the preconditioning matrix. */ ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* Set runtime options, e.g., -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol> These options will override those specified above as long as KSPSetFromOptions() is called _after_ any other customization routines. */ ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); /* probably this call actually builds the preconditioner */ ierr = KSPSetUp(ksp);CHKERRQ(ierr); /* Here we STOP measuring time for preconditioner setup */ PreLoadStage("solution"); ierr = PetscGetTime(&t2);CHKERRQ(ierr); elapsed_time=t2-t1; if (PreLoadIt==1) PetscPrintf(PETSC_COMM_WORLD,"Preconditioner setup, seconds: %f\n",elapsed_time); /* request memory for eig-vals */ ierr = PetscMalloc(sizeof(PetscScalar)*n_eigs,&eigs); CHKERRQ(ierr); /* request memory for eig-vals history */ ierr = PetscMalloc(sizeof(PetscScalar)*n_eigs*(maxIt+1),&eigs_hist); CHKERRQ(ierr); /* request memory for resid. norms */ ierr = PetscMalloc(sizeof(double)*n_eigs,&resid); CHKERRQ(ierr); /* request memory for resid. norms hist. */ ierr = PetscMalloc(sizeof(double)*n_eigs*(maxIt+1),&resid_hist); CHKERRQ(ierr); LOBPCG_InitRandomContext(); MPI_Comm_rank(PETSC_COMM_WORLD,&rank); PETSCSetupInterpreter( &ii ); eigenvectors = mv_MultiVectorCreateFromSampleVector(&ii, n_eigs,u); xe = (mv_TempMultiVector *) mv_MultiVectorGetData( eigenvectors ); /* VecView( (Vec)xe->vector[0],PETSC_VIEWER_STDOUT_WORLD); */ for (i=0; i<seed; i++) /* this cycle is to imitate changing random seed */ mv_MultiVectorSetRandom (eigenvectors, 1234); /* VecView( (Vec)xe->vector[0],PETSC_VIEWER_STDOUT_WORLD); */ VecGetSize( (Vec)xe->vector[0], &N ); N=mv_TempMultiVectorHeight( xe ); VecGetArray( (Vec)xe->vector[0],&xx); lobpcg_tol.absolute = tol; lobpcg_tol.relative = 1e-50; #ifdef PETSC_USE_COMPLEX blap_fn.zpotrf = PETSC_zpotrf_interface; blap_fn.zhegv = PETSC_zsygv_interface; #else blap_fn.dpotrf = PETSC_dpotrf_interface; blap_fn.dsygv = PETSC_dsygv_interface; #endif aux_data.A = A; aux_data.ksp = ksp; aux_data.ii = ii; /* Here we START measuring time for solution process */ ierr = PetscGetTime(&t1);CHKERRQ(ierr); #ifdef PETSC_USE_COMPLEX lobpcg_solve_complex( eigenvectors, /*input-initial guess of e-vectors */ &aux_data, /*input-matrix A */ OperatorAMultiVector, /*input-operator A */ NULL, /*input-matrix B */ NULL, /*input-operator B */ &aux_data, /*input-matrix T */ Precond_FnMultiVector, /*input-operator T */ NULL, /*input-matrix Y */ blap_fn, /*input-lapack functions */ lobpcg_tol, /*input-tolerances */ PreLoadIt? maxIt:1, /*input-max iterations */ !rank && PreLoadIt, /*input-verbosity level */ &iterations, /*output-actual iterations */ (komplex *) eigs, /*output-eigenvalues */ (komplex *) eigs_hist, /*output-eigenvalues history */ n_eigs, /*output-history global height */ resid, /*output-residual norms */ resid_hist , /*output-residual norms history */ n_eigs /*output-history global height */ ); #else lobpcg_solve_double( eigenvectors, &aux_data, OperatorAMultiVector, NULL, NULL, &aux_data, Precond_FnMultiVector, NULL, blap_fn, lobpcg_tol, PreLoadIt? maxIt:1, !rank && PreLoadIt, &iterations, eigs, /* eigenvalues; "lambda_values" should point to array containing <blocksize> doubles where <blocksize> is the width of multivector "blockVectorX" */ eigs_hist, /* eigenvalues history; a pointer to the entries of the <blocksize>-by-(<maxIterations>+1) matrix stored in fortran-style. (i.e. column-wise) The matrix may be a submatrix of a larger matrix, see next argument */ n_eigs, /* global height of the matrix (stored in fotran-style) specified by previous argument */ resid, /* residual norms; argument should point to array of <blocksize> doubles */ resid_hist , /* residual norms history; a pointer to the entries of the <blocksize>-by-(<maxIterations>+1) matrix stored in fortran-style. (i.e. column-wise) The matrix may be a submatrix of a larger matrix, see next argument */ n_eigs /* global height of the matrix (stored in fotran-style) specified by previous argument */ ); #endif /* Here we STOP measuring time for solution process */ ierr = PetscGetTime(&t2);CHKERRQ(ierr); elapsed_time=t2-t1; if (PreLoadIt) PetscPrintf(PETSC_COMM_WORLD,"Solution process, seconds: %e\n",elapsed_time); if (PreLoadIt && full_output) { PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, eigenvalues:\n"); for (i=0;i<n_eigs;i++) { ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",PetscRealPart(eigs[i])); CHKERRQ(ierr); } PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, eigenvalues history:\n"); for (j=0; j<iterations+1; j++) for (i=0;i<n_eigs;i++) { ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",PetscRealPart(*(eigs_hist+j*n_eigs+i))); CHKERRQ(ierr); } PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, residual norms:\n"); for (i=0;i<n_eigs;i++) { ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",resid[i]); CHKERRQ(ierr); } PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, residual norms history:\n"); for (j=0; j<iterations+1; j++) for (i=0;i<n_eigs;i++) { ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",*(resid_hist+j*n_eigs+i)); CHKERRQ(ierr); } } /* Free work space. All PETSc objects should be destroyed when they are no longer needed. */ ierr = VecDestroy(u);CHKERRQ(ierr); ierr = MatDestroy(A);CHKERRQ(ierr); ierr = KSPDestroy(ksp);CHKERRQ(ierr); ierr = DADestroy(da); CHKERRQ(ierr); LOBPCG_DestroyRandomContext(); mv_MultiVectorDestroy(eigenvectors); /* free memory used for eig-vals */ ierr = PetscFree(eigs); CHKERRQ(ierr); ierr = PetscFree(eigs_hist); CHKERRQ(ierr); ierr = PetscFree(resid); CHKERRQ(ierr); ierr = PetscFree(resid_hist); CHKERRQ(ierr); /* Always call PetscFinalize() before exiting a program. This routine - finalizes the PETSc libraries as well as MPI - provides summary and diagnostic information if certain runtime options are chosen (e.g., -log_summary). */ PreLoadEnd(); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }