AZ_MATRIX *user_Kn_build(struct user_partition *Node_Partition) { int *Kn_bindx; double *Kn_val; int proc_config[AZ_PROC_SIZE]; AZ_MATRIX *Kn_mat; int *reordered_glob_nodes = NULL, *cpntr = NULL, *Kn_data_org = NULL; int i, ii, jj, nx, gid, Nlocal_nodes, nz_ptr; Nlocal_nodes = Node_Partition->Nlocal; Kn_bindx = (int *) malloc((27*Nlocal_nodes+5)*sizeof(int)); Kn_val = (double *) malloc((27*Nlocal_nodes+5)*sizeof(double)); Kn_bindx[0] = Nlocal_nodes+1; nx = (int) sqrt( ((double) Node_Partition->Nglobal) + .00001); for (i = 0; i < Nlocal_nodes; i++) { gid = (Node_Partition->my_global_ids)[i]; nz_ptr = Kn_bindx[i]; ii = gid%nx; jj = (gid - ii)/nx; if (ii != nx-1) { Kn_bindx[nz_ptr] = gid+ 1; Kn_val[nz_ptr++] = -1.;} if (jj != nx-1) { Kn_bindx[nz_ptr] = gid+nx; Kn_val[nz_ptr++] = -1.;} if (jj != 0) { Kn_bindx[nz_ptr] = gid-nx; Kn_val[nz_ptr++] = -1.;} if (ii != 0) { Kn_bindx[nz_ptr] = gid- 1; Kn_val[nz_ptr++] = -1.;} if ((ii != nx-1) && (jj != 0)) {Kn_bindx[nz_ptr] = gid-nx+1; Kn_val[nz_ptr++] = -1.;} if ((ii != nx-1) && (jj != nx-1)) {Kn_bindx[nz_ptr] = gid+nx+1; Kn_val[nz_ptr++] = -1.;} if ((ii != 0) && (jj != nx-1)) {Kn_bindx[nz_ptr] = gid+nx-1; Kn_val[nz_ptr++] = -1.;} if ((ii != 0) && (jj != 0)) {Kn_bindx[nz_ptr] = gid-nx-1; Kn_val[nz_ptr++] = -1.;} Kn_val[i] = (double) (nz_ptr - Kn_bindx[i]); Kn_bindx[i+1] = nz_ptr; } AZ_set_proc_config(proc_config, COMMUNICATOR); AZ_transform_norowreordering(proc_config,&(Node_Partition->needed_external_ids), Kn_bindx, Kn_val, Node_Partition->my_global_ids, &reordered_glob_nodes, &reordered_node_externs, &Kn_data_org, Nlocal_nodes, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Node_Partition->Nghost = Kn_data_org[AZ_N_external]; AZ_free(reordered_glob_nodes); /* Convert old style Aztec matrix to newer style Aztec matrix */ Kn_mat = AZ_matrix_create( Nlocal_nodes ); AZ_set_MSR(Kn_mat, Kn_bindx, Kn_val, Kn_data_org, 0, NULL, AZ_LOCAL); return(Kn_mat); }
/*
 * Tear down an IFPACK preconditioner attached to an Aztec preconditioner.
 *
 * The AZ_IFPACK record stored in Prec->Pmat->aux_ptr is read, the user's
 * original preconditioner choice is written back into options[AZ_precond],
 * the IFPACK preconditioner object is released, and finally the record
 * itself is freed.
 *
 * params, proc_config and Amat are not used by this routine.
 */
void AZ_ifpack_prec_destroy(int *options, double *params, int *proc_config,
                            AZ_MATRIX *Amat, AZ_PRECOND *Prec)
{
  AZ_IFPACK *pass_data  = (AZ_IFPACK *) Prec->Pmat->aux_ptr;
  void      *ifp_precon = pass_data->precon;

  /* Restore user prec */
  options[AZ_precond] = pass_data->user_precon;

  /* Free allocated memory */
  ifp_freepreconditioner(ifp_precon);

  /* pass_data->bmat is deliberately NOT released here:                 */
  /*ifp_freeblockmatrix(bmat); Need to fix the destructor for BlockMat*/

  /* Must make sure to clean up everything!!!!*/
  AZ_free((void *) pass_data);
}
/*
 * Helper for AZ_adjust_N_nz_to_fit_memory(): attempt to allocate
 * N_int_arrays int arrays and N_dbl_arrays double arrays of N+20 entries
 * each, release whatever was obtained, and return 1 only if EVERY
 * allocation succeeded (0 otherwise).
 */
static int AZ_trial_allocation_fits(int N, int N_int_arrays, int N_dbl_arrays,
                                    int **iptr, double **dptr)
{
  int i, n_int_ok = 0, n_dbl_ok = 0, fits = 1;

  /* add a little extra (the +20) for manage memory */
  for (i = 0 ; fits && (i < N_int_arrays) ; i++ ) {
    iptr[i] = (int *) AZ_allocate((N+20)*sizeof(int));
    if (iptr[i] == NULL) fits = 0;
    else                 n_int_ok++;
  }
  for (i = 0 ; fits && (i < N_dbl_arrays) ; i++ ) {
    dptr[i] = (double *) AZ_allocate((N+20)*sizeof(double));
    if (dptr[i] == NULL) fits = 0;
    else                 n_dbl_ok++;
  }

  /* Release in reverse order of allocation, as the original did. */
  for (i = n_dbl_ok-1 ; i >= 0; i-- ) AZ_free(dptr[i]);
  for (i = n_int_ok-1 ; i >= 0; i-- ) AZ_free(iptr[i]);

  return fits;
}

int AZ_adjust_N_nz_to_fit_memory(int N,int N_int_arrays, int N_dbl_arrays)
{
/****************************************************************************
  Find (and return) the largest value of k <= N such that we can
  successfully allocate N_int_arrays integer arrays of size k and
  N_dbl_arrays double arrays of size k. In practice N is reduced by 9%
  after each failed attempt, so the value returned is the first member of
  that sequence for which every array could be allocated.

  Unlike earlier versions, every array is checked (not only the last int
  and the last double array), and a count of zero arrays of either kind is
  accepted.

  Author:          Ray Tuminaro, SNL, 9222

  Return code:     int
  ============

  Parameter list:
  ===============

  N:               On input, the maximum number of integers and doubles
                   that we wish to try and allocate.

  N_int_arrays:    Number of integer arrays that must fit.

  N_dbl_arrays:    Number of double arrays that must fit.

  NOTE(review): AZ_perror() is assumed not to return -- confirm.
 ****************************************************************************/

  double **dptr;
  int    **iptr;
  int    n_int_slots, n_dbl_slots;

  /* Never request a zero-byte pointer table: its result could be NULL. */
  n_int_slots = (N_int_arrays > 0) ? N_int_arrays : 1;
  n_dbl_slots = (N_dbl_arrays > 0) ? N_dbl_arrays : 1;

  iptr = (int    **) AZ_allocate(n_int_slots*sizeof(int *));
  dptr = (double **) AZ_allocate(n_dbl_slots*sizeof(double *));

  if ( (dptr == 0) || (iptr == 0) )
     AZ_perror("ERROR: not enough memory for preconditioner.\n");

  /* Decrease memory until the problem fits */

  while ( !AZ_trial_allocation_fits(N, N_int_arrays, N_dbl_arrays, iptr, dptr) ) {
     N = (int) ( ((double) N)*.91);
     if (N == 0)
        AZ_perror("ERROR: not enough memory for preconditioner.\n");
  }

  AZ_free(dptr);
  AZ_free(iptr);

  return(N);
}
void AZ_free_space_holder(struct context *context) { /**************************************************************************** This routine is used in conjunction with AZ_hold_space(). Essentially, this routine deallocates memory allocated via AZ_hold_space(). The whole point of these two routines is to allocated all the space needed during the factorization process EXCLUDING all arrays whose size is related to the number of nonzeros. Once this is done, we can determine how much space there is left for the large arrays required for the factorization and split the remaining space amoung these large arrays. In this way LU routines where it is difficult to know the space requirements ahead of time can try to use as large an array as possible. Note: after factorization 'realloc' is used to reduce the array sizes. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== context On input, context->aztec_choices-> options[AZ_subdomain_solve] contains the preconditioner choice while context->space_holder holds memory previously allocated via AZ_hold_space(). On output, context->space_holder is deallocated. *******************************************************************************/ int which = context->aztec_choices->options[AZ_subdomain_solve]; /* Begin Aztec 2.1 mheroux mod */ if ( (which == AZ_ilut) || (which == AZ_lu ) || (which == AZ_bilu) || (which == AZ_bilu_ifp) || (which == AZ_rilu )|| (which == AZ_ilu) || (which == AZ_icc) ) AZ_free(context->space_holder); /* End Aztec 2.1 mheroux mod */ }
/*
 * Destroy a komplex linear system (matrix, right-hand side, initial guess).
 *
 * The AZ_KOMPLEX record attached to (*Amat_komplex)->aux_ptr decides how:
 *
 *   - Form_of_Equations == AZK_Komplex_No_Copy: the index maps
 *     komplex_to_real / komplex_to_imag, the x and b handles, and the record
 *     itself are freed here; data_org is freed only when the matrix is not
 *     flagged must_free_data_org; then the matrix is destroyed.
 *
 *   - any other form: the work is delegated to AZK_destroy_vector() (for x
 *     and b) and AZK_destroy_matrix().
 */
void AZK_destroy_linsys( int *options, double *params, int *proc_config,
                         double **x, double **b, AZ_MATRIX **Amat_komplex)
{
  AZ_KOMPLEX *pass_data = (AZ_KOMPLEX *) (*Amat_komplex)->aux_ptr;

  if (pass_data->Form_of_Equations == AZK_Komplex_No_Copy) {
    int *to_real = pass_data->komplex_to_real;
    int *to_imag = pass_data->komplex_to_imag;

    /* Free allocated memory */
    AZ_free((void *) to_real);
    AZ_free((void *) to_imag);
    AZ_free ((void **) x);
    AZ_free ((void **) b);
    AZ_free((void *) pass_data);

    /* Free data_org if Aztec doesn't do it */
    if (!(*Amat_komplex)->must_free_data_org)
      AZ_free((void *) (*Amat_komplex)->data_org);

    AZ_matrix_destroy (Amat_komplex);
    return;
  }

  /* Destroy RHS, initial guess and matrix */
  AZK_destroy_vector( options, params, proc_config, (*Amat_komplex), x);
  AZK_destroy_vector( options, params, proc_config, (*Amat_komplex), b);
  AZK_destroy_matrix( options, params, proc_config, Amat_komplex);
}
void AZ_pcg_f(double b[], double x[], double weight[], int options[],
              double params[], int proc_config[],double status[],
              AZ_MATRIX *Amat, AZ_PRECOND *precond,
              struct AZ_CONVERGE_STRUCT *convergence_info)

/*******************************************************************************

  Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.

  params:          Drop tolerance and convergence tolerance info.

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).

  precond:         Structure used to represent the preconditioner
                   (see file az_aztec.h and Aztec User's Guide).

  convergence_info: Convergence state shared with the helper routines.
                   print_info, iteration, sol_updated and epsilon are set
                   here; converged and isnan are read to control the loop.

  Implementation notes:
    - The printing prefix is 16 blanks plus 5 blanks per recursion level.
      It is truncated to the size of the local buffer, so recursion levels
      of 10 or more no longer write past the end of 'prefix'.
    - assumes options[AZ_print_freq] != 0 (it is used as a modulus)
      -- TODO confirm the caller guarantees this.

*******************************************************************************/

{

  /* local variables */

  register int i;
  int          N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j;
  int          precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE;
  double       alpha, beta = 0.0, nalpha, true_scaled_r=-1.0;
  double      *r, *z, *p, *ap, actual_residual = -1.0;
  double       r_z_dot, r_z_dot_old, p_ap_dot, rec_residual=-1.0;
  double       scaled_r_norm=-1.0, brkdown_tol = DBL_EPSILON;
  int          *data_org, str_leng, first_time = AZ_TRUE;
  char         label[64],suffix[32], prefix[64];

  double **saveme, *ptap;
  int *kvec_sizes = NULL, current_kept = 0;
  double *dots;
  double doubleone = 1., dzero = 0.;
  char *T = "T";
  char *T2 = "N";
  double *block;

  /**************************** execution begins ******************************/

  sprintf(suffix," in cg%d",options[AZ_recursion_level]);
                                         /* set string that will be used    */
                                         /* for manage_memory label         */
  /* set prefix for printing */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  /* Stop adding indentation once another 5 blanks plus the terminating */
  /* NUL would no longer fit in 'prefix'.                               */
  for (i = 0 ; (i < options[AZ_recursion_level]) &&
               (str_leng + 5 <= (int) sizeof(prefix) - 1) ; i++ ) {
     prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
     prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';

  /* pull needed values out of parameter arrays */

  data_org = Amat->data_org;

  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];
  precond_flag = options[AZ_precond];
  proc         = proc_config[AZ_node];
  print_freq   = options[AZ_print_freq];

  /* Initialize some values in convergence info struct */
  convergence_info->print_info = print_freq;
  convergence_info->iteration = 0;
  convergence_info->sol_updated = 1; /* CG always updates solution */
  convergence_info->epsilon = params[AZ_tol]; /* Test against this */

  /* allocate space for necessary vectors */

  NN = N + data_org[AZ_N_external];
  if (NN == 0) NN++;  /* make sure everybody allocates something */
  NN = NN + (NN%2);   /* make sure things are aligned for assembly */
                      /* matvec on paragon. */

  /* One managed allocation holds p, r, z and ap back to back. */
  sprintf(label,"z%s",suffix);
  p  = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC,
                                   AZ_SYS+az_iterate_id, label, &j);
  r  = &(p[1*NN]);
  z  = &(p[2*NN]);
  ap = &(p[3*NN]);

  AZ_compute_residual(b, x, r, proc_config, Amat);

  if (options[AZ_apply_kvecs]) {
    /* Correct the initial guess with the previously stored vectors. */
    AZ_compute_global_scalars(Amat, x, b, r,
                              weight, &rec_residual, &scaled_r_norm, options,
                              data_org, proc_config, &r_avail,NULL, NULL, &r_z_dot,
                              convergence_info);
    AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme,
                       &ptap, options, data_org, suffix,
                       proc_config[AZ_node], &block);
    dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double));
    if (dots == NULL) {
       printf("Not space to apply vectors in CG\n");
       exit(1);
    }
    /* dots = block^T r */
    DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N, r, &one, &dzero, dots, &one);
    AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]), proc_config);
    for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i];
    /* x = x + block * dots */
    DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone, block, &N, dots, &one, &doubleone,
              x,  &one);

    AZ_free(dots);
    AZ_compute_residual(b, x, r, proc_config, Amat);
    if  ((options[AZ_output] != AZ_none) && (proc == 0))
      printf("\t\tApplied Previous Krylov Vectors ... \n\n");
  }
  if (options[AZ_keep_kvecs] > 0)
    AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme,
                       &ptap, options, data_org, suffix,
                       proc_config[AZ_node], &block);

  /*  z = M r */
  /*  p = 0   */

  DCOPY_F77(&N, r, &one, z, &one);
  status[AZ_first_precond] = AZ_second();
  if (precond_flag)
    precond->prec_function(z,options,proc_config,params,Amat,precond);

  status[AZ_first_precond] = AZ_second() - status[AZ_first_precond];

  for (i = 0; i < N; i++ ) p[i] = 0.0;

  /* compute a few global scalars:                                 */
  /*     1) ||r||                corresponding to options[AZ_conv] */
  /*     2) scaled ||r||         corresponding to options[AZ_conv] */
  /*     3) r_z_dot = <z, r>                                       */

  AZ_compute_global_scalars(Amat, x, b, r,
                            weight, &rec_residual, &scaled_r_norm, options,
                            data_org, proc_config, &r_avail,r, z, &r_z_dot,
                            convergence_info);
  true_scaled_r = scaled_r_norm;

  if ((options[AZ_output] != AZ_none) &&
      (options[AZ_output] != AZ_last) &&
      (options[AZ_output] != AZ_warnings) &&
      (options[AZ_output] != AZ_summary) &&
      (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0))
    {
      (void) AZ_printf_out("%siter: 0 residual = %e\n",
                           prefix,scaled_r_norm);
      AZ_flush_out();
    }

  for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) &&
         !(convergence_info->isnan); iter++ ) {
    convergence_info->iteration = iter;

    /* p  = z + beta * p */
    /* ap = A p          */

    for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i];
    Amat->matvec(p, ap, Amat, proc_config);

    if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) {
      /* Remove the components along the kept vectors, then redo ap. */
      for (i = 0; i < current_kept; i++) {
        alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i];
        DAXPY_F77(&N, &alpha,  saveme[i],  &one, p, &one);
      }
      if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config);
    }

    p_ap_dot = AZ_gdot(N, p, ap, proc_config);
    if (p_ap_dot < brkdown_tol) {

      /* possible problem */

      if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) {

        /* something wrong */

        AZ_scale_true_residual(x, b, ap,
                               weight, &actual_residual, &true_scaled_r,
                               options, data_org, proc_config, Amat,
                               convergence_info);
        AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                  params, true_scaled_r, actual_residual,
                                  options, proc_config);
        return;
      }
      else brkdown_tol = 0.1 * p_ap_dot;
    }

    alpha  = r_z_dot / p_ap_dot;
    nalpha = -alpha;

    /* x = x + alpha*p  */
    /* r = r - alpha*Ap */
    /* z = M^-1 r       */

    DAXPY_F77(&N, &alpha,  p,  &one, x, &one);

    if (iter <= options[AZ_keep_kvecs]) {
      /* Keep this search direction and its <p, Ap> for later solves. */
      DCOPY_F77(&N, p, &one, saveme[iter-1], &one);
      ptap[iter-1] = p_ap_dot ;
      kvec_sizes[AZ_Nkept]++;
      current_kept = kvec_sizes[AZ_Nkept];
    }
    /*
      else {
      i = (iter-1)%options[AZ_keep_kvecs];
      DCOPY_F77(&N, p, &one, saveme[i], &one);
      ptap[i] = p_ap_dot ;
      }
    */
    DAXPY_F77(&N, &nalpha, ap, &one, r, &one);
    DCOPY_F77(&N, r, &one, z, &one);

    if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond);

    r_z_dot_old = r_z_dot;

    /* compute a few global scalars:                                 */
    /*     1) ||r||                corresponding to options[AZ_conv] */
    /*     2) scaled ||r||         corresponding to options[AZ_conv] */
    /*     3) r_z_dot = <z, r>                                       */

    AZ_compute_global_scalars(Amat, x, b, r,
                              weight, &rec_residual, &scaled_r_norm, options,
                              data_org, proc_config, &r_avail, r, z, &r_z_dot,
                              convergence_info);

    /* A breakdown detected at the end of the previous iteration is     */
    /* acted on only after this iteration's update has been applied.    */
    if (brkdown_will_occur) {
      AZ_scale_true_residual( x, b, ap,
                              weight, &actual_residual, &true_scaled_r, options,
                              data_org, proc_config, Amat,convergence_info);
      AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                params, true_scaled_r, actual_residual, options,
                                proc_config);
      return;
    }

    beta = r_z_dot / r_z_dot_old;

    if (fabs(r_z_dot) < brkdown_tol) {

      /* possible problem */

      if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config))
        brkdown_will_occur = AZ_TRUE;
      else
        brkdown_tol = 0.1 * fabs(r_z_dot);
    }

    if ( (iter%print_freq == 0) &&
         (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 )
      {
        (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter,
                             scaled_r_norm);
        AZ_flush_out();
      }

    /* convergence tests */

    /* NOTE(review): bitwise '&' is kept from the original; it equals   */
    /* '&&' only while both operands are 0 or 1 -- confirm intent.      */
    if (options[AZ_check_update_size] & convergence_info->converged)
      convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p, x,
                                                              params[AZ_update_reduction],
                                                              options[AZ_output], proc_config, &first_time);

    if (convergence_info->converged) {
      AZ_scale_true_residual(x, b, ap,
                             weight, &actual_residual, &true_scaled_r, options,
                             data_org, proc_config, Amat, convergence_info);

      /*
       * Note: epsilon and params[AZ_tol] may not be equal due to a previous
       * call to AZ_get_new_eps().
       */

      if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) {

        if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm, true_scaled_r,
                           options, proc_config) == AZ_QUIT) {

          /*
           * Computed residual has converged, actual residual has not converged,
           * AZ_get_new_eps() has decided that it is time to quit.
           */

          AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params,
                                    true_scaled_r, actual_residual, options,
                                    proc_config);
          return;
        }
      }
    }
  }

  /* The for loop left iter one past the last iteration performed. */
  iter--;
  if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none)
       && (options[AZ_output] != AZ_warnings) &&
       (options[AZ_conv]!=AZTECOO_conv_test) )
    {
      (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter,
                           scaled_r_norm);
      AZ_flush_out();
    }

  /* check if we exceeded maximum number of iterations */

  if (convergence_info->converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else if (convergence_info->isnan) i = AZ_breakdown;
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

} /* AZ_pcg */
int main(int argc, char *argv[]) { int Nnodes=16*16; /* Total number of nodes in the problem.*/ /* 'Nnodes' must be a perfect square. */ int MaxMgLevels=6; /* Maximum number of Multigrid Levels */ int Nits_per_presmooth=1; /* # of pre & post smoothings per level */ double tolerance = 1.0e-8; /* At convergence: */ /* ||r_k||_2 < tolerance ||r_0||_2 */ int smoothPe_flag = ML_YES; /* ML_YES: smooth tentative prolongator */ /* ML_NO: don't smooth prolongator */ /***************************************************************************/ /* Select Hiptmair relaxation subsmoothers for the nodal and edge problems */ /* Choices include */ /* 1) ML_Gen_Smoother_SymGaussSeidel: this corresponds to a processor */ /* local version of symmetric Gauss-Seidel/SOR. The number of sweeps */ /* can be set via either 'edge_its' or 'nodal_its'. The damping can */ /* be set via 'edge_omega' or 'nodal_omega'. When set to ML_DDEFAULT, */ /* the damping is set to '1' on one processor. On multiple processors */ /* a lower damping value is set. This is needed to converge processor */ /* local SOR. */ /* 2) ML_Gen_Smoother_Cheby: this corresponds to polynomial relaxation. */ /* The degree of the polynomial is set via 'edge_its' or 'nodal_its'. */ /* If the degree is '-1', Marian Brezina's MLS polynomial is chosen. */ /* Otherwise, a Chebyshev polynomial is used over high frequencies */ /* [ lambda_max/alpha , lambda_max]. Lambda_max is computed. 'alpha' */ /* is hardwired in this example to correspond to twice the ratio of */ /* unknowns in the fine and coarse meshes. */ /* */ /* Using 'hiptmair_type' (see comments below) it is also possible to choose*/ /* when edge and nodal problems are relaxed within the Hiptmair smoother. 
*/ /***************************************************************************/ void *edge_smoother=(void *) /* Edge relaxation: */ ML_Gen_Smoother_Cheby; /* ML_Gen_Smoother_Cheby */ /* ML_Gen_Smoother_SymGaussSeidel */ void *nodal_smoother=(void *) /* Nodal relaxation */ ML_Gen_Smoother_Cheby;/* ML_Gen_Smoother_Cheby */ /* ML_Gen_Smoother_SymGaussSeidel */ int edge_its = 3; /* Iterations or polynomial degree for */ int nodal_its = 3; /* edge/nodal subsmoothers. */ double nodal_omega = ML_DDEFAULT, /* SOR damping parameter for noda/edge */ edge_omega = ML_DDEFAULT; /* subsmoothers (see comments above). */ int hiptmair_type=HALF_HIPTMAIR;/* FULL_HIPTMAIR: each invokation */ /* smoothes on edges, then nodes, */ /* and then once again on edges. */ /* HALF_HIPTMAIR: each pre-invokation */ /* smoothes on edges, then nodes. */ /* Each post-invokation smoothes */ /* on nodes then edges. . */ ML_Operator *Tmat, *Tmat_trans, **Tmat_array, **Tmat_trans_array; ML *ml_edges, *ml_nodes; ML_Aggregate *ag; int Nfine_edge, Ncoarse_edge, Nfine_node, Ncoarse_node, Nlevels; int level, coarsest_level, itmp; double edge_coarsening_rate, node_coarsening_rate, *rhs, *xxx; void **edge_args, **nodal_args; struct user_partition Edge_Partition = {NULL, NULL,0,0}, Node_Partition = {NULL, NULL,0,0}; struct Tmat_data Tmat_data; int i, Ntotal; ML_Comm *comm; /* See Aztec User's Guide for information on these variables */ #ifdef AZTEC AZ_MATRIX *Ke_mat, *Kn_mat; AZ_PRECOND *Pmat = NULL; int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; #endif /* get processor information (proc id & # of procs) and set ML's printlevel. */ #ifdef ML_MPI MPI_Init(&argc,&argv); #endif #ifdef AZTEC AZ_set_proc_config(proc_config, COMMUNICATOR); #endif ML_Set_PrintLevel(10); /* set ML's output level: 0 gives least output */ /* Set the # of global nodes/edges and partition both the edges and the */ /* nodes over the processors. 
NOTE: I believe we assume that if an edge */ /* is assigned to a processor at least one of its nodes must be also */ /* assigned to that processor. */ Node_Partition.Nglobal = Nnodes; Edge_Partition.Nglobal = Node_Partition.Nglobal*2; Node_Partition.type = NODE; Edge_Partition.type = EDGE; #define perxodic #ifdef periodic Node_Partition.Nglobal += 2; #endif partition_edges(&Edge_Partition); partition_nodes(&Node_Partition); xxx = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double)); rhs = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double)); for (i = 0; i < Edge_Partition.Nlocal + 100; i++) xxx[i] = -1.; for (i = 0; i < Edge_Partition.Nlocal; i++) xxx[i] = (double) Edge_Partition.my_global_ids[i]; update_ghost_edges(xxx, (void *) &Edge_Partition); /* Create an empty multigrid hierarchy and set the 'MaxMGLevels-1'th */ /* level discretization within this hierarchy to the ML matrix */ /* representing Ke (Maxwell edge discretization). */ ML_Create(&ml_edges, MaxMgLevels); #ifdef AZTEC /* Build Ke as an Aztec matrix. Use built-in function AZ_ML_Set_Amat() */ /* to convert to an ML matrix and put in hierarchy. */ Ke_mat = user_Ke_build(&Edge_Partition); AZ_ML_Set_Amat(ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal, Edge_Partition.Nlocal, Ke_mat, proc_config); #else /* Build Ke directly as an ML matrix. */ ML_Init_Amatrix (ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal, Edge_Partition.Nlocal, &Edge_Partition); Ntotal = Edge_Partition.Nlocal; if (Edge_Partition.nprocs == 2) Ntotal += Edge_Partition.Nghost; ML_Set_Amatrix_Getrow(ml_edges, MaxMgLevels-1, Ke_getrow, update_ghost_edges, Ntotal); ML_Set_Amatrix_Matvec(ml_edges, MaxMgLevels-1, Ke_matvec); #endif /* Build an Aztec matrix representing an auxiliary nodal PDE problem. */ /* This should be a variable coefficient Poisson problem (with unknowns*/ /* at the nodes). The coefficients should be chosen to reflect the */ /* conductivity of the original edge problems. 
*/ /* Create an empty multigrid hierarchy. Convert the Aztec matrix to an */ /* ML matrix and put it in the 'MaxMGLevels-1' level of the hierarchy. */ /* Note it is possible to multiply T'*T for get this matrix though this*/ /* will not incorporate material properties. */ ML_Create(&ml_nodes, MaxMgLevels); #ifdef AZTEC Kn_mat = user_Kn_build( &Node_Partition); AZ_ML_Set_Amat(ml_nodes, MaxMgLevels-1, Node_Partition.Nlocal, Node_Partition.Nlocal, Kn_mat, proc_config); #else ML_Init_Amatrix (ml_nodes, MaxMgLevels-1 , Node_Partition.Nlocal, Node_Partition.Nlocal, &Node_Partition); Ntotal = Node_Partition.Nlocal; if (Node_Partition.nprocs == 2) Ntotal += Node_Partition.Nghost; ML_Set_Amatrix_Getrow(ml_nodes, MaxMgLevels-1, Kn_getrow, update_ghost_nodes, Ntotal); #endif /* Build an ML matrix representing the null space of the PDE problem. */ /* This should be a discrete gradient (nodes to edges). */ #ifdef AZTEC Tmat = user_T_build (&Edge_Partition, &Node_Partition, &(ml_nodes->Amat[MaxMgLevels-1])); #else Tmat = ML_Operator_Create(ml_nodes->comm); Tmat_data.edge = &Edge_Partition; Tmat_data.node = &Node_Partition; Tmat_data.Kn = &(ml_nodes->Amat[MaxMgLevels-1]); ML_Operator_Set_ApplyFuncData( Tmat, Node_Partition.Nlocal, Edge_Partition.Nlocal, ML_EMPTY, (void *) &Tmat_data, Edge_Partition.Nlocal, NULL, 0); ML_Operator_Set_Getrow( Tmat, ML_INTERNAL, Edge_Partition.Nlocal,Tmat_getrow); ML_Operator_Set_ApplyFunc(Tmat, ML_INTERNAL, Tmat_matvec); ML_Comm_Create( &comm); ML_CommInfoOP_Generate( &(Tmat->getrow->pre_comm), update_ghost_nodes, &Node_Partition,comm, Tmat->invec_leng, Node_Partition.Nghost); #endif /********************************************************************/ /* Set some ML parameters. 
*/ /*------------------------------------------------------------------*/ ML_Set_ResidualOutputFrequency(ml_edges, 1); ML_Set_Tolerance(ml_edges, 1.0e-8); ML_Aggregate_Create( &ag ); ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); ML_Aggregate_Set_DampingFactor(ag, 0.0); /* must use 0 for maxwell */ ML_Aggregate_Set_MaxCoarseSize(ag, 30); ML_Aggregate_Set_Threshold(ag, 0.0); /********************************************************************/ /* Set up Tmat_trans */ /*------------------------------------------------------------------*/ Tmat_trans = ML_Operator_Create(ml_edges->comm); ML_Operator_Transpose_byrow(Tmat, Tmat_trans); Nlevels=ML_Gen_MGHierarchy_UsingReitzinger(ml_edges, &ml_nodes,MaxMgLevels-1, ML_DECREASING,ag,Tmat,Tmat_trans, &Tmat_array,&Tmat_trans_array, smoothPe_flag, 1.5); /* Set the Hiptmair subsmoothers */ if (nodal_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) { nodal_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its); ML_Smoother_Arglist_Set(nodal_args, 1, &nodal_omega); } if (edge_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) { edge_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(edge_args, 0, &edge_its); ML_Smoother_Arglist_Set(edge_args, 1, &edge_omega); } if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) { nodal_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its); Nfine_node = Tmat_array[MaxMgLevels-1]->invec_leng; Nfine_node = ML_gsum_int(Nfine_node, ml_edges->comm); } if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) { edge_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(edge_args, 0, &edge_its); Nfine_edge = Tmat_array[MaxMgLevels-1]->outvec_leng; Nfine_edge = ML_gsum_int(Nfine_edge, ml_edges->comm); } /**************************************************** * Set up smoothers for all levels but the coarsest. 
* ****************************************************/ coarsest_level = MaxMgLevels - Nlevels; for (level = MaxMgLevels-1; level > coarsest_level; level--) { if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) { Ncoarse_edge = Tmat_array[level-1]->outvec_leng; Ncoarse_edge = ML_gsum_int(Ncoarse_edge, ml_edges->comm); edge_coarsening_rate = 2.*((double) Nfine_edge)/ ((double) Ncoarse_edge); ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate); Nfine_edge = Ncoarse_edge; } if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) { Ncoarse_node = Tmat_array[level-1]->invec_leng; Ncoarse_node = ML_gsum_int(Ncoarse_node, ml_edges->comm); node_coarsening_rate = 2.*((double) Nfine_node)/ ((double) Ncoarse_node); ML_Smoother_Arglist_Set(nodal_args, 1, &node_coarsening_rate); Nfine_node = Ncoarse_node; } ML_Gen_Smoother_Hiptmair(ml_edges, level, ML_BOTH, Nits_per_presmooth, Tmat_array, Tmat_trans_array, NULL, edge_smoother, edge_args, nodal_smoother,nodal_args, hiptmair_type); } /******************************************* * Set up coarsest level smoother *******************************************/ if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) { edge_coarsening_rate = (double) Nfine_edge; ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate); } if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) { node_coarsening_rate = (double) Nfine_node; ML_Smoother_Arglist_Set(nodal_args,1,&node_coarsening_rate); } ML_Gen_CoarseSolverSuperLU( ml_edges, coarsest_level); /* Must be called before invoking the preconditioner */ ML_Gen_Solver(ml_edges, ML_MGV, MaxMgLevels-1, coarsest_level); /* Set the initial guess and the right hand side. Invoke solver */ xxx = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); ML_random_vec(xxx, Edge_Partition.Nlocal, ml_edges->comm); rhs = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); ML_random_vec(rhs, Edge_Partition.Nlocal, ml_edges->comm); #ifdef AZTEC /* Choose the Aztec solver and criteria. 
Also tell Aztec that */ /* ML will be supplying the preconditioner. */ AZ_defaults(options, params); options[AZ_solver] = AZ_fixed_pt; options[AZ_solver] = AZ_gmres; options[AZ_kspace] = 80; params[AZ_tol] = tolerance; AZ_set_ML_preconditioner(&Pmat, Ke_mat, ml_edges, options); options[AZ_conv] = AZ_noscaled; AZ_iterate(xxx, rhs, options, params, status, proc_config, Ke_mat, Pmat, NULL); #else ML_Iterate(ml_edges, xxx, rhs); #endif /* clean up. */ ML_Smoother_Arglist_Delete(&nodal_args); ML_Smoother_Arglist_Delete(&edge_args); ML_Aggregate_Destroy(&ag); ML_Destroy(&ml_edges); ML_Destroy(&ml_nodes); #ifdef AZTEC AZ_free((void *) Ke_mat->data_org); AZ_free((void *) Ke_mat->val); AZ_free((void *) Ke_mat->bindx); if (Ke_mat != NULL) AZ_matrix_destroy(&Ke_mat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); if (Kn_mat != NULL) AZ_matrix_destroy(&Kn_mat); #endif free(xxx); free(rhs); ML_Operator_Destroy(&Tmat); ML_Operator_Destroy(&Tmat_trans); ML_MGHierarchy_ReitzingerDestroy(MaxMgLevels-2, &Tmat_array, &Tmat_trans_array); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
void AZ_calc_blk_diag_LU(double *val, int *indx, int *bindx, int *rpntr,
                         int *cpntr, int *bpntr, double *d_inv, int *d_indx,
                         int *d_bindx, int *d_rpntr, int *d_bpntr,
                         int *data_org, int *ipvt)

/*******************************************************************************

  Routine to calculate the LU factors of the block-diagonal portion of sparse
  matrix in 'val' and the associated integer pointer vectors. This is used for
  scaling.

  For every block row that has a diagonal block, the block is copied into
  'd_inv' and factored in place with DGETRF; the pivots are stored in 'ipvt'
  starting at rpntr[block row]. Block rows without a stored diagonal block
  are skipped. The process exits if a diagonal block is not square or if
  DGETRF reports an illegal argument or an exactly singular factor.

  Author:          Scott A. Hutchinson, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  val:             Array containing the nonzero entries of the matrix (see
                   Aztec User's Guide).

  indx,
  bindx,
  rpntr,
  cpntr,
  bpntr:           Arrays used for DMSR and DVBR sparse matrix storage (see
                   file Aztec User's Guide).

  d_inv:           Vector containing the LU of the diagonal blocks.

  d_indx:          The 'indx' array corresponding to the LU-block
                   diagonals.

  d_bindx:         The 'bindx' array corresponding to the LU-block
                   diagonals.

  d_rpntr:         The 'rpntr' array corresponding to the LU-block
                   diagonals.

  d_bpntr:         The 'bpntr' array corresponding to the LU-block
                   diagonals.

  data_org:        Array containing information on the distribution of the
                   matrix to this processor as well as communication parameters
                   (see Aztec User's Guide).

  ipvt:            On output, the DGETRF pivot indices of each factored block.

*******************************************************************************/

{

  /* local variables */

  int    i, j, iblk_row, jblk, icount = 0, iblk_count = 0, ival;
  int    m1, n1, itemp;
  int    m;
  int    bpoff, idoff;
  int    info;
  char  *yo = "AZ_calc_blk_diag_LU: ";

  /**************************** execution begins ******************************/

  m = data_org[AZ_N_int_blk] + data_org[AZ_N_bord_blk];

  if (m == 0) return;

  /* offset of the first block */

  bpoff = *bpntr;
  idoff = *indx;

  /* loop over block rows */

  for (iblk_row = 0; iblk_row < m; iblk_row++) {

    /* number of rows in the current row block */

    m1 = rpntr[iblk_row+1] - rpntr[iblk_row];

    /* starting index of current row block */

    ival = indx[bpntr[iblk_row] - bpoff] - idoff;

    /* loop over column block numbers, looking for the diagonal block */

    for (j = bpntr[iblk_row] - bpoff; j < bpntr[iblk_row+1] - bpoff; j++) {
      jblk = bindx[j];

      /* determine the number of columns in this block */

      n1 = cpntr[jblk+1] - cpntr[jblk];

      itemp = m1*n1;

      if (jblk != iblk_row) {
        /* Not the diagonal block: step over its entries in 'val'. */
        ival += itemp;
        continue;
      }

      /* diagonal block */

      /* error check */

      if (n1 != m1) {
        (void) AZ_printf_err( "%sERROR: diagonal blocks are not square.\n",
                              yo);
        exit(-1);
      }

      /* fill the vectors */

      d_indx[iblk_count]  = icount;
      d_rpntr[iblk_count] = rpntr[iblk_row];
      d_bpntr[iblk_count] = iblk_row;
      d_bindx[iblk_count] = iblk_row;

      for (i = 0; i < itemp; i++) d_inv[icount++] = val[ival + i];

      /* factor the dense block in place */

      DGETRF_F77(&m1, &m1, &d_inv[d_indx[iblk_count]], &m1, &(ipvt[rpntr[iblk_row]]), &info);

      if (info < 0) {
        (void) AZ_printf_err( "%sERROR: argument %d is illegal.\n", yo,
                              -info);
        exit(-1);
      }
      else if (info > 0) {
        (void) AZ_printf_err( "%sERROR: the factorization has produced a "
                              "singular U with U[%d][%d] being exactly zero.\n",
                              yo, info, info);
        exit(-1);
      }
      iblk_count++;

      /* Only one diagonal block per block row. */
      break;
    }
  }

  /* Terminating entries; iblk_row equals m here. */
  d_indx[iblk_count]  = icount;
  d_rpntr[iblk_count] = rpntr[iblk_row];
  d_bpntr[iblk_count] = iblk_row;

} /* AZ_calc_blk_diag_LU */
int main(int argc, char *argv[]) { int num_PDE_eqns=1, N_levels=3, nsmooth=2; int leng, level, N_grid_pts, coarsest_level; int leng1,leng2; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL; struct AZ_SCALING *scaling; ML_Aggregate *ag; double *mode, *rigid=NULL, alpha; char filename[80]; int one = 1; int proc,nprocs; char pathfilename[100]; #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); proc = proc_config[AZ_node]; nprocs = proc_config[AZ_N_procs]; #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); proc = 0; nprocs = 1; #endif if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/inputfile",argv[1]); ML_Reader_ReadInput(pathfilename, &context); } else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context)); AZ_broadcast((char *) context, sizeof(struct reader_context), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); N_levels = context->N_levels; printf("N_levels %d\n",N_levels); nsmooth = context->nsmooth; num_PDE_eqns = context->N_dofPerNode; printf("num_PDE_eqns %d\n",num_PDE_eqns); ML_Set_PrintLevel(context->output_level); /* read in the number of matrix equations */ leng = 0; if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); fp=fopen(pathfilename,"r"); if (fp==NULL) { printf("**ERR** couldn't open file data_matrix.txt\n"); exit(1); } fscanf(fp,"%d",&leng); fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of 
global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ #if 0 if (proc_config[AZ_N_procs] == 1) i = AZ_linear; else i = AZ_file; #endif i = AZ_linear; /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/ /* make a linear distribution of the matrix */ /* if the linear distribution does not align with the blocks, */ /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */ leng1 = leng/nprocs; leng2 = leng-leng1*nprocs; if (proc >= leng2) { leng2 += (proc*leng1); } else { leng1++; leng2 = proc*leng1; } N_update = leng1; update = (int*)AZ_allocate((N_update+1)*sizeof(int)); if (update==NULL) { (void) fprintf (stderr, "Not enough space to allocate 'update'\n"); fflush(stderr); exit(EXIT_FAILURE); } for (i=0; i<N_update; i++) update[i] = i+leng2; #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]); ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde, &N_update,&update,proc_config); #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all blocks (including the ghost nodes) the same size. 
*/ /* not sure, whether this is a good idea with variable blocks */ /* the examples inpufiles (see top of this file) don't need it */ /* anyway */ /* AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); */ AZ_transform_norowreordering(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; start_time = AZ_second(); options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config); ML_Set_ResidualOutputFrequency(ml, context->output); ML_Set_Tolerance(ml, context->tol); ML_Aggregate_Create( &ag ); if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) { ML_Aggregate_Set_CoarsenScheme_MIS(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Coupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) { ML_Aggregate_Set_CoarsenScheme_METIS(ag); for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) { /* when no blocks read, use standard metis assuming constant block sizes */ if (!blocks) ML_Aggregate_Set_CoarsenScheme_METIS(ag); else { ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag); ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks, blocks,block_pde,N_update); } for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else { printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme); exit(-1); } ML_Aggregate_Set_DampingFactor(ag, context->agg_damping); 
ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize); ML_Aggregate_Set_Threshold(ag, context->agg_thresh); if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) { ML_Set_SpectralNormScheme_Calc(ml); } else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) { ML_Set_SpectralNormScheme_Anorm(ml); } else { printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm); } /* read in the rigid body modes */ Nrigid = 0; if (proc_config[AZ_node] == 0) { sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); while( (fp = fopen(pathfilename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } /* Set rhs */ sprintf(pathfilename,"%s/data_rhs.txt",argv[1]); fp = fopen(pathfilename,"r"); if (fp == NULL) { rhs=(double *)ML_allocate(leng*sizeof(double)); if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n"); for (i = 0; i < N_update; i++) rhs[i] = (double) update[i]; } else { fclose(fp); if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n"); AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, proc_config); } AZ_reorder_vec(rhs, data_org, update_index, NULL); for (i = 0; i < Nrigid; i++) { sprintf(filename,"data_nullsp%d.txt",i); sprintf(pathfilename,"%s/%s",argv[1],filename); AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, proc_config); AZ_reorder_vec(mode, data_org, update_index, NULL); #if 0 /* test the given rigid body mode, output-vector should be ~0 */ Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]); #endif for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, 
&(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one); } /* rhs orthogonalization */ alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/ AZ_gdot(N_update, mode, mode, proc_config); DAXPY_F77(&N_update, &alpha, mode, &one, rhs, &one); for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); } for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one); } #if 0 /* for testing the default nullsp */ ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update); #else if (Nrigid != 0) { ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); } #endif if (rigid) ML_free(rigid); ag->keep_agg_information = 1; coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, ML_INCREASING, ag); coarsest_level--; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); #if 0 /* set up smoothers */ if (!blocks) blocks = (int *) ML_allocate(sizeof(int)*N_update); #endif for (level = 0; level < coarsest_level; level++) { num_PDE_eqns = ml->Amat[level].num_PDEs; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->smoother,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } /* This is the symmetric Gauss-Seidel smoothing that we usually use. */ /* In parallel, it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. 
*/ else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); blocks = NULL; block_pde = NULL; nblocks = 0; ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks, &blocks,&block_pde); if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } /* This is a true Gauss Seidel in parallel. This seems to work for */ /* elasticity problems. However, I don't believe that this is very */ /* efficient in parallel. */ /* nblocks = ml->Amat[level].invec_leng; for (i =0; i < nblocks; i++) blocks[i] = i; ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER, nsmooth, 1., nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER, nsmooth, 1., nblocks, blocks); */ /* Jacobi Smoothing */ else if (ML_strcmp(context->smoother,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4); ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4); } /* This does a block Gauss-Seidel (not true GS in parallel) */ /* where each processor has 'nblocks' blocks. 
*/ /* */ else if (ML_strcmp(context->smoother,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } else { printf("unknown smoother %s\n",context->smoother); exit(1); } } /* set coarse level solver */ nsmooth = context->coarse_its; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->coarse_solve,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5); } else if (ML_strcmp(context->coarse_solve,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, 
ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) { ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); } else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) { ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0); } else { printf("unknown coarse grid solver %s\n",context->coarse_solve); exit(1); } ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); AZ_defaults(options, params); if (ML_strcmp(context->krylov,"Cg") == 0) { options[AZ_solver] = AZ_cg; } else if (ML_strcmp(context->krylov,"Bicgstab") == 0) { options[AZ_solver] = AZ_bicgstab; } else if (ML_strcmp(context->krylov,"Tfqmr") == 0) { options[AZ_solver] = AZ_tfqmr; } else if (ML_strcmp(context->krylov,"Gmres") == 0) { options[AZ_solver] = AZ_gmres; } else { printf("unknown krylov method %s\n",context->krylov); } if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_r0; options[AZ_output] = 1; options[AZ_max_iter] = context->max_outer_its; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = context->tol; options[AZ_output] = context->output; ML_free(context); AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set x */ /* there is no initguess supplied with these examples for the moment.... */ fp = fopen("initguessfile","r"); if (fp != NULL) { fclose(fp); if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n"); AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, proc_config); options[AZ_conv] = AZ_expected_values; } else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n"); AZ_reorder_vec(xxx, data_org, update_index, NULL); /* if Dirichlet BC ... 
put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); if (proc_config[AZ_node] == 0) printf("Printing out a few entries of the solution ...\n"); for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for 
(j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
double AZK_residual_norm_no_copy(double *xr, double *xi, double *br, double *bi, int *options, double *params, int *proc_config, AZ_MATRIX *Amat_real, AZ_MATRIX *Amat_imag)
/*******************************************************************************

  Residual norm of a complex linear system whose real and imaginary parts
  are supplied separately.

  Parameter list:
  ===============

  xr, xi:            Real and imaginary parts of the current iterate.

  br, bi:            Real and imaginary parts of the right hand side.

  options, params:   Aztec option/parameter arrays; they are only handed to
                     the routines that build and destroy the komplex system.

  proc_config:       Machine configuration, passed to the matrix-vector
                     product and to the global norm.

  Amat_real,
  Amat_imag:         Real and imaginary parts of the operator.

  Overview
  ========

  A komplex system (x, b, Amat) is created from the inputs with
  AZK_create_linsys_no_copy, A*x - b is formed over the
  data_org[AZ_N_internal] + data_org[AZ_N_border] local equations, the
  komplex system is destroyed again, and the square root of
  AZ_gvector_norm(n, 2, A*x - b) is returned.

  NOTE(review): if AZ_gvector_norm with norm type 2 already returns the
  two-norm, the final sqrt() yields its square root rather than ||r||.
  Confirm against the Aztec documentation before relying on the scale of
  the returned value.

*******************************************************************************/
{
  AZ_MATRIX *Amat;      /* komplex operator built from the two parts */
  double *x, *b;        /* komplex iterate and right hand side       */
  double *resid;        /* work vector: A*x, then A*x - b            */
  double norm;
  int n_local, k;

  /* Transform complex system into komplex system */
  AZK_create_linsys_no_copy (xr, xi, br, bi, options, params, proc_config,
                             Amat_real, Amat_imag, &x, &b, &Amat);

  /* Work vector with one entry per locally updated equation */
  n_local = Amat->data_org[AZ_N_internal] + Amat->data_org[AZ_N_border];
  resid = (double *) AZ_allocate(n_local*sizeof(double));
  if (resid == NULL)
    AZ_perror("AZK_residual_norm_no_copy: Out of memory.");

  /* resid = A*x */
  Amat->matvec(x, resid, Amat, proc_config);

  /* resid = A*x - b (the sign does not affect the norm) */
  for (k = 0; k < n_local; k++)
    resid[k] -= b[k];

  /* Use Aztec function to compute norm */
  norm = AZ_gvector_norm(n_local, 2, resid, proc_config);

  /* Free memory space */
  AZK_destroy_linsys (options, params, proc_config, &x, &b, &Amat);
  AZ_free((void *) resid);

  return(sqrt(norm));
} /* AZK_residual_norm_no_copy */
double AZK_residual_norm(double *xk, double *bk, int *options, double *params, int *proc_config, AZ_MATRIX *Amat_komplex)
/*******************************************************************************

  Residual norm of a linear system that is already in komplex form.

  Parameter list:
  ===============

  xk:                Current iterate.

  bk:                Right hand side of linear system.

  options, params:   Aztec option/parameter arrays (not read by this routine).

  proc_config:       Machine configuration, passed to the matrix-vector
                     product and to the global norm.

  Amat_komplex:      The komplex operator, stored as an AZ_MATRIX structure.

  Overview
  ========

  A*x - b is formed over the data_org[AZ_N_internal] + data_org[AZ_N_border]
  local equations and the square root of AZ_gvector_norm(n, 2, A*x - b) is
  returned.

  NOTE(review): if AZ_gvector_norm with norm type 2 already returns the
  two-norm, the final sqrt() yields its square root rather than ||r||.
  Confirm against the Aztec documentation before relying on the scale of
  the returned value.

*******************************************************************************/
{
  double *resid;        /* work vector: A*x, then A*x - b */
  double norm;
  int n_local, k;

  /* Work vector with one entry per locally updated equation */
  n_local = Amat_komplex->data_org[AZ_N_internal] +
            Amat_komplex->data_org[AZ_N_border];
  resid = (double *) AZ_allocate(n_local*sizeof(double));
  if (resid == NULL)
    AZ_perror("AZK_residual_norm: Out of memory.");

  /* resid = A*x */
  Amat_komplex->matvec(xk, resid, Amat_komplex, proc_config);

  /* resid = A*x - b (the sign does not affect the norm) */
  for (k = 0; k < n_local; k++)
    resid[k] -= bk[k];

  /* Use Aztec function to compute norm */
  norm = AZ_gvector_norm(n_local, 2, resid, proc_config);

  /* Free memory space */
  AZ_free((void *) resid);

  return(sqrt(norm));
} /* AZK_residual_norm */
void AZ_domain_decomp(double x[], AZ_MATRIX *Amat, int options[], int proc_config[], double params[], struct context *context)
/*******************************************************************************

  Precondition 'x' using an overlapping domain decomposition method where a
  solver specified by options[AZ_subdomain_solve] is used on the subdomains.

  Note: if a factorization needs to be computed on the first iteration, this
  will be done and stored for future iterations.

  Author:          Lydie Prevost, SNL, 9222
  =======          Revised by R. Tuminaro (4/97), SNL, 9222

  Return code:     void
  ============

  Parameter list:
  ===============

  x:               On input, the vector to precondition. On output, the first
                   data_org[AZ_N_internal] + data_org[AZ_N_border] entries are
                   overwritten with the result of the subdomain solve.

  Amat:            Matrix to be preconditioned. Its data_org, bindx, val (and
                   for VBR matrices indx, rpntr, cpntr, bpntr) arrays are
                   read; see the Aztec manual for their layout.

  options:         Determines specific solution method and other parameters.
                   This routine reads options[AZ_pre_calc],
                   options[AZ_overlap], options[AZ_type_overlap],
                   options[AZ_reorder], options[AZ_output] and
                   options[AZ_diagnostics]. According to the original notes,
                   options[AZ_overlap] means
                      == AZ_none: nonoverlapping domain decomposition
                      == AZ_diag: use rows corresponding to external
                                  variables but only keep the diagonal for
                                  these rows.
                      == k      : Obtain rows that are a distance k away from
                                  rows owned by this processor.

  proc_config:     Machine configuration, used for the global sums and the
                   boundary exchanges.

  params:          Aztec parameters; params[AZ_ilut_fill] is passed to the
                   size estimate.

  context:         Persistent preconditioner state. On a set-up pass this
                   routine stores the overlapped matrix, its sizes, the
                   padding map, the orderings and the work vectors in it;
                   on later passes they are read back from it.

  NOTE(review): context->aztec_choices is set to the address of a local
  variable, so that pointer is only valid while this call is active.

*******************************************************************************/
{
  int N_unpadded, Nb_unpadded, N_nz_unpadded;
  double *x_pad = NULL, *x_reord = NULL, *ext_vals = NULL;
  int N_nz, N_nz_padded, nz_used;
  int mem_orig, mem_overlapped, mem_factor;
  int name, i, bandwidth;
  int *ordering = NULL;
  double condest;
  /* double start_t; */
  int estimated_requirements;
  char str[80];
  int *garbage;
  int N;
  int *padded_data_org = NULL, *bindx, *data_org;
  double *val;
  int *inv_ordering = NULL;
  int *map = NULL;
  AZ_MATRIX *A_overlapped = NULL;
  struct aztec_choices aztec_choices;

  /**************************** execution begins ******************************/

  data_org = Amat->data_org;
  bindx = Amat->bindx;
  val = Amat->val;

  /* sizes of the local (unpadded) matrix: rows, block rows and nonzeros */
  N_unpadded = data_org[AZ_N_internal] + data_org[AZ_N_border];
  Nb_unpadded = data_org[AZ_N_int_blk]+data_org[AZ_N_bord_blk];
  if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX)
    N_nz_unpadded = bindx[N_unpadded];
  else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX)
    N_nz_unpadded = (Amat->indx)[(Amat->bpntr)[Nb_unpadded]];
  else {
    /* neither MSR nor VBR: use the matrix's own count, computing it if unset */
    if (Amat->N_nz < 0) AZ_matfree_Nnzs(Amat);
    N_nz_unpadded = Amat->N_nz;
  }

  /* make the option arrays and machine configuration reachable via context */
  aztec_choices.options = options;
  aztec_choices.params = params;
  context->aztec_choices = &aztec_choices;
  context->proc_config = proc_config;
  name = data_org[AZ_name];

  if ((options[AZ_pre_calc] >= AZ_reuse) && (context->Pmat_computed)) {
    /* reuse pass: pick up the sizes and overlapped matrix stored earlier */
    N = context->N;
    N_nz = context->N_nz;
    A_overlapped = context->A_overlapped;
    A_overlapped->data_org = data_org;
    A_overlapped->matvec = Amat->matvec;
  }
  else {
    /* set-up pass: build, pad, reorder and factor the overlapped matrix */
    sprintf(str,"A_over %s",context->tag);
    A_overlapped = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX), AZ_ALLOC, name, str, &i);
    AZ_matrix_init(A_overlapped, 0);
    context->A_overlapped = A_overlapped;
    A_overlapped->data_org = data_org;
    A_overlapped->matvec = Amat->matvec;
    A_overlapped->matrix_type = AZ_MSR_MATRIX;
    AZ_init_subdomain_solver(context);
    AZ_compute_matrix_size(Amat, options, N_nz_unpadded, N_unpadded, &N_nz_padded, data_org[AZ_N_external], &(context->max_row), &N, &N_nz, params[AZ_ilut_fill], &(context->extra_fact_nz_per_row), Nb_unpadded,&bandwidth);
    estimated_requirements = N_nz;

    /* NOTE(review): this test and the five below detect overflow by     */
    /* looking for wrap-around of a signed int, which C leaves undefined */
    if (N_nz*2 > N_nz) N_nz = N_nz*2; /* check for overflow */
                                      /* Add extra memory to N_nz. */
                                      /* This extra memory is used */
                                      /* as temporary space during */
                                      /* overlapping calculation   */

    /* Readjust N_nz by allocating auxilliary arrays and allocate */
    /* the MSR matrix to check that there is enough space.        */

    /* block off some space for map and padded_data_org in dd_overlap */
    /* (garbage is never written to; it is released again below)      */
    garbage = (int *) AZ_allocate((AZ_send_list + 2*(N-N_unpadded) +10)* sizeof(int));
    AZ_hold_space(context, N);
    if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
    if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
    if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
    if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
    if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
    N_nz = AZ_adjust_N_nz_to_fit_memory(N_nz, context->N_large_int_arrays, context->N_large_dbl_arrays);
    context->N_nz_factors = N_nz;
    if (N_nz <= N_nz_unpadded) {
      AZ_printf_out("Error: Not enough space for domain decomposition\n");
      AZ_exit(1);
    }
    if (estimated_requirements > N_nz ) estimated_requirements = N_nz;

    /* allocate matrix via AZ_manage_memory() */
    sprintf(str,"bindx %s",context->tag);
    A_overlapped->bindx =(int *) AZ_manage_memory(N_nz*sizeof(int), AZ_ALLOC, name, str, &i);
    sprintf(str,"val %s",context->tag);
    A_overlapped->val =(double *) AZ_manage_memory(N_nz*sizeof(double), AZ_ALLOC, name, str, &i);
    context->N_nz_allocated = N_nz;
    AZ_free_space_holder(context);
    AZ_free(garbage);

    /* convert to MSR if necessary */
    if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX)
      AZ_vb2msr(Nb_unpadded,val,Amat->indx,bindx,Amat->rpntr,Amat->cpntr, Amat->bpntr, A_overlapped->val, A_overlapped->bindx);
    else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) {
      /* already MSR: plain copy of the index and value arrays */
      for (i = 0 ; i < N_nz_unpadded; i++ ) {
        A_overlapped->bindx[i] = bindx[i];
        A_overlapped->val[i] = val[i];
      }
    }
    else AZ_matfree_2_msr(Amat,A_overlapped->val,A_overlapped->bindx,N_nz);

    /* global nonzero count before padding (reported by AZ_print_header) */
    mem_orig = AZ_gsum_int(A_overlapped->bindx[N_unpadded],proc_config);

    /* start_t = AZ_second(); */
    AZ_pad_matrix(context, proc_config, N_unpadded, &N, &(context->map), &(context->padded_data_org), &N_nz, estimated_requirements);
    /* if (proc_config[AZ_node] == 0)
         AZ_printf_out("matrix padding took %e seconds\n",AZ_second()-start_t); */

    /* global nonzero count after padding */
    mem_overlapped = AZ_gsum_int(A_overlapped->bindx[N],proc_config);

    if (options[AZ_reorder]) {
      /* start_t = AZ_second(); */
      AZ_find_MSR_ordering(A_overlapped->bindx, &(context->ordering),N, &(context->inv_ordering),name,context);
      /* if (proc_config[AZ_node] == 0)
           AZ_printf_out("took %e seconds to find ordering\n", AZ_second()-start_t); */
      /* start_t = AZ_second(); */
      AZ_mat_reorder(N,A_overlapped->bindx,A_overlapped->val, context->ordering, context->inv_ordering);
      /* if (proc_config[AZ_node] == 0)
           AZ_printf_out("took %e seconds to reorder\n", AZ_second()-start_t); */
      /* NOTE: ordering is freed inside AZ_mat_reorder */
#ifdef AZ_COL_REORDER
      if (options[AZ_reorder]==2) {
        AZ_mat_colperm(N,A_overlapped->bindx,A_overlapped->val, &(context->ordering), name, context );
      }
#endif
    }

    /* Do a factorization if needed. */
    /* start_t = AZ_second(); */
    AZ_factor_subdomain(context, N, N_nz, &nz_used);

    /* optional diagnostic: condition number estimate of the subdomain solve */
    if (options[AZ_output] > 0 && options[AZ_diagnostics]!=AZ_none) {
      AZ_printf_out("\n*********************************************************************\n");
      condest = AZ_condest(N, context);
      AZ_printf_out("***** Condition number estimate for subdomain preconditioner on PE %d = %.4e\n", proc_config[AZ_node], condest);
      AZ_printf_out("*********************************************************************\n");
    }
    /* start_t = AZ_second()-start_t;
       max_time = AZ_gmax_double(start_t,proc_config);
       min_time = AZ_gmin_double(start_t,proc_config);
       if (proc_config[AZ_node] == 0)
         AZ_printf_out("time for subdomain solvers ranges from %e to %e\n",
                       min_time,max_time); */

    /* shrink the MSR arrays from the allocated size to what was used */
    if ( A_overlapped->matrix_type == AZ_MSR_MATRIX)
      AZ_compress_msr(&(A_overlapped->bindx), &(A_overlapped->val), context->N_nz_allocated, nz_used, name, context);

    /* remember the final sizes for later reuse passes */
    context->N_nz = nz_used;
    context->N = N;
    context->N_nz_allocated = nz_used;
    mem_factor = AZ_gsum_int(nz_used,proc_config);
    if (proc_config[AZ_node] == 0) AZ_print_header(options,mem_overlapped,mem_orig,mem_factor);

    /* work vectors, obtained through AZ_manage_memory under tagged names */
    if (options[AZ_overlap] >= 1) {
      sprintf(str,"x_pad %s",context->tag);
      context->x_pad = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i);
      sprintf(str,"ext_vals %s",context->tag);
      context->ext_vals = (double *) AZ_manage_memory((N-N_unpadded)* sizeof(double), AZ_ALLOC, name, str, &i);
    }
    if (options[AZ_reorder]) {
      sprintf(str,"x_reord %s",context->tag);
      context->x_reord = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i);
    }
  }

  /* Solve L u = x where the solution u overwrites x */
  x_reord = context->x_reord;
  inv_ordering = context->inv_ordering;
  ordering = context->ordering;
  x_pad = context->x_pad;
  ext_vals = context->ext_vals;
  padded_data_org = context->padded_data_org;
  map = context->map;

  /* without a separate padded vector, work directly in x */
  if (x_pad == NULL) x_pad = x;

  if (options[AZ_overlap] >= 1) {
    /* copy the local part, fetch the external values, then permute */
    /* the external values through 'map' using ext_vals as scratch  */
    for (i = 0 ; i < N_unpadded ; i++) x_pad[i] = x[i];
    AZ_exchange_bdry(x_pad,padded_data_org, proc_config);
    for (i = 0 ; i < N-N_unpadded ; i++ ) ext_vals[map[i]-N_unpadded] = x_pad[i+N_unpadded];
    for (i = 0 ; i < N-N_unpadded ; i++ ) x_pad[i + N_unpadded] = ext_vals[i];
  }
  else if (options[AZ_overlap] == AZ_diag) AZ_exchange_bdry(x_pad,data_org, proc_config);

  /* without a separate reordered vector, solve in place in x_pad */
  if (x_reord == NULL) x_reord = x_pad;
  if (options[AZ_reorder]) {
    /* Apply row permutation to the right hand side */
    /* ((P'A P)Pi') Pi P'x = P'rhs, b= P'rhs */
    for (i = 0 ; i < N ; i++ ) x_reord[inv_ordering[i]] = x_pad[i];
  }

  AZ_solve_subdomain(x_reord,N, context);

#ifdef AZ_COL_REORDER
  /* Apply column permutation to the solution */
  if (options[AZ_reorder]==1){
    /* ((P'A P) P'sol = P'rhs sol = P( P'sol) */
    for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]];
  }
  if (options[AZ_reorder]==2){
    /*
     * ((P'A P)Pi') Pi P'sol = P'rhs sol = P Pi'( Pi P'sol)
     * Version 1:
     * for (i = 0; i < N; i++) pi_sol[i] = x_reord[ordering[i]];
     * for (j = 0; j < N; j++) x_pad[j] = pi_sol[inv_ordering[j]];
     * Version 2:
     */
    for (i = 0; i < N; i++) x_pad[i] = x_reord[ ordering[inv_ordering[i]] ];
  }
#else
  /* undo the row permutation applied before the solve */
  if (options[AZ_reorder]) for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]];
#endif

  AZ_combine_overlapped_values(options[AZ_type_overlap],padded_data_org, options, x_pad, map,ext_vals,name,proc_config);

  /* copy the locally owned part of the result back into the caller's vector */
  if (x_pad != x) for (i = 0 ; i < N_unpadded ; i++ ) x[i] = x_pad[i];
} /* subdomain driver*/
void AZ_find_MSR_ordering(int bindx2[],int **ordering,int N, int **inv_ordering, int name, struct context *context) /******************************************************************************* Use a reverse cuthill McKee algorithm to find an ordering for the matrix. Author: R. Tuminaro Return code: void ============ Parameter list: =============== bindx2: On input, the nonzero sparsity pattern of the matrix for which we will determine a new ordering. Note: bindx2 is changed in this routine, but then returned to its original values before exiting. ordering: On output, ordering[i] gives the new location of row i in the reordered system. inv_ordering: On output, inv_ordering[i] gives the location of row */ { int i; int *mask; int root, nlvl, ccsize; int total = 0; char str[80]; /* convert matrix to Fortran format */ if (N==0) return; for (i = N+1 ; i < bindx2[N]; i++ ) bindx2[i]++; for (i = 0 ; i <= N ; i++ ) bindx2[i] -= N; /* initialize arrays for fnroot() and rcm() */ sprintf(str,"inv_ordering %s",context->tag); *inv_ordering = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str,&i); sprintf(str,"ordering %s",context->tag); *ordering = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str,&i); mask = (int *) AZ_allocate((N+1)*sizeof(int)); if (mask == NULL) { AZ_printf_out("Not enough space for RCM reordering\n"); AZ_exit(1); } for (i = 0 ; i < N ; i++ ) mask[i] = 1; root = 1; while (total != N ) { AZ_FNROOT_F77(&root,bindx2,&(bindx2[N+1]),mask, &nlvl, &((*ordering)[total]), *inv_ordering); AZ_RCM_F77(&root,bindx2,&(bindx2[N+1]),mask,&((*ordering)[total]), &ccsize, *inv_ordering); if ( ccsize != N) { for (i = 0 ; i < ccsize ; i++) mask[(*ordering)[total+i]-1] = 0; for (i = 0 ; i < N ; i++ ) { if ( mask[i] == 1) break; } root = i+1; } total += ccsize; if (ccsize == 0) { AZ_printf_out("Error inside reordering\n"); AZ_exit(1); } } /* convert matrix back to C format */ for (i = 0 ; i <= N ; i++ ) bindx2[i] += N; for (i = N+1 ; i < bindx2[N]; 
i++ ) bindx2[i]--; /* convert ordering to C format */ for (i = 0 ; i < N ; i++ ) (*ordering)[i]--; /* produce the inverse order */ for (i = 0 ; i < N ; i++) (*inv_ordering)[(*ordering)[i]] = i; AZ_free(mask); }
/*extern void mc64ad_(int *, int *, int *, int *, int *, double*,
 *                    int *, int *, int *, int *, int *, double*,
 *                    int *, int *);
 */
void AZ_mat_colperm(int n, int bindx[], double val[], int **invp, int name, struct context *context)
/*******************************************************************************

  Use the mc64ad algorithm to permute the columns of a matrix.

  The MSR matrix (bindx, val) is converted in place to a compressed row
  format with the diagonal stored explicitly and explicit zeros removed,
  handed to MC64AD with job = 4, and then converted back to MSR with the
  column indices mapped through the permutation returned by MC64AD.

  Unresolved issues:

  1.  Similar Aztec modules return invp and delete perm.
  2.  The goal of this module is to increase the number of diagonal
      nonzeros.  This reduces the total number of nonzeros in MSR format.
      Some effort is required to make this consistent with Aztec format.

  Author:          D. Day

  Return code:     void
  ============

  Parameter list:
  ===============

  n:               On input, the number of rows. When n is 0 the routine
                   returns immediately and *invp is left untouched.

  bindx :          On input, the nonzero sparsity pattern of the matrix
                   for which we will determine a new ordering.
                   Note: bindx is changed in this routine

  val:             On input, the MSR values matching bindx. Overwritten with
                   the values of the column-permuted matrix.

  invp:            On output, points to n+1 ints (obtained from
                   AZ_manage_memory under the label "invp <tag>") holding the
                   permutation returned by MC64AD shifted to C indexing;
                   invp[i] gives the location of row i.

  name:            On input, the memory-manager name passed through to
                   AZ_manage_memory.

  context:         On input, context->tag is appended to the memory label.

  Errors:          The routine calls AZ_exit(1) when a work array cannot be
                   allocated or when MC64AD reports a negative info[0]. A
                   structurally singular matrix (info[0] == 1) only produces
                   a printed message.

*******************************************************************************/
{
  int job,nnz,nzdiag,liw,ldw,i,p,row,ki,kf,k,nod;
  char str[80];
  int *mcontrol, *info, *rowptr;
  /*
  double work;
  */
  double *diag;

  if (n==0) return;
  nnz = bindx[n]-1;
  liw = 5*n;
  ldw = 2*n + nnz; /* If job=1, then ldw := n */
  sprintf(str,"invp %s",context->tag);
  *invp = (int *) AZ_manage_memory((n+1)*sizeof(int), AZ_ALLOC, name, str,&i);
  mcontrol = (int *) AZ_allocate(10*sizeof(int));
  info     = (int *) AZ_allocate(10*sizeof(int));
  rowptr   = (int *) AZ_allocate(liw*sizeof(int));
  diag     = (double *) AZ_allocate(ldw*sizeof(double));

  /* Every work array is dereferenced below, so all four allocations must
     have succeeded (not only the last one). */
  if (mcontrol == NULL || info == NULL || rowptr == NULL || diag == NULL){
    printf("AZ_col_perm: Error: memory insufficient. Try job=1\n");
    AZ_exit(1);
  }

  /* Echo input matrix
   * printf("AZ_mat_colperm: bindx[%d] = %d\n", n, bindx[n]);
   * for (row=0;row<n;row++){
   *   printf("%d %d %22.14e \n", row+1, row+1, val[row]);
   *   ki = bindx[row];
   *   kf = bindx[row+1];
   *   for (k=ki;k<kf;k++)
   *     printf("%d %d %22.14e \n", row+1, bindx[k]+1, val[k]);
   * }
   */

  /* msr2csr: retract the diagonal and delete zeros */

  /* Save the diagonal and the row pointers first: the in-place expansion
     below overwrites the leading entries of val and bindx. */
  for (row=0;row<n;row++) diag[row] = val[row];
  for (row=0;row<=n;row++) rowptr[row] = bindx[row];

  /* Pass 1: write each row as (diagonal, off-diagonals) starting at
     position 0. The write index p never overtakes the read index k. */
  p=0;
  ki = rowptr[0];
  for (row=0;row<n;row++){
    rowptr[row] += ( row - n - 1);
    kf = rowptr[row+1];
    val[p] = diag[row];
    diag[row] = 0.0;
    bindx[p] = row;
    ++p;
    for (k=ki;k<kf;k++){
      val[p] = val[k];
      bindx[p] = bindx[k];
      ++p;
    }
    ki = kf;
  }
  --rowptr[n];

  /* Pass 2: squeeze out entries whose value is exactly zero and rebuild
     the row pointers. */
  p=0;
  ki = rowptr[0];
  for (row=0;row<n;row++){
    rowptr[row] = p;
    kf = rowptr[row+1];
    for (k=ki;k<kf;k++){
      if( val[k] != 0.0 ){
        val[p] = val[k];
        bindx[p] = bindx[k];
        ++p;
      }
    }
    ki = kf;
  }
  rowptr[n] = p;
  nnz = p;

  /*
   * Convert to standard sparse matrix format with Fortran indexing
   * bindx(1:n+1), bindx(n+2:nnz+n+2), val(1:nnz)
   * bindx[n+1:rowptr[n]+n] := bindx[0:rowptr[n]-1] and then
   * bindx[0:n] := rowptr[0:n]
   * mcontrol[0:2] := -1 turns off output
   */

  for (k=p-1;k>=0;k--) bindx[k+n+1] = bindx[k]+1;
  for (k=0;k<=n;k++) bindx[k] = rowptr[k]+1;
  for (k=0;k<=n;k++) rowptr[k] = 0;
  job = 4; /* job = 1 may do less violence to symmetric form */
  /* for (i=0; i<4; i++) mcontrol[i] = 6; */
  for (i=0; i<3; i++) mcontrol[i] = -1;
  for (i=3; i<10; i++) mcontrol[i] = 0;
  for (i=0; i<10; i++) info[i] = 0;

  MC64AD_F77(&job,&n,&nnz,bindx,&(bindx[n+1]),val,&nzdiag,*invp,&liw,rowptr,&ldw,diag,mcontrol,info);

  /* nzdiag is the number of nonzero diagonal entries in the permuted matrix
     (nzdiag < n signals a structurally singular matrix, see +1 below) */
  /*
    +1 structurally singular matrix (iffi nzdiag < n)
    +2 the returned scaling factors are large and may cause
       overflow when used to scale the matrix (for JOB = 5 entry only.)
    -1 JOB < 1 or JOB > 5. Value of JOB held in INFO(2).
    -2 N < 1. Value of N held in INFO(2).
    -3 NE < 1. Value of NE held in INFO(2).
    -4 the defined length LIW violates the restriction on LIW.
       Value of LIW required given by INFO(2).
    -5 the defined length LDW violates the restriction on LDW.
       Value of LDW required given by INFO(2).
    -6 entries are found whose row indices are out of range.
       INFO(2) contains the index of a column in which such an entry
       is found.
    -7 repeated entries are found. INFO(2) contains the index of a
       column in which such entries are found.
  */
  if( info[0] >= 0 ){

    /* convert permutation to C indexing and invert perm */
    for (i = 0;i< n;i++) (*invp)[i]--; /* 1 2 3 0 */

    /* csr2msr: diag = diag(A P) */
    for (i = 0;i<= n;i++) bindx[i] += n;
    p = bindx[n];
    for (i = n+1;i<p;i++) bindx[i]--;
    for (i = n+1;i<p;i++) bindx[i] = (*invp)[bindx[i]];

    /* Split every row into its (new) diagonal entry, collected in diag,
       and its off-diagonals, compacted towards the front of val/bindx. */
    for (row=0;row<n;row++) diag[row] = 0.;
    p = n+1;
    for (row=0;row<n;row++){
      ki = bindx[row];
      bindx[row] = p;
      kf = bindx[row+1];
      for (k=ki;k<kf;k++){
        if( row != bindx[k]){
          bindx[p] = bindx[k];
          val[p-n-1] = val[k-n-1];
          ++p;
        } else {
          diag[row] = val[k-n-1];
        }
      }
    }
    bindx[n] = p;

    /* val[n+1: (n+1) + nod-1] := val[0:nod-1], nod = number off-diagonals */
    nod = p-(n+1);
    /* printf("az_colperm: number of off diagonals is %d\n",nod); */
    for (i=nod ; i>0 ; i-- ) val[n+i] = val[i-1];
    val[n] = 0;
    for (i = 0 ; i < n ; i++ ) val[i] = diag[i];

    /* Sort the colmns to ascend */
    /* This appears unnecessary, though one never can be certain.
    for (row=0;row<n;row++){
      ki = bindx[row];
      kf = bindx[row+1];
      for (p=ki+1;k<kf;k++){
        k = p;
        while ( (k>ki) && (bindx[k-1]>bindx[k]) ){
          work = val[k];
          val[k] = val[k-1];
          val[k-1] = work;
          i = bindx[k];
          bindx[k] = bindx[k-1];
          bindx[k-1] = i;
          --k;
        }
      }
    }
    */
    if( info[0] == 1 ){
      printf("AZ_col_perm: Error: Internal matrix is singular\n");
    }
  }else{
    /* Ideally an error flag would be returned here */
    printf("az_colperm: Error: info = %d %d\n",info[0],info[1]);
    AZ_exit(1);
  }
  AZ_free(mcontrol);
  AZ_free(info);
  AZ_free(diag);
  AZ_free(rowptr);
  return;
}
int main(int argc, char *argv[]) { int Nnodes=32*32; /* Total number of nodes in the problem.*/ /* 'Nnodes' must be a perfect square. */ struct user_partition Edge_Partition = {NULL, NULL,0,0,NULL,0,0,0}, Node_Partition = {NULL, NULL,0,0,NULL,0,0,0}; int proc_config[AZ_PROC_SIZE]; #ifdef ML_MPI MPI_Init(&argc,&argv); #endif AZ_set_proc_config(proc_config, COMMUNICATOR); ML_Comm* comm; ML_Comm_Create(&comm); Node_Partition.Nglobal = Nnodes; Edge_Partition.Nglobal = Node_Partition.Nglobal*2; user_partition_nodes(&Node_Partition); user_partition_edges(&Edge_Partition, &Node_Partition); AZ_MATRIX * AZ_Ke = user_Ke_build(&Edge_Partition); AZ_MATRIX * AZ_Kn = user_Kn_build(&Node_Partition); // convert (put wrappers) from Aztec matrices to ML_Operator's ML_Operator * ML_Ke, * ML_Kn, * ML_Tmat; ML_Ke = ML_Operator_Create( comm ); ML_Kn = ML_Operator_Create( comm ); AZ_convert_aztec_matrix_2ml_matrix(AZ_Ke,ML_Ke,proc_config); AZ_convert_aztec_matrix_2ml_matrix(AZ_Kn,ML_Kn,proc_config); ML_Tmat = user_T_build(&Edge_Partition, &Node_Partition, ML_Kn, comm); Epetra_CrsMatrix * Epetra_Kn, * Epetra_Ke, * Epetra_T; int MaxNumNonzeros; double CPUTime; ML_Operator2EpetraCrsMatrix(ML_Ke,Epetra_Ke, MaxNumNonzeros, true,CPUTime); ML_Operator2EpetraCrsMatrix(ML_Kn, Epetra_Kn,MaxNumNonzeros, true,CPUTime); ML_Operator2EpetraCrsMatrix(ML_Tmat,Epetra_T,MaxNumNonzeros, true,CPUTime); Teuchos::ParameterList MLList; ML_Epetra::SetDefaults("maxwell", MLList); MLList.set("ML output", 0); MLList.set("aggregation: type", "Uncoupled"); MLList.set("coarse: max size", 30); MLList.set("aggregation: threshold", 0.0); MLList.set("coarse: type", "Amesos-KLU"); ML_Epetra::MultiLevelPreconditioner * MLPrec = new ML_Epetra::MultiLevelPreconditioner(*Epetra_Ke, *Epetra_T, *Epetra_Kn, MLList); Epetra_Vector LHS(Epetra_Ke->DomainMap()); LHS.Random(); Epetra_Vector RHS(Epetra_Ke->DomainMap()); RHS.PutScalar(1.0); Epetra_LinearProblem Problem(Epetra_Ke,&LHS,&RHS); AztecOO solver(Problem); 
solver.SetPrecOperator(MLPrec); solver.SetAztecOption(AZ_solver, AZ_cg_condnum); solver.SetAztecOption(AZ_output, 32); solver.Iterate(500, 1e-8); // ========================= // // compute the real residual // // ========================= // Epetra_Vector RHScomp(Epetra_Ke->DomainMap()); int ierr; ierr = Epetra_Ke->Multiply(false, LHS, RHScomp); assert(ierr==0); Epetra_Vector resid(Epetra_Ke->DomainMap()); ierr = resid.Update(1.0, RHS, -1.0, RHScomp, 0.0); assert(ierr==0); double residual; ierr = resid.Norm2(&residual); assert(ierr==0); if (proc_config[AZ_node] == 0) { std::cout << std::endl; std::cout << "==> Residual = " << residual << std::endl; std::cout << std::endl; } // =============== // // C L E A N U P // // =============== // delete MLPrec; // destroy phase prints out some information delete Epetra_Kn; delete Epetra_Ke; delete Epetra_T; ML_Operator_Destroy( &ML_Ke ); ML_Operator_Destroy( &ML_Kn ); ML_Comm_Destroy( &comm ); if (Edge_Partition.my_local_ids != NULL) free(Edge_Partition.my_local_ids); if (Node_Partition.my_local_ids != NULL) free(Node_Partition.my_local_ids); if (Node_Partition.my_global_ids != NULL) free(Node_Partition.my_global_ids); if (Edge_Partition.my_global_ids != NULL) free(Edge_Partition.my_global_ids); if (Node_Partition.needed_external_ids != NULL) free(Node_Partition.needed_external_ids); if (Edge_Partition.needed_external_ids != NULL) free(Edge_Partition.needed_external_ids); if (AZ_Ke!= NULL) { AZ_free(AZ_Ke->bindx); AZ_free(AZ_Ke->val); AZ_free(AZ_Ke->data_org); AZ_matrix_destroy(&AZ_Ke); } if (AZ_Kn!= NULL) { AZ_free(AZ_Kn->bindx); AZ_free(AZ_Kn->val); AZ_free(AZ_Kn->data_org); AZ_matrix_destroy(&AZ_Kn); } ML_Operator_Destroy(&ML_Tmat); if (residual > 1e-5) { std::cout << "`MultiLevelPreconditioner_Maxwell.exe' failed!" << std::endl; exit(EXIT_FAILURE); } #ifdef ML_MPI MPI_Finalize(); #endif if (proc_config[AZ_node] == 0) std::cout << "`MultiLevelPreconditioner_Maxwell.exe' passed!" << std::endl; exit(EXIT_SUCCESS); }
void AZ_solve_subdomain(double x[],int N, struct context *context) { /**************************************************************************** Given a vector 'x' representing the right hand side, solve the system using whatever subdomain solver is indicated by 'context->which' and whatever factorization information has already been computed. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== x On input, the right hand side of the subdomain system that is to be solved. On output, the solution of the subdomain system. N On input, the size of the linear system to be solved. bindx2,val2 On input, matrix or factorization information to be used by the solver. For most schemes, this information is in MSR format. However, the lu and bilu scheme would have this information in another format. Note: additional array information can be passed through context. context On input, the various fields are set to solver specific information corresponding to algorithm parameters as well as a previously done factorization. 
*******************************************************************************/ double *val2; int *bindx2; int N_blk_rows; #ifdef HAVE_AZLU int ifail; #endif int *sub_options, sub_proc_config[AZ_PROC_SIZE], *hold_data_org, *new_data_org; double *sub_params, *sub_status; AZ_MATRIX *sub_matrix; AZ_PRECOND *sub_precond; struct AZ_SCALING *sub_scaling; #ifdef AZTEC_MPI MPI_AZComm *tptr; #endif double *y; char label[80]; int t1, t2, t3, i, t4, t5 = 0; /* Begin Aztec 2.1 mheroux mod */ #ifdef IFPACK int ione = 1; void *precon; #endif /* End Aztec 2.1 mheroux mod */ val2 = context->A_overlapped->val; bindx2 = context->A_overlapped->bindx; switch(context->aztec_choices->options[AZ_subdomain_solve]) { /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: #ifdef IFPACK y = (double *) malloc (N * sizeof(double)); DCOPY_F77(&N, x, &ione, y, &ione); precon = context->precon; ifp_apply(precon, N, 1, y, N, x, N); free((void *) y); #endif break; /* End Aztec 2.1 mheroux mod */ case AZ_bilu: N_blk_rows = context->N_blk_rows; AZ_lower_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x); AZ_upper_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x, context->ipvt, context->dblock); break; case AZ_ilut: case AZ_rilu: case AZ_ilu: AZ_lower_tsolve(x,N, val2, bindx2, context->iu, x ); AZ_upper_tsolve( x, N, val2, bindx2, context->iu); break; case AZ_icc: AZ_lower_icc(bindx2,val2,N,x); AZ_upper_icc(bindx2,val2,N,x); break; case AZ_lu: #ifdef HAVE_AZLU if (N == 0) return; else if (N== 1) { x[0] *= val2[0]; ifail = 0; } else AZ_backsolve(val2, context->pivot,x, bindx2, context->ha, context->iflag, &ifail, &(context->N_nz_factors), &N, &N); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; default: if (context->aztec_choices->options[AZ_subdomain_solve] >= 
AZ_SOLVER_PARAMS) { AZ_printf_out("ERROR: Unknown subdomain solver %d\n", context->aztec_choices->options[AZ_subdomain_solve]); exit(1); } else { /* better to put most of this in the factorization */ AZ_recover_sol_params(context->aztec_choices->options[ AZ_subdomain_solve], &sub_options, &sub_params, &sub_status, &sub_matrix, &sub_precond, &sub_scaling); t1 = sub_options[AZ_recursion_level]; sub_options[AZ_recursion_level]++; t2 = sub_options[AZ_output]; if (context->proc_config[AZ_node] != 0 ) sub_options[AZ_output] = AZ_none; t3 = context->proc_config[AZ_MPI_Tag]; /* fix data_org */ hold_data_org = context->A_overlapped->data_org; new_data_org = (int *) AZ_allocate( sizeof(int) * AZ_send_list ); if (new_data_org == NULL) { AZ_printf_out("Error: Not enough space for subdomain matrix\n"); exit(1); } context->A_overlapped->data_org = new_data_org; context->A_overlapped->matvec = AZ_MSR_matvec_mult; new_data_org[AZ_matrix_type] = AZ_MSR_MATRIX; new_data_org[AZ_N_internal] = N; new_data_org[AZ_N_border ] = 0; new_data_org[AZ_N_external] = 0; new_data_org[AZ_N_int_blk ] = N; new_data_org[AZ_N_bord_blk] = 0; new_data_org[AZ_N_ext_blk ] = 0; new_data_org[AZ_N_neigh ] = 0; new_data_org[AZ_total_send] = 0; new_data_org[AZ_name ] = hold_data_org[AZ_name]; new_data_org[AZ_internal_use]= 0; new_data_org[AZ_N_rows ]= N; sub_precond->Pmat = context->A_overlapped; sub_precond->prec_function = AZ_precondition; sub_proc_config[AZ_node] = 0; sub_proc_config[AZ_N_procs] = 1; #ifdef AZTEC_MPI tptr = AZ_get_comm(context->proc_config); AZ_set_comm(sub_proc_config, *tptr); #endif sprintf(label,"y in ssolve%d", sub_options[AZ_recursion_level]); y = AZ_manage_memory((N+1)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) y[i] = x[i]; for (i = 0 ; i < N ; i++ ) x[i] = 0.0; t4 = sub_options[AZ_keep_info]; sub_options[AZ_keep_info] = 1; if (context->aztec_choices->options[AZ_pre_calc] >= AZ_reuse) { t5 = sub_options[AZ_pre_calc]; sub_options[AZ_pre_calc] 
= AZ_sys_reuse; } AZ_oldsolve(x, y,sub_options,sub_params, sub_status, sub_proc_config, context->A_overlapped, sub_precond, sub_scaling); sub_options[AZ_keep_info] = t4; if (context->aztec_choices->options[AZ_pre_calc] == AZ_sys_reuse) sub_options[AZ_pre_calc] = t5; sub_options[AZ_recursion_level] = t1; sub_options[AZ_output] = t2; context->A_overlapped->data_org = hold_data_org; AZ_free(new_data_org); context->proc_config[AZ_MPI_Tag] = t3; } } }
void AZ_factor_subdomain(struct context *context, int N, int N_nz, int *nz_used) { /**************************************************************************** Given an overlapped subdomain matrix, factor it according to the chosen algorithm and store the result back in subdomain. Additionally, store the number of nonzeros used in the factorization in nz_used. Notes: 1) Matrix comes in as an MSR matrix. 2) context contains several fields which need to be appropriately set. These fields are specific to the individual solvers. 3) The factorization overwrites the matrix. However, different solvers will store the factorization in different formats. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== context On input, context contains the matrix to be factored in context.A_overlapped (MSR format), On output, context contains the factored matrix which is stored in a format specific to the solver and any additional parameters required by the backsolver. N On input, the size of the linear system to be solved. N_nz On input, the number of nonzero values in the matrix to be factored. nz_used On output, the number of nonzero values in the matrix representing the factorization. 
*******************************************************************************/ #ifdef HAVE_AZLU int ifail, N_nz_matrix, *rnr; double *fake_rhs, *aflag; #endif int i, j, *bindx, *bpntr, *iw; double *cr, *unorm, *a, *val; int *ind, *jnz, *ja, ifill; double dtemp = (context->aztec_choices->params)[AZ_omega]; int N_blk_rows, name = context->A_overlapped->data_org[AZ_name]; char str[80]; /* Begin Aztec 2.1 mheroux mod */ #ifdef IFPACK void *precon, *bmat; double rthresh, athresh; int N_int_blk, N_bord_blk, graph_fill; #endif /* End Aztec 2.1 mheroux mod */ bindx = context->A_overlapped->bindx; *nz_used = bindx[N]; switch(context->aztec_choices->options[AZ_subdomain_solve]) { /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: #ifdef IFPACK if (N == 0) return; bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for bilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); /* Let IFPACK handle fillin */ graph_fill = (context->aztec_choices->options)[AZ_graph_fill]; (context->aztec_choices->options)[AZ_graph_fill] = 0; /* recover some space so that there will */ /* be enough room to convert back to vbr */ i = AZ_compress_msr(&(context->A_overlapped->bindx), &(context->A_overlapped->val), context->N_nz_allocated, *nz_used, name, context); context->N_nz = *nz_used; context->N_nz_allocated = *nz_used; AZ_msr2vbr_mem_efficient(N, &(context->A_overlapped->bindx), &(context->A_overlapped->val), &(context->A_overlapped->cpntr), &(context->A_overlapped->bpntr), &(context->A_overlapped->indx), &N_blk_rows, (context->A_overlapped->data_org)[AZ_name], context->tag,i); context->A_overlapped->matrix_type = AZ_VBR_MATRIX; /*ifp_initialize();*/ /* Create IFPACK encapsulation of Amat */ context->A_overlapped->rpntr = context->A_overlapped->cpntr; N_int_blk = context->A_overlapped->data_org[AZ_N_int_blk]; N_bord_blk = context->A_overlapped->data_org[AZ_N_bord_blk]; context->A_overlapped->data_org[AZ_N_int_blk] = N_blk_rows; 
context->A_overlapped->data_org[AZ_N_bord_blk] = 0; (context->aztec_choices->options)[AZ_graph_fill] = graph_fill; az2ifp_blockmatrix(&bmat, context->A_overlapped); context->A_overlapped->data_org[AZ_N_int_blk] = N_int_blk; context->A_overlapped->data_org[AZ_N_bord_blk] = N_bord_blk; rthresh = (context->aztec_choices->params)[AZ_rthresh]; athresh = (context->aztec_choices->params)[AZ_athresh]; ifill = (context->aztec_choices->options)[AZ_graph_fill]; ifp_preconditioner(&precon, bmat, IFP_BILUK, (double) ifill, 0.0, IFP_SVD, rthresh, athresh); if ((context->aztec_choices->options)[AZ_output]>0) { ifp_biluk_stats(precon); } context->precon = precon; break; /* End Aztec 2.1 mheroux mod */ #else AZ_perror("IFPACK not linked. Must compile with -DIFPACK"); #endif case AZ_bilu: if (N == 0) return; bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for bilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) { *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); } /* recover some space so that there will */ /* be enough room to convert back to vbr */ i = AZ_compress_msr(&(context->A_overlapped->bindx), &(context->A_overlapped->val), context->N_nz_allocated, *nz_used, name, context); context->N_nz = *nz_used; context->N_nz_allocated = *nz_used; AZ_msr2vbr_mem_efficient(N, &(context->A_overlapped->bindx), &(context->A_overlapped->val), &(context->A_overlapped->cpntr), &(context->A_overlapped->bpntr), &(context->A_overlapped->indx), &N_blk_rows, (context->A_overlapped->data_org)[AZ_name], context->tag,i); context->A_overlapped->matrix_type = AZ_VBR_MATRIX; bindx = context->A_overlapped->bindx; bpntr = context->A_overlapped->bpntr; val = context->A_overlapped->val; sprintf(str,"ipvt %s",context->tag); context->ipvt = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"dblock %s",context->tag); 
context->dblock= (int *) AZ_manage_memory((N_blk_rows+1)* sizeof(int), AZ_ALLOC, name, str, &i); context->N_blk_rows = N_blk_rows; /* set dblock to point to the diagonal block in each block row */ for (i = 0 ; i < N_blk_rows ; i++ ) { for (j = bpntr[i] ; j < bpntr[i+1] ; j++ ) { if (bindx[j] == i) context->dblock[i] = j; } } AZ_fact_bilu(N_blk_rows, context->A_overlapped, context->dblock, context->ipvt); break; case AZ_ilut: cr = (double *) AZ_allocate((2*N+3+context->max_row)*sizeof(int)+ (2*N+2+context->max_row)*sizeof(double)); if (cr == NULL) AZ_perror("Out of space in ilut.\n"); unorm = &(cr[N+2]); a = &(unorm[N]); ind = (int *) &(a[context->max_row]); jnz = &(ind[N+3]); ja = &(jnz[N]); sprintf(str,"iu %s",context->tag); context->iu = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str, &i); AZ_fact_ilut(&N, context->A_overlapped, a, ja, (context->aztec_choices->params)[AZ_drop], context->extra_fact_nz_per_row, N_nz - bindx[N], context->iu,cr,unorm,ind, nz_used, jnz, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); AZ_free(cr); break; case AZ_ilu: dtemp = 0.0; case AZ_rilu: if (N == 0) return; sprintf(str,"iu %s",context->tag); bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for ilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) { *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); } context->iu= (int *) AZ_manage_memory((N+1)*sizeof(int),AZ_ALLOC, name, str, &i); iw = (int *) AZ_allocate((N+1)*sizeof(int)); if (iw == NULL) AZ_perror("Out of space in ilu.\n"); AZ_fact_rilu(N, nz_used, context->iu, iw, context->A_overlapped, dtemp, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); AZ_free(iw); break; case AZ_icc: sprintf(str,"iu %s",context->tag); bindx = context->A_overlapped->bindx; val = 
context->A_overlapped->val; /* for ilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); AZ_fact_chol(context->A_overlapped->bindx, context->A_overlapped->val,N, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); break; case AZ_lu: #ifdef HAVE_AZLU if (N == 0) return; aflag = (double *) AZ_allocate(8*sizeof(double)); rnr = (int *) AZ_allocate(N_nz*sizeof(int)); if (rnr == NULL) AZ_perror("Out of space in lu.\n"); sprintf(str,"iflag %s",context->tag); context->iflag = (int *) AZ_manage_memory(10*sizeof(int), AZ_ALLOC, name, str ,&i); sprintf(str,"ha %s",context->tag); context->ha = (int *) AZ_manage_memory(11*(N+1)*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"pivot %s",context->tag); context->pivot = (double *) AZ_manage_memory((N+1)*sizeof(double), AZ_ALLOC, name, str,&i); aflag[0] = 16.0; aflag[2] = 1.0e8; aflag[3] = 1.0e-12; aflag[1] = (context->aztec_choices->params)[AZ_drop]; /* set up flags for the sparse factorization solver */ context->iflag[0] = 1; context->iflag[1] = 2; context->iflag[2] = 1; context->iflag[3] = 0; context->iflag[4] = 2; /* Note: if matrix is pos def, iflag[2] = 2 is cheaper */ N_nz_matrix = bindx[N] - 1; AZ_msr2lu(N, context->A_overlapped, rnr); /* Mark bindx so we can see what was not used later */ for (i = N_nz_matrix ; i < N_nz ; i++) bindx[i] = -7; /* factor the matrix */ if (N == 1) { context->A_overlapped->val[0]=1./context->A_overlapped->val[0]; } else { context->N_nz_factors = N_nz; fake_rhs = (double *) AZ_allocate(N*sizeof(double)); if (fake_rhs == NULL) { AZ_printf_out("Not enough memory inside subdomain_solve\n"); } for (i = 0 ; i < N ; i++ ) fake_rhs[i] = 0.0; AZ_fact_lu(fake_rhs, context->A_overlapped,aflag, context->pivot, rnr, context->ha, context->iflag, &N_nz_matrix, &ifail, &(context->N_nz_factors), 
&N, &N); (context->iflag)[4] = 3; AZ_free(fake_rhs); /* find out what was not used by checking what was not touched */ *nz_used = N_nz; for (i = N_nz_matrix; i < N_nz ; i++ ) { if (bindx[i] != -7) *nz_used = i; } (*nz_used)++; context->N_nz_factors = *nz_used; } AZ_free(rnr); AZ_free(aflag); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; default: if (context->aztec_choices->options[AZ_subdomain_solve] >= AZ_SOLVER_PARAMS) { AZ_printf_err("Unknown subdomain solver(%d)\n", context->aztec_choices->options[AZ_subdomain_solve]); exit(1); } } }
int main(int argc, char *argv[]) { int num_PDE_eqns=6, N_levels=4, nsmooth=2; int leng, level, N_grid_pts, coarsest_level; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int i, j, Nrigid, *garbage = NULL; #ifdef ML_partition int nblocks; int *block_list = NULL; int k; #endif struct AZ_SCALING *scaling; ML_Aggregate *ag; double *mode, *rigid; char filename[80]; double alpha; int allocated = 0; int old_prec, old_sol; double old_tol; /* double *Amode, beta, biggest; int big_ind = -1, ii; */ ML_Operator *Amatrix; int *rowi_col = NULL, rowi_N, count2, ccc; double *rowi_val = NULL; double max_diag, min_diag, max_sum, sum; int nBlocks, *blockIndices, Ndof; #ifdef ML_partition FILE *fp2; int count; if (argc != 2) { printf("Usage: ml_read_elas num_processors\n"); exit(1); } else sscanf(argv[1],"%d",&nblocks); #endif #ifdef HAVE_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); #endif /* read in the number of matrix equations */ leng = 0; if (proc_config[AZ_node] == 0) { # ifdef binary fp=fopen(".data","rb"); # else fp=fopen(".data","r"); # endif if (fp==NULL) { printf("couldn't open file .data\n"); exit(1); } # ifdef binary fread(&leng, sizeof(int), 1, fp); # else fscanf(fp,"%d",&leng); # endif fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. 
*/ if (proc_config[AZ_N_procs] == 1) i = AZ_linear; else i = AZ_file; AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,i); AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all block (including the ghost nodes the same size. */ AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); AZ_transform_norowreordering(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; #ifdef SCALE_ME ML_MSR_sym_diagonal_scaling(Amat, proc_config, &scaling_vect); #endif start_time = AZ_second(); options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); ML_Set_PrintLevel(10); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config); #ifdef ML_partition /* this code is meant to partition the matrices so that things can be */ /* run in parallel later. */ /* It is meant to be run on only one processor. 
*/ #ifdef MB_MODIF fp2 = fopen(".update","w"); #else fp2 = fopen("partition_file","w"); #endif ML_Operator_AmalgamateAndDropWeak(&(ml->Amat[N_levels-1]), num_PDE_eqns, 0.0); ML_Gen_Blocks_Metis(ml, N_levels-1, &nblocks, &block_list); for (i = 0; i < nblocks; i++) { count = 0; for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) { if (block_list[j] == i) count++; } fprintf(fp2," %d\n",count*num_PDE_eqns); for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) { if (block_list[j] == i) { for (k = 0; k < num_PDE_eqns; k++) fprintf(fp2,"%d\n",j*num_PDE_eqns+k); } } } fclose(fp2); ML_Operator_UnAmalgamateAndDropWeak(&(ml->Amat[N_levels-1]),num_PDE_eqns,0.0); #ifdef MB_MODIF printf(" partition file dumped in .update\n"); #endif exit(1); #endif ML_Aggregate_Create( &ag ); /* ML_Aggregate_Set_CoarsenScheme_MIS(ag); */ #ifdef MB_MODIF ML_Aggregate_Set_DampingFactor(ag,1.50); #else ML_Aggregate_Set_DampingFactor(ag,1.5); #endif ML_Aggregate_Set_CoarsenScheme_METIS(ag); ML_Aggregate_Set_NodesPerAggr( ml, ag, -1, 35); /* ML_Aggregate_Set_Phase3AggregateCreationAggressiveness(ag, 10.001); */ ML_Aggregate_Set_Threshold(ag, 0.0); ML_Aggregate_Set_MaxCoarseSize( ag, 300); /* read in the rigid body modes */ Nrigid = 0; /* to ensure compatibility with RBM dumping software */ if (proc_config[AZ_node] == 0) { sprintf(filename,"rigid_body_mode%02d",Nrigid+1); while( (fp = fopen(filename,"r")) != NULL) { which_filename = 1; fclose(fp); Nrigid++; sprintf(filename,"rigid_body_mode%02d",Nrigid+1); } sprintf(filename,"rigid_body_mode%d",Nrigid+1); while( (fp = fopen(filename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"rigid_body_mode%d",Nrigid+1); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } rhs = (double *) malloc(leng*sizeof(double)); xxx = (double *) malloc(leng*sizeof(double)); for (iii = 0; iii < 
leng; iii++) xxx[iii] = 0.0; for (i = 0; i < Nrigid; i++) { if (which_filename == 1) sprintf(filename,"rigid_body_mode%02d",i+1); else sprintf(filename,"rigid_body_mode%d",i+1); AZ_input_msr_matrix(filename,update,&mode,&garbage,N_update,proc_config); AZ_reorder_vec(mode, data_org, update_index, NULL); /* here is something to stick a rigid body mode as the initial */ /* The idea is to solve A x = 0 without smoothing with a two */ /* level method. If everything is done properly, we should */ /* converge in 2 iterations. */ /* Note: we must also zero out components of the rigid body */ /* mode that correspond to Dirichlet bcs. */ if (i == -4) { for (iii = 0; iii < leng; iii++) xxx[iii] = mode[iii]; ccc = 0; Amatrix = &(ml->Amat[N_levels-1]); for (iii = 0; iii < Amatrix->outvec_leng; iii++) { ML_get_matrix_row(Amatrix,1,&iii,&allocated,&rowi_col,&rowi_val, &rowi_N, 0); count2 = 0; for (j = 0; j < rowi_N; j++) if (rowi_val[j] != 0.) count2++; if (count2 <= 1) { xxx[iii] = 0.; ccc++; } } free(rowi_col); free(rowi_val); allocated = 0; rowi_col = NULL; rowi_val = NULL; } /* * Rescale matrix/rigid body modes and checking * AZ_sym_rescale_sl(mode, Amat->data_org, options, proc_config, scaling); Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]); */ /* Here is some code to check that the rigid body modes are */ /* really rigid body modes. The idea is to multiply by A and */ /* then to zero out things that we "think" are boundaries. */ /* In this hardwired example, things near boundaries */ /* correspond to matrix rows that do not have 81 nonzeros. 
*/ /* Amode = (double *) malloc(leng*sizeof(double)); Amat->matvec(mode, Amode, Amat, proc_config); j = 0; biggest = 0.0; for (ii = 0; ii < N_update; ii++) { if ( Amat->bindx[ii+1] - Amat->bindx[ii] != 80) { Amode[ii] = 0.; j++; } else { if ( fabs(Amode[ii]) > biggest) { biggest=fabs(Amode[ii]); big_ind = ii; } } } printf("%d entries zeroed out of %d elements\n",j,N_update); alpha = AZ_gdot(N_update, Amode, Amode, proc_config); beta = AZ_gdot(N_update, mode, mode, proc_config); printf("||A r||^2 =%e, ||r||^2 = %e, ratio = %e\n", alpha,beta,alpha/beta); printf("the biggest is %e at row %d\n",biggest,big_ind); free(Amode); */ /* orthogonalize mode with respect to previous modes. */ for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); /* daxpy_(&N_update,&alpha,&(rigid[j*N_update]), &one, mode, &one); */ } #ifndef MB_MODIF printf(" after mb %e %e %e\n",mode[0],mode[1],mode[2]); #endif for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); garbage = NULL; } if (Nrigid != 0) { ML_Aggregate_Set_BlockDiagScaling(ag); ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); free(rigid); } #ifdef SCALE_ME ML_Aggregate_Scale_NullSpace(ag, scaling_vect, N_update); #endif coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1, ML_DECREASING, ag); AZ_defaults(options, params); coarsest_level = N_levels - coarsest_level; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); /* set up smoothers */ for (level = N_levels-1; level > coarsest_level; level--) { /* ML_Gen_Smoother_BlockGaussSeidel(ml, level,ML_BOTH, 1, 1., num_PDE_eqns); */ /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. 
*/ /* ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, parasails_loadbal, parasails_factorized); */ /* This is the symmetric Gauss-Seidel smoothing that we usually use. */ /* In parallel, it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. */ /* ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); */ Ndof = ml->Amat[level].invec_leng; ML_Gen_Blocks_Aggregates(ag, level, &nBlocks, &blockIndices); ML_Gen_Smoother_BlockDiagScaledCheby(ml, level, ML_BOTH, 30.,nsmooth, nBlocks, blockIndices); /* ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); */ /* This is a true Gauss Seidel in parallel. This seems to work for */ /* elasticity problems. However, I don't believe that this is very */ /* efficient in parallel. */ /* nblocks = ml->Amat[level].invec_leng/num_PDE_eqns; blocks = (int *) ML_allocate(sizeof(int)*N_update); for (i =0; i < ml->Amat[level].invec_leng; i++) blocks[i] = i/num_PDE_eqns; ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER, nsmooth, 1., nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER, nsmooth, 1., nblocks, blocks); free(blocks); */ /* Block Jacobi Smoothing */ /* nblocks = ml->Amat[level].invec_leng/num_PDE_eqns; blocks = (int *) ML_allocate(sizeof(int)*N_update); for (i =0; i < ml->Amat[level].invec_leng; i++) blocks[i] = i/num_PDE_eqns; ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth, ML_ONE_STEP_CG, nblocks, blocks); free(blocks); */ /* Jacobi Smoothing */ /* ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth, ML_ONE_STEP_CG); ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,ML_ONE_STEP_CG); */ /* This does a block Gauss-Seidel (not true GS in parallel) */ /* where each processor has 'nblocks' blocks. 
*/ /* nblocks = 250; ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,ML_ONE_STEP_CG, nblocks, blocks); free(blocks); */ num_PDE_eqns = 6; } /* Choose coarse grid solver: mls, superlu, symGS, or Aztec */ /* ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth); ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); */ /* ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); */ old_prec = options[AZ_precond]; old_sol = options[AZ_solver]; old_tol = params[AZ_tol]; params[AZ_tol] = 1.0e-9; params[AZ_tol] = 1.0e-5; options[AZ_precond] = AZ_Jacobi; options[AZ_solver] = AZ_cg; options[AZ_poly_ord] = 1; options[AZ_conv] = AZ_r0; options[AZ_orth_kvecs] = AZ_TRUE; j = AZ_gsum_int(ml->Amat[coarsest_level].outvec_leng, proc_config); options[AZ_keep_kvecs] = j - 6; options[AZ_max_iter] = options[AZ_keep_kvecs]; ML_Gen_SmootherAztec(ml, coarsest_level, options, params, proc_config, status, options[AZ_keep_kvecs], ML_PRESMOOTHER, NULL); options[AZ_conv] = AZ_noscaled; options[AZ_keep_kvecs] = 0; options[AZ_orth_kvecs] = 0; options[AZ_precond] = old_prec; options[AZ_solver] = old_sol; params[AZ_tol] = old_tol; /* */ #ifdef RST_MODIF ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level); #else #ifdef MB_MODIF ML_Gen_Solver(ml, ML_SAAMG, N_levels-1, coarsest_level); #else ML_Gen_Solver(ml, ML_MGFULLV, N_levels-1, coarsest_level); #endif #endif options[AZ_solver] = AZ_GMRESR; options[AZ_solver] = AZ_cg; options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_r0; options[AZ_conv] = AZ_noscaled; options[AZ_output] = 1; options[AZ_max_iter] = 500; options[AZ_poly_ord] = 5; options[AZ_kspace] = 40; params[AZ_tol] = 4.8e-6; AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; /* Set rhs */ fp = fopen("AZ_capture_rhs.dat","r"); if (fp == NULL) { AZ_random_vector(rhs, data_org, proc_config); if (proc_config[AZ_node] == 0) 
printf("taking random vector for rhs\n"); for (i = 0; i < -N_update; i++) { rhs[i] = (double) update[i]; rhs[i] = 7.; } } else { if (proc_config[AZ_node]== 0) printf("reading rhs guess from file\n"); AZ_input_msr_matrix("AZ_capture_rhs.dat", update, &rhs, &garbage, N_update, proc_config); free(garbage); } AZ_reorder_vec(rhs, data_org, update_index, NULL); printf("changing rhs by multiplying with A\n"); Amat->matvec(rhs, xxx, Amat, proc_config); for (i = 0; i < N_update; i++) rhs[i] = xxx[i]; fp = fopen("AZ_capture_init_guess.dat","r"); if (fp != NULL) { fclose(fp); if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n"); AZ_input_msr_matrix("AZ_capture_init_guess.dat", update, &xxx, &garbage, N_update, proc_config); free(garbage); xxx = (double *) realloc(xxx, sizeof(double)*( Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border] + Amat->data_org[AZ_N_external])); } AZ_reorder_vec(xxx, data_org, update_index, NULL); /* if Dirichlet BC ... put the answer in */ /* for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } */ fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; options[AZ_conv] = AZ_noscaled; 
options[AZ_conv] = AZ_r0; params[AZ_tol] = 1.0e-7; /* ML_Iterate(ml, xxx, rhs); */ alpha = sqrt(AZ_gdot(N_update, xxx, xxx, proc_config)); printf("init guess = %e\n",alpha); alpha = sqrt(AZ_gdot(N_update, rhs, rhs, proc_config)); printf("rhs = %e\n",alpha); #ifdef SCALE_ME ML_MSR_scalerhs(rhs, scaling_vect, data_org[AZ_N_internal] + data_org[AZ_N_border]); ML_MSR_scalesol(xxx, scaling_vect, data_org[AZ_N_internal] + data_org[AZ_N_border]); #endif max_diag = 0.; min_diag = 1.e30; max_sum = 0.; for (i = 0; i < N_update; i++) { if (Amat->val[i] < 0.) printf("woops negative diagonal A(%d,%d) = %e\n", i,i,Amat->val[i]); if (Amat->val[i] > max_diag) max_diag = Amat->val[i]; if (Amat->val[i] < min_diag) min_diag = Amat->val[i]; sum = fabs(Amat->val[i]); for (j = Amat->bindx[i]; j < Amat->bindx[i+1]; j++) { sum += fabs(Amat->val[j]); } if (sum > max_sum) max_sum = sum; } printf("Largest diagonal = %e, min diag = %e large abs row sum = %e\n", max_diag, min_diag, max_sum); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); if (proc_config[AZ_node] == 0) printf("Printing out a few entries of the solution ...\n"); for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for 
(j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef HAVE_MPI MPI_Finalize(); #endif return 0; }
int Aztec2Petra(int * proc_config, AZ_MATRIX * Amat, double * az_x, double * az_b, Epetra_Comm * & comm, Epetra_BlockMap * & map, Epetra_RowMatrix * &A, Epetra_Vector * & x, Epetra_Vector * & b, int ** global_indices) { bool do_throw = false; #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES do_throw = true; #else do_throw = map->GlobalIndicesLongLong() || A->RowMatrixRowMap().GlobalIndicesLongLong(); #endif if(do_throw) { // We throw rather than let the compiler error out so that the // rest of the library is available and all possible tests can run. const char* error = "Aztec2Petra: Not available for 64-bit Maps."; std::cerr << error << std::endl; throw error; } #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES // REMOVE BEGIN // If no 32 bit indices, remove the code below using the preprocessor // otherwise VbrMatrix functions cause linker issues. // Build Epetra_Comm object #ifdef AZTEC_MPI MPI_Comm * mpicomm = (MPI_Comm * ) AZ_get_comm(proc_config); comm = (Epetra_Comm *) new Epetra_MpiComm(*mpicomm); #else comm = (Epetra_Comm *) new Epetra_SerialComm(); #endif int * MyGlobalElements, *global_bindx, *update; if (!Amat->has_global_indices) { //create a global bindx AZ_revert_to_global(proc_config, Amat, &global_bindx, &update); MyGlobalElements = update; } else // Already have global ordering { global_bindx = Amat->bindx; MyGlobalElements = Amat->update; if (MyGlobalElements==0) EPETRA_CHK_ERR(-1); } // Get matrix information int NumMyElements = 0; if (Amat->data_org[AZ_matrix_type] == AZ_VBR_MATRIX) NumMyElements = Amat->data_org[AZ_N_int_blk] + Amat->data_org[AZ_N_bord_blk]; else NumMyElements = Amat->data_org[AZ_N_internal] + Amat->data_org[AZ_N_border]; // int NumMyElements = Amat->N_update; // Note: This "official" way does not always work int * bpntr = Amat->bpntr; int * rpntr = Amat->rpntr; int * indx = Amat->indx; double * val = Amat->val; int NumGlobalElements; comm->SumAll(&NumMyElements, &NumGlobalElements, 1); // Make ElementSizeList (if VBR) - number of block entries in 
each block row int * ElementSizeList = 0; if (Amat->data_org[AZ_matrix_type] == AZ_VBR_MATRIX) { ElementSizeList = new int[NumMyElements]; if (ElementSizeList==0) EPETRA_CHK_ERR(-1); // Ran out of memory for (int i=0; i<NumMyElements; i++) ElementSizeList[i] = rpntr[i+1] - rpntr[i]; #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES map = 0; #else map = new Epetra_BlockMap(NumGlobalElements, NumMyElements, MyGlobalElements, ElementSizeList, 0, *comm); #endif if (map==0) EPETRA_CHK_ERR(-2); // Ran out of memory delete [] ElementSizeList; Epetra_VbrMatrix * AA = new Epetra_VbrMatrix(View, *map, 0); if (AA==0) EPETRA_CHK_ERR(-3); // Ran out of memory /* Add block rows one-at-a-time */ {for (int i=0; i<NumMyElements; i++) { int BlockRow = MyGlobalElements[i]; int NumBlockEntries = bpntr[i+1] - bpntr[i]; int *BlockIndices = global_bindx + bpntr[i]; int ierr = AA->BeginInsertGlobalValues(BlockRow, NumBlockEntries, BlockIndices); if (ierr!=0) { cerr << "Error in BeginInsertGlobalValues(GlobalBlockRow = " << BlockRow << ") = " << ierr << endl; EPETRA_CHK_ERR(ierr); } int LDA = rpntr[i+1] - rpntr[i]; int NumRows = LDA; for (int j=bpntr[i]; j<bpntr[i+1]; j++) { int NumCols = (indx[j+1] - indx[j])/LDA; double * Values = val + indx[j]; ierr = AA->SubmitBlockEntry(Values, LDA, NumRows, NumCols); if (ierr!=0) { cerr << "Error in SubmitBlockEntry, GlobalBlockRow = " << BlockRow << "GlobalBlockCol = " << BlockIndices[j] << "Error = " << ierr << endl; EPETRA_CHK_ERR(ierr); } } ierr = AA->EndSubmitEntries(); if (ierr!=0) { cerr << "Error in EndSubmitEntries(GlobalBlockRow = " << BlockRow << ") = " << ierr << endl; EPETRA_CHK_ERR(ierr); } }} int ierr=AA->FillComplete(); if (ierr!=0) { cerr <<"Error in Epetra_VbrMatrix FillComplete" << ierr << endl; EPETRA_CHK_ERR(ierr); } A = dynamic_cast<Epetra_RowMatrix *> (AA); // cast VBR pointer to RowMatrix pointer } else if (Amat->data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { /* Make numNzBlks - number of block entries in each block row */ int * numNz = new 
int[NumMyElements]; for (int i=0; i<NumMyElements; i++) numNz[i] = global_bindx[i+1] - global_bindx[i] + 1; #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES Epetra_Map * map1 = 0; #else Epetra_Map * map1 = new Epetra_Map(NumGlobalElements, NumMyElements, MyGlobalElements, 0, *comm); #endif Epetra_CrsMatrix * AA = new Epetra_CrsMatrix(Copy, *map1, numNz); map = (Epetra_BlockMap *) map1; // cast Epetra_Map to Epetra_BlockMap /* Add rows one-at-a-time */ for (int row=0; row<NumMyElements; row++) { double * row_vals = val + global_bindx[row]; int * col_inds = global_bindx + global_bindx[row]; int numEntries = global_bindx[row+1] - global_bindx[row]; #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES int ierr = 1; #else int ierr = AA->InsertGlobalValues(MyGlobalElements[row], numEntries, row_vals, col_inds); #endif if (ierr!=0) { cerr << "Error puting row " << MyGlobalElements[row] << endl; EPETRA_CHK_ERR(ierr); } #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES ierr = 1; #else ierr = AA->InsertGlobalValues(MyGlobalElements[row], 1, val+row, MyGlobalElements+row); #endif if (ierr!=0) { cerr << "Error putting diagonal" << endl; EPETRA_CHK_ERR(ierr); } } int ierr=AA->FillComplete(); if (ierr!=0) { cerr << "Error in Epetra_CrsMatrix_FillComplete" << endl; EPETRA_CHK_ERR(ierr); } A = dynamic_cast<Epetra_RowMatrix *> (AA); // cast CRS pointer to RowMatrix pointer } else cerr << "Not a supported AZ_MATRIX data type" << endl; // Create x vector x = new Epetra_Vector(View, *map,az_x); // RPP: Can not use the OperatorRangeMap in the ctor of the "b" vector // below. In MPSalsa, we delete the VbrMatrix yet still use the vector "b". // Deleting the matrix deletes the OperatorRangeMap that the b vector is // based on. Losing the map means "b" and all vectors that are created // with the copy constructor of "b" break. Mike has suggested // using reference counting (Boost smart pointers) so the map is not // deleted. For now we will use the "map" variable as the base map for "b". 
//b = new Epetra_Vector (View, A->OperatorRangeMap(), az_b); b = new Epetra_Vector (View, *map, az_b); *global_indices = 0; // Assume return array will be empty if (!Amat->has_global_indices) { AZ_free((void *) update); if (Amat->data_org[AZ_matrix_type] != AZ_VBR_MATRIX) AZ_free((void *) global_bindx); else global_indices = &global_bindx; } #endif // EPETRA_NO_32BIT_GLOBAL_INDICES REMOVE END return 0; }
int main(int argc, char *argv[]) { char global[]="global"; char local[]="local"; int proc_config[AZ_PROC_SIZE];/* Processor information. */ int options[AZ_OPTIONS_SIZE]; /* Array used to select solver options. */ double params[AZ_PARAMS_SIZE]; /* User selected solver paramters. */ int *data_org; /* Array to specify data layout */ double status[AZ_STATUS_SIZE]; /* Information returned from AZ_solve(). */ int *update; /* vector elements updated on this node. */ int *external; /* vector elements needed by this node. */ int *update_index; /* ordering of update[] and external[] */ int *extern_index; /* locally on this processor. */ int *indx; /* MSR format of real and imag parts */ int *bindx; int *bpntr; int *rpntr; int *cpntr; AZ_MATRIX *Amat; AZ_PRECOND *Prec; double *val; double *x, *b, *xexact, *xsolve; int n_nonzeros, n_blk_nonzeros; int N_update; /* # of block unknowns updated on this node */ int N_local; /* Number scalar equations on this node */ int N_global, N_blk_global; /* Total number of equations */ int N_external, N_blk_eqns; double *val_msr; int *bindx_msr; double norm, d ; int matrix_type; int has_global_indices, option; int i, j, m, mp ; int ione = 1; #ifdef TEST_SINGULAR double * xnull; /* will contain difference of given exact solution and computed solution*/ double * Axnull; /* Product of A time xnull */ double norm_Axnull; #endif #ifdef AZTEC_MPI double MPI_Wtime(void) ; #endif double time ; #ifdef AZTEC_MPI MPI_Init(&argc,&argv); #endif /* get number of processors and the name of this processor */ #ifdef AZTEC_MPI AZ_set_proc_config(proc_config,MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config,0); #endif printf("proc %d of %d is alive\n", proc_config[AZ_node],proc_config[AZ_N_procs]) ; #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif #ifdef VBRMATRIX if(argc != 3) perror("error: enter name of data and partition file on command line") ; #else if(argc != 2) perror("error: enter name of data file on command line") ; #endif /* Set exact 
solution to NULL */ xexact = NULL; /* Read matrix file and distribute among processors. Returns with this processor's set of rows */ #ifdef VBRMATRIX read_hb(argv[1], proc_config, &N_global, &n_nonzeros, &val_msr, &bindx_msr, &x, &b, &xexact); create_vbr(argv[2], proc_config, &N_global, &N_blk_global, &n_nonzeros, &n_blk_nonzeros, &N_update, &update, bindx_msr, val_msr, &val, &indx, &rpntr, &cpntr, &bpntr, &bindx); if(proc_config[AZ_node] == 0) { free ((void *) val_msr); free ((void *) bindx_msr); free ((void *) cpntr); } matrix_type = AZ_VBR_MATRIX; #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif distrib_vbr_matrix( proc_config, N_global, N_blk_global, &n_nonzeros, &n_blk_nonzeros, &N_update, &update, &val, &indx, &rpntr, &cpntr, &bpntr, &bindx, &x, &b, &xexact); #else read_hb(argv[1], proc_config, &N_global, &n_nonzeros, &val, &bindx, &x, &b, &xexact); #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif distrib_msr_matrix(proc_config, N_global, &n_nonzeros, &N_update, &update, &val, &bindx, &x, &b, &xexact); #ifdef DEBUG for (i = 0; i<N_update; i++) if (val[i] == 0.0 ) printf("Zero diagonal at row %d\n",i); #endif matrix_type = AZ_MSR_MATRIX; #endif /* convert matrix to a local distributed matrix */ cpntr = NULL; AZ_transform(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, indx, bpntr, rpntr, &cpntr, matrix_type); printf("Processor %d: Completed AZ_transform\n",proc_config[AZ_node]) ; has_global_indices = 0; option = AZ_LOCAL; #ifdef VBRMATRIX N_local = rpntr[N_update]; #else N_local = N_update; #endif Amat = AZ_matrix_create(N_local); #ifdef VBRMATRIX AZ_set_VBR(Amat, rpntr, cpntr, bpntr, indx, bindx, val, data_org, N_update, update, option); #else AZ_set_MSR(Amat, bindx, val, data_org, N_update, update, option); #endif printf("proc %d Completed AZ_create_matrix\n",proc_config[AZ_node]) ; #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif /* initialize AZTEC options */ AZ_defaults(options, params); 
options[AZ_solver] = AZ_gmres; options[AZ_precond] = AZ_sym_GS; options[AZ_poly_ord] = 1; options[AZ_graph_fill] = 1; params[AZ_rthresh] = 0.0E-7; params[AZ_athresh] = 0.0E-7; options[AZ_overlap] = 1; /* params[AZ_ilut_fill] = 2.0; params[AZ_drop] = 0.01; options[AZ_overlap] = 0; options[AZ_reorder] = 0; params[AZ_rthresh] = 1.0E-1; params[AZ_athresh] = 1.0E-1; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_bilu_ifp; options[AZ_reorder] = 0; options[AZ_graph_fill] = 0; params[AZ_rthresh] = 1.0E-7; params[AZ_athresh] = 1.0E-7; options[AZ_poly_ord] = 1; options[AZ_precond] = AZ_Jacobi; params[AZ_omega] = 1.0; options[AZ_precond] = AZ_none ; options[AZ_poly_ord] = 1; options[AZ_precond] = AZ_Jacobi ; options[AZ_scaling] = AZ_sym_row_sum ; options[AZ_scaling] = AZ_sym_diag; options[AZ_conv] = AZ_noscaled; options[AZ_scaling] = AZ_Jacobi ; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_icc ; options[AZ_subdomain_solve] = AZ_ilut ; params[AZ_omega] = 1.2; params[AZ_ilut_fill] = 2.0; params[AZ_drop] = 0.01; options[AZ_reorder] = 0; options[AZ_overlap] = 0; options[AZ_type_overlap] = AZ_symmetric; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_bilu ; options[AZ_graph_fill] = 0; options[AZ_overlap] = 0; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_bilu_ifp ; options[AZ_graph_fill] = 0; options[AZ_overlap] = 0; params[AZ_rthresh] = 1.0E-3; params[AZ_athresh] = 1.0E-3; options[AZ_poly_ord] = 1; options[AZ_precond] = AZ_Jacobi ; */ options[AZ_kspace] = 600 ; options[AZ_max_iter] = 600 ; params[AZ_tol] = 1.0e-14; #ifdef BGMRES options[AZ_gmres_blocksize] = 3; options[AZ_gmres_num_rhs] = 1; #endif #ifdef DEBUG if (proc_config[AZ_N_procs]==1) write_vec("rhs.dat", N_local, b); #endif /* xsolve is a little longer vector needed to account for external entries. Make it and copy x (initial guess) into it. 
*/ if (has_global_indices) { N_external = 0; } else { N_external = data_org[AZ_N_external]; } xsolve = (double *) calloc(N_local + N_external, sizeof(double)) ; for (i=0; i<N_local; i++) xsolve[i] = x[i]; /* Reorder rhs and xsolve to match matrix ordering from AZ_transform */ if (!has_global_indices) { AZ_reorder_vec(b, data_org, update_index, rpntr) ; AZ_reorder_vec(xsolve, data_org, update_index, rpntr) ; } #ifdef VBRMATRIX AZ_check_vbr(N_update, data_org[AZ_N_ext_blk], AZ_LOCAL, bindx, bpntr, cpntr, rpntr, proc_config); #else AZ_check_msr(bindx, N_update, N_external, AZ_LOCAL, proc_config); #endif printf("Processor %d of %d N_local = %d N_external = %d NNZ = %d\n", proc_config[AZ_node],proc_config[AZ_N_procs],N_local,N_external, n_nonzeros); /* solve the system of equations using b as the right hand side */ Prec = AZ_precond_create(Amat,AZ_precondition, NULL); AZ_iterate(xsolve, b, options, params, status, proc_config, Amat, Prec, NULL); /*AZ_ifpack_iterate(xsolve, b, options, params, status, proc_config, Amat);*/ if (proc_config[AZ_node]==0) { printf("True residual norm = %22.16g\n",status[AZ_r]); printf("True scaled res = %22.16g\n",status[AZ_scaled_r]); printf("Computed res norm = %22.16g\n",status[AZ_rec_r]); } #ifdef TEST_SINGULAR xnull = (double *) calloc(N_local + N_external, sizeof(double)) ; Axnull = (double *) calloc(N_local + N_external, sizeof(double)) ; for (i=0; i<N_local; i++) xnull[i] = xexact[i]; if (!has_global_indices) AZ_reorder_vec(xnull, data_org, update_index, rpntr); for (i=0; i<N_local; i++) xnull[i] -= xsolve[i]; /* fill with nullerence */ Amat->matvec(xnull, Axnull, Amat, proc_config); norm_Axnull = AZ_gvector_norm(N_local, 2, Axnull, proc_config); if (proc_config[AZ_node]==0) printf("Norm of A(xexact-xsolve) = %12.4g\n",norm_Axnull); free((void *) xnull); free((void *) Axnull); #endif /* Get solution back into original ordering */ if (!has_global_indices) { AZ_invorder_vec(xsolve, data_org, update_index, rpntr, x); free((void *) 
xsolve); } else { free((void *) x); x = xsolve; } #ifdef DEBUG if (proc_config[AZ_N_procs]==1) write_vec("solution.dat", N_local, x); #endif if (xexact != NULL) { double sum = 0.0; double largest = 0.0; for (i=0; i<N_local; i++) sum += fabs(x[i]-xexact[i]); printf("Processor %d: Difference between exact and computed solution = %12.4g\n", proc_config[AZ_node],sum); for (i=0; i<N_local; i++) largest = AZ_MAX(largest,fabs(xexact[i])); printf("Processor %d: Difference divided by max abs value of exact = %12.4g\n", proc_config[AZ_node],sum/largest); } free((void *) val); free((void *) bindx); #ifdef VBRMATRIX free((void *) rpntr); free((void *) bpntr); free((void *) indx); #endif free((void *) b); free((void *) x); if (xexact!=NULL) free((void *) xexact); AZ_free((void *) update); AZ_free((void *) update_index); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) data_org); if (cpntr!=NULL) AZ_free((void *) cpntr); AZ_precond_destroy(&Prec); AZ_matrix_destroy(&Amat); #ifdef AZTEC_MPI MPI_Finalize() ; #endif /* end main */ return 0 ; }
void AZOO_iterate(double * xsolve, double * b, int * options, double * params, double * status, int *proc_config, AZ_MATRIX * Amat, AZ_PRECOND *precond, struct AZ_SCALING *scaling) { (void)precond; (void)scaling; bool verbose = (options[AZ_output]!=AZ_none); // Print info unless all output is turned off Epetra_Comm * comm; Epetra_BlockMap * map; Epetra_RowMatrix * A; Epetra_Vector * px; Epetra_Vector * pb; int * global_indices; int ierr = Aztec2Petra(proc_config, Amat, xsolve, b, comm, map, A, px, pb, &global_indices); if (ierr!=0) { cerr << "Error detected in Aztec2Petra. Value = " << ierr << endl; exit(1); } Epetra_LinearProblem problem(A, px, pb); Epetra_Vector * leftScaleVec = 0; Epetra_Vector * rightScaleVec = 0; bool doRowScaling = false; bool doColScaling = false; if ((options[AZ_scaling]==AZ_Jacobi) || options[AZ_scaling]==AZ_BJacobi) { doRowScaling = true; leftScaleVec = new Epetra_Vector(*map); A->ExtractDiagonalCopy(*leftScaleVec); // Extract diagonal of matrix leftScaleVec->Reciprocal(*leftScaleVec); // invert it } else if (options[AZ_scaling]==AZ_row_sum) { doRowScaling = true; leftScaleVec = new Epetra_Vector(*map); A->InvRowSums(*leftScaleVec); } else if (options[AZ_scaling]==AZ_sym_diag) { doRowScaling = true; doColScaling = true; leftScaleVec = new Epetra_Vector(*map); A->ExtractDiagonalCopy(*leftScaleVec); // Extract diagonal of matrix int length = leftScaleVec->MyLength(); for (int i=0; i<length; i++) (*leftScaleVec)[i] = sqrt(fabs((*leftScaleVec)[i])); // Take its sqrt rightScaleVec = leftScaleVec; // symmetric, so left and right the same leftScaleVec->Reciprocal(*leftScaleVec); // invert it } else if (options[AZ_scaling]==AZ_sym_row_sum) { doRowScaling = true; doColScaling = true; leftScaleVec = new Epetra_Vector(*map); A->InvRowSums(*leftScaleVec); int length = leftScaleVec->MyLength(); for (int i=0; i<length; i++) (*leftScaleVec)[i] = sqrt(fabs((*leftScaleVec)[i])); // Take its sqrt rightScaleVec = leftScaleVec; // symmetric, so left and 
right the same } if ((doRowScaling || doColScaling) && verbose) { double norminf = A->NormInf(); double normone = A->NormOne(); if (comm->MyPID()==0) cout << "\n Inf-norm of A before scaling = " << norminf << "\n One-norm of A before scaling = " << normone<< endl << endl; } if (doRowScaling) problem.LeftScale(*leftScaleVec); if (doColScaling) problem.RightScale(*rightScaleVec); if ((doRowScaling || doColScaling) && verbose) { double norminf = A->NormInf(); double normone = A->NormOne(); if (comm->MyPID()==0) cout << "\n Inf-norm of A after scaling = " << norminf << "\n One-norm of A after scaling = " << normone << endl << endl; } AztecOO solver(problem); solver.SetAllAztecParams(params); // set all AztecOO params with user-provided params solver.SetAllAztecOptions(options); // set all AztecOO options with user-provided options solver.CheckInput(); solver.SetAztecOption(AZ_scaling, AZ_none); // Always must have scaling off solver.Iterate(options[AZ_max_iter], params[AZ_tol]); solver.GetAllAztecStatus(status); if (doColScaling) { rightScaleVec->Reciprocal(*rightScaleVec); problem.RightScale(*rightScaleVec); } if (doRowScaling) { leftScaleVec->Reciprocal(*leftScaleVec); problem.LeftScale(*leftScaleVec); } if ((rightScaleVec!=0) && (rightScaleVec!=leftScaleVec)) delete rightScaleVec; if (leftScaleVec!=0) delete leftScaleVec; delete pb; // These are all objects created here and we have to delete them delete px; delete A; delete map; delete comm; if (global_indices!=0) AZ_free((void *) global_indices); // Note: we used a special version of free here return; }
int main(int argc, char *argv[]) { int num_PDE_eqns=3, N_levels=3, nsmooth=1; int leng, level, N_grid_pts, coarsest_level; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ int *data_org = NULL, *update = NULL, *external = NULL; int *update_index = NULL, *extern_index = NULL; int *cpntr = NULL; int *bindx = NULL, N_update, iii; double *val = NULL; double *xxx, *rhs; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int ch,i,j, Nrigid, *garbage; struct AZ_SCALING *scaling; double solve_time, setup_time, start_time, *mode, *rigid; ML_Aggregate *ag; int nblocks, *blocks; char filename[80]; double alpha; int one = 1; #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); #endif leng = 0; if (proc_config[AZ_node] == 0) { #ifdef binary fp=fopen(".data","rb"); #else fp=fopen(".data","r"); #endif if (fp==NULL) { printf("couldn't open file .data\n"); exit(1); } #ifdef binary fread(&leng, sizeof(int), 1, fp); #else fscanf(fp,"%d",&leng); #endif fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. 
*/ AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns, AZ_linear); AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config); AZ_transform(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; start_time = AZ_second(); AZ_defaults(options, params); /* scaling = AZ_scaling_create(); xxx = (double *) calloc( leng,sizeof(double)); rhs=(double *)calloc(leng,sizeof(double)); options[AZ_scaling] = AZ_sym_diag; options[AZ_precond] = AZ_none; options[AZ_max_iter] = 30; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); don't forget vector rescaling ... free(xxx); free(rhs); */ options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config); ML_Aggregate_Create( &ag ); Nrigid = 0; if (proc_config[AZ_node] == 0) { sprintf(filename,"rigid_body_mode%d",Nrigid+1); while( (fp = fopen(filename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"rigid_body_mode%d",Nrigid+1); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } rhs=(double *)malloc(leng*sizeof(double)); AZ_random_vector(rhs, data_org, proc_config); for (i = 0; i < Nrigid; i++) { sprintf(filename,"rigid_body_mode%d",i+1); AZ_input_msr_matrix(filename, update, &mode, &garbage, N_update, proc_config); /* AZ_sym_rescale_sl(mode, Amat->data_org, options, proc_config, scaling); */ /* Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) 
printf("this is %d %e\n",j,rigid[j]); */ for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); daxpy_(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one); printf("alpha1 is %e\n",alpha); } alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/AZ_gdot(N_update, mode, mode, proc_config); printf("alpha2 is %e\n",alpha); daxpy_(&N_update, &alpha, mode, &one, rhs, &one); for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); } for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); daxpy_(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one); printf("alpha4 is %e\n",alpha); } for (i = 0; i < Nrigid; i++) { alpha = -AZ_gdot(N_update, &(rigid[i*N_update]), rhs, proc_config); printf("alpha is %e\n",alpha); } if (Nrigid != 0) { ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); /* free(rigid); */ } coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1, ML_DECREASING, ag); coarsest_level = N_levels - coarsest_level; /* ML_Operator_Print(&(ml->Pmat[N_levels-2]), "Pmat"); exit(1); */ if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); /* set up smoothers */ for (level = N_levels-1; level > coarsest_level; level--) { j = 10; if (level == N_levels-1) j = 10; options[AZ_solver] = AZ_cg; options[AZ_precond]=AZ_sym_GS; options[AZ_subdomain_solve]=AZ_icc; /* options[AZ_precond] = AZ_none; */ options[AZ_poly_ord] = 5; ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status, j, ML_PRESMOOTHER,NULL); ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status, j, ML_POSTSMOOTHER,NULL); /* ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth,1.0); ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_POSTSMOOTHER, 
nsmooth,1.0); */ /* nblocks = ML_Aggregate_Get_AggrCount( ag, level ); ML_Aggregate_Get_AggrMap( ag, level, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel( ml , level, ML_BOTH, nsmooth, 1.0, nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidel( ml , level, ML_POSTSMOOTHER, nsmooth, 1.0, nblocks, blocks); */ /* ML_Gen_Smoother_VBlockJacobi( ml , level, ML_PRESMOOTHER, nsmooth, .5, nblocks, blocks); ML_Gen_Smoother_VBlockJacobi( ml , level, ML_POSTSMOOTHER, nsmooth,.5, nblocks, blocks); */ /* ML_Gen_Smoother_GaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth); ML_Gen_Smoother_GaussSeidel(ml , level, ML_POSTSMOOTHER, nsmooth); */ /* need to change this when num_pdes is different on different levels */ /* if (level == N_levels-1) { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth, 0.5, num_PDE_eqns); ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_POSTSMOOTHER, nsmooth, 0.5, num_PDE_eqns); } else { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth, 0.5, 2*num_PDE_eqns); ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_POSTSMOOTHER, nsmooth, 0.5, 2*num_PDE_eqns); } */ /* */ /* ML_Gen_SmootherJacobi(ml , level, ML_PRESMOOTHER, nsmooth, .67); ML_Gen_SmootherJacobi(ml , level, ML_POSTSMOOTHER, nsmooth, .67 ); */ } /* ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); */ /* ML_Gen_SmootherSymGaussSeidel(ml , coarsest_level, ML_PRESMOOTHER, 2*nsmooth,1.); */ /* ML_Gen_SmootherBlockGaussSeidel(ml , level, ML_PRESMOOTHER, 50*nsmooth, 1.0, 2*num_PDE_eqns); */ ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_PRESMOOTHER, 2*nsmooth, 1.0, num_PDE_eqns); ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level); AZ_defaults(options, params); options[AZ_solver] = AZ_GMRESR; options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_rhs; options[AZ_output] = 1; options[AZ_max_iter] = 1500; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = 1.0e-8; AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); 
setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); /* Set rhs */ fp = fopen("AZ_capture_rhs.dat","r"); if (fp == NULL) { if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n"); /* AZ_random_vector(rhs, data_org, proc_config); AZ_reorder_vec(rhs, data_org, update_index, NULL); AZ_random_vector(xxx, data_org, proc_config); AZ_reorder_vec(xxx, data_org, update_index, NULL); Amat->matvec(xxx, rhs, Amat, proc_config); */ } else { ch = getc(fp); if (ch == 'S') { while ( (ch = getc(fp)) != '\n') ; } else ungetc(ch,fp); for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) fscanf(fp,"%lf",&(rhs[i])); fclose(fp); } for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set x */ fp = fopen("AZ_capture_init_guess.dat","r"); if (fp != NULL) { ch = getc(fp); if (ch == 'S') { while ( (ch = getc(fp)) != '\n') ; } else ungetc(ch,fp); for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) fscanf(fp,"%lf",&(xxx[i])); fclose(fp); options[AZ_conv] = AZ_expected_values; } /* if Dirichlet BC ... 
put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; /* options[AZ_max_iter] = 40; */ AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, xxx, &(rigid[j*N_update]), proc_config)/AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); daxpy_(&N_update, &alpha, &(rigid[j*N_update]), &one, xxx, &one); printf("alpha5 is %e\n",alpha); } AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, 
MG Setup time = %e\n", solve_time, setup_time); ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
/*
 * AZK_destroy_matrix: release a komplex matrix and the arrays hanging off it.
 *
 *   options, params, proc_config   Not referenced by this routine.
 *   Amat_komplex                   Address of the matrix to destroy; it is
 *                                  handed to AZ_matrix_destroy() at the end.
 *
 * The AZ_KOMPLEX record stored in the matrix's aux_ptr supplies the
 * external/update_index/extern_index arrays and is itself freed here.
 */
void AZK_destroy_matrix(int options[], double params[], int proc_config[],
                        AZ_MATRIX **Amat_komplex)
{
  AZ_MATRIX  *kmat = *Amat_komplex;
  AZ_KOMPLEX *aux  = (AZ_KOMPLEX *) kmat->aux_ptr;

  /* Arrays describing the mat0 and mat1 operators */

  AZ_free((void *) kmat->val);
  AZ_free((void *) kmat->bindx);
  AZ_free((void *) kmat->bpntr);
  AZ_free((void *) kmat->indx);
  AZ_free((void *) kmat->rpntr);
  AZ_free((void *) kmat->cpntr);

  /* update[] is released here only when the From_Global_Indices flag is unset */

  if (!aux->From_Global_Indices) {
    AZ_free((void *) kmat->update);
  }

  AZ_free((void *) aux->external);
  AZ_free((void *) aux->update_index);
  AZ_free((void *) aux->extern_index);
  AZ_free((void *) aux);

  /* Free data_org if Aztec doesn't do it */

  if (!kmat->must_free_data_org) {
    AZ_free((void *) kmat->data_org);
  }

  AZ_matrix_destroy(Amat_komplex);
}
int main(int argc, char *argv[]) { int num_PDE_eqns=5, N_levels=3; /* int nsmooth=1; */ int leng, level, N_grid_pts, coarsest_level; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ int *data_org = NULL, *update = NULL, *external = NULL; int *update_index = NULL, *extern_index = NULL; int *cpntr = NULL; int *bindx = NULL, N_update, iii; double *val = NULL; double *xxx, *rhs; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int ch,i; struct AZ_SCALING *scaling; double solve_time, setup_time, start_time; ML_Aggregate *ag; int *ivec; #ifdef VBR_VERSION ML_Operator *B, *C, *D; int *vbr_cnptr, *vbr_rnptr, *vbr_indx, *vbr_bindx, *vbr_bnptr, total_blk_rows; int total_blk_cols, blk_space, nz_space; double *vbr_val; struct ML_CSR_MSRdata *csr_data; #endif #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); #endif #ifdef binary fp=fopen(".data","rb"); #else fp=fopen(".data","r"); #endif if (fp==NULL) { printf("couldn't open file .data\n"); exit(1); } #ifdef binary fread(&leng, sizeof(int), 1, fp); #else fscanf(fp,"%d",&leng); #endif fclose(fp); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns, AZ_linear); AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all block (including the ghost nodes the same size. 
*/ AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); AZ_transform(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); #ifndef VBR_VERSION AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; #else total_blk_rows = N_update/num_PDE_eqns; total_blk_cols = total_blk_rows; blk_space = total_blk_rows*20; nz_space = blk_space*num_PDE_eqns*num_PDE_eqns; vbr_cnptr = (int *) ML_allocate(sizeof(int )*(total_blk_cols+1)); vbr_rnptr = (int *) ML_allocate(sizeof(int )*(total_blk_cols+1)); vbr_bnptr = (int *) ML_allocate(sizeof(int )*(total_blk_cols+2)); vbr_indx = (int *) ML_allocate(sizeof(int )*(blk_space+1)); vbr_bindx = (int *) ML_allocate(sizeof(int )*(blk_space+1)); vbr_val = (double *) ML_allocate(sizeof(double)*(nz_space+1)); for (i = 0; i <= total_blk_cols; i++) vbr_cnptr[i] = num_PDE_eqns; AZ_msr2vbr(vbr_val, vbr_indx, vbr_rnptr, vbr_cnptr, vbr_bnptr, vbr_bindx, bindx, val, total_blk_rows, total_blk_cols, blk_space, nz_space, -1); data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; data_org[AZ_N_int_blk] = data_org[AZ_N_internal]/num_PDE_eqns; data_org[AZ_N_bord_blk] = data_org[AZ_N_bord_blk]/num_PDE_eqns; data_org[AZ_N_ext_blk] = data_org[AZ_N_ext_blk]/num_PDE_eqns; data_org[AZ_matrix_type] = AZ_VBR_MATRIX; AZ_set_VBR(Amat, vbr_rnptr, vbr_cnptr, vbr_bnptr, vbr_indx, vbr_bindx, vbr_val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; #endif start_time = AZ_second(); ML_Create(&ml, N_levels); ML_Set_PrintLevel(3); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config); ML_Aggregate_Create( &ag ); ML_Aggregate_Set_Threshold(ag,0.0); ML_Set_SpectralNormScheme_PowerMethod(ml); /* To run SA: a) set damping factor 
to 1 and use power method ML_Aggregate_Set_DampingFactor(ag, 4./3.); To run NSA: a) set damping factor to 0 ML_Aggregate_Set_DampingFactor(ag, 0.); To run NSR a) set damping factor to 1 and use power method ML_Aggregate_Set_DampingFactor(ag, 1.); ag->Restriction_smoothagg_transpose = ML_FALSE; ag->keep_agg_information=1; ag->keep_P_tentative=1; b) hack code so it calls the energy minimizing restriction line 2973 of ml_agg_genP.c c) turn on the NSR flag in ml_agg_energy_min.cpp To run Emin a) set min_eneryg = 2 and keep_agg_info = 1; ag->minimizing_energy=2; ag->keep_agg_information=1; ag->cheap_minimizing_energy = 0; ag->block_scaled_SA = 1; */ ag->minimizing_energy=2; ag->keep_agg_information=1; ag->block_scaled_SA = 1; ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, num_PDE_eqns, NULL, N_update); ML_Aggregate_Set_MaxCoarseSize( ag, 20); /* ML_Aggregate_Set_RandomOrdering( ag ); ML_Aggregate_Set_DampingFactor(ag, .1); ag->drop_tol_for_smoothing = 1.0e-3; ML_Aggregate_Set_Threshold(ag, 1.0e-3); ML_Aggregate_Set_MaxCoarseSize( ag, 300); */ coarsest_level = ML_Gen_MultiLevelHierarchy_UsingAggregation(ml, N_levels-1, ML_DECREASING, ag); coarsest_level = N_levels - coarsest_level; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); /* set up smoothers */ AZ_defaults(options, params); for (level = N_levels-1; level > coarsest_level; level--) { /* This is the Aztec domain decomp/ilu smoother that we */ /* usually use for this problem. */ /* options[AZ_precond] = AZ_dom_decomp; options[AZ_subdomain_solve] = AZ_ilut; params[AZ_ilut_fill] = 1.0; options[AZ_reorder] = 1; ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status, AZ_ONLY_PRECONDITIONER, ML_PRESMOOTHER,NULL); */ /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. 
*/ /* ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, parasails_loadbal, parasails_factorized); parasails_thresh /= 4.; */ /* This is the symmetric Gauss-Seidel smoothing. In parallel, */ /* it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. */ /* ML_Gen_Smoother_SymGaussSeidel(ml,level,ML_PRESMOOTHER, nsmooth,1.); ML_Gen_Smoother_SymGaussSeidel(ml,level,ML_POSTSMOOTHER,nsmooth,1.); */ /* Block Gauss-Seidel with block size equal to #DOF per node. */ /* Not a true Gauss-Seidel in that each processor does a */ /* Gauss-Seidel on its local submatrix independent of the other */ /* processors. */ /* ML_Gen_Smoother_BlockGaussSeidel(ml,level,ML_PRESMOOTHER, nsmooth,0.67, num_PDE_eqns); ML_Gen_Smoother_BlockGaussSeidel(ml,level,ML_POSTSMOOTHER, nsmooth, 0.67, num_PDE_eqns); */ ML_Gen_Smoother_SymBlockGaussSeidel(ml,level,ML_POSTSMOOTHER, 1, 1.0, num_PDE_eqns); } ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); ML_Gen_Solver(ml, ML_MGW, N_levels-1, coarsest_level); AZ_defaults(options, params); options[AZ_solver] = AZ_gmres; options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; /* options[AZ_conv] = AZ_r0; */ options[AZ_output] = 1; options[AZ_max_iter] = 1500; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = 1.0e-8; /* options[AZ_precond] = AZ_dom_decomp; options[AZ_subdomain_solve] = AZ_ilut; params[AZ_ilut_fill] = 2.0; */ AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); rhs=(double *)malloc(leng*sizeof(double)); for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set rhs */ fp = fopen("AZ_capture_rhs.mat","r"); if (fp == NULL) { if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n"); AZ_random_vector(rhs, data_org, proc_config); AZ_reorder_vec(rhs, 
data_org, update_index, NULL); } else { fclose(fp); ivec =(int *)malloc((leng+1)*sizeof(int)); AZ_input_msr_matrix("AZ_capture_rhs.mat", update, &rhs, &ivec, N_update, proc_config); free(ivec); AZ_reorder_vec(rhs, data_org, update_index, NULL); } /* Set x */ fp = fopen("AZ_capture_init_guess.mat","r"); if (fp != NULL) { fclose(fp); ivec =(int *)malloc((leng+1)*sizeof(int)); AZ_input_msr_matrix("AZ_capture_init_guess.mat",update, &xxx, &ivec, N_update, proc_config); free(ivec); AZ_reorder_vec(xxx, data_org, update_index, NULL); } /* if Dirichlet BC ... put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); 
AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
void init_matrix_vector_structures(int proc_config[], int *update_index[], int *update[], int *data_org[], int *external[], int *extern_index[], int input_option, double *val[], int *bindx[], int *indx[], int *bpntr[], int *rpntr[], int *cpntr[]) /* * Read in the points to be updated on this processor, create the global * distributed form of the application matrix, and then convert it to a * local distributed form for AZTEC kernels. Along the way, initialize the * following quantities: * update_index[], update[], data_org[], a[], bindx[], bpntr[], cpntr[], * rpntr[], indx[], external[], extern_index[]. * * Author: Ray Tuminaro, Div 1422, SNL * Date: 3/15/95 * * Parameters * * proc_config == On input, processor information: * proc_config[AZ_node] = name of this processor * proc_config[AZ_N_procs] = # of processors used * update == On output, list of pts to be updated on this node * val,bindx == On output, local distributed form of arrays * holding matrix values * external == On output, list of external vector elements * update_index == On output, ordering of update and external * extern_index == locally on this processor. For example * 'update_index[i]' gives the index location * of the block which has the global index * 'update[i]'. * data_org == On output, indicates how the data is set out on * this node. For example, data_org[] contains * information on how many unknowns are internal, * external, and border unknowns as well as which * points need to be communicated. See User's Guide * for more details. * input_option == Indicates how update[] will be initialized. * = 0, linear decomposition * = 1, points read from file 'update'. * = 2, box decomposition * See AZ_read_update() comments for more details. * * The default finite difference MSR problem corresponds to a setting up * a series of uncoupled 3D Poisson equations on a cube. 
* To solve other problems, the call 'add_row_3D(...)' in * 'create_msr_matrix()' can be changed to 'add_row_5pt()' or * 'add_row_9pt()'. */ { int N_update; /* Number of pts updated on this processor */ int MSRorVBR; int chunks; int blk_size, num_blk_cols,num_blk_rows,size,kk, convert_to_vbr = 0; double *val2; int *bindx2; MSRorVBR = AZ_MSR_MATRIX; if (application == 1) MSRorVBR = AZ_VBR_MATRIX; chunks = num_PDE_eqns; if (MSRorVBR == AZ_VBR_MATRIX) chunks = 1; /* initialize the list of global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ AZ_read_update(&N_update, update, proc_config, N_grid_pts, chunks, input_option); /* create the matrix: each processor creates only the */ /* rows appearing in update[] ... however this row is */ /* created as if it were on a serial machine (i.e. using */ /* the global column numbers) */ if (application == 1) create_vbr_matrix(*update, val, indx, N_update, rpntr, bpntr, bindx); else { *indx = NULL; *bpntr = NULL; *rpntr = NULL; *cpntr = NULL; if (application == 0) create_msr_matrix(*update, val, bindx, N_update); if (application == 2) create_fe_matrix(*update, proc_config[AZ_node], bindx, val, N_update); if (application == 3) { AZ_read_msr_matrix(*update, val, bindx, N_update, proc_config); } } /* convert matrix to a distributed parallel matrix */ AZ_transform(proc_config, external, *bindx, *val, *update, update_index, extern_index, data_org, N_update, *indx, *bpntr, *rpntr, cpntr, MSRorVBR); if ( (convert_to_vbr == 1) && (application == 3) ) { if (proc_config[AZ_node] == 0 ) { printf("enter the block size\n"); scanf("%d",&blk_size); } AZ_broadcast((char *) &blk_size, sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); if ( N_update%blk_size != 0 ) { (void) fprintf(stderr," The block size must be a multiple of the number of rows per processor.\n"); exit(-1); } num_blk_rows = 
N_update/blk_size; num_blk_cols = ( (*data_org)[AZ_N_external] + N_update)/blk_size; *cpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int)); *rpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int)); *bpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int)); size = 20*(num_blk_cols+2); *indx = (int *) AZ_allocate(size*sizeof(int)); bindx2 = *bindx; val2 = *val; *bindx = (int *) AZ_allocate(size*sizeof(int)); *val = (double *) AZ_allocate(size*blk_size*blk_size*sizeof(double)); for (kk = 0 ; kk < num_blk_cols ; kk++ ) (*cpntr)[kk] = blk_size; AZ_msr2vbr(*val,*indx,*rpntr,*cpntr,*bpntr,*bindx,bindx2,val2, num_blk_rows,num_blk_cols,size,size*blk_size*blk_size,blk_size); MSRorVBR = AZ_VBR_MATRIX; N_update /= blk_size; num_PDE_eqns = blk_size; for (kk = 0 ; kk < N_update ; kk++ ) (*update)[kk] = (*update)[blk_size*kk]/blk_size; for (kk = 0 ; kk < (*data_org)[AZ_N_external] ; kk++ ) (*external)[kk] = (*external)[blk_size*kk]/blk_size; (*data_org)[AZ_matrix_type] = AZ_VBR_MATRIX; (*data_org)[AZ_N_int_blk ] /= blk_size; (*data_org)[AZ_N_bord_blk] /= blk_size; (*data_org)[AZ_N_ext_blk ] /= blk_size; AZ_free(bindx2); AZ_free(val2); } } /* init_matrix_vector_structures */
AZ_MATRIX *user_Ke_build(struct user_partition *Edge_Partition) { double dcenter, doff, sigma = .0001; int ii,jj, horv, i, nx, global_id, nz_ptr, Nlocal_edges; /* Aztec matrix and temp variables */ int *Ke_bindx, *Ke_data_org = NULL; double *Ke_val; AZ_MATRIX *Ke_mat; int proc_config[AZ_PROC_SIZE], *cpntr = NULL; int *reordered_glob_edges = NULL, *reordered_edge_externs = NULL; Nlocal_edges = Edge_Partition->Nlocal; nx = (int) sqrt( ((double) Edge_Partition->Nglobal/2) + .00001); Ke_bindx = (int *) malloc((7*Nlocal_edges+1)*sizeof(int)); Ke_val = (double *) malloc((7*Nlocal_edges+1)*sizeof(double)); Ke_bindx[0] = Nlocal_edges+1; dcenter = 2 + 2.*sigma/((double) ( 3 * nx * nx)); doff = -1 + sigma/((double) ( 6 * nx * nx)); for (i = 0; i < Nlocal_edges; i++) { global_id = (Edge_Partition->my_global_ids)[i]; invindex(global_id, &ii, &jj, nx, &horv); nz_ptr = Ke_bindx[i]; Ke_val[i] = dcenter; if (horv == HORIZONTAL) { if (jj != 0) { Ke_bindx[nz_ptr] = north(ii,jj,nx); Ke_val[nz_ptr++] = doff; Ke_bindx[nz_ptr] = east(ii,jj,nx); Ke_val[nz_ptr++] = -1.; if (ii != 0) {Ke_bindx[nz_ptr]=west(ii,jj,nx); Ke_val[nz_ptr++]= 1.;} jj--; } else { Ke_val[i] = 1. 
+ 2.*sigma/((double) ( 3 * nx * nx)); jj = nx-1; } Ke_bindx[nz_ptr] = east(ii,jj,nx); Ke_val[nz_ptr++] = 1.; if (ii != 0){ Ke_bindx[nz_ptr]=west(ii,jj,nx); Ke_val[nz_ptr++]=-1.;} if (jj != 0){ Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=doff;} } else { if (ii != 0) { Ke_bindx[nz_ptr] = north(ii,jj,nx); Ke_val[nz_ptr++] = -1.; Ke_bindx[nz_ptr] = east(ii,jj,nx); Ke_val[nz_ptr++] = doff; if (jj != 0) {Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=1.;} ii--; } else { Ke_val[i] = 1 + 2.*sigma/((double) ( 3 * nx * nx)); ii = nx-1; } Ke_bindx[nz_ptr] = north(ii,jj,nx); Ke_val[nz_ptr++] = 1.; if (ii != 0) {Ke_bindx[nz_ptr]=west(ii,jj,nx); Ke_val[nz_ptr++]=doff;} if (jj != 0) {Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=-1.;} } Ke_bindx[i+1] = nz_ptr; } AZ_set_proc_config(proc_config, COMMUNICATOR); AZ_transform_norowreordering(proc_config, &(Edge_Partition->needed_external_ids), Ke_bindx, Ke_val, Edge_Partition->my_global_ids, &reordered_glob_edges, &reordered_edge_externs, &Ke_data_org, Nlocal_edges, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); AZ_free(reordered_glob_edges); AZ_free(reordered_edge_externs); Edge_Partition->Nghost = Ke_data_org[AZ_N_external]; Ke_mat = AZ_matrix_create( Nlocal_edges ); AZ_set_MSR(Ke_mat, Ke_bindx, Ke_val, Ke_data_org, 0, NULL, AZ_LOCAL); return(Ke_mat); }