//--------------------------------------------------------------------------- Solver_AztecOO::~Solver_AztecOO() { delete azoo_; delete paramlist_; delete linProb; #ifdef HAVE_FEI_ML delete [] ml_aztec_options_; delete [] ml_aztec_params_; delete ml_prec_; #endif AZ_manage_memory(0, AZ_CLEAR_ALL, 0, NULL, NULL); }
void AZ_do_Jacobi(double val[], int indx[], int bindx[], int rpntr[], int cpntr[], int bpntr[], double x[], double b[], double temp[], int options[], int data_org[], int proc_config[], double params[], int flag) { double *v; int i,step; int N; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { if ( (options[AZ_poly_ord] != 0) && (flag == 1) ) for (i = data_org[AZ_N_internal]; i < N; i++) x[i] = b[i]/val[i]; if (options[AZ_poly_ord] > flag) { v = AZ_manage_memory((N+data_org[AZ_N_external])*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, "v in do_jacobi", &i); for (i = 0; i < N; i++) v[i] = x[i]; for (step = flag; step < options[AZ_poly_ord]; step++) { Amat->matvec(v, temp, Amat, proc_config); for(i = 0; i < N; i++) v[i] += (b[i] - temp[i]) / val[i]; } for (i = 0; i < N; i++) x[i] = v[i]; } } else { (void) AZ_printf_err("AZ_slu with option[AZ_poly_ord] > 0 only \n"); (void) AZ_printf_err("implemented for MSR matrices.\n"); exit(-1); } }
void AZ_precondition(double x[], int input_options[], int proc_config[], double input_params[], AZ_MATRIX *Amat, AZ_PRECOND *input_precond) /******************************************************************************* This routine calls appropriate sparse matrix preconditioner. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== x: On input, contains the current solution. On output contains the preconditioned solution to the linear system. options: Determines specific solution method and other parameters. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. params: Drop tolerance and convergence tolerance info. Amat: Structure used to represent the matrix (see az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditioner (see file az_aztec.h and Aztec User's Guide). * -------------------------------------------------------------------- Related routines: scaling routines: AZ_block_diagonal_scaling -- block-diagonally scales sparse matrix problem. AZ_row_sum_scaling -- row sum scales sparse matrix problem. sym_diagonal_scaling -- diagonaly scales symm. sparse problem. sym_row_sum_scaling -- row sum scales symmetric sparse problem. preconditioners: jacobi -- point Jacobi method. AZ_polynomial_expansion-- Polynomial expansion; Neumann series and least squares. domain decomposition -- Block solvers (LU , ILU or ILUT) used on each processor. The blocks are either non-overlapping or overlapping. icc -- incomplete sparse Choleski (symmetric version). *******************************************************************************/ { /* local variables */ int ione = 1; double *temp; int m, N, k, length; int i, step, j; static int *d2_indx,*d2_bindx,*d2_rpntr,*d2_bpntr; static double *d2_inv; static AZ_MATRIX *Dmat; int tsize, multilevel_flag = 0, max_externals; static int previous_factors = -1; double *v, *y; char *yo = "precond: "; int *data_org, *bindx, *indx, *cpntr, *rpntr, *bpntr; double *val; char label[64],suffix[32]; char tag[80]; double *current_rhs, *orig_rhs = NULL, *x_precond = NULL; int *options, *ioptions, N_fixed, *fixed_pts; double *params, *iparams, *istatus; AZ_MATRIX *Aptr, *Pmat; AZ_PRECOND *Pptr, *precond; struct AZ_SCALING *Sptr; int opt_save1, opt_save2, opt_save3, opt_save4, opt_save5, *itemp; double *tttemp, norm1, *dtemp; #ifdef TIMING double ttt; #endif #ifdef eigen double *tb, *tr; #endif /**************************** execution begins ******************************/ #ifdef TIMING ttt = AZ_second(); #endif precond = input_precond; sprintf(suffix," in precond%d",input_options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ data_org = precond->Pmat->data_org; options = input_options; params = input_params; m = data_org[AZ_N_int_blk] + data_org[AZ_N_bord_blk]; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; max_externals = Amat->data_org[AZ_N_external]; if (max_externals < data_org[AZ_N_external]) max_externals = data_org[AZ_N_external]; current_rhs = x; if (options[AZ_precond] == AZ_multilevel) { /* make extra vectors to hold rhs and residual */ sprintf(tag,"orig_rhs %s",precond->context->tag); orig_rhs = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id,tag,&i); sprintf(tag,"x_prec %s",precond->context->tag); x_precond = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i); for (i = 0 ; i < N; i++) x_precond[i] = 0.0; for (i = 0 ; i < N; i++) orig_rhs[i] = current_rhs[i]; multilevel_flag = 1; options = precond->options; params = precond->params; } do { data_org = precond->Pmat->data_org; val = precond->Pmat->val; bindx = precond->Pmat->bindx; cpntr = precond->Pmat->cpntr; indx = precond->Pmat->indx; rpntr = precond->Pmat->rpntr; bpntr = precond->Pmat->bpntr; if (max_externals < data_org[AZ_N_external]) max_externals = data_org[AZ_N_external]; switch (options[AZ_precond]) { case AZ_none: break; case AZ_Jacobi: if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { for (i = 0; i < N; i++) current_rhs[i] /= val[i]; if (options[AZ_poly_ord] > 1) { sprintf(tag,"v_prec %s",precond->context->tag); v = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); sprintf(tag,"y_prec %s",precond->context->tag); y = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i); for (i = 0; i < N; i++) v[i] = current_rhs[i]; for (step = 1; step < options[AZ_poly_ord]; step++) { Amat->matvec(v, y, Amat, proc_config); for(i = 0; i < N; i++) v[i] += current_rhs[i] - y[i] / val[i]; } for (i = 0; i < N; i++) current_rhs[i] = v[i]; } } else if (data_org[AZ_matrix_type] == AZ_USER_MATRIX) { if (options[AZ_pre_calc] < AZ_sys_reuse) { sprintf(tag,"d2_inv %s",precond->context->tag); d2_inv = (double *) AZ_manage_memory(N*sizeof(double),AZ_ALLOC, data_org[AZ_name],tag,&i); Pmat = precond->Pmat; if ( (Pmat->N_nz < 0) || (Pmat->max_per_row < 0)) AZ_matfree_Nnzs(Pmat); if ( (Pmat->getrow == NULL) && (N != 0) ) { AZ_printf_err("Error: Only matrices with getrow() defined via "); AZ_printf_err("AZ_set_MATFREE_getrow(...) can do Jacobi preconditioning\n"); exit(1); } sprintf(tag,"dtemp %s",precond->context->tag); dtemp = (double *) AZ_manage_memory(Pmat->max_per_row* sizeof(double),AZ_ALLOC, data_org[AZ_name],tag,&i); sprintf(tag,"itemp %s",precond->context->tag); itemp = (int *) AZ_manage_memory(Pmat->max_per_row* sizeof(int ),AZ_ALLOC, data_org[AZ_name],tag,&i); for (i = 0; i < N; i++) { Pmat->getrow(itemp,dtemp,&length,Pmat,1,&i,Pmat->max_per_row); for (k =0; k < length; k++) if (itemp[k] == i) break; if (k == length) d2_inv[i] = 0.0; /* no diagonal */ else d2_inv[i] = 1./dtemp[k]; } } for (i = 0; i < N; i++) current_rhs[i] *= d2_inv[i]; if (options[AZ_poly_ord] > 1) { sprintf(tag,"v_prec %s",precond->context->tag); v = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); sprintf(tag,"y_prec %s",precond->context->tag); y = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i); for (i = 0; i < N; i++) v[i] = current_rhs[i]; for (step = 1; step < options[AZ_poly_ord]; step++) { Amat->matvec(v, y, Amat, proc_config); for(i = 0; i < N; i++) v[i] += current_rhs[i] - y[i]*d2_inv[i]; } for (i = 0; i < N; i++) current_rhs[i] = v[i]; } } else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) { /* block Jacobi preconditioning */ if (options[AZ_pre_calc] < AZ_sys_reuse) { /* First, compute block-diagonal inverse */ /* (only if not already computed) */ tsize = 0; for (i = 0; i < m; i++) tsize += (rpntr[i+1] - rpntr[i]) * (cpntr[i+1] - cpntr[i]); sprintf(tag,"d2_indx %s",precond->context->tag); d2_indx = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_bindx %s",precond->context->tag); d2_bindx = (int *) AZ_manage_memory(m*sizeof(int), AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_rpntr %s",precond->context->tag); d2_rpntr = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_bpntr %s",precond->context->tag); d2_bpntr = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_inv %s",precond->context->tag); d2_inv = (double *) AZ_manage_memory(tsize*sizeof(double), AZ_ALLOC, data_org[AZ_name],tag,&i); d2_bpntr[0] = 0; sprintf(tag,"dmat_calk_binv %s",precond->context->tag); Dmat = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX), AZ_ALLOC,data_org[AZ_name],tag,&i); Dmat->rpntr = d2_rpntr; Dmat->cpntr = d2_rpntr; Dmat->bpntr = d2_bpntr; Dmat->bindx = d2_bindx; Dmat->indx = d2_indx; Dmat->val = d2_inv; Dmat->data_org = data_org; Dmat->matvec = precond->Pmat->matvec; Dmat->matrix_type = precond->Pmat->matrix_type; if (options[AZ_pre_calc] != AZ_reuse) { AZ_calc_blk_diag_inv(val, indx, bindx, rpntr, cpntr, bpntr, d2_inv, d2_indx, d2_bindx, d2_rpntr, d2_bpntr, data_org); } else if (i == AZ_NEW_ADDRESS) { AZ_printf_err( "Error: options[AZ_pre_calc]==AZ_reuse and" "previous factors\n not found. Check" "data_org[AZ_name].\n"); exit(-1); } } else if (previous_factors != data_org[AZ_name]) { AZ_printf_err( "Warning: Using a previous factorization as a" "preconditioner\neven though matrix" "(data_org[AZ_name]) has changed\n"); } previous_factors = data_org[AZ_name]; /* scale rhs */ sprintf(tag,"v_prec %s",precond->context->tag); v = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); Dmat->matvec(current_rhs, v, Dmat, proc_config); DCOPY_F77(&N, v, &ione, current_rhs, &ione); if (options[AZ_poly_ord] > 1) { sprintf(tag,"y_prec %s",precond->context->tag); y = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); sprintf(tag,"temp_prec %s",precond->context->tag); temp = AZ_manage_memory(N*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,tag,&i); for (step = 1; step < options[AZ_poly_ord]; step++) { Amat->matvec(v, y, Amat, proc_config); Dmat->matvec(y, temp, Dmat, proc_config); for (i = 0; i < N; i++) v[i] += current_rhs[i] - temp[i]; } for (i = 0; i < N; i++) current_rhs[i] = v[i]; } } break; case AZ_sym_GS: /* symmetric Gauss-Seidel preconditioner only available on 1 proc */ if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) AZ_sym_gauss_seidel(); else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) AZ_sym_gauss_seidel_sl(val, bindx, current_rhs, data_org, options, precond->context, proc_config); break; case AZ_Neumann: case AZ_ls: if (!options[AZ_poly_ord]) return; AZ_polynomial_expansion(current_rhs, options, proc_config, precond); break; case AZ_dom_decomp: case AZ_rilu: AZ_domain_decomp(current_rhs, precond->Pmat, options, proc_config, params, precond->context); break; case AZ_icc: /* incomplete Cholesky factorization */ (void) AZ_printf_out("Incomplete Cholesky not available (use ilu).\n"); break; case AZ_user_precond: precond->prec_function(current_rhs, options, proc_config, params, Amat, precond); break; case AZ_smoother: sprintf(label,"istatus %s",precond->context->tag); istatus = AZ_manage_memory(AZ_STATUS_SIZE*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label,&i); for (i = 0 ; i < AZ_STATUS_SIZE ; i++ ) istatus[i] = 0.0; sprintf(label,"y %s",precond->context->tag); y = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); sprintf(label,"tttemp %s",precond->context->tag); tttemp = AZ_manage_memory((N+max_externals)*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) tttemp[i] = current_rhs[i]; N_fixed = 0; fixed_pts = NULL; if (Amat->aux_ival != NULL) { N_fixed = Amat->aux_ival[0][0]; fixed_pts = Amat->aux_ival[1]; } else if (options[AZ_pre_calc] != AZ_sys_reuse) AZ_printf_out("Warning: Not fixed points set for local smoothing!!\n"); for (j = 0; j < options[AZ_poly_ord]; j++) { AZ_loc_avg(Amat, tttemp, y, N_fixed, fixed_pts, proc_config); norm1 = sqrt(AZ_gdot(N, y, y, proc_config)); if (proc_config[AZ_node] == 0) { if ((j==0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_summary) && (options[AZ_output] != AZ_warnings)) AZ_printf_out(" %d %e\n",j, norm1); else if ((j==options[AZ_poly_ord]-1) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings)) AZ_printf_out(" %d %e\n",j, norm1); else if ((options[AZ_output] > 0) && (j%options[AZ_output] == 0)) AZ_printf_out(" %d %e\n",j, norm1); } for (i = 0 ; i < N ; i++ ) tttemp[i] = y[i]; } for (i = 0 ; i < N ; i++ ) y[i] = current_rhs[i] - y[i]; for (i = 0 ; i < N ; i++ ) current_rhs[i] = 0.0; opt_save1 = options[AZ_output]; opt_save2 = options[AZ_solver]; opt_save3 = options[AZ_precond]; opt_save4 = options[AZ_max_iter]; opt_save5 = options[AZ_aux_vec]; options[AZ_output] = AZ_warnings; options[AZ_solver] = AZ_tfqmr; options[AZ_precond] = AZ_dom_decomp; options[AZ_max_iter]= 1000; options[AZ_aux_vec] = AZ_rand; options[AZ_recursion_level]++; AZ_oldsolve(current_rhs, y,options, params, istatus, proc_config, Amat, precond, NULL); options[AZ_recursion_level]--; options[AZ_output] = opt_save1; options[AZ_solver] = opt_save2; options[AZ_precond] = opt_save3; options[AZ_max_iter]= opt_save4; options[AZ_aux_vec] = opt_save5; break; default: if (options[AZ_precond] < AZ_SOLVER_PARAMS) { AZ_recover_sol_params(options[AZ_precond], &ioptions, &iparams, &istatus, &Aptr, &Pptr, &Sptr); sprintf(label,"y %s",precond->context->tag); y = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) y[i] = current_rhs[i]; for (i = 0 ; i < N ; i++ ) current_rhs[i] = 0.0; ioptions[AZ_recursion_level] = options[AZ_recursion_level] + 1; if ((options[AZ_pre_calc] == AZ_sys_reuse) && (ioptions[AZ_keep_info] == 1)) ioptions[AZ_pre_calc] = AZ_reuse; AZ_oldsolve(current_rhs, y,ioptions,iparams, istatus, proc_config, Aptr, Pptr, Sptr); } else { (void) AZ_printf_err( "%sERROR: invalid preconditioning flag.\n" " options[AZ_precond] improperly set (%d).\n", yo, options[AZ_precond]); exit(-1); } } options[AZ_pre_calc] = AZ_sys_reuse; precond->context->Pmat_computed = 1; if (multilevel_flag) { if (precond->next_prec == NULL) { multilevel_flag = 0; for (i = 0; i < N; i++) current_rhs[i] += x_precond[i]; } else { for (i = 0; i < N; i++) x_precond[i] += current_rhs[i]; AZ_compute_residual(orig_rhs, x_precond, current_rhs, proc_config, Amat); precond = precond->next_prec; options = precond->options; params = precond->params; } } } while (multilevel_flag); proc_config[AZ_MPI_Tag] = AZ_MSG_TYPE; /* reset all the message types. */ /* This is to make sure that all */ /* processors (even those without */ /* any preconditioning work) have */ /* the same message types for the */ /* next message. */ #ifdef TIMING ttt = AZ_second() - ttt; if (input_options[AZ_recursion_level] == 0) input_precond->timing[0] += ttt; #endif } /* precond */
void AZ_factor_subdomain(struct context *context, int N, int N_nz, int *nz_used) { /**************************************************************************** Given an overlapped subdomain matrix, factor it according to the chosen algorithm and store the result back in subdomain. Additionally, store the number of nonzeros used in the factorization in nz_used. Notes: 1) Matrix comes in as an MSR matrix. 2) context contains several fields which need to be appropriately set. These fields are specific to the individual solvers. 3) The factorization overwrites the matrix. However, different solvers will store the factorization in different formats. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== context On input, context contains the matrix to be factored in context.A_overlapped (MSR format), On output, context contains the factored matrix which is stored in a format specific to the solver and any additional parameters required by the backsolver. N On input, the size of the linear system to be solved. N_nz On input, the number of nonzero values in the matrix to be factored. nz_used On output, the number of nonzero values in the matrix representing the factorization. *******************************************************************************/ #ifdef HAVE_AZLU int ifail, N_nz_matrix, *rnr; double *fake_rhs, *aflag; #endif int i, j, *bindx, *bpntr, *iw; double *cr, *unorm, *a, *val; int *ind, *jnz, *ja, ifill; double dtemp = (context->aztec_choices->params)[AZ_omega]; int N_blk_rows, name = context->A_overlapped->data_org[AZ_name]; char str[80]; /* Begin Aztec 2.1 mheroux mod */ #ifdef IFPACK void *precon, *bmat; double rthresh, athresh; int N_int_blk, N_bord_blk, graph_fill; #endif /* End Aztec 2.1 mheroux mod */ bindx = context->A_overlapped->bindx; *nz_used = bindx[N]; switch(context->aztec_choices->options[AZ_subdomain_solve]) { /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: #ifdef IFPACK if (N == 0) return; bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for bilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); /* Let IFPACK handle fillin */ graph_fill = (context->aztec_choices->options)[AZ_graph_fill]; (context->aztec_choices->options)[AZ_graph_fill] = 0; /* recover some space so that there will */ /* be enough room to convert back to vbr */ i = AZ_compress_msr(&(context->A_overlapped->bindx), &(context->A_overlapped->val), context->N_nz_allocated, *nz_used, name, context); context->N_nz = *nz_used; context->N_nz_allocated = *nz_used; AZ_msr2vbr_mem_efficient(N, &(context->A_overlapped->bindx), &(context->A_overlapped->val), &(context->A_overlapped->cpntr), &(context->A_overlapped->bpntr), &(context->A_overlapped->indx), &N_blk_rows, (context->A_overlapped->data_org)[AZ_name], context->tag,i); context->A_overlapped->matrix_type = AZ_VBR_MATRIX; /*ifp_initialize();*/ /* Create IFPACK encapsulation of Amat */ context->A_overlapped->rpntr = context->A_overlapped->cpntr; N_int_blk = context->A_overlapped->data_org[AZ_N_int_blk]; N_bord_blk = context->A_overlapped->data_org[AZ_N_bord_blk]; context->A_overlapped->data_org[AZ_N_int_blk] = N_blk_rows; context->A_overlapped->data_org[AZ_N_bord_blk] = 0; (context->aztec_choices->options)[AZ_graph_fill] = graph_fill; az2ifp_blockmatrix(&bmat, context->A_overlapped); context->A_overlapped->data_org[AZ_N_int_blk] = N_int_blk; context->A_overlapped->data_org[AZ_N_bord_blk] = N_bord_blk; rthresh = (context->aztec_choices->params)[AZ_rthresh]; athresh = (context->aztec_choices->params)[AZ_athresh]; ifill = (context->aztec_choices->options)[AZ_graph_fill]; ifp_preconditioner(&precon, bmat, IFP_BILUK, (double) ifill, 0.0, IFP_SVD, rthresh, athresh); if ((context->aztec_choices->options)[AZ_output]>0) { ifp_biluk_stats(precon); } context->precon = precon; break; /* End Aztec 2.1 mheroux mod */ #else AZ_perror("IFPACK not linked. Must compile with -DIFPACK"); #endif case AZ_bilu: if (N == 0) return; bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for bilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) { *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); } /* recover some space so that there will */ /* be enough room to convert back to vbr */ i = AZ_compress_msr(&(context->A_overlapped->bindx), &(context->A_overlapped->val), context->N_nz_allocated, *nz_used, name, context); context->N_nz = *nz_used; context->N_nz_allocated = *nz_used; AZ_msr2vbr_mem_efficient(N, &(context->A_overlapped->bindx), &(context->A_overlapped->val), &(context->A_overlapped->cpntr), &(context->A_overlapped->bpntr), &(context->A_overlapped->indx), &N_blk_rows, (context->A_overlapped->data_org)[AZ_name], context->tag,i); context->A_overlapped->matrix_type = AZ_VBR_MATRIX; bindx = context->A_overlapped->bindx; bpntr = context->A_overlapped->bpntr; val = context->A_overlapped->val; sprintf(str,"ipvt %s",context->tag); context->ipvt = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"dblock %s",context->tag); context->dblock= (int *) AZ_manage_memory((N_blk_rows+1)* sizeof(int), AZ_ALLOC, name, str, &i); context->N_blk_rows = N_blk_rows; /* set dblock to point to the diagonal block in each block row */ for (i = 0 ; i < N_blk_rows ; i++ ) { for (j = bpntr[i] ; j < bpntr[i+1] ; j++ ) { if (bindx[j] == i) context->dblock[i] = j; } } AZ_fact_bilu(N_blk_rows, context->A_overlapped, context->dblock, context->ipvt); break; case AZ_ilut: cr = (double *) AZ_allocate((2*N+3+context->max_row)*sizeof(int)+ (2*N+2+context->max_row)*sizeof(double)); if (cr == NULL) AZ_perror("Out of space in ilut.\n"); unorm = &(cr[N+2]); a = &(unorm[N]); ind = (int *) &(a[context->max_row]); jnz = &(ind[N+3]); ja = &(jnz[N]); sprintf(str,"iu %s",context->tag); context->iu = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str, &i); AZ_fact_ilut(&N, context->A_overlapped, a, ja, (context->aztec_choices->params)[AZ_drop], context->extra_fact_nz_per_row, N_nz - bindx[N], context->iu,cr,unorm,ind, nz_used, jnz, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); AZ_free(cr); break; case AZ_ilu: dtemp = 0.0; case AZ_rilu: if (N == 0) return; sprintf(str,"iu %s",context->tag); bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for ilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) { *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); } context->iu= (int *) AZ_manage_memory((N+1)*sizeof(int),AZ_ALLOC, name, str, &i); iw = (int *) AZ_allocate((N+1)*sizeof(int)); if (iw == NULL) AZ_perror("Out of space in ilu.\n"); AZ_fact_rilu(N, nz_used, context->iu, iw, context->A_overlapped, dtemp, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); AZ_free(iw); break; case AZ_icc: sprintf(str,"iu %s",context->tag); bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for ilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); AZ_fact_chol(context->A_overlapped->bindx, context->A_overlapped->val,N, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); break; case AZ_lu: #ifdef HAVE_AZLU if (N == 0) return; aflag = (double *) AZ_allocate(8*sizeof(double)); rnr = (int *) AZ_allocate(N_nz*sizeof(int)); if (rnr == NULL) AZ_perror("Out of space in lu.\n"); sprintf(str,"iflag %s",context->tag); context->iflag = (int *) AZ_manage_memory(10*sizeof(int), AZ_ALLOC, name, str ,&i); sprintf(str,"ha %s",context->tag); context->ha = (int *) AZ_manage_memory(11*(N+1)*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"pivot %s",context->tag); context->pivot = (double *) AZ_manage_memory((N+1)*sizeof(double), AZ_ALLOC, name, str,&i); aflag[0] = 16.0; aflag[2] = 1.0e8; aflag[3] = 1.0e-12; aflag[1] = (context->aztec_choices->params)[AZ_drop]; /* set up flags for the sparse factorization solver */ context->iflag[0] = 1; context->iflag[1] = 2; context->iflag[2] = 1; context->iflag[3] = 0; context->iflag[4] = 2; /* Note: if matrix is pos def, iflag[2] = 2 is cheaper */ N_nz_matrix = bindx[N] - 1; AZ_msr2lu(N, context->A_overlapped, rnr); /* Mark bindx so we can see what was not used later */ for (i = N_nz_matrix ; i < N_nz ; i++) bindx[i] = -7; /* factor the matrix */ if (N == 1) { context->A_overlapped->val[0]=1./context->A_overlapped->val[0]; } else { context->N_nz_factors = N_nz; fake_rhs = (double *) AZ_allocate(N*sizeof(double)); if (fake_rhs == NULL) { AZ_printf_out("Not enough memory inside subdomain_solve\n"); } for (i = 0 ; i < N ; i++ ) fake_rhs[i] = 0.0; AZ_fact_lu(fake_rhs, context->A_overlapped,aflag, context->pivot, rnr, context->ha, context->iflag, &N_nz_matrix, &ifail, &(context->N_nz_factors), &N, &N); (context->iflag)[4] = 3; AZ_free(fake_rhs); /* find out what was not used by checking what was not touched */ *nz_used = N_nz; for (i = N_nz_matrix; i < N_nz ; i++ ) { if (bindx[i] != -7) *nz_used = i; } (*nz_used)++; context->N_nz_factors = *nz_used; } AZ_free(rnr); AZ_free(aflag); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; default: if (context->aztec_choices->options[AZ_subdomain_solve] >= AZ_SOLVER_PARAMS) { AZ_printf_err("Unknown subdomain solver(%d)\n", context->aztec_choices->options[AZ_subdomain_solve]); exit(1); } } }
void AZ_solve_subdomain(double x[],int N, struct context *context) { /**************************************************************************** Given a vector 'x' representing the right hand side, solve the system using whatever subdomain solver is indicated by 'context->which' and whatever factorization information has already been computed. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== x On input, the right hand side of the subdomain system that is to be solved. On output, the solution of the subdomain system. N On input, the size of the linear system to be solved. bindx2,val2 On input, matrix or factorization information to be used by the solver. For most schemes, this information is in MSR format. However, the lu and bilu scheme would have this information in another format. Note: additional array information can be passed through context. context On input, the various fields are set to solver specific information corresponding to algorithm parameters as well as a previously done factorization. *******************************************************************************/ double *val2; int *bindx2; int N_blk_rows; #ifdef HAVE_AZLU int ifail; #endif int *sub_options, sub_proc_config[AZ_PROC_SIZE], *hold_data_org, *new_data_org; double *sub_params, *sub_status; AZ_MATRIX *sub_matrix; AZ_PRECOND *sub_precond; struct AZ_SCALING *sub_scaling; #ifdef AZTEC_MPI MPI_AZComm *tptr; #endif double *y; char label[80]; int t1, t2, t3, i, t4, t5 = 0; /* Begin Aztec 2.1 mheroux mod */ #ifdef IFPACK int ione = 1; void *precon; #endif /* End Aztec 2.1 mheroux mod */ val2 = context->A_overlapped->val; bindx2 = context->A_overlapped->bindx; switch(context->aztec_choices->options[AZ_subdomain_solve]) { /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: #ifdef IFPACK y = (double *) malloc (N * sizeof(double)); DCOPY_F77(&N, x, &ione, y, &ione); precon = context->precon; ifp_apply(precon, N, 1, y, N, x, N); free((void *) y); #endif break; /* End Aztec 2.1 mheroux mod */ case AZ_bilu: N_blk_rows = context->N_blk_rows; AZ_lower_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x); AZ_upper_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x, context->ipvt, context->dblock); break; case AZ_ilut: case AZ_rilu: case AZ_ilu: AZ_lower_tsolve(x,N, val2, bindx2, context->iu, x ); AZ_upper_tsolve( x, N, val2, bindx2, context->iu); break; case AZ_icc: AZ_lower_icc(bindx2,val2,N,x); AZ_upper_icc(bindx2,val2,N,x); break; case AZ_lu: #ifdef HAVE_AZLU if (N == 0) return; else if (N== 1) { x[0] *= val2[0]; ifail = 0; } else AZ_backsolve(val2, context->pivot,x, bindx2, context->ha, context->iflag, &ifail, &(context->N_nz_factors), &N, &N); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; default: if (context->aztec_choices->options[AZ_subdomain_solve] >= AZ_SOLVER_PARAMS) { AZ_printf_out("ERROR: Unknown subdomain solver %d\n", context->aztec_choices->options[AZ_subdomain_solve]); exit(1); } else { /* better to put most of this in the factorization */ AZ_recover_sol_params(context->aztec_choices->options[ AZ_subdomain_solve], &sub_options, &sub_params, &sub_status, &sub_matrix, &sub_precond, &sub_scaling); t1 = sub_options[AZ_recursion_level]; sub_options[AZ_recursion_level]++; t2 = sub_options[AZ_output]; if (context->proc_config[AZ_node] != 0 ) sub_options[AZ_output] = AZ_none; t3 = context->proc_config[AZ_MPI_Tag]; /* fix data_org */ hold_data_org = context->A_overlapped->data_org; new_data_org = (int *) AZ_allocate( sizeof(int) * AZ_send_list ); if (new_data_org == NULL) { AZ_printf_out("Error: Not enough space for subdomain matrix\n"); exit(1); } context->A_overlapped->data_org = new_data_org; context->A_overlapped->matvec = AZ_MSR_matvec_mult; new_data_org[AZ_matrix_type] = AZ_MSR_MATRIX; new_data_org[AZ_N_internal] = N; new_data_org[AZ_N_border ] = 0; new_data_org[AZ_N_external] = 0; new_data_org[AZ_N_int_blk ] = N; new_data_org[AZ_N_bord_blk] = 0; new_data_org[AZ_N_ext_blk ] = 0; new_data_org[AZ_N_neigh ] = 0; new_data_org[AZ_total_send] = 0; new_data_org[AZ_name ] = hold_data_org[AZ_name]; new_data_org[AZ_internal_use]= 0; new_data_org[AZ_N_rows ]= N; sub_precond->Pmat = context->A_overlapped; sub_precond->prec_function = AZ_precondition; sub_proc_config[AZ_node] = 0; sub_proc_config[AZ_N_procs] = 1; #ifdef AZTEC_MPI tptr = AZ_get_comm(context->proc_config); AZ_set_comm(sub_proc_config, *tptr); #endif sprintf(label,"y in ssolve%d", sub_options[AZ_recursion_level]); y = AZ_manage_memory((N+1)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) y[i] = x[i]; for (i = 0 ; i < N ; i++ ) x[i] = 0.0; t4 = sub_options[AZ_keep_info]; sub_options[AZ_keep_info] = 1; if (context->aztec_choices->options[AZ_pre_calc] >= AZ_reuse) { t5 = sub_options[AZ_pre_calc]; sub_options[AZ_pre_calc] = AZ_sys_reuse; } AZ_oldsolve(x, y,sub_options,sub_params, sub_status, sub_proc_config, context->A_overlapped, sub_precond, sub_scaling); sub_options[AZ_keep_info] = t4; if (context->aztec_choices->options[AZ_pre_calc] == AZ_sys_reuse) sub_options[AZ_pre_calc] = t5; sub_options[AZ_recursion_level] = t1; sub_options[AZ_output] = t2; context->A_overlapped->data_org = hold_data_org; AZ_free(new_data_org); context->proc_config[AZ_MPI_Tag] = t3; } } }
void AZ_pgmresr(double b[], double x[],double weight[], int options[], double params[], int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* This routine uses Saad's restarted Genralized Minimum Residual method to solve the nonsymmetric matrix problem Ax = b. IMPORTANT NOTE: While the 2-norm of the gmres residual is available, the actual residual is not normally computed as part of the gmres algorithm. Thus, if the user uses a convergence condition (see AZ_gmres_global_scalars()) that is based on the 2-norm of the residual there is no need to compute the residual (i.e. r_avail = AZ_FALSE). However, if another norm of r is requested, AZ_gmres_global_scalars() sets r_avail = AZ_TRUE and the algorithm computes the residual. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== Amat: Structure used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see file Aztec User's Guide). proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int k; int i, N, NN, converged, one = 1, iter, r_avail = AZ_FALSE; int print_freq, proc, kspace; double **UU, **CC, *dots, *tmp, *res; double dble_tmp, r_2norm = 1.0, epsilon; double rec_residual, scaled_r_norm, true_scaled_r=0.0; double actual_residual = -1.0, minus_alpha, alpha; double *dummy = (double *) 0; double *UUblock, *CCblock; int mm, ii; char label[64],suffix[32], prefix[64]; int *data_org, str_leng, first_time = AZ_TRUE; double doubleone = 1.0, minusone = -1.0, init_time = 0.0; char *T = "T"; char *T2 = "N"; /**************************** execution begins ******************************/ sprintf(suffix," in gmresr%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; epsilon = params[AZ_tol]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; kspace = options[AZ_kspace]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 0; /* GMRES seldom updates solution */ convergence_info->epsilon = params[AZ_tol]; /* allocate memory for required vectors */ NN = kspace + 1; /* +1: make sure everybody allocates something */ sprintf(label,"dots%s",suffix); dots = AZ_manage_memory(2*NN*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); tmp = &(dots[NN]); sprintf(label,"CC%s",suffix); CC = (double **) AZ_manage_memory(2*NN*sizeof(double *), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); UU = &(CC[NN]); NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for intel */ sprintf(label,"UUblock%s",suffix); UUblock = AZ_manage_memory(2*NN*kspace*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id,label, &i); for (k = 0; k < kspace; k++) UU[k] = &(UUblock[k*NN]); CCblock = &(UUblock[kspace*NN]); for (k = 0; k < kspace; k++) CC[k] = &(CCblock[k*NN]); sprintf(label,"res%s",suffix); res = AZ_manage_memory(NN*sizeof(double),AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); AZ_compute_residual(b, x, res, proc_config, Amat); /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] */ r_2norm = DDOT_F77(&N, res, &one, res, &one); AZ_gdot_vec(1, &r_2norm, &rec_residual, proc_config); r_2norm = sqrt(r_2norm); rec_residual = r_2norm; AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, NULL, NULL, NULL, convergence_info); r_2norm = rec_residual; converged = scaled_r_norm < epsilon; if ( (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_summary) && (options[AZ_output] != AZ_warnings) && (proc == 0) ) (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); iter = 0; /*rst change while (!converged && iter < options[AZ_max_iter]) { */ while (!(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { convergence_info->iteration = iter; i = 0; /*rst change while (i < kspace && !converged && iter < options[AZ_max_iter]) { */ while (i < kspace && !(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { iter++; convergence_info->iteration = iter; /* v_i+1 = A M^-1 v_i */ DCOPY_F77(&N, res , &one, UU[i], &one); if (iter == 1) init_time = AZ_second(); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int precID = -1; precID = Teuchos_startTimer( "AztecOO: Operation Prec*x", precID ); #endif #endif precond->prec_function(UU[i],options,proc_config,params,Amat,precond); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( precID ); #endif #endif if (iter == 1) status[AZ_first_precond] = AZ_second() - init_time; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int matvecID = -1; matvecID = Teuchos_startTimer( "AztecOO: Operation Op*x", matvecID ); #endif #endif Amat->matvec(UU[i], CC[i], Amat, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( matvecID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoID = -1; orthoID = Teuchos_startTimer( "AztecOO: Orthogonalization", orthoID ); #endif #endif /* Gram-Schmidt orthogonalization */ if (!options[AZ_orthog]) { /* classical (stabilized) */ for (ii = 0 ; ii < 2 ; ii++ ) { dble_tmp = 0.0; mm = i; if (N == 0) for (k = 0 ; k < i ; k++) dots[k] = 0.0; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoInnerProdID = -1; orthoInnerProdID = Teuchos_startTimer( "AztecOO: Ortho (Inner Product)", orthoInnerProdID ); #endif #endif DGEMV_F77(CHAR_MACRO(T[0]), &N, &mm, &doubleone, CCblock, &NN, CC[i], &one, &dble_tmp, dots, &one); AZ_gdot_vec(i, dots, tmp, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoInnerProdID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoUpdateID = -1; orthoUpdateID = Teuchos_startTimer( "AztecOO: Ortho (Update)", orthoUpdateID ); #endif #endif DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, CCblock, &NN, dots, &one, &doubleone, CC[i], &one); DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, UUblock, &NN, dots, &one, &doubleone, UU[i], &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoUpdateID ); #endif #endif } } else { /* modified */ for (k = 0; k < i; k++) { alpha = AZ_gdot(N, CC[k], CC[i], proc_config); minus_alpha = -alpha; DAXPY_F77(&N, &minus_alpha, CC[k], &one, CC[i], &one); DAXPY_F77(&N, &minus_alpha, UU[k], &one, UU[i], &one); } } /* normalize vector */ #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS static int orthoNormID = -1; orthoNormID = Teuchos_startTimer( "AztecOO: Ortho (Norm)", orthoNormID ); #endif #endif dble_tmp = sqrt(AZ_gdot(N, CC[i], CC[i], proc_config)); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoNormID ); #endif #endif if (dble_tmp > DBL_EPSILON*r_2norm) dble_tmp = 1.0 / dble_tmp; else dble_tmp = 0.0; DSCAL_F77(&N, &dble_tmp, CC[i], &one); DSCAL_F77(&N, &dble_tmp, UU[i], &one); dble_tmp = AZ_gdot(N, CC[i], res, proc_config); DAXPY_F77(&N, &dble_tmp, UU[i], &one, x, &one); dble_tmp = -dble_tmp; DAXPY_F77(&N, &dble_tmp, CC[i], &one, res, &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop the timer. */ Teuchos_stopTimer( orthoID ); #endif #endif /* determine residual norm & test convergence */ r_2norm = sqrt(AZ_gdot(N, res, res, proc_config)); rec_residual = r_2norm; /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * NOTE: if r_avail = AZ_TRUE or AZ_FIRST is passed in, we perform * step 1), otherwise ||r|| is taken as rec_residual. */ AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, dummy, dummy, dummy, convergence_info); converged = scaled_r_norm < epsilon; /*rst change if ( (iter%print_freq == 0) && proc == 0) */ if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0) (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); i++; /* subspace dim. counter dim(K) = i - 1 */ #ifdef out if (options[AZ_check_update_size] & converged) converged = AZ_compare_update_vs_soln(N, -1.,dble_tmp, UU[i-1], x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (converged) { /* compute true residual using 'v[kspace]' as a temporary vector */ AZ_scale_true_residual(x, b, res, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); converged = true_scaled_r < params[AZ_tol]; if (!converged && (AZ_get_new_eps(&epsilon, scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT)) { /* * Computed residual has converged, actual residual has not * converged, AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } #endif } } if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings)) (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter, scaled_r_norm); if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #ifdef out /* check if we exceeded maximum number of iterations */ if (converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #endif } /* AZ_pgmres */
void AZ_pcg_f(double b[], double x[], double weight[], int options[], double params[], int proc_config[],double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditioner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE; double alpha, beta = 0.0, nalpha, true_scaled_r=-1.0; double *r, *z, *p, *ap, actual_residual = -1.0; double r_z_dot, r_z_dot_old, p_ap_dot, rec_residual=-1.0; double scaled_r_norm=-1.0, brkdown_tol = DBL_EPSILON; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; double **saveme, *ptap; int *kvec_sizes = NULL, current_kept = 0; double *dots; double doubleone = 1., dzero = 0.; char *T = "T"; char *T2 = "N"; double *block; /**************************** execution begins ******************************/ sprintf(suffix," in cg%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; /* pull needed values out of parameter arrays */ data_org = Amat->data_org; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* CG always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate space for necessary vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for assembly */ /* matvec on paragon. */ sprintf(label,"z%s",suffix); p = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label, &j); r = &(p[1*NN]); z = &(p[2*NN]); ap = &(p[3*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); if (options[AZ_apply_kvecs]) { AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,NULL, NULL, &r_z_dot, convergence_info); AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double)); if (dots == NULL) { printf("Not space to apply vectors in CG\n"); exit(1); } DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N, r, &one, &dzero, dots, &one); AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]), proc_config); for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i]; DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone, block, &N, dots, &one, &doubleone, x, &one); AZ_free(dots); AZ_compute_residual(b, x, r, proc_config, Amat); if ((options[AZ_output] != AZ_none) && (proc == 0)) printf("\t\tApplied Previous Krylov Vectors ... \n\n"); } if (options[AZ_keep_kvecs] > 0) AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); /* z = M r */ /* p = 0 */ DCOPY_F77(&N, r, &one, z, &one); status[AZ_first_precond] = AZ_second(); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); status[AZ_first_precond] = AZ_second() - status[AZ_first_precond]; for (i = 0; i < N; i++ ) p[i] = 0.0; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,r, z, &r_z_dot, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) { (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); AZ_flush_out(); } for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++ ) { convergence_info->iteration = iter; /* p = z + beta * p */ /* ap = A p */ for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i]; Amat->matvec(p, ap, Amat, proc_config); if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) { for (i = 0; i < current_kept; i++) { alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i]; DAXPY_F77(&N, &alpha, saveme[i], &one, p, &one); } if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config); } p_ap_dot = AZ_gdot(N, p, ap, proc_config); if (p_ap_dot < brkdown_tol) { /* possible problem */ if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) { /* something wrong */ AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = r_z_dot / p_ap_dot; nalpha = -alpha; /* x = x + alpha*p */ /* r = r - alpha*Ap */ /* z = M^-1 r */ DAXPY_F77(&N, &alpha, p, &one, x, &one); if (iter <= options[AZ_keep_kvecs]) { DCOPY_F77(&N, p, &one, saveme[iter-1], &one); ptap[iter-1] = p_ap_dot ; kvec_sizes[AZ_Nkept]++; current_kept = kvec_sizes[AZ_Nkept]; } /* else { i = (iter-1)%options[AZ_keep_kvecs]; DCOPY_F77(&N, p, &one, saveme[i], &one); ptap[i] = p_ap_dot ; } */ DAXPY_F77(&N, &nalpha, ap, &one, r, &one); DCOPY_F77(&N, r, &one, z, &one); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); r_z_dot_old = r_z_dot; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, z, &r_z_dot, convergence_info); if (brkdown_will_occur) { AZ_scale_true_residual( x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat,convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = r_z_dot / r_z_dot_old; if (fabs(r_z_dot) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(r_z_dot); } if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (convergence_info->converged) { AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test) ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* AZ_pcg */
void AZ_find_MSR_ordering(int bindx2[],int **ordering,int N, int **inv_ordering, int name, struct context *context) /******************************************************************************* Use a reverse cuthill McKee algorithm to find an ordering for the matrix. Author: R. Tuminaro Return code: void ============ Parameter list: =============== bindx2: On input, the nonzero sparsity pattern of the matrix for which we will determine a new ordering. Note: bindx2 is changed in this routine, but then returned to its original values before exiting. ordering: On output, ordering[i] gives the new location of row i in the reordered system. inv_ordering: On output, inv_ordering[i] gives the location of row */ { int i; int *mask; int root, nlvl, ccsize; int total = 0; char str[80]; /* convert matrix to Fortran format */ if (N==0) return; for (i = N+1 ; i < bindx2[N]; i++ ) bindx2[i]++; for (i = 0 ; i <= N ; i++ ) bindx2[i] -= N; /* initialize arrays for fnroot() and rcm() */ sprintf(str,"inv_ordering %s",context->tag); *inv_ordering = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str,&i); sprintf(str,"ordering %s",context->tag); *ordering = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str,&i); mask = (int *) AZ_allocate((N+1)*sizeof(int)); if (mask == NULL) { AZ_printf_out("Not enough space for RCM reordering\n"); AZ_exit(1); } for (i = 0 ; i < N ; i++ ) mask[i] = 1; root = 1; while (total != N ) { AZ_FNROOT_F77(&root,bindx2,&(bindx2[N+1]),mask, &nlvl, &((*ordering)[total]), *inv_ordering); AZ_RCM_F77(&root,bindx2,&(bindx2[N+1]),mask,&((*ordering)[total]), &ccsize, *inv_ordering); if ( ccsize != N) { for (i = 0 ; i < ccsize ; i++) mask[(*ordering)[total+i]-1] = 0; for (i = 0 ; i < N ; i++ ) { if ( mask[i] == 1) break; } root = i+1; } total += ccsize; if (ccsize == 0) { AZ_printf_out("Error inside reordering\n"); AZ_exit(1); } } /* convert matrix back to C format */ for (i = 0 ; i <= N ; i++ ) bindx2[i] += N; for (i = N+1 ; i < bindx2[N]; i++ ) bindx2[i]--; /* convert ordering to C format */ for (i = 0 ; i < N ; i++ ) (*ordering)[i]--; /* produce the inverse order */ for (i = 0 ; i < N ; i++) (*inv_ordering)[(*ordering)[i]] = i; AZ_free(mask); }
int test_azoo_scaling(Epetra_CrsMatrix& A, Epetra_Vector& x, Epetra_Vector& b, bool verbose) { Epetra_Vector vec1(x); Epetra_Vector vec2(x); Epetra_Vector diag(x); Epetra_Vector vec3(x); Epetra_Vector vec4(x); Epetra_Vector rhs(x); Epetra_Vector soln_none(x); Epetra_Vector soln_jacobi(x); Epetra_Vector soln_rowsum(x); Epetra_Vector soln_symdiag(x); vec1.PutScalar(1.0); A.Multiply(false, vec1, vec2); A.ExtractDiagonalCopy(diag); double* diag_vals = NULL; diag.ExtractView(&diag_vals); int* options = new int[AZ_OPTIONS_SIZE]; double* params = new double[AZ_PARAMS_SIZE]; AZ_defaults(options, params); options[AZ_output] = verbose ? 1 : AZ_none; options[AZ_scaling] = AZ_Jacobi; AztecOO::MatrixData mdata(&A); AZ_MATRIX* Amat = AZ_matrix_create(vec1.Map().NumMyElements()); AZ_set_MATFREE(Amat, (void*)(&mdata), Epetra_Aztec_matvec); AZ_SCALING* scaling = AZ_scaling_create(); double* xvals = NULL, *bvals = NULL; x.ExtractView(&xvals); b.ExtractView(&bvals); int err = AztecOO_scale_epetra(AZ_SCALE_MAT_RHS_SOL, Amat, options, bvals, xvals, NULL, scaling); if (err != 0) { if (verbose) { cout << "AztecOO_scale_epetra returned err="<<err<<endl; } return(err); } A.Multiply(false, vec1, vec3); vec4.Multiply(1.0, diag, vec3, 0.0); double vec2nrm, vec4nrm; vec2.Norm2(&vec2nrm); vec4.Norm2(&vec4nrm); if (fabs(vec2nrm - vec4nrm) > 1.e-6) { return(-1); } //now call the scaling function again, just to allow for //testing memory-leak issues. err = AztecOO_scale_epetra(AZ_SCALE_MAT_RHS_SOL, Amat, options, bvals, xvals, NULL, scaling); if (err != 0) { if (verbose) { cout << "AztecOO_scale_epetra returned err="<<err<<endl; } return(err); } AztecOO_scale_epetra(AZ_DESTROY_SCALING_DATA, Amat, options, bvals, xvals, NULL, scaling); x.PutScalar(1.0); Epetra_CrsMatrix* Atmp = create_and_fill_crs_matrix(A.RowMap()); Atmp->Multiply(false, x, rhs); x.PutScalar(0.0); AztecOO azoo(&A, &x, &b); azoo.SetAztecOption(AZ_scaling, AZ_Jacobi); if (verbose) { azoo.SetAztecOption(AZ_output, 1); } else { azoo.SetAztecOption(AZ_output, AZ_none); } azoo.Iterate(100, 1.e-6); delete Atmp; Epetra_CrsMatrix* Atmp1 = create_and_fill_crs_matrix(A.RowMap()); x.PutScalar(1.0); Atmp1->Multiply(false, x, rhs); soln_rowsum.PutScalar(0.0); AztecOO azoo1(Atmp1, &soln_rowsum, &rhs); azoo1.SetAztecOption(AZ_scaling, AZ_row_sum); azoo1.Iterate(100, 1.e-8); delete Atmp1; Epetra_CrsMatrix* Atmp2 = create_and_fill_crs_matrix(A.RowMap()); x.PutScalar(1.0); Atmp2->Multiply(false, x, rhs); soln_symdiag.PutScalar(0.0); AztecOO azoo2(Atmp2, &soln_symdiag, &rhs); azoo2.SetAztecOption(AZ_scaling, AZ_sym_diag); azoo2.Iterate(100, 1.e-8); delete Atmp2; Epetra_CrsMatrix* Atmp3 = create_and_fill_crs_matrix(A.RowMap()); x.PutScalar(1.0); Atmp3->Multiply(false, x, rhs); soln_none.PutScalar(0.0); AztecOO azoo3(Atmp3, &soln_none, &rhs); azoo3.SetAztecOption(AZ_scaling, AZ_none); azoo3.Iterate(100, 1.e-8); delete Atmp3; Epetra_CrsMatrix* Atmp4 = create_and_fill_crs_matrix(A.RowMap()); x.PutScalar(1.0); Atmp4->Multiply(false, x, rhs); soln_jacobi.PutScalar(0.0); AztecOO azoo4(Atmp4, &soln_jacobi, &rhs); azoo4.SetAztecOption(AZ_scaling, AZ_Jacobi); azoo4.Iterate(100, 1.e-8); delete Atmp4; //at this point, soln_none, soln_jacobi, soln_rowsum and soln_symdiag //should be the same or at least close to the same, since the //matrix used in the solution has well-behaved coefficients. //form vec1 = soln_none - soln_rowsum vec1.PutScalar(0.0); vec1.Update(1.0, soln_none, 0.0); vec1.Update(-1.0, soln_rowsum, 1.0); double norm_check1= 0.0; vec1.Norm2(&norm_check1); //form vec2 = soln_none - soln_symdiag vec2.PutScalar(0.0); vec2.Update(1.0, soln_none, 0.0); vec2.Update(-1.0, soln_symdiag, 1.0); double norm_check2= 0.0; vec2.Norm2(&norm_check2); //form vec3 = soln_none - soln_jacobi vec3.PutScalar(0.0); vec3.Update(1.0, soln_none, 0.0); vec3.Update(-1.0, soln_jacobi, 1.0); double norm_check3= 0.0; vec3.Norm2(&norm_check3); if (std::abs(norm_check1) > 1.e-6) { if (verbose) { cerr << "AZ_row_sum scaling produced bad soln" << endl; } return(-1); } if (std::abs(norm_check2) > 1.e-6) { if (verbose) { cerr << "AZ_sym_diag scaling produced bad soln" << endl; } return(-1); } if (std::abs(norm_check3) > 1.e-6) { if (verbose) { cerr << "AZ_Jacobi scaling produced bad soln" << endl; } return(-1); } options[AZ_pre_calc] = AZ_reuse; err = AztecOO_scale_epetra(AZ_SCALE_MAT_RHS_SOL, Amat, options, bvals, xvals, NULL, scaling); if (err == 0) { if (verbose) { cerr << "AztecOO_scale_epetra failed to return err when" << " asked to reuse non-existent scaling data."<<endl; } return(-1); } options[AZ_keep_info] = 1; options[AZ_pre_calc] = AZ_calc; err = AztecOO_scale_epetra(AZ_SCALE_MAT_RHS_SOL, Amat, options, bvals, xvals, NULL, scaling); if (err != 0) { if (verbose) { cerr << "AztecOO_scale_epetra returned err=="<<err<<endl; } return(err); } options[AZ_keep_info] = 0; options[AZ_pre_calc] = AZ_reuse; err = AztecOO_scale_epetra(AZ_SCALE_MAT_RHS_SOL, Amat, options, bvals, xvals, NULL, scaling); if (err != 0) { if (verbose) { cerr << "AztecOO_scale_epetra returned err=="<<err <<" when asked to reuse scaling data"<<endl; } return(err); } options[AZ_pre_calc] = AZ_calc; err = AztecOO_scale_epetra(AZ_DESTROY_SCALING_DATA, Amat, options, bvals, xvals, NULL, scaling); if (err != 0) { if (verbose) { std::cerr << "AztecOO_scale_epetra returned err=="<<err << " when asked to destroy scaling data."<<std::endl; } return(err); } AZ_matrix_destroy(&Amat); delete [] options; delete [] params; AZ_scaling_destroy(&scaling); AZ_manage_memory(0, AZ_CLEAR_ALL, 0, 0, 0); return(0); }
void AZ_polynomial_expansion( double z[], int options[], int proc_config[], AZ_PRECOND *precond ) /******************************************************************************* Uses a Neuman series expansion to approximate the inverse of a matrix. The series expansion is in terms of (I - A/omega) where I is the identity, A the matrix for which the inverse is being approximated, and omega is a scaling factor (omega >= || A || / 2 , Wong and Jiang (1989) or the diagonal element if it is a constant). If power = 0 then diagonal scaling is performed. If power < 0 then an unparameterized expansion is used. If power > 0 then a parameterized expansion developed by a least squares method is used. This technique minimizes the L2 norm of the residual polynomial R(), on an evalue interval of [0,lambda_max] where lambda_max is an estimate of the largest evalue of A.(see Saad (1985)). This version assumes that diagonal scaling has been carried out on the entire set of equations. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== z: On input, is the residual(rhs) of the set of equations. On output is the result. options: Determines specific solution method and other parameters. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. precond: Structure used to represent the preocnditioner (see az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ int param_flag, one = 1, j; register int i, p; register double cp; double lambda_max; static double c[15], inv_omega; int N, power; double *w, *poly_temp; int *data_org, *bindx, *indx, *cpntr, *rpntr, *bpntr; double *val; /**************************** execution begins ******************************/ data_org = precond->Pmat->data_org; val = precond->Pmat->val; bindx = precond->Pmat->bindx; cpntr = precond->Pmat->cpntr; indx = precond->Pmat->indx; rpntr = precond->Pmat->rpntr; bpntr = precond->Pmat->bpntr; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; power = options[AZ_poly_ord]; poly_temp = (double *) AZ_manage_memory(2*(N+data_org[AZ_N_external])* sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, "poly mem", &j); w = &(poly_temp[N+data_org[AZ_N_external]]); if (options[AZ_precond] == AZ_Neumann ) param_flag = 0; else param_flag = 1; if (options[AZ_pre_calc] < AZ_sys_reuse) { if (precond->Pmat->data_org[AZ_matrix_type] == AZ_USER_MATRIX) { lambda_max = precond->Pmat->matrix_norm; if (lambda_max < 0.0) { if (proc_config[AZ_node] == 0) { AZ_printf_err("Error: Matrix norm not given. Use "); AZ_printf_err("AZ_set_MATFREE_matrix_norm() to set it.\n"); } exit(1); } } else if (precond->Pmat->data_org[AZ_matrix_type] == AZ_MSR_MATRIX || precond->Pmat->data_org[AZ_matrix_type] == AZ_VBR_MATRIX ) { lambda_max = AZ_gmax_matrix_norm(val, indx, bindx, rpntr, cpntr, bpntr, proc_config, data_org); /* change sign of lambda_max if diagonal contains only negative values */ AZ_change_sign(&lambda_max, val, indx, bindx, rpntr, cpntr, bpntr, data_org); } inv_omega = 1.0 / (0.55 * lambda_max); /* 1.1*lambda_max/2 */ if (param_flag) AZ_get_poly_coefficients(power, lambda_max, c, param_flag); } switch (param_flag) { case 0: /* Neumann series */ DSCAL_F77(&N, &inv_omega, z, &one); DCOPY_F77(&N, z, &one, w, &one); for (p = power; p > 0; p--){ precond->Pmat->matvec(z, poly_temp, precond->Pmat, proc_config); for (i = 0; i < N; i++) z[i] += w[i] - inv_omega * poly_temp[i]; } break; case 1: /* least squares */ /* initialization */ DCOPY_F77(&N, z, &one, w, &one); DSCAL_F77(&N, c+power, z, &one); for (p = power - 1; p >= 0; p--) { precond->Pmat->matvec(z, poly_temp, precond->Pmat, proc_config); cp = *(c+p); for (i = 0; i < N; i++) z[i] = cp * w[i] + poly_temp[i]; } break; default: if (proc_config[AZ_node] == 0) { (void) AZ_printf_err( "Error: invalid polynomial preconditioner\n" " options[AZ_precond] improperly set.\n"); } exit(-1); } } /* AZ_polynomial_expansion */
void AZ_pbicgstab(double b[], double x[], double weight[], int options[], double params[],int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Vand der Vorst's (1990) variation of the Bi-Conjugate Gradient algorthm (Sonneveld (1984,1989)) to solve the nonsymmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditionner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter=1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc; int brkdown_will_occur = AZ_FALSE; double alpha = 1.0, beta, true_scaled_r=0.0; double *v, *r, *rtilda, *p, *phat, *s, *shat; double omega = 1.0, dot_vec[2], tmp[2], init_time = 0.0; double rhonm1 = 1.0, rhon, sigma, brkdown_tol = DBL_EPSILON; double scaled_r_norm= -1.0, actual_residual = -1.0, rec_residual= -1.0; double dtemp; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; /**************************** execution begins ******************************/ sprintf(suffix," in cgstab%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* BiCGStab always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate memory for required vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something*/ NN = NN + (NN%2); /* make sure things are aligned for the */ /* assembly coded matvec() on the Intel. */ sprintf(label,"phat%s",suffix); phat = (double *) AZ_manage_memory(7*NN*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label,&j); p = &(phat[1*NN]); shat = &(phat[2*NN]); /* NOTE: phat and shat must be aligned */ /* so that the assembly dgemv */ /* works on the paragon. */ s = &(phat[3*NN]); r = &(phat[4*NN]); rtilda = &(phat[5*NN]); v = &(phat[6*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); /* v, p <- 0 */ for (i = 0; i < N; i++) v[i] = p[i] = 0.0; /* set rtilda */ if (options[AZ_aux_vec] == AZ_resid) DCOPY_F77(&N, r, &one, rtilda, &one); else AZ_random_vector(rtilda, data_org, proc_config); /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * 3) rho = <rtilda, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config,&r_avail,r,rtilda, &rhon, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) (void) AZ_printf_out("%siter: 0 residual = %e\n",prefix,scaled_r_norm); for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++) { if (brkdown_will_occur) { AZ_scale_true_residual( x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = (rhon/rhonm1) * (alpha/omega); if (fabs(rhon) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, rtilda, rhon, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(rhon); } rhonm1 = rhon; /* p = r + beta*(p - omega*v) */ /* phat = M^-1 p */ /* v = A phat */ dtemp = beta * omega; for (i = 0; i < N; i++) p[i] = r[i] + beta * p[i] - dtemp * v[i]; DCOPY_F77(&N, p, &one, phat, &one); if (iter==1) init_time = AZ_second(); if (precond_flag) precond->prec_function(phat,options,proc_config,params,Amat,precond); if (iter==1) status[AZ_first_precond] = AZ_second() - init_time; Amat->matvec(phat, v, Amat, proc_config); sigma = AZ_gdot(N, rtilda, v, proc_config); if (fabs(sigma) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, rtilda, v, sigma, proc_config)) { /* break down */ AZ_scale_true_residual( x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org,proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * fabs(sigma); } alpha = rhon / sigma; /* s = r - alpha*v */ /* shat = M^-1 s */ /* r = A shat (r is a tmp here for t ) */ for (i = 0; i < N; i++) s[i] = r[i] - alpha * v[i]; DCOPY_F77(&N, s, &one, shat, &one); if (precond_flag) precond->prec_function(shat,options,proc_config,params,Amat,precond); Amat->matvec(shat, r, Amat, proc_config); /* omega = (t,s)/(t,t) with r = t */ dot_vec[0] = DDOT_F77(&N, r, &one, s, &one); dot_vec[1] = DDOT_F77(&N, r, &one, r, &one); AZ_gdot_vec(2, dot_vec, tmp, proc_config); if (fabs(dot_vec[1]) < DBL_MIN) { omega = 0.0; brkdown_will_occur = AZ_TRUE; } else omega = dot_vec[0] / dot_vec[1]; /* x = x + alpha*phat + omega*shat */ /* r = s - omega*r */ DAXPY_F77(&N, &alpha, phat, &one, x, &one); DAXPY_F77(&N, &omega, shat, &one, x, &one); for (i = 0; i < N; i++) r[i] = s[i] - omega * r[i]; /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * 3) rho = <rtilda, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, rtilda, &rhon, convergence_info); if ( (iter%print_freq == 0) && proc == 0) (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) { dtemp = alpha/omega; DAXPY_F77(&N, &dtemp, phat, &one, shat, &one); convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,omega, shat, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); } if (convergence_info->converged) { AZ_scale_true_residual(x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&convergence_info->epsilon, scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test)) (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter, scaled_r_norm); /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* bicgstab */
void AZ_pqmrs(double b[], double x[], double weight[], int options[], double params[], int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Freund's transpose free QMR routine to solve the nonsymmetric matrix problem Ax = b. NOTE: this routine differs from Freund's paper in that we compute ubar (= M^-1 u ) and qbar (= M^-1 q) instead of u and q defined in Freund's paper. IMPORTANT NOTE: While an estimate of the 2-norm of the qmr residual is available (see comment below), the actual qmr residual is not normally computed as part of the qmr algorithm. Thus, if the user uses a convergence condition (see AZ_compute_global_scalars()) that is based on the 2-norm of the residual there is no need to compute the residual (i.e. r_avail = AZ_FALSE). However, if another norm of r is requested, AZ_compute_global_scalars() will set r_avail = AZ_TRUE and the algorithm will compute the residual. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see az_aztec.h and Aztec User's Guide). Oprecond: Structure used to represent the preconditionner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, converged, one = 1, iter= 1,r_avail = AZ_FALSE, j; int precond_flag, print_freq, proc; int brkdown_will_occur = AZ_FALSE; double alpha, beta = 0.0, true_scaled_r=0.0; double *ubar, *v, *r_cgs, *rtilda, *Aubar, *qbar, *Aqbar, *d, *Ad = NULL; double rhonm1, rhon, est_residual, actual_residual = -1.0; double scaled_r_norm, sigma, epsilon, brkdown_tol = DBL_EPSILON; double omega, c, norm_r_n_cgs, norm_r_nm1_cgs; double tau_m, nu_m, eta_m, init_time = 0.0; double tau_mm1, nu_mm1 = 0.0, eta_mm1 = 0.0, doubleone = 1.0; register double dtemp; double W_norm = 0.0; int offset = 0; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; /**************************** execution begins ******************************/ sprintf(suffix," in qmrcgs%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; epsilon = params[AZ_tol]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* allocate memory for required vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everyone allocates something */ NN = NN + (NN%2); /* make sure things are aligned on double words for paragon */ sprintf(label,"ubar%s",suffix); ubar = (double *) AZ_manage_memory(8*NN*sizeof(double), AZ_ALLOC,AZ_SYS,label,&j); v = &(ubar[1*NN]); Aubar = &(ubar[2*NN]); d = &(ubar[3*NN]); qbar = &(ubar[4*NN]); rtilda = &(ubar[5*NN]); Aqbar = &(ubar[6*NN]); r_cgs = &(ubar[7*NN]); AZ_compute_residual(b, x, r_cgs, proc_config, Amat); /* d, qbar, Aqbar, v = 0 */ for (i = 0; i < N; i++) d[i] = qbar[i] = Aqbar[i] = v[i] = 0.0; /* set rtilda */ if (options[AZ_aux_vec] == AZ_resid) dcopy_(&N, r_cgs, &one, rtilda, &one); else AZ_random_vector(rtilda, data_org, proc_config); /* * Compute a few global scalars: * 1) ||r_cgs|| corresponding to options[AZ_conv] * 2) scaled ||r_cgs|| corresponding to options[AZ_conv] * 3) rhon = <rtilda, r_cgs> * Note: step 1) is performed if r_avail = AZ_TRUE on entry or * AZ_FIRST_TIME is passed in. Otherwise, ||r_cgs|| is taken as * est_residual. */ AZ_compute_global_scalars(Amat, x, b, r_cgs, weight, &est_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r_cgs, rtilda, &rhon, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (proc == 0)) (void) fprintf(stdout, "%siter: 0 residual = %e\n",prefix,scaled_r_norm); norm_r_nm1_cgs = est_residual; tau_mm1 = norm_r_nm1_cgs; rhonm1 = rhon; /* Set up aux-vector if we need to compute the qmr residual */ if (r_avail) { sprintf(label,"Ad%s",suffix); Ad = (double *) AZ_manage_memory(NN*sizeof(double),AZ_ALLOC, AZ_SYS, label, &j); for (i = 0; i < N; i++) Ad[i] = 0.0; } converged = scaled_r_norm < epsilon; for (iter = 1; iter <= options[AZ_max_iter] && !converged; iter++) { if (fabs(rhon) < brkdown_tol) { /* possible breakdown problem */ if (AZ_breakdown_f(N, r_cgs, rtilda, rhon, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(rhon); } /* ubar = M^-1 r_cgs + beta*qbar */ /* Aubar = A ubar */ /* v = A ubar + beta ( A qbar + beta pnm1 ) */ /* = Aubar + beta ( Aqbar + beta v) */ dcopy_(&N, r_cgs, &one, ubar, &one); if (iter==1) init_time = AZ_second(); if (precond_flag) precond->prec_function(ubar,options,proc_config,params,Amat,precond); if (iter==1) status[AZ_first_precond] = AZ_second() - init_time; for (i = 0; i < N; i++) ubar[i] = ubar[i] + beta * qbar[i]; Amat->matvec(ubar, Aubar, Amat, proc_config); daxpy_(&N, &beta, v, &one, Aqbar, &one); for (i = 0; i < N; i++) v[i] = Aubar[i] + beta * Aqbar[i]; sigma = AZ_gdot(N, rtilda, v, proc_config); if (fabs(sigma) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, rtilda, v, sigma, proc_config)) { /* break down */ AZ_scale_true_residual(x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, est_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * fabs(sigma); } alpha = rhon / sigma; /* qbar = ubar - alpha* M^-1 v */ /* Aqbar = A qbar */ /* r_cgs = r_cgs - alpha (A ubar + A qbar) */ /* = r_cgs - alpha (Aubar + Aqbar) */ dcopy_(&N, v, &one, qbar, &one); if (precond_flag) precond->prec_function(qbar,options,proc_config,params,Amat,precond); for (i = 0; i < N; i++) qbar[i] = ubar[i] - alpha * qbar[i]; Amat->matvec(qbar, Aqbar, Amat, proc_config); for (i = 0; i < N; i++) r_cgs[i] = r_cgs[i] - alpha*(Aubar[i] + Aqbar[i]); /* QMRS scaling and iterates weights 5.11 */ norm_r_n_cgs = sqrt(AZ_gdot(N, r_cgs, r_cgs, proc_config)); /* m is odd in Freund's paper */ omega = sqrt(norm_r_nm1_cgs * norm_r_n_cgs); nu_m = omega / tau_mm1; c = 1.0 / sqrt(1.0 + nu_m * nu_m); tau_m = tau_mm1 * nu_m * c; eta_m = c * c * alpha; if (brkdown_will_occur) { AZ_scale_true_residual(x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, est_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } dtemp = nu_mm1 *nu_mm1 * eta_mm1 / alpha; for (i = 0; i < N; i++) d[i] = ubar[i] + dtemp * d[i]; daxpy_(&N, &eta_m, d, &one, x, &one); /* x = x - eta_m d */ if (r_avail) { for (i = 0; i < N; i++) Ad[i] = Aubar[i] + dtemp * Ad[i]; } /* save some values */ eta_mm1 = eta_m; tau_mm1 = tau_m; nu_mm1 = nu_m; norm_r_nm1_cgs = norm_r_n_cgs; /* m is even in Freund's paper */ omega = norm_r_n_cgs; if (tau_mm1 == 0.0) nu_m = 0.0; else nu_m = omega / tau_mm1; c = 1.0 / sqrt(1.0 + nu_m * nu_m); tau_m = tau_mm1 * nu_m * c; if (options[AZ_check_update_size]) { eta_m = eta_m/(c*c*alpha); for (i = 0; i < N; i++) ubar[i] = eta_m*d[i]; } eta_m = c * c * alpha; dtemp = nu_mm1 * nu_mm1 * eta_mm1 / alpha; for (i = 0; i < N; i++) d[i] = qbar[i] + dtemp * d[i]; daxpy_(&N, &eta_m, d, &one, x, &one); /* x = x - eta_m d */ if (r_avail) { for (i = 0; i < N; i++) Ad[i] = Aqbar[i] + dtemp * Ad[i]; } /* save some values */ eta_mm1 = eta_m; tau_mm1 = tau_m; nu_mm1 = nu_m; norm_r_nm1_cgs = norm_r_n_cgs; rhonm1 = rhon; if (r_avail) { for (i = 0; i < N; i++) Aubar[i] = r_cgs[i] - (eta_m - alpha) * Ad[i]; /* Note: Aubar temporarily holds qmr residual */ } else { /* * We want to estimate the 2-norm of the qmr residual. Freund gives the * bound ||r|| <= tau_m * sqrt(2*iter+1). We use this bound until we get * close to the solution. At that point we compute the real residual norm * and use this to estimate the norm of ||W|| in Freund's paper. */ dtemp = sqrt((double) (2 * iter + 1)); if ((scaled_r_norm < epsilon * dtemp) && !offset) { AZ_scale_true_residual(x, b, Aubar, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); if (tau_m != 0.0) W_norm = actual_residual / tau_m; if (W_norm < 1.0) W_norm = 1.0; offset = 2 * iter + 1; est_residual = actual_residual; } else est_residual = sqrt((double)(2 * iter + 1 - offset) + W_norm * W_norm) * tau_m; } /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * 3) rhon = <rtilda, r_cgs> * Note: step 1) is performed if r_avail = AZ_TRUE or AZ_FIRST_TIME * is passed in. Otherwise, ||r|| is taken as est_residual. */ AZ_compute_global_scalars(Amat, x, b, Aubar, weight, &est_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, rtilda, r_cgs, &rhon, convergence_info); if ( (iter%print_freq == 0) && proc == 0 ) (void) fprintf(stdout, "%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); /* convergence tests */ converged = scaled_r_norm < epsilon; if (options[AZ_check_update_size] & converged) { daxpy_(&N, &doubleone , d, &one, ubar, &one); converged = AZ_compare_update_vs_soln(N, -1.,eta_m, ubar, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); } if (converged) { AZ_scale_true_residual(x, b, Aubar, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat,convergence_info); converged = true_scaled_r < params[AZ_tol]; /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!converged && (AZ_get_new_eps(&epsilon, scaled_r_norm, true_scaled_r, proc_config) == AZ_QUIT)) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, est_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } beta = rhon / rhonm1; } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings)) (void) fprintf(stdout, "%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); /* check if we exceeded maximum number of iterations */ if (converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, est_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* pqmrs */
void AZ_domain_decomp(double x[], AZ_MATRIX *Amat, int options[], int proc_config[], double params[], struct context *context) /******************************************************************************* Precondition 'x' using an overlapping domain decomposition method where a solver specified by options[AZ_subdomain_solve] is used on the subdomains. Note: if a factorization needs to be computed on the first iteration, this will be done and stored for future iterations. Author: Lydie Prevost, SNL, 9222 ======= Revised by R. Tuminaro (4/97), SNL, 9222 Return code: void ============ Parameter list: =============== N_unpadded: On input, number of rows in linear system (unpadded matrix) that will be factored (after adding values for overlapping). Nb_unpadded: On input, number of block rows in linear system (unpadded) that will be factored (after adding values for overlapping). N_nz_unpadded: On input, number of nonzeros in linear system (unpadded) that will be factored (after adding values for overlapping). x: On output, x[] is preconditioned by performing the subdomain solve indicated by options[AZ_subdomain_solve]. val indx bindx rpntr: On input, arrays containing matrix nonzeros (see manual). cpntr bpntr options: Determines specific solution method and other parameters. In this routine, we are concerned with options[AZ_overlap]: == AZ_none: nonoverlapping domain decomposition == AZ_diag: use rows corresponding to external variables but only keep the diagonal for these rows. == k : Obtain rows that are a distance k away from rows owned by this processor. data_org: Contains information on matrix data distribution and communication parameters (see manual). *******************************************************************************/ { int N_unpadded, Nb_unpadded, N_nz_unpadded; double *x_pad = NULL, *x_reord = NULL, *ext_vals = NULL; int N_nz, N_nz_padded, nz_used; int mem_orig, mem_overlapped, mem_factor; int name, i, bandwidth; int *ordering = NULL; double condest; /* double start_t; */ int estimated_requirements; char str[80]; int *garbage; int N; int *padded_data_org = NULL, *bindx, *data_org; double *val; int *inv_ordering = NULL; int *map = NULL; AZ_MATRIX *A_overlapped = NULL; struct aztec_choices aztec_choices; /**************************** execution begins ******************************/ data_org = Amat->data_org; bindx = Amat->bindx; val = Amat->val; N_unpadded = data_org[AZ_N_internal] + data_org[AZ_N_border]; Nb_unpadded = data_org[AZ_N_int_blk]+data_org[AZ_N_bord_blk]; if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) N_nz_unpadded = bindx[N_unpadded]; else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) N_nz_unpadded = (Amat->indx)[(Amat->bpntr)[Nb_unpadded]]; else { if (Amat->N_nz < 0) AZ_matfree_Nnzs(Amat); N_nz_unpadded = Amat->N_nz; } aztec_choices.options = options; aztec_choices.params = params; context->aztec_choices = &aztec_choices; context->proc_config = proc_config; name = data_org[AZ_name]; if ((options[AZ_pre_calc] >= AZ_reuse) && (context->Pmat_computed)) { N = context->N; N_nz = context->N_nz; A_overlapped = context->A_overlapped; A_overlapped->data_org = data_org; A_overlapped->matvec = Amat->matvec; } else { sprintf(str,"A_over %s",context->tag); A_overlapped = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX), AZ_ALLOC, name, str, &i); AZ_matrix_init(A_overlapped, 0); context->A_overlapped = A_overlapped; A_overlapped->data_org = data_org; A_overlapped->matvec = Amat->matvec; A_overlapped->matrix_type = AZ_MSR_MATRIX; AZ_init_subdomain_solver(context); AZ_compute_matrix_size(Amat, options, N_nz_unpadded, N_unpadded, &N_nz_padded, data_org[AZ_N_external], &(context->max_row), &N, &N_nz, params[AZ_ilut_fill], &(context->extra_fact_nz_per_row), Nb_unpadded,&bandwidth); estimated_requirements = N_nz; if (N_nz*2 > N_nz) N_nz = N_nz*2; /* check for overflow */ /* Add extra memory to N_nz. */ /* This extra memory is used */ /* as temporary space during */ /* overlapping calculation */ /* Readjust N_nz by allocating auxilliary arrays and allocate */ /* the MSR matrix to check that there is enough space. */ /* block off some space for map and padded_data_org in dd_overlap */ garbage = (int *) AZ_allocate((AZ_send_list + 2*(N-N_unpadded) +10)* sizeof(int)); AZ_hold_space(context, N); if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ N_nz = AZ_adjust_N_nz_to_fit_memory(N_nz, context->N_large_int_arrays, context->N_large_dbl_arrays); context->N_nz_factors = N_nz; if (N_nz <= N_nz_unpadded) { AZ_printf_out("Error: Not enough space for domain decomposition\n"); AZ_exit(1); } if (estimated_requirements > N_nz ) estimated_requirements = N_nz; /* allocate matrix via AZ_manage_memory() */ sprintf(str,"bindx %s",context->tag); A_overlapped->bindx =(int *) AZ_manage_memory(N_nz*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"val %s",context->tag); A_overlapped->val =(double *) AZ_manage_memory(N_nz*sizeof(double), AZ_ALLOC, name, str, &i); context->N_nz_allocated = N_nz; AZ_free_space_holder(context); AZ_free(garbage); /* convert to MSR if necessary */ if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) AZ_vb2msr(Nb_unpadded,val,Amat->indx,bindx,Amat->rpntr,Amat->cpntr, Amat->bpntr, A_overlapped->val, A_overlapped->bindx); else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { for (i = 0 ; i < N_nz_unpadded; i++ ) { A_overlapped->bindx[i] = bindx[i]; A_overlapped->val[i] = val[i]; } } else AZ_matfree_2_msr(Amat,A_overlapped->val,A_overlapped->bindx,N_nz); mem_orig = AZ_gsum_int(A_overlapped->bindx[N_unpadded],proc_config); /* start_t = AZ_second(); */ AZ_pad_matrix(context, proc_config, N_unpadded, &N, &(context->map), &(context->padded_data_org), &N_nz, estimated_requirements); /* if (proc_config[AZ_node] == 0) AZ_printf_out("matrix padding took %e seconds\n",AZ_second()-start_t); */ mem_overlapped = AZ_gsum_int(A_overlapped->bindx[N],proc_config); if (options[AZ_reorder]) { /* start_t = AZ_second(); */ AZ_find_MSR_ordering(A_overlapped->bindx, &(context->ordering),N, &(context->inv_ordering),name,context); /* if (proc_config[AZ_node] == 0) AZ_printf_out("took %e seconds to find ordering\n", AZ_second()-start_t); */ /* start_t = AZ_second(); */ AZ_mat_reorder(N,A_overlapped->bindx,A_overlapped->val, context->ordering, context->inv_ordering); /* if (proc_config[AZ_node] == 0) AZ_printf_out("took %e seconds to reorder\n", AZ_second()-start_t); */ /* NOTE: ordering is freed inside AZ_mat_reorder */ #ifdef AZ_COL_REORDER if (options[AZ_reorder]==2) { AZ_mat_colperm(N,A_overlapped->bindx,A_overlapped->val, &(context->ordering), name, context ); } #endif } /* Do a factorization if needed. */ /* start_t = AZ_second(); */ AZ_factor_subdomain(context, N, N_nz, &nz_used); if (options[AZ_output] > 0 && options[AZ_diagnostics]!=AZ_none) { AZ_printf_out("\n*********************************************************************\n"); condest = AZ_condest(N, context); AZ_printf_out("***** Condition number estimate for subdomain preconditioner on PE %d = %.4e\n", proc_config[AZ_node], condest); AZ_printf_out("*********************************************************************\n"); } /* start_t = AZ_second()-start_t; max_time = AZ_gmax_double(start_t,proc_config); min_time = AZ_gmin_double(start_t,proc_config); if (proc_config[AZ_node] == 0) AZ_printf_out("time for subdomain solvers ranges from %e to %e\n", min_time,max_time); */ if ( A_overlapped->matrix_type == AZ_MSR_MATRIX) AZ_compress_msr(&(A_overlapped->bindx), &(A_overlapped->val), context->N_nz_allocated, nz_used, name, context); context->N_nz = nz_used; context->N = N; context->N_nz_allocated = nz_used; mem_factor = AZ_gsum_int(nz_used,proc_config); if (proc_config[AZ_node] == 0) AZ_print_header(options,mem_overlapped,mem_orig,mem_factor); if (options[AZ_overlap] >= 1) { sprintf(str,"x_pad %s",context->tag); context->x_pad = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i); sprintf(str,"ext_vals %s",context->tag); context->ext_vals = (double *) AZ_manage_memory((N-N_unpadded)* sizeof(double), AZ_ALLOC, name, str, &i); } if (options[AZ_reorder]) { sprintf(str,"x_reord %s",context->tag); context->x_reord = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i); } } /* Solve L u = x where the solution u overwrites x */ x_reord = context->x_reord; inv_ordering = context->inv_ordering; ordering = context->ordering; x_pad = context->x_pad; ext_vals = context->ext_vals; padded_data_org = context->padded_data_org; map = context->map; if (x_pad == NULL) x_pad = x; if (options[AZ_overlap] >= 1) { for (i = 0 ; i < N_unpadded ; i++) x_pad[i] = x[i]; AZ_exchange_bdry(x_pad,padded_data_org, proc_config); for (i = 0 ; i < N-N_unpadded ; i++ ) ext_vals[map[i]-N_unpadded] = x_pad[i+N_unpadded]; for (i = 0 ; i < N-N_unpadded ; i++ ) x_pad[i + N_unpadded] = ext_vals[i]; } else if (options[AZ_overlap] == AZ_diag) AZ_exchange_bdry(x_pad,data_org, proc_config); if (x_reord == NULL) x_reord = x_pad; if (options[AZ_reorder]) { /* Apply row permutation to the right hand side */ /* ((P'A P)Pi') Pi P'x = P'rhs, b= P'rhs */ for (i = 0 ; i < N ; i++ ) x_reord[inv_ordering[i]] = x_pad[i]; } AZ_solve_subdomain(x_reord,N, context); #ifdef AZ_COL_REORDER /* Apply column permutation to the solution */ if (options[AZ_reorder]==1){ /* ((P'A P) P'sol = P'rhs sol = P( P'sol) */ for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]]; } if (options[AZ_reorder]==2){ /* * ((P'A P)Pi') Pi P'sol = P'rhs sol = P Pi'( Pi P'sol) * Version 1: * for (i = 0; i < N; i++) pi_sol[i] = x_reord[ordering[i]]; * for (j = 0; j < N; j++) x_pad[j] = pi_sol[inv_ordering[j]]; * Version 2: */ for (i = 0; i < N; i++) x_pad[i] = x_reord[ ordering[inv_ordering[i]] ]; } #else if (options[AZ_reorder]) for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]]; #endif AZ_combine_overlapped_values(options[AZ_type_overlap],padded_data_org, options, x_pad, map,ext_vals,name,proc_config); if (x_pad != x) for (i = 0 ; i < N_unpadded ; i++ ) x[i] = x_pad[i]; } /* subdomain driver*/
void AZ_sym_gauss_seidel_sl(double val[],int bindx[],double x[],int data_org[], int options[], struct context *context, int proc_config[]) /******************************************************************************* Symmetric Gauss-Siedel preconditioner. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== val: Array containing the nonzero entries of the matrix (see Aztec User's Guide). indx, bindx, rpntr, cpntr, bpntr: Arrays used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). x: On input, contains the current solution to the linear system. On output contains the Jacobi preconditioned solution. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see Aztec User's Guide). options: Determines specific solution method and other parameters. *******************************************************************************/ { /* local variables */ register int *bindx_ptr; register double sum, *ptr_val; int i, bindx_row, j_last, N, step, ione = 1, j; double *b, *ptr_b; char tag[80]; /**************************** execution begins ******************************/ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; sprintf(tag,"b/sGS %s",context->tag); b = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); DCOPY_F77(&N, x, &ione, b, &ione); ptr_val = val; for (i = 0; i < N; i++) { (*ptr_val) = 1.0 / (*ptr_val); x[i] = 0.0; ptr_val++; } for (step = 0; step < options[AZ_poly_ord]; step++) { AZ_exchange_bdry(x, data_org, proc_config); bindx_row = bindx[0]; bindx_ptr = &bindx[bindx_row]; ptr_val = &val[bindx_row]; ptr_b = b; for (i = 0; i < N; i++) { sum = *ptr_b++; j_last = bindx[i+1] - bindx[i]; for (j = 0; j < j_last; j++) { sum -= *ptr_val++ * x[*bindx_ptr++]; } x[i] = sum * val[i]; } bindx_row = bindx[N]; bindx_ptr = &bindx[bindx_row-1]; ptr_val = &val[bindx_row-1]; for (i = N - 1; i >= 0; i--) { sum = b[i]; j_last = bindx[i+1] - bindx[i]; for (j = 0; j < j_last; j++) { sum -= *ptr_val-- * x[*bindx_ptr--]; } x[i] = sum * val[i]; } } for (i = 0; i < N; i++) val[i] = 1.0 / val[i]; } /* AZ_sym_gauss_seidel_sl */
void AZ_compute_global_scalars(AZ_MATRIX *Amat, double x[], double b[], double r[], double w[], double *r_norm, double *scaled_r_norm, int options[], int data_org[], int proc_config[], int *r_avail, double v1[], double v2[], double *value, struct AZ_CONVERGE_STRUCT *conv_info) /******************************************************************************* Routine to check against 'eps' for convergence. The type of norm use is determined by the variable 'conv_flag' as follows: 0: ||r||2 / ||r0||2 < eps 1: ||r||2 / ||b||2 < eps 2: ||r||2 / ||A||inf < eps 3: ||r||inf / (||A||inf * ||x||1 + ||b||inf) 4: ||r/w||2 where ||*||2 is the Euclidean norm, ||*||inf is the infinity norm and ||*|| is the sum norm. Author: Scott A. Hutchinson, SNL, 1421 ======= Return code: void ============ Parameter list: =============== val: Array containing the nonzero entries of the matrix (see file Aztec User's Guide). indx, bindx, rpntr, cpntr, bpntr: Arrays used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). x: The current solution vector. b: Right hand side of linear system. r: The current residual vector. w: Weighting vector for convergence norm #4. r_norm: Norm of residual. scaled_r_norm: Norm of residual scaled by norm of the rhs. options: Determines specific solution method and other parameters. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see file Aztec User's Guide). proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. r_avail: In general, this variable indicates whether or not the residual is available or needs to be made available. In particular, first_time == TRUE : real residual is available. The norm of this residual will be computed and stored in r_norm. first_time == FALSE && r_avail == TRUE : real residual is available. The norm of this residual will be computed and stored in r_norm. first_time == FALSE && r_avail == FALSE : real residual is not available. The norm of the residual is not computed. Instead, it is assumed that r_norm is an estimate of the residual norm. All of this is done for gmres() and tfqmr() where we often have estimates of the residual 2-norm without actually having computed the residual. IMPORTANT: if a convergence test requires a residual norm other than the 2-norm, it is important that AZ_compute_global_scalars() sets r_avail to TRUE. This tells the iterative method (in particular gmres and tfqmr) that the real residual must be computed (at additional cost) and passed in to AZ_compute_global_scalars(). v1,v2,value: If v1 != NULL, *value = <v1,v2> where <.,.> is the standard inner product, v1 and v2 are double precision vectors of length data_org[AZ_N_internal] + data_org[AZ_N_border]. This is used so that 1 inner product can be grouped together with the inner products required for convergence (to save some messages). first_time: Flag set AZ_TRUE if this is the first time this routine has been called. Set AZ_FALSE otherwise. See comments for r_avail above for more information. *******************************************************************************/ { /* local variables */ register int i; static double *temp, *tr; static int total_N; int N, j; double dots[5], tmp[5], dmax, dtemp; int count = 0, one = 1; /**************************** execution begins ******************************/ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; tr = r; if (options[AZ_ignore_scaling]) { if ( (conv_info->scaling->action == AZ_left_scaling) || (conv_info->scaling->action == AZ_left_and_right_scaling) ) { if (!(*r_avail) && (conv_info->not_initialized==AZ_FALSE)) { printf("AZ_compute_global_scalars: Error residual is needed to \ ignore scaling in convergence tests\n"); exit(1); } *r_avail = AZ_TRUE; tr = AZ_manage_memory(N*sizeof(double),AZ_ALLOC,AZ_SYS, "trinconv",&j); for (i = 0; i < N; i++) tr[i] = r[i]; AZ_scale_f(AZ_INVSCALE_RHS, Amat, options, tr, x, proc_config, conv_info->scaling); } }
void AZ_combine_overlapped_values(int sym_flag,int data_org[],int options[], double x[], int map[], double ext_vals[], int name, int proc_config[]) { /* Add the values that are redundant. That is, add the external values * to the border values that correspond to them. This will make the * operator symmetric if the incomplete factorization used above was * symmetric. */ int type, total, i, j, count, st, from, N_unpadded, N; MPI_AZRequest request[AZ_MAX_NEIGHBORS]; /* Message handle */ double *little; double scale = .5; N_unpadded = data_org[AZ_N_internal] + data_org[AZ_N_border]; N = N_unpadded + data_org[AZ_N_external]; if (sym_flag == AZ_symmetric) scale = 1.; else return; if (options[AZ_overlap] == 0) return; /* unshuffle the data */ if (options[AZ_overlap] >= 1) { for (i = 0 ; i < N-N_unpadded ; i++ ) ext_vals[i] = x[i + N_unpadded]; for (i = 0 ; i < N-N_unpadded ; i++ ) x[i+N_unpadded] = ext_vals[map[i]-N_unpadded]; } /* first send the external points to the neighbors */ type = AZ_sys_msg_type; AZ_sys_msg_type = (AZ_sys_msg_type+1-AZ_MSG_TYPE) % AZ_NUM_MSGS + AZ_MSG_TYPE; /* figure out longest message to be received and allocate space for it. */ total = 0; for ( i = 0 ; i < data_org[AZ_N_neigh] ; i++ ) total += data_org[AZ_send_length+i]; little = (double *) AZ_manage_memory(total*sizeof(double), AZ_ALLOC, name, "temp in combine", &i); /* post receives */ count = 0; for ( i = 0 ; i < data_org[AZ_N_neigh] ; i++ ) { from = data_org[AZ_neighbors+i]; (void) mdwrap_iread((void *) &(little[count]), sizeof(double)*data_org[AZ_send_length+i], &from, &type, request+i); count += data_org[AZ_send_length+i]; } /* send messages */ count = data_org[AZ_N_internal] + data_org[AZ_N_border]; for ( i = 0 ; i < data_org[AZ_N_neigh] ; i++ ) { (void) mdwrap_write((void *) &(x[count]), data_org[AZ_rec_length+i]* sizeof(double), data_org[AZ_neighbors+i], type, &st); count += data_org[AZ_rec_length+i]; } /* receive messages and add recvd values to the send list */ count = 0; for ( i = 0 ; i < data_org[AZ_N_neigh] ; i++ ) { from = data_org[AZ_neighbors+i]; (void) mdwrap_wait((void *) &(little[count]), sizeof(double)*data_org[AZ_send_length+i], &from, &type, &st,request+i); count += data_org[AZ_send_length+i]; } for ( j = 0 ; j < total; j++ ) { x[ data_org[AZ_send_list+j] ] += little[j]; x[ data_org[AZ_send_list+j] ] *= scale; } }
/*extern void mc64ad_(int *, int *, int *, int *, int *, double*, * int *, int *, int *, int *, int *, double*, * int *, int *); */ void AZ_mat_colperm(int n, int bindx[], double val[], int **invp, int name, struct context *context) /******************************************************************************* Use the mc64ad algorithm to permute the columns of a matrix. Unresolved issues: 1. Similar Aztec modules return invp and delete perm. 2. The goal of this module is to increase the number of diagonal nonzeros. This reduces the total number of nonzeros in MSR format. Some effort is required to make this consistent with Aztec format. Author: D. Day Return code: void ============ Parameter list: =============== bindx : On input, the nonzero sparsity pattern of the matrix for which we will determine a new ordering. Note: bindx is changed in this routine invp: On output, invp[i] gives the location of row i */ { int job,nnz,nzdiag,liw,ldw,i,p,row,ki,kf,k,nod; char str[80]; int *mcontrol, *info, *rowptr; /* double work; */ double *diag; if (n==0) return; nnz = bindx[n]-1; liw = 5*n; ldw = 2*n + nnz; /* If job=1, then ldw := n */ sprintf(str,"invp %s",context->tag); *invp = (int *) AZ_manage_memory((n+1)*sizeof(int), AZ_ALLOC, name, str,&i); mcontrol = (int *) AZ_allocate(10*sizeof(int)); info = (int *) AZ_allocate(10*sizeof(int)); rowptr = (int *) AZ_allocate(liw*sizeof(int)); diag = (double *) AZ_allocate(ldw*sizeof(double)); if (diag == NULL){ printf("AZ_col_perm: Error: memory insufficient. Try job=1\n"); AZ_exit(1); } /* Echo input matrix * printf("AZ_mat_colperm: bindx[%d] = %d\n", n, bindx[n]); * for (row=0;row<n;row++){ * printf("%d %d %22.14e \n", row+1, row+1, val[row]); * ki = bindx[row]; * kf = bindx[row+1]; * for (k=ki;k<kf;k++) * printf("%d %d %22.14e \n", row+1, bindx[k]+1, val[k]); * } */ /* msr2csr: retract the diagonal and delete zeros */ for (row=0;row<n;row++) diag[row] = val[row]; for (row=0;row<=n;row++) rowptr[row] = bindx[row]; p=0; ki = rowptr[0]; for (row=0;row<n;row++){ rowptr[row] += ( row - n - 1); kf = rowptr[row+1]; val[p] = diag[row]; diag[row] = 0.0; bindx[p] = row; ++p; for (k=ki;k<kf;k++){ val[p] = val[k]; bindx[p] = bindx[k]; ++p; } ki = kf; } --rowptr[n]; p=0; ki = rowptr[0]; for (row=0;row<n;row++){ rowptr[row] = p; kf = rowptr[row+1]; for (k=ki;k<kf;k++){ if( val[k] != 0.0 ){ val[p] = val[k]; bindx[p] = bindx[k]; ++p; } } ki = kf; } rowptr[n] = p; nnz = p; /* * Convert to standard sparse matrix format with Fortran indexing * bindx(1:n+1), bindx(n+2:nnz+n+2), val(1:nnz) * bindx[n+1:rowptr[n]+n] := bindx[0:rowptr[n]-1] and then * bindx[0:n] := rowptr[0:n] * mcontrol[0:2] := -1 turns off output */ for (k=p-1;k>=0;k--) bindx[k+n+1] = bindx[k]+1; for (k=0;k<=n;k++) bindx[k] = rowptr[k]+1; for (k=0;k<=n;k++) rowptr[k] = 0; job = 4; /* job = 1 may do less violence to symmetric form */ /* for (i=0; i<4; i++) mcontrol[i] = 6; */ for (i=0; i<3; i++) mcontrol[i] = -1; for (i=3; i<10; i++) mcontrol[i] = 0; for (i=0; i<10; i++) info[i] = 0; MC64AD_F77(&job,&n,&nnz,bindx,&(bindx[n+1]),val,&nzdiag,*invp,&liw,rowptr,&ldw,diag,mcontrol,info); /* nzdiag is the number of zero diagonals in the permuted matrix */ /* +1 structurally singular matrix (iffi nzdiag < n) +2 the returned scaling factors are large and may cause overflow when used to scale the matrix (for JOB = 5 entry only.) -1 JOB < 1 or JOB > 5. Value of JOB held in INFO(2). -2 N < 1. Value of N held in INFO(2). -3 NE < 1. Value of NE held in INFO(2). -4 the defined length LIW violates the restriction on LIW. Value of LIW required given by INFO(2). -5 the defined length LDW violates the restriction on LDW. Value of LDW required given by INFO(2). -6 entries are found whose row indices are out of range. INFO(2) contains the index of a column in which such an entry is found. -7 repeated entries are found. INFO(2) contains the index of a column in which such entries are found. */ if( info[0] >= 0 ){ /* convert permutation to C indexing and invert perm */ for (i = 0;i< n;i++) (*invp)[i]--; /* 1 2 3 0 */ /* csr2msr: diag = diag(A P) */ for (i = 0;i<= n;i++) bindx[i] += n; p = bindx[n]; for (i = n+1;i<p;i++) bindx[i]--; for (i = n+1;i<p;i++) bindx[i] = (*invp)[bindx[i]]; for (row=0;row<n;row++) diag[row] = 0.; p = n+1; for (row=0;row<n;row++){ ki = bindx[row]; bindx[row] = p; kf = bindx[row+1]; for (k=ki;k<kf;k++){ if( row != bindx[k]){ bindx[p] = bindx[k]; val[p-n-1] = val[k-n-1]; ++p; } else { diag[row] = val[k-n-1]; } } } bindx[n] = p; /* val[n+1: (n+1) + nod-1] := val[0:nod-1], nod = number off-diagonals */ nod = p-(n+1); /* printf("az_colperm: number of off diagonals is %d\n",nod); */ for (i=nod ; i>0 ; i-- ) val[n+i] = val[i-1]; val[n] = 0; for (i = 0 ; i < n ; i++ ) val[i] = diag[i]; /* Sort the colmns to ascend */ /* This appears unnecessary, though one never can be certain. for (row=0;row<n;row++){ ki = bindx[row]; kf = bindx[row+1]; for (p=ki+1;k<kf;k++){ k = p; while ( (k>ki) && (bindx[k-1]>bindx[k]) ){ work = val[k]; val[k] = val[k-1]; val[k-1] = work; i = bindx[k]; bindx[k] = bindx[k-1]; bindx[k-1] = i; --k; } } } */ if( info[0] == 1 ){ printf("AZ_col_perm: Error: Internal matrix is singular\n"); } }else{ /* Ideally an error flag would be returned here */ printf("az_colperm: Error: info = %d %d\n",info[0],info[1]); AZ_exit(1); } AZ_free(mcontrol); AZ_free(info); AZ_free(diag); AZ_free(rowptr); return; }