void AZ_precondition(double x[], int input_options[], int proc_config[],
                     double input_params[], AZ_MATRIX *Amat,
                     AZ_PRECOND *input_precond)

/*******************************************************************************

  Apply the sparse matrix preconditioner selected by options[AZ_precond] to x,
  in place.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  x:               On input, the vector to be preconditioned. On output, the
                   preconditioned vector (overwritten in place).

  input_options:   Option array. input_options[AZ_precond] selects the method.
                   For AZ_multilevel the option/param arrays of each
                   AZ_PRECOND in the input_precond chain are used instead.
                   NOTE: options[AZ_pre_calc] is set to AZ_sys_reuse and
                   precond->context->Pmat_computed is set to 1 after every
                   preconditioner application.

  proc_config:     Machine configuration. proc_config[AZ_node] is the node
                   number. proc_config[AZ_MPI_Tag] is reset to AZ_MSG_TYPE on
                   the normal exit path.

  input_params:    Floating point parameters handed on to the preconditioners.

  Amat:            Structure used to represent the matrix (see az_aztec.h and
                   the Aztec User's Guide). Used for the matrix-vector products
                   of the polynomial Jacobi steps and the multilevel residual.

  input_precond:   Structure used to represent the preconditioner (see
                   az_aztec.h and the Aztec User's Guide). Its Pmat supplies
                   the matrix data the preconditioner is built from.

  Methods handled (switch on options[AZ_precond]):

    AZ_none          nothing is done.
    AZ_Jacobi        point Jacobi (MSR / user matrices) or block Jacobi (VBR);
                     options[AZ_poly_ord] > 1 adds further steps of
                     v += rhs - D^{-1} A v.
    AZ_sym_GS        symmetric Gauss-Seidel.
    AZ_Neumann/AZ_ls polynomial expansion; returns immediately (without the
                     final message-tag reset) when options[AZ_poly_ord] == 0.
    AZ_dom_decomp,
    AZ_rilu          domain decomposition via AZ_domain_decomp().
    AZ_icc           not available; only prints a message.
    AZ_user_precond  calls precond->prec_function.
    AZ_smoother      local averaging followed by an inner AZ_oldsolve() call.
    default          values below AZ_SOLVER_PARAMS are treated as a handle to
                     saved solver parameters and trigger a recursive
                     AZ_oldsolve(); anything else is a fatal error.

  Several work pointers (d2_inv, d2_indx, ..., Dmat) are function-static: when
  options[AZ_pre_calc] >= AZ_sys_reuse they keep whatever the previous call
  stored in them.

*******************************************************************************/

{

  /* local variables */

  int            ione = 1;
  double        *temp;
  int            m, N, k, length;
  int            i, step, j;
  static int    *d2_indx,*d2_bindx,*d2_rpntr,*d2_bpntr;
  static double *d2_inv;
  static AZ_MATRIX *Dmat;
  int            tsize, multilevel_flag = 0, max_externals;
  static int     previous_factors = -1;
  double        *v, *y;
  char          *yo = "precond: ";
  int           *data_org, *bindx, *indx, *cpntr, *rpntr, *bpntr;
  double        *val;
  char           label[64],suffix[32];
  char           tag[80];
  double        *current_rhs, *orig_rhs = NULL, *x_precond = NULL;
  int           *options, *ioptions, N_fixed, *fixed_pts;
  double        *params, *iparams, *istatus;
  AZ_MATRIX     *Aptr, *Pmat;
  AZ_PRECOND    *Pptr, *precond;
  struct AZ_SCALING *Sptr;
  int            opt_save1, opt_save2, opt_save3, opt_save4, opt_save5, *itemp;
  double        *tttemp, norm1, *dtemp;
#ifdef TIMING
  double         ttt;
#endif

#ifdef eigen
  double        *tb, *tr;
#endif

  /**************************** execution begins ******************************/

#ifdef TIMING
  ttt = AZ_second();
#endif

  precond = input_precond;

  /* suffix is built here but the memory labels below are all derived from */
  /* precond->context->tag.                                                 */
  sprintf(suffix," in precond%d",input_options[AZ_recursion_level]);

  data_org = precond->Pmat->data_org;
  options  = input_options;
  params   = input_params;

  m = data_org[AZ_N_int_blk] + data_org[AZ_N_bord_blk];
  N = data_org[AZ_N_internal] + data_org[AZ_N_border];

  /* work vectors must hold the externals of both Amat and Pmat */
  max_externals = Amat->data_org[AZ_N_external];
  if (max_externals < data_org[AZ_N_external])
    max_externals = data_org[AZ_N_external];

  current_rhs = x;
  if (options[AZ_precond] == AZ_multilevel) {

    /* make extra vectors to hold rhs and residual */

    sprintf(tag,"orig_rhs %s",precond->context->tag);
    orig_rhs = AZ_manage_memory((N+max_externals)*sizeof(double),
                                AZ_ALLOC, AZ_SYS+az_iterate_id,tag,&i);
    sprintf(tag,"x_prec %s",precond->context->tag);
    x_precond = AZ_manage_memory((N+max_externals)*sizeof(double),
                                 AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i);
    for (i = 0 ; i < N; i++) x_precond[i] = 0.0;
    for (i = 0 ; i < N; i++) orig_rhs[i] = current_rhs[i];
    multilevel_flag = 1;
    options = precond->options;
    params  = precond->params;
  }

  /* One pass for ordinary preconditioners; for AZ_multilevel one pass per   */
  /* entry of the precond->next_prec chain.                                  */
  do {
    data_org = precond->Pmat->data_org;
    val      = precond->Pmat->val;
    bindx    = precond->Pmat->bindx;
    cpntr    = precond->Pmat->cpntr;
    indx     = precond->Pmat->indx;
    rpntr    = precond->Pmat->rpntr;
    bpntr    = precond->Pmat->bpntr;
    if (max_externals < data_org[AZ_N_external])
      max_externals = data_org[AZ_N_external];

    switch (options[AZ_precond]) {
    case AZ_none:
      break;

    case AZ_Jacobi:
      if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) {
        /* point Jacobi: val[0..N-1] is used as the diagonal */
        for (i = 0; i < N; i++) current_rhs[i] /= val[i];

        if (options[AZ_poly_ord] > 1) {
          sprintf(tag,"v_prec %s",precond->context->tag);
          v = AZ_manage_memory((N+max_externals)*sizeof(double),
                               AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i);
          sprintf(tag,"y_prec %s",precond->context->tag);
          y = AZ_manage_memory(N*sizeof(double),
                               AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i);
          for (i = 0; i < N; i++) v[i] = current_rhs[i];

          for (step = 1; step < options[AZ_poly_ord]; step++) {
            Amat->matvec(v, y, Amat, proc_config);
            for(i = 0; i < N; i++) v[i] += current_rhs[i] - y[i] / val[i];
          }
          for (i = 0; i < N; i++) current_rhs[i] = v[i];
        }
      }
      else if (data_org[AZ_matrix_type] == AZ_USER_MATRIX) {
        if (options[AZ_pre_calc] < AZ_sys_reuse) {
          /* extract the inverse diagonal row by row through getrow() */
          sprintf(tag,"d2_inv %s",precond->context->tag);
          d2_inv = (double *) AZ_manage_memory(N*sizeof(double),AZ_ALLOC,
                                               data_org[AZ_name],tag,&i);
          Pmat = precond->Pmat;
          if ( (Pmat->N_nz < 0) || (Pmat->max_per_row < 0))
            AZ_matfree_Nnzs(Pmat);

          if ( (Pmat->getrow == NULL) && (N != 0) ) {
            AZ_printf_err("Error: Only matrices with getrow() defined via ");
            AZ_printf_err("AZ_set_MATFREE_getrow(...) can do Jacobi preconditioning\n");
            exit(1);
          }
          sprintf(tag,"dtemp %s",precond->context->tag);
          dtemp = (double *) AZ_manage_memory(Pmat->max_per_row*
                                              sizeof(double),AZ_ALLOC,
                                              data_org[AZ_name],tag,&i);
          sprintf(tag,"itemp %s",precond->context->tag);
          itemp = (int *) AZ_manage_memory(Pmat->max_per_row*
                                           sizeof(int ),AZ_ALLOC,
                                           data_org[AZ_name],tag,&i);

          for (i = 0; i < N; i++) {
            Pmat->getrow(itemp,dtemp,&length,Pmat,1,&i,Pmat->max_per_row);
            for (k =0; k < length; k++)
              if (itemp[k] == i) break;

            if (k == length) d2_inv[i] = 0.0; /* no diagonal */
            else d2_inv[i] = 1./dtemp[k];
          }
        }
        for (i = 0; i < N; i++) current_rhs[i] *= d2_inv[i];

        if (options[AZ_poly_ord] > 1) {
          sprintf(tag,"v_prec %s",precond->context->tag);
          v = AZ_manage_memory((N+max_externals)*sizeof(double),
                               AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i);
          sprintf(tag,"y_prec %s",precond->context->tag);
          y = AZ_manage_memory(N*sizeof(double),
                               AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i);
          for (i = 0; i < N; i++) v[i] = current_rhs[i];

          for (step = 1; step < options[AZ_poly_ord]; step++) {
            Amat->matvec(v, y, Amat, proc_config);
            for(i = 0; i < N; i++) v[i] += current_rhs[i] - y[i]*d2_inv[i];
          }
          for (i = 0; i < N; i++) current_rhs[i] = v[i];
        }
      }
      else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) {

        /* block Jacobi preconditioning */

        if (options[AZ_pre_calc] < AZ_sys_reuse) {

          /* First, compute block-diagonal inverse */
          /* (only if not already computed)        */

          tsize = 0;
          for (i = 0; i < m; i++)
            tsize += (rpntr[i+1] - rpntr[i]) * (cpntr[i+1] - cpntr[i]);

          sprintf(tag,"d2_indx %s",precond->context->tag);
          d2_indx  = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC,
                                              data_org[AZ_name], tag, &i);
          sprintf(tag,"d2_bindx %s",precond->context->tag);
          d2_bindx = (int *) AZ_manage_memory(m*sizeof(int), AZ_ALLOC,
                                              data_org[AZ_name], tag, &i);
          sprintf(tag,"d2_rpntr %s",precond->context->tag);
          d2_rpntr = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC,
                                              data_org[AZ_name], tag, &i);
          sprintf(tag,"d2_bpntr %s",precond->context->tag);
          d2_bpntr = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC,
                                              data_org[AZ_name], tag, &i);
          sprintf(tag,"d2_inv %s",precond->context->tag);
          d2_inv   = (double *) AZ_manage_memory(tsize*sizeof(double),
                                                 AZ_ALLOC,
                                                 data_org[AZ_name],tag,&i);
          d2_bpntr[0] = 0;
          sprintf(tag,"dmat_calk_binv %s",precond->context->tag);
          Dmat     = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX),
                                                    AZ_ALLOC,data_org[AZ_name],
                                                    tag,&i);

          /* Dmat is a block-diagonal VBR matrix holding the inverse blocks */
          Dmat->rpntr       = d2_rpntr;
          Dmat->cpntr       = d2_rpntr;
          Dmat->bpntr       = d2_bpntr;
          Dmat->bindx       = d2_bindx;
          Dmat->indx        = d2_indx;
          Dmat->val         = d2_inv;
          Dmat->data_org    = data_org;
          Dmat->matvec      = precond->Pmat->matvec;
          Dmat->matrix_type = precond->Pmat->matrix_type;

          if (options[AZ_pre_calc] != AZ_reuse) {
            AZ_calc_blk_diag_inv(val, indx, bindx, rpntr, cpntr, bpntr,
                                 d2_inv, d2_indx, d2_bindx, d2_rpntr,
                                 d2_bpntr, data_org);
          }
          else if (i == AZ_NEW_ADDRESS) {
            /* i is the status of the last AZ_manage_memory() call (Dmat):  */
            /* a fresh address means there was nothing stored to reuse.     */
            AZ_printf_err( "Error: options[AZ_pre_calc]==AZ_reuse and "
                           "previous factors\n not found. Check "
                           "data_org[AZ_name].\n");
            exit(-1);
          }
        }
        else if (previous_factors != data_org[AZ_name]) {
          AZ_printf_err( "Warning: Using a previous factorization as a "
                         "preconditioner\neven though matrix "
                         "(data_org[AZ_name]) has changed\n");
        }

        previous_factors = data_org[AZ_name];

        /* scale rhs */

        sprintf(tag,"v_prec %s",precond->context->tag);
        v = AZ_manage_memory((N+max_externals)*sizeof(double),
                             AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i);

        Dmat->matvec(current_rhs, v, Dmat, proc_config);

        DCOPY_F77(&N, v, &ione, current_rhs, &ione);

        if (options[AZ_poly_ord] > 1) {
          sprintf(tag,"y_prec %s",precond->context->tag);
          y = AZ_manage_memory((N+max_externals)*sizeof(double),
                               AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i);

          sprintf(tag,"temp_prec %s",precond->context->tag);
          temp = AZ_manage_memory(N*sizeof(double),
                                  AZ_ALLOC,AZ_SYS+az_iterate_id,tag,&i);

          for (step = 1; step < options[AZ_poly_ord]; step++) {
            Amat->matvec(v, y, Amat, proc_config);
            Dmat->matvec(y, temp, Dmat, proc_config);

            for (i = 0; i < N; i++) v[i] += current_rhs[i] - temp[i];
          }

          for (i = 0; i < N; i++) current_rhs[i] = v[i];
        }
      }
      break;

    case AZ_sym_GS:

      /* symmetric Gauss-Seidel preconditioner only available on 1 proc */

      if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) AZ_sym_gauss_seidel();
      else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX)
        AZ_sym_gauss_seidel_sl(val, bindx, current_rhs, data_org, options,
                               precond->context, proc_config);
      break;

    case AZ_Neumann:
    case AZ_ls:
      /* NOTE(review): this early return skips the message-tag reset (and   */
      /* the TIMING bookkeeping) at the end of the routine.                 */
      if (!options[AZ_poly_ord]) return;
      AZ_polynomial_expansion(current_rhs, options, proc_config, precond);
      break;

    case AZ_dom_decomp:
    case AZ_rilu:
      AZ_domain_decomp(current_rhs, precond->Pmat, options, proc_config,
                       params, precond->context);
      break;

    case AZ_icc:
      /* incomplete Cholesky factorization */

      (void) AZ_printf_out("Incomplete Cholesky not available (use ilu).\n");
      break;

    case AZ_user_precond:
      precond->prec_function(current_rhs, options, proc_config,
                             params, Amat, precond);
      break;

    case AZ_smoother:
      sprintf(label,"istatus %s",precond->context->tag);
      istatus = AZ_manage_memory(AZ_STATUS_SIZE*sizeof(double),AZ_ALLOC,
                                 AZ_SYS+az_iterate_id, label,&i);
      for (i = 0 ; i < AZ_STATUS_SIZE ; i++ ) istatus[i] = 0.0;

      sprintf(label,"y %s",precond->context->tag);
      y = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC,
                           AZ_SYS+az_iterate_id, label, &i);
      sprintf(label,"tttemp %s",precond->context->tag);
      tttemp = AZ_manage_memory((N+max_externals)*sizeof(double),AZ_ALLOC,
                                AZ_SYS+az_iterate_id, label, &i);

      for (i = 0 ; i < N ; i++ ) tttemp[i] = current_rhs[i];

      N_fixed = 0; fixed_pts = NULL;
      if (Amat->aux_ival != NULL) {
        N_fixed   = Amat->aux_ival[0][0];
        fixed_pts = Amat->aux_ival[1];
      }
      else if (options[AZ_pre_calc] != AZ_sys_reuse)
        AZ_printf_out("Warning: Not fixed points set for local smoothing!!\n");

      /* options[AZ_poly_ord] sweeps of local averaging: tttemp -> y */
      for (j = 0; j < options[AZ_poly_ord]; j++) {
        AZ_loc_avg(Amat, tttemp, y, N_fixed, fixed_pts, proc_config);
        norm1 = sqrt(AZ_gdot(N, y, y, proc_config));
        if (proc_config[AZ_node] == 0) {
          if ((j==0) && (options[AZ_output] != AZ_none) &&
              (options[AZ_output] != AZ_last) &&
              (options[AZ_output] != AZ_summary) &&
              (options[AZ_output] != AZ_warnings))
            AZ_printf_out(" %d %e\n",j, norm1);
          else if ((j==options[AZ_poly_ord]-1) &&
                   (options[AZ_output] != AZ_none) &&
                   (options[AZ_output] != AZ_warnings))
            AZ_printf_out(" %d %e\n",j, norm1);
          else if ((options[AZ_output] > 0) && (j%options[AZ_output] == 0))
            AZ_printf_out(" %d %e\n",j, norm1);
        }
        for (i = 0 ; i < N ; i++ ) tttemp[i] = y[i];
      }

      /* inner solve with rhs = (input - smoothed) and a zero initial guess */
      for (i = 0 ; i < N ; i++ ) y[i] = current_rhs[i] - y[i];
      for (i = 0 ; i < N ; i++ ) current_rhs[i] = 0.0;

      /* temporarily override the options for the inner solve */
      opt_save1 = options[AZ_output];
      opt_save2 = options[AZ_solver];
      opt_save3 = options[AZ_precond];
      opt_save4 = options[AZ_max_iter];
      opt_save5 = options[AZ_aux_vec];

      options[AZ_output]  = AZ_warnings;
      options[AZ_solver]  = AZ_tfqmr;
      options[AZ_precond] = AZ_dom_decomp;
      options[AZ_max_iter]= 1000;
      options[AZ_aux_vec] = AZ_rand;

      options[AZ_recursion_level]++;
      AZ_oldsolve(current_rhs, y,options, params, istatus, proc_config,
                  Amat, precond, NULL);
      options[AZ_recursion_level]--;

      options[AZ_output]  = opt_save1;
      options[AZ_solver]  = opt_save2;
      options[AZ_precond] = opt_save3;
      options[AZ_max_iter]= opt_save4;
      options[AZ_aux_vec] = opt_save5;
      break;

    default:
      if (options[AZ_precond] < AZ_SOLVER_PARAMS) {
        /* options[AZ_precond] is a handle to saved solver parameters:     */
        /* recover them and solve recursively with a zero initial guess.   */
        AZ_recover_sol_params(options[AZ_precond], &ioptions, &iparams,
                              &istatus, &Aptr, &Pptr, &Sptr);
        sprintf(label,"y %s",precond->context->tag);
        y = AZ_manage_memory((N+max_externals)*sizeof(double),
                             AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i);
        for (i = 0 ; i < N ; i++ ) y[i] = current_rhs[i];
        for (i = 0 ; i < N ; i++ ) current_rhs[i] = 0.0;

        ioptions[AZ_recursion_level] = options[AZ_recursion_level] + 1;
        if ((options[AZ_pre_calc] == AZ_sys_reuse) &&
            (ioptions[AZ_keep_info] == 1))
          ioptions[AZ_pre_calc] = AZ_reuse;
        AZ_oldsolve(current_rhs, y,ioptions,iparams, istatus, proc_config,
                    Aptr, Pptr, Sptr);
      }
      else {
        (void) AZ_printf_err( "%sERROR: invalid preconditioning flag.\n"
                              " options[AZ_precond] improperly set (%d).\n", yo,
                              options[AZ_precond]);
        exit(-1);
      }

    }

    /* later applications reuse whatever was computed on this one */
    options[AZ_pre_calc] = AZ_sys_reuse;
    precond->context->Pmat_computed = 1;

    if (multilevel_flag) {
      if (precond->next_prec == NULL) {
        /* last level: add the accumulated correction and stop */
        multilevel_flag = 0;
        for (i = 0; i < N; i++) current_rhs[i] += x_precond[i];
      }
      else {
        /* accumulate this level's correction, then hand the new residual */
        /* to the next preconditioner in the chain                        */
        for (i = 0; i < N; i++) x_precond[i] += current_rhs[i];
        AZ_compute_residual(orig_rhs, x_precond, current_rhs,
                            proc_config, Amat);
        precond = precond->next_prec;
        options = precond->options;
        params  = precond->params;
      }
    }

  } while (multilevel_flag);

  proc_config[AZ_MPI_Tag] = AZ_MSG_TYPE;   /* reset all the message types.   */
                                           /* This is to make sure that all  */
                                           /* processors (even those without */
                                           /* any preconditioning work) have */
                                           /* the same message types for the */
                                           /* next message.                  */
#ifdef TIMING
  ttt = AZ_second() - ttt;
  if (input_options[AZ_recursion_level] == 0) input_precond->timing[0] += ttt;
#endif

} /* precond */
void AZ_pgmresr(double b[], double x[],double weight[], int options[],
                double params[], int proc_config[], double status[],
                AZ_MATRIX *Amat, AZ_PRECOND *precond,
                struct AZ_CONVERGE_STRUCT *convergence_info)

/*******************************************************************************

  Restarted GMRESR-style solver for the nonsymmetric matrix problem Ax = b.

  Each inner step builds a preconditioned direction u_i = M^-1 res and its
  image c_i = A u_i, orthogonalizes c_i against the previous c_k (applying the
  same combination to u_i), normalizes, and then updates
       x   += (c_i,res) u_i
       res -= (c_i,res) c_i
  so that x and the residual are updated every iteration. The subspace is
  restarted after options[AZ_kspace] steps.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.
                   Read here: AZ_recursion_level, AZ_print_freq, AZ_kspace,
                   AZ_max_iter, AZ_orthog, AZ_output, AZ_conv.

  params:          Drop tolerance and convergence tolerance info
                   (params[AZ_tol] is the convergence tolerance).

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.
                   status[AZ_first_precond] receives the time of the first
                   preconditioner application.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).

  precond:         Structure used to represent the preconditioner; its
                   prec_function is applied once per iteration.

  convergence_info: Convergence bookkeeping. The iteration stops when its
                   'converged' or 'isnan' field becomes nonzero (both are
                   consulted but never written in this routine).

*******************************************************************************/

{

  /* local variables */

  register int k;
  int          i, N, NN, converged, one = 1, iter, r_avail = AZ_FALSE;
  int          print_freq, proc, kspace;
  double     **UU, **CC, *dots, *tmp, *res;
  double       dble_tmp, r_2norm = 1.0, epsilon;
  double       rec_residual, scaled_r_norm, true_scaled_r=0.0;
  double       actual_residual = -1.0, minus_alpha, alpha;
  double      *dummy = (double *) 0;
  double      *UUblock, *CCblock;
  int          mm, ii;
  char         label[64],suffix[32], prefix[64];
  int         *data_org, str_leng, first_time = AZ_TRUE;
  double       doubleone = 1.0, minusone = -1.0, init_time = 0.0;
  char        *T = "T";
  char        *T2 = "N";

  /**************************** execution begins ******************************/

  sprintf(suffix," in gmresr%d",options[AZ_recursion_level]);
                                         /* set string that will be used */
                                         /* for manage_memory label      */
  /* set prefix for printing */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  for (i = 0 ; i < options[AZ_recursion_level]; i++ ) {
    /* stop indenting rather than run past the end of prefix[] */
    if (str_leng + 5 >= (int) sizeof(prefix)) break;
    prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';

  data_org = Amat->data_org;

  /* pull needed values out of parameter arrays */

  N          = data_org[AZ_N_internal] + data_org[AZ_N_border];
  epsilon    = params[AZ_tol];
  proc       = proc_config[AZ_node];
  print_freq = options[AZ_print_freq];
  kspace     = options[AZ_kspace];

  /* Initialize some values in convergence info struct */
  convergence_info->print_info = print_freq;
  convergence_info->iteration = 0;
  convergence_info->sol_updated = 0; /* GMRES seldom updates solution */
  convergence_info->epsilon = params[AZ_tol];

  /* allocate memory for required vectors */

  NN = kspace + 1;
  /* +1: make sure everybody allocates something */

  sprintf(label,"dots%s",suffix);
  dots = AZ_manage_memory(2*NN*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i);
  tmp  = &(dots[NN]);
  sprintf(label,"CC%s",suffix);
  CC   = (double **) AZ_manage_memory(2*NN*sizeof(double *),
                                      AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i);
  UU   = &(CC[NN]);

  NN = N + data_org[AZ_N_external];
  if (NN == 0) NN++; /* make sure everybody allocates something */
  NN = NN + (NN%2);  /* make sure things are aligned for intel  */

  /* UUblock holds the kspace u-vectors followed by the kspace c-vectors, */
  /* each of length NN (the leading dimension used by DGEMV below).       */
  sprintf(label,"UUblock%s",suffix);
  UUblock = AZ_manage_memory(2*NN*kspace*sizeof(double),
                             AZ_ALLOC, AZ_SYS+az_iterate_id,label, &i);
  for (k = 0; k < kspace; k++) UU[k] = &(UUblock[k*NN]);
  CCblock = &(UUblock[kspace*NN]);
  for (k = 0; k < kspace; k++) CC[k] = &(CCblock[k*NN]);

  sprintf(label,"res%s",suffix);
  res = AZ_manage_memory(NN*sizeof(double),AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i);

  AZ_compute_residual(b, x, res, proc_config, Amat);

  /*
   * Compute a few global scalars:
   *     1) ||r||                corresponding to options[AZ_conv]
   *     2) scaled ||r||         corresponding to options[AZ_conv]
   */
  r_2norm = DDOT_F77(&N, res, &one, res, &one);
  AZ_gdot_vec(1, &r_2norm, &rec_residual, proc_config);
  r_2norm = sqrt(r_2norm);
  rec_residual = r_2norm;

  AZ_compute_global_scalars(Amat, x, b, res,
                            weight, &rec_residual, &scaled_r_norm, options,
                            data_org, proc_config, &r_avail, NULL, NULL, NULL,
                            convergence_info);
  r_2norm = rec_residual;

  converged = scaled_r_norm < epsilon;

  if ( (options[AZ_output] != AZ_none) &&
       (options[AZ_output] != AZ_last) &&
       (options[AZ_output] != AZ_summary) &&
       (options[AZ_output] != AZ_warnings) && (proc == 0) )
    (void) AZ_printf_out("%siter: 0 residual = %e\n",
                         prefix,scaled_r_norm);

  iter = 0;

  /* outer (restart) loop; termination is driven by convergence_info */
  while (!(convergence_info->converged) && iter < options[AZ_max_iter] &&
         !(convergence_info->isnan)) {

    convergence_info->iteration = iter;
    i = 0;

    /* inner loop: grow the subspace up to kspace directions */
    while (i < kspace && !(convergence_info->converged) &&
           iter < options[AZ_max_iter] && !(convergence_info->isnan)) {

      iter++;
      convergence_info->iteration = iter;

      /* v_i+1 = A M^-1 v_i */

      DCOPY_F77(&N, res , &one, UU[i], &one);

      if (iter == 1) init_time = AZ_second();

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Start timer. */
      static int precID = -1;
      precID = Teuchos_startTimer( "AztecOO: Operation Prec*x", precID );
#endif
#endif
      precond->prec_function(UU[i],options,proc_config,params,Amat,precond);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Stop timer. */
      Teuchos_stopTimer( precID );
#endif
#endif

      if (iter == 1) status[AZ_first_precond] = AZ_second() - init_time;

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Start timer. */
      static int matvecID = -1;
      matvecID = Teuchos_startTimer( "AztecOO: Operation Op*x", matvecID );
#endif
#endif
      Amat->matvec(UU[i], CC[i], Amat, proc_config);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Stop timer. */
      Teuchos_stopTimer( matvecID );
#endif
#endif

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Start the timer. */
      static int orthoID = -1;
      orthoID = Teuchos_startTimer( "AztecOO: Orthogonalization", orthoID );
#endif
#endif

      /* Gram-Schmidt orthogonalization */

      if (!options[AZ_orthog]) { /* classical  (stabilized) */
        /* two passes of classical Gram-Schmidt against c_0 .. c_{i-1} */
        for (ii = 0 ; ii < 2 ; ii++ ) {
          dble_tmp = 0.0; mm = i;
          if (N == 0) for (k = 0 ; k < i ; k++) dots[k] = 0.0;
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
          /* Start the timer. */
          static int orthoInnerProdID = -1;
          orthoInnerProdID = Teuchos_startTimer( "AztecOO: Ortho (Inner Product)", orthoInnerProdID );
#endif
#endif
          /* dots = CCblock^T * c_i (local part), then global sum */
          DGEMV_F77(CHAR_MACRO(T[0]), &N, &mm, &doubleone, CCblock, &NN, CC[i],
                    &one, &dble_tmp, dots, &one);

          AZ_gdot_vec(i, dots, tmp, proc_config);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
          Teuchos_stopTimer( orthoInnerProdID );
#endif
#endif

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
          /* Start the timer. */
          static int orthoUpdateID = -1;
          orthoUpdateID = Teuchos_startTimer( "AztecOO: Ortho (Update)", orthoUpdateID );
#endif
#endif
          /* c_i -= CCblock*dots and, with the same coefficients, u_i -= UUblock*dots */
          DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, CCblock, &NN, dots,
                    &one, &doubleone, CC[i], &one);
          DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, UUblock, &NN, dots,
                    &one, &doubleone, UU[i], &one);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
          Teuchos_stopTimer( orthoUpdateID );
#endif
#endif
        }
      }
      else {                    /* modified */
        for (k = 0; k < i; k++) {
          alpha = AZ_gdot(N, CC[k], CC[i], proc_config);
          minus_alpha = -alpha;
          DAXPY_F77(&N, &minus_alpha, CC[k], &one, CC[i], &one);
          DAXPY_F77(&N, &minus_alpha, UU[k], &one, UU[i], &one);
        }
      }

      /* normalize vector */

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      static int orthoNormID = -1;
      orthoNormID = Teuchos_startTimer( "AztecOO: Ortho (Norm)", orthoNormID );
#endif
#endif
      dble_tmp = sqrt(AZ_gdot(N, CC[i], CC[i], proc_config));
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      Teuchos_stopTimer( orthoNormID );
#endif
#endif

      /* a (numerically) zero c_i is scaled to zero instead of dividing by it */
      if (dble_tmp > DBL_EPSILON*r_2norm)
        dble_tmp = 1.0 / dble_tmp;
      else
        dble_tmp = 0.0;

      DSCAL_F77(&N, &dble_tmp, CC[i], &one);
      DSCAL_F77(&N, &dble_tmp, UU[i], &one);

      /* x += (c_i,res) u_i ;  res -= (c_i,res) c_i */
      dble_tmp = AZ_gdot(N, CC[i], res, proc_config);
      DAXPY_F77(&N, &dble_tmp, UU[i], &one, x, &one);
      dble_tmp = -dble_tmp;
      DAXPY_F77(&N, &dble_tmp, CC[i], &one, res, &one);

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Stop the timer. */
      Teuchos_stopTimer( orthoID );
#endif
#endif

      /* determine residual norm & test convergence */

      r_2norm      = sqrt(AZ_gdot(N, res, res, proc_config));
      rec_residual = r_2norm;

      /*
       * Compute a few global scalars:
       *     1) ||r||                corresponding to options[AZ_conv]
       *     2) scaled ||r||         corresponding to options[AZ_conv]
       * NOTE: if r_avail = AZ_TRUE or AZ_FIRST is passed in, we perform
       *       step 1), otherwise ||r|| is taken as rec_residual.
       */

      AZ_compute_global_scalars(Amat, x, b, res,
                                weight, &rec_residual, &scaled_r_norm, options,
                                data_org, proc_config, &r_avail, dummy, dummy,
                                dummy, convergence_info);

      converged = scaled_r_norm < epsilon;

      if ( (iter%print_freq == 0) &&
           (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0)
        (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter,
                             scaled_r_norm);

      i++;      /* subspace dim. counter dim(K) = i - 1 */
#ifdef out
      if (options[AZ_check_update_size] & converged)
        converged = AZ_compare_update_vs_soln(N, -1.,dble_tmp, UU[i-1], x,
                                              params[AZ_update_reduction],
                                              options[AZ_output], proc_config, &first_time);

      if (converged) {

        /* compute true residual using 'v[kspace]' as a temporary vector */

        AZ_scale_true_residual(x, b,
                               res, weight, &actual_residual,
                               &true_scaled_r, options, data_org, proc_config,
                               Amat, convergence_info);

        converged = true_scaled_r < params[AZ_tol];

        if (!converged && (AZ_get_new_eps(&epsilon, scaled_r_norm,
                                          true_scaled_r,
                                          options, proc_config) == AZ_QUIT)) {

          /*
           * Computed residual has converged, actual residual has not
           * converged, AZ_get_new_eps() has decided that it is time to quit.
           */

          AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params,
                                    true_scaled_r, actual_residual, options,
                                    proc_config);
          return;
        }
      }
#endif
    }
  }

  if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none)
       && (options[AZ_output] != AZ_warnings))
    (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter,
                         scaled_r_norm);

  if (convergence_info->converged) {
    i = AZ_normal;
    /* true_scaled_r is only computed inside the disabled '#ifdef out'     */
    /* code above, so it still holds its initial 0.0 here. Report the last */
    /* scaled residual norm actually computed instead of overwriting it.   */
  }
  else if (convergence_info->isnan) i = AZ_breakdown;
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

#ifdef out
  /* check if we exceeded maximum number of iterations */

  if (converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

#endif
} /* AZ_pgmres */
void AZ_pbicgstab(double b[], double x[], double weight[], int options[],
                  double params[],int proc_config[], double status[],
                  AZ_MATRIX *Amat, AZ_PRECOND *precond,
                  struct AZ_CONVERGE_STRUCT *convergence_info)

/*******************************************************************************

  Van der Vorst's (1990) variation of the Bi-Conjugate Gradient algorithm
  (Sonneveld (1984,1989)) to solve the nonsymmetric matrix problem Ax = b.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.
                   Read here: AZ_recursion_level, AZ_precond, AZ_print_freq,
                   AZ_aux_vec, AZ_max_iter, AZ_output, AZ_conv,
                   AZ_check_update_size.

  params:          Drop tolerance and convergence tolerance info.

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.
                   status[AZ_first_precond] receives the time of the first
                   preconditioner application.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).

  precond:         Structure used to represent the preconditioner (see file
                   az_aztec.h and Aztec User's Guide). Applied only when
                   options[AZ_precond] is nonzero.

  convergence_info: Convergence bookkeeping; the iteration stops when its
                   'converged' or 'isnan' field becomes nonzero.

*******************************************************************************/

{

  /* local variables */

  register int    i;
  int             N, NN, one = 1, iter=1, r_avail = AZ_TRUE, j;
  int             precond_flag, print_freq, proc;
  int             brkdown_will_occur = AZ_FALSE;
  double          alpha = 1.0, beta, true_scaled_r=0.0;
  double          *v, *r, *rtilda, *p, *phat, *s, *shat;
  double          omega = 1.0, dot_vec[2], tmp[2], init_time = 0.0;
  double          rhonm1 = 1.0, rhon, sigma, brkdown_tol = DBL_EPSILON;
  double          scaled_r_norm= -1.0, actual_residual = -1.0, rec_residual= -1.0;
  double          dtemp;
  int          *data_org, str_leng, first_time = AZ_TRUE;
  char         label[64],suffix[32], prefix[64];

  /**************************** execution begins ******************************/

  sprintf(suffix," in cgstab%d",options[AZ_recursion_level]);
                                         /* set string that will be used */
                                         /* for manage_memory label      */
  /* set prefix for printing */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  for (i = 0 ; i < options[AZ_recursion_level]; i++ ) {
    /* stop indenting rather than run past the end of prefix[] */
    if (str_leng + 5 >= (int) sizeof(prefix)) break;
    prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';

  data_org = Amat->data_org;

  /* pull needed values out of parameter arrays */

  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];
  precond_flag = options[AZ_precond];
  proc         = proc_config[AZ_node];
  print_freq   = options[AZ_print_freq];

  /* Initialize some values in convergence info struct */
  convergence_info->print_info = print_freq;
  convergence_info->iteration = 0;
  convergence_info->sol_updated = 1; /* BiCGStab always updates solution */
  convergence_info->epsilon = params[AZ_tol]; /* Test against this */

  /* allocate memory for required vectors */

  NN     = N + data_org[AZ_N_external];
  if (NN == 0) NN++;  /* make sure everybody allocates something*/
  NN = NN + (NN%2);   /* make sure things are aligned for the   */
                      /* assembly coded matvec() on the Intel.  */

  /* one allocation of 7*NN doubles, carved into seven work vectors */
  sprintf(label,"phat%s",suffix);
  phat   = (double *) AZ_manage_memory(7*NN*sizeof(double), AZ_ALLOC,
                                       AZ_SYS+az_iterate_id, label,&j);
  p      = &(phat[1*NN]);
  shat   = &(phat[2*NN]);   /* NOTE: phat and shat must be aligned */
                            /*       so that the assembly dgemv    */
                            /*       works on the paragon.         */
  s      = &(phat[3*NN]);
  r      = &(phat[4*NN]);
  rtilda = &(phat[5*NN]);
  v      = &(phat[6*NN]);

  AZ_compute_residual(b, x, r, proc_config, Amat);

  /* v, p <- 0 */

  for (i = 0; i < N; i++) v[i] = p[i] = 0.0;

  /* set rtilda */

  if (options[AZ_aux_vec] == AZ_resid) DCOPY_F77(&N, r, &one, rtilda, &one);
  else AZ_random_vector(rtilda, data_org, proc_config);

  /*
   * Compute a few global scalars:
   *     1) ||r||                corresponding to options[AZ_conv]
   *     2) scaled ||r||         corresponding to options[AZ_conv]
   *     3) rho = <rtilda, r>
   */

  AZ_compute_global_scalars(Amat, x, b, r,
                            weight, &rec_residual, &scaled_r_norm, options,
                            data_org, proc_config,&r_avail,r,rtilda, &rhon,
                            convergence_info);
  true_scaled_r = scaled_r_norm;

  if ((options[AZ_output] != AZ_none) &&
      (options[AZ_output] != AZ_last) &&
      (options[AZ_output] != AZ_warnings) &&
      (options[AZ_output] != AZ_summary) &&
      (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0))
    (void) AZ_printf_out("%siter: 0 residual = %e\n",prefix,scaled_r_norm);

  for (iter = 1; iter <= options[AZ_max_iter] &&
         !(convergence_info->converged) && !(convergence_info->isnan); iter++) {

    /* a breakdown detected during the previous iteration ends the solve here */
    if (brkdown_will_occur) {
      AZ_scale_true_residual( x, b, v,
                              weight, &actual_residual, &true_scaled_r, options,
                              data_org, proc_config, Amat, convergence_info);

      AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                params, true_scaled_r, actual_residual, options,
                                proc_config);
      return;
    }

    beta = (rhon/rhonm1) * (alpha/omega);

    if (fabs(rhon) < brkdown_tol) { /* possible problem */
      if (AZ_breakdown_f(N, r, rtilda, rhon, proc_config))
        brkdown_will_occur = AZ_TRUE;
      else
        brkdown_tol = 0.1 * fabs(rhon);
    }

    rhonm1 = rhon;

    /* p    = r + beta*(p - omega*v)       */
    /* phat = M^-1 p                       */
    /* v    = A phat                       */

    dtemp = beta * omega;
    for (i = 0; i < N; i++) p[i] = r[i] + beta * p[i] - dtemp * v[i];
    DCOPY_F77(&N, p, &one, phat, &one);

    if (iter==1) init_time = AZ_second();

    if (precond_flag)
      precond->prec_function(phat,options,proc_config,params,Amat,precond);

    if (iter==1) status[AZ_first_precond] = AZ_second() - init_time;

    Amat->matvec(phat, v, Amat, proc_config);

    sigma = AZ_gdot(N, rtilda, v, proc_config);

    if (fabs(sigma) < brkdown_tol) { /* possible problem */
      if (AZ_breakdown_f(N, rtilda, v, sigma, proc_config)) {

        /* break down */

        AZ_scale_true_residual( x, b, v,
                                weight, &actual_residual, &true_scaled_r,
                                options, data_org,proc_config, Amat,
                                convergence_info);

        AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                  params, true_scaled_r, actual_residual,
                                  options, proc_config);
        return;
      }
      else brkdown_tol = 0.1 * fabs(sigma);
    }

    alpha = rhon / sigma;

    /* s = r - alpha*v                     */
    /* shat = M^-1 s                       */
    /* r = A shat (r is a tmp here for t ) */

    for (i = 0; i < N; i++) s[i] = r[i] - alpha * v[i];
    DCOPY_F77(&N, s, &one, shat, &one);

    if (precond_flag)
      precond->prec_function(shat,options,proc_config,params,Amat,precond);

    Amat->matvec(shat, r, Amat, proc_config);

    /* omega = (t,s)/(t,t) with r = t */

    dot_vec[0] = DDOT_F77(&N, r, &one, s, &one);
    dot_vec[1] = DDOT_F77(&N, r, &one, r, &one);
    AZ_gdot_vec(2, dot_vec, tmp, proc_config);

    if (fabs(dot_vec[1]) < DBL_MIN) {
      /* (t,t) vanished: finish this update with omega = 0 and stop at the */
      /* top of the next iteration                                         */
      omega = 0.0;
      brkdown_will_occur = AZ_TRUE;
    }
    else omega = dot_vec[0] / dot_vec[1];

    /* x = x + alpha*phat + omega*shat */
    /* r = s - omega*r */

    DAXPY_F77(&N, &alpha, phat, &one, x, &one);
    DAXPY_F77(&N, &omega, shat, &one, x, &one);

    for (i = 0; i < N; i++) r[i] = s[i] - omega * r[i];

    /*
     * Compute a few global scalars:
     *     1) ||r||                corresponding to options[AZ_conv]
     *     2) scaled ||r||         corresponding to options[AZ_conv]
     *     3) rho = <rtilda, r>
     */

    AZ_compute_global_scalars(Amat, x, b, r,
                              weight, &rec_residual, &scaled_r_norm, options,
                              data_org, proc_config, &r_avail, r, rtilda, &rhon,
                              convergence_info);

    if ( (iter%print_freq == 0) && proc == 0)
      (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter,
                           scaled_r_norm);

    /* convergence tests */

    /* Both operands are truth values: use logical AND so that nonzero     */
    /* values other than 1 are still treated as "true".                    */
    if (options[AZ_check_update_size] && convergence_info->converged) {
      /* shat <- shat + (alpha/omega)*phat, so omega*shat is the update    */
      /* alpha*phat + omega*shat just added to x                           */
      dtemp = alpha/omega;
      DAXPY_F77(&N, &dtemp, phat, &one, shat, &one);
      convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,omega,
                                              shat, x,
                                              params[AZ_update_reduction],
                                              options[AZ_output], proc_config,
                                              &first_time);
    }

    if (convergence_info->converged) {
      AZ_scale_true_residual(x, b, v,
                             weight, &actual_residual, &true_scaled_r, options,
                             data_org, proc_config, Amat, convergence_info);

      /*
       * Note: epsilon and params[AZ_tol] may not be equal due to a previous
       *       call to AZ_get_new_eps().
       */

      if (!(convergence_info->converged) &&
          options[AZ_conv]!=AZTECOO_conv_test) {

        if (AZ_get_new_eps(&convergence_info->epsilon, scaled_r_norm,
                           true_scaled_r,
                           options, proc_config) == AZ_QUIT) {

          /*
           * Computed residual has converged, actual residual has not converged,
           * AZ_get_new_eps() has decided that it is time to quit.
           */

          AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params,
                                    true_scaled_r, actual_residual, options,
                                    proc_config);
          return;
        }
      }
    }
  }

  iter--;   /* the for loop leaves iter one past the last iteration performed */

  if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none)
       && (options[AZ_output] != AZ_warnings) &&
       (options[AZ_conv]!=AZTECOO_conv_test))
    (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter,
                         scaled_r_norm);

  /* check if we exceeded maximum number of iterations */

  if (convergence_info->converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else if (convergence_info->isnan) i = AZ_breakdown;
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

} /* bicgstab */
void AZ_pcg_f(double b[], double x[], double weight[], int options[],
              double params[], int proc_config[], double status[],
              AZ_MATRIX *Amat, AZ_PRECOND *precond,
              struct AZ_CONVERGE_STRUCT *convergence_info)

/*******************************************************************************

  Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.
                   Entries read here: AZ_recursion_level, AZ_precond,
                   AZ_print_freq, AZ_apply_kvecs, AZ_keep_kvecs, AZ_orth_kvecs,
                   AZ_output, AZ_conv, AZ_max_iter, AZ_check_update_size.

  params:          Drop tolerance and convergence tolerance info
                   (params[AZ_tol], params[AZ_update_reduction]).

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.
                   status[AZ_first_precond] receives the AZ_second() difference
                   measured around the first preconditioner application.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).

  precond:         Structure used to represent the preconditioner
                   (see file az_aztec.h and Aztec User's Guide).

  convergence_info: Convergence bookkeeping.  This routine sets print_info,
                   iteration, sol_updated and epsilon, and reads converged and
                   isnan to control the iteration.  NOTE(review): converged and
                   isnan are presumably updated by AZ_compute_global_scalars()
                   and AZ_scale_true_residual() -- confirm against those
                   routines.

*******************************************************************************/

{

  /* local variables */

  register int i;
  int          N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j;
  int          precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE;
  double       alpha, beta = 0.0, nalpha, true_scaled_r = -1.0;
  double      *r, *z, *p, *ap, actual_residual = -1.0;
  double       r_z_dot, r_z_dot_old, p_ap_dot, rec_residual = -1.0;
  double       scaled_r_norm = -1.0, brkdown_tol = DBL_EPSILON;
  int         *data_org, str_leng, first_time = AZ_TRUE;
  char         label[64], suffix[32], prefix[64];

  double     **saveme, *ptap;
  int         *kvec_sizes = NULL, current_kept = 0;
  double      *dots;
  double       doubleone = 1., dzero = 0.;
  char        *T = "T";
  char        *T2 = "N";
  double      *block;

  /**************************** execution begins ******************************/

  /* set string that will be used for manage_memory label */

  sprintf(suffix," in cg%d",options[AZ_recursion_level]);

  /*
   * Set prefix for printing: 16 blanks plus 5 blanks per recursion level.
   * The per-level fill stops once another 5 blanks plus the terminating NUL
   * would no longer fit, so a deep recursion level cannot overrun prefix[].
   */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  for (i = 0; i < options[AZ_recursion_level]; i++) {
    if (str_leng + 5 >= (int) sizeof(prefix)) break;
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';

  /* pull needed values out of parameter arrays */

  data_org     = Amat->data_org;
  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];
  precond_flag = options[AZ_precond];
  proc         = proc_config[AZ_node];
  print_freq   = options[AZ_print_freq];

  /* Initialize some values in convergence info struct */

  convergence_info->print_info  = print_freq;
  convergence_info->iteration   = 0;
  convergence_info->sol_updated = 1;             /* CG always updates solution */
  convergence_info->epsilon     = params[AZ_tol]; /* Test against this */

  /* allocate space for necessary vectors */

  NN = N + data_org[AZ_N_external];
  if (NN == 0) NN++;   /* make sure everybody allocates something */
  NN = NN + (NN%2);    /* make sure things are aligned for assembly */
                       /* matvec on paragon. */

  /* one allocation holds the four work vectors p, r, z and ap back to back */

  sprintf(label,"z%s",suffix);
  p  = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC,
                                   AZ_SYS+az_iterate_id, label, &j);
  r  = &(p[1*NN]);
  z  = &(p[2*NN]);
  ap = &(p[3*NN]);

  AZ_compute_residual(b, x, r, proc_config, Amat);

  if (options[AZ_apply_kvecs]) {

    /*
     * Improve the initial guess with previously kept vectors:
     *   dots = block^T r,  dots[i] /= ptap[i],  x = x + block * dots
     * and then recompute the residual for the updated x.
     */

    AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual,
                              &scaled_r_norm, options, data_org, proc_config,
                              &r_avail,NULL, NULL, &r_z_dot, convergence_info);
    AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme, &ptap, options,
                       data_org, suffix, proc_config[AZ_node], &block);

    /* second half of dots is scratch space for AZ_gdot_vec() */

    dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double));
    if (dots == NULL) {
      printf("Not space to apply vectors in CG\n");
      exit(1);
    }
    DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N,
              r, &one, &dzero, dots, &one);
    AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]),
                proc_config);
    for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i];
    DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone,
              block, &N, dots, &one, &doubleone, x, &one);
    AZ_free(dots);
    AZ_compute_residual(b, x, r, proc_config, Amat);
    if ((options[AZ_output] != AZ_none) && (proc == 0))
      printf("\t\tApplied Previous Krylov Vectors ... \n\n");
  }
  if (options[AZ_keep_kvecs] > 0)
    AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme, &ptap, options,
                       data_org, suffix, proc_config[AZ_node], &block);

  /*  z = M r */
  /*  p = 0   */

  DCOPY_F77(&N, r, &one, z, &one);
  status[AZ_first_precond] = AZ_second();
  if (precond_flag)
    precond->prec_function(z,options,proc_config,params,Amat,precond);
  status[AZ_first_precond] = AZ_second() - status[AZ_first_precond];

  for (i = 0; i < N; i++ ) p[i] = 0.0;

  /* compute a few global scalars:                                 */
  /*     1) ||r||                corresponding to options[AZ_conv] */
  /*     2) scaled ||r||         corresponding to options[AZ_conv] */
  /*     3) r_z_dot = <z, r>                                       */

  AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual,
                            &scaled_r_norm, options, data_org, proc_config,
                            &r_avail,r, z, &r_z_dot, convergence_info);
  true_scaled_r = scaled_r_norm;

  if ((options[AZ_output] != AZ_none) &&
      (options[AZ_output] != AZ_last) &&
      (options[AZ_output] != AZ_warnings) &&
      (options[AZ_output] != AZ_summary) &&
      (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0))
    {
      (void) AZ_printf_out("%siter: 0 residual = %e\n",
                           prefix,scaled_r_norm);
      AZ_flush_out();
    }

  for (iter = 1; iter <= options[AZ_max_iter] &&
         !(convergence_info->converged) &&
         !(convergence_info->isnan); iter++ ) {
    convergence_info->iteration = iter;

    /* p  = z + beta * p */
    /* ap = A p          */

    for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i];
    Amat->matvec(p, ap, Amat, proc_config);

    if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) {

      /* remove the kept-vector components from p; ap must then be redone */

      for (i = 0; i < current_kept; i++) {
        alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i];
        DAXPY_F77(&N, &alpha, saveme[i], &one, p, &one);
      }
      if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config);
    }

    p_ap_dot = AZ_gdot(N, p, ap, proc_config);
    if (p_ap_dot < brkdown_tol) {

      /* possible problem */

      if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) {

        /* something wrong */

        AZ_scale_true_residual(x, b, ap,
                               weight, &actual_residual, &true_scaled_r,
                               options, data_org, proc_config, Amat,
                               convergence_info);
        AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                  params, true_scaled_r, actual_residual,
                                  options, proc_config);
        return;
      }
      else brkdown_tol = 0.1 * p_ap_dot;
    }

    alpha  = r_z_dot / p_ap_dot;
    nalpha = -alpha;

    /* x = x + alpha*p  */
    /* r = r - alpha*Ap */
    /* z = M^-1 r       */

    DAXPY_F77(&N, &alpha,  p,  &one, x, &one);

    if (iter <= options[AZ_keep_kvecs]) {

      /* keep the first options[AZ_keep_kvecs] search directions and <p,Ap> */

      DCOPY_F77(&N, p, &one, saveme[iter-1], &one);
      ptap[iter-1] = p_ap_dot ;
      kvec_sizes[AZ_Nkept]++;
      current_kept = kvec_sizes[AZ_Nkept];
    }

    DAXPY_F77(&N, &nalpha, ap, &one, r, &one);
    DCOPY_F77(&N, r, &one, z, &one);

    if (precond_flag)
      precond->prec_function(z,options,proc_config,params,Amat,precond);

    r_z_dot_old = r_z_dot;

    /* compute a few global scalars:                                 */
    /*     1) ||r||                corresponding to options[AZ_conv] */
    /*     2) scaled ||r||         corresponding to options[AZ_conv] */
    /*     3) r_z_dot = <z, r>                                       */

    AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual,
                              &scaled_r_norm, options, data_org, proc_config,
                              &r_avail, r, z, &r_z_dot, convergence_info);

    /* breakdown flagged by the previous iteration's <r,z> check */

    if (brkdown_will_occur) {
      AZ_scale_true_residual( x, b, ap,
                              weight, &actual_residual, &true_scaled_r,
                              options, data_org, proc_config, Amat,
                              convergence_info);
      AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                params, true_scaled_r, actual_residual,
                                options, proc_config);
      return;
    }

    beta = r_z_dot / r_z_dot_old;

    if (fabs(r_z_dot) < brkdown_tol) {

      /* possible problem */

      if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config))
        brkdown_will_occur = AZ_TRUE;
      else
        brkdown_tol = 0.1 * fabs(r_z_dot);
    }

    if ( (iter%print_freq == 0) &&
         (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 )
      {
        (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter,
                             scaled_r_norm);
        AZ_flush_out();
      }

    /* convergence tests */

    /* both operands are truth values, so combine them logically (&&) */

    if (options[AZ_check_update_size] && convergence_info->converged)
      convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p,
                                         x, params[AZ_update_reduction],
                                         options[AZ_output], proc_config,
                                         &first_time);

    if (convergence_info->converged) {
      AZ_scale_true_residual(x, b, ap,
                             weight, &actual_residual, &true_scaled_r, options,
                             data_org, proc_config, Amat, convergence_info);

      /*
       * Note: epsilon and params[AZ_tol] may not be equal due to a previous
       *       call to AZ_get_new_eps().
       */

      if (!(convergence_info->converged) &&
          options[AZ_conv]!=AZTECOO_conv_test) {

        if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm,
                           true_scaled_r, options,
                           proc_config) == AZ_QUIT) {

          /*
           * Computed residual has converged, actual residual has not
           * converged, AZ_get_new_eps() has decided that it is time to quit.
           */

          AZ_terminate_status_print(AZ_loss, iter, status, rec_residual,
                                    params, true_scaled_r, actual_residual,
                                    options, proc_config);
          return;
        }
      }
    }
  }

  /* the for loop leaves iter one past the last iteration performed */

  iter--;
  if ( (iter%print_freq != 0) && (proc == 0) &&
       (options[AZ_output] != AZ_none) &&
       (options[AZ_output] != AZ_warnings) &&
       (options[AZ_conv]!=AZTECOO_conv_test) )
    {
      (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter,
                           scaled_r_norm);
      AZ_flush_out();
    }

  /* check if we exceeded maximum number of iterations */

  if (convergence_info->converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else if (convergence_info->isnan) i = AZ_breakdown;
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

} /* AZ_pcg */
void AZ_pqmrs(double b[], double x[], double weight[], int options[],
              double params[], int proc_config[], double status[],
              AZ_MATRIX *Amat, AZ_PRECOND *precond,
              struct AZ_CONVERGE_STRUCT *convergence_info)

/*******************************************************************************

  Freund's transpose free QMR routine to solve the nonsymmetric matrix problem
  Ax = b. NOTE: this routine differs from Freund's paper in that we compute
  ubar (= M^-1 u ) and qbar (= M^-1 q) instead of u and q defined in Freund's
  paper.

  IMPORTANT NOTE: While an estimate of the 2-norm of the qmr residual is
  available (see comment below), the actual qmr residual is not normally
  computed as part of the qmr algorithm. Thus, if the user uses a convergence
  condition (see AZ_compute_global_scalars()) that is based on the 2-norm of the
  residual there is no need to compute the residual (i.e. r_avail = AZ_FALSE).
  However, if another norm of r is requested, AZ_compute_global_scalars() will
  set r_avail = AZ_TRUE and the algorithm will compute the residual.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.

  params:          Drop tolerance and convergence tolerance info.

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.
                   status[AZ_first_precond] receives the AZ_second() difference
                   measured around the first preconditioner application.

  Amat:            Structure used to represent the matrix (see az_aztec.h
                   and Aztec User's Guide).

  precond:         Structure used to represent the preconditioner
                   (see file az_aztec.h and Aztec User's Guide).

  convergence_info: Passed through to AZ_compute_global_scalars() and
                   AZ_scale_true_residual().  The iteration itself is steered
                   by a local convergence flag (scaled_r_norm < epsilon), not
                   by fields of this structure.

*******************************************************************************/

{

  /* local variables */

  register int    i;
  int             N, NN, converged, one = 1, iter = 1, r_avail = AZ_FALSE, j;
  int             precond_flag, print_freq, proc;
  int             brkdown_will_occur = AZ_FALSE;
  double          alpha, beta = 0.0, true_scaled_r = 0.0;
  double         *ubar, *v, *r_cgs, *rtilda, *Aubar, *qbar, *Aqbar, *d;
  double         *Ad = NULL;
  double          rhonm1, rhon, est_residual, actual_residual = -1.0;
  double          scaled_r_norm, sigma, epsilon, brkdown_tol = DBL_EPSILON;
  double          omega, c, norm_r_n_cgs, norm_r_nm1_cgs;
  double          tau_m, nu_m, eta_m, init_time = 0.0;
  double          tau_mm1, nu_mm1 = 0.0, eta_mm1 = 0.0, doubleone = 1.0;
  register double dtemp;
  double          W_norm = 0.0;
  int             offset = 0;
  int            *data_org, str_leng, first_time = AZ_TRUE;
  char            label[64], suffix[32], prefix[64];

  /**************************** execution begins ******************************/

  /* set string that will be used for manage_memory label */

  sprintf(suffix," in qmrcgs%d",options[AZ_recursion_level]);

  /*
   * Set prefix for printing: 16 blanks plus 5 blanks per recursion level.
   * The per-level fill stops once another 5 blanks plus the terminating NUL
   * would no longer fit, so a deep recursion level cannot overrun prefix[].
   */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  for (i = 0; i < options[AZ_recursion_level]; i++) {
    if (str_leng + 5 >= (int) sizeof(prefix)) break;
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';

  data_org = Amat->data_org;

  /* pull needed values out of parameter arrays */

  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];
  precond_flag = options[AZ_precond];
  epsilon      = params[AZ_tol];
  proc         = proc_config[AZ_node];
  print_freq   = options[AZ_print_freq];

  /* allocate memory for required vectors */

  NN = N + data_org[AZ_N_external];
  if (NN == 0) NN++;   /* make sure everyone allocates something */
  NN = NN + (NN%2);    /* make sure things are aligned on double words */
                       /* for paragon */

  /* one allocation holds the eight work vectors back to back */

  sprintf(label,"ubar%s",suffix);
  ubar   = (double *) AZ_manage_memory(8*NN*sizeof(double),
                                       AZ_ALLOC,AZ_SYS,label,&j);
  v      = &(ubar[1*NN]);
  Aubar  = &(ubar[2*NN]);
  d      = &(ubar[3*NN]);
  qbar   = &(ubar[4*NN]);
  rtilda = &(ubar[5*NN]);
  Aqbar  = &(ubar[6*NN]);
  r_cgs  = &(ubar[7*NN]);

  AZ_compute_residual(b, x, r_cgs, proc_config, Amat);

  /* d, qbar, Aqbar, v = 0 */

  for (i = 0; i < N; i++)
    d[i] = qbar[i] = Aqbar[i] = v[i] = 0.0;

  /* set rtilda */

  if (options[AZ_aux_vec] == AZ_resid)
    DCOPY_F77(&N, r_cgs, &one, rtilda, &one);
  else
    AZ_random_vector(rtilda, data_org, proc_config);

  /*
   * Compute a few global scalars:
   *     1) ||r_cgs||              corresponding to options[AZ_conv]
   *     2) scaled ||r_cgs||       corresponding to options[AZ_conv]
   *     3) rhon = <rtilda, r_cgs>
   * Note: step 1) is performed if r_avail = AZ_TRUE on entry or
   *       AZ_FIRST_TIME is passed in. Otherwise, ||r_cgs|| is taken as
   *       est_residual.
   */

  AZ_compute_global_scalars(Amat, x, b, r_cgs, weight, &est_residual,
                            &scaled_r_norm, options, data_org, proc_config,
                            &r_avail, r_cgs, rtilda, &rhon, convergence_info);
  true_scaled_r = scaled_r_norm;

  if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) &&
      (options[AZ_output] != AZ_warnings) && (proc == 0))
    (void) fprintf(stdout, "%siter: 0 residual = %e\n",prefix,scaled_r_norm);

  norm_r_nm1_cgs = est_residual;
  tau_mm1        = norm_r_nm1_cgs;
  rhonm1         = rhon;

  /* Set up aux-vector if we need to compute the qmr residual */

  if (r_avail) {
    sprintf(label,"Ad%s",suffix);
    Ad = (double *) AZ_manage_memory(NN*sizeof(double),AZ_ALLOC,
                                     AZ_SYS, label, &j);
    for (i = 0; i < N; i++) Ad[i] = 0.0;
  }

  converged = scaled_r_norm < epsilon;

  for (iter = 1; iter <= options[AZ_max_iter] && !converged; iter++) {

    if (fabs(rhon) < brkdown_tol) {

      /* possible breakdown problem */

      if (AZ_breakdown_f(N, r_cgs, rtilda, rhon, proc_config))
        brkdown_will_occur = AZ_TRUE;
      else
        brkdown_tol = 0.1 * fabs(rhon);
    }

    /* ubar  = M^-1 r_cgs + beta*qbar               */
    /* Aubar = A ubar                               */
    /* v     = A ubar + beta ( A qbar + beta pnm1 ) */
    /*       = Aubar  + beta ( Aqbar +  beta v)     */

    DCOPY_F77(&N, r_cgs, &one, ubar, &one);

    if (iter==1) init_time = AZ_second();

    if (precond_flag)
      precond->prec_function(ubar,options,proc_config,params,Amat,precond);

    if (iter==1) status[AZ_first_precond] = AZ_second() - init_time;

    for (i = 0; i < N; i++) ubar[i] = ubar[i] + beta * qbar[i];

    Amat->matvec(ubar, Aubar, Amat, proc_config);

    /* Aqbar = Aqbar + beta * v, reused below as the inner term of v */

    DAXPY_F77(&N, &beta, v, &one, Aqbar, &one);
    for (i = 0; i < N; i++) v[i] = Aubar[i] + beta * Aqbar[i];

    sigma = AZ_gdot(N, rtilda, v, proc_config);

    if (fabs(sigma) < brkdown_tol) {

      /* possible problem */

      if (AZ_breakdown_f(N, rtilda, v, sigma, proc_config)) {

        /* break down */

        AZ_scale_true_residual(x, b, v,
                               weight, &actual_residual, &true_scaled_r,
                               options, data_org, proc_config, Amat,
                               convergence_info);

        AZ_terminate_status_print(AZ_breakdown, iter, status, est_residual,
                                  params, true_scaled_r, actual_residual,
                                  options, proc_config);
        return;
      }
      else brkdown_tol = 0.1 * fabs(sigma);
    }

    alpha = rhon / sigma;

    /* qbar  = ubar - alpha* M^-1 v            */
    /* Aqbar = A qbar                          */
    /* r_cgs = r_cgs - alpha (A ubar + A qbar) */
    /*       = r_cgs - alpha (Aubar + Aqbar)   */

    DCOPY_F77(&N, v, &one, qbar, &one);

    if (precond_flag)
      precond->prec_function(qbar,options,proc_config,params,Amat,precond);

    for (i = 0; i < N; i++) qbar[i] = ubar[i] - alpha * qbar[i];

    Amat->matvec(qbar, Aqbar, Amat, proc_config);

    for (i = 0; i < N; i++)
      r_cgs[i] = r_cgs[i] - alpha*(Aubar[i] + Aqbar[i]);

    /* QMRS scaling and iterates weights 5.11 */

    norm_r_n_cgs = sqrt(AZ_gdot(N, r_cgs, r_cgs, proc_config));

    /* m is odd in Freund's paper */

    omega = sqrt(norm_r_nm1_cgs * norm_r_n_cgs);
    nu_m  = omega / tau_mm1;
    c     = 1.0 / sqrt(1.0 + nu_m * nu_m);
    tau_m = tau_mm1 * nu_m * c;
    eta_m = c * c * alpha;

    /* breakdown flagged by the rhon check at the top of this iteration */

    if (brkdown_will_occur) {
      AZ_scale_true_residual(x, b, v,
                             weight, &actual_residual, &true_scaled_r,
                             options, data_org, proc_config, Amat,
                             convergence_info);

      AZ_terminate_status_print(AZ_breakdown, iter, status, est_residual,
                                params, true_scaled_r, actual_residual,
                                options, proc_config);
      return;
    }

    dtemp = nu_mm1 * nu_mm1 * eta_mm1 / alpha;
    for (i = 0; i < N; i++) d[i] = ubar[i] + dtemp * d[i];
    DAXPY_F77(&N, &eta_m, d, &one, x, &one);   /* x = x + eta_m d */

    if (r_avail) {
      for (i = 0; i < N; i++) Ad[i] = Aubar[i] + dtemp * Ad[i];
    }

    /* save some values */

    eta_mm1        = eta_m;
    tau_mm1        = tau_m;
    nu_mm1         = nu_m;
    norm_r_nm1_cgs = norm_r_n_cgs;

    /* m is even in Freund's paper */

    omega = norm_r_n_cgs;

    if (tau_mm1 == 0.0) nu_m = 0.0;
    else                nu_m = omega / tau_mm1;

    c     = 1.0 / sqrt(1.0 + nu_m * nu_m);
    tau_m = tau_mm1 * nu_m * c;

    if (options[AZ_check_update_size]) {

      /*
       * Stash the first half-step update in ubar, expressed in units of the
       * second half-step's eta_m; d is added to it before the update-size
       * test in the convergence section below.
       */

      eta_m = eta_m/(c*c*alpha);
      for (i = 0; i < N; i++) ubar[i] = eta_m*d[i];
    }
    eta_m = c * c * alpha;
    dtemp = nu_mm1 * nu_mm1 * eta_mm1 / alpha;

    for (i = 0; i < N; i++) d[i] = qbar[i] + dtemp * d[i];
    DAXPY_F77(&N, &eta_m, d, &one, x, &one);   /* x = x + eta_m d */

    if (r_avail) {
      for (i = 0; i < N; i++) Ad[i] = Aqbar[i] + dtemp * Ad[i];
    }

    /* save some values */

    eta_mm1        = eta_m;
    tau_mm1        = tau_m;
    nu_mm1         = nu_m;
    norm_r_nm1_cgs = norm_r_n_cgs;
    rhonm1         = rhon;

    if (r_avail) {
      for (i = 0; i < N; i++) Aubar[i] = r_cgs[i] - (eta_m - alpha) * Ad[i];

      /* Note: Aubar temporarily holds qmr residual */

    }
    else {

      /*
       * We want to estimate the 2-norm of the qmr residual. Freund gives the
       * bound ||r|| <= tau_m * sqrt(2*iter+1). We use this bound until we get
       * close to the solution. At that point we compute the real residual norm
       * and use this to estimate the norm of ||W|| in Freund's paper.
       */

      dtemp = sqrt((double) (2 * iter + 1));

      if ((scaled_r_norm < epsilon * dtemp) && !offset) {
        AZ_scale_true_residual(x, b, Aubar,
                               weight, &actual_residual, &true_scaled_r,
                               options, data_org, proc_config, Amat,
                               convergence_info);

        if (tau_m != 0.0) W_norm = actual_residual / tau_m;
        if (W_norm < 1.0) W_norm = 1.0;

        offset       = 2 * iter + 1;
        est_residual = actual_residual;
      }
      else est_residual = sqrt((double)(2 * iter + 1 - offset) +
                               W_norm * W_norm) * tau_m;
    }

    /*
     * Compute a few global scalars:
     *     1) ||r||                corresponding to options[AZ_conv]
     *     2) scaled ||r||         corresponding to options[AZ_conv]
     *     3) rhon = <rtilda, r_cgs>
     * Note: step 1) is performed if r_avail = AZ_TRUE or AZ_FIRST_TIME
     *       is passed in. Otherwise, ||r|| is taken as est_residual.
     */

    AZ_compute_global_scalars(Amat, x, b, Aubar, weight, &est_residual,
                              &scaled_r_norm, options, data_org, proc_config,
                              &r_avail, rtilda, r_cgs, &rhon,
                              convergence_info);

    if ( (iter%print_freq == 0) && proc == 0 )
      (void) fprintf(stdout, "%siter: %4d residual = %e\n",prefix,iter,
                     scaled_r_norm);

    /* convergence tests */

    converged = scaled_r_norm < epsilon;

    /* both operands are truth values, so combine them logically (&&) */

    if (options[AZ_check_update_size] && converged) {
      DAXPY_F77(&N, &doubleone, d, &one, ubar, &one);
      converged = AZ_compare_update_vs_soln(N, -1.,eta_m, ubar, x,
                                            params[AZ_update_reduction],
                                            options[AZ_output], proc_config,
                                            &first_time);
    }

    if (converged) {
      AZ_scale_true_residual(x, b, Aubar,
                             weight, &actual_residual, &true_scaled_r, options,
                             data_org, proc_config, Amat, convergence_info);

      converged = true_scaled_r < params[AZ_tol];

      /*
       * Note: epsilon and params[AZ_tol] may not be equal due to a previous
       *       call to AZ_get_new_eps().
       */

      /*
       * AZ_get_new_eps() is called with the options array, matching the other
       * solvers in this file.
       */

      if (!converged &&
          (AZ_get_new_eps(&epsilon, scaled_r_norm, true_scaled_r, options,
                          proc_config) == AZ_QUIT)) {

        /*
         * Computed residual has converged, actual residual has not converged,
         * AZ_get_new_eps() has decided that it is time to quit.
         */

        AZ_terminate_status_print(AZ_loss, iter, status, est_residual, params,
                                  true_scaled_r, actual_residual, options,
                                  proc_config);
        return;
      }
    }

    beta = rhon / rhonm1;
  }

  /* the for loop leaves iter one past the last iteration performed */

  iter--;
  if ( (iter%print_freq != 0) && (proc == 0) &&
       (options[AZ_output] != AZ_none) &&
       (options[AZ_output] != AZ_warnings))
    (void) fprintf(stdout, "%siter: %4d residual = %e\n",prefix,iter,
                   scaled_r_norm);

  /* check if we exceeded maximum number of iterations */

  if (converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, est_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

} /* pqmrs */