int ML_Smoother_Ifpack(ML_Smoother *sm,int inlen,double x[],int outlen, double rhs[]) { ML_Smoother *smooth_ptr = (ML_Smoother *) sm; void *Ifpack_Handle = smooth_ptr->smoother->data; double* x2 = NULL,* rhs2 = NULL; /*int i;*/ int n, kk; int one_int = 1; double minus_one_double = -1.0; if (sm->init_guess == ML_NONZERO) { n = sm->my_level->Amat->invec_leng; assert (n == sm->my_level->Amat->outvec_leng); rhs2 = (double*) ML_allocate(sizeof(double) * (n + 1)); x2 = (double*) ML_allocate(sizeof(double) * (n + 1)); ML_Operator_Apply(sm->my_level->Amat, n, x, n, rhs2); DCOPY_F77(&n, x, &one_int, x2, &one_int); DAXPY_F77(&n, &minus_one_double, rhs, &one_int, rhs2, &one_int); ML_Ifpack_Solve(Ifpack_Handle, x2, rhs2); DAXPY_F77(&n, &minus_one_double, x2, &one_int, x, &one_int); ML_free(rhs2); ML_free(x2); } else ML_Ifpack_Solve(Ifpack_Handle, x, rhs); for (kk = 1; kk < sm->ntimes; kk++) { n = sm->my_level->Amat->invec_leng; assert (n == sm->my_level->Amat->outvec_leng); rhs2 = (double*) ML_allocate(sizeof(double) * (n + 1)); x2 = (double*) ML_allocate(sizeof(double) * (n + 1)); ML_Operator_Apply(sm->my_level->Amat, n, x, n, rhs2); DCOPY_F77(&n, x, &one_int, x2, &one_int); DAXPY_F77(&n, &minus_one_double, rhs, &one_int, rhs2, &one_int); ML_Ifpack_Solve(Ifpack_Handle, x2, rhs2); DAXPY_F77(&n, &minus_one_double, x2, &one_int, x, &one_int); ML_free(rhs2); ML_free(x2); } return 0; } /* ML_Smoother_Ifpack */
//============================================================================= void Epetra_BLAS::COPY(const int N, const double * X, double * Y, const int INCX, const int INCY) const { DCOPY_F77(&N, X, &INCX, Y, &INCY); return; }
void AZ_precondition(double x[], int input_options[], int proc_config[], double input_params[], AZ_MATRIX *Amat, AZ_PRECOND *input_precond) /******************************************************************************* This routine calls appropriate sparse matrix preconditioner. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== x: On input, contains the current solution. On output contains the preconditioned solution to the linear system. options: Determines specific solution method and other parameters. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. params: Drop tolerance and convergence tolerance info. Amat: Structure used to represent the matrix (see az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditioner (see file az_aztec.h and Aztec User's Guide). * -------------------------------------------------------------------- Related routines: scaling routines: AZ_block_diagonal_scaling -- block-diagonally scales sparse matrix problem. AZ_row_sum_scaling -- row sum scales sparse matrix problem. sym_diagonal_scaling -- diagonaly scales symm. sparse problem. sym_row_sum_scaling -- row sum scales symmetric sparse problem. preconditioners: jacobi -- point Jacobi method. AZ_polynomial_expansion-- Polynomial expansion; Neumann series and least squares. domain decomposition -- Block solvers (LU , ILU or ILUT) used on each processor. The blocks are either non-overlapping or overlapping. icc -- incomplete sparse Choleski (symmetric version). *******************************************************************************/ { /* local variables */ int ione = 1; double *temp; int m, N, k, length; int i, step, j; static int *d2_indx,*d2_bindx,*d2_rpntr,*d2_bpntr; static double *d2_inv; static AZ_MATRIX *Dmat; int tsize, multilevel_flag = 0, max_externals; static int previous_factors = -1; double *v, *y; char *yo = "precond: "; int *data_org, *bindx, *indx, *cpntr, *rpntr, *bpntr; double *val; char label[64],suffix[32]; char tag[80]; double *current_rhs, *orig_rhs = NULL, *x_precond = NULL; int *options, *ioptions, N_fixed, *fixed_pts; double *params, *iparams, *istatus; AZ_MATRIX *Aptr, *Pmat; AZ_PRECOND *Pptr, *precond; struct AZ_SCALING *Sptr; int opt_save1, opt_save2, opt_save3, opt_save4, opt_save5, *itemp; double *tttemp, norm1, *dtemp; #ifdef TIMING double ttt; #endif #ifdef eigen double *tb, *tr; #endif /**************************** execution begins ******************************/ #ifdef TIMING ttt = AZ_second(); #endif precond = input_precond; sprintf(suffix," in precond%d",input_options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ data_org = precond->Pmat->data_org; options = input_options; params = input_params; m = data_org[AZ_N_int_blk] + data_org[AZ_N_bord_blk]; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; max_externals = Amat->data_org[AZ_N_external]; if (max_externals < data_org[AZ_N_external]) max_externals = data_org[AZ_N_external]; current_rhs = x; if (options[AZ_precond] == AZ_multilevel) { /* make extra vectors to hold rhs and residual */ sprintf(tag,"orig_rhs %s",precond->context->tag); orig_rhs = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id,tag,&i); sprintf(tag,"x_prec %s",precond->context->tag); x_precond = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i); for (i = 0 ; i < N; i++) x_precond[i] = 0.0; for (i = 0 ; i < N; i++) orig_rhs[i] = current_rhs[i]; multilevel_flag = 1; options = precond->options; params = precond->params; } do { data_org = precond->Pmat->data_org; val = precond->Pmat->val; bindx = precond->Pmat->bindx; cpntr = precond->Pmat->cpntr; indx = precond->Pmat->indx; rpntr = precond->Pmat->rpntr; bpntr = precond->Pmat->bpntr; if (max_externals < data_org[AZ_N_external]) max_externals = data_org[AZ_N_external]; switch (options[AZ_precond]) { case AZ_none: break; case AZ_Jacobi: if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { for (i = 0; i < N; i++) current_rhs[i] /= val[i]; if (options[AZ_poly_ord] > 1) { sprintf(tag,"v_prec %s",precond->context->tag); v = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); sprintf(tag,"y_prec %s",precond->context->tag); y = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i); for (i = 0; i < N; i++) v[i] = current_rhs[i]; for (step = 1; step < options[AZ_poly_ord]; step++) { Amat->matvec(v, y, Amat, proc_config); for(i = 0; i < N; i++) v[i] += current_rhs[i] - y[i] / val[i]; } for (i = 0; i < N; i++) current_rhs[i] = v[i]; } } else if (data_org[AZ_matrix_type] == AZ_USER_MATRIX) { if (options[AZ_pre_calc] < AZ_sys_reuse) { sprintf(tag,"d2_inv %s",precond->context->tag); d2_inv = (double *) AZ_manage_memory(N*sizeof(double),AZ_ALLOC, data_org[AZ_name],tag,&i); Pmat = precond->Pmat; if ( (Pmat->N_nz < 0) || (Pmat->max_per_row < 0)) AZ_matfree_Nnzs(Pmat); if ( (Pmat->getrow == NULL) && (N != 0) ) { AZ_printf_err("Error: Only matrices with getrow() defined via "); AZ_printf_err("AZ_set_MATFREE_getrow(...) can do Jacobi preconditioning\n"); exit(1); } sprintf(tag,"dtemp %s",precond->context->tag); dtemp = (double *) AZ_manage_memory(Pmat->max_per_row* sizeof(double),AZ_ALLOC, data_org[AZ_name],tag,&i); sprintf(tag,"itemp %s",precond->context->tag); itemp = (int *) AZ_manage_memory(Pmat->max_per_row* sizeof(int ),AZ_ALLOC, data_org[AZ_name],tag,&i); for (i = 0; i < N; i++) { Pmat->getrow(itemp,dtemp,&length,Pmat,1,&i,Pmat->max_per_row); for (k =0; k < length; k++) if (itemp[k] == i) break; if (k == length) d2_inv[i] = 0.0; /* no diagonal */ else d2_inv[i] = 1./dtemp[k]; } } for (i = 0; i < N; i++) current_rhs[i] *= d2_inv[i]; if (options[AZ_poly_ord] > 1) { sprintf(tag,"v_prec %s",precond->context->tag); v = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); sprintf(tag,"y_prec %s",precond->context->tag); y = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag,&i); for (i = 0; i < N; i++) v[i] = current_rhs[i]; for (step = 1; step < options[AZ_poly_ord]; step++) { Amat->matvec(v, y, Amat, proc_config); for(i = 0; i < N; i++) v[i] += current_rhs[i] - y[i]*d2_inv[i]; } for (i = 0; i < N; i++) current_rhs[i] = v[i]; } } else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) { /* block Jacobi preconditioning */ if (options[AZ_pre_calc] < AZ_sys_reuse) { /* First, compute block-diagonal inverse */ /* (only if not already computed) */ tsize = 0; for (i = 0; i < m; i++) tsize += (rpntr[i+1] - rpntr[i]) * (cpntr[i+1] - cpntr[i]); sprintf(tag,"d2_indx %s",precond->context->tag); d2_indx = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_bindx %s",precond->context->tag); d2_bindx = (int *) AZ_manage_memory(m*sizeof(int), AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_rpntr %s",precond->context->tag); d2_rpntr = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_bpntr %s",precond->context->tag); d2_bpntr = (int *) AZ_manage_memory((m+1)*sizeof(int),AZ_ALLOC, data_org[AZ_name], tag, &i); sprintf(tag,"d2_inv %s",precond->context->tag); d2_inv = (double *) AZ_manage_memory(tsize*sizeof(double), AZ_ALLOC, data_org[AZ_name],tag,&i); d2_bpntr[0] = 0; sprintf(tag,"dmat_calk_binv %s",precond->context->tag); Dmat = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX), AZ_ALLOC,data_org[AZ_name],tag,&i); Dmat->rpntr = d2_rpntr; Dmat->cpntr = d2_rpntr; Dmat->bpntr = d2_bpntr; Dmat->bindx = d2_bindx; Dmat->indx = d2_indx; Dmat->val = d2_inv; Dmat->data_org = data_org; Dmat->matvec = precond->Pmat->matvec; Dmat->matrix_type = precond->Pmat->matrix_type; if (options[AZ_pre_calc] != AZ_reuse) { AZ_calc_blk_diag_inv(val, indx, bindx, rpntr, cpntr, bpntr, d2_inv, d2_indx, d2_bindx, d2_rpntr, d2_bpntr, data_org); } else if (i == AZ_NEW_ADDRESS) { AZ_printf_err( "Error: options[AZ_pre_calc]==AZ_reuse and" "previous factors\n not found. Check" "data_org[AZ_name].\n"); exit(-1); } } else if (previous_factors != data_org[AZ_name]) { AZ_printf_err( "Warning: Using a previous factorization as a" "preconditioner\neven though matrix" "(data_org[AZ_name]) has changed\n"); } previous_factors = data_org[AZ_name]; /* scale rhs */ sprintf(tag,"v_prec %s",precond->context->tag); v = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); Dmat->matvec(current_rhs, v, Dmat, proc_config); DCOPY_F77(&N, v, &ione, current_rhs, &ione); if (options[AZ_poly_ord] > 1) { sprintf(tag,"y_prec %s",precond->context->tag); y = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); sprintf(tag,"temp_prec %s",precond->context->tag); temp = AZ_manage_memory(N*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,tag,&i); for (step = 1; step < options[AZ_poly_ord]; step++) { Amat->matvec(v, y, Amat, proc_config); Dmat->matvec(y, temp, Dmat, proc_config); for (i = 0; i < N; i++) v[i] += current_rhs[i] - temp[i]; } for (i = 0; i < N; i++) current_rhs[i] = v[i]; } } break; case AZ_sym_GS: /* symmetric Gauss-Seidel preconditioner only available on 1 proc */ if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) AZ_sym_gauss_seidel(); else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) AZ_sym_gauss_seidel_sl(val, bindx, current_rhs, data_org, options, precond->context, proc_config); break; case AZ_Neumann: case AZ_ls: if (!options[AZ_poly_ord]) return; AZ_polynomial_expansion(current_rhs, options, proc_config, precond); break; case AZ_dom_decomp: case AZ_rilu: AZ_domain_decomp(current_rhs, precond->Pmat, options, proc_config, params, precond->context); break; case AZ_icc: /* incomplete Cholesky factorization */ (void) AZ_printf_out("Incomplete Cholesky not available (use ilu).\n"); break; case AZ_user_precond: precond->prec_function(current_rhs, options, proc_config, params, Amat, precond); break; case AZ_smoother: sprintf(label,"istatus %s",precond->context->tag); istatus = AZ_manage_memory(AZ_STATUS_SIZE*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label,&i); for (i = 0 ; i < AZ_STATUS_SIZE ; i++ ) istatus[i] = 0.0; sprintf(label,"y %s",precond->context->tag); y = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); sprintf(label,"tttemp %s",precond->context->tag); tttemp = AZ_manage_memory((N+max_externals)*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) tttemp[i] = current_rhs[i]; N_fixed = 0; fixed_pts = NULL; if (Amat->aux_ival != NULL) { N_fixed = Amat->aux_ival[0][0]; fixed_pts = Amat->aux_ival[1]; } else if (options[AZ_pre_calc] != AZ_sys_reuse) AZ_printf_out("Warning: Not fixed points set for local smoothing!!\n"); for (j = 0; j < options[AZ_poly_ord]; j++) { AZ_loc_avg(Amat, tttemp, y, N_fixed, fixed_pts, proc_config); norm1 = sqrt(AZ_gdot(N, y, y, proc_config)); if (proc_config[AZ_node] == 0) { if ((j==0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_summary) && (options[AZ_output] != AZ_warnings)) AZ_printf_out(" %d %e\n",j, norm1); else if ((j==options[AZ_poly_ord]-1) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings)) AZ_printf_out(" %d %e\n",j, norm1); else if ((options[AZ_output] > 0) && (j%options[AZ_output] == 0)) AZ_printf_out(" %d %e\n",j, norm1); } for (i = 0 ; i < N ; i++ ) tttemp[i] = y[i]; } for (i = 0 ; i < N ; i++ ) y[i] = current_rhs[i] - y[i]; for (i = 0 ; i < N ; i++ ) current_rhs[i] = 0.0; opt_save1 = options[AZ_output]; opt_save2 = options[AZ_solver]; opt_save3 = options[AZ_precond]; opt_save4 = options[AZ_max_iter]; opt_save5 = options[AZ_aux_vec]; options[AZ_output] = AZ_warnings; options[AZ_solver] = AZ_tfqmr; options[AZ_precond] = AZ_dom_decomp; options[AZ_max_iter]= 1000; options[AZ_aux_vec] = AZ_rand; options[AZ_recursion_level]++; AZ_oldsolve(current_rhs, y,options, params, istatus, proc_config, Amat, precond, NULL); options[AZ_recursion_level]--; options[AZ_output] = opt_save1; options[AZ_solver] = opt_save2; options[AZ_precond] = opt_save3; options[AZ_max_iter]= opt_save4; options[AZ_aux_vec] = opt_save5; break; default: if (options[AZ_precond] < AZ_SOLVER_PARAMS) { AZ_recover_sol_params(options[AZ_precond], &ioptions, &iparams, &istatus, &Aptr, &Pptr, &Sptr); sprintf(label,"y %s",precond->context->tag); y = AZ_manage_memory((N+max_externals)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) y[i] = current_rhs[i]; for (i = 0 ; i < N ; i++ ) current_rhs[i] = 0.0; ioptions[AZ_recursion_level] = options[AZ_recursion_level] + 1; if ((options[AZ_pre_calc] == AZ_sys_reuse) && (ioptions[AZ_keep_info] == 1)) ioptions[AZ_pre_calc] = AZ_reuse; AZ_oldsolve(current_rhs, y,ioptions,iparams, istatus, proc_config, Aptr, Pptr, Sptr); } else { (void) AZ_printf_err( "%sERROR: invalid preconditioning flag.\n" " options[AZ_precond] improperly set (%d).\n", yo, options[AZ_precond]); exit(-1); } } options[AZ_pre_calc] = AZ_sys_reuse; precond->context->Pmat_computed = 1; if (multilevel_flag) { if (precond->next_prec == NULL) { multilevel_flag = 0; for (i = 0; i < N; i++) current_rhs[i] += x_precond[i]; } else { for (i = 0; i < N; i++) x_precond[i] += current_rhs[i]; AZ_compute_residual(orig_rhs, x_precond, current_rhs, proc_config, Amat); precond = precond->next_prec; options = precond->options; params = precond->params; } } } while (multilevel_flag); proc_config[AZ_MPI_Tag] = AZ_MSG_TYPE; /* reset all the message types. */ /* This is to make sure that all */ /* processors (even those without */ /* any preconditioning work) have */ /* the same message types for the */ /* next message. */ #ifdef TIMING ttt = AZ_second() - ttt; if (input_options[AZ_recursion_level] == 0) input_precond->timing[0] += ttt; #endif } /* precond */
void AZ_sym_gauss_seidel_sl(double val[],int bindx[],double x[],int data_org[], int options[], struct context *context, int proc_config[]) /******************************************************************************* Symmetric Gauss-Siedel preconditioner. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== val: Array containing the nonzero entries of the matrix (see Aztec User's Guide). indx, bindx, rpntr, cpntr, bpntr: Arrays used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). x: On input, contains the current solution to the linear system. On output contains the Jacobi preconditioned solution. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see Aztec User's Guide). options: Determines specific solution method and other parameters. *******************************************************************************/ { /* local variables */ register int *bindx_ptr; register double sum, *ptr_val; int i, bindx_row, j_last, N, step, ione = 1, j; double *b, *ptr_b; char tag[80]; /**************************** execution begins ******************************/ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; sprintf(tag,"b/sGS %s",context->tag); b = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); DCOPY_F77(&N, x, &ione, b, &ione); ptr_val = val; for (i = 0; i < N; i++) { (*ptr_val) = 1.0 / (*ptr_val); x[i] = 0.0; ptr_val++; } for (step = 0; step < options[AZ_poly_ord]; step++) { AZ_exchange_bdry(x, data_org, proc_config); bindx_row = bindx[0]; bindx_ptr = &bindx[bindx_row]; ptr_val = &val[bindx_row]; ptr_b = b; for (i = 0; i < N; i++) { sum = *ptr_b++; j_last = bindx[i+1] - bindx[i]; for (j = 0; j < j_last; j++) { sum -= *ptr_val++ * x[*bindx_ptr++]; } x[i] = sum * val[i]; } bindx_row = bindx[N]; bindx_ptr = &bindx[bindx_row-1]; ptr_val = &val[bindx_row-1]; for (i = N - 1; i >= 0; i--) { sum = b[i]; j_last = bindx[i+1] - bindx[i]; for (j = 0; j < j_last; j++) { sum -= *ptr_val-- * x[*bindx_ptr--]; } x[i] = sum * val[i]; } } for (i = 0; i < N; i++) val[i] = 1.0 / val[i]; } /* AZ_sym_gauss_seidel_sl */
void AZ_solve_subdomain(double x[],int N, struct context *context) { /**************************************************************************** Given a vector 'x' representing the right hand side, solve the system using whatever subdomain solver is indicated by 'context->which' and whatever factorization information has already been computed. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== x On input, the right hand side of the subdomain system that is to be solved. On output, the solution of the subdomain system. N On input, the size of the linear system to be solved. bindx2,val2 On input, matrix or factorization information to be used by the solver. For most schemes, this information is in MSR format. However, the lu and bilu scheme would have this information in another format. Note: additional array information can be passed through context. context On input, the various fields are set to solver specific information corresponding to algorithm parameters as well as a previously done factorization. *******************************************************************************/ double *val2; int *bindx2; int N_blk_rows; #ifdef HAVE_AZLU int ifail; #endif int *sub_options, sub_proc_config[AZ_PROC_SIZE], *hold_data_org, *new_data_org; double *sub_params, *sub_status; AZ_MATRIX *sub_matrix; AZ_PRECOND *sub_precond; struct AZ_SCALING *sub_scaling; #ifdef AZTEC_MPI MPI_AZComm *tptr; #endif double *y; char label[80]; int t1, t2, t3, i, t4, t5 = 0; /* Begin Aztec 2.1 mheroux mod */ #ifdef IFPACK int ione = 1; void *precon; #endif /* End Aztec 2.1 mheroux mod */ val2 = context->A_overlapped->val; bindx2 = context->A_overlapped->bindx; switch(context->aztec_choices->options[AZ_subdomain_solve]) { /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: #ifdef IFPACK y = (double *) malloc (N * sizeof(double)); DCOPY_F77(&N, x, &ione, y, &ione); precon = context->precon; ifp_apply(precon, N, 1, y, N, x, N); free((void *) y); #endif break; /* End Aztec 2.1 mheroux mod */ case AZ_bilu: N_blk_rows = context->N_blk_rows; AZ_lower_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x); AZ_upper_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x, context->ipvt, context->dblock); break; case AZ_ilut: case AZ_rilu: case AZ_ilu: AZ_lower_tsolve(x,N, val2, bindx2, context->iu, x ); AZ_upper_tsolve( x, N, val2, bindx2, context->iu); break; case AZ_icc: AZ_lower_icc(bindx2,val2,N,x); AZ_upper_icc(bindx2,val2,N,x); break; case AZ_lu: #ifdef HAVE_AZLU if (N == 0) return; else if (N== 1) { x[0] *= val2[0]; ifail = 0; } else AZ_backsolve(val2, context->pivot,x, bindx2, context->ha, context->iflag, &ifail, &(context->N_nz_factors), &N, &N); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; default: if (context->aztec_choices->options[AZ_subdomain_solve] >= AZ_SOLVER_PARAMS) { AZ_printf_out("ERROR: Unknown subdomain solver %d\n", context->aztec_choices->options[AZ_subdomain_solve]); exit(1); } else { /* better to put most of this in the factorization */ AZ_recover_sol_params(context->aztec_choices->options[ AZ_subdomain_solve], &sub_options, &sub_params, &sub_status, &sub_matrix, &sub_precond, &sub_scaling); t1 = sub_options[AZ_recursion_level]; sub_options[AZ_recursion_level]++; t2 = sub_options[AZ_output]; if (context->proc_config[AZ_node] != 0 ) sub_options[AZ_output] = AZ_none; t3 = context->proc_config[AZ_MPI_Tag]; /* fix data_org */ hold_data_org = context->A_overlapped->data_org; new_data_org = (int *) AZ_allocate( sizeof(int) * AZ_send_list ); if (new_data_org == NULL) { AZ_printf_out("Error: Not enough space for subdomain matrix\n"); exit(1); } context->A_overlapped->data_org = new_data_org; context->A_overlapped->matvec = AZ_MSR_matvec_mult; new_data_org[AZ_matrix_type] = AZ_MSR_MATRIX; new_data_org[AZ_N_internal] = N; new_data_org[AZ_N_border ] = 0; new_data_org[AZ_N_external] = 0; new_data_org[AZ_N_int_blk ] = N; new_data_org[AZ_N_bord_blk] = 0; new_data_org[AZ_N_ext_blk ] = 0; new_data_org[AZ_N_neigh ] = 0; new_data_org[AZ_total_send] = 0; new_data_org[AZ_name ] = hold_data_org[AZ_name]; new_data_org[AZ_internal_use]= 0; new_data_org[AZ_N_rows ]= N; sub_precond->Pmat = context->A_overlapped; sub_precond->prec_function = AZ_precondition; sub_proc_config[AZ_node] = 0; sub_proc_config[AZ_N_procs] = 1; #ifdef AZTEC_MPI tptr = AZ_get_comm(context->proc_config); AZ_set_comm(sub_proc_config, *tptr); #endif sprintf(label,"y in ssolve%d", sub_options[AZ_recursion_level]); y = AZ_manage_memory((N+1)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) y[i] = x[i]; for (i = 0 ; i < N ; i++ ) x[i] = 0.0; t4 = sub_options[AZ_keep_info]; sub_options[AZ_keep_info] = 1; if (context->aztec_choices->options[AZ_pre_calc] >= AZ_reuse) { t5 = sub_options[AZ_pre_calc]; sub_options[AZ_pre_calc] = AZ_sys_reuse; } AZ_oldsolve(x, y,sub_options,sub_params, sub_status, sub_proc_config, context->A_overlapped, sub_precond, sub_scaling); sub_options[AZ_keep_info] = t4; if (context->aztec_choices->options[AZ_pre_calc] == AZ_sys_reuse) sub_options[AZ_pre_calc] = t5; sub_options[AZ_recursion_level] = t1; sub_options[AZ_output] = t2; context->A_overlapped->data_org = hold_data_org; AZ_free(new_data_org); context->proc_config[AZ_MPI_Tag] = t3; } } }
// ====================================================================== void GetPtent(const Operator& A, Teuchos::ParameterList& List, const MultiVector& ThisNS, Operator& Ptent, MultiVector& NextNS) { std::string CoarsenType = List.get("aggregation: type", "Uncoupled"); /* old version int NodesPerAggr = List.get("aggregation: per aggregate", 64); */ double Threshold = List.get("aggregation: threshold", 0.0); int NumPDEEquations = List.get("PDE equations", 1); ML_Aggregate* agg_object; ML_Aggregate_Create(&agg_object); ML_Aggregate_Set_MaxLevels(agg_object,2); ML_Aggregate_Set_StartLevel(agg_object,0); ML_Aggregate_Set_Threshold(agg_object,Threshold); //agg_object->curr_threshold = 0.0; ML_Operator* ML_Ptent = 0; ML_Ptent = ML_Operator_Create(GetML_Comm()); if (ThisNS.GetNumVectors() == 0) ML_THROW("zero-dimension null space", -1); int size = ThisNS.GetMyLength(); double* null_vect = 0; ML_memory_alloc((void **)(&null_vect), sizeof(double) * size * ThisNS.GetNumVectors(), "ns"); int incr = 1; for (int v = 0 ; v < ThisNS.GetNumVectors() ; ++v) DCOPY_F77(&size, (double*)ThisNS.GetValues(v), &incr, null_vect + v * ThisNS.GetMyLength(), &incr); ML_Aggregate_Set_NullSpace(agg_object, NumPDEEquations, ThisNS.GetNumVectors(), null_vect, ThisNS.GetMyLength()); if (CoarsenType == "Uncoupled") agg_object->coarsen_scheme = ML_AGGR_UNCOUPLED; else if (CoarsenType == "Uncoupled-MIS") agg_object->coarsen_scheme = ML_AGGR_HYBRIDUM; else if (CoarsenType == "MIS") { /* needed for MIS, otherwise it sets the number of equations to * the null space dimension */ agg_object->max_levels = -7; agg_object->coarsen_scheme = ML_AGGR_MIS; } else if (CoarsenType == "METIS") agg_object->coarsen_scheme = ML_AGGR_METIS; else { ML_THROW("Requested aggregation scheme (" + CoarsenType + ") not recognized", -1); } int NextSize = ML_Aggregate_Coarsen(agg_object, A.GetML_Operator(), &ML_Ptent, GetML_Comm()); /* This is the old version int NextSize; if (CoarsenType == "Uncoupled") { NextSize = ML_Aggregate_CoarsenUncoupled(agg_object, A.GetML_Operator(), } else if (CoarsenType == "MIS") { NextSize = ML_Aggregate_CoarsenMIS(agg_object, A.GetML_Operator(), &ML_Ptent, GetML_Comm()); } else if (CoarsenType == "METIS") { ML ml_object; ml_object.ML_num_levels = 1; // crap for line below ML_Aggregate_Set_NodesPerAggr(&ml_object,agg_object,0,NodesPerAggr); NextSize = ML_Aggregate_CoarsenMETIS(agg_object, A.GetML_Operator(), &ML_Ptent, GetML_Comm()); } else { ML_THROW("Requested aggregation scheme (" + CoarsenType + ") not recognized", -1); } */ ML_Operator_ChangeToSinglePrecision(ML_Ptent); int NumMyElements = NextSize; Space CoarseSpace(-1,NumMyElements); Ptent.Reshape(CoarseSpace,A.GetRangeSpace(),ML_Ptent,true); assert (NextSize * ThisNS.GetNumVectors() != 0); NextNS.Reshape(CoarseSpace, ThisNS.GetNumVectors()); size = NextNS.GetMyLength(); for (int v = 0 ; v < NextNS.GetNumVectors() ; ++v) DCOPY_F77(&size, agg_object->nullspace_vect + v * size, &incr, NextNS.GetValues(v), &incr); ML_Aggregate_Destroy(&agg_object); ML_memory_free((void**)(&null_vect)); }
void AZ_pgmresr(double b[], double x[],double weight[], int options[], double params[], int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* This routine uses Saad's restarted Genralized Minimum Residual method to solve the nonsymmetric matrix problem Ax = b. IMPORTANT NOTE: While the 2-norm of the gmres residual is available, the actual residual is not normally computed as part of the gmres algorithm. Thus, if the user uses a convergence condition (see AZ_gmres_global_scalars()) that is based on the 2-norm of the residual there is no need to compute the residual (i.e. r_avail = AZ_FALSE). However, if another norm of r is requested, AZ_gmres_global_scalars() sets r_avail = AZ_TRUE and the algorithm computes the residual. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== Amat: Structure used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see file Aztec User's Guide). proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int k; int i, N, NN, converged, one = 1, iter, r_avail = AZ_FALSE; int print_freq, proc, kspace; double **UU, **CC, *dots, *tmp, *res; double dble_tmp, r_2norm = 1.0, epsilon; double rec_residual, scaled_r_norm, true_scaled_r=0.0; double actual_residual = -1.0, minus_alpha, alpha; double *dummy = (double *) 0; double *UUblock, *CCblock; int mm, ii; char label[64],suffix[32], prefix[64]; int *data_org, str_leng, first_time = AZ_TRUE; double doubleone = 1.0, minusone = -1.0, init_time = 0.0; char *T = "T"; char *T2 = "N"; /**************************** execution begins ******************************/ sprintf(suffix," in gmresr%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; epsilon = params[AZ_tol]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; kspace = options[AZ_kspace]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 0; /* GMRES seldom updates solution */ convergence_info->epsilon = params[AZ_tol]; /* allocate memory for required vectors */ NN = kspace + 1; /* +1: make sure everybody allocates something */ sprintf(label,"dots%s",suffix); dots = AZ_manage_memory(2*NN*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); tmp = &(dots[NN]); sprintf(label,"CC%s",suffix); CC = (double **) AZ_manage_memory(2*NN*sizeof(double *), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); UU = &(CC[NN]); NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for intel */ sprintf(label,"UUblock%s",suffix); UUblock = AZ_manage_memory(2*NN*kspace*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id,label, &i); for (k = 0; k < kspace; k++) UU[k] = &(UUblock[k*NN]); CCblock = &(UUblock[kspace*NN]); for (k = 0; k < kspace; k++) CC[k] = &(CCblock[k*NN]); sprintf(label,"res%s",suffix); res = AZ_manage_memory(NN*sizeof(double),AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); AZ_compute_residual(b, x, res, proc_config, Amat); /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] */ r_2norm = DDOT_F77(&N, res, &one, res, &one); AZ_gdot_vec(1, &r_2norm, &rec_residual, proc_config); r_2norm = sqrt(r_2norm); rec_residual = r_2norm; AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, NULL, NULL, NULL, convergence_info); r_2norm = rec_residual; converged = scaled_r_norm < epsilon; if ( (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_summary) && (options[AZ_output] != AZ_warnings) && (proc == 0) ) (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); iter = 0; /*rst change while (!converged && iter < options[AZ_max_iter]) { */ while (!(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { convergence_info->iteration = iter; i = 0; /*rst change while (i < kspace && !converged && iter < options[AZ_max_iter]) { */ while (i < kspace && !(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { iter++; convergence_info->iteration = iter; /* v_i+1 = A M^-1 v_i */ DCOPY_F77(&N, res , &one, UU[i], &one); if (iter == 1) init_time = AZ_second(); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int precID = -1; precID = Teuchos_startTimer( "AztecOO: Operation Prec*x", precID ); #endif #endif precond->prec_function(UU[i],options,proc_config,params,Amat,precond); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( precID ); #endif #endif if (iter == 1) status[AZ_first_precond] = AZ_second() - init_time; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int matvecID = -1; matvecID = Teuchos_startTimer( "AztecOO: Operation Op*x", matvecID ); #endif #endif Amat->matvec(UU[i], CC[i], Amat, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( matvecID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoID = -1; orthoID = Teuchos_startTimer( "AztecOO: Orthogonalization", orthoID ); #endif #endif /* Gram-Schmidt orthogonalization */ if (!options[AZ_orthog]) { /* classical (stabilized) */ for (ii = 0 ; ii < 2 ; ii++ ) { dble_tmp = 0.0; mm = i; if (N == 0) for (k = 0 ; k < i ; k++) dots[k] = 0.0; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoInnerProdID = -1; orthoInnerProdID = Teuchos_startTimer( "AztecOO: Ortho (Inner Product)", orthoInnerProdID ); #endif #endif DGEMV_F77(CHAR_MACRO(T[0]), &N, &mm, &doubleone, CCblock, &NN, CC[i], &one, &dble_tmp, dots, &one); AZ_gdot_vec(i, dots, tmp, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoInnerProdID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoUpdateID = -1; orthoUpdateID = Teuchos_startTimer( "AztecOO: Ortho (Update)", orthoUpdateID ); #endif #endif DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, CCblock, &NN, dots, &one, &doubleone, CC[i], &one); DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, UUblock, &NN, dots, &one, &doubleone, UU[i], &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoUpdateID ); #endif #endif } } else { /* modified */ for (k = 0; k < i; k++) { alpha = AZ_gdot(N, CC[k], CC[i], proc_config); minus_alpha = -alpha; DAXPY_F77(&N, &minus_alpha, CC[k], &one, CC[i], &one); DAXPY_F77(&N, &minus_alpha, UU[k], &one, UU[i], &one); } } /* normalize vector */ #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS static int orthoNormID = -1; orthoNormID = Teuchos_startTimer( "AztecOO: Ortho (Norm)", orthoNormID ); #endif #endif dble_tmp = sqrt(AZ_gdot(N, CC[i], CC[i], proc_config)); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoNormID ); #endif #endif if (dble_tmp > DBL_EPSILON*r_2norm) dble_tmp = 1.0 / dble_tmp; else dble_tmp = 0.0; DSCAL_F77(&N, &dble_tmp, CC[i], &one); DSCAL_F77(&N, &dble_tmp, UU[i], &one); dble_tmp = AZ_gdot(N, CC[i], res, proc_config); DAXPY_F77(&N, &dble_tmp, UU[i], &one, x, &one); dble_tmp = -dble_tmp; DAXPY_F77(&N, &dble_tmp, CC[i], &one, res, &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop the timer. */ Teuchos_stopTimer( orthoID ); #endif #endif /* determine residual norm & test convergence */ r_2norm = sqrt(AZ_gdot(N, res, res, proc_config)); rec_residual = r_2norm; /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * NOTE: if r_avail = AZ_TRUE or AZ_FIRST is passed in, we perform * step 1), otherwise ||r|| is taken as rec_residual. */ AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, dummy, dummy, dummy, convergence_info); converged = scaled_r_norm < epsilon; /*rst change if ( (iter%print_freq == 0) && proc == 0) */ if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0) (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); i++; /* subspace dim. counter dim(K) = i - 1 */ #ifdef out if (options[AZ_check_update_size] & converged) converged = AZ_compare_update_vs_soln(N, -1.,dble_tmp, UU[i-1], x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (converged) { /* compute true residual using 'v[kspace]' as a temporary vector */ AZ_scale_true_residual(x, b, res, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); converged = true_scaled_r < params[AZ_tol]; if (!converged && (AZ_get_new_eps(&epsilon, scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT)) { /* * Computed residual has converged, actual residual has not * converged, AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } #endif } } if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings)) (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter, scaled_r_norm); if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #ifdef out /* check if we exceeded maximum number of iterations */ if (converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #endif } /* AZ_pgmres */
void AZ_pcg_f(double b[], double x[], double weight[], int options[], double params[], int proc_config[],double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditioner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE; double alpha, beta = 0.0, nalpha, true_scaled_r=-1.0; double *r, *z, *p, *ap, actual_residual = -1.0; double r_z_dot, r_z_dot_old, p_ap_dot, rec_residual=-1.0; double scaled_r_norm=-1.0, brkdown_tol = DBL_EPSILON; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; double **saveme, *ptap; int *kvec_sizes = NULL, current_kept = 0; double *dots; double doubleone = 1., dzero = 0.; char *T = "T"; char *T2 = "N"; double *block; /**************************** execution begins ******************************/ sprintf(suffix," in cg%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; /* pull needed values out of parameter arrays */ data_org = Amat->data_org; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* CG always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate space for necessary vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for assembly */ /* matvec on paragon. */ sprintf(label,"z%s",suffix); p = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label, &j); r = &(p[1*NN]); z = &(p[2*NN]); ap = &(p[3*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); if (options[AZ_apply_kvecs]) { AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,NULL, NULL, &r_z_dot, convergence_info); AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double)); if (dots == NULL) { printf("Not space to apply vectors in CG\n"); exit(1); } DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N, r, &one, &dzero, dots, &one); AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]), proc_config); for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i]; DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone, block, &N, dots, &one, &doubleone, x, &one); AZ_free(dots); AZ_compute_residual(b, x, r, proc_config, Amat); if ((options[AZ_output] != AZ_none) && (proc == 0)) printf("\t\tApplied Previous Krylov Vectors ... \n\n"); } if (options[AZ_keep_kvecs] > 0) AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); /* z = M r */ /* p = 0 */ DCOPY_F77(&N, r, &one, z, &one); status[AZ_first_precond] = AZ_second(); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); status[AZ_first_precond] = AZ_second() - status[AZ_first_precond]; for (i = 0; i < N; i++ ) p[i] = 0.0; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,r, z, &r_z_dot, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) { (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); AZ_flush_out(); } for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++ ) { convergence_info->iteration = iter; /* p = z + beta * p */ /* ap = A p */ for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i]; Amat->matvec(p, ap, Amat, proc_config); if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) { for (i = 0; i < current_kept; i++) { alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i]; DAXPY_F77(&N, &alpha, saveme[i], &one, p, &one); } if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config); } p_ap_dot = AZ_gdot(N, p, ap, proc_config); if (p_ap_dot < brkdown_tol) { /* possible problem */ if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) { /* something wrong */ AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = r_z_dot / p_ap_dot; nalpha = -alpha; /* x = x + alpha*p */ /* r = r - alpha*Ap */ /* z = M^-1 r */ DAXPY_F77(&N, &alpha, p, &one, x, &one); if (iter <= options[AZ_keep_kvecs]) { DCOPY_F77(&N, p, &one, saveme[iter-1], &one); ptap[iter-1] = p_ap_dot ; kvec_sizes[AZ_Nkept]++; current_kept = kvec_sizes[AZ_Nkept]; } /* else { i = (iter-1)%options[AZ_keep_kvecs]; DCOPY_F77(&N, p, &one, saveme[i], &one); ptap[i] = p_ap_dot ; } */ DAXPY_F77(&N, &nalpha, ap, &one, r, &one); DCOPY_F77(&N, r, &one, z, &one); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); r_z_dot_old = r_z_dot; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, z, &r_z_dot, convergence_info); if (brkdown_will_occur) { AZ_scale_true_residual( x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat,convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = r_z_dot / r_z_dot_old; if (fabs(r_z_dot) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(r_z_dot); } if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (convergence_info->converged) { AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test) ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* AZ_pcg */
void AZ_polynomial_expansion( double z[], int options[], int proc_config[], AZ_PRECOND *precond ) /******************************************************************************* Uses a Neuman series expansion to approximate the inverse of a matrix. The series expansion is in terms of (I - A/omega) where I is the identity, A the matrix for which the inverse is being approximated, and omega is a scaling factor (omega >= || A || / 2 , Wong and Jiang (1989) or the diagonal element if it is a constant). If power = 0 then diagonal scaling is performed. If power < 0 then an unparameterized expansion is used. If power > 0 then a parameterized expansion developed by a least squares method is used. This technique minimizes the L2 norm of the residual polynomial R(), on an evalue interval of [0,lambda_max] where lambda_max is an estimate of the largest evalue of A.(see Saad (1985)). This version assumes that diagonal scaling has been carried out on the entire set of equations. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== z: On input, is the residual(rhs) of the set of equations. On output is the result. options: Determines specific solution method and other parameters. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. precond: Structure used to represent the preocnditioner (see az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ int param_flag, one = 1, j; register int i, p; register double cp; double lambda_max; static double c[15], inv_omega; int N, power; double *w, *poly_temp; int *data_org, *bindx, *indx, *cpntr, *rpntr, *bpntr; double *val; /**************************** execution begins ******************************/ data_org = precond->Pmat->data_org; val = precond->Pmat->val; bindx = precond->Pmat->bindx; cpntr = precond->Pmat->cpntr; indx = precond->Pmat->indx; rpntr = precond->Pmat->rpntr; bpntr = precond->Pmat->bpntr; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; power = options[AZ_poly_ord]; poly_temp = (double *) AZ_manage_memory(2*(N+data_org[AZ_N_external])* sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, "poly mem", &j); w = &(poly_temp[N+data_org[AZ_N_external]]); if (options[AZ_precond] == AZ_Neumann ) param_flag = 0; else param_flag = 1; if (options[AZ_pre_calc] < AZ_sys_reuse) { if (precond->Pmat->data_org[AZ_matrix_type] == AZ_USER_MATRIX) { lambda_max = precond->Pmat->matrix_norm; if (lambda_max < 0.0) { if (proc_config[AZ_node] == 0) { AZ_printf_err("Error: Matrix norm not given. Use "); AZ_printf_err("AZ_set_MATFREE_matrix_norm() to set it.\n"); } exit(1); } } else if (precond->Pmat->data_org[AZ_matrix_type] == AZ_MSR_MATRIX || precond->Pmat->data_org[AZ_matrix_type] == AZ_VBR_MATRIX ) { lambda_max = AZ_gmax_matrix_norm(val, indx, bindx, rpntr, cpntr, bpntr, proc_config, data_org); /* change sign of lambda_max if diagonal contains only negative values */ AZ_change_sign(&lambda_max, val, indx, bindx, rpntr, cpntr, bpntr, data_org); } inv_omega = 1.0 / (0.55 * lambda_max); /* 1.1*lambda_max/2 */ if (param_flag) AZ_get_poly_coefficients(power, lambda_max, c, param_flag); } switch (param_flag) { case 0: /* Neumann series */ DSCAL_F77(&N, &inv_omega, z, &one); DCOPY_F77(&N, z, &one, w, &one); for (p = power; p > 0; p--){ precond->Pmat->matvec(z, poly_temp, precond->Pmat, proc_config); for (i = 0; i < N; i++) z[i] += w[i] - inv_omega * poly_temp[i]; } break; case 1: /* least squares */ /* initialization */ DCOPY_F77(&N, z, &one, w, &one); DSCAL_F77(&N, c+power, z, &one); for (p = power - 1; p >= 0; p--) { precond->Pmat->matvec(z, poly_temp, precond->Pmat, proc_config); cp = *(c+p); for (i = 0; i < N; i++) z[i] = cp * w[i] + poly_temp[i]; } break; default: if (proc_config[AZ_node] == 0) { (void) AZ_printf_err( "Error: invalid polynomial preconditioner\n" " options[AZ_precond] improperly set.\n"); } exit(-1); } } /* AZ_polynomial_expansion */
void AZ_pbicgstab(double b[], double x[], double weight[], int options[], double params[],int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Vand der Vorst's (1990) variation of the Bi-Conjugate Gradient algorthm (Sonneveld (1984,1989)) to solve the nonsymmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditionner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter=1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc; int brkdown_will_occur = AZ_FALSE; double alpha = 1.0, beta, true_scaled_r=0.0; double *v, *r, *rtilda, *p, *phat, *s, *shat; double omega = 1.0, dot_vec[2], tmp[2], init_time = 0.0; double rhonm1 = 1.0, rhon, sigma, brkdown_tol = DBL_EPSILON; double scaled_r_norm= -1.0, actual_residual = -1.0, rec_residual= -1.0; double dtemp; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; /**************************** execution begins ******************************/ sprintf(suffix," in cgstab%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* BiCGStab always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate memory for required vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something*/ NN = NN + (NN%2); /* make sure things are aligned for the */ /* assembly coded matvec() on the Intel. */ sprintf(label,"phat%s",suffix); phat = (double *) AZ_manage_memory(7*NN*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label,&j); p = &(phat[1*NN]); shat = &(phat[2*NN]); /* NOTE: phat and shat must be aligned */ /* so that the assembly dgemv */ /* works on the paragon. */ s = &(phat[3*NN]); r = &(phat[4*NN]); rtilda = &(phat[5*NN]); v = &(phat[6*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); /* v, p <- 0 */ for (i = 0; i < N; i++) v[i] = p[i] = 0.0; /* set rtilda */ if (options[AZ_aux_vec] == AZ_resid) DCOPY_F77(&N, r, &one, rtilda, &one); else AZ_random_vector(rtilda, data_org, proc_config); /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * 3) rho = <rtilda, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config,&r_avail,r,rtilda, &rhon, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) (void) AZ_printf_out("%siter: 0 residual = %e\n",prefix,scaled_r_norm); for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++) { if (brkdown_will_occur) { AZ_scale_true_residual( x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = (rhon/rhonm1) * (alpha/omega); if (fabs(rhon) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, rtilda, rhon, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(rhon); } rhonm1 = rhon; /* p = r + beta*(p - omega*v) */ /* phat = M^-1 p */ /* v = A phat */ dtemp = beta * omega; for (i = 0; i < N; i++) p[i] = r[i] + beta * p[i] - dtemp * v[i]; DCOPY_F77(&N, p, &one, phat, &one); if (iter==1) init_time = AZ_second(); if (precond_flag) precond->prec_function(phat,options,proc_config,params,Amat,precond); if (iter==1) status[AZ_first_precond] = AZ_second() - init_time; Amat->matvec(phat, v, Amat, proc_config); sigma = AZ_gdot(N, rtilda, v, proc_config); if (fabs(sigma) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, rtilda, v, sigma, proc_config)) { /* break down */ AZ_scale_true_residual( x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org,proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * fabs(sigma); } alpha = rhon / sigma; /* s = r - alpha*v */ /* shat = M^-1 s */ /* r = A shat (r is a tmp here for t ) */ for (i = 0; i < N; i++) s[i] = r[i] - alpha * v[i]; DCOPY_F77(&N, s, &one, shat, &one); if (precond_flag) precond->prec_function(shat,options,proc_config,params,Amat,precond); Amat->matvec(shat, r, Amat, proc_config); /* omega = (t,s)/(t,t) with r = t */ dot_vec[0] = DDOT_F77(&N, r, &one, s, &one); dot_vec[1] = DDOT_F77(&N, r, &one, r, &one); AZ_gdot_vec(2, dot_vec, tmp, proc_config); if (fabs(dot_vec[1]) < DBL_MIN) { omega = 0.0; brkdown_will_occur = AZ_TRUE; } else omega = dot_vec[0] / dot_vec[1]; /* x = x + alpha*phat + omega*shat */ /* r = s - omega*r */ DAXPY_F77(&N, &alpha, phat, &one, x, &one); DAXPY_F77(&N, &omega, shat, &one, x, &one); for (i = 0; i < N; i++) r[i] = s[i] - omega * r[i]; /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * 3) rho = <rtilda, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, rtilda, &rhon, convergence_info); if ( (iter%print_freq == 0) && proc == 0) (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) { dtemp = alpha/omega; DAXPY_F77(&N, &dtemp, phat, &one, shat, &one); convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,omega, shat, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); } if (convergence_info->converged) { AZ_scale_true_residual(x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&convergence_info->epsilon, scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test)) (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter, scaled_r_norm); /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* bicgstab */
void BLAS<int, double>::COPY(const int n, const double* x, const int incx, double* y, const int incy) const { DCOPY_F77(&n, x, &incx, y, &incy); }