//============================================================================= void Epetra_BLAS::GEMV(const char TRANS, const int M, const int N, const double ALPHA, const double * A, const int LDA, const double * X, const double BETA, double * Y, const int INCX, const int INCY) const { DGEMV_F77(CHAR_MACRO(TRANS), &M, &N, &ALPHA, A, &LDA, X, &INCX, &BETA, Y, &INCY); }
void AZ_pgmresr(double b[], double x[],double weight[], int options[], double params[], int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* This routine uses Saad's restarted Genralized Minimum Residual method to solve the nonsymmetric matrix problem Ax = b. IMPORTANT NOTE: While the 2-norm of the gmres residual is available, the actual residual is not normally computed as part of the gmres algorithm. Thus, if the user uses a convergence condition (see AZ_gmres_global_scalars()) that is based on the 2-norm of the residual there is no need to compute the residual (i.e. r_avail = AZ_FALSE). However, if another norm of r is requested, AZ_gmres_global_scalars() sets r_avail = AZ_TRUE and the algorithm computes the residual. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== Amat: Structure used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see file Aztec User's Guide). proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int k; int i, N, NN, converged, one = 1, iter, r_avail = AZ_FALSE; int print_freq, proc, kspace; double **UU, **CC, *dots, *tmp, *res; double dble_tmp, r_2norm = 1.0, epsilon; double rec_residual, scaled_r_norm, true_scaled_r=0.0; double actual_residual = -1.0, minus_alpha, alpha; double *dummy = (double *) 0; double *UUblock, *CCblock; int mm, ii; char label[64],suffix[32], prefix[64]; int *data_org, str_leng, first_time = AZ_TRUE; double doubleone = 1.0, minusone = -1.0, init_time = 0.0; char *T = "T"; char *T2 = "N"; /**************************** execution begins ******************************/ sprintf(suffix," in gmresr%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; epsilon = params[AZ_tol]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; kspace = options[AZ_kspace]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 0; /* GMRES seldom updates solution */ convergence_info->epsilon = params[AZ_tol]; /* allocate memory for required vectors */ NN = kspace + 1; /* +1: make sure everybody allocates something */ sprintf(label,"dots%s",suffix); dots = AZ_manage_memory(2*NN*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); tmp = &(dots[NN]); sprintf(label,"CC%s",suffix); CC = (double **) AZ_manage_memory(2*NN*sizeof(double *), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); UU = &(CC[NN]); NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for intel */ sprintf(label,"UUblock%s",suffix); UUblock = AZ_manage_memory(2*NN*kspace*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id,label, &i); for (k = 0; k < kspace; k++) UU[k] = &(UUblock[k*NN]); CCblock = &(UUblock[kspace*NN]); for (k = 0; k < kspace; k++) CC[k] = &(CCblock[k*NN]); sprintf(label,"res%s",suffix); res = AZ_manage_memory(NN*sizeof(double),AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); AZ_compute_residual(b, x, res, proc_config, Amat); /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] */ r_2norm = DDOT_F77(&N, res, &one, res, &one); AZ_gdot_vec(1, &r_2norm, &rec_residual, proc_config); r_2norm = sqrt(r_2norm); rec_residual = r_2norm; AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, NULL, NULL, NULL, convergence_info); r_2norm = rec_residual; converged = scaled_r_norm < epsilon; if ( (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_summary) && (options[AZ_output] != AZ_warnings) && (proc == 0) ) (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); iter = 0; /*rst change while (!converged && iter < options[AZ_max_iter]) { */ while (!(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { convergence_info->iteration = iter; i = 0; /*rst change while (i < kspace && !converged && iter < options[AZ_max_iter]) { */ while (i < kspace && !(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { iter++; convergence_info->iteration = iter; /* v_i+1 = A M^-1 v_i */ DCOPY_F77(&N, res , &one, UU[i], &one); if (iter == 1) init_time = AZ_second(); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int precID = -1; precID = Teuchos_startTimer( "AztecOO: Operation Prec*x", precID ); #endif #endif precond->prec_function(UU[i],options,proc_config,params,Amat,precond); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( precID ); #endif #endif if (iter == 1) status[AZ_first_precond] = AZ_second() - init_time; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int matvecID = -1; matvecID = Teuchos_startTimer( "AztecOO: Operation Op*x", matvecID ); #endif #endif Amat->matvec(UU[i], CC[i], Amat, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( matvecID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoID = -1; orthoID = Teuchos_startTimer( "AztecOO: Orthogonalization", orthoID ); #endif #endif /* Gram-Schmidt orthogonalization */ if (!options[AZ_orthog]) { /* classical (stabilized) */ for (ii = 0 ; ii < 2 ; ii++ ) { dble_tmp = 0.0; mm = i; if (N == 0) for (k = 0 ; k < i ; k++) dots[k] = 0.0; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoInnerProdID = -1; orthoInnerProdID = Teuchos_startTimer( "AztecOO: Ortho (Inner Product)", orthoInnerProdID ); #endif #endif DGEMV_F77(CHAR_MACRO(T[0]), &N, &mm, &doubleone, CCblock, &NN, CC[i], &one, &dble_tmp, dots, &one); AZ_gdot_vec(i, dots, tmp, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoInnerProdID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoUpdateID = -1; orthoUpdateID = Teuchos_startTimer( "AztecOO: Ortho (Update)", orthoUpdateID ); #endif #endif DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, CCblock, &NN, dots, &one, &doubleone, CC[i], &one); DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, UUblock, &NN, dots, &one, &doubleone, UU[i], &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoUpdateID ); #endif #endif } } else { /* modified */ for (k = 0; k < i; k++) { alpha = AZ_gdot(N, CC[k], CC[i], proc_config); minus_alpha = -alpha; DAXPY_F77(&N, &minus_alpha, CC[k], &one, CC[i], &one); DAXPY_F77(&N, &minus_alpha, UU[k], &one, UU[i], &one); } } /* normalize vector */ #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS static int orthoNormID = -1; orthoNormID = Teuchos_startTimer( "AztecOO: Ortho (Norm)", orthoNormID ); #endif #endif dble_tmp = sqrt(AZ_gdot(N, CC[i], CC[i], proc_config)); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoNormID ); #endif #endif if (dble_tmp > DBL_EPSILON*r_2norm) dble_tmp = 1.0 / dble_tmp; else dble_tmp = 0.0; DSCAL_F77(&N, &dble_tmp, CC[i], &one); DSCAL_F77(&N, &dble_tmp, UU[i], &one); dble_tmp = AZ_gdot(N, CC[i], res, proc_config); DAXPY_F77(&N, &dble_tmp, UU[i], &one, x, &one); dble_tmp = -dble_tmp; DAXPY_F77(&N, &dble_tmp, CC[i], &one, res, &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop the timer. */ Teuchos_stopTimer( orthoID ); #endif #endif /* determine residual norm & test convergence */ r_2norm = sqrt(AZ_gdot(N, res, res, proc_config)); rec_residual = r_2norm; /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * NOTE: if r_avail = AZ_TRUE or AZ_FIRST is passed in, we perform * step 1), otherwise ||r|| is taken as rec_residual. */ AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, dummy, dummy, dummy, convergence_info); converged = scaled_r_norm < epsilon; /*rst change if ( (iter%print_freq == 0) && proc == 0) */ if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0) (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); i++; /* subspace dim. counter dim(K) = i - 1 */ #ifdef out if (options[AZ_check_update_size] & converged) converged = AZ_compare_update_vs_soln(N, -1.,dble_tmp, UU[i-1], x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (converged) { /* compute true residual using 'v[kspace]' as a temporary vector */ AZ_scale_true_residual(x, b, res, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); converged = true_scaled_r < params[AZ_tol]; if (!converged && (AZ_get_new_eps(&epsilon, scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT)) { /* * Computed residual has converged, actual residual has not * converged, AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } #endif } } if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings)) (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter, scaled_r_norm); if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #ifdef out /* check if we exceeded maximum number of iterations */ if (converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #endif } /* AZ_pgmres */
void BLAS<int, double>::GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
void AZ_pcg_f(double b[], double x[], double weight[], int options[], double params[], int proc_config[],double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditioner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE; double alpha, beta = 0.0, nalpha, true_scaled_r=-1.0; double *r, *z, *p, *ap, actual_residual = -1.0; double r_z_dot, r_z_dot_old, p_ap_dot, rec_residual=-1.0; double scaled_r_norm=-1.0, brkdown_tol = DBL_EPSILON; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; double **saveme, *ptap; int *kvec_sizes = NULL, current_kept = 0; double *dots; double doubleone = 1., dzero = 0.; char *T = "T"; char *T2 = "N"; double *block; /**************************** execution begins ******************************/ sprintf(suffix," in cg%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; /* pull needed values out of parameter arrays */ data_org = Amat->data_org; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* CG always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate space for necessary vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for assembly */ /* matvec on paragon. */ sprintf(label,"z%s",suffix); p = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label, &j); r = &(p[1*NN]); z = &(p[2*NN]); ap = &(p[3*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); if (options[AZ_apply_kvecs]) { AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,NULL, NULL, &r_z_dot, convergence_info); AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double)); if (dots == NULL) { printf("Not space to apply vectors in CG\n"); exit(1); } DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N, r, &one, &dzero, dots, &one); AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]), proc_config); for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i]; DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone, block, &N, dots, &one, &doubleone, x, &one); AZ_free(dots); AZ_compute_residual(b, x, r, proc_config, Amat); if ((options[AZ_output] != AZ_none) && (proc == 0)) printf("\t\tApplied Previous Krylov Vectors ... \n\n"); } if (options[AZ_keep_kvecs] > 0) AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); /* z = M r */ /* p = 0 */ DCOPY_F77(&N, r, &one, z, &one); status[AZ_first_precond] = AZ_second(); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); status[AZ_first_precond] = AZ_second() - status[AZ_first_precond]; for (i = 0; i < N; i++ ) p[i] = 0.0; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,r, z, &r_z_dot, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) { (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); AZ_flush_out(); } for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++ ) { convergence_info->iteration = iter; /* p = z + beta * p */ /* ap = A p */ for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i]; Amat->matvec(p, ap, Amat, proc_config); if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) { for (i = 0; i < current_kept; i++) { alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i]; DAXPY_F77(&N, &alpha, saveme[i], &one, p, &one); } if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config); } p_ap_dot = AZ_gdot(N, p, ap, proc_config); if (p_ap_dot < brkdown_tol) { /* possible problem */ if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) { /* something wrong */ AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = r_z_dot / p_ap_dot; nalpha = -alpha; /* x = x + alpha*p */ /* r = r - alpha*Ap */ /* z = M^-1 r */ DAXPY_F77(&N, &alpha, p, &one, x, &one); if (iter <= options[AZ_keep_kvecs]) { DCOPY_F77(&N, p, &one, saveme[iter-1], &one); ptap[iter-1] = p_ap_dot ; kvec_sizes[AZ_Nkept]++; current_kept = kvec_sizes[AZ_Nkept]; } /* else { i = (iter-1)%options[AZ_keep_kvecs]; DCOPY_F77(&N, p, &one, saveme[i], &one); ptap[i] = p_ap_dot ; } */ DAXPY_F77(&N, &nalpha, ap, &one, r, &one); DCOPY_F77(&N, r, &one, z, &one); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); r_z_dot_old = r_z_dot; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, z, &r_z_dot, convergence_info); if (brkdown_will_occur) { AZ_scale_true_residual( x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat,convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = r_z_dot / r_z_dot_old; if (fabs(r_z_dot) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(r_z_dot); } if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (convergence_info->converged) { AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test) ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* AZ_pcg */
void dvbr_sparax_basic(int m, double *val, int *bindx, int *rpntr, int *cpntr, int *bpntr, double *b, double *c, int exchange_flag, int *data_org, int *proc_config) /****************************************************************************** c = Ab: Sparse (square) matrix-vector multiply, using the variable block row (VBR) data structure (A = val). Author: Scott A. Hutchinson, SNL, 1421 ======= Return code: void ============ Parameter list: =============== m: Number of (block) rows in A. val: Array containing the entries of the matrix. The matrix is stored block-row-by-block-row. Each block entry is dense and stored by columns (VBR). bindx, rpntr, cpntr, bpntr: Arrays used for DMSR and DVBR sparse matrix storage (see Aztec User's Guide). b: Right hand side of linear system. c: On output contains the solution to the linear system. exchange_flag: Flag which controls call to AZ_exchange_bdry() (ignored in serial implementation). data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see Aztec User's Guide). ******************************************************************************/ { /* local variables */ register double *x; register double *c_pntr; register int iblk_row, j, jblk, iblk_size; int m1, ib1, n1; int bpoff, rpoff; int ione = 1; int irpntr, irpntr_next; int ibpntr, ibpntr_next = 0; double one = 1.0; double *val_pntr; char *N = "N"; /**************************** execution begins *****************************/ /* exchange boundary info */ if (exchange_flag) AZ_exchange_bdry(b, data_org, proc_config); /* offset of the first block */ bpoff = *bpntr; rpoff = *rpntr; /* zero the result vector */ for (j = 0; j < rpntr[m] - rpoff; c[j++] = 0.0); val_pntr = val; irpntr_next = *rpntr++; bpntr++; c -= rpoff; /* loop over block rows */ for (iblk_row = 0; iblk_row < m; iblk_row++) { irpntr = irpntr_next; irpntr_next = *rpntr++; ibpntr = ibpntr_next; ibpntr_next = *bpntr++ - bpoff; /* set result pointer */ c_pntr = c + irpntr; /* number of rows in the current row block */ m1 = irpntr_next - irpntr; /* loop over each block in the current row block */ for (j = ibpntr; j < ibpntr_next; j++) { jblk = *(bindx+j); /* the starting point column index of the current block */ ib1 = *(cpntr+jblk); /* number of columns in the current block */ n1 = cpntr[jblk + 1] - ib1; iblk_size = m1*n1; /****************** Dense matrix-vector multiplication *****************/ /* * Get base addresses */ x = b + ib1; /* * Special case the m1 = n1 = 1 case */ if (iblk_size == 1) *c_pntr += *val_pntr * *x; else if (m1 == n1) { /* * Inline small amounts of work */ switch (m1) { case 2: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[2]*x[1]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[3]*x[1]; break; case 3: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[3]*x[1] + val_pntr[6]*x[2]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[4]*x[1] + val_pntr[7]*x[2]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[5]*x[1] + val_pntr[8]*x[2]; break; case 4: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[4]*x[1] + val_pntr[8] *x[2] + val_pntr[12]*x[3]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[5]*x[1] + val_pntr[9] *x[2] + val_pntr[13]*x[3]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[6]*x[1] + val_pntr[10]*x[2] + val_pntr[14]*x[3]; c_pntr[3] += val_pntr[3]*x[0] + val_pntr[7]*x[1] + val_pntr[11]*x[2] + val_pntr[15]*x[3]; break; case 5: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[5]*x[1] + val_pntr[10]*x[2] + val_pntr[15]*x[3] + val_pntr[20]*x[4]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[6]*x[1] + val_pntr[11]*x[2] + val_pntr[16]*x[3] + val_pntr[21]*x[4]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[7]*x[1] + val_pntr[12]*x[2] + val_pntr[17]*x[3] + val_pntr[22]*x[4]; c_pntr[3] += val_pntr[3]*x[0] + val_pntr[8]*x[1] + val_pntr[13]*x[2] + val_pntr[18]*x[3] + val_pntr[23]*x[4]; c_pntr[4] += val_pntr[4]*x[0] + val_pntr[9]*x[1] + val_pntr[14]*x[2] + val_pntr[19]*x[3] + val_pntr[24]*x[4]; break; case 6: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[6] *x[1] + val_pntr[12]*x[2] + val_pntr[18]*x[3] + val_pntr[24]*x[4] + val_pntr[30]*x[5]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[7] *x[1] + val_pntr[13]*x[2] + val_pntr[19]*x[3] + val_pntr[25]*x[4] + val_pntr[31]*x[5]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[8] *x[1] + val_pntr[14]*x[2] + val_pntr[20]*x[3] + val_pntr[26]*x[4] + val_pntr[32]*x[5]; c_pntr[3] += val_pntr[3]*x[0] + val_pntr[9] *x[1] + val_pntr[15]*x[2] + val_pntr[21]*x[3] + val_pntr[27]*x[4] + val_pntr[33]*x[5]; c_pntr[4] += val_pntr[4]*x[0] + val_pntr[10]*x[1] + val_pntr[16]*x[2] + val_pntr[22]*x[3] + val_pntr[28]*x[4] + val_pntr[34]*x[5]; c_pntr[5] += val_pntr[5]*x[0] + val_pntr[11]*x[1] + val_pntr[17]*x[2] + val_pntr[23]*x[3] + val_pntr[29]*x[4] + val_pntr[35]*x[5]; break; default: /* * For most computers, a really well-optimized assembly-coded level 2 * blas for small blocks sizes doesn't exist. It's better to * optimize your own version, and take out all the overhead from the * regular dgemv call. For large block sizes, it's also a win to * check for a column of zeroes; this is what dgemv_ does. The * routine dgemvnsqr_() is a fortran routine that contains optimized * code for the hp, created from the optimizing preprocessor. Every * workstation will probably have an entry here eventually, since * this is a key optimization location. */ /* #ifdef AZ_PA_RISC */ /* dgemvnsqr_(&m1, val_pntr, x, c_pntr); */ /* #else */ if (m1 < 10) AZ_dgemv2(m1, n1, val_pntr, x, c_pntr); else DGEMV_F77(CHAR_MACRO(N[0]), &m1, &n1, &one, val_pntr, &m1, x, &ione, &one, c_pntr, &ione); /* #endif */ } } /* nonsquare cases */ else { if (m1 < 10) AZ_dgemv2(m1, n1, val_pntr, x, c_pntr); else DGEMV_F77(CHAR_MACRO(N[0]), &m1, &n1, &one, val_pntr, &m1, x, &ione, &one, c_pntr, &ione); } val_pntr += iblk_size; } } } /* dvbr_sparax_basic */