int ML_Smoother_Ifpack(ML_Smoother *sm,int inlen,double x[],int outlen, double rhs[]) { ML_Smoother *smooth_ptr = (ML_Smoother *) sm; void *Ifpack_Handle = smooth_ptr->smoother->data; double* x2 = NULL,* rhs2 = NULL; /*int i;*/ int n, kk; int one_int = 1; double minus_one_double = -1.0; if (sm->init_guess == ML_NONZERO) { n = sm->my_level->Amat->invec_leng; assert (n == sm->my_level->Amat->outvec_leng); rhs2 = (double*) ML_allocate(sizeof(double) * (n + 1)); x2 = (double*) ML_allocate(sizeof(double) * (n + 1)); ML_Operator_Apply(sm->my_level->Amat, n, x, n, rhs2); DCOPY_F77(&n, x, &one_int, x2, &one_int); DAXPY_F77(&n, &minus_one_double, rhs, &one_int, rhs2, &one_int); ML_Ifpack_Solve(Ifpack_Handle, x2, rhs2); DAXPY_F77(&n, &minus_one_double, x2, &one_int, x, &one_int); ML_free(rhs2); ML_free(x2); } else ML_Ifpack_Solve(Ifpack_Handle, x, rhs); for (kk = 1; kk < sm->ntimes; kk++) { n = sm->my_level->Amat->invec_leng; assert (n == sm->my_level->Amat->outvec_leng); rhs2 = (double*) ML_allocate(sizeof(double) * (n + 1)); x2 = (double*) ML_allocate(sizeof(double) * (n + 1)); ML_Operator_Apply(sm->my_level->Amat, n, x, n, rhs2); DCOPY_F77(&n, x, &one_int, x2, &one_int); DAXPY_F77(&n, &minus_one_double, rhs, &one_int, rhs2, &one_int); ML_Ifpack_Solve(Ifpack_Handle, x2, rhs2); DAXPY_F77(&n, &minus_one_double, x2, &one_int, x, &one_int); ML_free(rhs2); ML_free(x2); } return 0; } /* ML_Smoother_Ifpack */
/**=========================================================================**/ void Aztec_LSVector::addVec (double s, const Aztec_LSVector& c) { /** Aztec_LSVector::addVec --- add multiple of a vector: s*c **/ int N_update = amap_->localSize(); int one = 1; double *pv = localCoeffs_; double *pc = (double*)c.startPointer(); DAXPY_F77(&N_update,&s,pc,&one,pv,&one); return; }
//============================================================================= void Epetra_BLAS::AXPY(const int N, const double ALPHA, const double * X, double * Y, const int INCX, const int INCY) const { DAXPY_F77(&N, &ALPHA, X, &INCX, Y, &INCY); }
int main(int argc, char *argv[]) { int num_PDE_eqns=1, N_levels=3, nsmooth=2; int leng, level, N_grid_pts, coarsest_level; int leng1,leng2; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL; struct AZ_SCALING *scaling; ML_Aggregate *ag; double *mode, *rigid=NULL, alpha; char filename[80]; int one = 1; int proc,nprocs; char pathfilename[100]; #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); proc = proc_config[AZ_node]; nprocs = proc_config[AZ_N_procs]; #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); proc = 0; nprocs = 1; #endif if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/inputfile",argv[1]); ML_Reader_ReadInput(pathfilename, &context); } else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context)); AZ_broadcast((char *) context, sizeof(struct reader_context), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); N_levels = context->N_levels; printf("N_levels %d\n",N_levels); nsmooth = context->nsmooth; num_PDE_eqns = context->N_dofPerNode; printf("num_PDE_eqns %d\n",num_PDE_eqns); ML_Set_PrintLevel(context->output_level); /* read in the number of matrix equations */ leng = 0; if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); fp=fopen(pathfilename,"r"); if (fp==NULL) { printf("**ERR** couldn't open file data_matrix.txt\n"); exit(1); } fscanf(fp,"%d",&leng); fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ #if 0 if (proc_config[AZ_N_procs] == 1) i = AZ_linear; else i = AZ_file; #endif i = AZ_linear; /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/ /* make a linear distribution of the matrix */ /* if the linear distribution does not align with the blocks, */ /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */ leng1 = leng/nprocs; leng2 = leng-leng1*nprocs; if (proc >= leng2) { leng2 += (proc*leng1); } else { leng1++; leng2 = proc*leng1; } N_update = leng1; update = (int*)AZ_allocate((N_update+1)*sizeof(int)); if (update==NULL) { (void) fprintf (stderr, "Not enough space to allocate 'update'\n"); fflush(stderr); exit(EXIT_FAILURE); } for (i=0; i<N_update; i++) update[i] = i+leng2; #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]); ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde, &N_update,&update,proc_config); #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all blocks (including the ghost nodes) the same size. */ /* not sure, whether this is a good idea with variable blocks */ /* the examples inpufiles (see top of this file) don't need it */ /* anyway */ /* AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); */ AZ_transform_norowreordering(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; start_time = AZ_second(); options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config); ML_Set_ResidualOutputFrequency(ml, context->output); ML_Set_Tolerance(ml, context->tol); ML_Aggregate_Create( &ag ); if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) { ML_Aggregate_Set_CoarsenScheme_MIS(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Coupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) { ML_Aggregate_Set_CoarsenScheme_METIS(ag); for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) { /* when no blocks read, use standard metis assuming constant block sizes */ if (!blocks) ML_Aggregate_Set_CoarsenScheme_METIS(ag); else { ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag); ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks, blocks,block_pde,N_update); } for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else { printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme); exit(-1); } ML_Aggregate_Set_DampingFactor(ag, context->agg_damping); ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize); ML_Aggregate_Set_Threshold(ag, context->agg_thresh); if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) { ML_Set_SpectralNormScheme_Calc(ml); } else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) { ML_Set_SpectralNormScheme_Anorm(ml); } else { printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm); } /* read in the rigid body modes */ Nrigid = 0; if (proc_config[AZ_node] == 0) { sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); while( (fp = fopen(pathfilename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } /* Set rhs */ sprintf(pathfilename,"%s/data_rhs.txt",argv[1]); fp = fopen(pathfilename,"r"); if (fp == NULL) { rhs=(double *)ML_allocate(leng*sizeof(double)); if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n"); for (i = 0; i < N_update; i++) rhs[i] = (double) update[i]; } else { fclose(fp); if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n"); AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, proc_config); } AZ_reorder_vec(rhs, data_org, update_index, NULL); for (i = 0; i < Nrigid; i++) { sprintf(filename,"data_nullsp%d.txt",i); sprintf(pathfilename,"%s/%s",argv[1],filename); AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, proc_config); AZ_reorder_vec(mode, data_org, update_index, NULL); #if 0 /* test the given rigid body mode, output-vector should be ~0 */ Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]); #endif for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one); } /* rhs orthogonalization */ alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/ AZ_gdot(N_update, mode, mode, proc_config); DAXPY_F77(&N_update, &alpha, mode, &one, rhs, &one); for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); } for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one); } #if 0 /* for testing the default nullsp */ ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update); #else if (Nrigid != 0) { ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); } #endif if (rigid) ML_free(rigid); ag->keep_agg_information = 1; coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, ML_INCREASING, ag); coarsest_level--; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); #if 0 /* set up smoothers */ if (!blocks) blocks = (int *) ML_allocate(sizeof(int)*N_update); #endif for (level = 0; level < coarsest_level; level++) { num_PDE_eqns = ml->Amat[level].num_PDEs; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->smoother,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } /* This is the symmetric Gauss-Seidel smoothing that we usually use. */ /* In parallel, it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. */ else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); blocks = NULL; block_pde = NULL; nblocks = 0; ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks, &blocks,&block_pde); if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } /* This is a true Gauss Seidel in parallel. This seems to work for */ /* elasticity problems. However, I don't believe that this is very */ /* efficient in parallel. */ /* nblocks = ml->Amat[level].invec_leng; for (i =0; i < nblocks; i++) blocks[i] = i; ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER, nsmooth, 1., nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER, nsmooth, 1., nblocks, blocks); */ /* Jacobi Smoothing */ else if (ML_strcmp(context->smoother,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4); ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4); } /* This does a block Gauss-Seidel (not true GS in parallel) */ /* where each processor has 'nblocks' blocks. */ /* */ else if (ML_strcmp(context->smoother,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } else { printf("unknown smoother %s\n",context->smoother); exit(1); } } /* set coarse level solver */ nsmooth = context->coarse_its; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->coarse_solve,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5); } else if (ML_strcmp(context->coarse_solve,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) { ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); } else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) { ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0); } else { printf("unknown coarse grid solver %s\n",context->coarse_solve); exit(1); } ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); AZ_defaults(options, params); if (ML_strcmp(context->krylov,"Cg") == 0) { options[AZ_solver] = AZ_cg; } else if (ML_strcmp(context->krylov,"Bicgstab") == 0) { options[AZ_solver] = AZ_bicgstab; } else if (ML_strcmp(context->krylov,"Tfqmr") == 0) { options[AZ_solver] = AZ_tfqmr; } else if (ML_strcmp(context->krylov,"Gmres") == 0) { options[AZ_solver] = AZ_gmres; } else { printf("unknown krylov method %s\n",context->krylov); } if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_r0; options[AZ_output] = 1; options[AZ_max_iter] = context->max_outer_its; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = context->tol; options[AZ_output] = context->output; ML_free(context); AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set x */ /* there is no initguess supplied with these examples for the moment.... */ fp = fopen("initguessfile","r"); if (fp != NULL) { fclose(fp); if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n"); AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, proc_config); options[AZ_conv] = AZ_expected_values; } else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n"); AZ_reorder_vec(xxx, data_org, update_index, NULL); /* if Dirichlet BC ... put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); if (proc_config[AZ_node] == 0) printf("Printing out a few entries of the solution ...\n"); for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
void AZ_pgmresr(double b[], double x[],double weight[], int options[], double params[], int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* This routine uses Saad's restarted Genralized Minimum Residual method to solve the nonsymmetric matrix problem Ax = b. IMPORTANT NOTE: While the 2-norm of the gmres residual is available, the actual residual is not normally computed as part of the gmres algorithm. Thus, if the user uses a convergence condition (see AZ_gmres_global_scalars()) that is based on the 2-norm of the residual there is no need to compute the residual (i.e. r_avail = AZ_FALSE). However, if another norm of r is requested, AZ_gmres_global_scalars() sets r_avail = AZ_TRUE and the algorithm computes the residual. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== Amat: Structure used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see file Aztec User's Guide). proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int k; int i, N, NN, converged, one = 1, iter, r_avail = AZ_FALSE; int print_freq, proc, kspace; double **UU, **CC, *dots, *tmp, *res; double dble_tmp, r_2norm = 1.0, epsilon; double rec_residual, scaled_r_norm, true_scaled_r=0.0; double actual_residual = -1.0, minus_alpha, alpha; double *dummy = (double *) 0; double *UUblock, *CCblock; int mm, ii; char label[64],suffix[32], prefix[64]; int *data_org, str_leng, first_time = AZ_TRUE; double doubleone = 1.0, minusone = -1.0, init_time = 0.0; char *T = "T"; char *T2 = "N"; /**************************** execution begins ******************************/ sprintf(suffix," in gmresr%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; epsilon = params[AZ_tol]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; kspace = options[AZ_kspace]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 0; /* GMRES seldom updates solution */ convergence_info->epsilon = params[AZ_tol]; /* allocate memory for required vectors */ NN = kspace + 1; /* +1: make sure everybody allocates something */ sprintf(label,"dots%s",suffix); dots = AZ_manage_memory(2*NN*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); tmp = &(dots[NN]); sprintf(label,"CC%s",suffix); CC = (double **) AZ_manage_memory(2*NN*sizeof(double *), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); UU = &(CC[NN]); NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for intel */ sprintf(label,"UUblock%s",suffix); UUblock = AZ_manage_memory(2*NN*kspace*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id,label, &i); for (k = 0; k < kspace; k++) UU[k] = &(UUblock[k*NN]); CCblock = &(UUblock[kspace*NN]); for (k = 0; k < kspace; k++) CC[k] = &(CCblock[k*NN]); sprintf(label,"res%s",suffix); res = AZ_manage_memory(NN*sizeof(double),AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i); AZ_compute_residual(b, x, res, proc_config, Amat); /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] */ r_2norm = DDOT_F77(&N, res, &one, res, &one); AZ_gdot_vec(1, &r_2norm, &rec_residual, proc_config); r_2norm = sqrt(r_2norm); rec_residual = r_2norm; AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, NULL, NULL, NULL, convergence_info); r_2norm = rec_residual; converged = scaled_r_norm < epsilon; if ( (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_summary) && (options[AZ_output] != AZ_warnings) && (proc == 0) ) (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); iter = 0; /*rst change while (!converged && iter < options[AZ_max_iter]) { */ while (!(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { convergence_info->iteration = iter; i = 0; /*rst change while (i < kspace && !converged && iter < options[AZ_max_iter]) { */ while (i < kspace && !(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) { iter++; convergence_info->iteration = iter; /* v_i+1 = A M^-1 v_i */ DCOPY_F77(&N, res , &one, UU[i], &one); if (iter == 1) init_time = AZ_second(); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int precID = -1; precID = Teuchos_startTimer( "AztecOO: Operation Prec*x", precID ); #endif #endif precond->prec_function(UU[i],options,proc_config,params,Amat,precond); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( precID ); #endif #endif if (iter == 1) status[AZ_first_precond] = AZ_second() - init_time; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start timer. */ static int matvecID = -1; matvecID = Teuchos_startTimer( "AztecOO: Operation Op*x", matvecID ); #endif #endif Amat->matvec(UU[i], CC[i], Amat, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop timer. */ Teuchos_stopTimer( matvecID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoID = -1; orthoID = Teuchos_startTimer( "AztecOO: Orthogonalization", orthoID ); #endif #endif /* Gram-Schmidt orthogonalization */ if (!options[AZ_orthog]) { /* classical (stabilized) */ for (ii = 0 ; ii < 2 ; ii++ ) { dble_tmp = 0.0; mm = i; if (N == 0) for (k = 0 ; k < i ; k++) dots[k] = 0.0; #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoInnerProdID = -1; orthoInnerProdID = Teuchos_startTimer( "AztecOO: Ortho (Inner Product)", orthoInnerProdID ); #endif #endif DGEMV_F77(CHAR_MACRO(T[0]), &N, &mm, &doubleone, CCblock, &NN, CC[i], &one, &dble_tmp, dots, &one); AZ_gdot_vec(i, dots, tmp, proc_config); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoInnerProdID ); #endif #endif #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Start the timer. */ static int orthoUpdateID = -1; orthoUpdateID = Teuchos_startTimer( "AztecOO: Ortho (Update)", orthoUpdateID ); #endif #endif DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, CCblock, &NN, dots, &one, &doubleone, CC[i], &one); DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, UUblock, &NN, dots, &one, &doubleone, UU[i], &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoUpdateID ); #endif #endif } } else { /* modified */ for (k = 0; k < i; k++) { alpha = AZ_gdot(N, CC[k], CC[i], proc_config); minus_alpha = -alpha; DAXPY_F77(&N, &minus_alpha, CC[k], &one, CC[i], &one); DAXPY_F77(&N, &minus_alpha, UU[k], &one, UU[i], &one); } } /* normalize vector */ #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS static int orthoNormID = -1; orthoNormID = Teuchos_startTimer( "AztecOO: Ortho (Norm)", orthoNormID ); #endif #endif dble_tmp = sqrt(AZ_gdot(N, CC[i], CC[i], proc_config)); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS Teuchos_stopTimer( orthoNormID ); #endif #endif if (dble_tmp > DBL_EPSILON*r_2norm) dble_tmp = 1.0 / dble_tmp; else dble_tmp = 0.0; DSCAL_F77(&N, &dble_tmp, CC[i], &one); DSCAL_F77(&N, &dble_tmp, UU[i], &one); dble_tmp = AZ_gdot(N, CC[i], res, proc_config); DAXPY_F77(&N, &dble_tmp, UU[i], &one, x, &one); dble_tmp = -dble_tmp; DAXPY_F77(&N, &dble_tmp, CC[i], &one, res, &one); #ifdef AZ_ENABLE_TIMEMONITOR #ifdef HAVE_AZTECOO_TEUCHOS /* Stop the timer. */ Teuchos_stopTimer( orthoID ); #endif #endif /* determine residual norm & test convergence */ r_2norm = sqrt(AZ_gdot(N, res, res, proc_config)); rec_residual = r_2norm; /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * NOTE: if r_avail = AZ_TRUE or AZ_FIRST is passed in, we perform * step 1), otherwise ||r|| is taken as rec_residual. */ AZ_compute_global_scalars(Amat, x, b, res, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, dummy, dummy, dummy, convergence_info); converged = scaled_r_norm < epsilon; /*rst change if ( (iter%print_freq == 0) && proc == 0) */ if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0) (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); i++; /* subspace dim. counter dim(K) = i - 1 */ #ifdef out if (options[AZ_check_update_size] & converged) converged = AZ_compare_update_vs_soln(N, -1.,dble_tmp, UU[i-1], x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (converged) { /* compute true residual using 'v[kspace]' as a temporary vector */ AZ_scale_true_residual(x, b, res, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); converged = true_scaled_r < params[AZ_tol]; if (!converged && (AZ_get_new_eps(&epsilon, scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT)) { /* * Computed residual has converged, actual residual has not * converged, AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } #endif } } if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings)) (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter, scaled_r_norm); if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #ifdef out /* check if we exceeded maximum number of iterations */ if (converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); #endif } /* AZ_pgmres */
void AZ_pcg_f(double b[], double x[], double weight[], int options[], double params[], int proc_config[],double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditioner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE; double alpha, beta = 0.0, nalpha, true_scaled_r=-1.0; double *r, *z, *p, *ap, actual_residual = -1.0; double r_z_dot, r_z_dot_old, p_ap_dot, rec_residual=-1.0; double scaled_r_norm=-1.0, brkdown_tol = DBL_EPSILON; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; double **saveme, *ptap; int *kvec_sizes = NULL, current_kept = 0; double *dots; double doubleone = 1., dzero = 0.; char *T = "T"; char *T2 = "N"; double *block; /**************************** execution begins ******************************/ sprintf(suffix," in cg%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; /* pull needed values out of parameter arrays */ data_org = Amat->data_org; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* CG always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate space for necessary vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for assembly */ /* matvec on paragon. */ sprintf(label,"z%s",suffix); p = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label, &j); r = &(p[1*NN]); z = &(p[2*NN]); ap = &(p[3*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); if (options[AZ_apply_kvecs]) { AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,NULL, NULL, &r_z_dot, convergence_info); AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double)); if (dots == NULL) { printf("Not space to apply vectors in CG\n"); exit(1); } DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N, r, &one, &dzero, dots, &one); AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]), proc_config); for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i]; DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone, block, &N, dots, &one, &doubleone, x, &one); AZ_free(dots); AZ_compute_residual(b, x, r, proc_config, Amat); if ((options[AZ_output] != AZ_none) && (proc == 0)) printf("\t\tApplied Previous Krylov Vectors ... \n\n"); } if (options[AZ_keep_kvecs] > 0) AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); /* z = M r */ /* p = 0 */ DCOPY_F77(&N, r, &one, z, &one); status[AZ_first_precond] = AZ_second(); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); status[AZ_first_precond] = AZ_second() - status[AZ_first_precond]; for (i = 0; i < N; i++ ) p[i] = 0.0; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,r, z, &r_z_dot, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) { (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); AZ_flush_out(); } for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++ ) { convergence_info->iteration = iter; /* p = z + beta * p */ /* ap = A p */ for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i]; Amat->matvec(p, ap, Amat, proc_config); if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) { for (i = 0; i < current_kept; i++) { alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i]; DAXPY_F77(&N, &alpha, saveme[i], &one, p, &one); } if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config); } p_ap_dot = AZ_gdot(N, p, ap, proc_config); if (p_ap_dot < brkdown_tol) { /* possible problem */ if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) { /* something wrong */ AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = r_z_dot / p_ap_dot; nalpha = -alpha; /* x = x + alpha*p */ /* r = r - alpha*Ap */ /* z = M^-1 r */ DAXPY_F77(&N, &alpha, p, &one, x, &one); if (iter <= options[AZ_keep_kvecs]) { DCOPY_F77(&N, p, &one, saveme[iter-1], &one); ptap[iter-1] = p_ap_dot ; kvec_sizes[AZ_Nkept]++; current_kept = kvec_sizes[AZ_Nkept]; } /* else { i = (iter-1)%options[AZ_keep_kvecs]; DCOPY_F77(&N, p, &one, saveme[i], &one); ptap[i] = p_ap_dot ; } */ DAXPY_F77(&N, &nalpha, ap, &one, r, &one); DCOPY_F77(&N, r, &one, z, &one); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); r_z_dot_old = r_z_dot; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, z, &r_z_dot, convergence_info); if (brkdown_will_occur) { AZ_scale_true_residual( x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat,convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = r_z_dot / r_z_dot_old; if (fabs(r_z_dot) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(r_z_dot); } if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (convergence_info->converged) { AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test) ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* AZ_pcg */
void AZ_pbicgstab(double b[], double x[], double weight[], int options[], double params[],int proc_config[], double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Vand der Vorst's (1990) variation of the Bi-Conjugate Gradient algorthm (Sonneveld (1984,1989)) to solve the nonsymmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditionner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter=1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc; int brkdown_will_occur = AZ_FALSE; double alpha = 1.0, beta, true_scaled_r=0.0; double *v, *r, *rtilda, *p, *phat, *s, *shat; double omega = 1.0, dot_vec[2], tmp[2], init_time = 0.0; double rhonm1 = 1.0, rhon, sigma, brkdown_tol = DBL_EPSILON; double scaled_r_norm= -1.0, actual_residual = -1.0, rec_residual= -1.0; double dtemp; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; /**************************** execution begins ******************************/ sprintf(suffix," in cgstab%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; data_org = Amat->data_org; /* pull needed values out of parameter arrays */ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* BiCGStab always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate memory for required vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something*/ NN = NN + (NN%2); /* make sure things are aligned for the */ /* assembly coded matvec() on the Intel. */ sprintf(label,"phat%s",suffix); phat = (double *) AZ_manage_memory(7*NN*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label,&j); p = &(phat[1*NN]); shat = &(phat[2*NN]); /* NOTE: phat and shat must be aligned */ /* so that the assembly dgemv */ /* works on the paragon. */ s = &(phat[3*NN]); r = &(phat[4*NN]); rtilda = &(phat[5*NN]); v = &(phat[6*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); /* v, p <- 0 */ for (i = 0; i < N; i++) v[i] = p[i] = 0.0; /* set rtilda */ if (options[AZ_aux_vec] == AZ_resid) DCOPY_F77(&N, r, &one, rtilda, &one); else AZ_random_vector(rtilda, data_org, proc_config); /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * 3) rho = <rtilda, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config,&r_avail,r,rtilda, &rhon, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) (void) AZ_printf_out("%siter: 0 residual = %e\n",prefix,scaled_r_norm); for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++) { if (brkdown_will_occur) { AZ_scale_true_residual( x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = (rhon/rhonm1) * (alpha/omega); if (fabs(rhon) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, rtilda, rhon, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(rhon); } rhonm1 = rhon; /* p = r + beta*(p - omega*v) */ /* phat = M^-1 p */ /* v = A phat */ dtemp = beta * omega; for (i = 0; i < N; i++) p[i] = r[i] + beta * p[i] - dtemp * v[i]; DCOPY_F77(&N, p, &one, phat, &one); if (iter==1) init_time = AZ_second(); if (precond_flag) precond->prec_function(phat,options,proc_config,params,Amat,precond); if (iter==1) status[AZ_first_precond] = AZ_second() - init_time; Amat->matvec(phat, v, Amat, proc_config); sigma = AZ_gdot(N, rtilda, v, proc_config); if (fabs(sigma) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, rtilda, v, sigma, proc_config)) { /* break down */ AZ_scale_true_residual( x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org,proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * fabs(sigma); } alpha = rhon / sigma; /* s = r - alpha*v */ /* shat = M^-1 s */ /* r = A shat (r is a tmp here for t ) */ for (i = 0; i < N; i++) s[i] = r[i] - alpha * v[i]; DCOPY_F77(&N, s, &one, shat, &one); if (precond_flag) precond->prec_function(shat,options,proc_config,params,Amat,precond); Amat->matvec(shat, r, Amat, proc_config); /* omega = (t,s)/(t,t) with r = t */ dot_vec[0] = DDOT_F77(&N, r, &one, s, &one); dot_vec[1] = DDOT_F77(&N, r, &one, r, &one); AZ_gdot_vec(2, dot_vec, tmp, proc_config); if (fabs(dot_vec[1]) < DBL_MIN) { omega = 0.0; brkdown_will_occur = AZ_TRUE; } else omega = dot_vec[0] / dot_vec[1]; /* x = x + alpha*phat + omega*shat */ /* r = s - omega*r */ DAXPY_F77(&N, &alpha, phat, &one, x, &one); DAXPY_F77(&N, &omega, shat, &one, x, &one); for (i = 0; i < N; i++) r[i] = s[i] - omega * r[i]; /* * Compute a few global scalars: * 1) ||r|| corresponding to options[AZ_conv] * 2) scaled ||r|| corresponding to options[AZ_conv] * 3) rho = <rtilda, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, rtilda, &rhon, convergence_info); if ( (iter%print_freq == 0) && proc == 0) (void) AZ_printf_out("%siter: %4d residual = %e\n",prefix,iter, scaled_r_norm); /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) { dtemp = alpha/omega; DAXPY_F77(&N, &dtemp, phat, &one, shat, &one); convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,omega, shat, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); } if (convergence_info->converged) { AZ_scale_true_residual(x, b, v, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&convergence_info->epsilon, scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test)) (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix,iter, scaled_r_norm); /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* bicgstab */
void BLAS<int, double>::AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }