Esempio n. 1
0
int ML_Smoother_Ifpack(ML_Smoother *sm,int inlen,double x[],int outlen,
		       double rhs[])
{
  ML_Smoother    *smooth_ptr = (ML_Smoother *) sm;
  void *Ifpack_Handle = smooth_ptr->smoother->data;
  double* x2 = NULL,* rhs2 = NULL;
  /*int i;*/
  int n, kk;
  int one_int = 1;
  double minus_one_double = -1.0;

  if (sm->init_guess == ML_NONZERO)
  {
    n = sm->my_level->Amat->invec_leng;
    assert (n == sm->my_level->Amat->outvec_leng);

    rhs2 = (double*) ML_allocate(sizeof(double) * (n + 1));
    x2   = (double*) ML_allocate(sizeof(double) * (n + 1));

    ML_Operator_Apply(sm->my_level->Amat, n, x, n, rhs2);
    DCOPY_F77(&n, x, &one_int, x2, &one_int);
    DAXPY_F77(&n, &minus_one_double, rhs, &one_int, rhs2, &one_int);
    ML_Ifpack_Solve(Ifpack_Handle, x2, rhs2);
    DAXPY_F77(&n, &minus_one_double, x2, &one_int, x, &one_int);

    ML_free(rhs2);
    ML_free(x2);
  }
  else
    ML_Ifpack_Solve(Ifpack_Handle, x, rhs);

  for (kk = 1; kk < sm->ntimes; kk++) {
    n = sm->my_level->Amat->invec_leng;
    assert (n == sm->my_level->Amat->outvec_leng);

    rhs2 = (double*) ML_allocate(sizeof(double) * (n + 1));
    x2 = (double*) ML_allocate(sizeof(double) * (n + 1));

    ML_Operator_Apply(sm->my_level->Amat, n, x, n, rhs2);
    DCOPY_F77(&n, x, &one_int, x2, &one_int);
    DAXPY_F77(&n, &minus_one_double, rhs, &one_int, rhs2, &one_int);
    ML_Ifpack_Solve(Ifpack_Handle, x2, rhs2);
    DAXPY_F77(&n, &minus_one_double, x2, &one_int, x, &one_int);

    ML_free(rhs2);
    ML_free(x2);
  }
  return 0;
} /* ML_Smoother_Ifpack */
/**=========================================================================**/
void Aztec_LSVector::addVec (double s, const Aztec_LSVector& c) {

/** Aztec_LSVector::addVec --- add multiple of a vector:  s*c **/

   int N_update = amap_->localSize();
   int one = 1;

   double *pv = localCoeffs_;
   double *pc = (double*)c.startPointer();

   DAXPY_F77(&N_update,&s,pc,&one,pv,&one);

   return;
}
Esempio n. 3
0
//=============================================================================
void Epetra_BLAS::AXPY(const int N, const double ALPHA, const double * X, double * Y, const int INCX, const int INCY) const {
  DAXPY_F77(&N, &ALPHA, X, &INCX, Y, &INCY);
}
int main(int argc, char *argv[])
{
  int num_PDE_eqns=1, N_levels=3, nsmooth=2;

  int leng, level, N_grid_pts, coarsest_level;
  int leng1,leng2;
  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */

  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */

  double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL;
  struct AZ_SCALING *scaling;
  ML_Aggregate *ag;
  double *mode, *rigid=NULL, alpha; 
  char filename[80];
  int    one = 1;
  int    proc,nprocs;
  char pathfilename[100];

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
  /* get number of processors and the name of this processor */
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  proc   = proc_config[AZ_node];
  nprocs = proc_config[AZ_N_procs];
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
  proc   = 0;
  nprocs = 1;
#endif

   if (proc_config[AZ_node] == 0) {
      sprintf(pathfilename,"%s/inputfile",argv[1]);
      ML_Reader_ReadInput(pathfilename, &context);
   }
   else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context));
   AZ_broadcast((char *) context,  sizeof(struct reader_context), proc_config,
                AZ_PACK);
   AZ_broadcast((char *) NULL        ,   0          , proc_config, AZ_SEND);

   N_levels = context->N_levels;
   printf("N_levels %d\n",N_levels);
   nsmooth   = context->nsmooth;
   num_PDE_eqns = context->N_dofPerNode;
   printf("num_PDE_eqns %d\n",num_PDE_eqns);

   ML_Set_PrintLevel(context->output_level);

  /* read in the number of matrix equations */
  leng = 0;
  if (proc_config[AZ_node] == 0) {
        sprintf(pathfilename,"%s/data_matrix.txt",argv[1]);
        fp=fopen(pathfilename,"r");
     if (fp==NULL) {
        printf("**ERR** couldn't open file data_matrix.txt\n");
        exit(1);
     }
        fscanf(fp,"%d",&leng);
     fclose(fp);
  }
  leng = AZ_gsum_int(leng, proc_config);

  N_grid_pts=leng/num_PDE_eqns;


  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly. */
#if 0
  if (proc_config[AZ_N_procs] == 1) i = AZ_linear;
  else i = AZ_file;
#endif
  i = AZ_linear;

  /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/
  /* make a linear distribution of the matrix       */
  /* if the linear distribution does not align with the blocks, */
  /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */
  leng1 = leng/nprocs;
  leng2 = leng-leng1*nprocs;
  if (proc >= leng2)
  {
     leng2 += (proc*leng1);
  }
  else
  {
     leng1++;
     leng2 = proc*leng1;
  }
  N_update = leng1;
  update = (int*)AZ_allocate((N_update+1)*sizeof(int));
  if (update==NULL)
  {
      (void) fprintf (stderr, "Not enough space to allocate 'update'\n");
      fflush(stderr); exit(EXIT_FAILURE);
  }
  for (i=0; i<N_update; i++) update[i] = i+leng2;
  
#if 0 /* debug */
  printf("proc %d N_update %d\n",proc_config[AZ_node],N_update);
  fflush(stdout);                   
#endif
  sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]);
  ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde,
                                  &N_update,&update,proc_config);
#if 0 /* debug */
  printf("proc %d N_update %d\n",proc_config[AZ_node],N_update);
  fflush(stdout);                   
#endif

  sprintf(pathfilename,"%s/data_matrix.txt",argv[1]);
  AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config);

  /* This code is to fix things up so that we are sure we have   */ 
  /* all blocks (including the ghost nodes) the same size.       */
  /* not sure, whether this is a good idea with variable blocks  */
  /* the examples inpufiles (see top of this file) don't need it */
  /* anyway                                                      */
  /*
  AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);
  */
  AZ_transform_norowreordering(proc_config, &external, bindx, val,  update, &update_index,
	       &extern_index, &data_org, N_update, 0, 0, 0, &cpntr,
	       AZ_MSR_MATRIX);
	
  Amat = AZ_matrix_create( leng );

  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type  = data_org[AZ_matrix_type];
	
  data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];
			
  start_time = AZ_second();

  options[AZ_scaling] = AZ_none;

  ML_Create(&ml, N_levels);
			
			
  /* set up discretization matrix and matrix vector function */
  AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config);

  ML_Set_ResidualOutputFrequency(ml, context->output);
  ML_Set_Tolerance(ml, context->tol);
  ML_Aggregate_Create( &ag );
  if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) {
     ML_Aggregate_Set_CoarsenScheme_MIS(ag);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) {
     ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) {
     ML_Aggregate_Set_CoarsenScheme_Coupled(ag);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) {
     ML_Aggregate_Set_CoarsenScheme_METIS(ag);
     for (i=0; i<N_levels; i++)
        ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) {
     /* when no blocks read, use standard metis assuming constant block sizes */
     if (!blocks) 
        ML_Aggregate_Set_CoarsenScheme_METIS(ag);
     else {
        ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag);
        ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks,
                                                       blocks,block_pde,N_update);
     }
     for (i=0; i<N_levels; i++)
        ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9);
  }
  else {
     printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme);
     exit(-1);
  }
  ML_Aggregate_Set_DampingFactor(ag, context->agg_damping);
  ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize);
  ML_Aggregate_Set_Threshold(ag, context->agg_thresh);

  if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) {
     ML_Set_SpectralNormScheme_Calc(ml);
  }
  else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) {
     ML_Set_SpectralNormScheme_Anorm(ml);
  }
  else {
     printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm);
  }

  /* read in the rigid body modes */

   Nrigid = 0;
   if (proc_config[AZ_node] == 0) {
      sprintf(filename,"data_nullsp%d.txt",Nrigid);
      sprintf(pathfilename,"%s/%s",argv[1],filename);
      while( (fp = fopen(pathfilename,"r")) != NULL) {
          fclose(fp);
          Nrigid++;
          sprintf(filename,"data_nullsp%d.txt",Nrigid);
          sprintf(pathfilename,"%s/%s",argv[1],filename);
      }
    }
    Nrigid = AZ_gsum_int(Nrigid,proc_config);

    if (Nrigid != 0) {
       rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
       if (rigid == NULL) {
          printf("Error: Not enough space for rigid body modes\n");
       }
    }

   /* Set rhs */
   sprintf(pathfilename,"%s/data_rhs.txt",argv[1]);
   fp = fopen(pathfilename,"r");
   if (fp == NULL) {
      rhs=(double *)ML_allocate(leng*sizeof(double));
      if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n");
      for (i = 0; i < N_update; i++) rhs[i] = (double) update[i];
   }
   else {
      fclose(fp);
      if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n");
      AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, 
                          proc_config);
   }
   AZ_reorder_vec(rhs, data_org, update_index, NULL);

   for (i = 0; i < Nrigid; i++) {
      sprintf(filename,"data_nullsp%d.txt",i);
      sprintf(pathfilename,"%s/%s",argv[1],filename);
      AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, 
                          proc_config);
      AZ_reorder_vec(mode, data_org, update_index, NULL);

#if 0 /* test the given rigid body mode, output-vector should be ~0 */
       Amat->matvec(mode, rigid, Amat, proc_config);
       for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]);
#endif

    for (j = 0; j < i; j++) {
       alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/
                  AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), 
                               proc_config);
       DAXPY_F77(&N_update, &alpha,  &(rigid[j*N_update]),  &one, mode, &one);
    }
   
    /* rhs orthogonalization */

    alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/
                    AZ_gdot(N_update, mode, mode, proc_config);
    DAXPY_F77(&N_update, &alpha,  mode,  &one, rhs, &one);

    for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];
    free(mode);
    free(garbage);
  }

  for (j = 0; j < Nrigid; j++) {
     alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/
              AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), 
                      proc_config);
     DAXPY_F77(&N_update, &alpha,  &(rigid[j*N_update]),  &one, rhs, &one);
  }

#if 0 /* for testing the default nullsp */
  ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update);
#else
  if (Nrigid != 0) {
     ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
  }
#endif
  if (rigid) ML_free(rigid);

  ag->keep_agg_information = 1;
  coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, 
                                            ML_INCREASING, ag);
  coarsest_level--;                                            

  if ( proc_config[AZ_node] == 0 )
	printf("Coarse level = %d \n", coarsest_level);
	
#if 0
  /* set up smoothers */
  if (!blocks)
     blocks = (int *) ML_allocate(sizeof(int)*N_update);
#endif

  for (level = 0; level < coarsest_level; level++) {

      num_PDE_eqns = ml->Amat[level].num_PDEs;
		
     /*  Sparse approximate inverse smoother that acutally does both */
     /*  pre and post smoothing.                                     */

     if (ML_strcmp(context->smoother,"Parasails") == 0) {
        ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, 
                                parasails_sym, parasails_thresh, 
                                parasails_nlevels, parasails_filter,
                                (int) parasails_loadbal, parasails_factorized);
     }

     /* This is the symmetric Gauss-Seidel smoothing that we usually use. */
     /* In parallel, it is not a true Gauss-Seidel in that each processor */
     /* does a Gauss-Seidel on its local submatrix independent of the     */
     /* other processors.                                                 */

     else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) {
       ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.);
     }
     else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) {
       ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.);
     }
     else if (ML_strcmp(context->smoother,"Poly") == 0) {
       ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth);
     }
     else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) {
       ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1.,
					 num_PDE_eqns);
     }
     else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         blocks    = NULL;
         block_pde = NULL;
         nblocks   = 0;
         ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks,
                                                        &blocks,&block_pde);
         if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.,
                                              nblocks, blocks);
     }

     /* This is a true Gauss Seidel in parallel. This seems to work for  */
     /* elasticity problems.  However, I don't believe that this is very */
     /* efficient in parallel.                                           */       
     /*
      nblocks = ml->Amat[level].invec_leng;
      for (i =0; i < nblocks; i++) blocks[i] = i;
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
     */

     /* Jacobi Smoothing                                                 */

     else if (ML_strcmp(context->smoother,"Jacobi") == 0) {
        ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4);
        ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4);
     }

     /*  This does a block Gauss-Seidel (not true GS in parallel)        */
     /*  where each processor has 'nblocks' blocks.                      */
     /* */

     else if (ML_strcmp(context->smoother,"Metis") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         nblocks = 250;
         ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.,
                                        nblocks, blocks);
     }
     else {
         printf("unknown smoother %s\n",context->smoother);
         exit(1);
     }
   }
	
   /* set coarse level solver */
   nsmooth   = context->coarse_its;
   /*  Sparse approximate inverse smoother that acutally does both */
   /*  pre and post smoothing.                                     */

   if (ML_strcmp(context->coarse_solve,"Parasails") == 0) {
        ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, 
                                parasails_sym, parasails_thresh, 
                                parasails_nlevels, parasails_filter,
                                (int) parasails_loadbal, parasails_factorized);
   }

   else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) {
       ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.);
   }
   else if (ML_strcmp(context->coarse_solve,"Poly") == 0) {
     ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth);
   }
   else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) {
       ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.);
   }
   else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) {
       ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1.,
					num_PDE_eqns);
   }
   else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, 
                                        nsmooth,1., nblocks, blocks);
   }
   else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) {
        ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5);
   }
   else if (ML_strcmp(context->coarse_solve,"Metis") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         nblocks = 250;
         ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, 
                                              nsmooth,1., nblocks, blocks);
   }
   else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) {
      ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
   }
   else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) {
      ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0);
   }
   else {
         printf("unknown coarse grid solver %s\n",context->coarse_solve);
         exit(1);
   }
		
   ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); 

   AZ_defaults(options, params);
	
   if (ML_strcmp(context->krylov,"Cg") == 0) {
      options[AZ_solver]   = AZ_cg;
   }
   else if (ML_strcmp(context->krylov,"Bicgstab") == 0) {
      options[AZ_solver]   = AZ_bicgstab;
   }
   else if (ML_strcmp(context->krylov,"Tfqmr") == 0) {
      options[AZ_solver]   = AZ_tfqmr;
   }
   else if (ML_strcmp(context->krylov,"Gmres") == 0) {
      options[AZ_solver]   = AZ_gmres;
   }
   else {
      printf("unknown krylov method %s\n",context->krylov);
   }
   if (blocks)            ML_free(blocks);
   if (block_pde)         ML_free(block_pde);
   options[AZ_scaling]  = AZ_none;
   options[AZ_precond]  = AZ_user_precond;
   options[AZ_conv]     = AZ_r0;
   options[AZ_output]   = 1;
   options[AZ_max_iter] = context->max_outer_its;
   options[AZ_poly_ord] = 5;
   options[AZ_kspace]   = 130;
   params[AZ_tol]       = context->tol;
   options[AZ_output]   = context->output;
   ML_free(context);
	
   AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); 
   setup_time = AZ_second() - start_time;
	
   xxx = (double *) malloc( leng*sizeof(double));

   for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; 
	

   /* Set x */
   /*
   there is no initguess supplied with these examples for the moment....
   */
   fp = fopen("initguessfile","r");
   if (fp != NULL) {
      fclose(fp);
      if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n");
      AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, 
                          proc_config);

      options[AZ_conv] = AZ_expected_values;
   }
   else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n");

   AZ_reorder_vec(xxx, data_org, update_index, NULL);

   /* if Dirichlet BC ... put the answer in */

   for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
      if ( (val[i] > .99999999) && (val[i] < 1.0000001))
         xxx[i] = rhs[i];      
   }

   fp = fopen("AZ_no_multilevel.dat","r");
   scaling = AZ_scaling_create();
   start_time = AZ_second();
   if (fp != NULL) {
      fclose(fp);
      options[AZ_precond] = AZ_none;
      options[AZ_scaling] = AZ_sym_diag;
      options[AZ_ignore_scaling] = AZ_TRUE;

      options[AZ_keep_info] = 1;
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 

/*
      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 
*/
   }
   else {
      options[AZ_keep_info] = 1;
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
/*
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
*/
   }
   solve_time = AZ_second() - start_time;

   if (proc_config[AZ_node] == 0) 
      printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

   if (proc_config[AZ_node] == 0) 
     printf("Printing out a few entries of the solution ...\n");

   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}

   ML_Aggregate_Destroy(&ag);
   ML_Destroy(&ml);
   AZ_free((void *) Amat->data_org);
   AZ_free((void *) Amat->val);
   AZ_free((void *) Amat->bindx);
   AZ_free((void *) update);
   AZ_free((void *) external);
   AZ_free((void *) extern_index);
   AZ_free((void *) update_index);
   AZ_scaling_destroy(&scaling);
   if (Amat  != NULL) AZ_matrix_destroy(&Amat);
   if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
   free(xxx);
   free(rhs);

#ifdef ML_MPI
  MPI_Finalize();
#endif
	
  return 0;
	
}
Esempio n. 5
0
void AZ_pgmresr(double b[], double x[],double weight[], int options[],
	double params[], int proc_config[], double status[], AZ_MATRIX *Amat, 
	AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info)

/*******************************************************************************

  This routine uses Saad's restarted Genralized Minimum Residual method to solve
  the nonsymmetric matrix problem Ax = b.

  IMPORTANT NOTE: While the 2-norm of the gmres residual is available, the
  actual residual is not normally computed as part of the gmres algorithm. Thus,
  if the user uses a convergence condition (see AZ_gmres_global_scalars()) that
  is based on the 2-norm of the residual there is no need to compute the
  residual (i.e. r_avail = AZ_FALSE). However, if another norm of r is
  requested, AZ_gmres_global_scalars() sets r_avail = AZ_TRUE and the algorithm
  computes the residual.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  Amat:            Structure used for DMSR and DVBR sparse matrix storage (see
                   file Aztec User's Guide).

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.

  params:          Drop tolerance and convergence tolerance info.

  data_org:        Array containing information on the distribution of the
                   matrix to this processor as well as communication parameters
                   (see file Aztec User's Guide).

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).
*******************************************************************************/

{

  /* local variables */

  register int k;
  int          i, N, NN, converged, one = 1, iter, r_avail = AZ_FALSE;
  int          print_freq, proc, kspace;
  double     **UU, **CC, *dots, *tmp, *res;
  double       dble_tmp, r_2norm = 1.0, epsilon;
  double       rec_residual, scaled_r_norm, true_scaled_r=0.0;
  double       actual_residual = -1.0, minus_alpha, alpha;
  double       *dummy = (double *) 0;
  double       *UUblock, *CCblock;
  int          mm, ii;
  char         label[64],suffix[32], prefix[64];
  int          *data_org, str_leng, first_time = AZ_TRUE;
  double       doubleone = 1.0, minusone = -1.0, init_time = 0.0;
char *T = "T";
char *T2 = "N";


  /**************************** execution begins ******************************/

  sprintf(suffix," in gmresr%d",options[AZ_recursion_level]);
						/* set string that will be used */
                                                /* for manage_memory label      */
  /* set prefix for printing */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  for (i = 0 ; i < options[AZ_recursion_level]; i++ ) {
     prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
     prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';

  data_org = Amat->data_org;

  /* pull needed values out of parameter arrays */

  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];
  epsilon      = params[AZ_tol];
  proc         = proc_config[AZ_node];
  print_freq   = options[AZ_print_freq];
  kspace       = options[AZ_kspace];

  /* Initialize some values in convergence info struct */
  convergence_info->print_info = print_freq;
  convergence_info->iteration = 0;
  convergence_info->sol_updated = 0; /* GMRES seldom updates solution */
  convergence_info->epsilon = params[AZ_tol];

  /* allocate memory for required vectors */

  NN    = kspace  + 1;
  /* +1: make sure everybody allocates something */

  sprintf(label,"dots%s",suffix);
  dots  = AZ_manage_memory(2*NN*sizeof(double), AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i);
  tmp   = &(dots[NN]);
  sprintf(label,"CC%s",suffix);
  CC    = (double **) AZ_manage_memory(2*NN*sizeof(double *),
                                       AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i);
  UU    = &(CC[NN]);

  NN    = N + data_org[AZ_N_external];
  if (NN == 0) NN++; /* make sure everybody allocates something */
  NN = NN + (NN%2);  /* make sure things are aligned for intel  */


  sprintf(label,"UUblock%s",suffix);
  UUblock = AZ_manage_memory(2*NN*kspace*sizeof(double),
                             AZ_ALLOC, AZ_SYS+az_iterate_id,label, &i);
  for (k = 0; k < kspace; k++) UU[k] = &(UUblock[k*NN]);
  CCblock = &(UUblock[kspace*NN]);
  for (k = 0; k < kspace; k++) CC[k] = &(CCblock[k*NN]);

  sprintf(label,"res%s",suffix);
  res = AZ_manage_memory(NN*sizeof(double),AZ_ALLOC,AZ_SYS+az_iterate_id,label,&i);

  AZ_compute_residual(b, x, res, proc_config, Amat);

  /*
   * Compute a few global scalars:
   *     1) ||r||                corresponding to options[AZ_conv]
   *     2) scaled ||r||         corresponding to options[AZ_conv]
   */
  r_2norm = DDOT_F77(&N, res, &one, res, &one);
  AZ_gdot_vec(1, &r_2norm, &rec_residual, proc_config);  
  r_2norm = sqrt(r_2norm);
  rec_residual = r_2norm;

  AZ_compute_global_scalars(Amat, x, b, res,
                          weight, &rec_residual, &scaled_r_norm, options,
                          data_org, proc_config, &r_avail, NULL, NULL, NULL,
                          convergence_info);
  r_2norm = rec_residual;

  converged = scaled_r_norm < epsilon;

  if ( (options[AZ_output] != AZ_none) && 
       (options[AZ_output] != AZ_last) &&
       (options[AZ_output] != AZ_summary) &&
       (options[AZ_output] != AZ_warnings) && (proc == 0) )
    (void) AZ_printf_out("%siter:    0           residual = %e\n",
                           prefix,scaled_r_norm);

  iter = 0;
/*rst change  while (!converged && iter < options[AZ_max_iter]) { */
  while (!(convergence_info->converged) && iter < options[AZ_max_iter] && !(convergence_info->isnan)) {

    convergence_info->iteration = iter;
    i = 0;

/*rst change   while (i < kspace && !converged && iter < options[AZ_max_iter]) { */
    while (i < kspace && !(convergence_info->converged) && iter < options[AZ_max_iter]
           && !(convergence_info->isnan)) {

      iter++;
    convergence_info->iteration = iter;


      /* v_i+1 = A M^-1 v_i */

      DCOPY_F77(&N, res , &one, UU[i], &one);

      if (iter == 1) init_time = AZ_second();

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
        /* Start timer. */
      static int precID = -1;
      precID = Teuchos_startTimer( "AztecOO: Operation Prec*x", precID );
#endif
#endif
      precond->prec_function(UU[i],options,proc_config,params,Amat,precond);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Stop timer. */
      Teuchos_stopTimer( precID );
#endif
#endif
      if (iter == 1) status[AZ_first_precond] = AZ_second() - init_time;

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Start timer. */
      static int matvecID = -1;
      matvecID = Teuchos_startTimer( "AztecOO: Operation Op*x", matvecID );
#endif
#endif
      Amat->matvec(UU[i], CC[i], Amat, proc_config);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Stop timer. */
      Teuchos_stopTimer( matvecID );
#endif
#endif

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Start the timer. */
      static int orthoID = -1;
      orthoID = Teuchos_startTimer( "AztecOO: Orthogonalization", orthoID );
#endif
#endif

      /* Gram-Schmidt orthogonalization */

      if (!options[AZ_orthog]) { /* classical  (stabilized) */
         for (ii = 0 ; ii < 2 ; ii++ ) {
            dble_tmp = 0.0; mm = i;
            if (N == 0) for (k = 0 ; k < i ; k++) dots[k] = 0.0;
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Start the timer. */
      static int orthoInnerProdID = -1;
      orthoInnerProdID = Teuchos_startTimer( "AztecOO: Ortho (Inner Product)", orthoInnerProdID );
#endif
#endif
            DGEMV_F77(CHAR_MACRO(T[0]), &N, &mm, &doubleone, CCblock, &NN, CC[i], 
                   &one, &dble_tmp, dots, &one);

            AZ_gdot_vec(i, dots, tmp, proc_config);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      Teuchos_stopTimer( orthoInnerProdID );
#endif
#endif

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Start the timer. */
      static int orthoUpdateID = -1;
      orthoUpdateID = Teuchos_startTimer( "AztecOO: Ortho (Update)", orthoUpdateID );
#endif
#endif
            DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, CCblock, &NN, dots, 
                   &one, &doubleone, CC[i], &one);
            DGEMV_F77(CHAR_MACRO(T2[0]), &N, &mm, &minusone, UUblock, &NN, dots,
                   &one, &doubleone, UU[i], &one);
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      Teuchos_stopTimer( orthoUpdateID );
#endif
#endif
         }
      }
      else {                    /* modified */
        for (k = 0; k < i; k++) {
          alpha = AZ_gdot(N, CC[k], CC[i], proc_config);
          minus_alpha = -alpha;
          DAXPY_F77(&N, &minus_alpha, CC[k], &one, CC[i], &one);
          DAXPY_F77(&N, &minus_alpha, UU[k], &one, UU[i], &one);
        }
      }

      /* normalize vector */

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      static int orthoNormID = -1;
      orthoNormID = Teuchos_startTimer( "AztecOO: Ortho (Norm)", orthoNormID );
#endif
#endif
      dble_tmp = sqrt(AZ_gdot(N, CC[i], CC[i], proc_config));
#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      Teuchos_stopTimer( orthoNormID );
#endif
#endif

      if (dble_tmp  > DBL_EPSILON*r_2norm)
        dble_tmp  = 1.0 / dble_tmp;
      else
        dble_tmp = 0.0;

      DSCAL_F77(&N, &dble_tmp, CC[i], &one);
      DSCAL_F77(&N, &dble_tmp, UU[i], &one);

      dble_tmp = AZ_gdot(N, CC[i], res, proc_config);
      DAXPY_F77(&N, &dble_tmp, UU[i], &one, x, &one);
      dble_tmp = -dble_tmp;
      DAXPY_F77(&N, &dble_tmp, CC[i], &one, res, &one);

#ifdef AZ_ENABLE_TIMEMONITOR
#ifdef HAVE_AZTECOO_TEUCHOS
      /* Stop the timer. */
      Teuchos_stopTimer( orthoID );
#endif
#endif

      /* determine residual norm & test convergence */

      r_2norm      = sqrt(AZ_gdot(N, res, res, proc_config));
      rec_residual = r_2norm;

      /*
       * Compute a few global scalars:
       *     1) ||r||                corresponding to options[AZ_conv]
       *     2) scaled ||r||         corresponding to options[AZ_conv]
       * NOTE: if r_avail = AZ_TRUE or AZ_FIRST is passed in, we perform
       * step 1), otherwise ||r|| is taken as rec_residual.
       */

      AZ_compute_global_scalars(Amat, x, b, res,
                              weight, &rec_residual, &scaled_r_norm, options,
                              data_org, proc_config, &r_avail, dummy, dummy,
                              dummy, convergence_info);

      converged = scaled_r_norm < epsilon;

/*rst change      if ( (iter%print_freq == 0) && proc == 0) */
      if ( (iter%print_freq == 0) &&
           (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0)
        (void) AZ_printf_out("%siter: %4d           residual = %e\n",prefix,iter,
                       scaled_r_norm);

      i++;      /* subspace dim. counter dim(K) = i - 1 */
#ifdef out
      if (options[AZ_check_update_size] & converged)
         converged = AZ_compare_update_vs_soln(N, -1.,dble_tmp, UU[i-1], x,
                                           params[AZ_update_reduction],
                                           options[AZ_output], proc_config, &first_time);



      if (converged) {

        /* compute true residual using 'v[kspace]' as a temporary vector */

        AZ_scale_true_residual(x, b,
                               res, weight, &actual_residual, &true_scaled_r,
                               options, data_org, proc_config, Amat,
			       convergence_info);

        converged = true_scaled_r < params[AZ_tol];

        if (!converged && (AZ_get_new_eps(&epsilon, scaled_r_norm,
                                          true_scaled_r,
                                          options, proc_config) == AZ_QUIT)) {

          /*
           * Computed residual has converged, actual residual has not
           * converged, AZ_get_new_eps() has decided that it is time to quit.
           */

          AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params,
                                    true_scaled_r, actual_residual, options,
                                    proc_config);
          return;
        }
      }
#endif
    }
  }

  if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none)
       && (options[AZ_output] != AZ_warnings))
    (void) AZ_printf_out("%siter: %4d           residual = %e\n",
		   prefix,iter, scaled_r_norm);


  if (convergence_info->converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else if (convergence_info->isnan) i = AZ_breakdown;
  else i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

#ifdef out
  /* check if we exceeded maximum number of iterations */

  if (converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

#endif
} /* AZ_pgmres */
Esempio n. 6
0
void AZ_pcg_f(double b[], double x[], double weight[], int options[],
              double params[], int proc_config[],double status[],
              AZ_MATRIX *Amat, AZ_PRECOND *precond,
              struct AZ_CONVERGE_STRUCT *convergence_info)

     /*******************************************************************************

  Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.

  params:          Drop tolerance and convergence tolerance info.

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).

  precond:         Structure used to represent the preconditioner
                   (see file az_aztec.h and Aztec User's Guide).
*******************************************************************************/



{

  /* local variables */

  register int i;
  int          N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j;
  int          precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE;
  double       alpha, beta = 0.0, nalpha, true_scaled_r=-1.0;
  double      *r, *z, *p, *ap, actual_residual = -1.0;
  double       r_z_dot, r_z_dot_old, p_ap_dot, rec_residual=-1.0;
  double       scaled_r_norm=-1.0, brkdown_tol = DBL_EPSILON;
  int          *data_org, str_leng, first_time = AZ_TRUE;
  char         label[64],suffix[32], prefix[64];

  double **saveme, *ptap;
  int *kvec_sizes = NULL, current_kept = 0;
  double *dots;
  double doubleone = 1., dzero = 0.;
  char *T = "T";
  char *T2 = "N";
  double *block;



  /**************************** execution begins ******************************/

  sprintf(suffix," in cg%d",options[AZ_recursion_level]);  /* set string that will be used */
                                                           /* for manage_memory label      */
  /* set prefix for printing */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  for (i = 0 ; i < options[AZ_recursion_level]; i++ ) {
    prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
    prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';


  /* pull needed values out of parameter arrays */

  data_org = Amat->data_org;

  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];


  precond_flag = options[AZ_precond];
  proc         = proc_config[AZ_node];
  print_freq   = options[AZ_print_freq];

  /* Initialize some values in convergence info struct */
  convergence_info->print_info = print_freq;
  convergence_info->iteration = 0;
  convergence_info->sol_updated = 1; /* CG always updates solution */
  convergence_info->epsilon = params[AZ_tol]; /* Test against this */

  /* allocate space for necessary vectors */

  NN = N + data_org[AZ_N_external];
  if (NN == 0) NN++;  /* make sure everybody allocates something */
  NN = NN + (NN%2);   /* make sure things are aligned for assembly */
                      /* matvec on paragon. */



  sprintf(label,"z%s",suffix);
  p  = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC,
                                   AZ_SYS+az_iterate_id, label, &j);
  r  = &(p[1*NN]);
  z  = &(p[2*NN]);
  ap = &(p[3*NN]);

  AZ_compute_residual(b, x, r, proc_config, Amat);

  if (options[AZ_apply_kvecs]) {
    AZ_compute_global_scalars(Amat, x, b, r,
                              weight, &rec_residual, &scaled_r_norm, options,
                              data_org, proc_config, &r_avail,NULL, NULL, &r_z_dot,
                              convergence_info);
    AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme,
                       &ptap, options, data_org, suffix,
                       proc_config[AZ_node], &block);
    dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double));
    if (dots == NULL) {
      printf("Not space to apply vectors in CG\n");
      exit(1);
    }
    DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N, r, &one, &dzero, dots, &one);
    AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]), proc_config);
    for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i];
    DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone, block, &N, dots, &one, &doubleone,
              x,  &one);

    AZ_free(dots);
    AZ_compute_residual(b, x, r, proc_config, Amat);
    if ((options[AZ_output] != AZ_none) && (proc == 0))
      printf("\t\tApplied Previous Krylov Vectors ... \n\n");
  }
  if (options[AZ_keep_kvecs] > 0)
    AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme,
                       &ptap, options, data_org, suffix,
                       proc_config[AZ_node], &block);



  /*  z = M r */
  /*  p = 0   */

  DCOPY_F77(&N, r, &one, z, &one);
  status[AZ_first_precond] = AZ_second();
  if (precond_flag)
    precond->prec_function(z,options,proc_config,params,Amat,precond);

  status[AZ_first_precond] = AZ_second() - status[AZ_first_precond];

  for (i = 0; i < N; i++ ) p[i] = 0.0;

  /* compute a few global scalars:                                 */
  /*     1) ||r||                corresponding to options[AZ_conv] */
  /*     2) scaled ||r||         corresponding to options[AZ_conv] */
  /*     3) r_z_dot = <z, r>                                       */

  AZ_compute_global_scalars(Amat, x, b, r,
                            weight, &rec_residual, &scaled_r_norm, options,
                            data_org, proc_config, &r_avail,r, z, &r_z_dot,
                            convergence_info);
  true_scaled_r = scaled_r_norm;

  if ((options[AZ_output] != AZ_none) &&
      (options[AZ_output] != AZ_last) &&
      (options[AZ_output] != AZ_warnings) &&
      (options[AZ_output] != AZ_summary) &&
      (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0))
    {
      (void) AZ_printf_out("%siter:    0           residual = %e\n",
                     prefix,scaled_r_norm);
      AZ_flush_out();
    }


  for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && 
	 !(convergence_info->isnan); iter++ ) {
    convergence_info->iteration = iter;

    /* p  = z + beta * p */
    /* ap = A p          */

    for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i];
    Amat->matvec(p, ap, Amat, proc_config);

    if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) {
      for (i = 0; i < current_kept; i++) {
        alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i];
        DAXPY_F77(&N, &alpha,  saveme[i],  &one, p, &one);
      }
      if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config);
    }

    p_ap_dot = AZ_gdot(N, p, ap, proc_config);
    if (p_ap_dot < brkdown_tol) {

      /* possible problem */

      if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) {

        /* something wrong */

        AZ_scale_true_residual(x, b, ap,
                               weight, &actual_residual, &true_scaled_r,
                               options, data_org, proc_config, Amat,
                               convergence_info);
        AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                  params, true_scaled_r, actual_residual,
                                  options, proc_config);
        return;
      }
      else brkdown_tol = 0.1 * p_ap_dot;
    }

    alpha  = r_z_dot / p_ap_dot;
    nalpha = -alpha;

    /* x = x + alpha*p  */
    /* r = r - alpha*Ap */
    /* z = M^-1 r       */

    DAXPY_F77(&N, &alpha,  p,  &one, x, &one);

    if (iter <= options[AZ_keep_kvecs]) {
      DCOPY_F77(&N, p, &one, saveme[iter-1], &one);
      ptap[iter-1] = p_ap_dot ;
      kvec_sizes[AZ_Nkept]++;
      current_kept = kvec_sizes[AZ_Nkept];
    }
    /*
      else {
      i = (iter-1)%options[AZ_keep_kvecs];
      DCOPY_F77(&N, p, &one, saveme[i], &one);
      ptap[i] = p_ap_dot ;
      }
    */
    DAXPY_F77(&N, &nalpha, ap, &one, r, &one);
    DCOPY_F77(&N, r, &one, z, &one);

    if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond);

    r_z_dot_old = r_z_dot;

    /* compute a few global scalars:                                 */
    /*     1) ||r||                corresponding to options[AZ_conv] */
    /*     2) scaled ||r||         corresponding to options[AZ_conv] */
    /*     3) r_z_dot = <z, r>                                       */

    AZ_compute_global_scalars(Amat, x, b, r,
                              weight, &rec_residual, &scaled_r_norm, options,
                              data_org, proc_config, &r_avail, r, z, &r_z_dot,
                              convergence_info);

    if (brkdown_will_occur) {
      AZ_scale_true_residual( x, b, ap,
                              weight, &actual_residual, &true_scaled_r, options,
                              data_org, proc_config, Amat,convergence_info);
      AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                params, true_scaled_r, actual_residual, options,
                                proc_config);
      return;
    }

    beta = r_z_dot / r_z_dot_old;

    if (fabs(r_z_dot) < brkdown_tol) {

      /* possible problem */

      if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config))
        brkdown_will_occur = AZ_TRUE;
      else
        brkdown_tol = 0.1 * fabs(r_z_dot);
    }

    if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 )
      {
        (void) AZ_printf_out("%siter: %4d           residual = %e\n", prefix, iter,
                       scaled_r_norm);
        AZ_flush_out();
      }

    /* convergence tests */

    if (options[AZ_check_update_size] & convergence_info->converged)
      convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p, x,
							      params[AZ_update_reduction],
							      options[AZ_output], proc_config, &first_time);


    if (convergence_info->converged) {
      AZ_scale_true_residual(x, b, ap,
                             weight, &actual_residual, &true_scaled_r, options,
                             data_org, proc_config, Amat, convergence_info);
      
      
      
      /*
       * Note: epsilon and params[AZ_tol] may not be equal due to a previous
       * call to AZ_get_new_eps().
       */
      
      if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) {

	if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm, true_scaled_r,
			   options, proc_config) == AZ_QUIT) {

	  /*
	   * Computed residual has converged, actual residual has not converged,
	   * AZ_get_new_eps() has decided that it is time to quit.
	   */
	  
	  AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params,
				    true_scaled_r, actual_residual, options,
				    proc_config);
	  return;
	}
      }
    }
  }
  iter--;
  if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none)
       && (options[AZ_output] != AZ_warnings) &&
      (options[AZ_conv]!=AZTECOO_conv_test) )
    {
      (void) AZ_printf_out("%siter: %4d           residual = %e\n", prefix, iter,
                     scaled_r_norm);
      AZ_flush_out();
    }

  /* check if we exceeded maximum number of iterations */

  if (convergence_info->converged) {
    i = AZ_normal; scaled_r_norm = true_scaled_r; }
  else if (convergence_info->isnan) i = AZ_breakdown;
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

} /* AZ_pcg */
Esempio n. 7
0
void AZ_pbicgstab(double b[], double x[], double weight[], int options[], 
	double params[],int proc_config[], double status[], AZ_MATRIX *Amat, 
	AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info)

/*******************************************************************************

  Vand der Vorst's (1990) variation of the Bi-Conjugate Gradient algorthm
  (Sonneveld (1984,1989)) to solve the nonsymmetric matrix problem Ax = b.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  b:               Right hand side of linear system.

  x:               On input, contains the initial guess. On output contains the
                   solution to the linear system.

  weight:          Vector of weights for convergence norm #4.

  options:         Determines specific solution method and other parameters.

  params:          Drop tolerance and convergence tolerance info.

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

  status:          On output, indicates termination status:
                    0:  terminated normally.
                   -1:  maximum number of iterations taken without achieving
                        convergence.
                   -2:  Breakdown. The algorithm can not proceed due to
                        numerical difficulties (usually a divide by zero).
                   -3:  Internal residual differs from the computed residual due
                        to a significant loss of precision.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).
  precond:         Structure used to represent the preconditionner
                   (see file az_aztec.h and Aztec User's Guide).

*******************************************************************************/

{

  /* local variables */

  register int    i;
  int             N, NN, one = 1, iter=1, r_avail = AZ_TRUE, j;
  int             precond_flag, print_freq, proc;
  int             brkdown_will_occur = AZ_FALSE;
  double          alpha = 1.0, beta, true_scaled_r=0.0;
  double          *v, *r, *rtilda, *p, *phat, *s, *shat;
  double          omega = 1.0, dot_vec[2], tmp[2], init_time = 0.0;
  double          rhonm1 = 1.0, rhon, sigma, brkdown_tol = DBL_EPSILON;
  double          scaled_r_norm= -1.0, actual_residual = -1.0, rec_residual= -1.0;
  double          dtemp;
  int          *data_org, str_leng, first_time = AZ_TRUE;
  char         label[64],suffix[32], prefix[64];


  /**************************** execution begins ******************************/

  sprintf(suffix," in cgstab%d",options[AZ_recursion_level]);
                                                           /* set string that will be used */
                                                           /* for manage_memory label      */
  /* set prefix for printing */

  str_leng = 0;
  for (i = 0; i < 16; i++) prefix[str_leng++] = ' ';
  for (i = 0 ; i < options[AZ_recursion_level]; i++ ) {
     prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
     prefix[str_leng++] = ' '; prefix[str_leng++] = ' ';
  }
  prefix[str_leng] = '\0';

  data_org = Amat->data_org;

  /* pull needed values out of parameter arrays */

  N            = data_org[AZ_N_internal] + data_org[AZ_N_border];
  precond_flag = options[AZ_precond];
  proc         = proc_config[AZ_node];
  print_freq   = options[AZ_print_freq];

  /* Initialize some values in convergence info struct */
  convergence_info->print_info = print_freq;
  convergence_info->iteration = 0;
  convergence_info->sol_updated = 1; /* BiCGStab always updates solution */
  convergence_info->epsilon = params[AZ_tol]; /* Test against this */

  /* allocate memory for required vectors */

  NN     = N + data_org[AZ_N_external];
  if (NN == 0) NN++;   /* make sure everybody allocates something*/
  NN = NN + (NN%2);    /* make sure things are aligned for the   */
                       /* assembly coded matvec() on the Intel.  */

  sprintf(label,"phat%s",suffix);
  phat   = (double *) AZ_manage_memory(7*NN*sizeof(double), AZ_ALLOC, 
                                       AZ_SYS+az_iterate_id, label,&j);
  p      = &(phat[1*NN]);
  shat   = &(phat[2*NN]);   /* NOTE: phat and shat must be aligned */
                            /*       so that the assembly dgemv    */
                            /*       works on the paragon.         */
  s      = &(phat[3*NN]);
  r      = &(phat[4*NN]);
  rtilda = &(phat[5*NN]);
  v      = &(phat[6*NN]);

  AZ_compute_residual(b, x, r, proc_config, Amat);

  /* v, p <- 0 */

  for (i = 0; i < N; i++) v[i] = p[i] = 0.0;

  /* set rtilda */

  if (options[AZ_aux_vec] == AZ_resid)
    DCOPY_F77(&N, r, &one, rtilda, &one);
  else
    AZ_random_vector(rtilda, data_org, proc_config);

  /*
   * Compute a few global scalars:
   *     1) ||r||                corresponding to options[AZ_conv]
   *     2) scaled ||r||         corresponding to options[AZ_conv]
   *     3) rho = <rtilda, r>
   */

  AZ_compute_global_scalars(Amat, x, b, r,
                            weight, &rec_residual, &scaled_r_norm, options,
                            data_org, proc_config,&r_avail,r,rtilda, &rhon,
                            convergence_info);
  true_scaled_r = scaled_r_norm;

  if ((options[AZ_output] != AZ_none) && 
      (options[AZ_output] != AZ_last) &&
      (options[AZ_output] != AZ_warnings) &&
      (options[AZ_output] != AZ_summary) &&
      (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0))
    (void) AZ_printf_out("%siter:    0           residual = %e\n",prefix,scaled_r_norm);


  for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged)
	 && !(convergence_info->isnan); iter++) {
    if (brkdown_will_occur) {
      AZ_scale_true_residual( x, b, v,
                             weight, &actual_residual, &true_scaled_r, options,
                             data_org, proc_config, Amat, convergence_info);

      AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                params, true_scaled_r, actual_residual, options,
                                proc_config);
      return;
    }

    beta = (rhon/rhonm1) * (alpha/omega);

    if (fabs(rhon) < brkdown_tol) { /* possible problem */
      if (AZ_breakdown_f(N, r, rtilda, rhon, proc_config))
        brkdown_will_occur = AZ_TRUE;
      else
        brkdown_tol = 0.1 * fabs(rhon);
    }

    rhonm1 = rhon;

    /* p    = r + beta*(p - omega*v)       */
    /* phat = M^-1 p                       */
    /* v    = A phat                       */

    dtemp = beta * omega;
    for (i = 0; i < N; i++) p[i] = r[i] + beta * p[i] - dtemp * v[i];
    DCOPY_F77(&N, p, &one, phat, &one);

    if (iter==1) init_time = AZ_second();
    if (precond_flag)
      precond->prec_function(phat,options,proc_config,params,Amat,precond);

    if (iter==1) status[AZ_first_precond] = AZ_second() - init_time;

    Amat->matvec(phat, v, Amat, proc_config);

    sigma = AZ_gdot(N, rtilda, v, proc_config);

    if (fabs(sigma) < brkdown_tol) { /* possible problem */
      if (AZ_breakdown_f(N, rtilda, v, sigma, proc_config)) {

        /* break down */

        AZ_scale_true_residual( x, b, v,
                               weight, &actual_residual, &true_scaled_r,
                               options, data_org,proc_config, Amat,
			       convergence_info);

        AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual,
                                  params, true_scaled_r, actual_residual,
                                  options, proc_config);
        return;
      }
      else brkdown_tol = 0.1 * fabs(sigma);
    }

    alpha = rhon / sigma;

    /* s = r - alpha*v                     */
    /* shat = M^-1 s                       */
    /* r = A shat (r is a tmp here for t ) */

    for (i = 0; i < N; i++) s[i] = r[i] - alpha * v[i];
    DCOPY_F77(&N, s, &one, shat, &one);

    if (precond_flag)    
      precond->prec_function(shat,options,proc_config,params,Amat,precond);

    Amat->matvec(shat, r, Amat, proc_config);


    /* omega = (t,s)/(t,t) with r = t */

    dot_vec[0] = DDOT_F77(&N, r, &one, s, &one);
    dot_vec[1] = DDOT_F77(&N, r, &one, r, &one);
    AZ_gdot_vec(2, dot_vec, tmp, proc_config);

    if (fabs(dot_vec[1]) < DBL_MIN) {
      omega = 0.0;
      brkdown_will_occur = AZ_TRUE;
    }
    else omega = dot_vec[0] / dot_vec[1];

    /* x = x + alpha*phat + omega*shat */
    /* r = s - omega*r */

    DAXPY_F77(&N, &alpha, phat, &one, x, &one);
    DAXPY_F77(&N, &omega, shat, &one, x, &one);

    for (i = 0; i < N; i++) r[i] = s[i] - omega * r[i];

    /*
     * Compute a few global scalars:
     *     1) ||r||                corresponding to options[AZ_conv]
     *     2) scaled ||r||         corresponding to options[AZ_conv]
     *     3) rho = <rtilda, r>
     */

    AZ_compute_global_scalars(Amat, x, b, r,
                              weight, &rec_residual, &scaled_r_norm, options,
                              data_org, proc_config, &r_avail, r, rtilda, &rhon,
                              convergence_info);

    if ( (iter%print_freq == 0) && proc == 0)
      (void) AZ_printf_out("%siter: %4d           residual = %e\n",prefix,iter,
                     scaled_r_norm);

    /* convergence tests */

    if (options[AZ_check_update_size] & convergence_info->converged) {
      dtemp = alpha/omega;
      DAXPY_F77(&N, &dtemp, phat, &one, shat, &one);
      convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,omega, shat, x,
                                         params[AZ_update_reduction],
                                         options[AZ_output], proc_config, &first_time);
    }

    if (convergence_info->converged) {
      AZ_scale_true_residual(x, b, v,
                             weight, &actual_residual, &true_scaled_r, options,
                             data_org, proc_config, Amat, convergence_info);


      /*
       * Note: epsilon and params[AZ_tol] may not be equal due to a previous
       *       call to AZ_get_new_eps().
       */

      if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) {
	if (AZ_get_new_eps(&convergence_info->epsilon, scaled_r_norm, true_scaled_r,
			   options, proc_config) == AZ_QUIT) {

	  /*
	   * Computed residual has converged, actual residual has not converged,
	   * AZ_get_new_eps() has decided that it is time to quit.
	   */
	  
	  AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params,
				    true_scaled_r, actual_residual, options,
				    proc_config);
	  return;
	}
      }
    }
  }

  iter--;
  if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none)
       && (options[AZ_output] != AZ_warnings) &&
       (options[AZ_conv]!=AZTECOO_conv_test))
    (void) AZ_printf_out("%siter: %4d           residual = %e\n", prefix,iter,
                   scaled_r_norm);

  /* check if we exceeded maximum number of iterations */

  if (convergence_info->converged) {
    i = AZ_normal;
    scaled_r_norm = true_scaled_r;
  }
  else if (convergence_info->isnan) i = AZ_breakdown;
  else
    i = AZ_maxits;

  AZ_terminate_status_print(i, iter, status, rec_residual, params,
                            scaled_r_norm, actual_residual, options,
                            proc_config);

} /* bicgstab */
Esempio n. 8
0
 void BLAS<int, double>::AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const
 { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }