Пример #1
0
int ML_Aggregate_CoarsenUser(ML_Aggregate *ml_ag, ML_Operator *Amatrix, 
			      ML_Operator **Pmatrix, ML_Comm *comm)
{
  unsigned int nbytes, length;
  int     i, j,  k, Nrows, exp_Nrows;
  int     diff_level;
  int     aggr_count, index, mypid, num_PDE_eqns;
  int     *aggr_index = NULL, nullspace_dim;
  int     Ncoarse, count;
  int     *new_ia = NULL, *new_ja = NULL, new_Nrows;
  int     exp_Ncoarse;
  int     *aggr_cnt_array = NULL;
  int     level, index3, max_agg_size;
  int     **rows_in_aggs = NULL, lwork, info;
  double  *new_val = NULL, epsilon;
  double  *nullspace_vect = NULL, *qr_tmp = NULL;
  double  *tmp_vect = NULL, *work = NULL, *new_null = NULL;
  ML_SuperNode          *aggr_head = NULL, *aggr_curr, *supernode;
  struct ML_CSR_MSRdata *csr_data;
  int                   total_nz = 0;
  char str[80];

  int * graph_decomposition = NULL;
  ML_Aggregate_Viz_Stats * aggr_viz_and_stats;
  ML_Aggregate_Viz_Stats * grid_info;
  int Nprocs;
  char * unamalg_bdry = NULL;
  char* label;
  int N_dimensions;
  double* x_coord = NULL;
  double* y_coord = NULL;
  double* z_coord = NULL;

  /* ------------------- execution begins --------------------------------- */

  label =  ML_GetUserLabel();
  sprintf(str, "%s (level %d) :", label, ml_ag->cur_level);

  /* ============================================================= */
  /* get the machine information and matrix references             */
  /* ============================================================= */

  mypid                   = comm->ML_mypid;
  Nprocs                  = comm->ML_nprocs;
  epsilon                 = ml_ag->threshold;
  num_PDE_eqns            = ml_ag->num_PDE_eqns;
  nullspace_dim           = ml_ag->nullspace_dim;
  nullspace_vect          = ml_ag->nullspace_vect;
  Nrows                   = Amatrix->outvec_leng;

  if (mypid == 0 && 5 < ML_Get_PrintLevel()) {
    printf("%s num PDE eqns = %d\n",
           str,
           num_PDE_eqns);
  }

  /* ============================================================= */
  /* check the system size versus null dimension size              */
  /* ============================================================= */

  if ( Nrows % num_PDE_eqns != 0 )
  {
    printf("ML_Aggregate_CoarsenUser ERROR : Nrows must be multiples");
    printf(" of num_PDE_eqns.\n");
    exit(EXIT_FAILURE);
  }
  diff_level = ml_ag->max_levels - ml_ag->cur_level - 1;
  if ( diff_level > 0 ) num_PDE_eqns = nullspace_dim; /* ## 12/20/99 */

  /* ============================================================= */
  /* set up the threshold for weight-based coarsening              */
  /* ============================================================= */

  diff_level = ml_ag->begin_level - ml_ag->cur_level;
  if (diff_level == 0) 
    ml_ag->curr_threshold = ml_ag->threshold;
  epsilon = ml_ag->curr_threshold;
  ml_ag->curr_threshold *= 0.5;

  if (mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s current eps = %e\n", str, epsilon);

  epsilon = epsilon * epsilon;

  ML_Operator_AmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);
  Nrows /= num_PDE_eqns;

  exp_Nrows = Nrows;

  /* ********************************************************************** */
  /* allocate memory for aggr_index, which will contain the decomposition   */
  /* ********************************************************************** */

  nbytes = (Nrows*num_PDE_eqns) * sizeof(int);

  if ( nbytes > 0 ) {
    ML_memory_alloc((void**) &aggr_index, nbytes, "ACJ");
    if( aggr_index == NULL ) {
      fprintf( stderr,
              "*ML*ERR* not enough memory for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              nbytes,
              __FILE__,
              __LINE__ );
      exit( EXIT_FAILURE );
    }
  }
  else              aggr_index = NULL;

  for( i=0 ; i<Nrows*num_PDE_eqns ; i++ ) aggr_index[i] = -1;

  unamalg_bdry = (char *) ML_allocate( sizeof(char) * (Nrows+1) );

  if( unamalg_bdry == NULL ) {
    fprintf( stderr,
            "*ML*ERR* on proc %d, not enough space for %d bytes\n"
            "*ML*ERR* (file %s, line %d)\n",
            mypid,
            (int)sizeof(char) * Nrows,
            __FILE__,
            __LINE__ );
    exit( EXIT_FAILURE );
  }

  N_dimensions = ml_ag->N_dimensions;
  grid_info = (ML_Aggregate_Viz_Stats*) Amatrix->to->Grid->Grid;
  x_coord = grid_info->x;

  if (N_dimensions > 1 && x_coord)
    y_coord = grid_info->y;
  else
    y_coord = 0;
  if (N_dimensions > 2 && x_coord)
    z_coord = grid_info->z;
  else
    z_coord = 0;

  aggr_count = ML_GetUserPartitions(Amatrix,unamalg_bdry,
                                    epsilon,
                                    x_coord,y_coord,z_coord,
                                    aggr_index,&total_nz);

#ifdef ML_MPI
  MPI_Allreduce( &Nrows, &i, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
  MPI_Allreduce( &aggr_count, &j, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
#else
  i = Nrows;
  j = aggr_count;
#endif

  if( mypid == 0 && 7 < ML_Get_PrintLevel() ) {
    printf("%s Using %d (block) aggregates (globally)\n",
           str,
           j );
    printf("%s # (block) aggre/ # (block) rows = %8.5f %% ( = %d / %d)\n",
           str,
           100.0*j/i,
           j, i);
  }

  j = ML_gsum_int( aggr_count, comm );
  if (mypid == 0 && 7 < ML_Get_PrintLevel())  {
    printf("%s %d (block) aggregates (globally)\n",
           str, j );
  }   

  /* ********************************************************************** */
  /* I allocate room to copy aggr_index and pass this value to the user,    */
  /* who will be able to analyze and visualize this after the construction  */
  /* of the levels. This way, the only price we have to pay for stats and   */
  /* viz is essentially a little bit of memory.                             */
  /* this memory will be cleaned with the object ML_Aggregate ml_ag.        */
  /* I set the pointers using the ML_Aggregate_Info structure. This is      */
  /* allocated using ML_Aggregate_Info_Setup(ml,MaxNumLevels)               */
  /* ********************************************************************** */

  if (Amatrix->to->Grid->Grid != NULL) {

    graph_decomposition = (int *)ML_allocate(sizeof(int)*(Nrows+1));
    if( graph_decomposition == NULL ) {
      fprintf( stderr,
              "*ML*ERR* Not enough memory for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              (int)sizeof(int)*Nrows,
              __FILE__,
              __LINE__ );
      exit( EXIT_FAILURE );
    }

    for( i=0 ; i<Nrows ; i++ ) graph_decomposition[i] = aggr_index[i];

    aggr_viz_and_stats = (ML_Aggregate_Viz_Stats *) (Amatrix->to->Grid->Grid);
    aggr_viz_and_stats->graph_decomposition = graph_decomposition;
    aggr_viz_and_stats->Nlocal = Nrows;
    aggr_viz_and_stats->Naggregates = aggr_count;
    aggr_viz_and_stats->local_or_global = ML_LOCAL_INDICES;
    aggr_viz_and_stats->is_filled = ML_YES;
    aggr_viz_and_stats->Amatrix = Amatrix;
  }

  /* ********************************************************************** */
  /* take the decomposition as created by METIS and form the aggregates     */
  /* ********************************************************************** */

  total_nz = ML_Comm_GsumInt( comm, total_nz);
  i = ML_Comm_GsumInt( comm, Nrows);

  if ( mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s Total (block) nnz = %d ( = %5.2f/(block)row)\n",
           str,
           total_nz,1.0*total_nz/i);

  if ( ml_ag->operator_complexity == 0.0 ) {
    ml_ag->fine_complexity = total_nz;
    ml_ag->operator_complexity = total_nz;
  }
  else ml_ag->operator_complexity += total_nz;

  /* fix aggr_index for num_PDE_eqns > 1 */

  for (i = Nrows - 1; i >= 0; i-- ) {
    for (j = num_PDE_eqns-1; j >= 0; j--) {
      aggr_index[i*num_PDE_eqns+j] = aggr_index[i];
    }
  }

  if ( mypid == 0 && 8 < ML_Get_PrintLevel())
  {
    printf("Calling ML_Operator_UnAmalgamateAndDropWeak\n");
    fflush(stdout);
  }

  ML_Operator_UnAmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);

  Nrows      *= num_PDE_eqns;
  exp_Nrows  *= num_PDE_eqns;

  /* count the size of each aggregate */

  aggr_cnt_array = (int *) ML_allocate(sizeof(int)*(aggr_count+1));
  for (i = 0; i < aggr_count ; i++) aggr_cnt_array[i] = 0;
  for (i = 0; i < exp_Nrows; i++) {
    if (aggr_index[i] >= 0) {
      if( aggr_index[i] >= aggr_count ) {
        fprintf( stderr,
                "*ML*WRN* on process %d, something weird happened...\n"
                "*ML*WRN* node %d belong to aggregate %d (#aggr = %d)\n"
                "*ML*WRN* (file %s, line %d)\n",
                comm->ML_mypid,
                i,
                aggr_index[i],
                aggr_count,
                __FILE__,
                __LINE__ );
      } else {
        aggr_cnt_array[aggr_index[i]]++;
      }
    }
  }

  /* ============================================================= */
  /* Form tentative prolongator                                    */
  /* ============================================================= */

  Ncoarse = aggr_count;

  /* ============================================================= */
  /* check and copy aggr_index                                     */
  /* ------------------------------------------------------------- */

  level = ml_ag->cur_level;
  nbytes = (Nrows+1) * sizeof( int );
  ML_memory_alloc((void**) &(ml_ag->aggr_info[level]), nbytes, "AGl");
  count = aggr_count;
  for ( i = 0; i < Nrows; i+=num_PDE_eqns ) 
  {
    if ( aggr_index[i] >= 0 )
    {
      for ( j = 0; j < num_PDE_eqns; j++ ) 
        ml_ag->aggr_info[level][i+j] = aggr_index[i];
      if (aggr_index[i] >= count) count = aggr_index[i] + 1;
    }
    /*else
     *{
     *   printf("%d : CoarsenMIS error : aggr_index[%d] < 0\n",
     *          mypid,i);
     *   exit(1);
     *}*/
  }
  ml_ag->aggr_count[level] = count; /* for relaxing boundary points */ 

  /* ============================================================= */
  /* set up the new operator                                       */
  /* ------------------------------------------------------------- */

  new_Nrows = Nrows;
  exp_Ncoarse = Nrows;

  for ( i = 0; i < new_Nrows; i++ ) 
  {
    if ( aggr_index[i] >= exp_Ncoarse ) 
    {
      printf("*ML*WRN* index out of bound %d = %d(%d)\n",
             i, aggr_index[i], 
             exp_Ncoarse);
    }
  }
  nbytes = ( new_Nrows+1 ) * sizeof(int); 
  ML_memory_alloc((void**)&(new_ia), nbytes, "AIA");
  nbytes = ( new_Nrows+1)  * nullspace_dim * sizeof(int); 
  ML_memory_alloc((void**)&(new_ja), nbytes, "AJA");
  nbytes = ( new_Nrows+1)  * nullspace_dim * sizeof(double); 
  ML_memory_alloc((void**)&(new_val), nbytes, "AVA");
  for ( i = 0; i < new_Nrows*nullspace_dim; i++ ) new_val[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* set up the space for storing the new null space               */
  /* ------------------------------------------------------------- */

  nbytes = (Ncoarse+1) * nullspace_dim * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&(new_null),nbytes,"AGr");
  if( new_null == NULL ) {
    fprintf( stderr,
            "*ML*ERR* on process %d, not enough memory for %d bytes\n"
            "*ML*ERR* (file %s, line %d)\n",
            mypid,
            nbytes,
            __FILE__,
            __LINE__ );
    exit( EXIT_FAILURE );
  }

  for (i = 0; i < Ncoarse*nullspace_dim*nullspace_dim; i++) 
    new_null[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* initialize the row pointer for the CSR prolongation operator  */
  /* (each row will have at most nullspace_dim nonzero entries)    */
  /* ------------------------------------------------------------- */

  for (i = 0; i <= Nrows; i++) new_ia[i] = i * nullspace_dim;

  /* trying this when a Dirichlet row is taken out */
  j = 0;
  new_ia[0] = 0;
  for (i = 0; i < Nrows; i++) {
    if (aggr_index[i] != -1) j += nullspace_dim;
    new_ia[i+1] = j;
  }

  /* ------------------------------------------------------------- */
  /* generate an array to store which aggregate has which rows.Then*/
  /* loop through the rows of A checking which aggregate each row  */
  /* is in, and adding it to the appropriate spot in rows_in_aggs  */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**)&rows_in_aggs,aggr_count*sizeof(int*),"MLs");
  for (i = 0; i < aggr_count; i++) {
    nbytes = aggr_cnt_array[i]+1;
    rows_in_aggs[i] = (int *) ML_allocate(nbytes*sizeof(int));
    aggr_cnt_array[i] = 0;
    if (rows_in_aggs[i] == NULL)  {
      printf("*ML*ERR* couldn't allocate memory in CoarsenMETIS\n");
      exit(1);
    }
  }
  for (i = 0; i < exp_Nrows; i+=num_PDE_eqns) {
    if ( aggr_index[i] >= 0 && aggr_index[i] < aggr_count)
    {
      for (j = 0; j < num_PDE_eqns; j++)
      {
        index = aggr_cnt_array[aggr_index[i]]++; 
        rows_in_aggs[aggr_index[i]][index] = i + j;
      }
    }
  }

  /* ------------------------------------------------------------- */
  /* allocate work arrays for QR factorization                     */
  /* work and lwork are needed for lapack's QR routine.  These     */
  /* settings seemed easiest since I don't quite understand        */
  /* what they do, but may want to do something better here later  */
  /* ------------------------------------------------------------- */

  max_agg_size = 0;
  for (i = 0; i < aggr_count; i++) 
  {
    if (aggr_cnt_array[i] > max_agg_size) max_agg_size = aggr_cnt_array[i];
  }
  nbytes = max_agg_size * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&qr_tmp, nbytes, "AGu");
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&tmp_vect, nbytes, "AGv");

  lwork  = nullspace_dim;
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&work, nbytes, "AGw");

  /* ------------------------------------------------------------- */
  /* perform block QR decomposition                                */
  /* ------------------------------------------------------------- */

  for (i = 0; i < aggr_count; i++) 
  {
    /* ---------------------------------------------------------- */
    /* set up the matrix we want to decompose into Q and R:       */
    /* ---------------------------------------------------------- */

    length = aggr_cnt_array[i];
    if (nullspace_vect == NULL) 
    {
      for (j = 0; j < (int) length; j++)
      {
        index = rows_in_aggs[i][j];

        for (k = 0; k < nullspace_dim; k++)
        {
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else
          {
            if (index % num_PDE_eqns == k) qr_tmp[k*length+j] = 1.0;
            else                           qr_tmp[k*length+j] = 0.0;
          }
        }
      }
    }
    else 
    {
      for (k = 0; k < nullspace_dim; k++)
      {
        for (j = 0; j < (int) length; j++)
        {
          index = rows_in_aggs[i][j];
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else {
            if (index < Nrows) {
              qr_tmp[k*length+j] = nullspace_vect[k*Nrows+index];
            }
            else {
              fprintf( stderr,
                      "*ML*ERR* in QR\n"
                      "*ML*ERR* (file %s, line %d)\n",
                      __FILE__,
                      __LINE__ );
              exit( EXIT_FAILURE );
            }
          }
        }
      }
    }

    /* ---------------------------------------------------------- */
    /* now calculate QR using an LAPACK routine                   */
    /* ---------------------------------------------------------- */

    if (aggr_cnt_array[i] >= nullspace_dim) {

      DGEQRF_F77(&(aggr_cnt_array[i]), &nullspace_dim, qr_tmp, 
                 &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0)
        pr_error("ErrOr in CoarsenMIS : dgeqrf returned a non-zero %d %d\n",
                 aggr_cnt_array[i],i);

      if (work[0] > lwork) 
      {
        lwork=(int) work[0]; 
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGx");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* the upper triangle of qr_tmp is now R, so copy that into   */
      /* the new nullspace                                          */
      /* ---------------------------------------------------------- */

      for (j = 0; j < nullspace_dim; j++)
        for (k = j; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] = 
            qr_tmp[j+aggr_cnt_array[i]*k];

      /* ---------------------------------------------------------- */
      /* to get this block of P, need to run qr_tmp through another */
      /* LAPACK function:                                           */
      /* ---------------------------------------------------------- */

      if ( aggr_cnt_array[i] < nullspace_dim ){
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        printf("ERROR : performing QR on a MxN matrix where M<N.\n");
      }
      DORGQR_F77(&(aggr_cnt_array[i]), &nullspace_dim, &nullspace_dim, 
                 qr_tmp, &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0) {
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        pr_error("Error in CoarsenMIS: dorgqr returned a non-zero\n");
      }

      if (work[0] > lwork) 
      {
        lwork=(int) work[0]; 
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGy");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* now copy Q over into the appropriate part of P:            */
      /* The rows of P get calculated out of order, so I assume the */
      /* Q is totally dense and use what I know of how big each Q   */
      /* will be to determine where in ia, ja, etc each nonzero in  */
      /* Q belongs.  If I did not assume this, I would have to keep */
      /* all of P in memory in order to determine where each entry  */
      /* should go                                                  */
      /* ---------------------------------------------------------- */

      for (j = 0; j < aggr_cnt_array[i]; j++)
      {
        index = rows_in_aggs[i][j];

        if ( index < Nrows )
        {
          index3 = new_ia[index];
          for (k = 0; k < nullspace_dim; k++) 
          {
            new_ja [index3+k] = i * nullspace_dim + k;
            new_val[index3+k] = qr_tmp[ k*aggr_cnt_array[i]+j];
          }
        }
        else 
        {
          fprintf( stderr,
                  "*ML*ERR* in QR: index out of bounds (%d - %d)\n",
                  index,
                  Nrows );
        }
      }
    }
    else {
      /* We have a small aggregate such that the QR factorization can not */
      /* be performed. Instead let us copy the null space from the fine   */
      /* into the coarse grid nullspace and put the identity for the      */
      /* prolongator????                                                  */
      for (j = 0; j < nullspace_dim; j++)
        for (k = 0; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] = 
            qr_tmp[j+aggr_cnt_array[i]*k];
      for (j = 0; j < aggr_cnt_array[i]; j++) {
        index = rows_in_aggs[i][j];
        index3 = new_ia[index];
        for (k = 0; k < nullspace_dim; k++) {
          new_ja [index3+k] = i * nullspace_dim + k;
          if (k == j) new_val[index3+k] = 1.;
          else new_val[index3+k] = 0.;
        }
      }
    }


  }

  ML_Aggregate_Set_NullSpace(ml_ag, num_PDE_eqns, nullspace_dim, 
                             new_null, Ncoarse*nullspace_dim);
  ML_memory_free( (void **) &new_null);

  /* ------------------------------------------------------------- */
  /* set up the csr_data data structure                            */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**) &csr_data, sizeof(struct ML_CSR_MSRdata),"CSR");
  csr_data->rowptr  = new_ia;
  csr_data->columns = new_ja;
  csr_data->values  = new_val;

  ML_Operator_Set_ApplyFuncData( *Pmatrix, nullspace_dim*Ncoarse, Nrows, 
                                csr_data, Nrows, NULL, 0);
  (*Pmatrix)->data_destroy = ML_CSR_MSR_ML_memorydata_Destroy;
  (*Pmatrix)->getrow->pre_comm = ML_CommInfoOP_Create();
  (*Pmatrix)->max_nz_per_row = 1;

  ML_Operator_Set_Getrow((*Pmatrix), Nrows, CSR_getrow);
  ML_Operator_Set_ApplyFunc((*Pmatrix), CSR_matvec);
  (*Pmatrix)->max_nz_per_row = 1;
  /* this must be set so that the hierarchy generation does not abort early
     in adaptive SA */
  (*Pmatrix)->num_PDEs = nullspace_dim;

  /* ------------------------------------------------------------- */
  /* clean up                                                      */
  /* ------------------------------------------------------------- */

  ML_free(unamalg_bdry);
  ML_memory_free((void**)&aggr_index);
  ML_free(aggr_cnt_array);
  for (i = 0; i < aggr_count; i++) ML_free(rows_in_aggs[i]);
  ML_memory_free((void**)&rows_in_aggs);
  ML_memory_free((void**)&qr_tmp);
  ML_memory_free((void**)&tmp_vect);
  ML_memory_free((void**)&work);

  aggr_curr = aggr_head;
  while ( aggr_curr != NULL ) 
  {
    supernode = aggr_curr;
    aggr_curr = aggr_curr->next;
    if ( supernode->length > 0 ) ML_free( supernode->list );
    ML_free( supernode );
  }

  return Ncoarse*nullspace_dim;

} /* ML_Aggregate_CoarsenUser */
Пример #2
0
int main(int argc, char *argv[])
{
	int num_PDE_eqns=6, N_levels=4, nsmooth=2;

	int    leng, level, N_grid_pts, coarsest_level;

  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */

  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */

  double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int i, j, Nrigid, *garbage = NULL;
#ifdef ML_partition
  int nblocks;
  int *block_list = NULL;
  int k;
#endif
  struct AZ_SCALING *scaling;
  ML_Aggregate *ag;
double *mode, *rigid;
char filename[80];
double alpha;
int allocated = 0;
int old_prec, old_sol;
double old_tol;
/*
double *Amode, beta, biggest;
int big_ind = -1, ii;
*/
ML_Operator *Amatrix;
int *rowi_col = NULL, rowi_N, count2, ccc;
double *rowi_val = NULL;
double max_diag, min_diag, max_sum, sum;
 int nBlocks, *blockIndices, Ndof;
#ifdef ML_partition
   FILE *fp2;
   int count;

   if (argc != 2) {
     printf("Usage: ml_read_elas num_processors\n");
     exit(1);
   }
   else sscanf(argv[1],"%d",&nblocks);
#endif

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  /* get number of processors and the name of this processor */

  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

  /* read in the number of matrix equations */
  leng = 0;
  if (proc_config[AZ_node] == 0) {
#    ifdef binary
	fp=fopen(".data","rb");
#    else
	fp=fopen(".data","r");
#    endif
     if (fp==NULL) {
        printf("couldn't open file .data\n");
        exit(1);
     }
#    ifdef binary
        fread(&leng, sizeof(int), 1, fp);
#    else
        fscanf(fp,"%d",&leng);
#    endif
     fclose(fp);
  }
  leng = AZ_gsum_int(leng, proc_config);

  N_grid_pts=leng/num_PDE_eqns;

  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly. */

  if (proc_config[AZ_N_procs] == 1) i = AZ_linear;
  else i = AZ_file;
  AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,i);

  AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);


  /* This code is to fix things up so that we are sure we have */
  /* all block (including the ghost nodes the same size.       */

  AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);

  AZ_transform_norowreordering(proc_config, &external, bindx, val,  update, &update_index,
	       &extern_index, &data_org, N_update, 0, 0, 0, &cpntr,
	       AZ_MSR_MATRIX);

  Amat = AZ_matrix_create( leng );
  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type  = data_org[AZ_matrix_type];

  data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];

#ifdef SCALE_ME
  ML_MSR_sym_diagonal_scaling(Amat, proc_config, &scaling_vect);
#endif

  start_time = AZ_second();

  options[AZ_scaling] = AZ_none;
  ML_Create(&ml, N_levels);
  ML_Set_PrintLevel(10);


  /* set up discretization matrix and matrix vector function */

  AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);

#ifdef ML_partition

  /* this code is meant to partition the matrices so that things can be */
  /* run in parallel later.                                             */
  /* It is meant to be run on only one processor.                       */
#ifdef	MB_MODIF
  fp2 = fopen(".update","w");
#else
  fp2 = fopen("partition_file","w");
#endif

  ML_Operator_AmalgamateAndDropWeak(&(ml->Amat[N_levels-1]), num_PDE_eqns, 0.0);
  ML_Gen_Blocks_Metis(ml, N_levels-1, &nblocks, &block_list);

  for (i = 0; i < nblocks; i++) {
     count = 0;
     for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
        if (block_list[j] == i) count++;
     }
     fprintf(fp2,"   %d\n",count*num_PDE_eqns);
     for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
        if (block_list[j] == i) {
           for (k = 0; k < num_PDE_eqns; k++)  fprintf(fp2,"%d\n",j*num_PDE_eqns+k);
        }
     }
  }
  fclose(fp2);
  ML_Operator_UnAmalgamateAndDropWeak(&(ml->Amat[N_levels-1]),num_PDE_eqns,0.0);
#ifdef	MB_MODIF
  printf(" partition file dumped in .update\n");
#endif
  exit(1);
#endif

  ML_Aggregate_Create( &ag );
/*
  ML_Aggregate_Set_CoarsenScheme_MIS(ag);
*/
#ifdef MB_MODIF
  ML_Aggregate_Set_DampingFactor(ag,1.50);
#else
  ML_Aggregate_Set_DampingFactor(ag,1.5);
#endif
  ML_Aggregate_Set_CoarsenScheme_METIS(ag);
  ML_Aggregate_Set_NodesPerAggr( ml, ag, -1, 35);
  /*
  ML_Aggregate_Set_Phase3AggregateCreationAggressiveness(ag, 10.001);
  */


  ML_Aggregate_Set_Threshold(ag, 0.0);
  ML_Aggregate_Set_MaxCoarseSize( ag, 300);


  /* read in the rigid body modes */

   Nrigid = 0;

  /* to ensure compatibility with RBM dumping software */
   if (proc_config[AZ_node] == 0) {

      sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
      while( (fp = fopen(filename,"r")) != NULL) {
	which_filename = 1;
          fclose(fp);
          Nrigid++;
          sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
      }
      sprintf(filename,"rigid_body_mode%d",Nrigid+1);
      while( (fp = fopen(filename,"r")) != NULL) {
          fclose(fp);
          Nrigid++;
          sprintf(filename,"rigid_body_mode%d",Nrigid+1);
      }
    }

    Nrigid = AZ_gsum_int(Nrigid,proc_config);

    if (Nrigid != 0) {
       rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
       if (rigid == NULL) {
          printf("Error: Not enough space for rigid body modes\n");
       }
    }

    rhs   = (double *) malloc(leng*sizeof(double));
    xxx   = (double *) malloc(leng*sizeof(double));

    for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0;



    for (i = 0; i < Nrigid; i++) {
       if (which_filename == 1) sprintf(filename,"rigid_body_mode%02d",i+1);
       else sprintf(filename,"rigid_body_mode%d",i+1);
       AZ_input_msr_matrix(filename,update,&mode,&garbage,N_update,proc_config);
       AZ_reorder_vec(mode, data_org, update_index, NULL);
       /* here is something to stick a rigid body mode as the initial */
       /* The idea is to solve A x = 0 without smoothing with a two   */
       /* level method. If everything is done properly, we should     */
       /* converge in 2 iterations.                                   */
       /* Note: we must also zero out components of the rigid body    */
       /* mode that correspond to Dirichlet bcs.                      */

       if (i == -4) {
          for (iii = 0; iii < leng; iii++) xxx[iii] = mode[iii];

          ccc = 0;
          Amatrix = &(ml->Amat[N_levels-1]);
          for (iii = 0; iii < Amatrix->outvec_leng; iii++) {
             ML_get_matrix_row(Amatrix,1,&iii,&allocated,&rowi_col,&rowi_val,
                               &rowi_N, 0);
             count2 = 0;
             for (j = 0; j < rowi_N; j++) if (rowi_val[j] != 0.) count2++;
             if (count2 <= 1) { xxx[iii] = 0.; ccc++; }
          }
          free(rowi_col); free(rowi_val);
          allocated = 0; rowi_col = NULL; rowi_val = NULL;
       }

       /*
        *  Rescale matrix/rigid body modes and checking
        *
        AZ_sym_rescale_sl(mode, Amat->data_org, options, proc_config, scaling);
        Amat->matvec(mode, rigid, Amat, proc_config);
        for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]);
        */

        /* Here is some code to check that the rigid body modes are  */
        /* really rigid body modes. The idea is to multiply by A and */
        /* then to zero out things that we "think" are boundaries.   */
        /* In this hardwired example, things near boundaries         */
        /* correspond to matrix rows that do not have 81 nonzeros.   */
        /*

        Amode = (double *) malloc(leng*sizeof(double));
        Amat->matvec(mode, Amode, Amat, proc_config);
        j = 0;
        biggest = 0.0;
        for (ii = 0; ii < N_update; ii++) {
           if ( Amat->bindx[ii+1] - Amat->bindx[ii] != 80) {
              Amode[ii] = 0.; j++;
           }
           else {
              if ( fabs(Amode[ii]) > biggest) {
                 biggest=fabs(Amode[ii]); big_ind = ii;
              }
           }
        }
        printf("%d entries zeroed out of %d elements\n",j,N_update);
        alpha = AZ_gdot(N_update, Amode, Amode, proc_config);
        beta  = AZ_gdot(N_update,  mode,  mode, proc_config);
        printf("||A r||^2 =%e, ||r||^2 = %e, ratio = %e\n",
               alpha,beta,alpha/beta);
        printf("the biggest is %e at row %d\n",biggest,big_ind);
        free(Amode);

        */

        /* orthogonalize mode with respect to previous modes. */

        for (j = 0; j < i; j++) {
           alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/
                    AZ_gdot(N_update, &(rigid[j*N_update]),
                               &(rigid[j*N_update]), proc_config);
	   /*           daxpy_(&N_update,&alpha,&(rigid[j*N_update]),  &one, mode, &one); */
        }
#ifndef	MB_MODIF
       printf(" after mb %e %e %e\n",mode[0],mode[1],mode[2]);
#endif

        for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];
        free(mode);
        free(garbage); garbage = NULL;

    }

    if (Nrigid != 0) {
             ML_Aggregate_Set_BlockDiagScaling(ag);
       ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
       free(rigid);
    }
#ifdef SCALE_ME
    ML_Aggregate_Scale_NullSpace(ag, scaling_vect, N_update);
#endif

    coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1,
				ML_DECREASING, ag);
   AZ_defaults(options, params);
   coarsest_level = N_levels - coarsest_level;
   if ( proc_config[AZ_node] == 0 )
	printf("Coarse level = %d \n", coarsest_level);

   /* set up smoothers */

   for (level = N_levels-1; level > coarsest_level; level--) {

/*
      ML_Gen_Smoother_BlockGaussSeidel(ml, level,ML_BOTH, 1, 1., num_PDE_eqns);
*/

    /*  Sparse approximate inverse smoother that acutally does both */
    /*  pre and post smoothing.                                     */
    /*
      ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth,
                                parasails_sym, parasails_thresh,
                                parasails_nlevels, parasails_filter,
                                parasails_loadbal, parasails_factorized);
     */

     /* This is the symmetric Gauss-Seidel smoothing that we usually use. */
     /* In parallel, it is not a true Gauss-Seidel in that each processor */
     /* does a Gauss-Seidel on its local submatrix independent of the     */
     /* other processors.                                                 */

     /* ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); */
     Ndof = ml->Amat[level].invec_leng;

     ML_Gen_Blocks_Aggregates(ag, level, &nBlocks, &blockIndices);

     ML_Gen_Smoother_BlockDiagScaledCheby(ml, level, ML_BOTH, 30.,nsmooth,
					  nBlocks, blockIndices);

     /*
      ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.);
     */


      /* This is a true Gauss Seidel in parallel. This seems to work for  */
      /* elasticity problems.  However, I don't believe that this is very */
      /* efficient in parallel.                                           */
     /*
      nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
      blocks = (int *) ML_allocate(sizeof(int)*N_update);
      for (i =0; i < ml->Amat[level].invec_leng; i++)
         blocks[i] = i/num_PDE_eqns;

      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
      free(blocks);
*/

      /* Block Jacobi Smoothing */
      /*
      nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
      blocks = (int *) ML_allocate(sizeof(int)*N_update);
      for (i =0; i < ml->Amat[level].invec_leng; i++)
         blocks[i] = i/num_PDE_eqns;

      ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,
                                   ML_ONE_STEP_CG, nblocks, blocks);
      free(blocks);
      */

      /* Jacobi Smoothing                                                 */
     /*

      ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth, ML_ONE_STEP_CG);
      ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,ML_ONE_STEP_CG);
     */



      /*  This does a block Gauss-Seidel (not true GS in parallel)        */
      /*  where each processor has 'nblocks' blocks.                      */
      /*
      nblocks = 250;
      ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks);
      ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,ML_ONE_STEP_CG,
                                        nblocks, blocks);
      free(blocks);
      */
      num_PDE_eqns = 6;
   }
   /* Choose coarse grid solver: mls, superlu, symGS, or Aztec */

   /*
   ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth);
   ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
   */
   /*
   ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.);
   */

   old_prec = options[AZ_precond];
   old_sol  = options[AZ_solver];
   old_tol  = params[AZ_tol];
   params[AZ_tol] = 1.0e-9;
   params[AZ_tol] = 1.0e-5;
   options[AZ_precond] = AZ_Jacobi;
   options[AZ_solver]  = AZ_cg;
   options[AZ_poly_ord] = 1;
   options[AZ_conv] = AZ_r0;
   options[AZ_orth_kvecs] = AZ_TRUE;

   j = AZ_gsum_int(ml->Amat[coarsest_level].outvec_leng, proc_config);

   options[AZ_keep_kvecs] = j - 6;
   options[AZ_max_iter] =  options[AZ_keep_kvecs];

   ML_Gen_SmootherAztec(ml, coarsest_level, options, params,
            proc_config, status, options[AZ_keep_kvecs], ML_PRESMOOTHER, NULL);

   options[AZ_conv] = AZ_noscaled;
   options[AZ_keep_kvecs] = 0;
   options[AZ_orth_kvecs] = 0;
   options[AZ_precond] = old_prec;
   options[AZ_solver] = old_sol;
   params[AZ_tol] = old_tol;

   /*   */


#ifdef RST_MODIF
   ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level);
#else
#ifdef	MB_MODIF
   ML_Gen_Solver(ml, ML_SAAMG,   N_levels-1, coarsest_level);
#else
   ML_Gen_Solver(ml, ML_MGFULLV, N_levels-1, coarsest_level);
#endif
#endif

   options[AZ_solver]   = AZ_GMRESR;
         options[AZ_solver]   = AZ_cg;
   options[AZ_scaling]  = AZ_none;
   options[AZ_precond]  = AZ_user_precond;
   options[AZ_conv]     = AZ_r0;
   options[AZ_conv] = AZ_noscaled;
   options[AZ_output]   = 1;
   options[AZ_max_iter] = 500;
   options[AZ_poly_ord] = 5;
   options[AZ_kspace]   = 40;
   params[AZ_tol]       = 4.8e-6;

   AZ_set_ML_preconditioner(&Pmat, Amat, ml, options);
   setup_time = AZ_second() - start_time;

   /* Set rhs */

   fp = fopen("AZ_capture_rhs.dat","r");
   if (fp == NULL) {
      AZ_random_vector(rhs, data_org, proc_config);
      if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n");
      for (i = 0; i < -N_update; i++) {
        rhs[i] = (double) update[i]; rhs[i] = 7.;
      }
   }
   else {
      if (proc_config[AZ_node]== 0) printf("reading rhs guess from file\n");
      AZ_input_msr_matrix("AZ_capture_rhs.dat", update, &rhs, &garbage,
			  N_update, proc_config);
      free(garbage);
   }
   AZ_reorder_vec(rhs, data_org, update_index, NULL);

   printf("changing rhs by multiplying with A\n");
  Amat->matvec(rhs, xxx, Amat, proc_config);
  for (i = 0; i < N_update; i++) rhs[i] = xxx[i];

   fp = fopen("AZ_capture_init_guess.dat","r");
   if (fp != NULL) {
      fclose(fp);
      if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n");
      AZ_input_msr_matrix("AZ_capture_init_guess.dat", update, &xxx, &garbage,
      			  N_update, proc_config);
      free(garbage);


      xxx = (double *) realloc(xxx, sizeof(double)*(
					 Amat->data_org[AZ_N_internal]+
					 Amat->data_org[AZ_N_border] +
					 Amat->data_org[AZ_N_external]));
   }
   AZ_reorder_vec(xxx, data_org, update_index, NULL);

   /* if Dirichlet BC ... put the answer in */

/*
   for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
      if ( (val[i] > .99999999) && (val[i] < 1.0000001))
         xxx[i] = rhs[i];
   }
*/

   fp = fopen("AZ_no_multilevel.dat","r");
   scaling = AZ_scaling_create();
   start_time = AZ_second();


   if (fp != NULL) {
      fclose(fp);
      options[AZ_precond] = AZ_none;
      options[AZ_scaling] = AZ_sym_diag;
      options[AZ_ignore_scaling] = AZ_TRUE;

      options[AZ_keep_info] = 1;
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);

/*
      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
*/
   }
   else {
      options[AZ_keep_info] = 1;
      options[AZ_conv] = AZ_noscaled;
      options[AZ_conv] = AZ_r0;
      params[AZ_tol] = 1.0e-7;
      /* ML_Iterate(ml, xxx, rhs); */
alpha = sqrt(AZ_gdot(N_update, xxx, xxx, proc_config));
printf("init guess = %e\n",alpha);
alpha = sqrt(AZ_gdot(N_update, rhs, rhs, proc_config));
printf("rhs = %e\n",alpha);
#ifdef SCALE_ME
	ML_MSR_scalerhs(rhs, scaling_vect, data_org[AZ_N_internal] +
                    data_org[AZ_N_border]);
	ML_MSR_scalesol(xxx, scaling_vect, data_org[AZ_N_internal] +
			data_org[AZ_N_border]);
#endif

max_diag = 0.;
min_diag = 1.e30;
max_sum  = 0.;
for (i = 0; i < N_update; i++) {
   if (Amat->val[i] < 0.) printf("woops negative diagonal A(%d,%d) = %e\n",
				 i,i,Amat->val[i]);
   if (Amat->val[i] > max_diag) max_diag = Amat->val[i];
   if (Amat->val[i] < min_diag) min_diag = Amat->val[i];
   sum = fabs(Amat->val[i]);
   for (j = Amat->bindx[i]; j < Amat->bindx[i+1]; j++) {
      sum += fabs(Amat->val[j]);
   }
   if (sum > max_sum) max_sum = sum;
}
printf("Largest diagonal = %e, min diag = %e large abs row sum = %e\n",
max_diag, min_diag, max_sum);

      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);

      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
/*
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
*/
   }
   solve_time = AZ_second() - start_time;

   if (proc_config[AZ_node] == 0)
      printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);
   if (proc_config[AZ_node] == 0)
     printf("Printing out a few entries of the solution ...\n");

   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}


   ML_Aggregate_Destroy(&ag);
   ML_Destroy(&ml);
   AZ_free((void *) Amat->data_org);
   AZ_free((void *) Amat->val);
   AZ_free((void *) Amat->bindx);
   AZ_free((void *) update);
   AZ_free((void *) external);
   AZ_free((void *) extern_index);
   AZ_free((void *) update_index);
   AZ_scaling_destroy(&scaling);
   if (Amat  != NULL) AZ_matrix_destroy(&Amat);
   if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
   free(xxx);
   free(rhs);


#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return 0;

}
// ================================================ ====== ==== ==== == =
// the tentative null space is in input because the user
// has to remember to allocate and fill it, and then to delete
// it after calling this method.
int ML_Epetra::MultiLevelPreconditioner::
ComputeAdaptivePreconditioner(int TentativeNullSpaceSize,
                              double* TentativeNullSpace)
{

  if ((TentativeNullSpaceSize == 0) || (TentativeNullSpace == 0))
    ML_CHK_ERR(-1);
   
  // ================================== //
  // get parameters from the input list //
  // ================================== //
  
  // maximum number of relaxation sweeps
  int MaxSweeps = List_.get("adaptive: max sweeps", 10);
  // number of std::vector to be added to the tentative null space
  int NumAdaptiveVectors = List_.get("adaptive: num vectors", 1);

  if (verbose_) {
    std::cout << PrintMsg_ << "*** Adaptive Smoother Aggregation setup ***" << std::endl;
    std::cout << PrintMsg_ << "    Maximum relaxation sweeps     = " << MaxSweeps << std::endl;
    std::cout << PrintMsg_ << "    Additional vectors to compute = " << NumAdaptiveVectors << std::endl;
  }

  // ==================================================== //
  // compute the preconditioner, set null space from user //
  // (who will have to delete std::vector TentativeNullSpace)  //
  // ==================================================== //
  
  double* NewNullSpace = 0;
  double* OldNullSpace = TentativeNullSpace;
  int OldNullSpaceSize = TentativeNullSpaceSize;

  // need some work otherwise matvec() with Epetra_Vbr fails.
  // Also, don't differentiate between range and domain here
  // as ML will not work if range != domain
  const Epetra_VbrMatrix* VbrA = NULL;
  VbrA = dynamic_cast<const Epetra_VbrMatrix*>(RowMatrix_);

  Epetra_Vector* LHS = 0;
  Epetra_Vector* RHS = 0;

  if (VbrA != 0) {
    LHS = new Epetra_Vector(VbrA->DomainMap());
    RHS = new Epetra_Vector(VbrA->DomainMap());
  } else {
    LHS = new Epetra_Vector(RowMatrix_->OperatorDomainMap());
    RHS = new Epetra_Vector(RowMatrix_->OperatorDomainMap());
  }

  // destroy what we may already have
  if (IsComputePreconditionerOK_ == true) {
    DestroyPreconditioner();
  }

  // build the preconditioner for the first time
  List_.set("null space: type", "pre-computed");
  List_.set("null space: dimension", OldNullSpaceSize);
  List_.set("null space: vectors", OldNullSpace);
  ComputePreconditioner();

  // ====================== //
  // add one std::vector at time //
  // ====================== //
  
  for (int istep = 0 ; istep < NumAdaptiveVectors ; ++istep) {

    if (verbose_) {
      std::cout << PrintMsg_ << "\tAdaptation step " << istep << std::endl;
      std::cout << PrintMsg_ << "\t---------------" << std::endl;
    }

    // ==================== //
    // look for "bad" modes //
    // ==================== //

    // note: should an error occur, ML_CHK_ERR will return,
    // and LHS and RHS will *not* be delete'd (--> memory leak).
    // Anyway, this means that something wrong happened in the code
    // and should be fixed by the user.

    LHS->Random();
    double Norm2;

    for (int i = 0 ; i < MaxSweeps ; ++i) {
      // RHS = (I - ML^{-1} A) LHS
      ML_CHK_ERR(RowMatrix_->Multiply(false,*LHS,*RHS));
      // FIXME: can do something slightly better here
      ML_CHK_ERR(ApplyInverse(*RHS,*RHS));
      ML_CHK_ERR(LHS->Update(-1.0,*RHS,1.0));
      LHS->Norm2(&Norm2);
      if (verbose_) {
        std::cout << PrintMsg_ << "\titer " << i << ", ||x||_2 = ";
        std::cout << Norm2 << std::endl;
      }
    }

    // scaling vectors
    double NormInf;
    LHS->NormInf(&NormInf);
    LHS->Scale(1.0 / NormInf);

    // ========================================================= //
    // copy tentative and computed null space into NewNullSpace, //
    // which now becomes the standard null space                 //
    // ========================================================= //

    int NewNullSpaceSize = OldNullSpaceSize + 1;
    NewNullSpace = new double[NumMyRows() * NewNullSpaceSize];
    assert (NewNullSpace != 0);
    int itmp = OldNullSpaceSize * NumMyRows();
    for (int i = 0 ; i < itmp ; ++i) {
      NewNullSpace[i] = OldNullSpace[i];
    }

    for (int j = 0 ; j < NumMyRows() ; ++j) {
      NewNullSpace[itmp + j] = (*LHS)[j];
    }

    // =============== //
    // visualize modes //
    // =============== //

    if (List_.get("adaptive: visualize", false)) {

      double* x_coord = List_.get("viz: x-coordinates", (double*)0);
      double* y_coord = List_.get("viz: y-coordinates", (double*)0);
      double* z_coord = List_.get("viz: z-coordinates", (double*)0);
      assert (x_coord != 0);

      std::vector<double> plot_me(NumMyRows()/NumPDEEqns_);
      ML_Aggregate_Viz_Stats info;
      info.Amatrix = &(ml_->Amat[LevelID_[0]]);
      info.x = x_coord;
      info.y = y_coord;
      info.z = z_coord;
      info.Nlocal = NumMyRows() / NumPDEEqns_;
      info.Naggregates = 1;
      ML_Operator_AmalgamateAndDropWeak(&(ml_->Amat[LevelID_[0]]),
                                        NumPDEEqns_, 0.0);

      for (int ieqn = 0 ; ieqn < NumPDEEqns_ ; ++ieqn) {
        for (int j = 0 ; j < NumMyRows() ; j+=NumPDEEqns_) {
          plot_me[j / NumPDEEqns_] = (*LHS)[j + ieqn];
        }
        char FileName[80];
        sprintf(FileName,"nullspace-mode%d-eq%d.xyz", istep, ieqn);
        if (verbose_)
          std::cout << PrintMsg_ << "writing file " << FileName << "..." << std::endl;
        ML_Aggregate_VisualizeXYZ(info,FileName,
                                  ml_->comm,&plot_me[0]);
      }

      ML_Operator_UnAmalgamateAndDropWeak(&(ml_->Amat[LevelID_[0]]),
                                          NumPDEEqns_, 0.0);
    }
    
    // Destroy the old preconditioner
    DestroyPreconditioner();

    // ==================================================== //
    // build the new preconditioner with the new null space //
    // ==================================================== //

    List_.set("null space: type", "pre-computed");
    List_.set("null space: dimension", NewNullSpaceSize);
    List_.set("null space: vectors", NewNullSpace);

    ML_CHK_ERR(ComputePreconditioner());

    if (istep && (istep != NumAdaptiveVectors))
      delete OldNullSpace;

    OldNullSpace = NewNullSpace;
    OldNullSpaceSize = NewNullSpaceSize;

  }

  // keep trace of this pointer, it will be delete'd later
  NullSpaceToFree_ = NewNullSpace;

  delete LHS;
  delete RHS;

  return(0);

}
Пример #4
0
// ================================================ ====== ==== ==== == =
// Copied from ml_agg_genP.c
static void ML_Init_Aux(ML_Operator* A, Teuchos::ParameterList &List) {
  int i, j, n, count, num_PDEs, BlockRow, BlockCol;
  double threshold;
  int* columns;
  double* values;
  int allocated, entries = 0;
  int N_dimensions;
  int DiagID;
  double DiagValue;
  int** filter;
  double dist;
  double *LaplacianDiag;
  int     Nghost;


  // Boundary exchange the coords
  double *x_coord=0, *y_coord=0, *z_coord=0;
  RefMaxwell_SetupCoordinates(A,List,x_coord,y_coord,z_coord);
  int dim=(x_coord!=0) + (y_coord!=0) + (z_coord!=0);

  /* Sanity Checks */
  if(dim == 0 || ((!x_coord && (y_coord || z_coord)) || (x_coord && !y_coord && z_coord))){
    std::cerr<<"Error: Coordinates not defined.  This is necessary for aux aggregation (found "<<dim<<" coordinates).\n";
    exit(-1);
  }

  num_PDEs = A->num_PDEs;
  N_dimensions = dim;
  threshold = A->aux_data->threshold;

  ML_Operator_AmalgamateAndDropWeak(A, num_PDEs, 0.0);
  n = A->invec_leng;
  Nghost = ML_CommInfoOP_Compute_TotalRcvLength(A->getrow->pre_comm);

  LaplacianDiag = (double *) ML_allocate((A->getrow->Nrows+Nghost+1)*
                                         sizeof(double));

  filter = (int**) ML_allocate(sizeof(int*) * n);

  allocated = 128;
  columns = (int *)    ML_allocate(allocated * sizeof(int));
  values  = (double *) ML_allocate(allocated * sizeof(double));

  for (i = 0 ; i < n ; ++i) {
    BlockRow = i;
    DiagID = -1;
    DiagValue = 0.0;

    ML_get_matrix_row(A,1,&i,&allocated,&columns,&values, &entries,0);

    for (j = 0; j < entries; j++) {
      BlockCol = columns[j];
      if (BlockRow != BlockCol) {
        dist = 0.0;

        switch (N_dimensions) {
        case 3:
          dist += (z_coord[BlockRow] - z_coord[BlockCol]) * (z_coord[BlockRow] - z_coord[BlockCol]);
        case 2:
          dist += (y_coord[BlockRow] - y_coord[BlockCol]) * (y_coord[BlockRow] - y_coord[BlockCol]);
        case 1:
          dist += (x_coord[BlockRow] - x_coord[BlockCol]) * (x_coord[BlockRow] - x_coord[BlockCol]);
        }

        if (dist == 0.0) {
          printf("node %d = %e ", i, x_coord[BlockRow]);
          if (N_dimensions > 1) printf(" %e ", y_coord[BlockRow]);
          if (N_dimensions > 2) printf(" %e ", z_coord[BlockRow]);
          printf("\n");
          printf("node %d = %e ", j, x_coord[BlockCol]);
          if (N_dimensions > 1) printf(" %e ", y_coord[BlockCol]);
          if (N_dimensions > 2) printf(" %e ", z_coord[BlockCol]);
          printf("\n");
          printf("Operator has inlen = %d and outlen = %d\n",
                 A->invec_leng, A->outvec_leng);
        }

        dist = 1.0 / dist;
        DiagValue += dist;
      }
      else if (columns[j] == i) {
        DiagID = j;
      }
    }

    if (DiagID == -1) {
      fprintf(stderr, "ERROR: matrix has no diagonal!\n"
              "ERROR: (file %s, line %d)\n",
              __FILE__, __LINE__);
      exit(EXIT_FAILURE);
    }
    LaplacianDiag[BlockRow] = DiagValue;
  }
  if ( A->getrow->pre_comm != NULL )
     ML_exchange_bdry(LaplacianDiag,A->getrow->pre_comm,A->getrow->Nrows,
                      A->comm, ML_OVERWRITE,NULL);


  for (i = 0 ; i < n ; ++i) {
    BlockRow = i;

    ML_get_matrix_row(A,1,&i,&allocated,&columns,&values, &entries,0);

    for (j = 0; j < entries; j++) {
      BlockCol = columns[j];
      if (BlockRow != BlockCol) {
        dist = 0.0;
        switch (N_dimensions) {
        case 3:
          dist += (z_coord[BlockRow] - z_coord[BlockCol]) * (z_coord[BlockRow] - z_coord[BlockCol]);
        case 2:
          dist += (y_coord[BlockRow] - y_coord[BlockCol]) * (y_coord[BlockRow] - y_coord[BlockCol]);
        case 1:
          dist += (x_coord[BlockRow] - x_coord[BlockCol]) * (x_coord[BlockRow] - x_coord[BlockCol]);
        }

        dist = 1.0 / dist;
        values[j] = dist;
      }
    }

    count = 0;
    for (j = 0 ; j < entries ; ++j) {
      if (  (i != columns[j]) &&
            (values[j]*values[j] <
       LaplacianDiag[BlockRow]*LaplacianDiag[columns[j]]*threshold*threshold)){
        columns[count++] = columns[j];
      }
    }

    /* insert the rows */
    filter[BlockRow] = (int*) ML_allocate(sizeof(int) * (count + 1));
    filter[BlockRow][0] = count;

    for (j = 0 ; j < count ; ++j)
      filter[BlockRow][j + 1] = columns[j];

  }

  ML_free(columns);
  ML_free(values);

  ML_free(LaplacianDiag);

  ML_Operator_UnAmalgamateAndDropWeak(A, num_PDEs, 0.0);

  A->aux_data->aux_func_ptr  = A->getrow->func_ptr;
  A->getrow->func_ptr = ML_Aux_Getrow;
  A->aux_data->filter = filter;
  A->aux_data->filter_size = n;

  // Cleanup
  ML_free(x_coord);
  ML_free(y_coord);
  ML_free(z_coord);

}