// ================================================ ====== ==== ==== == = 
// Computes C= <me> * A
int  ML_Epetra::ML_RefMaxwell_11_Operator::MatrixMatrix_Multiply(const Epetra_CrsMatrix & A, ML_Comm *comm, ML_Operator **C) const
{
  ML_Operator *SM_ML,*A_ML,*temp1,*temp2;

  /* General Stuff */  
  ML_Comm* temp = global_comm;
  A_ML  = ML_Operator_Create(comm);
  *C = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)&A,A_ML);  
  
  /* Do the SM part */
  SM_ML = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)SM_Matrix_,SM_ML);
  ML_2matmult(SM_ML,A_ML,temp1,ML_CSR_MATRIX);
  ML_Matrix_Print(temp1,*Comm_,*RangeMap_,"smp.dat");

  /* Do the Addon part */
  Addon_->MatrixMatrix_Multiply(A,comm,&temp2);
  ML_Matrix_Print(temp2,*Comm_,*RangeMap_,"add_p.dat");
  
  /* Add the matrices together */
  ML_Operator_Add(temp2,temp1,*C,ML_CSR_MATRIX,1.0);
  ML_Matrix_Print(*C,*Comm_,*RangeMap_,"tfinal.dat");  
  
  /* Cleanup */
  global_comm = temp;
  ML_Operator_Destroy(&A_ML);
  ML_Operator_Destroy(&SM_ML);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&temp2);

  return 0;
}/*end MatrixMatrix_Multiply*/
// ================================================ ====== ==== ==== == =
// Forms the coarse matrix, given the prolongator
int  ML_Epetra::FaceMatrixFreePreconditioner::FormCoarseMatrix()
{
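  /* Note (added commentary): this forms CoarseMatrix = R * Op11 * P, where R
     is the row-wise transpose of Prolongator_ and the (1,1) operator is
     applied either as a plain Epetra_CrsMatrix or, when the addon terms are
     present, through its MatrixMatrix_Multiply interface. */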
  CoarseMat_ML = ML_Operator_Create(ml_comm_);
  CoarseMat_ML->data_destroy=free;
  ML_Operator *Temp_ML=0;
  ML_Operator *R= ML_Operator_Create(ml_comm_);
  ML_Operator *P= ML_Operator_Create(ml_comm_);

  /* Build ML_Operator version of Prolongator_, Restriction Operator */
  ML_CHK_ERR(ML_Operator_WrapEpetraCrsMatrix(Prolongator_,P,verbose_));
  P->num_rigid=P->num_PDEs=dim;

  //NTS: ML_CHK_ERR won't work on this: it returns 1
  ML_Operator_Transpose_byrow(P, R);

  /* OPTION: Disable the addon */
  const Epetra_CrsMatrix *Op11crs = dynamic_cast<const Epetra_CrsMatrix*>(&*Operator_);
  const Epetra_Operator_With_MatMat *Op11mm = dynamic_cast<const Epetra_Operator_With_MatMat*>(&*Operator_);

  /* Do the A*P  with or without addon*/
  if(Op11crs){
    if(verbose_ && !Comm_->MyPID()) printf("FMFP: Running *without* addon\n");
    ML_Operator *SM_ML = ML_Operator_Create(ml_comm_);
    Temp_ML = ML_Operator_Create(ml_comm_);
    ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Op11crs,SM_ML,verbose_);
    ML_2matmult(SM_ML,P,Temp_ML,ML_CSR_MATRIX);
    ML_Operator_Destroy(&SM_ML);
  }
  else if(Op11mm){
    if(verbose_ && !Comm_->MyPID()) printf("FMFP: Running with addon\n");
    ML_CHK_ERR(Op11mm->MatrixMatrix_Multiply(*Prolongator_,ml_comm_,&Temp_ML));
  }
  else{
    if(!Comm_->MyPID()) printf("ERROR: FMFP Illegal Operator\n");
    delete R;
    ML_CHK_ERR(-1);
  }

  /* Do R * AP */
  R->num_rigid=R->num_PDEs=dim;
  ML_2matmult_block(R, Temp_ML,CoarseMat_ML,ML_CSR_MATRIX);

  /* Wrap to Epetra-land */
  int nnz=100;
  double time;
  ML_Operator2EpetraCrsMatrix(CoarseMat_ML,CoarseMatrix,nnz,true,time,0,verbose_);
  // NTS: This is a hack to work around the issue of ones sticking on the diagonal.

  /* Cleanup */
  ML_Operator_Destroy(&P);
  ML_Operator_Destroy(&R);
  ML_Operator_Destroy(&Temp_ML);
  ML_Operator_Destroy(&CoarseMat_ML); CoarseMat_ML = 0; // HAX
  return 0;
}/*end FormCoarseMatrix*/
// ================================================ ====== ==== ==== == =
// Destroys all structures allocated in \c ComputePreconditioner() if the preconditioner has been computed.
int ML_Epetra::FaceMatrixFreePreconditioner::DestroyPreconditioner(){
  if (ml_comm_) { ML_Comm_Destroy(&ml_comm_); ml_comm_ = 0; }// will need this
  if (Prolongator_) {delete Prolongator_; Prolongator_=0;}
  if (InvDiagonal_) {delete InvDiagonal_; InvDiagonal_=0;}
  if (CoarsePC) {delete CoarsePC; CoarsePC=0;}
  if (CoarseMatrix) {delete CoarseMatrix; CoarseMatrix=0;}
  if (CoarseMat_ML) {ML_Operator_Destroy(&CoarseMat_ML);CoarseMat_ML=0;}
  if (CoarseMap_) {delete CoarseMap_; CoarseMap_=0;}
#ifdef HAVE_ML_IFPACK
  if (Smoother_){delete Smoother_; Smoother_=0;}
#endif
  return 0;
}/*end DestroyPreconditioner*/
// ================================================ ====== ==== ==== == =
//! Build the face-to-node prolongator described by Bochev, Siefert, Tuminaro, Xu and Zhu (2007).
int ML_Epetra::FaceMatrixFreePreconditioner::PBuildSparsity(ML_Operator *P, Epetra_CrsMatrix *&Psparse){
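  /* Note (added commentary): the face prolongator inherits the sparsity of
     abs(FaceNode) * P_n, i.e. a face is coupled to every aggregate that one
     of its nodes belongs to. */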

  /* Create wrapper to do abs(T) */
  // NTS: Assume D0 has already been reindexed by now.
  ML_Operator* AbsFN_ML = ML_Operator_Create(ml_comm_);
  ML_CHK_ERR(ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&*FaceNode_Matrix_),AbsFN_ML,verbose_));
  ML_Operator_Set_Getrow(AbsFN_ML,AbsFN_ML->outvec_leng,CSR_getrow_ones);

  /* Form abs(T) * P_n */
  ML_Operator* AbsFNP = ML_Operator_Create(ml_comm_);
  ML_2matmult(AbsFN_ML,P,AbsFNP, ML_CSR_MATRIX);

  /* Wrap P_n into Epetra-land */
  Epetra_CrsMatrix_Wrap_ML_Operator(AbsFNP,*Comm_,*FaceRangeMap_,&Psparse,Copy,0);

  /* Nuke the rows in Psparse */
  if(BCfaces_.size()>0) Apply_BCsToMatrixRows(BCfaces_.get(),BCfaces_.size(),*Psparse);

  // Cleanup
  ML_Operator_Destroy(&AbsFN_ML);
  ML_Operator_Destroy(&AbsFNP);

  return 0;
}
// ================================================ ====== ==== ==== == =
// Computes C= <me> * A
int  ML_Epetra::Epetra_Multi_CrsMatrix::MatrixMatrix_Multiply(const Epetra_CrsMatrix & A, ML_Comm *comm, ML_Operator **C) const
{

  int rv=0;
  ML_Comm* temp = global_comm;

  /* Setup for 1st Matmat */
  ML_Operator * MV[2]={0,0},*CV;
  MV[(NumMatrices_-1)%2]= ML_Operator_Create(comm);
  rv=ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)&A,MV[(NumMatrices_-1)%2]);
  ML_CHK_ERR(rv);
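  /* Note (added commentary): the chain product is accumulated right-to-left
     through the two-slot ping-pong buffer MV: on iteration i, MV[i%2] holds
     the running product and the new result lands in MV[(i+1)%2], so at most
     two intermediates are alive at once. The final product always ends up
     in MV[1]. */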

  /* Do the matmats */
  for(int i=NumMatrices_-1;rv==0 && i>=0;i--){
    /* Do pre-wraps */
    if(MV[(i+1)%2] && i!=NumMatrices_-1) ML_Operator_Destroy(&MV[(i+1)%2]);
    MV[(i+1)%2]=ML_Operator_Create(comm);
    CV=ML_Operator_Create(comm);
    rv=ML_Operator_WrapEpetraCrsMatrix(CrsMatrices_[i],CV);
    ML_CHK_ERR(rv);

    /* Do matmat */
    ML_2matmult(CV,MV[i%2],MV[(i+1)%2],ML_CSR_MATRIX);

    ML_Operator_Destroy(&CV);
  }/*end for*/
  global_comm = temp;

  /* Final Postwrap */
  *C=MV[1];

  /* Cleanup */
  if(MV[0]) ML_Operator_Destroy(&MV[0]);

  return rv;
}/*end MatrixMatrix_Multiply*/
// ================================================ ====== ==== ==== == = 
// Computes C= <me> * A
int ML_Epetra::ML_RefMaxwell_11_Operator::MatrixMatrix_Multiply(const Epetra_CrsMatrix & A, Epetra_CrsMatrix **C) const
{
  ML_Comm* comm;
  ML_Comm_Create(&comm);
#ifdef ML_MPI
  const Epetra_MpiComm *epcomm = dynamic_cast<const Epetra_MpiComm*>(&(A.Comm()));
  // Get the MPI communicator, as it may not be MPI_COMM_WORLD, and update the ML comm object
  if (epcomm) ML_Comm_Set_UsrComm(comm,epcomm->Comm());
#endif
  ML_Operator *C_;
  int rv=MatrixMatrix_Multiply(A,comm,&C_);
  Epetra_CrsMatrix_Wrap_ML_Operator(C_,*Comm_,*DomainMap_,C,Copy,A.IndexBase());
  ML_Operator_Destroy(&C_);
  ML_Comm_Destroy(&comm);
  return rv;
}/*end MatrixMatrix_Multiply*/
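// Usage sketch (added for illustration; `Op11` and `A` are assumed names, not
// from the original source). The Epetra-facing overload above hides the
// ML_Comm bookkeeping, so a caller only needs:
//
//   Epetra_CrsMatrix *C = NULL;
//   int rv = Op11.MatrixMatrix_Multiply(A, &C);   // C = Op11 * A
//   if (!rv) { /* ... use C ... */ delete C; }    // caller owns C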
// ================================================ ====== ==== ==== == =
void ML_getrow_matvec(ML_Operator *matrix, double *vec, int Nvec, 
                      double *ovec, int *Novec)
{
   ML_Operator *temp, *temp2, *temp3, *temp4, *tptr;
   int *cols, i;
   int allocated, row_length;

   if (matrix->getrow->func_ptr == NULL) {
      printf("ML_getrow_matvec: empty object? \n");
      exit(1);
   }
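   /* Note (added commentary): the input vector is wrapped below as a
      one-column ML_Operator (via VECTOR_getrows) so that the generic
      matrix-matrix kernel ML_matmat_mult can realize the matrix-vector
      product, including any pre/post row exchanges. */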
   temp = ML_Operator_Create(matrix->comm);
   ML_Operator_Set_1Levels(temp, matrix->from, matrix->from);
   ML_Operator_Set_ApplyFuncData(temp,1,Nvec,vec,Nvec,NULL,0);

   ML_Operator_Set_Getrow(temp,Nvec, VECTOR_getrows);
   temp->max_nz_per_row = 1;
   temp->N_nonzeros     = Nvec;

   if (matrix->getrow->pre_comm != NULL) {
      ML_exchange_rows(temp, &temp2, matrix->getrow->pre_comm);
   }
   else temp2 = temp;

   ML_matmat_mult(matrix, temp2, &temp3);

   if (matrix->getrow->post_comm != NULL)
      ML_exchange_rows(temp3, &temp4, matrix->getrow->post_comm);
   else temp4 = temp3;

   allocated = temp4->getrow->Nrows + 1;
   cols = (int *) ML_allocate(allocated*sizeof(int));
   if (cols == NULL) {
      printf("no space in ML_getrow_matvec()\n");
      exit(1);
   }
   for (i = 0; i < temp4->getrow->Nrows; i++) {
      ML_get_matrix_row(temp4, 1, &i, &allocated , &cols, &ovec,
                   &row_length, i);
      if (allocated != temp4->getrow->Nrows + 1)
         printf("memory problems ... we can't reallocate here\n");
   }

   ML_free(cols);

   if ( *Novec != temp4->getrow->Nrows) {
     printf("Warning: The length of ML's output vector does not agree with\n");
     printf("         the user's length for the output vector (%d vs. %d).\n",
            *Novec, temp4->getrow->Nrows);
     printf("         indicate a problem.\n");
   }
   *Novec = temp4->getrow->Nrows;

   if (matrix->getrow->pre_comm != NULL) {
      tptr = temp2;
      while ( (tptr!= NULL) && (tptr->sub_matrix != temp))
         tptr = tptr->sub_matrix;
      if (tptr != NULL) tptr->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(temp2);
      ML_Operator_Destroy(&temp2);
   }
   if (matrix->getrow->post_comm != NULL) {
      tptr = temp4;
      while ( (tptr!= NULL) && (tptr->sub_matrix != temp3))
         tptr = tptr->sub_matrix;
      if (tptr != NULL) tptr->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(temp4);
      ML_Operator_Destroy(&temp4);
   }

   ML_Operator_Destroy(&temp);
   ML_RECUR_CSR_MSRdata_Destroy(temp3);
   ML_Operator_Destroy(&temp3);
}
// ================================================ ====== ==== ==== == = 
// Computes C= A^T * <me> * A.  OptimizeStorage *must* be called for both A and the
// matrices in *this, before this routine can work.
int ML_Epetra::ML_RefMaxwell_11_Operator::PtAP(const Epetra_CrsMatrix & P, ML_Comm *comm, ML_Operator **C) const{
  ML_Operator *SM_ML,*P_ML,*R_ML,*PtSMP_ML,*temp1,*temp2,*opwrap,*D0_M1_P_ML;

  /* General Stuff */  
  ML_Comm* temp = global_comm;
  P_ML  = ML_Operator_Create(comm);
  R_ML  = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)&P,P_ML);  
  ML_Operator_Transpose_byrow(P_ML,R_ML);

  /* Do the SM part */
  SM_ML = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  PtSMP_ML  = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)SM_Matrix_,SM_ML);
  ML_2matmult(SM_ML,P_ML,temp1,ML_CSR_MATRIX);
  ML_2matmult_block(R_ML,temp1,PtSMP_ML,ML_CSR_MATRIX);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&SM_ML);
#ifdef MANUALLY_TRANSPOSE_D0
  ML_Operator_Destroy(&R_ML);
#endif
  ML_Matrix_Print(PtSMP_ML,*Comm_,*RangeMap_,"ptsmp.dat");

#ifdef MANUALLY_TRANSPOSE_D0
  /* Do the Addon: Step #1: M1 * P*/
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[4],opwrap);
  ML_2matmult(opwrap,P_ML,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&P_ML);

  /* Do the Addon: Step #2: D0^T *(M1 * P)*/
  opwrap = ML_Operator_Create(comm);
  D0_M1_P_ML = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[3],opwrap);
  ML_2matmult(opwrap,temp1,D0_M1_P_ML,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);
  
  /* Do the Addon: Step #3: M0^{-1} * (D0^T * M1 * P)*/
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[2],opwrap);
  ML_2matmult(opwrap,D0_M1_P_ML,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);  

  /* Do the Addon: Step #4: Transpose (D0^T * M1 * P) & multiply by output from Step 3*/
  opwrap = ML_Operator_Create(comm);
  temp2 = ML_Operator_Create(comm);  
  ML_Operator_Transpose_byrow(D0_M1_P_ML,opwrap);
  ML_2matmult(opwrap,temp1,temp2,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&D0_M1_P_ML);
  ML_Matrix_Print(temp2,*Comm_,*RangeMap_,"pt_add_p.dat");
#else
  ML_Operator *P_M1_D0_ML;

  /* Do the Addon: Step #1: P^T * M1 */
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[0],opwrap);
  ML_2matmult_block(R_ML,opwrap,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&P_ML);
  ML_Operator_Destroy(&R_ML);
  
  /* Do the Addon: Step #2: (P^T * M1) * D0*/
  opwrap = ML_Operator_Create(comm);
  P_M1_D0_ML = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[1],opwrap);
  ML_2matmult_block(temp1,opwrap,P_M1_D0_ML,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);  

  /* Do the Addon: Step #3: (P^T * M1 * D0) * M0^{-1} */
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[2],opwrap);
  ML_2matmult(P_M1_D0_ML,opwrap,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);  

  /* Do the Addon: Step #4: Transpose (P^T * M1 * D0) & multiply by output from Step 3*/
  opwrap = ML_Operator_Create(comm);
  temp2 = ML_Operator_Create(comm);  
  ML_Operator_Transpose_byrow(P_M1_D0_ML,opwrap);
  ML_2matmult(temp1,opwrap,temp2,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&P_M1_D0_ML);
  ML_Matrix_Print(temp2,*Comm_,*RangeMap_,"pt_add_p_rev.dat");   
#endif

  /* Add the matrices together */
  ML_Operator_Add(PtSMP_ML,temp2,*C,ML_CSR_MATRIX,1.0);
  ML_Matrix_Print(*C,*Comm_,*RangeMap_,"ptap.dat");  

  /* Cleanup */
  global_comm = temp;
  ML_Operator_Destroy(&temp2);
  ML_Operator_Destroy(&PtSMP_ML);

  return 0;
}
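// Usage sketch (added for illustration; `Op11` and `P` are assumed names).
// Per the comment above, OptimizeStorage() must already have been called on P
// and on the matrices held by the operator:
//
//   ML_Comm *comm;
//   ML_Comm_Create(&comm);
//   ML_Operator *C = NULL;
//   int rv = Op11.PtAP(P, comm, &C);   // C = P^T * Op11 * P
//   if (!rv) ML_Operator_Destroy(&C);  // caller owns the result
//   ML_Comm_Destroy(&comm);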
// ================================================ ====== ==== ==== == =
void ML_rap(ML_Operator *Rmat, ML_Operator *Amat,
            ML_Operator *Pmat, ML_Operator *Result, int matrix_type)
{
  int         max_per_proc, i, j, N_input_vector;
   ML_Operator *APmat, *RAPmat, *Pcomm, *RAPcomm, *APcomm, *AP2comm, *tptr;
   ML_CommInfoOP *getrow_comm;
   double      *scales = NULL;
#  ifdef ML_TIMING
   double tpre,tmult,tpost,ttotal;
#  endif
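   /* Note (added commentary): this routine forms Result = Rmat * Amat * Pmat
      in two stages -- A*P first, then R*(AP) -- with explicit row exchanges
      around each product to pull in off-processor rows, and a final
      conversion of the product to the requested matrix_type. */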

   /* Check that N_input_vector is reasonable */

#  ifdef ML_TIMING
   tpre = GetClock();
   ttotal = GetClock();
#  endif

   N_input_vector = Pmat->invec_leng;
   getrow_comm = Pmat->getrow->pre_comm;
   if ( getrow_comm != NULL) {
      for (i = 0; i < getrow_comm->N_neighbors; i++) {
         for (j = 0; j < getrow_comm->neighbors[i].N_send; j++) {
            if (getrow_comm->neighbors[i].send_list[j] >= N_input_vector) {
              printf("(%d) Error: N_input_vector (%d) argument to rap() is not \n", Amat->comm->ML_mypid,N_input_vector);
              printf("(%d) Error: larger than %dth element (%d) sent to node %d\n", Amat->comm->ML_mypid,j+1,
                     getrow_comm->neighbors[i].send_list[j],
                     getrow_comm->neighbors[i].ML_id);
printf("(%d) Error: Amat(%d,%d)  Rmat(%d,%d)  Pmat(%d,%d)\n",
                Amat->comm->ML_mypid,
                Amat->outvec_leng,Amat->invec_leng,
                Rmat->outvec_leng,Rmat->invec_leng,
                Pmat->outvec_leng,Pmat->invec_leng);
fflush(stdout);
              exit(1);
            }
         }
      }
   }


   ML_create_unique_col_id(N_input_vector, &(Pmat->getrow->loc_glob_map),
                           getrow_comm, &max_per_proc, Pmat->comm);
   Pmat->getrow->use_loc_glob_map = ML_YES;



   if (Amat->getrow->pre_comm != NULL)
      ML_exchange_rows( Pmat, &Pcomm, Amat->getrow->pre_comm);
   else Pcomm = Pmat;

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : A * P begins...\n");
#endif

#  ifdef ML_TIMING
   tpre = GetClock() - tpre;
   tmult = GetClock();
#  endif

   ML_matmat_mult(Amat, Pcomm , &APmat);

#  ifdef ML_TIMING
   tmult = GetClock() - tmult;
   tpost = GetClock();
#  endif

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : A * P ends.\n");
#endif

   ML_free(Pmat->getrow->loc_glob_map); Pmat->getrow->loc_glob_map = NULL;
   Pmat->getrow->use_loc_glob_map = ML_NO;
   if (Amat->getrow->pre_comm != NULL) {
      tptr = Pcomm;
      while ( (tptr!= NULL) && (tptr->sub_matrix != Pmat))
         tptr = tptr->sub_matrix;
      if (tptr != NULL) tptr->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(Pcomm);
      ML_Operator_Destroy(&Pcomm);
   }

   if (Amat->getrow->post_comm != NULL) {
      ML_exchange_rows(APmat, &APcomm, Amat->getrow->post_comm);
   }
   else APcomm = APmat;

   /* Take into account any scaling in Amat */

   if (Rmat->from != NULL)
      ML_DVector_GetDataPtr(Rmat->from->Amat_Normalization,&scales);
   if (scales != NULL)
      ML_Scale_CSR(APcomm, scales, 0);

   if (Rmat->getrow->pre_comm != NULL)
      ML_exchange_rows( APcomm, &AP2comm, Rmat->getrow->pre_comm);
   else AP2comm = APcomm;

#  ifdef ML_TIMING
   tpost = GetClock() - tpost;
   if ( Pmat->comm->ML_mypid == 0 && ML_Get_PrintLevel() > 5) {
     int level=-1;
     if (Amat->from != NULL)
       level = Amat->from->levelnum-1;
     printf("Timing summary (in seconds) for product RAP on level %d\n", level);
     printf("     (level %d) RAP right: pre-multiply communication time    = %3.2e\n", level, tpre);
     printf("     (level %d) RAP right: multiply time                      = %3.2e\n", level, tmult);
     printf("     (level %d) RAP right: post-multiply communication time   = %3.2e\n", level, tpost);
   }
#  endif

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : R * AP begins...\n");
#endif

#  ifdef ML_TIMING
   tmult = GetClock();
#  endif
   ML_matmat_mult(Rmat,AP2comm, &RAPmat);

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : R * AP ends.\n");
#endif

   ML_RECUR_CSR_MSRdata_Destroy(AP2comm);
   ML_Operator_Destroy(&AP2comm);

#  ifdef ML_TIMING
   tmult = GetClock()-tmult;
   tpost = GetClock();
#  endif

   if (Rmat->getrow->post_comm != NULL)
      ML_exchange_rows( RAPmat, &RAPcomm, Rmat->getrow->post_comm);
   else RAPcomm = RAPmat;

   scales = NULL;
   if (Rmat->to != NULL)
      ML_DVector_GetDataPtr(Rmat->to->Amat_Normalization,&scales);
   if (scales != NULL)
      ML_Scale_CSR(RAPcomm, scales, 1);

   RAPcomm->num_PDEs = Amat->num_PDEs;
   RAPcomm->num_rigid = Amat->num_rigid;
   if (matrix_type == ML_MSR_MATRIX)
     ML_back_to_local(RAPcomm, Result, max_per_proc);
   else if (matrix_type == ML_CSR_MATRIX)
     ML_back_to_csrlocal(RAPcomm, Result, max_per_proc);
   else if (matrix_type == ML_EpetraCRS_MATRIX)
#ifdef ML_WITH_EPETRA
     ML_back_to_epetraCrs(RAPcomm, Result, Rmat, Pmat);
#else
     pr_error("ML_RAP: ML_EpetraCRS_MATRIX requires epetra to be compiled in.\n");
#endif
   else pr_error("ML_RAP: Unknown matrix type\n");

   ML_RECUR_CSR_MSRdata_Destroy(RAPcomm);
   ML_Operator_Destroy(&RAPcomm);
}/*end ML_rap*/
// ================================================ ====== ==== ==== == =
int main(int argc, char *argv[])
{
  int    Nnodes=16*16;              /* Total number of nodes in the problem.*/
                                    /* 'Nnodes' must be a perfect square.   */
  int    MaxMgLevels=6;             /* Maximum number of Multigrid Levels   */
  int    Nits_per_presmooth=1;      /* # of pre & post smoothings per level */
  double tolerance = 1.0e-8;        /* At convergence:                      */
                                    /*   ||r_k||_2 < tolerance ||r_0||_2    */
  int smoothPe_flag = ML_YES;       /* ML_YES: smooth tentative prolongator */
                                    /* ML_NO: don't smooth prolongator      */

  /***************************************************************************/
  /* Select Hiptmair relaxation subsmoothers for the nodal and edge problems */
  /* Choices include                                                         */
  /*   1) ML_Gen_Smoother_SymGaussSeidel: this corresponds to a processor    */
  /*      local version of symmetric Gauss-Seidel/SOR. The number of sweeps  */
  /*      can be set via either 'edge_its' or 'nodal_its'. The damping can   */
  /*      be set via 'edge_omega' or 'nodal_omega'. When set to ML_DDEFAULT, */
  /*      the damping is set to '1' on one processor. On multiple processors */
  /*      a lower damping value is set. This is needed to converge processor */
  /*      local SOR.                                                         */
  /*   2) ML_Gen_Smoother_Cheby: this corresponds to polynomial relaxation.    */
  /*      The degree of the polynomial is set via 'edge_its' or 'nodal_its'. */
  /*      If the degree is '-1', Marian Brezina's MLS polynomial is chosen.  */
  /*      Otherwise, a Chebyshev polynomial is used over high frequencies    */
  /*      [ lambda_max/alpha , lambda_max]. Lambda_max is computed. 'alpha'  */
  /*      is hardwired in this example to correspond to twice the ratio of   */
  /*      unknowns in the fine and coarse meshes.                            */
  /*                                                                         */
  /* Using 'hiptmair_type' (see comments below) it is also possible to choose*/
  /* when edge and nodal problems are relaxed within the Hiptmair smoother.  */
  /***************************************************************************/

  void  *edge_smoother=(void *)     /* Edge relaxation:                     */
               ML_Gen_Smoother_Cheby; /*   ML_Gen_Smoother_Cheby            */
                                    /*     ML_Gen_Smoother_SymGaussSeidel   */
  void *nodal_smoother=(void *)     /* Nodal relaxation                     */
               ML_Gen_Smoother_Cheby;/*     ML_Gen_Smoother_Cheby           */
                                    /*     ML_Gen_Smoother_SymGaussSeidel   */

  int  edge_its = 3;                /* Iterations or polynomial degree for  */
  int  nodal_its = 3;               /* edge/nodal subsmoothers.             */
  double nodal_omega = ML_DDEFAULT, /* SOR damping parameter for nodal/edge */
         edge_omega  = ML_DDEFAULT; /* subsmoothers (see comments above).   */
  int   hiptmair_type=HALF_HIPTMAIR;/* FULL_HIPTMAIR: each invocation       */
                                    /*     smooths on edges, then nodes,    */
                                    /*     and then once again on edges.    */
                                    /* HALF_HIPTMAIR: each pre-invocation   */
                                    /*     smooths on edges, then nodes.    */
                                    /*     Each post-invocation smooths     */
                                    /*     on nodes, then edges.            */


  ML_Operator  *Tmat, *Tmat_trans, **Tmat_array, **Tmat_trans_array;
  ML           *ml_edges, *ml_nodes;
  ML_Aggregate *ag;
  int          Nfine_edge, Ncoarse_edge, Nfine_node, Ncoarse_node, Nlevels;
  int          level, coarsest_level, itmp;
  double       edge_coarsening_rate, node_coarsening_rate, *rhs, *xxx;
  void         **edge_args, **nodal_args;
  struct       user_partition Edge_Partition = {NULL, NULL,0,0}, 
                                Node_Partition = {NULL, NULL,0,0};
  struct Tmat_data Tmat_data;
  int          i, Ntotal;
  ML_Comm      *comm;

  /* See Aztec User's Guide for information on these variables */

#ifdef AZTEC
  AZ_MATRIX    *Ke_mat, *Kn_mat;
  AZ_PRECOND   *Pmat = NULL;
  int          proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double       params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];
#endif


  /* get processor information (proc id & # of procs) and set ML's printlevel. */

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
#endif
#ifdef AZTEC
  AZ_set_proc_config(proc_config, COMMUNICATOR);
#endif
  ML_Set_PrintLevel(10);   /* set ML's output level: 0 gives least output */

  /* Set the # of global nodes/edges and partition both the edges and the */
  /* nodes over the processors. NOTE: I believe we assume that if an edge */
  /* is assigned to a processor at least one of its nodes must be also    */
  /* assigned to that processor.                                          */

  Node_Partition.Nglobal = Nnodes;
  Edge_Partition.Nglobal = Node_Partition.Nglobal*2;
  Node_Partition.type = NODE;
  Edge_Partition.type = EDGE;
#define perxodic   /* note: does not match "periodic", so the block below is compiled out */
#ifdef periodic
  Node_Partition.Nglobal += 2;
#endif
  partition_edges(&Edge_Partition);
  partition_nodes(&Node_Partition);
  xxx = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double));
  rhs = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double));
  for (i = 0; i < Edge_Partition.Nlocal + 100; i++) xxx[i] = -1.;
  for (i = 0; i < Edge_Partition.Nlocal; i++)
    xxx[i] = (double) Edge_Partition.my_global_ids[i];

  update_ghost_edges(xxx, (void *) &Edge_Partition);


  /* Create an empty multigrid hierarchy and set the 'MaxMGLevels-1'th   */
  /* level discretization within this hierarchy to the ML matrix         */
  /* representing Ke (Maxwell edge discretization).                      */

  ML_Create(&ml_edges, MaxMgLevels);
#ifdef AZTEC
  /* Build Ke as an Aztec matrix. Use built-in function AZ_ML_Set_Amat() */
  /* to convert to an ML matrix and put in hierarchy.                    */

  Ke_mat = user_Ke_build(&Edge_Partition);
  AZ_ML_Set_Amat(ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal,
      		 Edge_Partition.Nlocal, Ke_mat, proc_config);
#else
  /* Build Ke directly as an ML matrix.                                  */

  ML_Init_Amatrix      (ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal,
			Edge_Partition.Nlocal, &Edge_Partition);

  Ntotal = Edge_Partition.Nlocal;
  if (Edge_Partition.nprocs == 2) Ntotal += Edge_Partition.Nghost;
  ML_Set_Amatrix_Getrow(ml_edges, MaxMgLevels-1,  Ke_getrow, update_ghost_edges, Ntotal);
  ML_Set_Amatrix_Matvec(ml_edges, MaxMgLevels-1,  Ke_matvec);

#endif



  /* Build an Aztec matrix representing an auxiliary nodal PDE problem.  */
  /* This should be a variable coefficient Poisson problem (with unknowns*/
  /* at the nodes). The coefficients should be chosen to reflect the     */
  /* conductivity of the original edge problems.                         */
  /* Create an empty multigrid hierarchy. Convert the Aztec matrix to an */
  /* ML matrix and put it in the 'MaxMGLevels-1' level of the hierarchy. */
  /* Note it is possible to multiply T'*T for get this matrix though this*/
  /* will not incorporate material properties.                           */

  ML_Create(&ml_nodes, MaxMgLevels);

#ifdef AZTEC
  Kn_mat = user_Kn_build( &Node_Partition);
  AZ_ML_Set_Amat(ml_nodes, MaxMgLevels-1, Node_Partition.Nlocal, 
		 Node_Partition.Nlocal, Kn_mat, proc_config);
#else
  ML_Init_Amatrix      (ml_nodes, MaxMgLevels-1 , Node_Partition.Nlocal,
			Node_Partition.Nlocal, &Node_Partition);
  Ntotal = Node_Partition.Nlocal;
  if (Node_Partition.nprocs == 2) Ntotal += Node_Partition.Nghost;
  ML_Set_Amatrix_Getrow(ml_nodes, MaxMgLevels-1,  Kn_getrow, update_ghost_nodes, Ntotal);
#endif

  /* Build an ML matrix representing the null space of the PDE problem. */
  /* This should be a discrete gradient (nodes to edges).               */

#ifdef AZTEC
    Tmat = user_T_build (&Edge_Partition, &Node_Partition, 
  		   &(ml_nodes->Amat[MaxMgLevels-1]));
#else
    Tmat = ML_Operator_Create(ml_nodes->comm);
    Tmat_data.edge = &Edge_Partition;
    Tmat_data.node = &Node_Partition;
    Tmat_data.Kn   = &(ml_nodes->Amat[MaxMgLevels-1]);

    ML_Operator_Set_ApplyFuncData( Tmat,	Node_Partition.Nlocal,
				   Edge_Partition.Nlocal, ML_EMPTY, (void *) &Tmat_data, 
				   Edge_Partition.Nlocal, NULL, 0);
    ML_Operator_Set_Getrow( Tmat, ML_INTERNAL, Edge_Partition.Nlocal,Tmat_getrow);
    ML_Operator_Set_ApplyFunc(Tmat, ML_INTERNAL, Tmat_matvec);
  ML_Comm_Create( &comm);

  ML_CommInfoOP_Generate( &(Tmat->getrow->pre_comm), update_ghost_nodes, 
			  &Node_Partition,comm, Tmat->invec_leng, 
			  Node_Partition.Nghost);
#endif


  /********************************************************************/
  /* Set some ML parameters.                                          */
  /*------------------------------------------------------------------*/
	
  ML_Set_ResidualOutputFrequency(ml_edges, 1);
  ML_Set_Tolerance(ml_edges, 1.0e-8);
  ML_Aggregate_Create( &ag );
  ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag);
  ML_Aggregate_Set_DampingFactor(ag, 0.0); /* must use 0 for maxwell */
  ML_Aggregate_Set_MaxCoarseSize(ag, 30);
  ML_Aggregate_Set_Threshold(ag, 0.0);


  /********************************************************************/
  /*                      Set up Tmat_trans                           */
  /*------------------------------------------------------------------*/

  Tmat_trans = ML_Operator_Create(ml_edges->comm);
  ML_Operator_Transpose_byrow(Tmat, Tmat_trans);


  Nlevels=ML_Gen_MGHierarchy_UsingReitzinger(ml_edges, &ml_nodes,MaxMgLevels-1,
					     ML_DECREASING,ag,Tmat,Tmat_trans, 
					     &Tmat_array,&Tmat_trans_array, 
					     smoothPe_flag, 1.5);

  /* Set the Hiptmair subsmoothers */

  if (nodal_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) {
    nodal_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its);
    ML_Smoother_Arglist_Set(nodal_args, 1, &nodal_omega);
  }
  if (edge_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) {
    edge_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(edge_args, 0, &edge_its);
    ML_Smoother_Arglist_Set(edge_args, 1, &edge_omega);
  }
  if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
    nodal_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its);
    Nfine_node = Tmat_array[MaxMgLevels-1]->invec_leng;
    Nfine_node = ML_gsum_int(Nfine_node, ml_edges->comm);
  }
  if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
    edge_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(edge_args, 0, &edge_its);
    Nfine_edge = Tmat_array[MaxMgLevels-1]->outvec_leng;
    Nfine_edge = ML_gsum_int(Nfine_edge, ml_edges->comm);
  }

  /****************************************************
  * Set up smoothers for all levels but the coarsest. *
  ****************************************************/
  coarsest_level = MaxMgLevels - Nlevels;

  for (level = MaxMgLevels-1; level > coarsest_level; level--)
    {
      if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
	Ncoarse_edge = Tmat_array[level-1]->outvec_leng;
	Ncoarse_edge = ML_gsum_int(Ncoarse_edge, ml_edges->comm);
	edge_coarsening_rate =  2.*((double) Nfine_edge)/ ((double) Ncoarse_edge);
	ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate);
	Nfine_edge = Ncoarse_edge;
      }
      if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
	Ncoarse_node = Tmat_array[level-1]->invec_leng;
	Ncoarse_node = ML_gsum_int(Ncoarse_node, ml_edges->comm);
	node_coarsening_rate =  2.*((double) Nfine_node)/ ((double) Ncoarse_node);
	ML_Smoother_Arglist_Set(nodal_args, 1, &node_coarsening_rate);
	Nfine_node = Ncoarse_node;
      }
      ML_Gen_Smoother_Hiptmair(ml_edges, level, ML_BOTH, Nits_per_presmooth,
			       Tmat_array, Tmat_trans_array, NULL, edge_smoother,
			       edge_args, nodal_smoother,nodal_args, hiptmair_type);
    }

  /*******************************************
  * Set up coarsest level smoother
  *******************************************/

  if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
    edge_coarsening_rate = (double) Nfine_edge;
    ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate);
  }
  if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
    node_coarsening_rate = (double) Nfine_node;
    ML_Smoother_Arglist_Set(nodal_args,1,&node_coarsening_rate);
  }
  ML_Gen_CoarseSolverSuperLU( ml_edges, coarsest_level);
  

  /* Must be called before invoking the preconditioner */
  ML_Gen_Solver(ml_edges, ML_MGV, MaxMgLevels-1, coarsest_level); 



  /* Set the initial guess and the right hand side. Invoke solver */

  ML_free(xxx);   /* release the test vectors allocated earlier */
  ML_free(rhs);
  xxx = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double));
  ML_random_vec(xxx, Edge_Partition.Nlocal, ml_edges->comm);
  rhs = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double));
  ML_random_vec(rhs, Edge_Partition.Nlocal, ml_edges->comm);

#ifdef AZTEC
  /* Choose the Aztec solver and criteria. Also tell Aztec that */
  /* ML will be supplying the preconditioner.                   */

  AZ_defaults(options, params);
  options[AZ_solver]   = AZ_gmres;   /* alternative: AZ_fixed_pt */
  options[AZ_kspace]   = 80;
  params[AZ_tol]       = tolerance;
  AZ_set_ML_preconditioner(&Pmat, Ke_mat, ml_edges, options); 
  options[AZ_conv] = AZ_noscaled;
  AZ_iterate(xxx, rhs, options, params, status, proc_config, Ke_mat, Pmat, NULL);
#else
  ML_Iterate(ml_edges, xxx, rhs);
#endif


  /* clean up. */

  ML_Smoother_Arglist_Delete(&nodal_args);
  ML_Smoother_Arglist_Delete(&edge_args);
  ML_Aggregate_Destroy(&ag);
  ML_Destroy(&ml_edges);
  ML_Destroy(&ml_nodes);
#ifdef AZTEC
  AZ_free((void *) Ke_mat->data_org);
  AZ_free((void *) Ke_mat->val);
  AZ_free((void *) Ke_mat->bindx);
  if (Ke_mat  != NULL) AZ_matrix_destroy(&Ke_mat);
  if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
  if (Kn_mat != NULL) AZ_matrix_destroy(&Kn_mat);
#endif
  ML_free(xxx);
  ML_free(rhs);
  ML_Operator_Destroy(&Tmat);
  ML_Operator_Destroy(&Tmat_trans);
  ML_MGHierarchy_ReitzingerDestroy(MaxMgLevels-2, &Tmat_array, &Tmat_trans_array);

#ifdef ML_MPI
  MPI_Finalize();
#endif
		
  return 0;
		
}
// ================================================ ====== ==== ==== == =
//! Build the face-to-node prolongator described by Bochev, Siefert, Tuminaro, Xu and Zhu (2007).
int ML_Epetra::FaceMatrixFreePreconditioner::BuildProlongator()
{

  /* Wrap TMT_Matrix in a ML_Operator */
  ML_Operator* TMT_ML = ML_Operator_Create(ml_comm_);
  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&*TMT_Matrix_),TMT_ML);

  /* Nodal Aggregation */
  ML_Aggregate_Struct *MLAggr=0;
  ML_Operator *P=0;
  int NumAggregates;
  int rv=ML_Epetra::RefMaxwell_Aggregate_Nodes(*TMT_Matrix_,List_,ml_comm_,std::string("FMFP (level 0) :"),MLAggr,P,NumAggregates);
  if(rv || !P) {if(!Comm_->MyPID()) printf("ERROR: Building nodal P\n");ML_CHK_ERR(-1);}

  /* Build 1-unknown sparsity of prolongator */
  Epetra_CrsMatrix *Psparse=0;
  PBuildSparsity(P,Psparse);
  if(!Psparse) {if(!Comm_->MyPID()) printf("ERROR: Building Psparse\n");ML_CHK_ERR(-2);}

  /* Build the "nullspace" */
  Epetra_MultiVector *nullspace;
  BuildNullspace(nullspace);
  if(!nullspace) {if(!Comm_->MyPID()) printf("ERROR: Building Nullspace\n");ML_CHK_ERR(-3);}


  /* Build the DomainMap of the new operator*/
  const Epetra_Map & FineColMap = Psparse->ColMap();
  CoarseMap_=new Epetra_Map(-1,NumAggregates*dim,0,*Comm_);

  /* Allocate the Prolongator_ */
  int max_nz_per_row=Psparse->MaxNumEntries();
  Prolongator_=new Epetra_CrsMatrix(Copy,*FaceRangeMap_,0);
  int ne1, *idx1, *idx2;
  idx2=new int [dim*max_nz_per_row];
  double *vals1, *vals2;
  vals2=new double[dim*max_nz_per_row];
  int nonzeros;
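  /* Note (added commentary): each row of the 1-unknown pattern Psparse is
     expanded into `dim` unknowns; sparse column j becomes global columns
     j*dim .. j*dim+dim-1, filled with the matching nullspace component and
     normalized by the row's nonzero count. */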

  for(int i=0;i<Prolongator_->NumMyRows();i++){
    Psparse->ExtractMyRowView(i,ne1,vals1,idx1);
    nonzeros=0;
    for(int j=0;j<ne1;j++) nonzeros+=ABS(vals1[j])>0;

    for(int j=0;j<ne1;j++){
      for(int k=0;k<dim;k++) {
        idx2[j*dim+k]=FineColMap.GID(idx1[j])*dim+k;
        //FIX: This works only because there's an implicit linear mapping which
        //we're exploiting.
        if(idx2[j*dim+k]==-1) printf("[%d] ERROR: idx1[j]=%d / idx1[j]*dim+k=%d does not have a GID!\n",Comm_->MyPID(),idx1[j],idx1[j]*dim+k);
        if(vals1[j]==0 ) vals2[j*dim+k]=0;
	else vals2[j*dim+k]=(*nullspace)[k][i] / nonzeros;
      }/*end for*/
    }/*end for*/
    Prolongator_->InsertGlobalValues(FaceRangeMap_->GID(i),dim*ne1,vals2,idx2);
  }/*end for*/


  /* FillComplete / OptimizeStorage for Prolongator*/
  Prolongator_->FillComplete(*CoarseMap_,*FaceRangeMap_);
  Prolongator_->OptimizeStorage();

#ifndef NO_OUTPUT
  /* DEBUG: Dump aggregates */
  Epetra_IntVector AGG(View,*NodeDomainMap_,MLAggr->aggr_info[0]);
  IVOUT(AGG,"agg.dat");
  EpetraExt::RowMatrixToMatlabFile("fmfp_psparse.dat",*Psparse);
  EpetraExt::RowMatrixToMatlabFile("fmfp_prolongator.dat",*Prolongator_);
  EpetraExt::VectorToMatrixMarketFile("fmfp_null0.dat",*(*nullspace)(0));
  EpetraExt::VectorToMatrixMarketFile("fmfp_null1.dat",*(*nullspace)(1));
  EpetraExt::VectorToMatrixMarketFile("fmfp_null2.dat",*(*nullspace)(2));

#endif

  /* EXPERIMENTAL: Normalize Prolongator Columns */
  bool normalize_prolongator=List_.get("face matrix free: normalize prolongator",false);
  if(normalize_prolongator){
    Epetra_Vector n_vector(*CoarseMap_,false);
    Prolongator_->InvColSums(n_vector);
    Prolongator_->RightScale(n_vector);
  }/*end if*/

  /* Post-wrapping to convert to ML indexing */
#ifdef HAVE_ML_EPETRAEXT
  Prolongator_ = dynamic_cast<Epetra_CrsMatrix*>(ModifyEpetraMatrixColMap(*Prolongator_,ProlongatorColMapTrans_,"Prolongator",(verbose_&&!Comm_->MyPID())));
#endif

  /* Cleanup */
  ML_qr_fix_Destroy();
  ML_Aggregate_Destroy(&MLAggr);
  ML_Operator_Destroy(&TMT_ML);
  ML_Operator_Destroy(&P);

  delete nullspace;
  delete Psparse;
  delete [] idx2;
  delete [] vals2;
  return 0;
}/*end BuildProlongator_*/
// ================================================ ====== ==== ==== == =
int ML_Epetra::RefMaxwell_Aggregate_Nodes(const Epetra_CrsMatrix & A, Teuchos::ParameterList & List, ML_Comm * ml_comm, std::string PrintMsg,
					  ML_Aggregate_Struct *& MLAggr,ML_Operator *&P, int &NumAggregates){

  /* Output level */
  bool verbose, very_verbose;
  int OutputLevel = List.get("ML output", -47);
  if(OutputLevel == -47) OutputLevel = List.get("output", 1);
  if(OutputLevel >= 15) very_verbose = verbose = true;
  else if(OutputLevel > 5) {very_verbose = false; verbose = true;}
  else very_verbose = verbose = false;

  /* Wrap A in a ML_Operator */
  ML_Operator* A_ML = ML_Operator_Create(ml_comm);
  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&A),A_ML);

  /* Pull Teuchos Options */
  std::string CoarsenType  = List.get("aggregation: type", "Uncoupled");
  double Threshold    = List.get("aggregation: threshold", 0.0);
  int    NodesPerAggr = List.get("aggregation: nodes per aggregate",
                                  ML_Aggregate_Get_OptimalNumberOfNodesPerAggregate());
  bool UseAux         = List.get("aggregation: aux: enable",false);
  double AuxThreshold = List.get("aggregation: aux: threshold",0.0);
  int  MaxAuxLevels   = List.get("aggregation: aux: max levels",10);


  ML_Aggregate_Create(&MLAggr);
  ML_Aggregate_Set_MaxLevels(MLAggr, 2);
  ML_Aggregate_Set_StartLevel(MLAggr, 0);
  ML_Aggregate_Set_Threshold(MLAggr, Threshold);
  ML_Aggregate_Set_MaxCoarseSize(MLAggr,1);
  MLAggr->cur_level = 0;
  ML_Aggregate_Set_Reuse(MLAggr);
  MLAggr->keep_agg_information = 1;
  P = ML_Operator_Create(ml_comm);

  /* Process Teuchos Options */
  if (CoarsenType == "Uncoupled")
    ML_Aggregate_Set_CoarsenScheme_Uncoupled(MLAggr);
  else if (CoarsenType == "Uncoupled-MIS"){
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }
  else if (CoarsenType == "METIS"){
    ML_Aggregate_Set_CoarsenScheme_METIS(MLAggr);
    ML_Aggregate_Set_NodesPerAggr(0, MLAggr, 0, NodesPerAggr);
  }/*end if*/
  else {
    if(!A.Comm().MyPID()) printf("%s Unsupported (1,1) block aggregation type(%s), resetting to uncoupled-mis\n",PrintMsg.c_str(),CoarsenType.c_str());
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }

  /* Setup Aux Data */
  if(UseAux) {
    A_ML->aux_data->enable=1;
    A_ML->aux_data->threshold=AuxThreshold;
    A_ML->aux_data->max_level=MaxAuxLevels;
    ML_Init_Aux(A_ML,List);
    if(verbose && !A.Comm().MyPID()) {
      printf("%s Using auxiliary matrix\n",PrintMsg.c_str());
      printf("%s aux threshold = %e\n",PrintMsg.c_str(),A_ML->aux_data->threshold);
    }
  }

  /* Aggregate Nodes */
  int printlevel=ML_Get_PrintLevel();
  if(verbose) ML_Set_PrintLevel(10);
  NumAggregates = ML_Aggregate_Coarsen(MLAggr,A_ML, &P, ml_comm);
  if(verbose) ML_Set_PrintLevel(printlevel);

  if (NumAggregates == 0){
    std::cerr << "Found 0 aggregates, perhaps the problem is too small." << std::endl;
    ML_CHK_ERR(-2);
  }/*end if*/
  else if(very_verbose) printf("[%d] %s %d aggregates created invec_leng=%d\n",A.Comm().MyPID(),PrintMsg.c_str(),NumAggregates,P->invec_leng);

  if(verbose){
    int globalAggs=0;
    A.Comm().SumAll(&NumAggregates,&globalAggs,1);
    if(!A.Comm().MyPID()) {
      printf("%s Aggregation threshold = %e\n",PrintMsg.c_str(),Threshold);
      printf("%s Global aggregates     = %d\n",PrintMsg.c_str(),globalAggs);

    }
  }

  /* Cleanup */
  ML_qr_fix_Destroy();
  if(UseAux) ML_Finalize_Aux(A_ML);
  ML_Operator_Destroy(&A_ML);

  return 0;
}
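// A minimal sketch (added for illustration, not from the original source) of
// the ParameterList keys RefMaxwell_Aggregate_Nodes consumes, with the
// defaults read above:
//
//   Teuchos::ParameterList List;
//   List.set("aggregation: type", "Uncoupled");   // or "Uncoupled-MIS", "METIS"
//   List.set("aggregation: threshold", 0.0);
//   List.set("aggregation: aux: enable", false);
//   List.set("aggregation: aux: threshold", 0.0);
//   List.set("aggregation: aux: max levels", 10);
// ================================================ ====== ==== ==== == =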
int main(int argc, char *argv[])
{
  int    Nnodes=32*32;              /* Total number of nodes in the problem.*/
                                    /* 'Nnodes' must be a perfect square.   */

  struct       user_partition Edge_Partition = {NULL, NULL,0,0,NULL,0,0,0}, 
                                Node_Partition = {NULL, NULL,0,0,NULL,0,0,0};

  int          proc_config[AZ_PROC_SIZE];

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
#endif

  AZ_set_proc_config(proc_config, COMMUNICATOR);
  ML_Comm* comm;
  ML_Comm_Create(&comm);

  Node_Partition.Nglobal = Nnodes;
  Edge_Partition.Nglobal = Node_Partition.Nglobal*2;

  user_partition_nodes(&Node_Partition);
  user_partition_edges(&Edge_Partition, &Node_Partition);
  
  AZ_MATRIX * AZ_Ke = user_Ke_build(&Edge_Partition);
  AZ_MATRIX * AZ_Kn = user_Kn_build(&Node_Partition);

  // convert (put wrappers) from Aztec matrices to ML_Operator's

  ML_Operator * ML_Ke, * ML_Kn, * ML_Tmat;

  ML_Ke = ML_Operator_Create( comm );
  ML_Kn = ML_Operator_Create( comm );

  AZ_convert_aztec_matrix_2ml_matrix(AZ_Ke,ML_Ke,proc_config);
  AZ_convert_aztec_matrix_2ml_matrix(AZ_Kn,ML_Kn,proc_config);

  ML_Tmat = user_T_build(&Edge_Partition, &Node_Partition, 
		      ML_Kn, comm);

  Epetra_CrsMatrix * Epetra_Kn, * Epetra_Ke, * Epetra_T;
  
  int MaxNumNonzeros;
  double CPUTime;

  ML_Operator2EpetraCrsMatrix(ML_Ke,Epetra_Ke,
			      MaxNumNonzeros,
			      true,CPUTime);

  ML_Operator2EpetraCrsMatrix(ML_Kn,
			      Epetra_Kn,MaxNumNonzeros,
			      true,CPUTime);

  ML_Operator2EpetraCrsMatrix(ML_Tmat,Epetra_T,MaxNumNonzeros,
			      true,CPUTime);  

  Teuchos::ParameterList MLList;
  ML_Epetra::SetDefaults("maxwell", MLList);
  
  MLList.set("ML output", 0);

  MLList.set("aggregation: type", "Uncoupled");
  MLList.set("coarse: max size", 30);
  MLList.set("aggregation: threshold", 0.0);

  MLList.set("coarse: type", "Amesos-KLU");

  ML_Epetra::MultiLevelPreconditioner * MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(*Epetra_Ke, *Epetra_T, *Epetra_Kn,
					    MLList);

  Epetra_Vector LHS(Epetra_Ke->DomainMap()); LHS.Random();
  Epetra_Vector RHS(Epetra_Ke->DomainMap()); RHS.PutScalar(1.0);
  
  Epetra_LinearProblem Problem(Epetra_Ke,&LHS,&RHS);
  AztecOO solver(Problem);
  solver.SetPrecOperator(MLPrec);

  solver.SetAztecOption(AZ_solver, AZ_cg_condnum);
  solver.SetAztecOption(AZ_output, 32);
  solver.Iterate(500, 1e-8);

  // ========================= //
  // compute the real residual //
  // ========================= //

  Epetra_Vector RHScomp(Epetra_Ke->DomainMap());
  int ierr;
  ierr = Epetra_Ke->Multiply(false, LHS, RHScomp);
  assert(ierr==0);

  Epetra_Vector resid(Epetra_Ke->DomainMap());

  ierr = resid.Update(1.0, RHS, -1.0, RHScomp, 0.0);
  assert(ierr==0);

  double residual;
  ierr = resid.Norm2(&residual);
  assert(ierr==0);
  if (proc_config[AZ_node] == 0) {
    std::cout << std::endl;
    std::cout << "==> Residual = " << residual << std::endl;
    std::cout << std::endl;
  }

  // =============== //
  // C L E A N   U P //
  // =============== //
  
  delete MLPrec;    // destroy phase prints out some information
  delete Epetra_Kn;
  delete Epetra_Ke;
  delete Epetra_T;
  
  ML_Operator_Destroy( &ML_Ke );
  ML_Operator_Destroy( &ML_Kn );
  ML_Comm_Destroy( &comm );

  if (Edge_Partition.my_local_ids != NULL) free(Edge_Partition.my_local_ids);
  if (Node_Partition.my_local_ids != NULL) free(Node_Partition.my_local_ids);
  if (Node_Partition.my_global_ids != NULL) free(Node_Partition.my_global_ids);
  if (Edge_Partition.my_global_ids != NULL) free(Edge_Partition.my_global_ids);
  if (Node_Partition.needed_external_ids != NULL) 
    free(Node_Partition.needed_external_ids);
  if (Edge_Partition.needed_external_ids != NULL) 
    free(Edge_Partition.needed_external_ids);

  if (AZ_Ke!= NULL) {
    AZ_free(AZ_Ke->bindx);
    AZ_free(AZ_Ke->val);
    AZ_free(AZ_Ke->data_org);
    AZ_matrix_destroy(&AZ_Ke);
  }
  if (AZ_Kn!= NULL) {
    AZ_free(AZ_Kn->bindx);
    AZ_free(AZ_Kn->val);
    AZ_free(AZ_Kn->data_org);
    AZ_matrix_destroy(&AZ_Kn);
  }

  ML_Operator_Destroy(&ML_Tmat);

  if (residual > 1e-5) {
    std::cout << "`MultiLevelPreconditioner_Maxwell.exe' failed!" << std::endl;
    exit(EXIT_FAILURE);
  }

#ifdef ML_MPI
  MPI_Finalize();
#endif
		
  if (proc_config[AZ_node] == 0)
    std::cout << "`MultiLevelPreconditioner_Maxwell.exe' passed!" << std::endl;
  exit(EXIT_SUCCESS);
		
}