// ================================================ ====== ==== ==== == = 
// Computes C= <me> * A
int  ML_Epetra::ML_RefMaxwell_11_Operator::MatrixMatrix_Multiply(const Epetra_CrsMatrix & A, ML_Comm *comm, ML_Operator **C) const
{
  ML_Operator *SM_ML,*A_ML,*temp1,*temp2;

  /* General Stuff */  
  ML_Comm* temp = global_comm;
  A_ML  = ML_Operator_Create(comm);
  *C = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)&A,A_ML);  
  
  /* Do the SM part */
  SM_ML = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)SM_Matrix_,SM_ML);
  ML_2matmult(SM_ML,A_ML,temp1,ML_CSR_MATRIX);
  ML_Matrix_Print(temp1,*Comm_,*RangeMap_,"smp.dat");

  /* Do the Addon part */
  Addon_->MatrixMatrix_Multiply(A,comm,&temp2);
  ML_Matrix_Print(temp2,*Comm_,*RangeMap_,"add_p.dat");
  
  /* Add the matrices together */
  ML_Operator_Add(temp2,temp1,*C,ML_CSR_MATRIX,1.0);
  ML_Matrix_Print(*C,*Comm_,*RangeMap_,"tfinal.dat");  
  
  /* Cleanup */
  global_comm = temp;
  ML_Operator_Destroy(&A_ML);
  ML_Operator_Destroy(&SM_ML);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&temp2);

  return 0;
}/*end MatrixMatrix_Multiply*/
// ================================================ ====== ==== ==== == =
// Forms the coarse matrix, given the prolongator
int  ML_Epetra::FaceMatrixFreePreconditioner::FormCoarseMatrix()
{
  CoarseMat_ML = ML_Operator_Create(ml_comm_);
  CoarseMat_ML->data_destroy=free;
  ML_Operator *Temp_ML=0;
  ML_Operator *R= ML_Operator_Create(ml_comm_);
  ML_Operator *P= ML_Operator_Create(ml_comm_);

  /* Build ML_Operator version of Prolongator_, Restriction Operator */
  ML_CHK_ERR(ML_Operator_WrapEpetraCrsMatrix(Prolongator_,P,verbose_));
  P->num_rigid=P->num_PDEs=dim;

  //NTS: ML_CHK_ERR won't work on this: it returns 1
  ML_Operator_Transpose_byrow(P, R);

  /* OPTION: Disable the addon */
  const Epetra_CrsMatrix *Op11crs = dynamic_cast<const Epetra_CrsMatrix*>(&*Operator_);
  const Epetra_Operator_With_MatMat *Op11mm = dynamic_cast<const Epetra_Operator_With_MatMat*>(&*Operator_);

  /* Do the A*P  with or without addon*/
  if(Op11crs){
    if(verbose_ && !Comm_->MyPID()) printf("FMFP: Running *without* addon\n");
    ML_Operator *SM_ML = ML_Operator_Create(ml_comm_);
    Temp_ML = ML_Operator_Create(ml_comm_);
    ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Op11crs,SM_ML,verbose_);
    ML_2matmult(SM_ML,P,Temp_ML,ML_CSR_MATRIX);
    ML_Operator_Destroy(&SM_ML);
  }
  else if(Op11mm){
    if(verbose_ && !Comm_->MyPID()) printf("FMFP: Running with addon\n");
    ML_CHK_ERR(Op11mm->MatrixMatrix_Multiply(*Prolongator_,ml_comm_,&Temp_ML));
  }
  else{
    if(!Comm_->MyPID()) printf("ERROR: FMFP Illegal Operator\n");
    delete R;
    ML_CHK_ERR(-1);
  }

  /* Do R * AP */
  R->num_rigid=R->num_PDEs=dim;
  ML_2matmult_block(R, Temp_ML,CoarseMat_ML,ML_CSR_MATRIX);

  /* Wrap to Epetra-land */
  int nnz=100;
  double time;
  ML_Operator2EpetraCrsMatrix(CoarseMat_ML,CoarseMatrix,nnz,true,time,0,verbose_);
  // NTS: This is a hack to get around the sticking ones on the diagonal issue;

  /* Cleanup */
  ML_Operator_Destroy(&P);
  ML_Operator_Destroy(&R);
  ML_Operator_Destroy(&Temp_ML);
  ML_Operator_Destroy(&CoarseMat_ML);CoarseMat_ML=0;//HAX
  return 0;
}/*end FormCoarseMatrix*/
// ================================================ ====== ==== ==== == =
// Computes C= <me> * A
int  ML_Epetra::Epetra_Multi_CrsMatrix::MatrixMatrix_Multiply(const Epetra_CrsMatrix & A, ML_Comm *comm, ML_Operator **C) const
{

  int rv=0;
  ML_Comm* temp = global_comm;

  /* Setup for 1st Matmat */
  ML_Operator * MV[2]={0,0},*CV;
  MV[(NumMatrices_-1)%2]= ML_Operator_Create(comm);
  rv=ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)&A,MV[(NumMatrices_-1)%2]);
  ML_CHK_ERR(rv);

  /* Do the matmats */
  for(int i=NumMatrices_-1;rv==0 && i>=0;i--){
    /* Do pre-wraps */
    if(MV[(i+1)%2] && i!=NumMatrices_-1) ML_Operator_Destroy(&MV[(i+1)%2]);
    MV[(i+1)%2]=ML_Operator_Create(comm);
    CV=ML_Operator_Create(comm);
    rv=ML_Operator_WrapEpetraCrsMatrix(CrsMatrices_[i],CV);
    ML_CHK_ERR(rv);

    /* Do matmat */
    ML_2matmult(CV,MV[i%2],MV[(i+1)%2],ML_CSR_MATRIX);

    ML_Operator_Destroy(&CV);
  }/*end for*/
  global_comm = temp;

  /* Final Postwrap */
  *C=MV[1];

  /* Cleanup */
  if(MV[0]) ML_Operator_Destroy(&MV[0]);

  return rv;
}/*end MatrixMatrix_Multiply*/
// ================================================ ====== ==== ==== == =
//! Build the face-to-node prolongator described by Bochev, Siefert, Tuminaro, Xu and Zhu (2007).
int ML_Epetra::FaceMatrixFreePreconditioner::PBuildSparsity(ML_Operator *P, Epetra_CrsMatrix *&Psparse){

  /* Create wrapper to do abs(T) */
  // NTS: Assume D0 has already been reindexed by now.
  ML_Operator* AbsFN_ML = ML_Operator_Create(ml_comm_);
  ML_CHK_ERR(ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&*FaceNode_Matrix_),AbsFN_ML,verbose_));
  ML_Operator_Set_Getrow(AbsFN_ML,AbsFN_ML->outvec_leng,CSR_getrow_ones);

  /* Form abs(T) * P_n */
  ML_Operator* AbsFNP = ML_Operator_Create(ml_comm_);
  ML_2matmult(AbsFN_ML,P,AbsFNP, ML_CSR_MATRIX);

  /* Wrap P_n into Epetra-land */
  Epetra_CrsMatrix_Wrap_ML_Operator(AbsFNP,*Comm_,*FaceRangeMap_,&Psparse,Copy,0);

  /* Nuke the rows in Psparse */
  if(BCfaces_.size()>0) Apply_BCsToMatrixRows(BCfaces_.get(),BCfaces_.size(),*Psparse);

  // Cleanup
  ML_Operator_Destroy(&AbsFN_ML);
  ML_Operator_Destroy(&AbsFNP);

  return 0;
}
// ================================================ ====== ==== ==== == = 
// Computes C= A^T * <me> * A.  OptimizeStorage *must* be called for both A and the
// matrices in *this, before this routine can work.
int ML_Epetra::ML_RefMaxwell_11_Operator::PtAP(const Epetra_CrsMatrix & P, ML_Comm *comm, ML_Operator **C) const{
  ML_Operator *SM_ML,*P_ML,*R_ML,*PtSMP_ML,*temp1,*temp2,*opwrap,*D0_M1_P_ML;

  /* General Stuff */  
  ML_Comm* temp = global_comm;
  P_ML  = ML_Operator_Create(comm);
  R_ML  = ML_Operator_Create(comm);;
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)&P,P_ML);  
  ML_Operator_Transpose_byrow(P_ML,R_ML);

  /* Do the SM part */
  SM_ML = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  PtSMP_ML  = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)SM_Matrix_,SM_ML);
  ML_2matmult(SM_ML,P_ML,temp1,ML_CSR_MATRIX);
  ML_2matmult_block(R_ML,temp1,PtSMP_ML,ML_CSR_MATRIX);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&SM_ML);
#ifdef MANUALLY_TRANSPOSE_D0
  ML_Operator_Destroy(&R_ML);
#endif
  ML_Matrix_Print(PtSMP_ML,*Comm_,*RangeMap_,"ptsmp.dat");

#ifdef MANUALLY_TRANSPOSE_D0
  /* Do the Addon: Step #1: M1 * P*/
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[4],opwrap);
  ML_2matmult(opwrap,P_ML,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&P_ML);

  /* Do the Addon: Step #2: D0^T *(M1 * P)*/
  opwrap = ML_Operator_Create(comm);
  D0_M1_P_ML = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[3],opwrap);
  ML_2matmult(opwrap,temp1,D0_M1_P_ML,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);
  
  /* Do the Addon: Step #3: M0^{-1} * (D0^T * M1 * P)*/
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[2],opwrap);
  ML_2matmult(opwrap,D0_M1_P_ML,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);  

  /* Do the Addon: Step #4: Transpose (D0^T * M1 * P) & multiply by output from Step 3*/
  opwrap = ML_Operator_Create(comm);
  temp2 = ML_Operator_Create(comm);  
  ML_Operator_Transpose_byrow(D0_M1_P_ML,opwrap);
  ML_2matmult(opwrap,temp1,temp2,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&D0_M1_P_ML);
  ML_Matrix_Print(temp2,*Comm_,*RangeMap_,"pt_add_p.dat");
#else
  ML_Operator *P_M1_D0_ML;

  /* Do the Addon: Step #1: P^T * M1 */
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[0],opwrap);
  ML_2matmult_block(R_ML,opwrap,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&P_ML);
  ML_Operator_Destroy(&R_ML);
  
  /* Do the Addon: Step #2: (P^T * M1) * D0*/
  opwrap = ML_Operator_Create(comm);
  P_M1_D0_ML = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[1],opwrap);
  ML_2matmult_block(temp1,opwrap,P_M1_D0_ML,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);  

  /* Do the Addon: Step #3: (P^T * M1 * D0) * M0^{-1} */
  opwrap = ML_Operator_Create(comm);
  temp1 = ML_Operator_Create(comm);
  ML_Operator_WrapEpetraCrsMatrix((Epetra_CrsMatrix*)Addon_Matrix_[2],opwrap);
  ML_2matmult(P_M1_D0_ML,opwrap,temp1,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);  

  /* Do the Addon: Step #4: Transpose (P^T * M1 * D0) & multiply by output from Step 3*/
  opwrap = ML_Operator_Create(comm);
  temp2 = ML_Operator_Create(comm);  
  ML_Operator_Transpose_byrow(P_M1_D0_ML,opwrap);
  ML_2matmult(temp1,opwrap,temp2,ML_CSR_MATRIX);
  ML_Operator_Destroy(&opwrap);
  ML_Operator_Destroy(&temp1);
  ML_Operator_Destroy(&P_M1_D0_ML);
  ML_Matrix_Print(temp2,*Comm_,*RangeMap_,"pt_add_p_rev.dat");   
#endif

  /* Add the matrices together */
  ML_Operator_Add(PtSMP_ML,temp2,*C,ML_CSR_MATRIX,1.0);
  ML_Matrix_Print(*C,*Comm_,*RangeMap_,"ptap.dat");  

  /* Cleanup */
  global_comm = temp;
  ML_Operator_Destroy(&temp2);
  ML_Operator_Destroy(&PtSMP_ML);

  return 0;
}
// ================================================ ====== ==== ==== == =
//! Build the face-to-node prolongator described by Bochev, Siefert, Tuminaro, Xu and Zhu (2007).
int ML_Epetra::FaceMatrixFreePreconditioner::BuildProlongator()
{

  /* Wrap TMT_Matrix in a ML_Operator */
  ML_Operator* TMT_ML = ML_Operator_Create(ml_comm_);
  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&*TMT_Matrix_),TMT_ML);

  /* Nodal Aggregation */
  ML_Aggregate_Struct *MLAggr=0;
  ML_Operator *P=0;
  int NumAggregates;
  int rv=ML_Epetra::RefMaxwell_Aggregate_Nodes(*TMT_Matrix_,List_,ml_comm_,std::string("FMFP (level 0) :"),MLAggr,P,NumAggregates);
  if(rv || !P) {if(!Comm_->MyPID()) printf("ERROR: Building nodal P\n");ML_CHK_ERR(-1);}

  /* Build 1-unknown sparsity of prolongator */
  Epetra_CrsMatrix *Psparse=0;
  PBuildSparsity(P,Psparse);
  if(!Psparse) {if(!Comm_->MyPID()) printf("ERROR: Building Psparse\n");ML_CHK_ERR(-2);}

  /* Build the "nullspace" */
  Epetra_MultiVector *nullspace;
  BuildNullspace(nullspace);
  if(!nullspace) {if(!Comm_->MyPID()) printf("ERROR: Building Nullspace\n");ML_CHK_ERR(-3);}


  /* Build the DomainMap of the new operator*/
  const Epetra_Map & FineColMap = Psparse->ColMap();
  CoarseMap_=new Epetra_Map(-1,NumAggregates*dim,0,*Comm_);

  /* Allocate the Prolongator_ */
  int max_nz_per_row=Psparse->MaxNumEntries();
  Prolongator_=new Epetra_CrsMatrix(Copy,*FaceRangeMap_,0);
  int ne1, *idx1, *idx2;
  idx2=new int [dim*max_nz_per_row];
  double *vals1, *vals2;
  vals2=new double[dim*max_nz_per_row];
  int nonzeros;

  for(int i=0;i<Prolongator_->NumMyRows();i++){
    Psparse->ExtractMyRowView(i,ne1,vals1,idx1);
    nonzeros=0;
    for(int j=0;j<ne1;j++) nonzeros+=ABS(vals1[j])>0;

    for(int j=0;j<ne1;j++){
      for(int k=0;k<dim;k++) {
        idx2[j*dim+k]=FineColMap.GID(idx1[j])*dim+k;
        //FIX: This works only because there's an implicit linear mapping which
        //we're exploiting.
        if(idx2[j*dim+k]==-1) printf("[%d] ERROR: idx1[j]=%d / idx1[j]*dim+k=%d does not have a GID!\n",Comm_->MyPID(),idx1[j],idx1[j]*dim+k);
        if(vals1[j]==0 ) vals2[j*dim+k]=0;
	else vals2[j*dim+k]=(*nullspace)[k][i] / nonzeros;
      }/*end for*/
    }/*end for*/
    Prolongator_->InsertGlobalValues(FaceRangeMap_->GID(i),dim*ne1,vals2,idx2);
  }/*end for*/


  /* FillComplete / OptimizeStorage for Prolongator*/
  Prolongator_->FillComplete(*CoarseMap_,*FaceRangeMap_);
  Prolongator_->OptimizeStorage();

#ifndef NO_OUTPUT
  /* DEBUG: Dump aggregates */
  Epetra_IntVector AGG(View,*NodeDomainMap_,MLAggr->aggr_info[0]);
  IVOUT(AGG,"agg.dat");
  EpetraExt::RowMatrixToMatlabFile("fmfp_psparse.dat",*Psparse);
  EpetraExt::RowMatrixToMatlabFile("fmfp_prolongator.dat",*Prolongator_);
  EpetraExt::VectorToMatrixMarketFile("fmfp_null0.dat",*(*nullspace)(0));
  EpetraExt::VectorToMatrixMarketFile("fmfp_null1.dat",*(*nullspace)(1));
  EpetraExt::VectorToMatrixMarketFile("fmfp_null2.dat",*(*nullspace)(2));

#endif

  /* EXPERIMENTAL: Normalize Prolongator Columns */
  bool normalize_prolongator=List_.get("face matrix free: normalize prolongator",false);
  if(normalize_prolongator){
    Epetra_Vector n_vector(*CoarseMap_,false);
    Prolongator_->InvColSums(n_vector);
    Prolongator_->RightScale(n_vector);
  }/*end if*/

  /* Post-wrapping to convert to ML indexing */
#ifdef HAVE_ML_EPETRAEXT
  Prolongator_ = dynamic_cast<Epetra_CrsMatrix*>(ModifyEpetraMatrixColMap(*Prolongator_,ProlongatorColMapTrans_,"Prolongator",(verbose_&&!Comm_->MyPID())));
#endif

  /* Cleanup */
  ML_qr_fix_Destroy();
  ML_Aggregate_Destroy(&MLAggr);
  ML_Operator_Destroy(&TMT_ML);
  ML_Operator_Destroy(&P);

  delete nullspace;
  delete Psparse;
  delete [] idx2;
  delete [] vals2;
  return 0;
}/*end BuildProlongator_*/
// ================================================ ====== ==== ==== == =
int ML_Epetra::RefMaxwell_Aggregate_Nodes(const Epetra_CrsMatrix & A, Teuchos::ParameterList & List, ML_Comm * ml_comm, std::string PrintMsg,
					  ML_Aggregate_Struct *& MLAggr,ML_Operator *&P, int &NumAggregates){

  /* Output level */
  bool verbose, very_verbose;
  int OutputLevel = List.get("ML output", -47);
  if(OutputLevel == -47) OutputLevel = List.get("output", 1);
  if(OutputLevel>=15) very_verbose=verbose=true;
  if(OutputLevel > 5) {very_verbose=false;verbose=true;}
  else very_verbose=verbose=false;

  /* Wrap A in a ML_Operator */
  ML_Operator* A_ML = ML_Operator_Create(ml_comm);
  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&A),A_ML);

 /* Pull Teuchos Options */
  std::string CoarsenType  = List.get("aggregation: type", "Uncoupled");
  double Threshold    = List.get("aggregation: threshold", 0.0);
  int    NodesPerAggr = List.get("aggregation: nodes per aggregate",
                                  ML_Aggregate_Get_OptimalNumberOfNodesPerAggregate());
  bool UseAux         = List.get("aggregation: aux: enable",false);
  double AuxThreshold = List.get("aggregation: aux: threshold",0.0);
  int  MaxAuxLevels   = List.get("aggregation: aux: max levels",10);


  ML_Aggregate_Create(&MLAggr);
  ML_Aggregate_Set_MaxLevels(MLAggr, 2);
  ML_Aggregate_Set_StartLevel(MLAggr, 0);
  ML_Aggregate_Set_Threshold(MLAggr, Threshold);
  ML_Aggregate_Set_MaxCoarseSize(MLAggr,1);
  MLAggr->cur_level = 0;
  ML_Aggregate_Set_Reuse(MLAggr);
  MLAggr->keep_agg_information = 1;
  P = ML_Operator_Create(ml_comm);

  /* Process Teuchos Options */
  if (CoarsenType == "Uncoupled")
    ML_Aggregate_Set_CoarsenScheme_Uncoupled(MLAggr);
  else if (CoarsenType == "Uncoupled-MIS"){
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }
  else if (CoarsenType == "METIS"){
    ML_Aggregate_Set_CoarsenScheme_METIS(MLAggr);
    ML_Aggregate_Set_NodesPerAggr(0, MLAggr, 0, NodesPerAggr);
  }/*end if*/
  else {
    if(!A.Comm().MyPID()) printf("%s Unsupported (1,1) block aggregation type(%s), resetting to uncoupled-mis\n",PrintMsg.c_str(),CoarsenType.c_str());
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }

  /* Setup Aux Data */
  if(UseAux) {
    A_ML->aux_data->enable=1;
    A_ML->aux_data->threshold=AuxThreshold;
    A_ML->aux_data->max_level=MaxAuxLevels;
    ML_Init_Aux(A_ML,List);
    if(verbose && !A.Comm().MyPID()) {
      printf("%s Using auxiliary matrix\n",PrintMsg.c_str());
      printf("%s aux threshold = %e\n",PrintMsg.c_str(),A_ML->aux_data->threshold);
    }
  }

  /* Aggregate Nodes */
  int printlevel=ML_Get_PrintLevel();
  if(verbose) ML_Set_PrintLevel(10);
  NumAggregates = ML_Aggregate_Coarsen(MLAggr,A_ML, &P, ml_comm);
  if(verbose) ML_Set_PrintLevel(printlevel);

  if (NumAggregates == 0){
    std::cerr << "Found 0 aggregates, perhaps the problem is too small." << std::endl;
    ML_CHK_ERR(-2);
  }/*end if*/
  else if(very_verbose) printf("[%d] %s %d aggregates created invec_leng=%d\n",A.Comm().MyPID(),PrintMsg.c_str(),NumAggregates,P->invec_leng);

  if(verbose){
    int globalAggs=0;
    A.Comm().SumAll(&NumAggregates,&globalAggs,1);
    if(!A.Comm().MyPID()) {
      printf("%s Aggregation threshold = %e\n",PrintMsg.c_str(),Threshold);
      printf("%s Global aggregates     = %d\n",PrintMsg.c_str(),globalAggs);

    }
  }

  /* Cleanup */
  ML_qr_fix_Destroy();
  if(UseAux) ML_Finalize_Aux(A_ML);
  ML_Operator_Destroy(&A_ML);

  return 0;
}