// Runs one createBalancedCopy test case on `matrix` and reports pass/fail.
//
// Steps performed (only when HAVE_EPETRAEXT is defined):
//   1. Optionally build vertex weights and graph-edge / hyperedge weights and
//      register them with a CostDescriber.
//   2. Compute partition quality metrics for the input distribution.
//   3. Build the Isorropia/Zoltan parameter list when defaults are overridden.
//   4. Create a balanced copy of the object selected by `objectType`.
//   5. Redistribute the weights onto the balanced row map.
//   6. Recompute the metrics and run a matrix-vector multiply validity check.
//
// Parameters:
//   matrix            input matrix; its graph, maps and communicator are used
//   verbose           display the problem before and after balancing
//   contract          request numProcs/2 partitions instead of numProcs
//   partitioningType  GRAPH_PARTITIONING or hypergraph partitioning
//   vertexWeightType  NO_APPLICATION_SUPPLIED_WEIGHTS, SUPPLY_EQUAL_WEIGHTS or
//                     SUPPLY_UNEQUAL_WEIGHTS
//   edgeWeightType    same choices, applied to edge/hyperedge weights
//   objectType        EPETRA_CRSMATRIX, EPETRA_CRSGRAPH, EPETRA_ROWMATRIX or
//                     EPETRA_LINEARPROBLEM
//
// Returns 0 when the new cut metric is no worse than the old one and the
// balanced object passes the multiply check; non-zero otherwise.
//
// NOTE(review): only the createBalancedCopy overloads that take no
// CostDescriber are exercised here, so a balanced object is created only when
// both weight types are NO_APPLICATION_SUPPLIED_WEIGHTS.  Any other
// combination is now reported as an error instead of dereferencing NULL.
static int run_test(Teuchos::RCP<Epetra_CrsMatrix> matrix,
          bool verbose,           // display the graph before & after
          bool contract,          // set global number of partitions to 1/2 num procs
          int partitioningType,   // hypergraph or graph partitioning, or simple
          int vertexWeightType,   // use vertex weights?
          int edgeWeightType,     // use edge/hyperedge weights?
          int objectType)         // use isorropia's CrsMatrix or CrsGraph
{
  int rc=0, fail = 0;
#ifdef HAVE_EPETRAEXT
  int localProc = 0;
  double balance1, balance2, cutn1, cutn2, cutl1, cutl2;
  double balance3, cutn3, cutl3;
  double cutWgt1, cutWgt2, cutWgt3;
  int numCuts1, numCuts2, numCuts3, valid;
  int numPartitions = 0;
  int keepDenseEdges = 0;
  int numProcs = 1;

#ifdef HAVE_MPI
  const Epetra_MpiComm &Comm = dynamic_cast<const Epetra_MpiComm &>(matrix->Comm());
  localProc = Comm.MyPID();
  numProcs = Comm.NumProc();
#else
  const Epetra_SerialComm &Comm = dynamic_cast<const Epetra_SerialComm &>(matrix->Comm());
#endif

  int numRows = matrix->NumGlobalRows();

  if (numRows < (numProcs * 100)){
    // By default Zoltan throws out dense edges, defined as those
    // whose number of non-zeros exceeds 25% of the number of vertices.
    //
    // If dense edges are thrown out of a small matrix, there may be nothing left.
    keepDenseEdges = 1;
  }

  // Target fraction of the total weight for this process: myShareBefore is
  // relative to all processes, myShare is relative to the requested partitions.
  double myShareBefore = 1.0 / numProcs;
  double myShare = myShareBefore;

  if (contract){
    numPartitions = numProcs / 2;

    if (numPartitions > numRows)
      numPartitions = numRows;

    if (numPartitions > 0){
      if (localProc < numPartitions){
        myShare = 1.0 / numPartitions;
      }
      else{
        myShare = 0.0;
      }
    }
    else{
      // Nothing to contract to (e.g. a single process): fall back to the default.
      contract = 0;
    }
  }

  // If we want Zoltan's or Isorropia's default weights, then we don't
  // need to supply a CostDescriber object to createBalancedCopy,
  // so we get to test the API functions that don't take a CostDescriber.

  bool noCosts = ((vertexWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS) &&
                  (edgeWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS));

  // Test the interface that has no parameters, if possible

  bool noParams =
    ((partitioningType == HYPERGRAPH_PARTITIONING) && // default, so requires no params
     (numPartitions == 0) &&                          // >0 would require a parameter
     (keepDenseEdges == 0));                          // >0 would require a parameter

  // Maps for original object
  const Epetra_Map &sourceRowMap = matrix->RowMap();
  const Epetra_Map &sourceRangeMap = matrix->RangeMap();
//   const Epetra_Map &sourceColMap = matrix->ColMap();
  const Epetra_Map &sourceDomainMap = matrix->DomainMap();

  int numCols = matrix->NumGlobalCols();
  int nMyRows = sourceRowMap.NumMyElements();
  int base = sourceRowMap.IndexBase();

  // Compute vertex and edge weights

  Isorropia::Epetra::CostDescriber costs;

  Teuchos::RCP<Epetra_Vector> vptr;

  Teuchos::RCP<Epetra_CrsMatrix> eptr;

  Teuchos::RCP<Epetra_Vector> hyperEdgeWeights;

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      // Create graph edge weights.

      eptr = Teuchos::rcp(new Epetra_CrsMatrix(*matrix));

      // NOTE(review): this tests vertexWeightType although it selects the
      // *edge* weights; the hyperedge branch below tests edgeWeightType.
      // Confirm which is intended before changing it.
      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
        eptr->PutScalar(1.0);   // set all nonzeros to 1.0
      }
      else{
        // Weight of the j-th entry of every local row is localProc + 1 + j.
        int maxRowSize = eptr->MaxNumEntries();
        double *newVal = NULL;
        if (maxRowSize > 0){
          newVal = new double [maxRowSize];
          for (int j=0; j<maxRowSize; j++){
            newVal[j] = localProc + 1 + j;
          }
        }
        int numEntries;
        int *idx;
        double *val;
        for (int i=0; i<nMyRows; i++){
          rc = eptr->ExtractMyRowView(i, numEntries, val, idx);
          if (rc) break;   // numEntries/val are not usable after a failure
          for (int j=0; j<numEntries; j++){
            val[j] = newVal[j];
          }
        }
        delete [] newVal;

        if (rc){
          ERROREXIT((localProc==0), "Error extracting a matrix row to set edge weights")
        }
      }

      eptr->FillComplete(sourceDomainMap, sourceRangeMap);

      costs.setGraphEdgeWeights(eptr);
    }
    else{
      // Create hyperedge weights.  (Note that the list of hyperedges that a
      // process provides weights for has no relation to the columns
      // that it has non-zeroes for, or the rows that is has.  Hypergraphs
      // in general are not square.  Also more than one process can provide
      // a weight for the same edge.  Zoltan combines the weights according
      // to the value of the PHG_EDGE_WEIGHT_OPERATION parameter.  The default
      // for this parameter is to use the maximum edge weight provided by any
      // process for a given hyperedge.)

      Epetra_Map hyperEdgeMap(numCols, base, Comm);

      hyperEdgeWeights = Teuchos::rcp(new Epetra_Vector(hyperEdgeMap));

      int *edgeGIDs = NULL;
      double *weights = NULL;
      int numHEweights = hyperEdgeMap.NumMyElements();

      if (numHEweights){
        edgeGIDs = new int [numHEweights];
        weights = new double [numHEweights];

        if (edgeWeightType == SUPPLY_EQUAL_WEIGHTS){
          for (int i=0; i<numHEweights; i++){
            edgeGIDs[i] = hyperEdgeMap.GID(i);
            weights[i] = 1.0;
          }
        }
        else{
          // The middle third of the columns gets weight 3.0, the rest 1.0.
          int hiVolumeStart = matrix->NumGlobalCols() / 3;
          int hiVolumeEnd = hiVolumeStart * 2;
          for (int i=0; i<numHEweights; i++){
            edgeGIDs[i] = hyperEdgeMap.GID(i);
            if ((edgeGIDs[i] < hiVolumeStart) || (edgeGIDs[i] >= hiVolumeEnd)){
              weights[i] = 1.0;
            }
            else{
              weights[i] = 3.0;
            }
          }
        }
        hyperEdgeWeights->ReplaceGlobalValues(numHEweights, weights, edgeGIDs);
      }

      delete [] weights;
      delete [] edgeGIDs;

      costs.setHypergraphEdgeWeights(hyperEdgeWeights);
    }
  }

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    double *val = NULL;

    if (nMyRows){
      val = new double [nMyRows];

      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
        for (int i=0; i<nMyRows; i++){
          val[i] = 1.0;
        }
      }
      else if (vertexWeightType == SUPPLY_UNEQUAL_WEIGHTS){
        // Integer division: processes 0, 1-2, 3-4, ... get weights 1, 2, 3, ...
        for (int i=0; i<nMyRows; i++){
          val[i] = 1.0 + ((localProc+1) / 2);
        }
      }
    }

    vptr = Teuchos::rcp(new Epetra_Vector(Copy, sourceRowMap, val));

    delete [] val;

    costs.setVertexWeights(vptr);
  }

  // Calculate partition quality metrics before calling Zoltan

  if (partitioningType == GRAPH_PARTITIONING){
    rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
             myShare, balance1, numCuts1, cutWgt1, cutn1, cutl1);
    if (!rc && contract){
      // balance wrt target of balancing weight over *all* procs
      // (skipped after a failure so the first error code is not overwritten)
      rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
             myShareBefore, balance3, numCuts3, cutWgt3, cutn3, cutl3);
    }
  }
  else{
    rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
             myShare, balance1, cutn1, cutl1);
    if (!rc && contract){
      // balance wrt target of balancing weight over *all* procs
      // (skipped after a failure so the first error code is not overwritten)
      rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
             myShareBefore, balance3, cutn3, cutl3);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  Teuchos::ParameterList params;

#ifdef HAVE_ISORROPIA_ZOLTAN

  if (!noParams){

    // We're using Zoltan for partitioning and supplying
    // parameters, overriding defaults.

    Teuchos::ParameterList &sublist = params.sublist("Zoltan");

    if (partitioningType == GRAPH_PARTITIONING){
      params.set("PARTITIONING METHOD", "GRAPH");
      sublist.set("GRAPH_PACKAGE", "PHG");
    }
    else{
      params.set("PARTITIONING METHOD", "HYPERGRAPH");
      sublist.set("LB_APPROACH", "PARTITION");
      sublist.set("PHG_CUT_OBJECTIVE", "CONNECTIVITY");  // "cutl"
    }

    if (keepDenseEdges){
      // only throw out rows that have no zeroes, default is to
      // throw out if .25 or more of the columns are non-zero
      sublist.set("PHG_EDGE_SIZE_THRESHOLD", "1.0");
    }

    if (numPartitions > 0){
      // test #Partitions < #Processes
      std::ostringstream os;
      os << numPartitions;
      std::string s = os.str();
      //	sublist.set("NUM_GLOBAL_PARTS", s);
      params.set("NUM PARTS", s);
    }

    //sublist.set("DEBUG_LEVEL", "1"); // Zoltan will print out parameters
    //sublist.set("DEBUG_LEVEL", "5");   // proc 0 will trace Zoltan calls
    //sublist.set("DEBUG_MEMORY", "2");  // Zoltan will trace alloc & free
  }

#else
    ERROREXIT((localProc==0),
      "Zoltan partitioning required but Zoltan not available.")
#endif

  // Function scope values

  Teuchos::RCP<Epetra_Vector> newvwgts;
  Teuchos::RCP<Epetra_CrsMatrix> newewgts;

  // Function scope values required for LinearProblem

  Epetra_LinearProblem *problem = NULL;
  Epetra_Map *LHSmap = NULL;
  Epetra_MultiVector *RHS = NULL;
  Epetra_MultiVector *LHS = NULL;

  // Raw pointers to the balanced object returned by createBalancedCopy.
  // NOTE(review): these are never deleted here; ownership of the returned
  // objects is not visible in this file -- confirm before adding a delete.

  Epetra_CrsMatrix *matrixPtr=NULL;
  Epetra_CrsGraph *graphPtr=NULL;
  Epetra_RowMatrix *rowMatrixPtr=NULL;
  Epetra_LinearProblem *problemPtr=NULL;

  // Row map for balanced object
  const Epetra_BlockMap *targetBlockRowMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetRowMap=NULL;            // for all other inputs

  // Column map for balanced object
  const Epetra_BlockMap *targetBlockColMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetColMap=NULL;            // for all other inputs

  if (objectType == EPETRA_CRSMATRIX){
    if (noParams && noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    if (!matrixPtr){
      // No overload was called (weights supplied) or the call returned NULL.
      ERROREXIT((localProc==0), "No balanced copy of the CrsMatrix was created")
      return 1;
    }
    targetRowMap = &(matrixPtr->RowMap());
    targetColMap = &(matrixPtr->ColMap());
  }
  else if (objectType == EPETRA_CRSGRAPH){
    const Epetra_CrsGraph graph = matrix->Graph();
    if (noParams && noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph);
    }
    else if (noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph, params);
    }
    if (!graphPtr){
      ERROREXIT((localProc==0), "No balanced copy of the CrsGraph was created")
      return 1;
    }
    targetBlockRowMap = &(graphPtr->RowMap());
    targetBlockColMap = &(graphPtr->ColMap());
  }
  else if (objectType == EPETRA_ROWMATRIX){
    if (noParams && noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    if (!rowMatrixPtr){
      ERROREXIT((localProc==0), "No balanced copy of the RowMatrix was created")
      return 1;
    }
    targetRowMap = &(rowMatrixPtr->RowMatrixRowMap());
    targetColMap = &(rowMatrixPtr->RowMatrixColMap());
  }
  else if (objectType == EPETRA_LINEARPROBLEM){

    // Create a linear problem with this matrix.

    LHSmap = new Epetra_Map(numCols, base, Comm);

    int myRHSsize = sourceRowMap.NumMyElements();
    int myLHSsize = LHSmap->NumMyElements();

    // One scratch buffer, large enough for both vectors.
    int valSize = ((myRHSsize > myLHSsize) ? myRHSsize : myLHSsize);

    double *vals = NULL;

    if (valSize){
      vals = new double [valSize];

      for (int i=0; i < valSize; i++){
        // put my rank in my portion of LHS and my portion of RHS
        vals[i] = localProc;
      }
    }

    RHS = new Epetra_MultiVector(Copy, sourceRowMap, vals, 1, 1);

    LHS = new Epetra_MultiVector(Copy, *LHSmap, vals, 1, 1);

    delete [] vals;

    problem = new Epetra_LinearProblem(matrix.get(), LHS, RHS);

    Epetra_LinearProblem lp = *problem;

    if (lp.CheckInput()){
      ERROREXIT((localProc==0), "Error creating a LinearProblem");
    }
    if (noParams && noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp);
    }
    else if (noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp, params);
    }
    if (!problemPtr){
      delete problem;
      delete LHS;
      delete RHS;
      delete LHSmap;
      ERROREXIT((localProc==0), "No balanced copy of the LinearProblem was created")
      return 1;
    }

    targetRowMap = &(problemPtr->GetMatrix()->RowMatrixRowMap());
    targetColMap = &(problemPtr->GetMatrix()->RowMatrixColMap());
  }

  // Redistribute the graph edge weights onto the balanced row map.
  // (Hyperedge weights are not redistributed.)

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      if (objectType == EPETRA_CRSGRAPH){
        newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *graphPtr));
        targetRowMap = &(newewgts->RowMap());
        targetColMap = &(newewgts->ColMap());
      }
      else{
        newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *targetRowMap, *targetColMap, 0));
      }

      // Stack object: only needed for the duration of the Import call.
      Epetra_Import importer(*targetRowMap, sourceRowMap);
      newewgts->Import(*eptr, importer, Insert);
      newewgts->FillComplete(*targetColMap, *targetRowMap);

      costs.setGraphEdgeWeights(newewgts);
    }
  }

  // Redistribute the vertex weights

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    if (objectType == EPETRA_CRSGRAPH){
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetBlockRowMap));
      Epetra_Import importer(*targetBlockRowMap, sourceRowMap);
      newvwgts->Import(*vptr, importer, Insert);
    }
    else{
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetRowMap));
      Epetra_Import importer(*targetRowMap, sourceRowMap);
      newvwgts->Import(*vptr, importer, Insert);
    }

    costs.setVertexWeights(newvwgts);
  }

  if (localProc == 0){
    test_type(numPartitions, partitioningType, vertexWeightType, edgeWeightType, objectType);
  }

  if (verbose){

    // Picture of problem before balancing

    if (objectType == EPETRA_LINEARPROBLEM){

      ispatest::show_matrix("Before load balancing", *problem, Comm);
    }
    else{
      ispatest::show_matrix("Before load balancing", matrix->Graph(), Comm);
    }

    // Picture of problem after balancing

    if (objectType == EPETRA_LINEARPROBLEM){
      ispatest::show_matrix("After load balancing (x in Ax=b is not redistributed)", *problemPtr, Comm);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      ispatest::show_matrix("After load balancing", *rowMatrixPtr, Comm);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      ispatest::show_matrix("After load balancing", matrixPtr->Graph(), Comm);
    }
    else if (objectType == EPETRA_CRSGRAPH){
      ispatest::show_matrix("After load balancing", *graphPtr, Comm);
    }
  }

  // After partitioning, recompute the metrics

  if (partitioningType == GRAPH_PARTITIONING){
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_graph_metrics(*(problemPtr->GetMatrix()), costs,
             myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_graph_metrics(*rowMatrixPtr, costs,
             myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_graph_metrics(matrixPtr->Graph(), costs,
             myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else {
      rc = ispatest::compute_graph_metrics(*graphPtr, costs,
             myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
  }
  else{
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_hypergraph_metrics(*(problemPtr->GetMatrix()), costs,
             myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_hypergraph_metrics(*rowMatrixPtr, costs,
             myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_hypergraph_metrics(matrixPtr->Graph(), costs,
             myShare, balance2, cutn2, cutl2);
    }
    else{
      rc = ispatest::compute_hypergraph_metrics(*graphPtr, costs,
             myShare, balance2, cutn2, cutl2);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  std::string why;

  if (partitioningType == GRAPH_PARTITIONING){
    // Graph partitioning passes when the weighted edge cut did not grow.
    fail = (cutWgt2 > cutWgt1);
    why = "New weighted edge cuts are worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;

      if (contract){
        std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
        std::cout << "Before partitioning: Balance " << balance3 ;
        std::cout << " cutn " << cutn3 ;
        std::cout << " cutl " << cutl3 ;
        std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;

      std::cout << " Total edge cuts: " << numCuts1;
      std::cout << " Total weighted edge cuts: " << cutWgt1 << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
      std::cout << " Total edge cuts: " << numCuts2;
      std::cout << " Total weighted edge cuts: " << cutWgt2 << std::endl;
    }
  }
  else{
    // Hypergraph partitioning passes when cutl did not grow.
    fail = (cutl2 > cutl1);
    why = "New cutl is worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;
      if (contract){
        std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
        std::cout << "Before partitioning: Balance " << balance3 ;
        std::cout << " cutn " << cutn3 ;
        std::cout << " cutl " << cutl3 ;
        std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
    }
  }

  if (fail){
    if (localProc == 0) std::cout << "ERROR: "+why << std::endl;
  }

  // Check that the rebalanced object is valid.  This test constructs an "x"
  // with the matrix->DomainMap() and a "y" with matrix->RangeMap()
  // and then calculates y = Ax.

  if (objectType == EPETRA_LINEARPROBLEM){
    valid = ispatest::test_matrix_vector_multiply(*problemPtr);
  }
  else if (objectType == EPETRA_ROWMATRIX){
    valid = ispatest::test_row_matrix_vector_multiply(*rowMatrixPtr);
  }
  else if (objectType == EPETRA_CRSMATRIX){
    valid = ispatest::test_matrix_vector_multiply(*matrixPtr);
  }
  else{
    valid = ispatest::test_matrix_vector_multiply(*graphPtr);
  }

  if (!valid){
    if (localProc == 0) std::cout << "Rebalanced matrix is not a valid Epetra matrix" << std::endl;
    fail = 1;
  }
  else{
    if (localProc == 0) std::cout << "Rebalanced matrix is a valid Epetra matrix" << std::endl;
  }

  if (localProc == 0)
    std::cout << std::endl;

  // Release the objects allocated above for the LinearProblem case
  // (all NULL, and therefore no-ops, for the other object types).
  delete problem;
  delete LHS;
  delete RHS;
  delete LHSmap;

#else
  // NOTE(review): rc is set to -1 here but the function returns `fail`,
  // which is still 0, so a build without EpetraExt reports success.
  std::cout << "test_simple main: currently can only test "
         << "with Epetra and EpetraExt enabled." << std::endl;
  rc = -1;
#endif

  return fail;
}
// Example #2
// 0
int main(int argc, char *argv[])
{

  std::cout << Epetra_Version() << std::endl << std::endl;

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif


  Teuchos::RCP<Teuchos::FancyOStream> fos = getFancyOStream(Teuchos::rcpFromRef(std::cout));

  // Construct a Map with NumElements and index base of 0
  Teuchos::RCP<Epetra_Map> rgMap = Teuchos::rcp(new Epetra_Map(63, 0, Comm));
  Teuchos::RCP<Epetra_Map> doMap = Teuchos::rcp(new Epetra_Map(20, 0, Comm));

  Epetra_CrsMatrix* Pin = NULL;
  EpetraExt::MatrixMarketFileToCrsMatrix("Test.mat",
      *rgMap, *rgMap, *doMap,
      Pin, false,
      true);
  Teuchos::RCP<Epetra_CrsMatrix> P = Teuchos::rcp(Pin);

  Epetra_CrsMatrix* A = NULL;
  A = TridiagMatrix(rgMap.get(), 1.0, -2.0, 1.0);
  Teuchos::RCP<Epetra_CrsMatrix> rcpA = Teuchos::rcp(A);

  ////////////////////////////////////////////
  // plain Epetra
  ////////////////////////////////////////////

  // Create x and b vectors
  Teuchos::RCP<Epetra_Vector> x = Teuchos::rcp(new Epetra_Vector(*doMap));
  Teuchos::RCP<Epetra_Vector> x2 = Teuchos::rcp(new Epetra_Vector(*rgMap));
  Teuchos::RCP<Epetra_Vector> b = Teuchos::rcp(new Epetra_Vector(*rgMap));
  Teuchos::RCP<Epetra_Vector> b2 = Teuchos::rcp(new Epetra_Vector(*rgMap));

  x->PutScalar(1.0);
  x2->PutScalar(1.0);
  double normx = 0.0;
  x->Norm1(&normx);
  if (Comm.MyPID() == 0) std::cout << "||x|| = " << normx << std::endl;
  x2->Norm1(&normx);
  if (Comm.MyPID() == 0) std::cout << "||x2|| = " << normx << std::endl;

  /*P->Apply(*x,*b);
  normx = 0.0;
  b->Norm1(&normx);*/
  //if (Comm.MyPID() == 0) std::cout << "||Px||_1 = " << normx << std::endl;

  /*Epetra_RowMatrixTransposer et(&*P);
        Epetra_CrsMatrix* PT;
        int rv = et.CreateTranspose(true,PT);
        if (rv != 0) {
          std::ostringstream buf;
          buf << rv;
          std::string msg = "Utils::Transpose: Epetra::RowMatrixTransposer returned value of " + buf.str();
          std::cout << msg << std::endl;
        }

  Teuchos::RCP<Epetra_CrsMatrix> rcpPT(PT);

  rcpPT->Apply(*x2,*b2);
  normx = 0.0;
  b2->Norm1(&normx);*/
  //if (Comm.MyPID() == 0) std::cout << "||P^T x||_1 = " << normx << std::endl;

  // matrix-matrix multiply
  Teuchos::RCP<Epetra_CrsMatrix> AP = Teuchos::rcp(new Epetra_CrsMatrix(Copy,rcpA->RangeMap(),1));
  EpetraExt::MatrixMatrix::Multiply(*rcpA,false,*P,false,*AP, *fos);
  //  AP->FillComplete(P->DomainMap(),rcpA->RangeMap());
  //std::cout << *AP << std::endl;
  AP->Apply(*x,*b2);
  normx = 0.0;
  b2->Norm1(&normx);
  if (Comm.MyPID() == 0) std::cout << "Epetra: ||AP x||_1 = " << normx << std::endl;

  // build A^T explicitely
  Epetra_RowMatrixTransposer etA(&*rcpA);
        Epetra_CrsMatrix* AT;
        int rv3 = etA.CreateTranspose(true,AT);
        if (rv3 != 0) {
          std::ostringstream buf;
          buf << rv3;
          std::string msg = "Utils::Transpose: Epetra::RowMatrixTransposer returned value of " + buf.str();
          std::cout << msg << std::endl;
        }
  Teuchos::RCP<Epetra_CrsMatrix> rcpAT(AT);

  // calculate A^T Px
  Teuchos::RCP<Epetra_CrsMatrix> APexpl = Teuchos::rcp(new Epetra_CrsMatrix(Copy,rcpA->DomainMap(),20));
  EpetraExt::MatrixMatrix::Multiply(*rcpAT,false,*P,false,*APexpl, *fos);
  //  APexpl->FillComplete(P->DomainMap(),rcpA->DomainMap()); // check me
  APexpl->Apply(*x,*b2);
  normx = 0.0;
  b2->Norm1(&normx);
  if (Comm.MyPID() == 0) std::cout << "Epetra: ||A^T_expl P x||_1 = " << normx << std::endl;

  // calculate A^T Px
  Teuchos::RCP<Epetra_CrsMatrix> APimpl = Teuchos::rcp(new Epetra_CrsMatrix(Copy,rcpA->RangeMap(),20));
  EpetraExt::MatrixMatrix::Multiply(*rcpA,true,*P,false,*APimpl, *fos);
  //  APimpl->FillComplete(P->DomainMap(),APimpl->RangeMap()); // check me
  APimpl->Apply(*x,*b2);
  normx = 0.0;
  b2->Norm1(&normx);
  if (Comm.MyPID() == 0) std::cout << "Epetra: ||A^T_impl P x||_1 = " << normx << std::endl;


  ///////////////////////////////////////
  // Xpetra
  ///////////////////////////////////////


   // wrap original matrix to Matrix
   Teuchos::RCP<Xpetra::EpetraMap> xrgMap = Teuchos::rcp(new Xpetra::EpetraMap(rgMap));
   Teuchos::RCP<Xpetra::EpetraMap> xdoMap = Teuchos::rcp(new Xpetra::EpetraMap(doMap));

   Teuchos::RCP<Xpetra::EpetraCrsMatrix> Pop = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(P) );
   Teuchos::RCP<Xpetra::CrsMatrix<double> > crsP = Teuchos::rcp_implicit_cast<Xpetra::CrsMatrix<double> >(Pop);
   Teuchos::RCP<Xpetra::CrsMatrixWrap<double> > crsPOp = Teuchos::rcp( new Xpetra::CrsMatrixWrap<double>(crsP) );
   crsPOp->fillComplete(xdoMap,xrgMap);

   Teuchos::RCP<Xpetra::EpetraCrsMatrix> Aop = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(rcpA) );
   Teuchos::RCP<Xpetra::CrsMatrix<double> > crsA = Teuchos::rcp_implicit_cast<Xpetra::CrsMatrix<double> >(Aop);
   Teuchos::RCP<Xpetra::CrsMatrixWrap<double> > crsAOp = Teuchos::rcp( new Xpetra::CrsMatrixWrap<double>(crsA) );
   crsAOp->fillComplete(xrgMap,xrgMap);

   // wrap vectors
   Teuchos::RCP<Xpetra::EpetraVector> xx = Teuchos::rcp(new Xpetra::EpetraVector(x));
   Teuchos::RCP<Xpetra::EpetraVector> xx2 = Teuchos::rcp(new Xpetra::EpetraVector(x2));
   Teuchos::RCP<Xpetra::EpetraVector> bb1 = Teuchos::rcp(new Xpetra::EpetraVector(b));
   Teuchos::RCP<Xpetra::EpetraVector> bb2 = Teuchos::rcp(new Xpetra::EpetraVector(b2));

   bb1->putScalar(0.0);
   bb2->putScalar(0.0);

   //crsPOp->apply(*xx,*bb1);

   // if (Comm.MyPID() == 0) std::cout << "||Pop x||_1 = " << bb1->norm1() << std::endl;

   //Teuchos::RCP<Xpetra::Matrix<double> > crsPOpT = MueLu::Utils2<double>::Transpose(crsPOp,false);

   //crsPOpT->apply(*xx2,*bb2);
   //if (Comm.MyPID() == 0) std::cout << "||Pop^T x||_1 = " << bb2->norm1() << std::endl;

   // calculate APx
   Teuchos::RCP<Xpetra::Matrix<double> > crsAP = MueLu::Utils<double>::Multiply(*crsAOp,false,*crsPOp,false, *fos);
   //crsAP->describe(*fos,Teuchos::VERB_EXTREME);
   bb1->putScalar(0.0);
   crsAP->apply(*xx,*bb1);
   normx = bb1->norm1();
   if (Comm.MyPID() == 0) std::cout << "Xpetra: ||A Pop x||_1 = " << normx << std::endl;

   // calculate A^T P x explicitely
   Teuchos::RCP<Xpetra::Matrix<double> > crsAOpT = MueLu::Utils2<double>::Transpose(*crsAOp,  false);
   Teuchos::RCP<Xpetra::Matrix<double> > AtPexpl = MueLu::Utils<double> ::Multiply (*crsAOpT, false, *crsPOp, false, *fos);
   bb1->putScalar(0.0);
   AtPexpl->apply(*xx,*bb1);
   normx = bb1->norm1();
   if (Comm.MyPID() == 0)
       std::cout << "Xpetra: ||A^T_expl Pop x||_1 = " << normx << std::endl;

   // calculate A^T P x
   Teuchos::RCP<Xpetra::Matrix<double> > AtPimpl = MueLu::Utils<double>::Multiply(*crsAOp,true,*crsPOp,false, *fos);
   bb1->putScalar(0.0);
   AtPimpl->apply(*xx,*bb1);
   normx = bb1->norm1();
   if (Comm.MyPID() == 0)
       std::cout << "Xpetra: ||A^T_impl Pop x||_1 = " << normx << std::endl;


#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return 0;
}
/*----------------------------------------------------------------------*
 |  compute the preconditioner (public)                      m.gee 03/06|
 *----------------------------------------------------------------------*/
bool MOERTEL::Mortar_ML_Preconditioner::Compute()
{

    iscomputed_ = false;

    MLAPI::Init();

    // get parameters
    int     maxlevels     = mlparams_.get("max levels",10);
    int     maxcoarsesize = mlparams_.get("coarse: max size",10);
    double* nullspace     = mlparams_.get("null space: vectors",(double*)NULL);
    int     nsdim         = mlparams_.get("null space: dimension",1);
    int     numpde        = mlparams_.get("PDE equations",1);
    double  damping       = mlparams_.get("aggregation: damping factor",1.33);
    std::string  eigenanalysis = mlparams_.get("eigen-analysis: type", "Anorm");
    std::string  smoothertype  = mlparams_.get("smoother: type","symmetric Gauss-Seidel");
    std::string  coarsetype    = mlparams_.get("coarse: type","Amesos-KLU");
    std::string  ptype         = mlparams_.get("prolongator: type","mod_full");

    // create the 2 rowmaps
    Arrmap_ = Teuchos::rcp(MOERTEL::SplitMap(A_->RowMap(),*Annmap_));
    Teuchos::RCP<Epetra_Map> map1 = Arrmap_;
    Teuchos::RCP<Epetra_Map> map2 = Annmap_;

    // split Atilde
    //
    //  Atilde11 Atilde12
    //  Atilde21 Atilde22
    //
    Teuchos::RCP<Epetra_CrsMatrix> Atilde11;
    Teuchos::RCP<Epetra_CrsMatrix> Atilde12;
    Teuchos::RCP<Epetra_CrsMatrix> Atilde21;
    Teuchos::RCP<Epetra_CrsMatrix> Atilde22;
    MOERTEL::SplitMatrix2x2(Atilde_,map1,map2,Atilde11,Atilde12,Atilde21,Atilde22);
    Atilde11_ = Atilde11;

    // build BWT (padded to full size)
    //
    //  0   Mr Dinv
    //  0    I
    //
    Teuchos::RCP<Epetra_CrsMatrix> BWT = Teuchos::rcp(MOERTEL::MatMatMult(*B_,false,*WT_,false,0));
    Teuchos::RCP<Epetra_CrsMatrix> tmp = Teuchos::rcp(MOERTEL::PaddedMatrix(BWT->RowMap(),0.0,25));
    MOERTEL::MatrixMatrixAdd(*BWT,false,1.0,*tmp,0.0);
    tmp->FillComplete(BWT->DomainMap(),BWT->RangeMap());
    BWT = tmp;
    tmp = Teuchos::null;

    // split BWT to obtain M = Mr Dinv
    Teuchos::RCP<Epetra_CrsMatrix> Zero11;
    Teuchos::RCP<Epetra_CrsMatrix> M;
    Teuchos::RCP<Epetra_CrsMatrix> Zero21;
    Teuchos::RCP<Epetra_CrsMatrix> I22;
    MOERTEL::SplitMatrix2x2(BWT,map1,map2,Zero11,M,Zero21,I22);
    M_ = M;

    // transpose BWT to get WBT and split again
    tmp = Teuchos::rcp(MOERTEL::PaddedMatrix(BWT->RowMap(),0.0,25));
    MOERTEL::MatrixMatrixAdd(*BWT,true,1.0,*tmp,0.0);
    tmp->FillComplete();
    Teuchos::RCP<Epetra_CrsMatrix> Zero12;
    MOERTEL::SplitMatrix2x2(tmp,map1,map2,Zero11,Zero12,MT_,I22);

    // build matrix Ahat11 = Atilde11 + M Atilde22 M^T
    Teuchos::RCP<Epetra_CrsMatrix> Ahat11 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,*map1,50,false));
    MOERTEL::MatrixMatrixAdd(*Atilde11,false,1.0,*Ahat11,0.0);
    Teuchos::RCP<Epetra_CrsMatrix> tmp1 = Teuchos::rcp(MOERTEL::MatMatMult(*Atilde22,false,*M,true,0));
    Teuchos::RCP<Epetra_CrsMatrix> tmp2 = Teuchos::rcp(MOERTEL::MatMatMult(*M,false,*tmp1,false,0));
    MOERTEL::MatrixMatrixAdd(*tmp2,false,-1.0,*Ahat11,1.0);
    Ahat11->FillComplete();
    Ahat11->OptimizeStorage();
    Ahat11_ = Ahat11;

    // build mlapi objects
    Space space1(*map1);
    Space space2(*map2);
    mlapiAtilde11_.Reshape(space1,space1,Atilde11_.get(),false);
    mlapiAhat11_.Reshape(space1,space1,Ahat11_.get(),false);
    mlapiM_.Reshape(space2,space1,M_.get(),false);
    mlapiMT_.Reshape(space1,space2,MT_.get(),false);

    // build the smoother G(Atilde11)
    G_.Reshape(mlapiAtilde11_,smoothertype,mlparams_);

    iscomputed_ = true;
    return true;
}
/*----------------------------------------------------------------------*
 |  compute the preconditioner (public)                      m.gee 03/06|
 *----------------------------------------------------------------------*/
bool MOERTEL::Mortar_ML_Preconditioner::Compute()
{

    iscomputed_ = false;

    MLAPI::Init();

    // get parameters
    int     maxlevels     = mlparams_.get("max levels",10);
    int     maxcoarsesize = mlparams_.get("coarse: max size",10);
    double* nullspace     = mlparams_.get("null space: vectors",(double*)NULL);
    int     nsdim         = mlparams_.get("null space: dimension",1);
    int     numpde        = mlparams_.get("PDE equations",1);
    double  damping       = mlparams_.get("aggregation: damping factor",1.33);
    std::string  eigenanalysis = mlparams_.get("eigen-analysis: type", "Anorm");
    std::string  smoothertype  = mlparams_.get("smoother: type","symmetric Gauss-Seidel");
    std::string  coarsetype    = mlparams_.get("coarse: type","Amesos-KLU");
    std::string  ptype         = mlparams_.get("prolongator: type","mod_full");

    // create the missing rowmap Arrmap_
    Arrmap_ = Teuchos::rcp(MOERTEL::SplitMap(A_->RowMap(),*Annmap_));
    Teuchos::RCP<Epetra_Map> map1 = Arrmap_;
    Teuchos::RCP<Epetra_Map> map2 = Annmap_;

    // split Atilde
    //
    //  Atilde11 Atilde12
    //  Atilde21 Atilde22
    //
    Teuchos::RCP<Epetra_CrsMatrix> Atilde11;
    Teuchos::RCP<Epetra_CrsMatrix> Atilde12;
    Teuchos::RCP<Epetra_CrsMatrix> Atilde21;
    Teuchos::RCP<Epetra_CrsMatrix> Atilde22;
    MOERTEL::SplitMatrix2x2(Atilde_,map1,map2,Atilde11,Atilde12,Atilde21,Atilde22);

    // build Atildesplit
    //
    //  Atilde11  0
    //  0         I
    //
    Atildesplit_ = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A_->RowMap(),50,false));
    MOERTEL::MatrixMatrixAdd(*Atilde11,false,1.0,*Atildesplit_,0.0);
    Teuchos::RCP<Epetra_CrsMatrix> tmp = Teuchos::rcp(MOERTEL::PaddedMatrix(*map2,1.0,1));
    tmp->FillComplete();
    MOERTEL::MatrixMatrixAdd(*tmp,false,1.0,*Atildesplit_,1.0);
    Atildesplit_->FillComplete();
    Atildesplit_->OptimizeStorage();

    // split A
    //
    //  A11 A12
    //  A21 A22
    //
    Teuchos::RCP<Epetra_CrsMatrix> A11;
    Teuchos::RCP<Epetra_CrsMatrix> A12;
    Teuchos::RCP<Epetra_CrsMatrix> A21;
    Teuchos::RCP<Epetra_CrsMatrix> A22;
    MOERTEL::SplitMatrix2x2(A_,map1,map2,A11,A12,A21,A22);

    // build Asplit_
    //
    //  A11  0
    //  0    A22
    //
    Asplit_ = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A_->RowMap(),50,false));
    MOERTEL::MatrixMatrixAdd(*A11,false,1.0,*Asplit_,0.0);
    MOERTEL::MatrixMatrixAdd(*A22,false,1.0,*Asplit_,1.0);
    Asplit_->FillComplete();
    Asplit_->OptimizeStorage();

    // build BWT (padded to full size)
    //
    //  0   Mr Dinv
    //  0    I
    //
    Teuchos::RCP<Epetra_CrsMatrix> BWT = Teuchos::rcp(MOERTEL::MatMatMult(*B_,false,*WT_,false,10));
    tmp = Teuchos::rcp(MOERTEL::PaddedMatrix(BWT->RowMap(),0.0,25));
    MOERTEL::MatrixMatrixAdd(*BWT,false,1.0,*tmp,0.0);
    tmp->FillComplete(BWT->DomainMap(),BWT->RangeMap());
    BWT = tmp;
    tmp = Teuchos::null;

    // split BWT to obtain M = Mr Dinv
    Teuchos::RCP<Epetra_CrsMatrix> Zero11;
    Teuchos::RCP<Epetra_CrsMatrix> M;
    Teuchos::RCP<Epetra_CrsMatrix> Zero21;
    Teuchos::RCP<Epetra_CrsMatrix> I22;
    MOERTEL::SplitMatrix2x2(BWT,map1,map2,Zero11,M,Zero21,I22);


    // build matrix Ahat11 = Atilde11 - M Atilde22 M^T
    Teuchos::RCP<Epetra_CrsMatrix> Ahat11 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,*map1,50,false));
    MOERTEL::MatrixMatrixAdd(*Atilde11,false,1.0,*Ahat11,0.0);
    Teuchos::RCP<Epetra_CrsMatrix> tmp1 = Teuchos::rcp(MOERTEL::MatMatMult(*Atilde22,false,*M,true,10));
    Teuchos::RCP<Epetra_CrsMatrix> tmp2 = Teuchos::rcp(MOERTEL::MatMatMult(*M,false,*tmp1,false,10));
    MOERTEL::MatrixMatrixAdd(*tmp2,false,-1.0,*Ahat11,1.0);
    Ahat11->FillComplete();
    Ahat11->OptimizeStorage();

    // build matrix Ahat
    //
    //  Ahat11   0   =   Atilde11 - M Atilde22 M^T   0
    //  0        0       0                           0
    //
    Ahat_ = Teuchos::rcp(MOERTEL::PaddedMatrix(A_->RowMap(),0.0,25));
    MOERTEL::MatrixMatrixAdd(*Ahat11,false,1.0,*Ahat_,0.0);
    Ahat_->FillComplete();
    Ahat_->OptimizeStorage();


    // build mlapi objects
    Space space(A_->RowMatrixRowMap());
    Operator mlapiAsplit(space,space,Asplit_.get(),false);
    Operator mlapiAtildesplit(space,space,Atildesplit_.get(),false);
    Operator mlapiAhat(space,space,Ahat_.get(),false);
    Operator mlapiBWT(space,space,BWT.get(),false);
    Operator mlapiBWTcoarse;
    Operator ImBWTfine = GetIdentity(space,space) - mlapiBWT;
    Operator ImBWTcoarse;
    Operator Ptent;
    Operator P;
    Operator Pmod;
    Operator Rtent;
    Operator R;
    Operator Rmod;
    Operator IminusA;
    Operator C;
    InverseOperator S;

    mlapiAtildesplit_.resize(maxlevels);
    mlapiAhat_.resize(maxlevels);
    mlapiImBWT_.resize(maxlevels);
    mlapiImWBT_.resize(maxlevels);
    mlapiRmod_.resize(maxlevels);
    mlapiPmod_.resize(maxlevels);
    mlapiS_.resize(maxlevels);

    mlapiAtildesplit_[0] = mlapiAtildesplit;
    mlapiAhat_[0]        = mlapiAhat;
    mlapiImBWT_[0]       = ImBWTfine;
    mlapiImWBT_[0]       = GetTranspose(ImBWTfine);


    // build nullspace
    MultiVector NS;
    MultiVector NextNS;
    NS.Reshape(mlapiAsplit.GetRangeSpace(),nsdim);
    if (nullspace)
    {
        for (int i=0; i<nsdim; ++i)
            for (int j=0; j<NS.GetMyLength(); ++j)
                NS(j,i) = nullspace[i*NS.GetMyLength()+j];
    }
    else
    {
        if (numpde==1) NS = 1.0;
        else
        {
            NS = 0.0;
            for (int i=0; i<NS.GetMyLength(); ++i)
                for (int j=0; j<numpde; ++j)
                    if ( i % numpde == j)
                        NS(i,j) = 1.0;
        }
    }

    double lambdamax;

    // construct the hierarchy
    int level=0;
    for (level=0; level<maxlevels-1; ++level)
    {
        // this level's smoothing operator
        mlapiAtildesplit = mlapiAtildesplit_[level];

        // build smoother
        if (Comm().MyPID()==0)
        {
            ML_print_line("-", 78);
            std::cout << "MOERTEL/ML : creating smoother level " << level << std::endl;
            fflush(stdout);
        }
        S.Reshape(mlapiAtildesplit,smoothertype,mlparams_);

        if (level) mlparams_.set("PDE equations", NS.GetNumVectors());

        if (Comm().MyPID()==0)
        {
            ML_print_line("-", 80);
            std::cout << "MOERTEL/ML : creating level " << level+1 << std::endl;
            ML_print_line("-", 80);
            fflush(stdout);
        }
        mlparams_.set("workspace: current level",level);

        // get tentative prolongator based on decoupled original system
        GetPtent(mlapiAsplit,mlparams_,NS,Ptent,NextNS);
        NS = NextNS;

        // do prolongator smoothing
        if (damping)
        {
            if (eigenanalysis == "Anorm")
                lambdamax = MaxEigAnorm(mlapiAsplit,true);
            else if (eigenanalysis == "cg")
                lambdamax = MaxEigCG(mlapiAsplit,true);
            else if (eigenanalysis == "power-method")
                lambdamax = MaxEigPowerMethod(mlapiAsplit,true);
            else ML_THROW("incorrect parameter (" + eigenanalysis + ")", -1);

            IminusA = GetJacobiIterationOperator(mlapiAsplit,damping/lambdamax);
            P       = IminusA * Ptent;
            R       = GetTranspose(P);
            Rtent   = GetTranspose(Ptent);
        }
        else
        {
            P     = Ptent;
            Rtent = GetTranspose(Ptent);
            R     = Rtent;
            lambdamax = -1.0;
        }

        // do variational coarse grid of split original matrix Asplit
        C = GetRAP(R,mlapiAsplit,P);

        // compute the mortar projections on coarse grid
        mlapiBWTcoarse = GetRAP(Rtent,mlapiBWT,Ptent);
        ImBWTcoarse    = GetIdentity(C.GetDomainSpace(),C.GetRangeSpace());
        ImBWTcoarse    = ImBWTcoarse - mlapiBWTcoarse;

        // do modified prolongation and restriction
        if (ptype=="mod_full")
            Rmod = ImBWTcoarse * ( R * ImBWTfine ) + mlapiBWTcoarse * ( R * mlapiBWT );
        else if (ptype=="mod_middle")
            Rmod = ImBWTcoarse * ( R * ImBWTfine );
        else if (ptype=="mod_simple")
            Rmod = R * ImBWTfine;
        else if (ptype=="original")
            Rmod = R;
        else
            ML_THROW("incorrect parameter ( " + ptype + " )", -1);
        Pmod = GetTranspose(Rmod);

        // store matrix for construction of next level
        mlapiAsplit = C;

        // make coarse smoothing operator
        // make coarse residual operator
        mlapiAtildesplit_[level+1] = GetRAP(Rmod,mlapiAtildesplit,Pmod);
        mlapiAhat_[level+1]        = GetRAP(Rmod,mlapiAhat_[level],Pmod);
        mlapiImBWT_[level]         = ImBWTfine;
        mlapiImBWT_[level+1]       = ImBWTcoarse;
        mlapiImWBT_[level]         = GetTranspose(ImBWTfine);
        mlapiImWBT_[level+1]       = GetTranspose(ImBWTcoarse);
        mlapiRmod_[level]          = Rmod;
        mlapiPmod_[level]          = Pmod;
        mlapiS_[level]             = S;

        // prepare for next level
        mlapiBWT  = mlapiBWTcoarse;
        ImBWTfine = ImBWTcoarse;

        // break if coarsest level is below specified size
        if (mlapiAsplit.GetNumGlobalRows() <= maxcoarsesize)
        {
            ++level;
            break;
        }

    } // for (level=0; level<maxlevels-1; ++level)

    // do coarse smoother
    S.Reshape(mlapiAtildesplit_[level],coarsetype,mlparams_);
    mlapiS_[level] = S;

    // store max number of levels
    maxlevels_ = level+1;

    iscomputed_ = true;
    return true;
}
/*----------------------------------------------------------------------*
 |  compute the preconditioner (public)                      m.gee 03/06|
 *----------------------------------------------------------------------*/
bool MOERTEL::Mortar_ML_Preconditioner::Compute()
{

    using namespace MLAPI;

    iscomputed_ = false;

    MLAPI::Init();

    // get parameters
    int     maxlevels     = mlparams_.get("max levels",10);
    int     maxcoarsesize = mlparams_.get("coarse: max size",10);
    double* nullspace     = mlparams_.get("null space: vectors",(double*)NULL);
    int     nsdim         = mlparams_.get("null space: dimension",1);
    int     numpde        = mlparams_.get("PDE equations",1);
    double  damping       = mlparams_.get("aggregation: damping factor",1.33);
    std::string  eigenanalysis = mlparams_.get("eigen-analysis: type", "Anorm");
    std::string  smoothertype  = mlparams_.get("smoother: type","symmetric Gauss-Seidel");
    std::string  coarsetype    = mlparams_.get("coarse: type","Amesos-KLU");
    std::string  ptype         = mlparams_.get("prolongator: type","mod_full");

    MLAPI::Space space(A_->RowMatrixRowMap());
    MLAPI::Operator mlapiA(space,space,A_.get(),false);
    MLAPI::Operator mlapiAtilde(space,space,Atilde_.get(),false);

    // make the multiplication of BWT
    Teuchos::RCP<Epetra_CrsMatrix> BWT = Teuchos::rcp(MOERTEL::MatMatMult(*B_,false,*WT_,false,0));
    Teuchos::RCP<Epetra_CrsMatrix> tmp = Teuchos::rcp(MOERTEL::PaddedMatrix(BWT->RowMap(),0.0,25));
    MOERTEL::MatrixMatrixAdd(*BWT,false,1.0,*tmp,1.0);
    tmp->FillComplete(BWT->DomainMap(),BWT->RangeMap());
    BWT = tmp;
    tmp = Teuchos::null;

    MLAPI::Operator mlapiBWT(space,space,BWT.get(),false);

    mlapiImBWT_.resize(maxlevels);
    mlapiImWBT_.resize(maxlevels);
    mlapiRmod_.resize(maxlevels);
    mlapiPmod_.resize(maxlevels);
    mlapiAtilde_.resize(maxlevels);
    mlapiS_.resize(maxlevels);

    // build nullspace;
    MLAPI::MultiVector NS;
    MLAPI::MultiVector NextNS;

    NS.Reshape(mlapiA.GetRangeSpace(),nsdim);
    if (nullspace)
    {
        for (int i=0; i<nsdim; ++i)
            for (int j=0; j<NS.GetMyLength(); ++j)
                NS(j,i) = nullspace[i*NS.GetMyLength()+j];
    }
    else
    {
        if (numpde==1) NS = 1.0;
        else
        {
            NS = 0.0;
            for (int i=0; i<NS.GetMyLength(); ++i)
                for (int j=0; j<numpde; ++j)
                    if ( i % numpde == j)
                        NS(i,j) = 1.0;
        }
    }

    double lambdamax;
    MLAPI::Operator Ptent;
    MLAPI::Operator P;
    MLAPI::Operator Rtent;
    MLAPI::Operator R;
    MLAPI::Operator IminusA;
    MLAPI::Operator C;

    MLAPI::Operator Pmod;
    MLAPI::Operator Rmod;
    MLAPI::Operator ImBWTfine;
    MLAPI::Operator ImBWTcoarse;
    MLAPI::Operator mlapiBWTcoarse;
    MLAPI::InverseOperator S;

    mlapiAtilde_[0] = mlapiAtilde;

    int level;
    for (level=0; level<maxlevels-1; ++level)
    {
        // this level's operator
        mlapiAtilde = mlapiAtilde_[level];

        // build smoother
        if (Comm().MyPID()==0)
        {
            ML_print_line("-", 78);
            std::cout << "MOERTEL/ML : creating smoother level " << level << std::endl;
            fflush(stdout);
        }
        S.Reshape(mlapiAtilde,smoothertype,mlparams_);

        if (level) mlparams_.set("PDE equations", NS.GetNumVectors());


        if (Comm().MyPID()==0)
        {
            ML_print_line("-", 80);
            std::cout << "MOERTEL/ML : creating level " << level+1 << std::endl;
            ML_print_line("-", 80);
            fflush(stdout);
        }

        mlparams_.set("workspace: current level",level);
        GetPtent(mlapiA,mlparams_,NS,Ptent,NextNS);
        NS = NextNS;

        if (damping)
        {
            if (eigenanalysis == "Anorm")
                lambdamax = MaxEigAnorm(mlapiA,true);
            else if (eigenanalysis == "cg")
                lambdamax = MaxEigCG(mlapiA,true);
            else if (eigenanalysis == "power-method")
                lambdamax = MaxEigPowerMethod(mlapiA,true);
            else ML_THROW("incorrect parameter (" + eigenanalysis + ")", -1);

            IminusA = GetJacobiIterationOperator(mlapiA,damping/lambdamax);
            P = IminusA * Ptent;
        }
        else
        {
            P = Ptent;
            lambdamax = -1.0;
        }

        R = GetTranspose(P);
        if (damping)
            Rtent = GetTranspose(Ptent);
        else
            Rtent = R;

        // variational coarse grid
        C = GetRAP(R,mlapiA,P);

        // compute fine mortar projection operator
        ImBWTfine = GetIdentity(mlapiA.GetDomainSpace(),mlapiA.GetRangeSpace());
        ImBWTfine = ImBWTfine - mlapiBWT;

        // compute fine mortar projection operator
        mlapiBWTcoarse = GetRAP(Rtent,mlapiBWT,Ptent);
        ImBWTcoarse = GetIdentity(C.GetDomainSpace(),C.GetRangeSpace());
        ImBWTcoarse = ImBWTcoarse - mlapiBWTcoarse;
        // make modified restriction/prolongation
        if (ptype=="mod_full")
            Rmod = ImBWTcoarse * ( R * ImBWTfine ) + mlapiBWTcoarse * ( R * mlapiBWT );
        else if (ptype=="mod_middle")
            Rmod = ImBWTcoarse * ( R * ImBWTfine );
        else if (ptype=="mod_simple")
            Rmod = R * ImBWTfine;
        else if (ptype=="original")
            Rmod = R;
        else
            ML_THROW("incorrect parameter ( " + ptype + " )", -1);

        Pmod = GetTranspose(Rmod);

        // store original matrix for construction of next level
        mlapiA = C;

        // make final coarse grid operator
        C = GetRAP(Rmod,mlapiAtilde,Pmod);

        // store values
        mlapiImBWT_[level]    = ImBWTfine;
        mlapiImBWT_[level+1]  = ImBWTcoarse;
        mlapiImWBT_[level]    = GetTranspose(ImBWTfine);
        mlapiImWBT_[level+1]  = GetTranspose(ImBWTcoarse);
        mlapiRmod_[level]     = Rmod;
        mlapiPmod_[level]     = Pmod;
        mlapiAtilde_[level+1] = C;
        mlapiS_[level]        = S;

        // prepare for next level
        mlapiBWT = mlapiBWTcoarse;

        // break if coarsest level is below specified size
        if (C.GetNumGlobalRows() <= maxcoarsesize)
        {
            ++level;
            break;
        }

    } // for (level=0; level<maxlevels-1; ++level)

    // set coarse solver
    if (Comm().MyPID()==0)
    {
        ML_print_line("-", 78);
        std::cout << "MOERTEL/ML : creating coarse solver level " << level << std::endl;
        fflush(stdout);
    }
    S.Reshape(mlapiAtilde_[level],coarsetype,mlparams_);
    mlapiS_[level] = S;

    // store number of levels
    maxlevels_ = level+1;

    iscomputed_ = true;
    return true;
}