//=========================================================================
bool EpetraExt::RowMatrix_Transpose::fwd()
{
  int i, j, NumIndices, err;

  Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix*>(origObj_);

  // Now copy values and global indices into newly create transpose storage

  for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter
  for (i=0; i<NumMyRows_; i++)
  {
    if(OrigMatrixIsCrsMatrix_)
      err = OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_);
    else
      err = origObj_->ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_);
    if (err != 0) {
      std::cerr << "ExtractMyRowCopy/View failed."<<std::endl;
      throw err;
    }

    int ii = origObj_->RowMatrixRowMap().GID(i);
    for (j=0; j<NumIndices; j++)
    {
      int TransRow = Indices_[j];
      int loc = TransNumNz_[TransRow];
      TransIndices_[TransRow][loc] = ii;
      TransValues_[TransRow][loc] = Values_[j];
      ++TransNumNz_[TransRow]; // increment counter into current transpose row
    }
  }

  //  Build Transpose matrix with some rows being shared across processors.
  //  We will use a view here since the matrix will not be used for anything else
  const Epetra_Map & TransMap = origObj_->RowMatrixColMap();

  Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_);
  TransMap.MyGlobalElements(TransMyGlobalEquations_);
  
  for (i=0; i<NumMyCols_; i++)
      EPETRA_CHK_ERR(TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], 
                     TransNumNz_[i], TransValues_[i], TransIndices_[i]));
 
  // Note: The following call to FillComplete is currently necessary because
  //     some global constants that are needed by the Export () are computed in this routine
  EPETRA_CHK_ERR(TempTransA1.FillComplete(false));

  // Now that transpose matrix with shared rows is entered, update values of target transpose matrix
  TransposeMatrix_->PutScalar(0.0);  // Zero out all values of the matrix

  EPETRA_CHK_ERR(TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add));

  return(0);
}
//=========================================================================
RowMatrix_Transpose::NewTypeRef
RowMatrix_Transpose::
operator()( OriginalTypeRef orig )
{
  origObj_ = &orig;

  int i, j, err;

  if( !TransposeRowMap_ )
  {
    if( IgnoreNonLocalCols_ )
      TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorRangeMap()); // Should be replaced with refcount =
    else
      TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorDomainMap()); // Should be replaced with refcount =
  }

  // This routine will work for any RowMatrix object, but will attempt cast the matrix to a CrsMatrix if
  // possible (because we can then use a View of the matrix and graph, which is much cheaper).

  // First get the local indices to count how many nonzeros will be in the 
  // transpose graph on each processor
  Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix*>(&orig);

  OrigMatrixIsCrsMatrix_ = (OrigCrsMatrix!=0); // If this pointer is non-zero, the cast to CrsMatrix worked

  NumMyRows_ = orig.NumMyRows();
  NumMyCols_ = orig.NumMyCols();
  TransNumNz_ = new int[NumMyCols_];
  TransIndices_ = new int*[NumMyCols_];
  TransValues_ = new double*[NumMyCols_];
  TransMyGlobalEquations_ = new int[NumMyCols_];

  int NumIndices;

  if (OrigMatrixIsCrsMatrix_)
  {
    const Epetra_CrsGraph & OrigGraph = OrigCrsMatrix->Graph(); // Get matrix graph

    for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0;
    for (i=0; i<NumMyRows_; i++)
    {
      err = OrigGraph.ExtractMyRowView(i, NumIndices, Indices_); // Get view of ith row
      if (err != 0) throw OrigGraph.ReportError("ExtractMyRowView failed",err);
      for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]];
    }
  }
  else // Original is not a CrsMatrix
  {
    MaxNumEntries_ = 0;
    int NumEntries;
    for (i=0; i<NumMyRows_; i++)
    {
      orig.NumMyRowEntries(i, NumEntries);
      MaxNumEntries_ = EPETRA_MAX(MaxNumEntries_, NumEntries);
    }
    Indices_ = new int[MaxNumEntries_];
    Values_ = new double[MaxNumEntries_];

    for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0;
    for (i=0; i<NumMyRows_; i++)
    {
      err = orig.ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_); 
      if (err != 0) {
        std::cerr << "ExtractMyRowCopy failed."<<std::endl;
        throw err;
      }
      for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]];
    }
  }

  // Most of remaining code is common to both cases
  for(i=0; i<NumMyCols_; i++)
  {
    NumIndices = TransNumNz_[i];
    if (NumIndices>0)
    {
      TransIndices_[i] = new int[NumIndices];
      TransValues_[i] = new double[NumIndices];
    }
  }

  // Now copy values and global indices into newly create transpose storage

  for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter
  for (i=0; i<NumMyRows_; i++)
  {
    if (OrigMatrixIsCrsMatrix_)
      err = OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_);
    else
      err = orig.ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_);
    if (err != 0) {
      std::cerr << "ExtractMyRowCopy failed."<<std::endl;
      throw err;
    }

    int ii = orig.RowMatrixRowMap().GID(i);
    for (j=0; j<NumIndices; j++)
    {
      int TransRow = Indices_[j];
      int loc = TransNumNz_[TransRow];
      TransIndices_[TransRow][loc] = ii;
      TransValues_[TransRow][loc] = Values_[j];
      ++TransNumNz_[TransRow]; // increment counter into current transpose row
    }
  }

  //  Build Transpose matrix with some rows being shared across processors.
  //  We will use a view here since the matrix will not be used for anything else

  const Epetra_Map & TransMap = orig.RowMatrixColMap();

  Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_);
  TransMap.MyGlobalElements(TransMyGlobalEquations_);
  
  for (i=0; i<NumMyCols_; i++) {
    err = TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], 
                     TransNumNz_[i], TransValues_[i], TransIndices_[i]);
    if (err < 0) throw TempTransA1.ReportError("InsertGlobalValues failed.",err);
  }
 
  // Note: The following call to FillComplete is currently necessary because
  //      some global constants that are needed by the Export () are computed in this routine
  err = TempTransA1.FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_, false);
  if (err != 0) {
    throw TempTransA1.ReportError("FillComplete failed.",err);
  }

  // Now that transpose matrix with shared rows is entered, create a new matrix that will
  // get the transpose with uniquely owned rows (using the same row distribution as A).
  if( IgnoreNonLocalCols_ )
    TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_, *TransposeRowMap_, 0);
  else
    TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_,0);

  // Create an Export object that will move TempTransA around
  TransposeExporter_ = new Epetra_Export(TransMap, *TransposeRowMap_);

  err = TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add);
  if (err != 0) throw TransposeMatrix_->ReportError("Export failed.",err);
  
  err = TransposeMatrix_->FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_);
  if (err != 0) throw TransposeMatrix_->ReportError("FillComplete failed.",err);

  if (MakeDataContiguous_) {
    err = TransposeMatrix_->MakeDataContiguous();
    if (err != 0) throw TransposeMatrix_->ReportError("MakeDataContiguous failed.",err);
  }

  newObj_ = TransposeMatrix_;

  return *newObj_;
}
//=========================================================================
int Epetra_RowMatrixTransposer::UpdateTransposeValues(Epetra_RowMatrix * MatrixWithNewValues){

  int i, j, NumIndices;

  if (!TransposeCreated_) EPETRA_CHK_ERR(-1); // Transpose must be already created

  // Sanity check of incoming matrix.  Perform some tests to see if it is compatible with original input matrix
  if (OrigMatrix_!=MatrixWithNewValues) { // Check if pointer of new matrix is same as previous input matrix
    OrigMatrix_ = MatrixWithNewValues; // Reset this pointer if not, then check for other attributes
    if (NumMyRows_ != OrigMatrix_->NumMyRows() ||
  NumMyCols_ != OrigMatrix_->NumMyCols() ||
  NumMyRows_ != OrigMatrix_->NumMyRows()) {
      EPETRA_CHK_ERR(-2); // New matrix not compatible with previous
    }
  }

  Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix *>(MatrixWithNewValues);

  
  OrigMatrixIsCrsMatrix_ = (OrigCrsMatrix!=0); // If this pointer is non-zero, the cast to CrsMatrix worked


  // Now copy values and global indices into newly create transpose storage

  for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter
  for (i=0; i<NumMyRows_; i++) {
    if (OrigMatrixIsCrsMatrix_) {
      EPETRA_CHK_ERR(OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_));
    }
    else {
      EPETRA_CHK_ERR(OrigMatrix_->ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_));
    }

    int ii = OrigMatrix_->RowMatrixRowMap().GID64(i); // FIXME long long
    for (j=0; j<NumIndices; j++) {
      int TransRow = Indices_[j];
      int loc = TransNumNz_[TransRow];
      TransIndices_[TransRow][loc] = ii;
      TransValues_[TransRow][loc] = Values_[j];
      ++TransNumNz_[TransRow]; // increment counter into current transpose row
    }
  }

  //  Build Transpose matrix with some rows being shared across processors.
  //  We will use a view here since the matrix will not be used for anything else

  const Epetra_Map & TransMap = OrigMatrix_->RowMatrixColMap();

  Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_);
  TransMap.MyGlobalElements(TransMyGlobalEquations_);
  /* Add  rows one-at-a-time */

  for (i=0; i<NumMyCols_; i++)
    {
      EPETRA_CHK_ERR(TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], 
                TransNumNz_[i], TransValues_[i], TransIndices_[i]));
    }
  // Note: The following call to FillComplete is currently necessary because
  //       some global constants that are needed by the Export () are computed in this routine
  const Epetra_Map& domain_map = OrigMatrix_->OperatorDomainMap();
  const Epetra_Map& range_map = OrigMatrix_->OperatorRangeMap();

  EPETRA_CHK_ERR(TempTransA1.FillComplete(range_map, domain_map, false));

  // Now that transpose matrix with shared rows is entered, update values of target transpose matrix

  TransposeMatrix_->PutScalar(0.0);  // Zero out all values of the matrix

  EPETRA_CHK_ERR(TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add));

  return(0);
}
//=========================================================================
int Epetra_RowMatrixTransposer::CreateTranspose (const bool MakeDataContiguous, 
             Epetra_CrsMatrix *& TransposeMatrix, 
             Epetra_Map * TransposeRowMap_in) {

// FIXME long long

  int i, j;

  if (TransposeCreated_) DeleteData(); // Get rid of existing data first

  if (TransposeRowMap_in==0)
    TransposeRowMap_ = (Epetra_Map *) &(OrigMatrix_->OperatorDomainMap()); // Should be replaced with refcount =
  else
    TransposeRowMap_ = TransposeRowMap_in; 

  // This routine will work for any RowMatrix object, but will attempt cast the matrix to a CrsMatrix if
  // possible (because we can then use a View of the matrix and graph, which is much cheaper).

  // First get the local indices to count how many nonzeros will be in the 
  // transpose graph on each processor


  Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix *>(OrigMatrix_);

  OrigMatrixIsCrsMatrix_ = (OrigCrsMatrix!=0); // If this pointer is non-zero, the cast to CrsMatrix worked

  NumMyRows_ = OrigMatrix_->NumMyRows();
  NumMyCols_ = OrigMatrix_->NumMyCols();
  NumMyRows_ = OrigMatrix_->NumMyRows();
  TransNumNz_ = new int[NumMyCols_];
  TransIndices_ = new int*[NumMyCols_];
  TransValues_ = new double*[NumMyCols_];


  int NumIndices;

  if (OrigMatrixIsCrsMatrix_) {


    const Epetra_CrsGraph & OrigGraph = OrigCrsMatrix->Graph(); // Get matrix graph

    for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0;
    for (i=0; i<NumMyRows_; i++) {
      EPETRA_CHK_ERR(OrigGraph.ExtractMyRowView(i, NumIndices, Indices_)); // Get view of ith row
      for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]];
    }
  }
  else { // OrigMatrix is not a CrsMatrix

    MaxNumEntries_ = 0;
    int NumEntries;
    for (i=0; i<NumMyRows_; i++) {
      OrigMatrix_->NumMyRowEntries(i, NumEntries);
      MaxNumEntries_ = EPETRA_MAX(MaxNumEntries_, NumEntries);
    }
    Indices_ = new int[MaxNumEntries_];
    Values_ = new double[MaxNumEntries_];

    for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0;
    for (i=0; i<NumMyRows_; i++) {
      // Get ith row
      EPETRA_CHK_ERR(OrigMatrix_->ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_)); 
      for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]];
    }
  }


  // Most of remaining code is common to both cases

  for(i=0; i<NumMyCols_; i++) {
    NumIndices = TransNumNz_[i];
    if (NumIndices>0) {
      TransIndices_[i] = new int[NumIndices];
      TransValues_[i] = new double[NumIndices];
    }
  }

  // Now copy values and global indices into newly created transpose storage

  for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter
  for (i=0; i<NumMyRows_; i++) {
    if (OrigMatrixIsCrsMatrix_) {
      EPETRA_CHK_ERR(OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_));
    }
    else {
      EPETRA_CHK_ERR(OrigMatrix_->ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_));
    }

    int ii = OrigMatrix_->RowMatrixRowMap().GID64(i); // FIXME long long
    for (j=0; j<NumIndices; j++) {
      int TransRow = Indices_[j];
      int loc = TransNumNz_[TransRow];
      TransIndices_[TransRow][loc] = ii;
      TransValues_[TransRow][loc] = Values_[j];
      ++TransNumNz_[TransRow]; // increment counter into current transpose row
    }
  }

  //  Build Transpose matrix with some rows being shared across processors.
  //  We will use a view here since the matrix will not be used for anything else

  const Epetra_Map & TransMap = OrigMatrix_->RowMatrixColMap();

  Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_);
  TransMyGlobalEquations_ = new int[NumMyCols_];

  TransMap.MyGlobalElements(TransMyGlobalEquations_);

  /* Add  rows one-at-a-time */

  for (i=0; i<NumMyCols_; i++)
    {
      EPETRA_CHK_ERR(TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], 
                TransNumNz_[i], TransValues_[i], TransIndices_[i]));
    }
  // Note: The following call to FillComplete is currently necessary because
  //       some global constants that are needed by the Export () are computed in this routine

  const Epetra_Map& domain_map = OrigMatrix_->OperatorDomainMap();
  const Epetra_Map& range_map = OrigMatrix_->OperatorRangeMap();

  EPETRA_CHK_ERR(TempTransA1.FillComplete(range_map, domain_map, false));

  // Now that transpose matrix with shared rows is entered, create a new matrix that will
  // get the transpose with uniquely owned rows (using the same row distribution as A).

  TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_,0);

  // Create an Export object that will move TempTransA around

  TransposeExporter_ = new Epetra_Export(TransMap, *TransposeRowMap_);

  EPETRA_CHK_ERR(TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add));
  
  EPETRA_CHK_ERR(TransposeMatrix_->FillComplete(range_map, domain_map));

  if (MakeDataContiguous) {
    EPETRA_CHK_ERR(TransposeMatrix_->MakeDataContiguous());
  }

  TransposeMatrix = TransposeMatrix_;
  TransposeCreated_ = true;

  return(0);
}