// =============================================================================
// Compute the weighted norm of every vector in this multivector and hand the
// results back to Python as a 1-D NumPy array of doubles.
//
// weights : forwarded unchanged to Epetra_MultiVector::NormWeighted.
//
// Returns a new array with NumVectors() entries.  Returns NULL if the Epetra
// call reports a nonzero status (a Python RuntimeError is set here) or if the
// NumPy array cannot be created.
PyObject * Epetra_NumPyMultiVector::
NormWeighted(const Epetra_MultiVector & weights) const
{
  int n = NumVectors();
  double * result = new double[n];
  npy_intp numVectors[ ] = {n};
  // Declared without initializers ahead of the gotos so that no jump to
  // 'fail' crosses an initialization.
  PyObject * po;
  double * data;

  int status = Epetra_MultiVector::NormWeighted(weights,result);

  if (status)
  {
    PyErr_Format(PyExc_RuntimeError, "NormWeighted returned error code %d", status);
    goto fail;
  }
  po = PyArray_SimpleNew(1, numVectors, NPY_DOUBLE);
  // Array creation can fail; dereferencing a NULL result below would crash.
  // NOTE(review): NumPy is expected to have set the Python exception already.
  if (!po) goto fail;
  data = (double*) PyArray_DATA((PyArrayObject*)po);
  for (int i=0; i<n; i++) data[i] = result[i];
  delete [] result;
  return PyArray_Return((PyArrayObject*)po);
 fail:
  // Single cleanup point for the scratch buffer on every error path.
  delete [] result;
  return NULL;
}
void Epetra_FEVector::zeroNonlocalData()
{
  if (nonlocalIDs<int_type>().size() > 0) {
    int maxelemSize = Map().MaxElementSize();
    for(int vi=0; vi<NumVectors(); ++vi) {
      for(size_t i=0; i<nonlocalIDs<int_type>().size(); ++i) {
        int elemSize = nonlocalElementSize_[i];
        for(int j=0; j<elemSize; ++j) {
          nonlocalCoefs_[vi][i*maxelemSize+j] = 0.0;
        }
      }
    }
  }
}
void Epetra_FEVector::createNonlocalMapAndExporter()
{
  std::vector<int_type>& nonlocalIDs_var = nonlocalIDs<int_type>();
  delete nonlocalMap_;
  int_type* nlIDptr = nonlocalIDs_var.size()>0 ? &nonlocalIDs_var[0] : NULL;
  int* nlElSzptr = nonlocalElementSize_.size()>0 ? &nonlocalElementSize_[0] : NULL;
  nonlocalMap_ = new Epetra_BlockMap ((int_type) -1, (int) nonlocalIDs_var.size(), nlIDptr,
                                      nlElSzptr, Map().IndexBase(), Map().Comm());
  delete exporter_;
  exporter_ = new Epetra_Export (*nonlocalMap_, Map());

  delete nonlocalVector_;
  nonlocalVector_ = new Epetra_MultiVector (*nonlocalMap_, NumVectors());
}
//=========================================================================
// Return a View-mode Epetra_MultiVector over BaseMap_ whose column pointers
// are this object's column pointers advanced by
// GlobalBlockRow * BaseMap_.NumMyElements() entries.
Teuchos::RCP<Epetra_MultiVector>
BlockMultiVector::GetBlock(int GlobalBlockRow)
{
  const int numCols = NumVectors();
  const int shift = GlobalBlockRow * BaseMap_.NumMyElements();

  double **columns = Pointers();

  // Temporary table of shifted column pointers handed to the constructor.
  double **shifted = new double*[numCols];
  for (int col = 0; col < numCols; ++col) {
    shifted[col] = columns[col] + shift;
  }

  Teuchos::RCP<Epetra_MultiVector> view =
    Teuchos::rcp(new Epetra_MultiVector(View, BaseMap_, shifted, numCols));

  // The table is released immediately after construction.
  // NOTE(review): presumably the multivector keeps its own copy of the
  // pointer table -- confirm against Epetra_MultiVector.
  delete [] shifted;
  return view;
}
// =============================================================================
// Construct from an existing Epetra_MultiVector (base-class copy constructor)
// and wrap the resulting data in a 2-D NumPy array of doubles with shape
// (NumVectors(), map->NumMyPoints()).
// Throws PythonException, after calling cleanup(), if the array cannot be
// created.
Epetra_NumPyMultiVector::Epetra_NumPyMultiVector(const Epetra_MultiVector & source):
  Epetra_MultiVector(source)
{
  // Keep a private copy of the source's map.
  map = new Epetra_BlockMap(source.Map());

  // Array shape: one row per vector, one column per local point.
  npy_intp shape[2];
  shape[0] = NumVectors();
  shape[1] = map->NumMyPoints();

  // Obtain the column pointers; the array is built on the first column's
  // address, so no data is copied into the NumPy object.
  double **columns = NULL;
  Epetra_MultiVector::ExtractView(&columns);
  void * firstColumn = static_cast<void *>(columns[0]);

  array = (PyArrayObject *) PyArray_SimpleNewFromData(2, shape, NPY_DOUBLE,
                                                      firstColumn);
  if (array == NULL)
  {
    cleanup();
    throw PythonException();
  }
}
// Write a short, human-readable status report for this container to 'os'
// (sizes, state flags and flop counters) and return the same stream.
// NOTE(review): T is not declared in this excerpt; presumably a template
// parameter introduced on a line outside this view.
ostream& Ifpack2_SparseContainer<T>::Print(ostream & os) const
{
    // Horizontal rule printed above and below the report.
    const char * const rule =
        "================================================================================";

    os << rule << endl;
    os << "Ifpack2_SparseContainer" << endl;

    os << "Number of rows          = ";
    os << NumRows() << endl;
    os << "Number of vectors       = ";
    os << NumVectors() << endl;
    os << "IsInitialized()         = ";
    os << IsInitialized() << endl;
    os << "IsComputed()            = ";
    os << IsComputed() << endl;
    os << "Flops in Initialize()   = ";
    os << InitializeFlops() << endl;
    os << "Flops in Compute()      = ";
    os << ComputeFlops() << endl;
    os << "Flops in ApplyInverse() = ";
    os << ApplyInverseFlops() << endl;

    // Closing rule followed by one blank line.
    os << rule << endl << endl;

    return os;
}
//----------------------------------------------------------------------------
void Epetra_FEVector::destroyNonlocalData()
{
#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
  nonlocalIDs_int_.clear();
#endif
#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
  nonlocalIDs_LL_.clear();
#endif
  nonlocalElementSize_.clear();

  if (nonlocalCoefs_.size() > 0) {
    for(int i=0; i<NumVectors(); ++i) {
      nonlocalCoefs_[i].clear();
    }
  }
}
//=========================================================================
int BlockMultiVector::LoadBlockValues(const Epetra_MultiVector & BaseVector, int GlobalBlockRow) 
{
   int IndexOffset = GlobalBlockRow * Offset_;
   int localIndex=0;

   // For each entry in the base vector, translate its global ID
   // by the IndexOffset and load into this blockVector
   for (int i=0; i<BaseMap_.NumMyElements(); i++) {
      localIndex = this->Map().LID((IndexOffset + BaseMap_.GID(i)));
      if (localIndex==-1) { 
	     cout << "Error in  BlockMultiVector::GetBlock: " << i << " " 
		  << IndexOffset << " " << BaseMap_.GID(i) << endl;
	     return -1;
      }
      for (int j=0; j<NumVectors(); j++)
	(*this)[j][localIndex] = BaseVector[j][i];
   }

   return 0;
}
// Gather the nonlocal (overlapping) contributions recorded on each process
// into the distribution defined by the map this vector was constructed with.
//
// mode                   : combine mode passed to Export.
// reuse_map_and_exporter : when true, an existing nonlocal map/exporter is
//                          kept and the nonlocal coefficients are only zeroed
//                          afterwards; when false, the map/exporter are
//                          rebuilt and the nonlocal data is destroyed
//                          afterwards.
//
// Returns 0 on success; a failing Export is handled by EPETRA_CHK_ERR.
// NOTE(review): int_type is not declared in this excerpt; presumably it is a
// template parameter introduced on a line outside this view.
int Epetra_FEVector::GlobalAssemble(Epetra_CombineMode mode,
                                    bool reuse_map_and_exporter)
{
  // Nothing to assemble on a single process, or when nonlocal entries are
  // being ignored.
  const bool singleProcess = Map().Comm().NumProc() < 2;
  if (singleProcess || ignoreNonLocalEntries_) {
    return 0;
  }

  // (Re)build the nonlocal map, exporter and staging vector when none exist
  // yet or when the caller does not want them reused.
  const bool mustRebuild = (nonlocalMap_ == 0) || !reuse_map_and_exporter;
  if (mustRebuild) {
    createNonlocalMapAndExporter<int_type>();
  }

  Epetra_MultiVector& staging = *nonlocalVector_;
  staging.PutScalar(0.0);

  // Copy the recorded coefficients into the staging vector.  Every nonlocal
  // ID owns a slot of 'stride' doubles, of which the first
  // nonlocalElementSize_[id] are in use.
  std::vector<int_type>& ids = nonlocalIDs<int_type>();
  const size_t numIds = ids.size();
  const int stride = Map().MaxElementSize();
  const int numVecs = NumVectors();
  for (int vec = 0; vec < numVecs; ++vec) {
    for (size_t id = 0; id < numIds; ++id) {
      const int used = nonlocalElementSize_[id];
      for (int k = 0; k < used; ++k) {
        staging.ReplaceGlobalValue(ids[id], k, vec,
                                   nonlocalCoefs_[vec][id*stride + k]);
      }
    }
  }

  EPETRA_CHK_ERR( Export(staging, *exporter_, mode) );

  // Keep the ID bookkeeping for the next assembly if it is to be reused;
  // otherwise drop it altogether.
  if (!reuse_map_and_exporter) {
    destroyNonlocalData();
  }
  else {
    zeroNonlocalData<int_type>();
  }

  return 0;
}
// =============================================================================
// Construct from a selection of vectors of 'source' (chosen through 'range',
// which is resolved by getRange/getRangeLen) using data-access mode CV, then
// wrap the result in a 2-D NumPy array of doubles with shape
// (NumVectors(), MyLength()).
// Throws PythonException, after calling cleanup(), if the array cannot be
// created.
Epetra_NumPyMultiVector::Epetra_NumPyMultiVector(Epetra_DataAccess CV,
						 const Epetra_MultiVector & source,
						 PyObject * range):
  Epetra_MultiVector(CV, source, getRange(range, source), getRangeLen(range, source))
{
  // Keep a private copy of the source's map.
  map = new Epetra_BlockMap(source.Map());

  // Array shape: one row per selected vector, one column per local entry.
  npy_intp shape[2];
  shape[0] = NumVectors();
  shape[1] = MyLength();

  // Build the NumPy array on the first column's address; no data is copied
  // into the NumPy object.
  double **columns = NULL;
  Epetra_MultiVector::ExtractView(&columns);
  void * firstColumn = static_cast<void *>(columns[0]);
  array = (PyArrayObject *) PyArray_SimpleNewFromData(2, shape, NPY_DOUBLE,
						      firstColumn);
  if (array == NULL)
  {
    cleanup();
    throw PythonException();
  }

  // Release the temporary range object.
  // NOTE(review): tmp_range is presumably populated by getRange() -- confirm.
  Py_XDECREF(tmp_range);
  tmp_range = NULL;
}
// =============================================================================
// Construct from a selection of vectors of another Epetra_NumPyMultiVector
// (chosen through 'range', resolved by getRange/getRangeLen) using data-access
// mode CV, then wrap the result in a NumPy array of doubles.  The array takes
// its dimension count and trailing dimensions from the source's array when
// one is available, and is 2-D (NumVectors(), source.MyLength()) otherwise.
// Throws PythonException, after calling cleanup(), if the array cannot be
// created.
Epetra_NumPyMultiVector::Epetra_NumPyMultiVector(Epetra_DataAccess CV,
						 const Epetra_NumPyMultiVector & source,
						 PyObject * range):
  // The source is passed to the helpers as a base-class *reference*; casting
  // by value here would build a sliced temporary copy for each call.
  Epetra_MultiVector(CV, static_cast<const Epetra_MultiVector &>(source),
		     getRange(   range, static_cast<const Epetra_MultiVector &>(source)),
		     getRangeLen(range, static_cast<const Epetra_MultiVector &>(source)))
{
  // Store the Epetra_NumPyMultiVector's map
  map = new Epetra_BlockMap(source.Map());

  // Initialize the local numpy array
  PyArrayObject * src_array = (PyArrayObject *) (source.ExtractView());
  int nd;
  // This shouldn't happen, but it does . . .
  if (NULL == src_array) nd = 2;
  else nd = PyArray_NDIM(src_array);
  npy_intp * dims = new npy_intp[nd];
  // The leading dimension is the number of vectors selected by 'range'; the
  // remaining dimensions are taken over from the source array (or fall back
  // to the source's local length when there is no source array).
  dims[0] = NumVectors();
  if (NULL == src_array) dims[1] = source.MyLength();
  else for (int i=1; i<nd; i++) dims[i] = PyArray_DIMS(src_array)[i];

  double **v = NULL;
  Epetra_MultiVector::ExtractView(&v);
  array = (PyArrayObject *) PyArray_SimpleNewFromData(nd,dims,NPY_DOUBLE,
						      (void *)v[0]);
  // The shape buffer is only needed for the call above.
  delete [] dims;
  if (!array)
  {
    cleanup();
    throw PythonException();
  }

  // We're done with the tmp_range array
  Py_XDECREF(tmp_range);
  tmp_range = NULL;
}
// Record values for a global ID that is not owned by this process.
//
// GID         : global ID the values belong to.
// numValues   : number of entries in 'values' (the block size for this GID).
// values      : the coefficients to store.
// suminto     : for a GID that is already recorded, add to the stored
//               coefficients (true) or overwrite them (false).
// vectorIndex : which vector of the multivector the values belong to.
//
// The recorded GIDs are kept sorted.  Each GID owns a slot of
// Map().MaxElementSize() doubles in every nonlocalCoefs_[i].
//
// Returns 0 on success and -1 (with a message on cerr) if numValues differs
// from the block size previously recorded for GID, or if a new GID's
// numValues would not fit into its slot.  Throws the result of ReportError
// when the map's global index type does not match int_type.
// NOTE(review): int_type is not declared in this excerpt; presumably it is a
// template parameter introduced on a line outside this view.
int Epetra_FEVector::inputNonlocalValues(int_type GID, int numValues,
                                         const double* values, bool suminto,
                                         int vectorIndex)
{
  if(!Map().template GlobalIndicesIsType<int_type>())
    throw ReportError("Epetra_FEVector::inputValues mismatch between argument types (int/long long) and map type.", -1);

  //find offset of GID in nonlocalIDs_var

  std::vector<int_type>& nonlocalIDs_var = nonlocalIDs<int_type>();

  typename std::vector<int_type>::iterator it = std::lower_bound(nonlocalIDs_var.begin(), nonlocalIDs_var.end(), GID);
  int offset = (int) (it - nonlocalIDs_var.begin());
  int insertPoint = offset;
  if (it == nonlocalIDs_var.end() || *it != GID) {
    offset = -1;
  }

  int elemSize = Map().MaxElementSize();
  if (offset >= 0) {
    //if offset >= 0 (meaning GID was found)
    //  put value in nonlocalCoefs_[vectorIndex][offset*elemSize]

    if (numValues != nonlocalElementSize_[offset]) {
      cerr << "Epetra_FEVector ERROR: block-size for GID " << GID << " is "
           << numValues<<" which doesn't match previously set block-size of "
           << nonlocalElementSize_[offset] << endl;
      return(-1);
    }

    offset = offset*elemSize;

    if (suminto) {
      for(int j=0; j<numValues; ++j) {
        nonlocalCoefs_[vectorIndex][offset+j] += values[j];
      }
    }
    else {
      for(int j=0; j<numValues; ++j) {
        nonlocalCoefs_[vectorIndex][offset+j] = values[j];
      }
    }
  }
  else {
    //else
    //  insert GID in nonlocalIDs_
    //  insert numValues   in nonlocalElementSize_
    //  insert values in nonlocalCoefs_

    //A slot holds exactly elemSize doubles; more values than that would
    //spill into the next GID's slot or past the end of the storage, so
    //reject the input before any bookkeeping is modified.
    if (numValues > elemSize) {
      cerr << "Epetra_FEVector ERROR: block-size for GID " << GID << " is "
           << numValues<<" which exceeds the map's maximum element size of "
           << elemSize << endl;
      return(-1);
    }

    nonlocalIDs_var.insert(it, GID);
    nonlocalElementSize_.insert(nonlocalElementSize_.begin()+insertPoint, numValues);

    //to keep nonlocalCoefs_[i] the same length for each vector in the multi-
    //vector, we'll insert positions for each vector even though values are
    //only being set for one of them...
    //The whole zero-filled slot is inserted with one call so the tail of
    //each vector is shifted once rather than elemSize times.
    if (elemSize > 0) {
      for(int i=0; i<NumVectors(); ++i) {
        nonlocalCoefs_[i].insert(nonlocalCoefs_[i].begin()+insertPoint*elemSize,
                                 static_cast<size_t>(elemSize), 0.0);
      }
    }

    for(int j=0; j<numValues; ++j) {
      nonlocalCoefs_[vectorIndex][insertPoint*elemSize+j] = values[j];
    }
  }

  return(0);
}