FiniteDifferenceColoring::FiniteDifferenceColoring(
         Teuchos::ParameterList& printingParams,
	 const Teuchos::RCP<Interface::Required>& i,
	 const NOX::Epetra::Vector& x,
	 const Teuchos::RCP<Epetra_CrsGraph>& rawGraph_,
	 const Teuchos::RCP<Epetra_MapColoring>& colorMap_,
	 const Teuchos::RCP< vector<Epetra_IntVector> >& columns_,
	 bool parallelColoring,
	 bool distance1_,
	 double beta_, double alpha_) :
  FiniteDifference(printingParams, i, x, rawGraph_, beta_, alpha_),
  coloringType(NOX_SERIAL),
  distance1(distance1_),
  colorMap(colorMap_),
  columns(columns_),
  numColors(colorMap->NumColors()),
  maxNumColors(colorMap->MaxNumColors()),
  colorList(colorMap->ListOfColors()),
  cMap(0),
  Importer(0),
  colorVect(0),
  betaColorVect(0),
  mappedColorVect(new Epetra_Vector(rawGraph_->ColMap())),
  xCol_perturb(new Epetra_Vector(rawGraph_->ColMap())),
  columnMap(&(rawGraph_->ColMap())),
  rowColImporter(new Epetra_Import(*columnMap, fo.Map()))
{
  label = "NOX::FiniteDifferenceColoring Jacobian";

  if( parallelColoring )
    coloringType = NOX_PARALLEL;

  createColorContainers();
}
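// A minimal construction sketch (illustrative, not from the original source):
// one way to build the coloring inputs with EpetraExt and hand them to the
// constructor above. `printParams`, `iReq`, `noxSoln`, and `rawGraph` are
// assumed to exist; the header names, default coloring algorithm, and the
// beta/alpha perturbation values are assumptions, not mandated by this class.
#include "NOX_Epetra_FiniteDifferenceColoring.H"
#include "EpetraExt_MapColoring.h"
#include "EpetraExt_MapColoringIndex.h"

void buildColoredJacobian(Teuchos::ParameterList& printParams,
                          const Teuchos::RCP<NOX::Epetra::Interface::Required>& iReq,
                          const NOX::Epetra::Vector& noxSoln,
                          const Teuchos::RCP<Epetra_CrsGraph>& rawGraph)
{
  // Color the graph so that structurally independent columns share a color
  // and can be perturbed in a single residual evaluation.
  EpetraExt::CrsGraph_MapColoring colorTrans;
  Teuchos::RCP<Epetra_MapColoring> colorMap =
    Teuchos::rcp(&colorTrans(*rawGraph), false);   // non-owning; colorTrans owns it

  // Per-color lists of column indices.
  EpetraExt::CrsGraph_MapColoringIndex colorIndexTrans(*colorMap);
  Teuchos::RCP< std::vector<Epetra_IntVector> > columns =
    Teuchos::rcp(&colorIndexTrans(*rawGraph), false);

  NOX::Epetra::FiniteDifferenceColoring fdc(printParams, iReq, noxSoln, rawGraph,
                                            colorMap, columns,
                                            false /*parallelColoring*/,
                                            false /*distance1*/,
                                            1.0e-6 /*beta*/, 1.0e-4 /*alpha*/);

  // One colored finite-difference Jacobian fill at the current solution.
  fdc.computeJacobian(noxSoln.getEpetraVector(), fdc);
}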
Colorer::Colorer(Teuchos::RCP<const Epetra_CrsGraph> input_graph,
		 const Teuchos::ParameterList& paramlist, bool compute_now):
  Operator(input_graph, paramlist, 1)
{
#ifdef HAVE_EPETRAEXT
  colmap_ = Teuchos::rcp(&(input_graph->ColMap()),false);
#endif /* HAVE_EPETRAEXT */

#ifdef HAVE_ISORROPIA_ZOLTAN
  lib_ = Teuchos::rcp(new ZoltanLibClass(input_graph, Library::graph_input_));
#else /* HAVE_ISORROPIA_ZOLTAN */
  throw Isorropia::Exception("Coloring only available in Zoltan");
#endif /* HAVE_ISORROPIA_ZOLTAN */
  if (compute_now)
    color(true);
}
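// A minimal usage sketch (illustrative): color a filled graph and retrieve the
// row coloring. `graph` is assumed to exist; the header name and the
// generateRowMapColoring() accessor are taken from Isorropia's Epetra
// interface as the author understands it.
#include "Isorropia_EpetraColorer.hpp"

void colorGraphExample(const Teuchos::RCP<const Epetra_CrsGraph>& graph)
{
  Teuchos::ParameterList params;   // defaults are sufficient for a first pass
  Isorropia::Epetra::Colorer colorer(graph, params, /*compute_now=*/true);

  // Color assignment for each local row of the graph.
  Teuchos::RCP<Epetra_MapColoring> rowColors = colorer.generateRowMapColoring();
}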
void panzer::ScatterResidual_BlockedEpetra<panzer::Traits::Jacobian, Traits,LO,GO>::
evaluateFields(typename Traits::EvalData workset)
{ 
   using Teuchos::RCP;
   using Teuchos::ArrayRCP;
   using Teuchos::ptrFromRef;
   using Teuchos::rcp_dynamic_cast;

   using Thyra::VectorBase;
   using Thyra::SpmdVectorBase;
   using Thyra::ProductVectorBase;
   using Thyra::BlockedLinearOpBase;

   typedef BlockedEpetraLinearObjContainer BLOC;

   std::vector<std::pair<int,GO> > GIDs;
   std::vector<LO> LIDs;
   std::vector<double> jacRow;

   // for convenience pull out some objects from workset
   std::string blockId = workset.block_id;
   const std::vector<std::size_t> & localCellIds = workset.cell_local_ids;

   RCP<const BLOC> blockedContainer = blockedContainer_;

   RCP<ProductVectorBase<double> > r = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer->get_f());
   Teuchos::RCP<BlockedLinearOpBase<double> > Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(blockedContainer->get_A());

   int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
   std::vector<int> blockOffsets(numFieldBlocks+1); // number of fields, plus a sentinel
   for(int blk=0;blk<numFieldBlocks;blk++) {
      int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
      blockOffsets[blk] = blockOffset;
   }
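   // blockOffsets[blk] holds the offset of field block blk within the element
   // GID/LID arrays; the final entry is a sentinel (set below, once the
   // derivative array size is known) so that [blockOffsets[blk],
   // blockOffsets[blk+1]) brackets the unknowns of block blk.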

   boost::unordered_map<std::pair<int,int>,Teuchos::RCP<Epetra_CrsMatrix> > jacEpetraBlocks;

   // NOTE: A reordering of these loops will likely improve performance.
   //       The "getGIDFieldOffsets" call may be expensive while
   //       "getElementGIDs" can be cheaper; however, the lookup for
   //       LIDs may be more expensive!

   // scatter operation for each cell in workset
   for(std::size_t worksetCellIndex=0;worksetCellIndex<localCellIds.size();++worksetCellIndex) {
      std::size_t cellLocalId = localCellIds[worksetCellIndex];

      globalIndexer_->getElementGIDs(cellLocalId,GIDs,blockId); 

      // calculate the local IDs for this element
      LIDs.resize(GIDs.size());
      for(std::size_t i=0;i<GIDs.size();i++) {
         // used for doing local ID lookups
         RCP<const Epetra_Map> r_map = blockedContainer->getMapForBlock(GIDs[i].first);

         LIDs[i] = r_map->LID(GIDs[i].second);
      }

      // loop over each field to be scattered
      Teuchos::ArrayRCP<double> local_r;
      for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
         int fieldNum = fieldIds_[fieldIndex];
         int blockRowIndex = globalIndexer_->getFieldBlock(fieldNum);

         // grab local residual data to sum into
         if(r!=Teuchos::null) {
            RCP<SpmdVectorBase<double> > block_r = rcp_dynamic_cast<SpmdVectorBase<double> >(r->getNonconstVectorBlock(blockRowIndex));
            block_r->getNonconstLocalData(ptrFromRef(local_r));
         }

         const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);
        
         // loop over the basis functions (currently they are nodes)
         for(std::size_t rowBasisNum = 0; rowBasisNum < elmtOffset.size(); rowBasisNum++) {
            const ScalarT & scatterField = (scatterFields_[fieldIndex])(worksetCellIndex,rowBasisNum);
            int rowOffset = elmtOffset[rowBasisNum];
            int r_lid = LIDs[rowOffset];
    
            // Sum residual
            if(local_r!=Teuchos::null)
               local_r[r_lid] += (scatterField.val());

            blockOffsets[numFieldBlocks] = scatterField.size(); // add the sentinel
    
            // loop over the sensitivity indices: all DOFs on a cell
            jacRow.resize(scatterField.size());
            
            for(int sensIndex=0;sensIndex<scatterField.size();++sensIndex) {
               jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex);
            }
    
            for(int blockColIndex=0;blockColIndex<numFieldBlocks;blockColIndex++) {
               int start = blockOffsets[blockColIndex];
               int end = blockOffsets[blockColIndex+1];

               if(end-start<=0) 
                  continue;

               // check hash table for jacobian sub block
               std::pair<int,int> blockIndex = std::make_pair(blockRowIndex,blockColIndex);
               Teuchos::RCP<Epetra_CrsMatrix> subJac = jacEpetraBlocks[blockIndex];

               // if you didn't find one before, add it to the hash table
               if(subJac==Teuchos::null) {
                  Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second); 

                  // block operator is null, don't do anything (it is excluded)
                  if(Teuchos::is_null(tOp))
                     continue;

                  Teuchos::RCP<Epetra_Operator> eOp = Thyra::get_Epetra_Operator(*tOp);
                  subJac = rcp_dynamic_cast<Epetra_CrsMatrix>(eOp,true);
                  jacEpetraBlocks[blockIndex] = subJac;
               }

               // Sum Jacobian
               int err = subJac->SumIntoMyValues(r_lid, end-start, &jacRow[start],&LIDs[start]);
               if(err!=0) {
                 RCP<const Epetra_Map> rr = blockedContainer->getMapForBlock(GIDs[start].first);
                 bool sameColMap = subJac->ColMap().SameAs(*rr);

                 std::stringstream ss;
                 ss << "Failed inserting row: " << GIDs[rowOffset].second << " (" << r_lid << "): ";
                 for(int i=start;i<end;i++)
                   ss << GIDs[i].second << " (" << LIDs[i] << ") ";
                 ss << std::endl;
                 ss << "Into block " << blockRowIndex << ", " << blockColIndex << std::endl;

                 ss << "scatter field = ";
                 scatterFields_[fieldIndex].print(ss);
                 ss << std::endl;

                 ss << "Same map = " << (sameColMap ? "true" : "false") << std::endl; 
                 
                 TEUCHOS_TEST_FOR_EXCEPTION(err!=0,std::runtime_error,ss.str());
               }
            }
         } // end rowBasisNum
      } // end fieldIndex
   }
}
void
Albany::ModelEvaluator::evalModel(const InArgs& inArgs,
                                 const OutArgs& outArgs) const
{
  Teuchos::TimeMonitor Timer(*timer); //start timer
  //
  // Get the input arguments
  //
  Teuchos::RCP<const Epetra_Vector> x = inArgs.get_x();
  Teuchos::RCP<const Epetra_Vector> x_dot;
  Teuchos::RCP<const Epetra_Vector> x_dotdot;

  //create comm and node objects for Epetra -> Tpetra conversions
  Teuchos::RCP<const Teuchos::Comm<int> > commT = app->getComm();
  Teuchos::RCP<Epetra_Comm> comm = Albany::createEpetraCommFromTeuchosComm(commT);
  //Create Tpetra copy of x, call it xT
  Teuchos::RCP<const Tpetra_Vector> xT;
  if (x != Teuchos::null)
    xT  = Petra::EpetraVector_To_TpetraVectorConst(*x, commT);

  double alpha     = 0.0;
  double omega     = 0.0;
  double beta      = 1.0;
  double curr_time = 0.0;

  if(num_time_deriv > 0)
    x_dot = inArgs.get_x_dot();
  if(num_time_deriv > 1)
    x_dotdot = inArgs.get_x_dotdot();

  //Declare and create Tpetra copy of x_dot, call it x_dotT
  Teuchos::RCP<const Tpetra_Vector> x_dotT;
  if (Teuchos::nonnull(x_dot))
    x_dotT = Petra::EpetraVector_To_TpetraVectorConst(*x_dot, commT);

  //Declare and create Tpetra copy of x_dotdot, call it x_dotdotT
  Teuchos::RCP<const Tpetra_Vector> x_dotdotT;
  if (Teuchos::nonnull(x_dotdot))
    x_dotdotT = Petra::EpetraVector_To_TpetraVectorConst(*x_dotdot, commT);

  if (Teuchos::nonnull(x_dot)){
    alpha = inArgs.get_alpha();
    beta = inArgs.get_beta();
    curr_time  = inArgs.get_t();
  }
  if (Teuchos::nonnull(x_dotdot)) {
    omega = inArgs.get_omega();
  }

  for (int i=0; i<num_param_vecs; i++) {
    Teuchos::RCP<const Epetra_Vector> p = inArgs.get_p(i);
    if (p != Teuchos::null) {
      for (unsigned int j=0; j<sacado_param_vec[i].size(); j++) {
        sacado_param_vec[i][j].baseValue = (*p)[j];
      }
    }
  }
  for (int i=0; i<num_dist_param_vecs; i++) {
    Teuchos::RCP<const Epetra_Vector> p = inArgs.get_p(i+num_param_vecs);
    //create Tpetra copy of p
    Teuchos::RCP<const Tpetra_Vector> pT;
    if (p != Teuchos::null) {
      pT = Petra::EpetraVector_To_TpetraVectorConst(*p, commT);
      //*(distParamLib->get(dist_param_names[i])->vector()) = *p;
      *(distParamLib->get(dist_param_names[i])->vector()) = *pT;
    }
  }

  //
  // Get the output arguments
  //
  EpetraExt::ModelEvaluator::Evaluation<Epetra_Vector> f_out = outArgs.get_f();
  Teuchos::RCP<Epetra_Operator> W_out = outArgs.get_W();

  // Cast W to a CrsMatrix, throw an exception if this fails
  Teuchos::RCP<Epetra_CrsMatrix> W_out_crs;
#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
  //IK, 7/15/14: adding object to hold mass matrix to be written to matrix market file
  Teuchos::RCP<Epetra_CrsMatrix> Mass;
  //IK, 7/15/14: needed for writing mass matrix out to matrix market file
  EpetraExt::ModelEvaluator::Evaluation<Epetra_Vector> ftmp = outArgs.get_f();
#endif

  if (W_out != Teuchos::null) {
    W_out_crs = Teuchos::rcp_dynamic_cast<Epetra_CrsMatrix>(W_out, true);
#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
    //IK, 7/15/14: adding object to hold mass matrix to be written to matrix market file
    Mass = Teuchos::rcp_dynamic_cast<Epetra_CrsMatrix>(W_out, true);
#endif
  }


  int test_var = 0;
  if (test_var != 0) {
    std::cout << "The current solution length is: " << x->MyLength() << std::endl;
    x->Print(std::cout);
  }

  // Get preconditioner operator, if requested
  Teuchos::RCP<Epetra_Operator> WPrec_out;
  if (outArgs.supports(OUT_ARG_WPrec)) WPrec_out = outArgs.get_WPrec();

  //
  // Compute the functions
  //
  bool f_already_computed = false;

  // W matrix
  if (W_out != Teuchos::null) {
    app->computeGlobalJacobian(alpha, beta, omega, curr_time, x_dot.get(), x_dotdot.get(),*x,
                               sacado_param_vec, f_out.get(), *W_out_crs);
#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
    //IK, 7/15/14: write mass matrix to matrix market file
    //Warning: to read this in to MATLAB correctly, code must be run in serial.
    //Otherwise Mass will have a distributed Map which would also need to be read in to MATLAB for proper
    //reading in of Mass.
    app->computeGlobalJacobian(1.0, 0.0, 0.0, curr_time, x_dot.get(), x_dotdot.get(), *x,
                               sacado_param_vec, ftmp.get(), *Mass);
    EpetraExt::RowMatrixToMatrixMarketFile("mass.mm", *Mass);
    EpetraExt::BlockMapToMatrixMarketFile("rowmap.mm", Mass->RowMap());
    EpetraExt::BlockMapToMatrixMarketFile("colmap.mm", Mass->ColMap());
    Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
#endif
    f_already_computed = true;
    if (test_var != 0) {
      //std::cout << "The current rhs length is: " << f_out->MyLength() << std::endl;
      //f_out->Print(std::cout);
      std::cout << "The current Jacobian length is: " << W_out_crs->NumGlobalRows() << std::endl;
      W_out_crs->Print(std::cout);
    }
  }

  if (WPrec_out != Teuchos::null) {
    app->computeGlobalJacobian(alpha, beta, omega, curr_time, x_dot.get(), x_dotdot.get(), *x,
                               sacado_param_vec, f_out.get(), *Extra_W_crs);
    f_already_computed = true;
    if (test_var != 0) {
      //std::cout << "The current rhs length is: " << f_out->MyLength() << std::endl;
      //f_out->Print(std::cout);
      std::cout << "The current preconditioner length is: " << Extra_W_crs->NumGlobalRows() << std::endl;
      Extra_W_crs->Print(std::cout);
    }

    app->computeGlobalPreconditioner(Extra_W_crs, WPrec_out);
  }

  // scalar df/dp
  for (int i=0; i<num_param_vecs; i++) {
    Teuchos::RCP<Epetra_MultiVector> dfdp_out =
      outArgs.get_DfDp(i).getMultiVector();
    if (dfdp_out != Teuchos::null) {
      Teuchos::Array<int> p_indexes =
        outArgs.get_DfDp(i).getDerivativeMultiVector().getParamIndexes();
      Teuchos::RCP<ParamVec> p_vec;
      if (p_indexes.size() == 0)
        p_vec = Teuchos::rcp(&sacado_param_vec[i],false);
      else {
        p_vec = Teuchos::rcp(new ParamVec);
        for (int j=0; j<p_indexes.size(); j++)
          p_vec->addParam(sacado_param_vec[i][p_indexes[j]].family,
                          sacado_param_vec[i][p_indexes[j]].baseValue);
      }

      app->computeGlobalTangent(0.0, 0.0, 0.0, curr_time, false, x_dot.get(), x_dotdot.get(), *x,
                                sacado_param_vec, p_vec.get(),
                                NULL, NULL, NULL, NULL, f_out.get(), NULL,
                                dfdp_out.get());

      f_already_computed = true;
      if (test_var != 0) {
        std::cout << "The current rhs length is: " << f_out->MyLength() << std::endl;
        f_out->Print(std::cout);
      }
    }
  }

  // distributed df/dp
  for (int i=0; i<num_dist_param_vecs; i++) {
    Teuchos::RCP<Epetra_Operator> dfdp_out =
      outArgs.get_DfDp(i+num_param_vecs).getLinearOp();
    if (dfdp_out != Teuchos::null) {
      Teuchos::RCP<DistributedParameterDerivativeOp> dfdp_op =
        Teuchos::rcp_dynamic_cast<DistributedParameterDerivativeOp>(dfdp_out);
      dfdp_op->set(curr_time, x_dotT, x_dotdotT, xT,
                   Teuchos::rcp(&sacado_param_vec,false));
    }
  }

  // f
  if (app->is_adjoint) {
    Derivative f_deriv(f_out, DERIV_TRANS_MV_BY_ROW);
    int response_index = 0; // need to add capability for sending this in
    app->evaluateResponseDerivative(response_index, curr_time, x_dot.get(), x_dotdot.get(), *x,
                                    sacado_param_vec, NULL,
                                    NULL, f_deriv, Derivative(), Derivative(), Derivative());
  }
  else {
    if (f_out != Teuchos::null && !f_already_computed) {
      app->computeGlobalResidual(curr_time, x_dot.get(), x_dotdot.get(), *x,
                                  sacado_param_vec, *f_out);
      if (test_var != 0) {
        std::cout << "The current rhs length is: " << f_out->MyLength() << std::endl;
        f_out->Print(std::cout);
      }
    }
  }


  // Response functions
  for (int i=0; i<outArgs.Ng(); i++) {
    //Set curr_time to final time at which response occurs.
    if(num_time_deriv > 0)
      curr_time  = inArgs.get_t();
    Teuchos::RCP<Epetra_Vector> g_out = outArgs.get_g(i);
    //Declare Tpetra_Vector copy of g_out
    Teuchos::RCP<Tpetra_Vector> g_outT;
    bool g_computed = false;

    Derivative dgdx_out = outArgs.get_DgDx(i);
    Derivative dgdxdot_out = outArgs.get_DgDx_dot(i);
    Derivative dgdxdotdot_out = outArgs.get_DgDx_dotdot(i);

    // dg/dx, dg/dxdot
    if (!dgdx_out.isEmpty() || !dgdxdot_out.isEmpty() || !dgdxdotdot_out.isEmpty() ) {
      app->evaluateResponseDerivative(i, curr_time, x_dot.get(), x_dotdot.get(), *x,
                                      sacado_param_vec, NULL,
                                      g_out.get(), dgdx_out,
                                      dgdxdot_out, dgdxdotdot_out, Derivative());
      g_computed = true;
    }

    // dg/dp
    for (int j=0; j<num_param_vecs; j++) {
      Teuchos::RCP<Epetra_MultiVector> dgdp_out =
        outArgs.get_DgDp(i,j).getMultiVector();
      //Declare Tpetra copy of dgdp_out
      Teuchos::RCP<Tpetra_MultiVector> dgdp_outT;
      if (dgdp_out != Teuchos::null) {
        Teuchos::Array<int> p_indexes =
          outArgs.get_DgDp(i,j).getDerivativeMultiVector().getParamIndexes();
        Teuchos::RCP<ParamVec> p_vec;
        if (p_indexes.size() == 0)
          p_vec = Teuchos::rcp(&sacado_param_vec[j],false);
        else {
          p_vec = Teuchos::rcp(new ParamVec);
          for (int k=0; k<p_indexes.size(); k++)
            p_vec->addParam(sacado_param_vec[j][p_indexes[k]].family,
                            sacado_param_vec[j][p_indexes[k]].baseValue);
        }
        //create Tpetra copy of g_out, call it g_outT
        if (g_out != Teuchos::null)
           g_outT = Petra::EpetraVector_To_TpetraVectorNonConst(*g_out, commT);
        //create Tpetra copy of dgdp_out, call it dgdp_outT
        if (dgdp_out != Teuchos::null)
           dgdp_outT = Petra::EpetraMultiVector_To_TpetraMultiVector(*dgdp_out, commT);
        app->evaluateResponseTangentT(i, alpha, beta, omega, curr_time, false,
                                      x_dotT.get(), x_dotdotT.get(), *xT,
                                      sacado_param_vec, p_vec.get(),
                                      NULL, NULL, NULL, NULL, g_outT.get(), NULL,
                                      dgdp_outT.get());
        //convert g_outT to Epetra_Vector g_out
        if (g_out != Teuchos::null)
          Petra::TpetraVector_To_EpetraVector(g_outT, *g_out, comm);
        //convert dgdp_outT to Epetra_MultiVector dgdp_out
        if (dgdp_out != Teuchos::null)
          Petra::TpetraMultiVector_To_EpetraMultiVector(dgdp_outT, *dgdp_out, comm);
        g_computed = true;
      }
    }

    // Need to handle dg/dp for distributed p
    for(int j=0; j<num_dist_param_vecs; j++) {
      Derivative dgdp_out = outArgs.get_DgDp(i,j+num_param_vecs);
      if (!dgdp_out.isEmpty()) {
        dgdp_out.getMultiVector()->PutScalar(0.);
        app->evaluateResponseDistParamDeriv(i, curr_time, x_dot.get(), x_dotdot.get(), *x, sacado_param_vec, dist_param_names[j], dgdp_out.getMultiVector().get());
      }
    }

    if (g_out != Teuchos::null && !g_computed) {
      //create Tpetra copy of g_out, call it g_outT
      g_outT = Petra::EpetraVector_To_TpetraVectorNonConst(*g_out, commT);
      app->evaluateResponseT(i, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
                             sacado_param_vec, *g_outT);
      //convert g_outT to Epetra_Vector g_out
      Petra::TpetraVector_To_EpetraVector(g_outT, *g_out, comm);
    }
  }

  //
  // Stochastic Galerkin
  //
#ifdef ALBANY_SG
  InArgs::sg_const_vector_t x_sg = inArgs.get_x_sg();
  if (x_sg != Teuchos::null) {
    app->init_sg(inArgs.get_sg_basis(),
                 inArgs.get_sg_quadrature(),
                 inArgs.get_sg_expansion(),
                 x_sg->productComm());
    InArgs::sg_const_vector_t x_dot_sg  = Teuchos::null;
    InArgs::sg_const_vector_t x_dotdot_sg  = Teuchos::null;
    if(num_time_deriv > 0)
      x_dot_sg  = inArgs.get_x_dot_sg();
    if(num_time_deriv > 1)
      x_dotdot_sg  = inArgs.get_x_dotdot_sg();
    if (x_dot_sg != Teuchos::null || x_dotdot_sg != Teuchos::null) {
      alpha = inArgs.get_alpha();
      beta = inArgs.get_beta();
      curr_time  = inArgs.get_t();
    }
    if (x_dotdot_sg != Teuchos::null) {
      omega = inArgs.get_omega();
    }

    InArgs::sg_const_vector_t epetra_p_sg = inArgs.get_p_sg(0);
    Teuchos::Array<int> p_sg_index;
    for (int i=0; i<num_param_vecs; i++) {
      InArgs::sg_const_vector_t p_sg = inArgs.get_p_sg(i);
      if (p_sg != Teuchos::null) {
        p_sg_index.push_back(i);
        for (int j=0; j<p_sg_vals[i].size(); j++) {
          int num_sg_blocks = p_sg->size();
          p_sg_vals[i][j].reset(app->getStochasticExpansion(), num_sg_blocks);
          p_sg_vals[i][j].copyForWrite();
          for (int l=0; l<num_sg_blocks; l++) {
            p_sg_vals[i][j].fastAccessCoeff(l) = (*p_sg)[l][j];
          }
        }
      }
    }

    OutArgs::sg_vector_t f_sg = outArgs.get_f_sg();
    OutArgs::sg_operator_t W_sg = outArgs.get_W_sg();
    bool f_sg_computed = false;

    // W_sg
    if (W_sg != Teuchos::null) {
      Stokhos::VectorOrthogPoly<Epetra_CrsMatrix> W_sg_crs(W_sg->basis(),
                                                           W_sg->map());
      for (int i=0; i<W_sg->size(); i++)
        W_sg_crs.setCoeffPtr(
          i,
          Teuchos::rcp_dynamic_cast<Epetra_CrsMatrix>(W_sg->getCoeffPtr(i)));
      app->computeGlobalSGJacobian(alpha, beta, omega, curr_time,
                                   x_dot_sg.get(),  x_dotdot_sg.get(), *x_sg,
                                   sacado_param_vec, p_sg_index, p_sg_vals,
                                   f_sg.get(), W_sg_crs);
      f_sg_computed = true;
    }

    // df/dp_sg
    for (int i=0; i<num_param_vecs; i++) {
      Teuchos::RCP< Stokhos::EpetraMultiVectorOrthogPoly > dfdp_sg
        = outArgs.get_DfDp_sg(i).getMultiVector();
      if (dfdp_sg != Teuchos::null) {
        Teuchos::Array<int> p_indexes =
          outArgs.get_DfDp_sg(i).getDerivativeMultiVector().getParamIndexes();
        Teuchos::RCP<ParamVec> p_vec;
        if (p_indexes.size() == 0)
          p_vec = Teuchos::rcp(&sacado_param_vec[i],false);
        else {
          p_vec = Teuchos::rcp(new ParamVec);
          for (int j=0; j<p_indexes.size(); j++)
            p_vec->addParam(sacado_param_vec[i][p_indexes[j]].family,
                            sacado_param_vec[i][p_indexes[j]].baseValue);
        }

        app->computeGlobalSGTangent(0.0, 0.0, 0.0, curr_time, false,
                                    x_dot_sg.get(), x_dotdot_sg.get(),*x_sg,
                                    sacado_param_vec, p_sg_index, p_sg_vals,
                                    p_vec.get(), NULL, NULL, NULL, NULL,
                                    f_sg.get(), NULL, dfdp_sg.get());

        f_sg_computed = true;
      }
    }

    if (f_sg != Teuchos::null && !f_sg_computed)
      app->computeGlobalSGResidual(curr_time, x_dot_sg.get(), x_dotdot_sg.get(),*x_sg,
                                   sacado_param_vec, p_sg_index, p_sg_vals,
                                   *f_sg);

    // Response functions
    for (int i=0; i<outArgs.Ng(); i++) {
      OutArgs::sg_vector_t g_sg = outArgs.get_g_sg(i);
      bool g_sg_computed = false;

      SGDerivative dgdx_sg = outArgs.get_DgDx_sg(i);
      SGDerivative dgdxdot_sg = outArgs.get_DgDx_dot_sg(i);
      SGDerivative dgdxdotdot_sg = outArgs.get_DgDx_dotdot_sg(i);

      // dg/dx, dg/dxdot
      if (!dgdx_sg.isEmpty() || !dgdxdot_sg.isEmpty() || !dgdxdotdot_sg.isEmpty()) {
        app->evaluateSGResponseDerivative(
            i, curr_time, x_dot_sg.get(), x_dotdot_sg.get(), *x_sg,
            sacado_param_vec, p_sg_index, p_sg_vals,
            NULL, g_sg.get(), dgdx_sg,
            dgdxdot_sg, dgdxdotdot_sg, SGDerivative());
        g_sg_computed = true;
      }

      // dg/dp
      for (int j=0; j<num_param_vecs; j++) {
        Teuchos::RCP< Stokhos::EpetraMultiVectorOrthogPoly > dgdp_sg =
          outArgs.get_DgDp_sg(i,j).getMultiVector();
        if (dgdp_sg != Teuchos::null) {
          Teuchos::Array<int> p_indexes =
            outArgs.get_DgDp_sg(i,j).getDerivativeMultiVector().getParamIndexes();
          Teuchos::RCP<ParamVec> p_vec;
          if (p_indexes.size() == 0)
            p_vec = Teuchos::rcp(&sacado_param_vec[j],false);
          else {
            p_vec = Teuchos::rcp(new ParamVec);
            for (int k=0; k<p_indexes.size(); k++)
              p_vec->addParam(sacado_param_vec[j][p_indexes[k]].family,
                              sacado_param_vec[j][p_indexes[k]].baseValue);
          }
          app->evaluateSGResponseTangent(i, alpha, beta, omega, curr_time, false,
                                         x_dot_sg.get(), x_dotdot_sg.get(), *x_sg,
                                         sacado_param_vec, p_sg_index,
                                         p_sg_vals, p_vec.get(),
                                         NULL, NULL, NULL, NULL, g_sg.get(),
                                         NULL, dgdp_sg.get());
          g_sg_computed = true;

        }
      }

      if (g_sg != Teuchos::null && !g_sg_computed)
        app->evaluateSGResponse(i, curr_time, x_dot_sg.get(), x_dotdot_sg.get(), *x_sg,
                                sacado_param_vec, p_sg_index, p_sg_vals,
                                *g_sg);
    }
  }
#endif
#ifdef ALBANY_ENSEMBLE

  //
  // Multi-point evaluation
  //
  mp_const_vector_t x_mp = inArgs.get_x_mp();
  if (x_mp != Teuchos::null) {
    mp_const_vector_t x_dot_mp  = Teuchos::null;
    mp_const_vector_t x_dotdot_mp  = Teuchos::null;
    if(num_time_deriv > 0)
      x_dot_mp  = inArgs.get_x_dot_mp();
    if(num_time_deriv > 1)
      x_dotdot_mp  = inArgs.get_x_dotdot_mp();
    if (x_dot_mp != Teuchos::null || x_dotdot_mp != Teuchos::null) {
      alpha = inArgs.get_alpha();
      //omega = inArgs.get_omega();
      beta = inArgs.get_beta();
      curr_time  = inArgs.get_t();
    }
    if (x_dotdot_mp != Teuchos::null) {
      omega = inArgs.get_omega();
    }

    Teuchos::Array<int> p_mp_index;
    for (int i=0; i<num_param_vecs; i++) {
      mp_const_vector_t p_mp = inArgs.get_p_mp(i);
      if (p_mp != Teuchos::null) {
        p_mp_index.push_back(i);
        for (int j=0; j<p_mp_vals[i].size(); j++) {
          int num_mp_blocks = p_mp->size();
          p_mp_vals[i][j].reset(num_mp_blocks);
          p_mp_vals[i][j].copyForWrite();
          for (int l=0; l<num_mp_blocks; l++) {
            p_mp_vals[i][j].fastAccessCoeff(l) = (*p_mp)[l][j];
          }
        }
      }
    }

    mp_vector_t f_mp = outArgs.get_f_mp();
    mp_operator_t W_mp = outArgs.get_W_mp();
    bool f_mp_computed = false;

    // W_mp
    if (W_mp != Teuchos::null) {
      Stokhos::ProductContainer<Epetra_CrsMatrix> W_mp_crs(W_mp->map());
      for (int i=0; i<W_mp->size(); i++)
        W_mp_crs.setCoeffPtr(
          i,
          Teuchos::rcp_dynamic_cast<Epetra_CrsMatrix>(W_mp->getCoeffPtr(i)));
      app->computeGlobalMPJacobian(alpha, beta, omega, curr_time,
                                   x_dot_mp.get(), x_dotdot_mp.get(), *x_mp,
                                   sacado_param_vec, p_mp_index, p_mp_vals,
                                   f_mp.get(), W_mp_crs);
      f_mp_computed = true;
    }

    // df/dp_mp
    for (int i=0; i<num_param_vecs; i++) {
      Teuchos::RCP< Stokhos::ProductEpetraMultiVector > dfdp_mp
        = outArgs.get_DfDp_mp(i).getMultiVector();
      if (dfdp_mp != Teuchos::null) {
        Teuchos::Array<int> p_indexes =
          outArgs.get_DfDp_mp(i).getDerivativeMultiVector().getParamIndexes();
        Teuchos::RCP<ParamVec> p_vec;
        if (p_indexes.size() == 0)
          p_vec = Teuchos::rcp(&sacado_param_vec[i],false);
        else {
          p_vec = Teuchos::rcp(new ParamVec);
          for (int j=0; j<p_indexes.size(); j++)
            p_vec->addParam(sacado_param_vec[i][p_indexes[j]].family,
                            sacado_param_vec[i][p_indexes[j]].baseValue);
        }

        app->computeGlobalMPTangent(0.0, 0.0, 0.0, curr_time, false,
                                    x_dot_mp.get(), x_dotdot_mp.get(), *x_mp,
                                    sacado_param_vec, p_mp_index, p_mp_vals,
                                    p_vec.get(), NULL, NULL, NULL, NULL,
                                    f_mp.get(), NULL, dfdp_mp.get());

        f_mp_computed = true;
      }
    }

    if (f_mp != Teuchos::null && !f_mp_computed)
      app->computeGlobalMPResidual(curr_time, x_dot_mp.get(), x_dotdot_mp.get(), *x_mp,
                                   sacado_param_vec, p_mp_index, p_mp_vals,
                                   *f_mp);

    // Response functions
    for (int i=0; i<outArgs.Ng(); i++) {
      mp_vector_t g_mp = outArgs.get_g_mp(i);
      bool g_mp_computed = false;

      MPDerivative dgdx_mp = outArgs.get_DgDx_mp(i);
      MPDerivative dgdxdot_mp = outArgs.get_DgDx_dot_mp(i);
      MPDerivative dgdxdotdot_mp = outArgs.get_DgDx_dotdot_mp(i);

      // dg/dx, dg/dxdot
      if (!dgdx_mp.isEmpty() || !dgdxdot_mp.isEmpty() || !dgdxdotdot_mp.isEmpty() ) {
        app->evaluateMPResponseDerivative(
            i, curr_time, x_dot_mp.get(), x_dotdot_mp.get(), *x_mp,
            sacado_param_vec, p_mp_index, p_mp_vals,
            NULL, g_mp.get(), dgdx_mp,
            dgdxdot_mp, dgdxdotdot_mp, MPDerivative());
        g_mp_computed = true;
      }

      // dg/dp
      for (int j=0; j<num_param_vecs; j++) {
        Teuchos::RCP< Stokhos::ProductEpetraMultiVector > dgdp_mp =
          outArgs.get_DgDp_mp(i,j).getMultiVector();
        if (dgdp_mp != Teuchos::null) {
          Teuchos::Array<int> p_indexes =
            outArgs.get_DgDp_mp(i,j).getDerivativeMultiVector().getParamIndexes();
          Teuchos::RCP<ParamVec> p_vec;
          if (p_indexes.size() == 0)
            p_vec = Teuchos::rcp(&sacado_param_vec[j],false);
          else {
            p_vec = Teuchos::rcp(new ParamVec);
            for (int k=0; k<p_indexes.size(); k++)
              p_vec->addParam(sacado_param_vec[j][p_indexes[k]].family,
                              sacado_param_vec[j][p_indexes[k]].baseValue);
          }
          app->evaluateMPResponseTangent(i, alpha, beta, omega, curr_time, false,
                                         x_dot_mp.get(), x_dotdot_mp.get(), *x_mp,
                                         sacado_param_vec, p_mp_index,
                                         p_mp_vals, p_vec.get(),
                                         NULL, NULL, NULL, NULL, g_mp.get(),
                                         NULL, dgdp_mp.get());
          g_mp_computed = true;
        }
      }

      if (g_mp != Teuchos::null && !g_mp_computed)
        app->evaluateMPResponse(i, curr_time, x_dot_mp.get(), x_dotdot_mp.get(), *x_mp,
                                sacado_param_vec, p_mp_index, p_mp_vals,
                                *g_mp);
    }
  }
#endif
}
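// A minimal driver sketch (illustrative, not part of Albany): one residual and
// Jacobian evaluation through the EpetraExt::ModelEvaluator interface used by
// evalModel above. `model`, `x`, `f`, and `W` are assumed to exist with
// compatible maps; only the x, f, and W arguments are exercised here.
void evaluateOnce(const Teuchos::RCP<EpetraExt::ModelEvaluator>& model,
                  const Teuchos::RCP<const Epetra_Vector>& x,
                  const Teuchos::RCP<Epetra_Vector>& f,
                  const Teuchos::RCP<Epetra_Operator>& W)
{
  EpetraExt::ModelEvaluator::InArgs inArgs = model->createInArgs();
  inArgs.set_x(x);                     // current solution

  EpetraExt::ModelEvaluator::OutArgs outArgs = model->createOutArgs();
  outArgs.set_f(f);                    // residual to be filled
  outArgs.set_W(W);                    // Jacobian operator to be filled

  model->evalModel(inArgs, outArgs);   // fills f and W
}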
static int run_test(Teuchos::RCP<Epetra_CrsMatrix> matrix,
	  bool verbose,           // display the graph before & after
	  bool contract,          // set global number of partitions to 1/2 num procs
	  int partitioningType,   // hypergraph or graph partitioning, or simple
	  int vertexWeightType,   // use vertex weights?
	  int edgeWeightType,     // use edge/hyperedge weights?
	  int objectType)         // use isorropia's CrsMatrix or CrsGraph
{
  int rc=0, fail = 0;
#ifdef HAVE_EPETRAEXT
  int localProc = 0;
  double balance1, balance2, cutn1, cutn2, cutl1, cutl2;
  double balance3, cutn3, cutl3;
  double cutWgt1, cutWgt2, cutWgt3;
  int numCuts1, numCuts2, numCuts3, valid;
  int numPartitions = 0;
  int keepDenseEdges = 0;
  int numProcs = 1;

#ifdef HAVE_MPI
  const Epetra_MpiComm &Comm = dynamic_cast<const Epetra_MpiComm &>(matrix->Comm());
  localProc = Comm.MyPID();
  numProcs = Comm.NumProc();
#else
  const Epetra_SerialComm &Comm = dynamic_cast<const Epetra_SerialComm &>(matrix->Comm());
#endif

  int numRows = matrix->NumGlobalRows();

  if (numRows < (numProcs * 100)){
    // By default Zoltan throws out dense edges, defined as those
    // whose number of non-zeros exceeds 25% of the number of vertices.
    //
    // If dense edges are thrown out of a small matrix, there may be nothing left.
    keepDenseEdges = 1;
  }

  double myShareBefore = 1.0 / numProcs;
  double myShare = myShareBefore;

  if (contract){
    numPartitions = numProcs / 2;

    if (numPartitions > numRows)
      numPartitions = numRows;

    if (numPartitions > 0){
      if (localProc < numPartitions){
	myShare = 1.0 / numPartitions;
      }
      else{
	myShare = 0.0;
      }
    }
    else{
      contract = 0;
    }
  }

  // If we want Zoltan's or Isorropia's default weights, then we don't
  // need to supply a CostDescriber object to createBalancedCopy,
  // so we get to test the API functions that don't take a CostDescriber.

  bool noCosts = ((vertexWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS) &&
		   (edgeWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS));

  // Test the interface that has no parameters, if possible

  bool noParams =
    ((partitioningType == HYPERGRAPH_PARTITIONING) && // default, so requires no params
     (numPartitions == 0) &&                          // >0 would require a parameter
     (keepDenseEdges == 0));                          // >0 would require a parameter

  // Maps for original object
  const Epetra_Map &sourceRowMap = matrix->RowMap();
  const Epetra_Map &sourceRangeMap = matrix->RangeMap();
//   const Epetra_Map &sourceColMap = matrix->ColMap();
  const Epetra_Map &sourceDomainMap = matrix->DomainMap();

  int numCols = matrix->NumGlobalCols();
  int nMyRows = sourceRowMap.NumMyElements();
  int base = sourceRowMap.IndexBase();

  // Compute vertex and edge weights

  Isorropia::Epetra::CostDescriber costs;

  Teuchos::RCP<Epetra_Vector> vptr;

  Teuchos::RCP<Epetra_CrsMatrix> eptr;

  Teuchos::RCP<Epetra_Vector> hyperEdgeWeights;

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      // Create graph edge weights.

      eptr = Teuchos::rcp(new Epetra_CrsMatrix(*matrix));

      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
	eptr->PutScalar(1.0);   // set all nonzeros to 1.0
      }
      else{
	int maxRowSize = eptr->MaxNumEntries();
	double *newVal = NULL;
	if (maxRowSize > 0){
	  newVal = new double [maxRowSize];
	  for (int j=0; j<maxRowSize; j++){
	    newVal[j] = localProc + 1 + j;
	  }
	}
	int numEntries;
	int *idx;
	double *val;
	for (int i=0; i<nMyRows; i++){
	  rc = eptr->ExtractMyRowView(i, numEntries, val, idx);
	  for (int j=0; j<numEntries; j++){
	    val[j] = newVal[j];
	  }
	}
	if (newVal) delete [] newVal;
      }

      eptr->FillComplete(sourceDomainMap, sourceRangeMap);

      costs.setGraphEdgeWeights(eptr);
    }
    else{
      // Create hyperedge weights.  (Note that the list of hyperedges that a
      // process provides weights for has no relation to the columns
      // that it has non-zeroes for, or the rows that it has.  Hypergraphs
      // in general are not square.  Also more than one process can provide
      // a weight for the same edge.  Zoltan combines the weights according
      // to the value of the PHG_EDGE_WEIGHT_OPERATION parameter.  The default
      // for this parameter is to use the maximum edge weight provided by any
      // process for a given hyperedge.)

      Epetra_Map hyperEdgeMap(numCols, base, Comm);

      hyperEdgeWeights = Teuchos::rcp(new Epetra_Vector(hyperEdgeMap));

      int *edgeGIDs = NULL;
      double *weights = NULL;
      int numHEweights = hyperEdgeMap.NumMyElements();

      if (numHEweights){
	edgeGIDs = new int [numHEweights];
	weights = new double [numHEweights];

	if (edgeWeightType == SUPPLY_EQUAL_WEIGHTS){
	  for (int i=0; i<numHEweights; i++){
	    edgeGIDs[i] = hyperEdgeMap.GID(i);
	    weights[i] = 1.0;
	  }
	}
	else{
	  int hiVolumeStart = matrix->NumGlobalCols() / 3;
	  int hiVolumeEnd = hiVolumeStart * 2;
	  for (int i=0; i<numHEweights; i++){
	    edgeGIDs[i] = hyperEdgeMap.GID(i);
	    if ((edgeGIDs[i] < hiVolumeStart) || (edgeGIDs[i] >= hiVolumeEnd)){
	      weights[i] = 1.0;
	    }
	    else{
	      weights[i] = 3.0;
	    }
	  }
	}
	hyperEdgeWeights->ReplaceGlobalValues(numHEweights, weights, edgeGIDs);
      }

      if (weights){
	delete [] weights;
	delete [] edgeGIDs;
      }

      costs.setHypergraphEdgeWeights(hyperEdgeWeights);
    }
  }

  bool need_importer = false;

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    need_importer = true;  // to redistribute row weights

    double *val = NULL;

    if (nMyRows){
      val = new double [nMyRows];

      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
	for (int i=0; i<nMyRows; i++){
	  val[i] = 1.0;
	}
      }
      else if (vertexWeightType == SUPPLY_UNEQUAL_WEIGHTS){
	for (int i=0; i<nMyRows; i++){
	  val[i] = 1.0 + ((localProc+1) / 2);
	}
      }
    }

    vptr = Teuchos::rcp(new Epetra_Vector(Copy, sourceRowMap, val));

    if (val) delete [] val;

    costs.setVertexWeights(vptr);
  }

  // Calculate partition quality metrics before calling Zoltan

  if (partitioningType == GRAPH_PARTITIONING){
    rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
	     myShare, balance1, numCuts1, cutWgt1, cutn1, cutl1);
    if (contract){
      // balance wrt target of balancing weight over *all* procs
      rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
	     myShareBefore, balance3, numCuts3, cutWgt3, cutn3, cutl3);
    }
  }
  else{
    rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
	     myShare, balance1, cutn1, cutl1);
    if (contract){
      // balance wrt target of balancing weight over *all* procs
      rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
	     myShareBefore, balance3, cutn3, cutl3);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  Teuchos::ParameterList params;

#ifdef HAVE_ISORROPIA_ZOLTAN

  if (!noParams){

    // We're using Zoltan for partitioning and supplying
    // parameters, overriding defaults.

    Teuchos::ParameterList &sublist = params.sublist("Zoltan");

    if (partitioningType == GRAPH_PARTITIONING){
      params.set("PARTITIONING METHOD", "GRAPH");
      sublist.set("GRAPH_PACKAGE", "PHG");
    }
    else{
      params.set("PARTITIONING METHOD", "HYPERGRAPH");
      sublist.set("LB_APPROACH", "PARTITION");
      sublist.set("PHG_CUT_OBJECTIVE", "CONNECTIVITY");  // "cutl"
    }

    if (keepDenseEdges){
      // only throw out rows that have no zeroes, default is to
      // throw out if .25 or more of the columns are non-zero
      sublist.set("PHG_EDGE_SIZE_THRESHOLD", "1.0");
    }
    if (numPartitions > 0){
      // test #Partitions < #Processes
      std::ostringstream os;
      os << numPartitions;
      std::string s = os.str();
      //  sublist.set("NUM_GLOBAL_PARTS", s);
      params.set("NUM PARTS", s);
    }

      //sublist.set("DEBUG_LEVEL", "1"); // Zoltan will print out parameters
      //sublist.set("DEBUG_LEVEL", "5");   // proc 0 will trace Zoltan calls
      //sublist.set("DEBUG_MEMORY", "2");  // Zoltan will trace alloc & free
  }

#else
    ERROREXIT((localProc==0),
      "Zoltan partitioning required but Zoltan not available.")
#endif

  // Function scope values

  Teuchos::RCP<Epetra_Vector> newvwgts;
  Teuchos::RCP<Epetra_CrsMatrix> newewgts;

  // Function scope values required for LinearProblem

  Epetra_LinearProblem *problem = NULL;
  Epetra_Map *LHSmap = NULL;
  Epetra_MultiVector *RHS = NULL;
  Epetra_MultiVector *LHS = NULL;

  // Reference counted pointer to balanced object

  Epetra_CrsMatrix *matrixPtr=NULL;
  Epetra_CrsGraph *graphPtr=NULL;
  Epetra_RowMatrix *rowMatrixPtr=NULL;
  Epetra_LinearProblem *problemPtr=NULL;

  // Row map for balanced object
  const Epetra_BlockMap *targetBlockRowMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetRowMap=NULL;            // for all other inputs

  // Column map for balanced object
  const Epetra_BlockMap *targetBlockColMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetColMap=NULL;            // for all other inputs

  if (objectType == EPETRA_CRSMATRIX){
    if (noParams && noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    targetRowMap = &(matrixPtr->RowMap());
    targetColMap = &(matrixPtr->ColMap());
  }
  else if (objectType == EPETRA_CRSGRAPH){
    const Epetra_CrsGraph graph = matrix->Graph();
    if (noParams && noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph);
    }
    else if (noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph, params);
    }
    targetBlockRowMap = &(graphPtr->RowMap());
    targetBlockColMap = &(graphPtr->ColMap());
  }
  else if (objectType == EPETRA_ROWMATRIX){
    if (noParams && noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    targetRowMap = &(rowMatrixPtr->RowMatrixRowMap());
    targetColMap = &(rowMatrixPtr->RowMatrixColMap());
  }
  else if (objectType == EPETRA_LINEARPROBLEM){

    // Create a linear problem with this matrix.

    LHSmap = new Epetra_Map(numCols, base, Comm);

    int myRHSsize = sourceRowMap.NumMyElements();
    int myLHSsize = LHSmap->NumMyElements();

    int valSize = ((myRHSsize > myLHSsize) ? myRHSsize : myLHSsize);

    double *vals = NULL;

    if (valSize){
      vals = new double [valSize];
    }

    if (valSize){
      for (int i=0; i < valSize; i++){
	// put my rank in my portion of LHS and my portion of RHS
	vals[i] = localProc;
      }
    }

    RHS = new Epetra_MultiVector(Copy, sourceRowMap, vals, 1, 1);

    LHS = new Epetra_MultiVector(Copy, *LHSmap, vals, 1, 1);

    if (valSize){
      delete [] vals;
    }

    problem = new Epetra_LinearProblem(matrix.get(), LHS, RHS);

    Epetra_LinearProblem lp = *problem;

    if (lp.CheckInput()){
      ERROREXIT((localProc==0), "Error creating a LinearProblem");
    }
    if (noParams && noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp);
    }
    else if (noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp, params);
    }

    targetRowMap = &(problemPtr->GetMatrix()->RowMatrixRowMap());
    targetColMap = &(problemPtr->GetMatrix()->RowMatrixColMap());
  }

  // Redistribute the edge weights (the columns themselves are not redistributed)

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      Epetra_Import *importer = NULL;

      if (objectType == EPETRA_CRSGRAPH){
	newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *graphPtr));
	targetRowMap = &(newewgts->RowMap());
	targetColMap = &(newewgts->ColMap());
      }
      else{
	newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *targetRowMap, *targetColMap, 0));
      }

      importer = new Epetra_Import(*targetRowMap, sourceRowMap);
      newewgts->Import(*eptr, *importer, Insert);
      newewgts->FillComplete(*targetColMap, *targetRowMap);

      costs.setGraphEdgeWeights(newewgts);
    }
  }

  // Redistribute the vertex weights

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    Epetra_Import *importer = NULL;

    if (objectType == EPETRA_CRSGRAPH){
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetBlockRowMap));
      importer = new Epetra_Import(*targetBlockRowMap, sourceRowMap);
    }
    else{
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetRowMap));
      importer = new Epetra_Import(*targetRowMap, sourceRowMap);
    }

    newvwgts->Import(*vptr, *importer, Insert);
    costs.setVertexWeights(newvwgts);
  }

  if (localProc == 0){
    test_type(numPartitions, partitioningType, vertexWeightType, edgeWeightType, objectType);
  }

  if (verbose){

    // Picture of problem before balancing

    if (objectType == EPETRA_LINEARPROBLEM){

      ispatest::show_matrix("Before load balancing", *problem, Comm);
    }
    else{
      ispatest::show_matrix("Before load balancing", matrix->Graph(), Comm);
    }

    // Picture of problem after balancing

    if (objectType == EPETRA_LINEARPROBLEM){
      ispatest::show_matrix("After load balancing (x in Ax=b is not redistributed)", *problemPtr, Comm);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      ispatest::show_matrix("After load balancing", *rowMatrixPtr, Comm);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      ispatest::show_matrix("After load balancing", matrixPtr->Graph(), Comm);
    }
    else if (objectType == EPETRA_CRSGRAPH){
      ispatest::show_matrix("After load balancing", *graphPtr, Comm);
    }
  }

  // After partitioning, recompute the metrics

  if (partitioningType == GRAPH_PARTITIONING){
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_graph_metrics(*(problemPtr->GetMatrix()), costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_graph_metrics(*rowMatrixPtr, costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_graph_metrics(matrixPtr->Graph(), costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else {
      rc = ispatest::compute_graph_metrics(*graphPtr, costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
  }
  else{
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_hypergraph_metrics(*(problemPtr->GetMatrix()), costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_hypergraph_metrics(*rowMatrixPtr, costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_hypergraph_metrics(matrixPtr->Graph(), costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else{
      rc = ispatest::compute_hypergraph_metrics(*graphPtr, costs,
	     myShare, balance2, cutn2, cutl2);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  std::string why;

  if (partitioningType == GRAPH_PARTITIONING){
    fail = (cutWgt2 > cutWgt1);
    why = "New weighted edge cuts are worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;

      if (contract){
	std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
	std::cout << "Before partitioning: Balance " << balance3 ;
	std::cout << " cutn " << cutn3 ;
	std::cout << " cutl " << cutl3 ;
	std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;

      std::cout << " Total edge cuts: " << numCuts1;
      std::cout << " Total weighted edge cuts: " << cutWgt1 << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
      std::cout << " Total edge cuts: " << numCuts2;
      std::cout << " Total weighted edge cuts: " << cutWgt2 << std::endl;
    }
  }
  else{
    fail = (cutl2 > cutl1);
    why = "New cutl is worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;
      if (contract){
	std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
	std::cout << "Before partitioning: Balance " << balance3 ;
	std::cout << " cutn " << cutn3 ;
	std::cout << " cutl " << cutl3 ;
	std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
    }
  }

  if (fail){
    if (localProc == 0) std::cout << "ERROR: "+why << std::endl;
  }

  // Check that input matrix is valid.  This test constructs an "x"
  // with the matrix->DomainMap() and a "y" with matrix->RangeMap()
  // and then calculates y = Ax.

  if (objectType == EPETRA_LINEARPROBLEM){
    valid = ispatest::test_matrix_vector_multiply(*problemPtr);
  }
  else if (objectType == EPETRA_ROWMATRIX){
    valid = ispatest::test_row_matrix_vector_multiply(*rowMatrixPtr);
  }
  else if (objectType == EPETRA_CRSMATRIX){
    valid = ispatest::test_matrix_vector_multiply(*matrixPtr);
  }
  else{
    valid = ispatest::test_matrix_vector_multiply(*graphPtr);
  }

  if (!valid){
    if (localProc == 0) std::cout << "Rebalanced matrix is not a valid Epetra matrix" << std::endl;
    fail = 1;
  }
  else{
    if (localProc == 0) std::cout << "Rebalanced matrix is a valid Epetra matrix" << std::endl;
  }

  if (localProc == 0)
    std::cout << std::endl;



#else
  std::cout << "test_simple main: currently can only test "
            << "with Epetra and EpetraExt enabled." << std::endl;
  rc = -1;
#endif

  return fail;
}
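// Example invocation sketch (illustrative): exercise one graph-partitioning
// configuration with equal vertex and edge weights on a CrsMatrix, using the
// enum values referenced inside run_test above.
static int run_one_graph_test(Teuchos::RCP<Epetra_CrsMatrix> matrix)
{
  return run_test(matrix,
                  true,                  // verbose: show graph before & after
                  false,                 // contract: keep #parts == #procs
                  GRAPH_PARTITIONING,
                  SUPPLY_EQUAL_WEIGHTS,  // vertex weights
                  SUPPLY_EQUAL_WEIGHTS,  // edge weights
                  EPETRA_CRSMATRIX);
}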
// helper routines
bool SplitMatrix2x2(Teuchos::RCP<const Epetra_CrsMatrix> A,
    const Epetra_Map& A11rowmap,
    const Epetra_Map& A22rowmap,
    Teuchos::RCP<Epetra_CrsMatrix>& A11,
    Teuchos::RCP<Epetra_CrsMatrix>& A12,
    Teuchos::RCP<Epetra_CrsMatrix>& A21,
    Teuchos::RCP<Epetra_CrsMatrix>& A22)
{
  if (A==Teuchos::null)
  {
    std::cout << "ERROR: SplitMatrix2x2: A==null on entry" << std::endl;
    return false;
  }

  const Epetra_Comm& Comm   = A->Comm();
  const Epetra_Map&  A22map = A22rowmap;
  const Epetra_Map&  A11map = A11rowmap;

  //----------------------------- create a parallel redundant map of A22map
  std::map<int,int> a22gmap;
  {
    std::vector<int> a22global(A22map.NumGlobalElements());
    int count=0;
    for (int proc=0; proc<Comm.NumProc(); ++proc)
    {
      int length = 0;
      if (proc==Comm.MyPID())
      {
        for (int i=0; i<A22map.NumMyElements(); ++i)
        {
          a22global[count+length] = A22map.GID(i);
          ++length;
        }
      }
      Comm.Broadcast(&length,1,proc);
      Comm.Broadcast(&a22global[count],length,proc);
      count += length;
    }
    if (count != A22map.NumGlobalElements())
    {
      std::cout << "ERROR SplitMatrix2x2: mismatch in dimensions" << std::endl;
      return false;
    }

    // create the map
    for (int i=0; i<count; ++i)
      a22gmap[a22global[i]] = 1;
    a22global.clear();
  }

  //--------------------------------------------------- create matrix A22
  A22 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A22map,100));
  {
    std::vector<int>    a22gcindices(100);
    std::vector<double> a22values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A22map.MyGID(grid)==false)
        continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }
      if (numentries>(int)a22gcindices.size())
      {
        a22gcindices.resize(numentries);
        a22values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid in a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr==a22gmap.end()) continue;
        //std::cout << gcid << " ";
        a22gcindices[count] = gcid;
        a22values[count]    = values[j];
        ++count;
      }
      //std::cout << std::endl; fflush(stdout);
      // add this filtered row to A22
      err = A22->InsertGlobalValues(grid,count,&a22values[0],&a22gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } //for (int i=0; i<A->NumMyRows(); ++i)
    a22gcindices.clear();
    a22values.clear();
  }
  A22->FillComplete();
  A22->OptimizeStorage();

  //----------------------------------------------------- create matrix A11
  A11 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A11map,100));
  {
    std::vector<int>    a11gcindices(100);
    std::vector<double> a11values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A11map.MyGID(grid)==false) continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }
      if (numentries>(int)a11gcindices.size())
      {
        a11gcindices.resize(numentries);
        a11values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid as part of a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr!=a22gmap.end()) continue;
        a11gcindices[count] = gcid;
        a11values[count] = values[j];
        ++count;
      }
      err = A11->InsertGlobalValues(grid,count,&a11values[0],&a11gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } // for (int i=0; i<A->NumMyRows(); ++i)
    a11gcindices.clear();
    a11values.clear();
  }
  A11->FillComplete();
  A11->OptimizeStorage();

  //---------------------------------------------------- create matrix A12
  A12 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A11map,100));
  {
    std::vector<int>    a12gcindices(100);
    std::vector<double> a12values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A11map.MyGID(grid)==false) continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }

      if (numentries>(int)a12gcindices.size())
      {
        a12gcindices.resize(numentries);
        a12values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid as part of a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr==a22gmap.end()) continue;
        a12gcindices[count] = gcid;
        a12values[count] = values[j];
        ++count;
      }
      err = A12->InsertGlobalValues(grid,count,&a12values[0],&a12gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } // for (int i=0; i<A->NumMyRows(); ++i)
    a12values.clear();
    a12gcindices.clear();
  }
  A12->FillComplete(A22map,A11map);
  A12->OptimizeStorage();

  //----------------------------------------------------------- create A21
  A21 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A22map,100));
  {
    std::vector<int>    a21gcindices(100);
    std::vector<double> a21values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A22map.MyGID(grid)==false) continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }

      if (numentries>(int)a21gcindices.size())
      {
        a21gcindices.resize(numentries);
        a21values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid as part of a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr!=a22gmap.end()) continue;
        a21gcindices[count] = gcid;
        a21values[count] = values[j];
        ++count;
      }
      err = A21->InsertGlobalValues(grid,count,&a21values[0],&a21gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } // for (int i=0; i<A->NumMyRows(); ++i)
    a21values.clear();
    a21gcindices.clear();
  }
  A21->FillComplete(A11map,A22map);
  A21->OptimizeStorage();

  //-------------------------------------------------------------- tidy up
  a22gmap.clear();
  return true;
}
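A minimal usage sketch for the splitter above. The call signature is assumed
from the names used in the body (A, A11map, A22map) and is hypothetical, not
taken from a documented interface:

// Hypothetical driver: split A into 2x2 blocks along the A11map/A22map rows.
Teuchos::RCP<Epetra_CrsMatrix> A11, A12, A21, A22;
if (!SplitMatrix2x2(A, A11map, A22map, A11, A12, A21, A22))
  std::cout << "ERROR: SplitMatrix2x2 failed" << std::endl;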
AmesosBTFGlobal_LinearProblem::NewTypeRef
AmesosBTFGlobal_LinearProblem::
operator()( OriginalTypeRef orig )
{
  origObj_ = &orig;

  // Extract the matrix and vectors from the linear problem
  OldRHS_ = Teuchos::rcp( orig.GetRHS(), false );
  OldLHS_ = Teuchos::rcp( orig.GetLHS(), false );
  OldMatrix_ = Teuchos::rcp( dynamic_cast<Epetra_CrsMatrix *>( orig.GetMatrix() ), false );
	
  int nGlobal = OldMatrix_->NumGlobalRows(); 
  int n = OldMatrix_->NumMyRows();

  // Check if the matrix is on one processor.
  int myMatProc = -1, matProc = -1;
  int myPID = OldMatrix_->Comm().MyPID();
  int numProcs = OldMatrix_->Comm().NumProc();

  const Epetra_BlockMap& oldRowMap = OldMatrix_->RowMap();

  // Get some information about the parallel distribution.
  int maxMyRows = 0;
  std::vector<int> numGlobalElem( numProcs );
  OldMatrix_->Comm().GatherAll(&n, &numGlobalElem[0], 1);
  OldMatrix_->Comm().MaxAll(&n, &maxMyRows, 1);

  // If this processor owns all of the global nonzeros, the matrix already
  // lives on a single processor; record its rank.  The test is purely
  // local, so no loop over processors is needed.
  if (OldMatrix_->NumGlobalNonzeros() == OldMatrix_->NumMyNonzeros())
    myMatProc = myPID;

  OldMatrix_->Comm().MaxAll( &myMatProc, &matProc, 1 );

  Teuchos::RCP<Epetra_CrsMatrix> serialMatrix;
  Teuchos::RCP<Epetra_Map> serialMap;	
  if( oldRowMap.DistributedGlobal() && matProc == -1) 
  {
    // The matrix is distributed and needs to be moved to processor zero.
    // Set the zero processor as the master.
    matProc = 0;
    serialMap = Teuchos::rcp( new Epetra_Map( Epetra_Util::Create_Root_Map( OldMatrix_->RowMap(), matProc ) ) );
    
    Epetra_Import serialImporter( *serialMap, OldMatrix_->RowMap() );
    serialMatrix = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *serialMap, 0 ) );
    serialMatrix->Import( *OldMatrix_, serialImporter, Insert );
    serialMatrix->FillComplete();
  }
  else {
    // The old matrix has already been moved to one processor (matProc).
    serialMatrix = OldMatrix_;
  }

  if( debug_ )
  {
    cout << "Original (serial) Matrix:\n";
    cout << *serialMatrix << endl;
  }

  // Obtain the current row and column orderings
  std::vector<int> origGlobalRows(nGlobal), origGlobalCols(nGlobal);
  serialMatrix->RowMap().MyGlobalElements( &origGlobalRows[0] );
  serialMatrix->ColMap().MyGlobalElements( &origGlobalCols[0] );
  
  // Perform reindexing on the full serial matrix (needed for BTF).
  Epetra_Map reIdxMap( serialMatrix->RowMap().NumGlobalElements(), serialMatrix->RowMap().NumMyElements(), 0, serialMatrix->Comm() );
  Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans =
    Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap ) );
  Epetra_CrsMatrix newSerialMatrix = (*reIdxTrans)( *serialMatrix );
  reIdxTrans->fwd();
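  // NOTE: CrsMatrix_Reindex is a view transform, so reIdxTrans must remain
  // in scope for as long as newSerialMatrix is in use.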
  
  // Compute and apply BTF to the serial CrsMatrix, which has been filtered by the threshold.
  EpetraExt::AmesosBTF_CrsMatrix BTFTrans( threshold_, upperTri_, verbose_, debug_ );
  Epetra_CrsMatrix newSerialMatrixBTF = BTFTrans( newSerialMatrix );
  
  rowPerm_ = BTFTrans.RowPerm();
  colPerm_ = BTFTrans.ColPerm();
  blockPtr_ = BTFTrans.BlockPtr();
  numBlocks_ = BTFTrans.NumBlocks();
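  // rowPerm_ and colPerm_ hold the BTF row/column permutations; blockPtr_ is
  // a CSR-style offset array (numBlocks_+1 entries) marking where each
  // diagonal block begins.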
 
  if (myPID == matProc && verbose_) {
    bool isSym = true;
    for (int i=0; i<nGlobal; ++i) {
      if (rowPerm_[i] != colPerm_[i]) {
        isSym = false;
        break;
      }
    }
    std::cout << "The BTF permutation symmetry (0=false,1=true) is : " << isSym << std::endl;
  }
  
  // Compute the permutation w.r.t. the original row and column GIDs.
  std::vector<int> origGlobalRowsPerm(nGlobal), origGlobalColsPerm(nGlobal);
  if (myPID == matProc) {
    for (int i=0; i<nGlobal; ++i) {
      origGlobalRowsPerm[i] = origGlobalRows[ rowPerm_[i] ];
      origGlobalColsPerm[i] = origGlobalCols[ colPerm_[i] ];
    }
  }
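  // Broadcast the permuted GID lists from the root so every processor can
  // construct its portion of the new maps.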
  OldMatrix_->Comm().Broadcast( &origGlobalRowsPerm[0], nGlobal, matProc );
  OldMatrix_->Comm().Broadcast( &origGlobalColsPerm[0], nGlobal, matProc );

  // Generate the full serial matrix that imports according to the previously computed BTF.
  Epetra_CrsMatrix newSerialMatrixT( Copy, newSerialMatrixBTF.RowMap(), 0 );
  newSerialMatrixT.Import( newSerialMatrix, *(BTFTrans.Importer()), Insert );
  newSerialMatrixT.FillComplete();
  
  if( debug_ )
  {
    cout << "Original (serial) Matrix permuted via BTF:\n";
    cout << newSerialMatrixT << endl;
  }

  // Perform reindexing on the full serial matrix (needed for balancing).
  Epetra_Map reIdxMap2( newSerialMatrixT.RowMap().NumGlobalElements(), newSerialMatrixT.RowMap().NumMyElements(), 0, newSerialMatrixT.Comm() );
  Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans2 =
    Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap2 ) );
  Epetra_CrsMatrix tNewSerialMatrixT = (*reIdxTrans2)( newSerialMatrixT );
  reIdxTrans2->fwd();

  Teuchos::RCP<Epetra_Map> balancedMap;
  
  if (balance_ == "linear") {
    
    // Distribute block somewhat evenly across processors
    std::vector<int> rowDist(numProcs+1,0);
    int balRows = nGlobal / numProcs + 1;
    int numRows = balRows, currProc = 1;
    for ( int i=0; i<numBlocks_ && currProc < numProcs; ++i ) {
      if (blockPtr_[i] > numRows) {
        rowDist[currProc++] = blockPtr_[i-1];
        numRows = blockPtr_[i-1] + balRows;
      }
    }
    // Guard: if the blocks run out before every processor has a cut point,
    // the remaining processors receive empty row ranges.
    for (int p = currProc; p < numProcs; ++p)
      rowDist[p] = nGlobal;
    rowDist[numProcs] = nGlobal;
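    // Example: with nGlobal=10, numProcs=2, and blockPtr_={0,3,7,10}, the
    // target chunk size is balRows=6, so the cut falls at the last block
    // boundary not exceeding it (row 3): processor 0 owns rows [0,3) and
    // processor 1 owns rows [3,10).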
   
    // Create new Map based on this linear distribution.
    int numMyBalancedRows = rowDist[myPID+1]-rowDist[myPID];

    NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, numMyBalancedRows, &origGlobalRowsPerm[ rowDist[myPID] ], 0, OldMatrix_->Comm() ) );
    // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric!
    //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) );
    
    if ( verbose_ ) 
      std::cout << "Processor " << myPID << " has " << numMyBalancedRows << " rows." << std::endl;    
    //balancedMap = Teuchos::rcp( new Epetra_Map( nGlobal, numMyBalancedRows, 0, serialMatrix->Comm() ) );
  }
  else if (balance_ == "isorropia") {
	
    // Compute block adjacency graph for partitioning.
    std::vector<double> weight;
    Teuchos::RCP<Epetra_CrsGraph> blkGraph;
    EpetraExt::BlockAdjacencyGraph adjGraph;
    blkGraph = adjGraph.compute( const_cast<Epetra_CrsGraph&>(tNewSerialMatrixT.Graph()),
                                 numBlocks_, blockPtr_, weight, verbose_ );
    Epetra_Vector rowWeights( View, blkGraph->Map(), &weight[0] );
    
    // Call Isorropia to rebalance this graph.
    Teuchos::RCP<Epetra_CrsGraph> balancedGraph =
      Isorropia::Epetra::create_balanced_copy( *blkGraph, rowWeights );
    
    int myNumBlkRows = balancedGraph->NumMyRows();    
    
    //std::vector<int> myGlobalElements(nGlobal);
    std::vector<int> newRangeElements(nGlobal), newDomainElements(nGlobal);
    int grid = 0, myElements = 0;
    for (int i=0; i<myNumBlkRows; ++i) {
      grid = balancedGraph->GRID( i );
      for (int j=blockPtr_[grid]; j<blockPtr_[grid+1]; ++j) {
        newRangeElements[myElements++] = origGlobalRowsPerm[j];
        //myGlobalElements[myElements++] = j;
      }
    }

    NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, myElements, &newRangeElements[0], 0, OldMatrix_->Comm() ) );
    // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric!
    //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) );
    //balancedMap = Teuchos::rcp( new Epetra_Map( nGlobal, myElements, &myGlobalElements[0], 0, serialMatrix->Comm() ) );

    if ( verbose_ ) 
      std::cout << "Processor " << myPID << " has " << myElements << " rows." << std::endl;
  }
  
  // Use the new domain and range maps to generate the importers.
  // For now, assume the domain and range maps start out identical.
  Epetra_Map OldRowMap = OldMatrix_->RowMap();
  Epetra_Map OldColMap = OldMatrix_->ColMap();
  
  if( debug_ )
  {
    cout << "New Row Map\n";
    cout << *NewRowMap_ << endl;
    //cout << "New Col Map\n";
    //cout << *NewColMap_ << endl;
  }

  // Generate New Graph
  // NOTE:  Right now we are creating the graph, assuming that the permutation is symmetric!
  // NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, *NewColMap_, 0 ) );
  NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, 0 ) );
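  // Importer_ moves data from the old distribution to the new (permuted and
  // balanced) one; Importer2_ provides the reverse mapping for pushing
  // results back to the original layout.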
  Importer_ = Teuchos::rcp( new Epetra_Import( *NewRowMap_, OldRowMap ) );
  Importer2_ = Teuchos::rcp( new Epetra_Import( OldRowMap, *NewRowMap_ ) );
  NewGraph_->Import( OldMatrix_->Graph(), *Importer_, Insert );
  NewGraph_->FillComplete();

  if( debug_ )
  {
    cout << "NewGraph\n";
    cout << *NewGraph_;
  }

  // Create new linear problem and import information from old linear problem
  NewMatrix_ = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *NewGraph_ ) );
  NewMatrix_->Import( *OldMatrix_, *Importer_, Insert );
  NewMatrix_->FillComplete();

  NewLHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldLHS_->NumVectors() ) );
  NewLHS_->Import( *OldLHS_, *Importer_, Insert );
  
  NewRHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldRHS_->NumVectors() ) );
  NewRHS_->Import( *OldRHS_, *Importer_, Insert );

  if( debug_ )
  {
    cout << "New Matrix\n";
    cout << *NewMatrix_ << endl;
  }

  newObj_ = new Epetra_LinearProblem( &*NewMatrix_, &*NewLHS_, &*NewRHS_ );

  return *newObj_;
}
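A minimal usage sketch for the transform above, following the usual EpetraExt
transform idiom of applying operator() and then mapping results back. The
constructor arguments (drop threshold and balance strategy) are assumed from
the members used in the body, not from a documented interface:

// Hypothetical driver: permute a linear problem into BTF form, solve the
// permuted problem, then push the solution back to the original layout.
AmesosBTFGlobal_LinearProblem btfTrans( 1e-12 /* threshold, assumed */,
                                        "linear" /* balance, assumed */ );
Epetra_LinearProblem& permutedProblem = btfTrans( origProblem );
// ... solve permutedProblem ...
btfTrans.rvs();  // reverse transform: copy the solution back to origProblem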