void RTOpPack::deserialize(
  const RTOpT<Scalar> &op,
  int num_values_in,
  int num_indexes_in,
  int num_chars_in,
  const char reduct_obj_ext[],
  ReductTarget *reduct_obj
  )
{
  using Teuchos::arrayView;
  typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;
  typedef Teuchos::SerializationTraits<int,primitive_value_type> PVTST;
  typedef Teuchos::SerializationTraits<int,index_type> ITST;
  typedef Teuchos::SerializationTraits<int,char_type> CTST;
  const Ordinal
    prim_value_type_size = PVTST::fromCountToIndirectBytes(1),
    index_type_size = ITST::fromCountToIndirectBytes(1);
  //char_type_size = CTST::fromCountToIndirectBytes(1);
  const Ordinal
    num_values_off = 0,
    num_indexes_off = num_values_off + index_type_size,
    num_chars_off = num_indexes_off + index_type_size,
    values_off = num_chars_off + index_type_size,
    indexes_off = values_off + num_values_in * prim_value_type_size,
    chars_off = indexes_off + num_indexes_in * index_type_size;
#ifdef RTOP_DEBUG
  Ordinal num_values = -1, num_indexes = -1, num_chars = -1;
  ITST::deserialize(index_type_size, &reduct_obj_ext[num_values_off], 1, &num_values);
  ITST::deserialize(index_type_size, &reduct_obj_ext[num_indexes_off], 1, &num_indexes);
  ITST::deserialize(index_type_size, &reduct_obj_ext[num_chars_off], 1, &num_chars);
  TEST_FOR_EXCEPT(
    !(
      num_values==num_values_in && num_indexes==num_indexes_in
      && num_chars==num_chars_in )
    );
#endif
  op.load_reduct_obj_state(
    arrayView(PVTST::convertFromCharPtr(&reduct_obj_ext[values_off]), num_values_in),
    arrayView(ITST::convertFromCharPtr(&reduct_obj_ext[indexes_off]), num_indexes_in),
    arrayView(CTST::convertFromCharPtr(&reduct_obj_ext[chars_off]), num_chars_in),
    Teuchos::ptr(reduct_obj)
    );
  // ToDo: Change above implementation to only require indirect serialization!
}
      // Returns a non-owning, writable ArrayRCP over A.Values(), whose length
      // is whatever fetchArrayLength(A) reports.
      virtual Teuchos::ArrayRCP< scalar_type >
      fetchNonConstView (multivector_type& A) const
      {
        typedef Teuchos::ArrayView< scalar_type >::size_type length_type;

        const length_type numEntries = fetchArrayLength (A);
        // Wrapping an ArrayView means the result does NOT take ownership of
        // the storage behind A.Values().
        return Teuchos::arcpFromArrayView (
          Teuchos::arrayView (A.Values(), numEntries));
      }
void RTOpPack::serialize(
  const RTOpT<Scalar> &op,
  Ordinal num_values,
  Ordinal num_indexes,
  Ordinal num_chars,
  const ReductTarget &reduct_obj,
  char reduct_obj_ext[]
  )
{
  using Teuchos::arrayView;
  typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;
  typedef Teuchos::SerializationTraits<Ordinal, primitive_value_type> PVTST;
  typedef Teuchos::SerializationTraits<Ordinal, index_type> ITST;
  typedef Teuchos::SerializationTraits<Ordinal, char_type> CTST;
  const Ordinal
    prim_value_type_size = PVTST::fromCountToIndirectBytes(1),
    index_type_size = ITST::fromCountToIndirectBytes(1);
  //char_type_size = CTST::fromCountToIndirectBytes(1);
  const Ordinal
    num_values_off = 0,
    num_indexes_off = num_values_off + index_type_size,
    num_chars_off = num_indexes_off + index_type_size,
    values_off = num_chars_off + index_type_size,
    indexes_off = values_off + num_values * prim_value_type_size,
    chars_off = indexes_off + num_indexes * index_type_size;
  ITST::serialize(1, &num_values, index_type_size, &reduct_obj_ext[num_values_off]);
  ITST::serialize(1, &num_indexes, index_type_size, &reduct_obj_ext[num_indexes_off]);
  ITST::serialize(1, &num_chars, index_type_size, &reduct_obj_ext[num_chars_off]);
  op.extract_reduct_obj_state(
    reduct_obj,
    arrayView(PVTST::convertFromCharPtr(&reduct_obj_ext[values_off]), num_values),
    arrayView(ITST::convertFromCharPtr(&reduct_obj_ext[indexes_off]), num_indexes),
    arrayView(CTST::convertFromCharPtr(&reduct_obj_ext[chars_off]), num_chars)
    );
  // ToDo: Change above implementation to only require indirect serialization!
}
      // Returns a non-owning, read-only ArrayRCP over A.Values(), whose
      // length is whatever fetchArrayLength(A) reports.
      virtual Teuchos::ArrayRCP< const scalar_type >
      fetchConstView (const multivector_type& A) const
      {
        typedef Teuchos::ArrayView< scalar_type >::size_type length_type;

        const length_type numEntries = fetchArrayLength (A);
        const scalar_type* rawValues = A.Values();
        Teuchos::ArrayView< const scalar_type > readOnlyView =
          Teuchos::arrayView (rawValues, numEntries);

        // Wrapping an ArrayView means the result does NOT take ownership of
        // the storage behind A.Values().
        return Teuchos::arcpFromArrayView (readOnlyView);
      }
twoD_diffusion_problem<Scalar,MeshScalar,BasisScalar,LocalOrdinal,GlobalOrdinal,
		       Node>::
twoD_diffusion_problem(
  const Teuchos::RCP<const Teuchos::Comm<int> >& comm, 
  LocalOrdinal n, LocalOrdinal d, 
  BasisScalar s, BasisScalar mu, 
  bool log_normal_,
  bool eliminate_bcs_) :
  mesh(n*n),
  log_normal(log_normal_),
  eliminate_bcs(eliminate_bcs_)
{
  // Sets up an n-by-n uniform mesh on [-0.5, 0.5]^2, the solution /
  // parameter / response maps, the initial vectors, the parameter names,
  // the Jacobian graph and matrix, the right-hand side, and the diffusion
  // coefficient functions (d is the number of parameters).
  using Teuchos::arrayView;
  using Teuchos::rcp;

  //
  // Mesh construction.  Nodes are numbered left to right, bottom to top:
  //
  //   5-6-7-8-9
  //   | | | | |
  //   0-1-2-3-4
  //
  MeshScalar domainMin = -.5;
  MeshScalar domainMax = .5;
  h = (domainMax - domainMin)/(static_cast<MeshScalar>(n-1));
  Teuchos::Array<GlobalOrdinal> dofNodes;
  for (GlobalOrdinal row=0; row<n; row++) {
    MeshScalar yCoord = domainMin + row*h;
    for (GlobalOrdinal col=0; col<n; col++) {
      MeshScalar xCoord = domainMin + col*h;
      GlobalOrdinal node = row*n+col;
      mesh[node].x = xCoord;
      mesh[node].y = yCoord;

      // Which edges of the square does this node sit on?
      const bool atLeft = (col == 0);
      const bool atRight = (col == n-1);
      const bool atBottom = (row == 0);
      const bool atTop = (row == n-1);
      if (atLeft || atRight || atBottom || atTop)
        mesh[node].boundary = true;

      // Record the neighbours that actually exist.
      if (!atLeft)
        mesh[node].left = node-1;
      if (!atRight)
        mesh[node].right = node+1;
      if (!atBottom)
        mesh[node].down = node-n;
      if (!atTop)
        mesh[node].up = node+n;

      // Boundary nodes are not degrees of freedom when BCs are eliminated.
      if (!eliminate_bcs || !mesh[node].boundary)
        dofNodes.push_back(node);
    }
  }

  //
  // Solution vector map: every process gets an equal share of the DOFs and
  // the last process additionally takes the remainder.
  //
  Tpetra::global_size_t numGlobalDofs = dofNodes.size();
  int numProcs = comm->getSize();
  int myRank = comm->getRank();
  const Tpetra::global_size_t evenShare = numGlobalDofs / numProcs;
  size_t numMyDofs = evenShare;
  if (myRank == numProcs-1)
    numMyDofs += numGlobalDofs % numProcs;
  Teuchos::ArrayView<GlobalOrdinal> myDofs =
    dofNodes.view(myRank*evenShare, numMyDofs);
  x_map = Tpetra::createNonContigMap<LocalOrdinal,GlobalOrdinal>(myDofs, comm);

  // Initial guess is identically zero.
  x_init = Tpetra::createVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(x_map);
  x_init->putScalar(0.0);

  // Parameter map (d entries) and response map (a single entry).
  p_map = Tpetra::createLocalMap<LocalOrdinal,GlobalOrdinal>(d, comm);
  g_map = Tpetra::createLocalMap<LocalOrdinal,GlobalOrdinal>(1, comm);

  // Initial parameters are identically zero.
  p_init = Tpetra::createVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(p_map);
  p_init->putScalar(0.0);

  // Parameter names are numbered starting from 1.
  p_names = Teuchos::rcp(new Teuchos::Array<std::string>(d));
  for (LocalOrdinal k=0; k<d; k++) {
    std::stringstream label;
    label << "KL Random Variable " << k+1;
    (*p_names)[k] = label.str();
  }

  //
  // Jacobian graph: a 5-point stencil on interior rows, diagonal only on
  // boundary rows.
  //
  size_t numMyRows = x_map->getNodeNumElements();
  Teuchos::ArrayView<const GlobalOrdinal> myRows =
    x_map->getNodeElementList ();
  graph = rcp(new Tpetra_CrsGraph(x_map, 5));
  for (size_t k=0; k<numMyRows; ++k) {

    // Diagonal entry
    GlobalOrdinal rowIdx = myRows[k];
    graph->insertGlobalIndices(rowIdx, arrayView(&rowIdx, 1));

    if (mesh[rowIdx].boundary)
      continue;

    // Off-diagonal entries; a neighbour on the boundary is skipped when
    // boundary conditions are eliminated.
    if (!eliminate_bcs || !mesh[mesh[rowIdx].down].boundary)
      graph->insertGlobalIndices(rowIdx, arrayView(&mesh[rowIdx].down,1));

    if (!eliminate_bcs || !mesh[mesh[rowIdx].left].boundary)
      graph->insertGlobalIndices(rowIdx, arrayView(&mesh[rowIdx].left,1));

    if (!eliminate_bcs || !mesh[mesh[rowIdx].right].boundary)
      graph->insertGlobalIndices(rowIdx, arrayView(&mesh[rowIdx].right,1));

    if (!eliminate_bcs || !mesh[mesh[rowIdx].up].boundary)
      graph->insertGlobalIndices(rowIdx, arrayView(&mesh[rowIdx].up,1));
  }
  graph->fillComplete();

  // Deterministic operator built on the graph above.
  A = rcp(new Tpetra_CrsMatrix(graph));

  // Right-hand side: 0 on boundary rows, 1 everywhere else.
  b = Tpetra::createVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(x_map);
  Teuchos::ArrayRCP<Scalar> rhs = b->get1dViewNonConst();
  for (size_t k=0; k<numMyRows; ++k) {
    GlobalOrdinal rowIdx = myRows[k];
    rhs[k] = mesh[rowIdx].boundary ? 0 : 1;
  }

  // Diffusion coefficient functions.
  klFunc = rcp(new KL_Diffusion_Func(domainMin, domainMax, mu, s, 1.0, d));
  lnFunc = rcp(new LogNormal_Diffusion_Func<KL_Diffusion_Func>(*klFunc));
}
void
twoD_diffusion_problem<Scalar,MeshScalar,BasisScalar,LocalOrdinal,GlobalOrdinal,
		       Node>::
computeA(const FuncT& func, const Tpetra_Vector& p, Tpetra_CrsMatrix& jac)
{
  // Fills jac with the 5-point finite-difference operator, evaluating the
  // coefficient func at the midpoints between a node and its four
  // neighbours.  Boundary rows get a 1 on the diagonal and nothing else.
  using Teuchos::arrayView;

  jac.resumeFill();
  jac.setAllToScalar(0.0);

  // Copy the parameter values into an array that is handed to func.
  Teuchos::ArrayRCP<const Scalar> paramView = p.get1dView();
  Teuchos::Array<Scalar> rv(paramView());

  size_t numMyRows = x_map->getNodeNumElements();
  Teuchos::ArrayView<const GlobalOrdinal> myRows =
    x_map->getNodeElementList ();
  MeshScalar hSquared = h*h;
  Scalar diag;

  for (size_t k=0; k<numMyRows; ++k) {
    GlobalOrdinal rowIdx = myRows[k];

    if (mesh[rowIdx].boundary) {
      // Boundary row: identity.
      diag = 1.0;
      jac.replaceGlobalValues(rowIdx, arrayView(&rowIdx,1),
                              arrayView(&diag,1));
      continue;
    }

    // Interior row: off-diagonal coefficients from the half-step points.
    Scalar coefDown =
      -func(mesh[rowIdx].x, mesh[rowIdx].y-h/2.0, rv)/hSquared;
    Scalar coefLeft =
      -func(mesh[rowIdx].x-h/2.0, mesh[rowIdx].y, rv)/hSquared;
    Scalar coefRight =
      -func(mesh[rowIdx].x+h/2.0, mesh[rowIdx].y, rv)/hSquared;
    Scalar coefUp =
      -func(mesh[rowIdx].x, mesh[rowIdx].y+h/2.0, rv)/hSquared;

    // The diagonal is minus the sum of the off-diagonal coefficients.
    diag = -(coefDown + coefLeft + coefRight + coefUp);
    jac.replaceGlobalValues(rowIdx, arrayView(&rowIdx,1),
                            arrayView(&diag,1));

    // A neighbour on the boundary is skipped when boundary conditions are
    // eliminated.
    if (!eliminate_bcs || !mesh[mesh[rowIdx].down].boundary)
      jac.replaceGlobalValues(rowIdx,
                              arrayView(&mesh[rowIdx].down,1),
                              arrayView(&coefDown,1));

    if (!eliminate_bcs || !mesh[mesh[rowIdx].left].boundary)
      jac.replaceGlobalValues(rowIdx,
                              arrayView(&mesh[rowIdx].left,1),
                              arrayView(&coefLeft,1));

    if (!eliminate_bcs || !mesh[mesh[rowIdx].right].boundary)
      jac.replaceGlobalValues(rowIdx,
                              arrayView(&mesh[rowIdx].right,1),
                              arrayView(&coefRight,1));

    if (!eliminate_bcs || !mesh[mesh[rowIdx].up].boundary)
      jac.replaceGlobalValues(rowIdx,
                              arrayView(&mesh[rowIdx].up,1),
                              arrayView(&coefUp,1));
  }
  jac.fillComplete();
}
// Applies op to the local sub-vectors of each of the num_cols columns and,
// when both comm and reduct_objs are non-NULL, combines the per-process
// results through SPMD_all_reduce(...).
//
// Arguments as used below:
//   comm           communicator; NULL means no global reduction is performed.
//   op             operator whose apply_op(...) / reduct_obj_create() are called.
//   num_cols       number of columns; each column gets its own apply_op call.
//   num_vecs       number of input sub-vectors per column.
//   sub_vecs       input sub-vectors, indexed as [kc*num_vecs + k].
//   num_targ_vecs  number of target sub-vectors per column.
//   sub_targ_vecs  target sub-vectors, indexed as [kc*num_targ_vecs + k].
//   reduct_objs    one reduction object per column ([kc]); may be NULL.
//
// If reduct_objs, sub_vecs and sub_targ_vecs are all NULL nothing is done.
// Throws std::logic_error (via TEST_FOR_EXCEPTION) on the reduction path if
// the vector counts and vector pointers are inconsistent.
void RTOpPack::SPMD_apply_op(
  const Teuchos::Comm<index_type> *comm,
  const RTOpT<Scalar> &op,
  const int num_cols,
  const int num_vecs,
  const ConstSubVectorView<Scalar> sub_vecs[],
  const int num_targ_vecs,
  const SubVectorView<Scalar> sub_targ_vecs[],
  ReductTarget*const reduct_objs[]
  )
{
  using Teuchos::arrayView;
#ifdef RTOPPACK_ENABLE_SHOW_DUMP
  // Optional diagnostic dump of all inputs, enabled at run time by
  // show_spmd_apply_op_dump.
  Teuchos::RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();
  Teuchos::OSTab tab(out);
  if(show_spmd_apply_op_dump) {
    *out << "\nEntering RTOpPack::SPMD_apply_op(...) ...\n";
    *out
      << "\ncomm = " << (comm?comm->description():"NULL")
      << "\nop = " << op.description()
      << "\nnum_cols = " << num_cols
      << "\nnum_vecs = " << num_vecs
      << "\nnum_targ_vecs = " << num_targ_vecs
      << "\n";
    if( num_vecs && sub_vecs ) {
      *out << "\nInput vectors:\n";
      Teuchos::OSTab tab2(out);
      for( int kc = 0; kc < num_cols; ++kc ) {
        for( int k = 0; k < num_vecs; ++k ) {
          *out << "\nvecs["<<kc<<","<<k<<"] =\n";
          print(sub_vecs[kc*num_vecs+k],*out);
        }
      }
    }
    if( num_targ_vecs && sub_targ_vecs ) {
      *out << "\nInput/output vectors *before* transforamtion:\n";
      Teuchos::OSTab tab2(out);
      for( int kc = 0; kc < num_cols; ++kc ) {
        for( int k = 0; k < num_targ_vecs; ++k ) {
          *out << "\nvecs["<<kc<<","<<k<<"] =\n";
          print(sub_targ_vecs[kc*num_targ_vecs+k],*out);
        }
      }
    }
    if(reduct_objs) {
      *out << "\nInput/output reduction objects *before* reduction:\n";
      Teuchos::OSTab tab2(out);
      for( int kc = 0; kc < num_cols; ++kc ) {
        *out
          << "\nreduct_objs["<<kc<<"] =\n"
          << describe(*reduct_objs[kc],Teuchos::VERB_EXTREME);
      }
    }
  }
#endif // RTOPPACK_ENABLE_SHOW_DUMP
  using Teuchos::Workspace;
  // Workspace store backing the temporary arrays allocated further down.
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
  if( reduct_objs == NULL && sub_vecs == NULL && sub_targ_vecs == NULL ) {
    // This is a transformation operation with no data on this processor.
    // Therefore, we can just exit!
  }
  else {
    // Local sub-dimension, taken from the first input vector when
    // num_vecs != 0 and otherwise from the first target vector; 0 when the
    // corresponding array pointer is NULL.
    const int localSubDim =
      ( num_vecs
        ? ( sub_vecs ? sub_vecs[0].subDim() : 0 )
        : ( sub_targ_vecs ? sub_targ_vecs[0].subDim() : 0 )
        );
    // See if we need to do any global communication at all?
    if( comm==NULL || reduct_objs == NULL ) {
      // No communicator or nothing to reduce: apply the operator column by
      // column directly into the caller's reduction objects (if any).  The
      // operator is only invoked when there is local data.
      if( ( sub_vecs || sub_targ_vecs ) && localSubDim ) {
        for( int kc = 0; kc < num_cols; ++kc ) {
          op.apply_op(
            arrayView(sub_vecs+kc*num_vecs, num_vecs),
            arrayView(sub_targ_vecs+kc*num_targ_vecs, num_targ_vecs),
            reduct_objs ? Teuchos::ptr(reduct_objs[kc]) : Teuchos::null
            );
        }
      }
    }
    else {
      // Check the preconditions for excluding empty target vectors.
      // A non-zero count with a NULL array is an error unless both arrays
      // are NULL (i.e. this process has no vector data at all).
      TEST_FOR_EXCEPTION(
        ( ( num_vecs && !sub_vecs) || ( num_targ_vecs && !sub_targ_vecs) ) && !( !sub_vecs && !sub_targ_vecs )
        ,std::logic_error
        ,"SPMD_apply_op(...): Error, invalid arguments num_vecs = " << num_vecs
        << ", sub_vecs = " << sub_vecs << ", num_targ_vecs = " << num_targ_vecs
        << ", sub_targ_vecs = " << sub_targ_vecs
        );
      //
      // There is a non-null reduction target object and we are using
      // SPMD so we need to reduce it across processors
      //
      // Allocate the intermediate target object and perform the
      // reduction for the vector elements on this processor.
      //
      Workspace<Teuchos::RCP<ReductTarget> >
        i_reduct_objs( wss, num_cols );
      for( int kc = 0; kc < num_cols; ++kc ) {
        // An intermediate object is created for every column, even when this
        // process has no local data to apply the operator to.
        i_reduct_objs[kc] = op.reduct_obj_create();
        if( ( sub_vecs || sub_targ_vecs ) && localSubDim ) {
          op.apply_op(
            arrayView(sub_vecs+kc*num_vecs, num_vecs),
            arrayView(sub_targ_vecs+kc*num_targ_vecs, num_targ_vecs),
            i_reduct_objs[kc].ptr()
            );
        }
      }
#ifdef RTOPPACK_ENABLE_SHOW_DUMP
      if(show_spmd_apply_op_dump) {
        if(reduct_objs) {
          *out << "\nIntermediate reduction objects in this process before global reduction:\n";
          Teuchos::OSTab tab2(out);
          for( int kc = 0; kc < num_cols; ++kc ) {
            *out
              << "\ni_reduct_objs["<<kc<<"] =\n"
              << describe(*i_reduct_objs[kc],Teuchos::VERB_EXTREME);
          }
        }
      }
#endif // RTOPPACK_ENABLE_SHOW_DUMP
      //
      // Reduce the local intermediate reduction objects into the global reduction objects
      //
      // SPMD_all_reduce(...) is passed an array of raw const pointers, so
      // build one from the RCPs held in i_reduct_objs.
      Workspace<const ReductTarget*>
        _i_reduct_objs( wss, num_cols );
      for( int kc = 0; kc < num_cols; ++kc ) {
        _i_reduct_objs[kc] = &*i_reduct_objs[kc];
      }
#ifdef RTOPPACK_ENABLE_SHOW_DUMP
      if(show_spmd_apply_op_dump) {
        if(reduct_objs) {
          *out << "\nPerforming global reduction ...\n";
        }
      }
#endif // RTOPPACK_ENABLE_SHOW_DUMP
      SPMD_all_reduce(comm,op,num_cols,&_i_reduct_objs[0],reduct_objs);
    }
  }
#ifdef RTOPPACK_ENABLE_SHOW_DUMP
  // Optional diagnostic dump of the outputs.
  if(show_spmd_apply_op_dump) {
    if( num_targ_vecs && sub_targ_vecs ) {
      *out << "\nInput/output vectors *after* transforamtion:\n";
      Teuchos::OSTab tab2(out);
      for( int kc = 0; kc < num_cols; ++kc ) {
        for( int k = 0; k < num_targ_vecs; ++k ) {
          *out << "\nvecs["<<kc<<","<<k<<"] =\n";
          print(sub_targ_vecs[kc*num_targ_vecs+k],*out);
        }
      }
    }
    if(reduct_objs) {
      *out << "\nInput/output reduction objects *after* reduction:\n";
      Teuchos::OSTab tab2(out);
      for( int kc = 0; kc < num_cols; ++kc ) {
        *out
          << "\nreduct_objs["<<kc<<"] =\n"
          << describe(*reduct_objs[kc],Teuchos::VERB_EXTREME);
      }
    }
    *out << "\nLeaving RTOpPack::SPMD_apply_op(...) ...\n";
  }
#endif // RTOPPACK_ENABLE_SHOW_DUMP
}