bool comparePCEs(const PCEType& a1,
                   const std::string& a1_name,
                   const Stokhos::OrthogPolyApprox<OrdinalType,ValueType>&a2,
                   const std::string& a2_name,
                   const ValueType& rel_tol, const ValueType& abs_tol,
                   Teuchos::FancyOStream& out)
  {
    bool success = true;

    out << "Comparing " << a1_name << " == " << a2_name << " ... ";

    const OrdinalType n = a1.size();

    // Compare sizes
    if (a2.size() != n) {
      out << "\nError, "<<a1_name<<".size() = "<<a1.size()<<" == "
          << a2_name<<".size() = "<<a2.size()<<" : failed!\n";
      return false;
    }

    // Compare elements
    for( OrdinalType i = 0; i < n; ++i ) {
      ValueType nrm = std::sqrt(a2.basis()->norm_squared(i));
      ValueType err = std::abs(a1.coeff(i) - a2[i]) / nrm;
      ValueType tol =
        abs_tol + rel_tol*std::max(std::abs(a1.coeff(i)),std::abs(a2[i]))/nrm;
      if (err  > tol) {
        out
          <<"\nError, relErr("<<a1_name<<"["<<i<<"],"
          <<a2_name<<"["<<i<<"]) = relErr("<<a1.coeff(i)<<","<<a2[i]<<") = "
          <<err<<" <= tol = "<<tol<<": failed!\n";
        success = false;
      }
    }
    if (success) {
      out << "passed\n";
    }
    else {
      out << std::endl
          << a1_name << " = " << a1 << std::endl
          << a2_name << " = " << a2 << std::endl;
    }

    return success;
  }
ordinal_type
Stokhos::ProductLanczosPCEBasis<ordinal_type, value_type>::
isInvariant(const Stokhos::OrthogPolyApprox<ordinal_type, value_type>& pce) const
{
  Teuchos::RCP<const Stokhos::OrthogPolyBasis<ordinal_type,value_type> > basis =
    pce.basis();
  ordinal_type dim = basis->dimension();
  value_type tol = 1.0e-15;

  // Check if basis is a product basis
  Teuchos::RCP<const Stokhos::ProductBasis<ordinal_type,value_type> > prod_basis = Teuchos::rcp_dynamic_cast<const Stokhos::ProductBasis<ordinal_type,value_type> >(basis);
  if (prod_basis == Teuchos::null)
    return -2;

  // Build list of dimensions pce depends on by looping over each dimension, 
  // computing norm of pce with just that dimension -- note we don't include
  // the constant term
  Teuchos::Array<ordinal_type> dependent_dims;
  tmp_pce.reset(basis);
  for (ordinal_type i=0; i<dim; i++) {
    ordinal_type p = prod_basis->getCoordinateBases()[i]->order();
    tmp_pce.init(0.0);
    for (ordinal_type j=1; j<=p; j++)
      tmp_pce.term(i,j) = pce.term(i,j);
    value_type nrm = tmp_pce.two_norm();
    if (nrm > tol) dependent_dims.push_back(i);
  }

  // If dependent_dims has length 1, pce a function of a single variable,
  // which is an invariant subspace
  if (dependent_dims.size() == 1)
    return dependent_dims[0];

  // If dependent_dims has length 0, pce is constant
  else if (dependent_dims.size() == 0)
    return -1;

  // Otherwise pce depends on more than one variable
  return -2;
}
Stokhos::MonoProjPCEBasis<ordinal_type, value_type>::
MonoProjPCEBasis(
   ordinal_type p,
   const Stokhos::OrthogPolyApprox<ordinal_type, value_type>& pce,
   const Stokhos::Quadrature<ordinal_type, value_type>& quad,
   const Stokhos::Sparse3Tensor<ordinal_type, value_type>& Cijk,
   bool limit_integration_order_) :
  RecurrenceBasis<ordinal_type, value_type>("Monomial Projection", p, true),
  limit_integration_order(limit_integration_order_),
  pce_sz(pce.basis()->size()),
  pce_norms(pce.basis()->norm_squared()),
  a(pce_sz), 
  b(pce_sz),
  basis_vecs(pce_sz, p+1),
  new_pce(p+1)
{
  // If the original basis is normalized, we can use the standard QR
  // factorization.  For simplicity, we renormalize the PCE coefficients
  // for a normalized basis
  Stokhos::OrthogPolyApprox<ordinal_type, value_type> normalized_pce(pce);
  for (ordinal_type i=0; i<pce_sz; i++) {
    pce_norms[i] = std::sqrt(pce_norms[i]);
    normalized_pce[i] *= pce_norms[i];
  }

  // Evaluate PCE at quad points
  ordinal_type nqp = quad.size();
  Teuchos::Array<value_type> pce_vals(nqp);
  const Teuchos::Array<value_type>& weights = quad.getQuadWeights();
  const Teuchos::Array< Teuchos::Array<value_type> >& quad_points =
    quad.getQuadPoints();
  const Teuchos::Array< Teuchos::Array<value_type> >& basis_values =
    quad.getBasisAtQuadPoints();
  for (ordinal_type i=0; i<nqp; i++) {
    pce_vals[i] = normalized_pce.evaluate(quad_points[i], basis_values[i]);
  }

  // Form Kylov matrix up to order pce_sz
  matrix_type K(pce_sz, pce_sz);

  // Compute matrix
  matrix_type A(pce_sz, pce_sz);
  typedef Stokhos::Sparse3Tensor<ordinal_type, value_type> Cijk_type;
  for (typename Cijk_type::k_iterator k_it = Cijk.k_begin();
       k_it != Cijk.k_end(); ++k_it) {
    ordinal_type k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it); 
	 j_it != Cijk.j_end(k_it); ++j_it) {
      ordinal_type j = index(j_it);
      value_type val = 0;
      for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
	   i_it != Cijk.i_end(j_it); ++i_it) {
	ordinal_type i = index(i_it);
	value_type c = value(i_it) / (pce_norms[j]*pce_norms[k]);
	val += pce[i]*c;
      }
      A(k,j) = val;
    }
  }

  // Each column i is given by projection of the i-th order monomial 
  // onto original basis
  vector_type u0 = Teuchos::getCol(Teuchos::View, K, 0);
  u0(0) = 1.0;
  for (ordinal_type i=1; i<pce_sz; i++)
    u0(i) = 0.0;
  for (ordinal_type k=1; k<pce_sz; k++) {
    vector_type u = Teuchos::getCol(Teuchos::View, K, k);
    vector_type up = Teuchos::getCol(Teuchos::View, K, k-1);
    u.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, up, 0.0);
  }
  /*
  for (ordinal_type j=0; j<pce_sz; j++) {
    for (ordinal_type i=0; i<pce_sz; i++) {
      value_type val = 0.0;
      for (ordinal_type k=0; k<nqp; k++)
	val += weights[k]*std::pow(pce_vals[k],j)*basis_values[k][i];
      K(i,j) = val;
    }
  }
  */

  std::cout << K << std::endl << std::endl;

  // Compute QR factorization of K
  ordinal_type ws_size, info;
  value_type ws_size_query;
  Teuchos::Array<value_type> tau(pce_sz);
  Teuchos::LAPACK<ordinal_type,value_type> lapack;
  lapack.GEQRF(pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &ws_size_query, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "GEQRF returned value " << info);
  ws_size = static_cast<ordinal_type>(ws_size_query);
  Teuchos::Array<value_type> work(ws_size);
  lapack.GEQRF(pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &work[0], ws_size, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "GEQRF returned value " << info);
  
  // Get Q
  lapack.ORGQR(pce_sz, pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &ws_size_query, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "ORGQR returned value " << info);
  ws_size = static_cast<ordinal_type>(ws_size_query);
  work.resize(ws_size);
  lapack.ORGQR(pce_sz, pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &work[0], ws_size, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "ORGQR returned value " << info);

  // Get basis vectors
  for (ordinal_type j=0; j<p+1; j++)
    for (ordinal_type i=0; i<pce_sz; i++)
      basis_vecs(i,j) = K(i,j);

  
  // Compute T = Q'*A*Q
  matrix_type prod(pce_sz,pce_sz);
  prod.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, K, A, 0.0);
  matrix_type T(pce_sz,pce_sz);
  T.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, prod, K, 0.0);

  //std::cout << T << std::endl;

  // Recursion coefficients are diagonal and super diagonal
  b[0] = 1.0;
  for (ordinal_type i=0; i<pce_sz-1; i++) {
    a[i] = T(i,i);
    b[i+1] = T(i,i+1);
  }
  a[pce_sz-1] = T(pce_sz-1,pce_sz-1);

  // Setup rest of basis
  this->setup();

  // Project original PCE into the new basis
  vector_type u(pce_sz);
  for (ordinal_type i=0; i<pce_sz; i++)
    u[i] = normalized_pce[i];
  new_pce.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, basis_vecs, u, 
		   0.0);
  for (ordinal_type i=0; i<=p; i++)
    new_pce[i] /= this->norms[i];
}