Teuchos::RCP<Epetra_CrsGraph>
sparse3Tensor2CrsGraph(
  const Stokhos::Sparse3Tensor<ordinal_type,value_type>& Cijk,
  const Epetra_BlockMap& map)
{
  typedef Stokhos::Sparse3Tensor<ordinal_type,value_type> Cijk_type;

  // Graph to be created
  Teuchos::RCP<Epetra_CrsGraph> graph =
    Teuchos::rcp(new Epetra_CrsGraph(Copy, map, 0));

  // Loop over Cijk entries, inserting a non-zero in the graph at
  // indices (i,j) if there is any k for which Cijk is non-zero
  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      ordinal_type j = index(j_it);
      for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
           i_it != Cijk.i_end(j_it); ++i_it) {
        ordinal_type i = index(i_it);
        graph->InsertGlobalIndices(i, 1, &j);
      }
    }
  }

  // Sort, remove redundancies, transform to local, ...
  graph->FillComplete();

  return graph;
}
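// Usage sketch (illustrative only): given a triple-product tensor and a row
// map built elsewhere, the helper above yields a FillComplete'd graph whose
// (i,j) pattern is the union over k of the nonzeros of Cijk.  The wrapper
// name below and the <int,double> instantiation are assumptions, not part of
// the surrounding source.
Teuchos::RCP<Epetra_CrsGraph>
exampleBuildGraph(const Stokhos::Sparse3Tensor<int,double>& Cijk,
                  const Epetra_BlockMap& map)
{
  Teuchos::RCP<Epetra_CrsGraph> graph = sparse3Tensor2CrsGraph(Cijk, map);
  // FillComplete() has already been called, so structural queries are valid
  const int max_row_entries = graph->MaxNumIndices();
  (void) max_row_entries; // silence unused-variable warnings in this sketch
  return graph;
}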
void Stokhos::BasisInteractionGraph::initialize(
  const Stokhos::OrthogPolyBasis<int,double> & max_basis,
  const Stokhos::Sparse3Tensor<int,double> & Cijk,
  int porder)
{
  using Teuchos::RCP;
  typedef Stokhos::Sparse3Tensor<int,double> Cijk_type;

  // // max it out if defaulted
  // if(porder<0)
  //   porder = max_basis.size();
  // RCP<Stokhos::Sparse3Tensor<int,double> > Cijk = max_basis.computeTripleProductTensor(porder);

  Cijk_type::k_iterator k_end = Cijk.k_end();
  if (onlyUseLinear_) {
    int dim = max_basis.dimension();
    k_end = Cijk.find_k(dim+1);
  }

  vecLookup_.resize(max_basis.size()); // defines number of rows
  numCols_ = vecLookup_.size();        // set number of columns

  // Loop over Cijk entries, inserting a non-zero in the graph at
  // indices (i,j) if there is any k for which Cijk is non-zero
  for(Cijk_type::k_iterator k_it=Cijk.k_begin(); k_it!=k_end; ++k_it) {
    for(Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
        j_it != Cijk.j_end(k_it); ++j_it) {
      int j = index(j_it);
      for(Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
          i_it != Cijk.i_end(j_it); ++i_it) {
        int i = index(i_it);
        vecLookup_[i].push_back(j);
      }
    }
  }
}
Stokhos::AdaptivityManager::Sparse3TensorHash::
Sparse3TensorHash(const Stokhos::Sparse3Tensor<int,double> & Cijk)
{
  typedef Stokhos::Sparse3Tensor<int,double>::k_iterator k_iterator;
  typedef Stokhos::Sparse3Tensor<int,double>::kj_iterator kj_iterator;
  typedef Stokhos::Sparse3Tensor<int,double>::kji_iterator kji_iterator;

  for(k_iterator k_it = Cijk.k_begin(); k_it!=Cijk.k_end(); k_it++) {
    int k = *k_it;
    for(kj_iterator j_it = Cijk.j_begin(k_it); j_it!=Cijk.j_end(k_it); j_it++) {
      int j = *j_it;
      for(kji_iterator i_it = Cijk.i_begin(j_it); i_it!=Cijk.i_end(j_it); i_it++) {
        int i = *i_it;
        hashMap_[IJK(i,j,k)] = i_it.value();
      }
    }
  }
}
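// Lookup sketch: the constructor above stores each C_ijk value keyed by its
// (i,j,k) triple.  Assuming hashMap_ behaves like a std::map<IJK,double>, a
// hypothetical accessor (not part of the original class) could read a single
// coefficient back, returning zero for structurally absent triples:
//
//   double value(int i, int j, int k) const {
//     std::map<IJK,double>::const_iterator it = hashMap_.find(IJK(i,j,k));
//     return (it == hashMap_.end()) ? 0.0 : it->second;
//   }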
Stokhos::MonoProjPCEBasis<ordinal_type, value_type>::
MonoProjPCEBasis(
  ordinal_type p,
  const Stokhos::OrthogPolyApprox<ordinal_type, value_type>& pce,
  const Stokhos::Quadrature<ordinal_type, value_type>& quad,
  const Stokhos::Sparse3Tensor<ordinal_type, value_type>& Cijk,
  bool limit_integration_order_) :
  RecurrenceBasis<ordinal_type, value_type>("Monomial Projection", p, true),
  limit_integration_order(limit_integration_order_),
  pce_sz(pce.basis()->size()),
  pce_norms(pce.basis()->norm_squared()),
  a(pce_sz),
  b(pce_sz),
  basis_vecs(pce_sz, p+1),
  new_pce(p+1)
{
  // If the original basis is normalized, we can use the standard QR
  // factorization.  For simplicity, we renormalize the PCE coefficients
  // for a normalized basis
  Stokhos::OrthogPolyApprox<ordinal_type, value_type> normalized_pce(pce);
  for (ordinal_type i=0; i<pce_sz; i++) {
    pce_norms[i] = std::sqrt(pce_norms[i]);
    normalized_pce[i] *= pce_norms[i];
  }

  // Evaluate PCE at quad points
  ordinal_type nqp = quad.size();
  Teuchos::Array<value_type> pce_vals(nqp);
  const Teuchos::Array<value_type>& weights = quad.getQuadWeights();
  const Teuchos::Array< Teuchos::Array<value_type> >& quad_points =
    quad.getQuadPoints();
  const Teuchos::Array< Teuchos::Array<value_type> >& basis_values =
    quad.getBasisAtQuadPoints();
  for (ordinal_type i=0; i<nqp; i++) {
    pce_vals[i] = normalized_pce.evaluate(quad_points[i], basis_values[i]);
  }

  // Form Krylov matrix up to order pce_sz
  matrix_type K(pce_sz, pce_sz);

  // Compute matrix
  matrix_type A(pce_sz, pce_sz);
  typedef Stokhos::Sparse3Tensor<ordinal_type, value_type> Cijk_type;
  for (typename Cijk_type::k_iterator k_it = Cijk.k_begin();
       k_it != Cijk.k_end(); ++k_it) {
    ordinal_type k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      ordinal_type j = index(j_it);
      value_type val = 0;
      for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
           i_it != Cijk.i_end(j_it); ++i_it) {
        ordinal_type i = index(i_it);
        value_type c = value(i_it) / (pce_norms[j]*pce_norms[k]);
        val += pce[i]*c;
      }
      A(k,j) = val;
    }
  }

  // Each column i is given by projection of the i-th order monomial
  // onto original basis
  vector_type u0 = Teuchos::getCol(Teuchos::View, K, 0);
  u0(0) = 1.0;
  for (ordinal_type i=1; i<pce_sz; i++)
    u0(i) = 0.0;
  for (ordinal_type k=1; k<pce_sz; k++) {
    vector_type u = Teuchos::getCol(Teuchos::View, K, k);
    vector_type up = Teuchos::getCol(Teuchos::View, K, k-1);
    u.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, up, 0.0);
  }
  /*
  for (ordinal_type j=0; j<pce_sz; j++) {
    for (ordinal_type i=0; i<pce_sz; i++) {
      value_type val = 0.0;
      for (ordinal_type k=0; k<nqp; k++)
        val += weights[k]*std::pow(pce_vals[k],j)*basis_values[k][i];
      K(i,j) = val;
    }
  }
  */

  std::cout << K << std::endl << std::endl;

  // Compute QR factorization of K
  ordinal_type ws_size, info;
  value_type ws_size_query;
  Teuchos::Array<value_type> tau(pce_sz);
  Teuchos::LAPACK<ordinal_type,value_type> lapack;
  lapack.GEQRF(pce_sz, pce_sz, K.values(), K.stride(), &tau[0],
               &ws_size_query, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
                             "GEQRF returned value " << info);
  ws_size = static_cast<ordinal_type>(ws_size_query);
  Teuchos::Array<value_type> work(ws_size);
  lapack.GEQRF(pce_sz, pce_sz, K.values(), K.stride(), &tau[0],
               &work[0], ws_size, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
                             "GEQRF returned value " << info);

  // Get Q
  lapack.ORGQR(pce_sz, pce_sz, pce_sz, K.values(), K.stride(), &tau[0],
               &ws_size_query, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
                             "ORGQR returned value " << info);
  ws_size = static_cast<ordinal_type>(ws_size_query);
  work.resize(ws_size);
  lapack.ORGQR(pce_sz, pce_sz, pce_sz, K.values(), K.stride(), &tau[0],
               &work[0], ws_size, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
                             "ORGQR returned value " << info);

  // Get basis vectors
  for (ordinal_type j=0; j<p+1; j++)
    for (ordinal_type i=0; i<pce_sz; i++)
      basis_vecs(i,j) = K(i,j);

  // Compute T = Q'*A*Q
  matrix_type prod(pce_sz,pce_sz);
  prod.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, K, A, 0.0);
  matrix_type T(pce_sz,pce_sz);
  T.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, prod, K, 0.0);

  //std::cout << T << std::endl;

  // Recursion coefficients are diagonal and super diagonal
  b[0] = 1.0;
  for (ordinal_type i=0; i<pce_sz-1; i++) {
    a[i] = T(i,i);
    b[i+1] = T(i,i+1);
  }
  a[pce_sz-1] = T(pce_sz-1,pce_sz-1);

  // Setup rest of basis
  this->setup();

  // Project original PCE into the new basis
  vector_type u(pce_sz);
  for (ordinal_type i=0; i<pce_sz; i++)
    u[i] = normalized_pce[i];
  new_pce.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, basis_vecs, u, 0.0);
  for (ordinal_type i=0; i<=p; i++)
    new_pce[i] /= this->norms[i];
}
static FlatSparse3Tensor_kji
create( const Stokhos::ProductBasis<OrdinalType,ValueType>& basis,
        const Stokhos::Sparse3Tensor<OrdinalType,ValueType>& Cijk,
        const Teuchos::ParameterList& params = Teuchos::ParameterList())
{
  typedef Stokhos::Sparse3Tensor<OrdinalType,ValueType> Cijk_type;

  // Compute number of j's for each k
  const size_type dimension = basis.size();
  const size_type nk = Cijk.num_k();
  std::vector< size_t > j_coord_work( nk , (size_t) 0 );
  size_type j_entry_count = 0 ;
  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    OrdinalType k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      OrdinalType j = index(j_it);
      if (j >= k) {
        ++j_coord_work[k];
        ++j_entry_count;
      }
    }
  }

  // Compute number of i's for each k and j
  std::vector< size_t > i_coord_work( j_entry_count , (size_t) 0 );
  size_type i_entry_count = 0 ;
  size_type j_entry = 0 ;
  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    OrdinalType k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      OrdinalType j = index(j_it);
      if (j >= k) {
        for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
             i_it != Cijk.i_end(j_it); ++i_it) {
          ++i_coord_work[j_entry];
          ++i_entry_count;
        }
        ++j_entry;
      }
    }
  }

  /*
  // Pad each row to have size divisible by alignment size
  enum { Align = Kokkos::Impl::is_same<ExecutionSpace,Kokkos::Cuda>::value ? 32 : 2 };
  for ( size_type i = 0 ; i < dimension ; ++i ) {
    const size_t rem = coord_work[i] % Align;
    if (rem > 0) {
      const size_t pad = Align - rem;
      coord_work[i] += pad;
      entry_count += pad;
    }
  }
  */

  // Allocate tensor data
  FlatSparse3Tensor_kji tensor ;
  tensor.m_dim = dimension;
  tensor.m_j_coord = coord_array_type( "j_coord" , j_entry_count );
  tensor.m_i_coord = coord_array_type( "i_coord" , i_entry_count );
  tensor.m_value = value_array_type( "value" , i_entry_count );
  tensor.m_num_j = entry_array_type( "num_j" , nk );
  tensor.m_num_i = entry_array_type( "num_i" , j_entry_count );
  tensor.m_j_row_map = row_map_array_type( "j_row_map" , nk+1 );
  tensor.m_i_row_map = row_map_array_type( "i_row_map" , j_entry_count+1 );
  tensor.m_flops = 3*j_entry_count + 2*i_entry_count;

  // Create host mirrors (these are views if the tensor data is in host memory)
  typename coord_array_type::HostMirror host_j_coord =
    Kokkos::create_mirror_view( tensor.m_j_coord );
  typename coord_array_type::HostMirror host_i_coord =
    Kokkos::create_mirror_view( tensor.m_i_coord );
  typename value_array_type::HostMirror host_value =
    Kokkos::create_mirror_view( tensor.m_value );
  typename entry_array_type::HostMirror host_num_j =
    Kokkos::create_mirror_view( tensor.m_num_j );
  typename entry_array_type::HostMirror host_num_i =
    Kokkos::create_mirror_view( tensor.m_num_i );
  typename entry_array_type::HostMirror host_j_row_map =
    Kokkos::create_mirror_view( tensor.m_j_row_map );
  typename entry_array_type::HostMirror host_i_row_map =
    Kokkos::create_mirror_view( tensor.m_i_row_map );

  // Compute j row map
  size_type sum = 0;
  host_j_row_map(0) = 0;
  for ( size_type k = 0 ; k < nk ; ++k ) {
    sum += j_coord_work[k];
    host_j_row_map(k+1) = sum;
    host_num_j(k) = 0;
  }

  // Compute i row map
  sum = 0;
  host_i_row_map(0) = 0;
  for ( size_type j = 0 ; j < j_entry_count ; ++j ) {
    sum += i_coord_work[j];
    host_i_row_map(j+1) = sum;
    host_num_i(j) = 0;
  }

  for ( size_type k = 0 ; k < nk ; ++k ) {
    j_coord_work[k] = host_j_row_map[k];
  }
  for ( size_type j = 0 ; j < j_entry_count ; ++j ) {
    i_coord_work[j] = host_i_row_map[j];
  }

  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    OrdinalType k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      OrdinalType j = index(j_it);
      if (j >= k) {
        const size_type jEntry = j_coord_work[k];
        ++j_coord_work[k];
        host_j_coord(jEntry) = j ;
        ++host_num_j(k);
        for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
             i_it != Cijk.i_end(j_it); ++i_it) {
          OrdinalType i = index(i_it);
          ValueType c = Stokhos::value(i_it);
          const size_type iEntry = i_coord_work[jEntry];
          ++i_coord_work[jEntry];
          host_value(iEntry) = (j != k) ? c : 0.5*c;
          host_i_coord(iEntry) = i ;
          ++host_num_i(jEntry);
          ++tensor.m_nnz;
        }
      }
    }
  }

  // Copy data to device if necessary
  Kokkos::deep_copy( tensor.m_j_coord , host_j_coord );
  Kokkos::deep_copy( tensor.m_i_coord , host_i_coord );
  Kokkos::deep_copy( tensor.m_value , host_value );
  Kokkos::deep_copy( tensor.m_num_j , host_num_j );
  Kokkos::deep_copy( tensor.m_num_i , host_num_i );
  Kokkos::deep_copy( tensor.m_j_row_map , host_j_row_map );
  Kokkos::deep_copy( tensor.m_i_row_map , host_i_row_map );

  return tensor ;
}
static FlatSparse3Tensor
create( const Stokhos::ProductBasis<OrdinalType,ValueType>& basis,
        const Stokhos::Sparse3Tensor<OrdinalType,ValueType>& Cijk )
{
  typedef Stokhos::Sparse3Tensor<OrdinalType,ValueType> Cijk_type;

  // Compute number of k's for each i
  const size_type dimension = basis.size();
  std::vector< size_t > k_coord_work( dimension , (size_t) 0 );
  size_type k_entry_count = 0 ;
  for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
       i_it!=Cijk.i_end(); ++i_it) {
    OrdinalType i = index(i_it);
    k_coord_work[i] = Cijk.num_k(i_it);
    k_entry_count += Cijk.num_k(i_it);
  }

  // Compute number of j's for each i and k
  std::vector< size_t > j_coord_work( k_entry_count , (size_t) 0 );
  size_type j_entry_count = 0 ;
  size_type k_entry = 0 ;
  for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
       i_it!=Cijk.i_end(); ++i_it) {
    for (typename Cijk_type::ik_iterator k_it = Cijk.k_begin(i_it);
         k_it != Cijk.k_end(i_it); ++k_it, ++k_entry) {
      OrdinalType k = index(k_it);
      for (typename Cijk_type::ikj_iterator j_it = Cijk.j_begin(k_it);
           j_it != Cijk.j_end(k_it); ++j_it) {
        OrdinalType j = index(j_it);
        if (j >= k) {
          ++j_coord_work[k_entry];
          ++j_entry_count;
        }
      }
    }
  }

  /*
  // Pad each row to have size divisible by alignment size
  enum { Align = KokkosArray::Impl::is_same<DeviceType,KokkosArray::Cuda>::value ? 32 : 2 };
  for ( size_type i = 0 ; i < dimension ; ++i ) {
    const size_t rem = coord_work[i] % Align;
    if (rem > 0) {
      const size_t pad = Align - rem;
      coord_work[i] += pad;
      entry_count += pad;
    }
  }
  */

  // Allocate tensor data
  FlatSparse3Tensor tensor ;
  tensor.m_k_coord = coord_array_type( "k_coord" , k_entry_count );
  tensor.m_j_coord = coord_array_type( "j_coord" , j_entry_count );
  tensor.m_value = value_array_type( "value" , j_entry_count );
  tensor.m_num_k = entry_array_type( "num_k" , dimension );
  tensor.m_num_j = entry_array_type( "num_j" , k_entry_count );
  tensor.m_k_row_map = row_map_array_type( "k_row_map" , dimension+1 );
  tensor.m_j_row_map = row_map_array_type( "j_row_map" , k_entry_count+1 );

  // Create host mirrors (these are views if the tensor data is in host memory)
  typename coord_array_type::HostMirror host_k_coord =
    KokkosArray::create_mirror_view( tensor.m_k_coord );
  typename coord_array_type::HostMirror host_j_coord =
    KokkosArray::create_mirror_view( tensor.m_j_coord );
  typename value_array_type::HostMirror host_value =
    KokkosArray::create_mirror_view( tensor.m_value );
  typename entry_array_type::HostMirror host_num_k =
    KokkosArray::create_mirror_view( tensor.m_num_k );
  typename entry_array_type::HostMirror host_num_j =
    KokkosArray::create_mirror_view( tensor.m_num_j );
  typename entry_array_type::HostMirror host_k_row_map =
    KokkosArray::create_mirror_view( tensor.m_k_row_map );
  typename entry_array_type::HostMirror host_j_row_map =
    KokkosArray::create_mirror_view( tensor.m_j_row_map );

  // Compute k row map
  size_type sum = 0;
  host_k_row_map(0) = 0;
  for ( size_type i = 0 ; i < dimension ; ++i ) {
    sum += k_coord_work[i];
    host_k_row_map(i+1) = sum;
  }

  // Compute j row map
  sum = 0;
  host_j_row_map(0) = 0;
  for ( size_type i = 0 ; i < k_entry_count ; ++i ) {
    sum += j_coord_work[i];
    host_j_row_map(i+1) = sum;
  }

  for ( size_type i = 0 ; i < dimension ; ++i ) {
    k_coord_work[i] = host_k_row_map[i];
  }
  for ( size_type i = 0 ; i < k_entry_count ; ++i ) {
    j_coord_work[i] = host_j_row_map[i];
  }

  for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
       i_it!=Cijk.i_end(); ++i_it) {
    OrdinalType i = index(i_it);
    for (typename Cijk_type::ik_iterator k_it = Cijk.k_begin(i_it);
         k_it != Cijk.k_end(i_it); ++k_it) {
      OrdinalType k = index(k_it);
      const size_type kEntry = k_coord_work[i];
      ++k_coord_work[i];
      host_k_coord(kEntry) = k ;
      ++host_num_k(i);
      for (typename Cijk_type::ikj_iterator j_it = Cijk.j_begin(k_it);
           j_it != Cijk.j_end(k_it); ++j_it) {
        OrdinalType j = index(j_it);
        ValueType c = Stokhos::value(j_it);
        if (j >= k) {
          const size_type jEntry = j_coord_work[kEntry];
          ++j_coord_work[kEntry];
          host_value(jEntry) = (j != k) ? c : 0.5*c;
          host_j_coord(jEntry) = j ;
          ++host_num_j(kEntry);
          ++tensor.m_nnz;
        }
      }
    }
  }

  // Copy data to device if necessary
  KokkosArray::deep_copy( tensor.m_k_coord , host_k_coord );
  KokkosArray::deep_copy( tensor.m_j_coord , host_j_coord );
  KokkosArray::deep_copy( tensor.m_value , host_value );
  KokkosArray::deep_copy( tensor.m_num_k , host_num_k );
  KokkosArray::deep_copy( tensor.m_num_j , host_num_j );
  KokkosArray::deep_copy( tensor.m_k_row_map , host_k_row_map );
  KokkosArray::deep_copy( tensor.m_j_row_map , host_j_row_map );

  tensor.m_flops = 5*tensor.m_nnz + dimension;

  return tensor ;
}