/// Fills this graph for a (row basis, column basis) pair by looking up
/// sparsity in an interaction graph built over the master basis.
///
/// For every row-basis term r and column-basis term c, both terms are
/// translated to master-basis indices; if the master graph reports an
/// interaction for that pair, c is appended to vecLookup_[r].
///
/// \param masterBasis basis used to build the reference graph and to map
///                    row/column terms to indices
/// \param Cijk        triple-product tensor passed to the master graph
/// \param rowBasis    basis whose terms define the rows
/// \param colBasis    basis whose terms define the columns
/// \param porder      forwarded unchanged to the master graph constructor
void Stokhos::BasisInteractionGraph::initialize(const Stokhos::ProductBasis<int,double> & masterBasis,
                                                const Stokhos::Sparse3Tensor<int,double> & Cijk,
                                                const Stokhos::ProductBasis<int,double> & rowBasis,
                                                const Stokhos::ProductBasis<int,double> & colBasis,
                                                int porder)
{
   // Reference graph used to decide whether there is an interaction or not.
   Stokhos::BasisInteractionGraph masterGraph(masterBasis,Cijk,onlyUseLinear_,porder);

   const int rowCount = rowBasis.size();
   const int colCount = colBasis.size();

   // One adjacency list per row; the column count is recorded separately.
   vecLookup_.resize(rowCount);
   numCols_ = colCount;

   // Row-basis term -> master-basis index.
   std::vector<int> rowToMaster(rowCount);
   for(int row=0;row<rowCount;++row)
      rowToMaster[row] = masterBasis.index(rowBasis.term(row));

   // Column-basis term -> master-basis index.
   std::vector<int> colToMaster(colCount);
   for(int col=0;col<colCount;++col)
      colToMaster[col] = masterBasis.index(colBasis.term(col));

   // Copy the master sparsity pattern into the local (row, column) numbering.
   for(int row=0;row<rowCount;++row) {
      const int masterRow = rowToMaster[row];

      for(int col=0;col<colCount;++col) {
         const int masterCol = colToMaster[col];

         // Skip pairs that are inactive in the master graph.
         if(!masterGraph(masterRow,masterCol))
            continue;

         vecLookup_[row].push_back(col);
      }
   }
}
/// Builds a FlatSparse3Tensor_kji from a sparse triple-product tensor.
///
/// Cijk is traversed in k -> j -> i order. Only (k,j) pairs with j >= k are
/// stored; for each stored pair every i entry is kept, and values on the
/// diagonal j == k are scaled by 0.5.
///
/// The arrays are filled through host mirrors and then copied into the
/// tensor's views with Kokkos::deep_copy.
///
/// \param basis  product basis; only basis.size() is read (stored in m_dim)
/// \param Cijk   sparse 3-tensor supplying indices and values
/// \param params not read by this function
/// \return the populated tensor
static FlatSparse3Tensor_kji create(
  const Stokhos::ProductBasis<OrdinalType,ValueType>& basis,
  const Stokhos::Sparse3Tensor<OrdinalType,ValueType>& Cijk,
  const Teuchos::ParameterList& params = Teuchos::ParameterList())
{
  typedef Stokhos::Sparse3Tensor<OrdinalType,ValueType> Cijk_type;

  // Pass 1: count, for each k, the j entries that are kept (j >= k).
  // NOTE(review): j_coord_work is indexed by the k index itself, so this
  // assumes every k index is < Cijk.num_k() -- confirm.
  const size_type dimension = basis.size();
  const size_type nk = Cijk.num_k();
  std::vector< size_t > j_coord_work( nk , (size_t) 0 );
  size_type j_entry_count = 0 ;
  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    OrdinalType k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      OrdinalType j = index(j_it);
      if (j >= k) {
        ++j_coord_work[k];
        ++j_entry_count;
      }
    }
  }

  // Pass 2: count the i entries of every kept (k,j) pair. j_entry numbers
  // the kept pairs in traversal order.
  std::vector< size_t > i_coord_work( j_entry_count , (size_t) 0 );
  size_type i_entry_count = 0 ;
  size_type j_entry = 0 ;
  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    OrdinalType k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      OrdinalType j = index(j_it);
      if (j >= k) {
        for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
             i_it != Cijk.i_end(j_it); ++i_it) {
          ++i_coord_work[j_entry];
          ++i_entry_count;
        }
        ++j_entry;
      }
    }
  }

  // Disabled row padding, kept for reference. It is stale: it refers to
  // `coord_work` and `entry_count`, which are not defined in this function.
  //
  // // Pad each row to have size divisible by alignment size
  // enum { Align = Kokkos::Impl::is_same<ExecutionSpace,Kokkos::Cuda>::value ? 32 : 2 };
  // for ( size_type i = 0 ; i < dimension ; ++i ) {
  //   const size_t rem = coord_work[i] % Align;
  //   if (rem > 0) {
  //     const size_t pad = Align - rem;
  //     coord_work[i] += pad;
  //     entry_count += pad;
  //   }
  // }

  // Allocate tensor data
  FlatSparse3Tensor_kji tensor ;
  tensor.m_dim = dimension;
  tensor.m_j_coord = coord_array_type( "j_coord" , j_entry_count );
  tensor.m_i_coord = coord_array_type( "i_coord" , i_entry_count );
  tensor.m_value = value_array_type( "value" , i_entry_count );
  tensor.m_num_j = entry_array_type( "num_j" , nk );
  tensor.m_num_i = entry_array_type( "num_i" , j_entry_count );
  tensor.m_j_row_map = row_map_array_type( "j_row_map" , nk+1 );
  tensor.m_i_row_map = row_map_array_type( "i_row_map" , j_entry_count+1 );
  tensor.m_flops = 3*j_entry_count + 2*i_entry_count;

  // Create mirror, is a view if is host memory.
  // The row-map mirrors use row_map_array_type, matching the views they
  // mirror.
  typename coord_array_type::HostMirror
    host_j_coord = Kokkos::create_mirror_view( tensor.m_j_coord );
  typename coord_array_type::HostMirror
    host_i_coord = Kokkos::create_mirror_view( tensor.m_i_coord );
  typename value_array_type::HostMirror
    host_value = Kokkos::create_mirror_view( tensor.m_value );
  typename entry_array_type::HostMirror
    host_num_j = Kokkos::create_mirror_view( tensor.m_num_j );
  typename entry_array_type::HostMirror
    host_num_i = Kokkos::create_mirror_view( tensor.m_num_i );
  typename row_map_array_type::HostMirror
    host_j_row_map = Kokkos::create_mirror_view( tensor.m_j_row_map );
  typename row_map_array_type::HostMirror
    host_i_row_map = Kokkos::create_mirror_view( tensor.m_i_row_map );

  // Compute j row map (prefix sum of the per-k counts) and reset the
  // per-k counters that the fill pass increments.
  size_type sum = 0;
  host_j_row_map(0) = 0;
  for ( size_type k = 0 ; k < nk ; ++k ) {
    sum += j_coord_work[k];
    host_j_row_map(k+1) = sum;
    host_num_j(k) = 0;
  }

  // Compute i row map (prefix sum of the per-(k,j) counts) and reset the
  // per-(k,j) counters.
  sum = 0;
  host_i_row_map(0) = 0;
  for ( size_type j = 0 ; j < j_entry_count ; ++j ) {
    sum += i_coord_work[j];
    host_i_row_map(j+1) = sum;
    host_num_i(j) = 0;
  }

  // Reuse the work arrays as write cursors: each starts at its row offset.
  for ( size_type k = 0 ; k < nk ; ++k ) {
    j_coord_work[k] = host_j_row_map(k);
  }
  for ( size_type j = 0 ; j < j_entry_count ; ++j ) {
    i_coord_work[j] = host_i_row_map(j);
  }

  // Pass 3: fill coordinates and values.
  // NOTE(review): m_nnz is only incremented here, so this assumes the
  // default constructor leaves it at zero -- confirm.
  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    OrdinalType k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      OrdinalType j = index(j_it);
      if (j >= k) {
        const size_type jEntry = j_coord_work[k];
        ++j_coord_work[k];
        host_j_coord(jEntry) = j ;
        ++host_num_j(k);
        for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
             i_it != Cijk.i_end(j_it); ++i_it) {
          OrdinalType i = index(i_it);
          ValueType c = Stokhos::value(i_it);
          const size_type iEntry = i_coord_work[jEntry];
          ++i_coord_work[jEntry];
          // Diagonal (j == k) entries are stored at half weight.
          host_value(iEntry) = (j != k) ? c : 0.5*c;
          host_i_coord(iEntry) = i ;
          ++host_num_i(jEntry);
          ++tensor.m_nnz;
        }
      }
    }
  }

  // Copy data to device if necessary
  Kokkos::deep_copy( tensor.m_j_coord , host_j_coord );
  Kokkos::deep_copy( tensor.m_i_coord , host_i_coord );
  Kokkos::deep_copy( tensor.m_value , host_value );
  Kokkos::deep_copy( tensor.m_num_j , host_num_j );
  Kokkos::deep_copy( tensor.m_num_i , host_num_i );
  Kokkos::deep_copy( tensor.m_j_row_map , host_j_row_map );
  Kokkos::deep_copy( tensor.m_i_row_map , host_i_row_map );

  return tensor ;
}
/// Builds a FlatSparse3Tensor from a sparse triple-product tensor.
///
/// Cijk is traversed in i -> k -> j order. Every (i,k) pair is stored; for
/// each pair only the j entries with j >= k are kept, and values on the
/// diagonal j == k are scaled by 0.5.
///
/// The arrays are filled through host mirrors and then copied into the
/// tensor's views with KokkosArray::deep_copy.
///
/// \param basis product basis; only basis.size() is read
/// \param Cijk  sparse 3-tensor supplying indices and values
/// \return the populated tensor, with m_flops set to 5*m_nnz + basis.size()
static FlatSparse3Tensor create(
  const Stokhos::ProductBasis<OrdinalType,ValueType>& basis,
  const Stokhos::Sparse3Tensor<OrdinalType,ValueType>& Cijk )
{
  typedef Stokhos::Sparse3Tensor<OrdinalType,ValueType> Cijk_type;

  // Pass 1: number of k's for each i (all k's are kept).
  const size_type dimension = basis.size();
  std::vector< size_t > k_coord_work( dimension , (size_t) 0 );
  size_type k_entry_count = 0 ;
  for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
       i_it!=Cijk.i_end(); ++i_it) {
    OrdinalType i = index(i_it);
    k_coord_work[i] = Cijk.num_k(i_it);
    k_entry_count += Cijk.num_k(i_it);
  }

  // Pass 2: number of kept j's (j >= k) for each (i,k) pair. k_entry
  // numbers the (i,k) pairs in traversal order.
  std::vector< size_t > j_coord_work( k_entry_count , (size_t) 0 );
  size_type j_entry_count = 0 ;
  size_type k_entry = 0 ;
  for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
       i_it!=Cijk.i_end(); ++i_it) {
    for (typename Cijk_type::ik_iterator k_it = Cijk.k_begin(i_it);
         k_it != Cijk.k_end(i_it); ++k_it, ++k_entry) {
      OrdinalType k = index(k_it);
      for (typename Cijk_type::ikj_iterator j_it = Cijk.j_begin(k_it);
           j_it != Cijk.j_end(k_it); ++j_it) {
        OrdinalType j = index(j_it);
        if (j >= k) {
          ++j_coord_work[k_entry];
          ++j_entry_count;
        }
      }
    }
  }

  // Disabled row padding, kept for reference. It is stale: it refers to
  // `coord_work` and `entry_count`, which are not defined in this function.
  //
  // // Pad each row to have size divisible by alignment size
  // enum { Align = KokkosArray::Impl::is_same<DeviceType,KokkosArray::Cuda>::value ? 32 : 2 };
  // for ( size_type i = 0 ; i < dimension ; ++i ) {
  //   const size_t rem = coord_work[i] % Align;
  //   if (rem > 0) {
  //     const size_t pad = Align - rem;
  //     coord_work[i] += pad;
  //     entry_count += pad;
  //   }
  // }

  // Allocate tensor data
  FlatSparse3Tensor tensor ;
  tensor.m_k_coord = coord_array_type( "k_coord" , k_entry_count );
  tensor.m_j_coord = coord_array_type( "j_coord" , j_entry_count );
  tensor.m_value = value_array_type( "value" , j_entry_count );
  tensor.m_num_k = entry_array_type( "num_k" , dimension );
  tensor.m_num_j = entry_array_type( "num_j" , k_entry_count );
  tensor.m_k_row_map = row_map_array_type( "k_row_map" , dimension+1 );
  tensor.m_j_row_map = row_map_array_type( "j_row_map" , k_entry_count+1 );

  // Create mirror, is a view if is host memory.
  // The row-map mirrors use row_map_array_type, matching the views they
  // mirror.
  typename coord_array_type::HostMirror
    host_k_coord = KokkosArray::create_mirror_view( tensor.m_k_coord );
  typename coord_array_type::HostMirror
    host_j_coord = KokkosArray::create_mirror_view( tensor.m_j_coord );
  typename value_array_type::HostMirror
    host_value = KokkosArray::create_mirror_view( tensor.m_value );
  typename entry_array_type::HostMirror
    host_num_k = KokkosArray::create_mirror_view( tensor.m_num_k );
  typename entry_array_type::HostMirror
    host_num_j = KokkosArray::create_mirror_view( tensor.m_num_j );
  typename row_map_array_type::HostMirror
    host_k_row_map = KokkosArray::create_mirror_view( tensor.m_k_row_map );
  typename row_map_array_type::HostMirror
    host_j_row_map = KokkosArray::create_mirror_view( tensor.m_j_row_map );

  // Compute k row map (prefix sum of the per-i counts). The per-i counter
  // is zeroed here because the fill pass only increments it; this mirrors
  // the kji variant and avoids depending on allocation-time zero fill.
  size_type sum = 0;
  host_k_row_map(0) = 0;
  for ( size_type i = 0 ; i < dimension ; ++i ) {
    sum += k_coord_work[i];
    host_k_row_map(i+1) = sum;
    host_num_k(i) = 0;
  }

  // Compute j row map (prefix sum of the per-(i,k) counts) and zero the
  // per-(i,k) counter for the same reason.
  sum = 0;
  host_j_row_map(0) = 0;
  for ( size_type i = 0 ; i < k_entry_count ; ++i ) {
    sum += j_coord_work[i];
    host_j_row_map(i+1) = sum;
    host_num_j(i) = 0;
  }

  // Reuse the work arrays as write cursors: each starts at its row offset.
  for ( size_type i = 0 ; i < dimension ; ++i ) {
    k_coord_work[i] = host_k_row_map(i);
  }
  for ( size_type i = 0 ; i < k_entry_count ; ++i ) {
    j_coord_work[i] = host_j_row_map(i);
  }

  // Pass 3: fill coordinates and values.
  // NOTE(review): m_nnz is only incremented here, so this assumes the
  // default constructor leaves it at zero -- confirm.
  for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
       i_it!=Cijk.i_end(); ++i_it) {
    OrdinalType i = index(i_it);
    for (typename Cijk_type::ik_iterator k_it = Cijk.k_begin(i_it);
         k_it != Cijk.k_end(i_it); ++k_it) {
      OrdinalType k = index(k_it);
      const size_type kEntry = k_coord_work[i];
      ++k_coord_work[i];
      host_k_coord(kEntry) = k ;
      ++host_num_k(i);
      for (typename Cijk_type::ikj_iterator j_it = Cijk.j_begin(k_it);
           j_it != Cijk.j_end(k_it); ++j_it) {
        OrdinalType j = index(j_it);
        ValueType c = Stokhos::value(j_it);
        if (j >= k) {
          const size_type jEntry = j_coord_work[kEntry];
          ++j_coord_work[kEntry];
          // Diagonal (j == k) entries are stored at half weight.
          host_value(jEntry) = (j != k) ? c : 0.5*c;
          host_j_coord(jEntry) = j ;
          ++host_num_j(kEntry);
          ++tensor.m_nnz;
        }
      }
    }
  }

  // Copy data to device if necessary
  KokkosArray::deep_copy( tensor.m_k_coord , host_k_coord );
  KokkosArray::deep_copy( tensor.m_j_coord , host_j_coord );
  KokkosArray::deep_copy( tensor.m_value , host_value );
  KokkosArray::deep_copy( tensor.m_num_k , host_num_k );
  KokkosArray::deep_copy( tensor.m_num_j , host_num_j );
  KokkosArray::deep_copy( tensor.m_k_row_map , host_k_row_map );
  KokkosArray::deep_copy( tensor.m_j_row_map , host_j_row_map );

  tensor.m_flops = 5*tensor.m_nnz + dimension;

  return tensor ;
}