// Builds this graph for a (rowBasis, colBasis) pair by querying an
// interaction graph constructed on masterBasis.
//
// masterBasis  basis used to translate row/column terms into master indices
// Cijk         triple-product tensor passed to the master graph constructor
// rowBasis     one graph row is created per term of this basis
// colBasis     one candidate column is considered per term of this basis
// porder       forwarded unchanged to the master graph constructor
//
// On return vecLookup_ has rowBasis.size() rows, each listing (in increasing
// order) the column indices flagged as active by the master graph, and
// numCols_ equals colBasis.size().
void Stokhos::BasisInteractionGraph::initialize(const Stokhos::ProductBasis<int,double> & masterBasis,
                                                const Stokhos::Sparse3Tensor<int,double> & Cijk,
                                                const Stokhos::ProductBasis<int,double> & rowBasis,
                                                const Stokhos::ProductBasis<int,double> & colBasis,int porder)
{
   // graph on the master basis: answers "do these two master terms interact?"
   Stokhos::BasisInteractionGraph masterBig(masterBasis,Cijk,onlyUseLinear_,porder);

   const int rowCount = rowBasis.size();
   const int colCount = colBasis.size();

   // one (initially untouched) adjacency list per row, and the column count
   vecLookup_.resize(rowCount);
   numCols_ = colCount;

   // translate every row term into its index within the master basis
   std::vector<int> masterOfRow(rowCount);
   for(int r=0;r<rowCount;++r)
      masterOfRow[r] = masterBasis.index(rowBasis.term(r));

   // translate every column term into its index within the master basis
   std::vector<int> masterOfCol(colCount);
   for(int c=0;c<colCount;++c)
      masterOfCol[c] = masterBasis.index(colBasis.term(c));

   // copy the master graph's sparsity pattern into the local row lists
   for(int r=0;r<rowCount;++r) {
      const int masterRow = masterOfRow[r];
      for(int c=0;c<colCount;++c) {
         // keep column c only when the master graph marks the pair as active
         if(masterBig(masterRow,masterOfCol[c]))
            vecLookup_[r].push_back(c);
      }
   }
}
  /// Build a flat two-level (k -> j -> i) compressed copy of \c Cijk.
  ///
  /// Only (k,j) pairs with j >= k are stored; the value of every entry whose
  /// j equals k is multiplied by 0.5 (presumably so a consumer can exploit
  /// symmetry in j and k -- NOTE(review): confirm against the multiply kernel).
  ///
  /// Layout produced:
  ///  - m_j_row_map(k) .. m_j_row_map(k+1) is the range of j-entries for k,
  ///    m_j_coord holds their j indices and m_num_j(k) their count;
  ///  - m_i_row_map(e) .. m_i_row_map(e+1) is the range of i-entries for
  ///    j-entry e, m_i_coord / m_value hold the i index and coefficient and
  ///    m_num_i(e) their count.
  ///
  /// \param basis  only basis.size() is used (stored as m_dim)
  /// \param Cijk   sparse triple-product tensor to flatten; j_coord_work is
  ///               sized by Cijk.num_k() and indexed by k, so every k index
  ///               is assumed to be smaller than Cijk.num_k()
  /// \param params not read by this function
  /// \return the populated tensor, with data copied to the device views
  static FlatSparse3Tensor_kji
  create( const Stokhos::ProductBasis<OrdinalType,ValueType>& basis,
          const Stokhos::Sparse3Tensor<OrdinalType,ValueType>& Cijk,
          const Teuchos::ParameterList& params = Teuchos::ParameterList())
  {
    typedef Stokhos::Sparse3Tensor<OrdinalType,ValueType> Cijk_type;

    // Pass 1: count the retained j's (j >= k) for each k
    const size_type dimension = basis.size();
    const size_type nk = Cijk.num_k();
    std::vector< size_t > j_coord_work( nk , (size_t) 0 );
    size_type j_entry_count = 0 ;
    for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
         k_it!=Cijk.k_end(); ++k_it) {
      OrdinalType k = index(k_it);
      for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
           j_it != Cijk.j_end(k_it); ++j_it) {
        OrdinalType j = index(j_it);
        if (j >= k) {
          ++j_coord_work[k];
          ++j_entry_count;
        }
      }
    }

    // Pass 2: count the i's under each retained (k,j) pair, in the same
    // traversal order so that j_entry numbers the pairs consecutively
    std::vector< size_t > i_coord_work( j_entry_count , (size_t) 0 );
    size_type i_entry_count = 0 ;
    size_type j_entry = 0 ;
    for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
         k_it!=Cijk.k_end(); ++k_it) {
      OrdinalType k = index(k_it);
      for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
           j_it != Cijk.j_end(k_it); ++j_it) {
        OrdinalType j = index(j_it);
        if (j >= k) {
          for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
               i_it != Cijk.i_end(j_it); ++i_it) {
            ++i_coord_work[j_entry];
            ++i_entry_count;
          }
          ++j_entry;
        }
      }
    }

    // NOTE: rows are stored unpadded; no alignment padding is applied.

    // Allocate tensor data
    FlatSparse3Tensor_kji tensor ;
    tensor.m_dim = dimension;
    tensor.m_j_coord = coord_array_type( "j_coord" , j_entry_count );
    tensor.m_i_coord = coord_array_type( "i_coord" , i_entry_count );
    tensor.m_value = value_array_type( "value" , i_entry_count );
    tensor.m_num_j = entry_array_type( "num_j" , nk );
    tensor.m_num_i = entry_array_type( "num_i" , j_entry_count );
    tensor.m_j_row_map = row_map_array_type( "j_row_map" , nk+1 );
    tensor.m_i_row_map = row_map_array_type( "i_row_map" , j_entry_count+1 );
    tensor.m_flops = 3*j_entry_count + 2*i_entry_count;

    // Create mirror, is a view if is host memory.  Each mirror is declared
    // with the HostMirror of the view type it actually mirrors.
    typename coord_array_type::HostMirror
      host_j_coord = Kokkos::create_mirror_view( tensor.m_j_coord );
    typename coord_array_type::HostMirror
      host_i_coord = Kokkos::create_mirror_view( tensor.m_i_coord );
    typename value_array_type::HostMirror
      host_value = Kokkos::create_mirror_view( tensor.m_value );
    typename entry_array_type::HostMirror
      host_num_j = Kokkos::create_mirror_view( tensor.m_num_j );
    typename entry_array_type::HostMirror
      host_num_i = Kokkos::create_mirror_view( tensor.m_num_i );
    typename row_map_array_type::HostMirror
      host_j_row_map = Kokkos::create_mirror_view( tensor.m_j_row_map );
    typename row_map_array_type::HostMirror
      host_i_row_map = Kokkos::create_mirror_view( tensor.m_i_row_map );

    // Compute j row map (exclusive prefix sum of the per-k counts).  The
    // work array is turned in place from "count for k" into "next free
    // j-entry slot for k", i.e. the row start host_j_row_map(k).
    size_type sum = 0;
    host_j_row_map(0) = 0;
    for ( size_type k = 0 ; k < nk ; ++k ) {
      const size_t count = j_coord_work[k];
      j_coord_work[k] = sum;
      sum += count;
      host_j_row_map(k+1) = sum;
      host_num_j(k) = 0;
    }

    // Compute i row map the same way, per retained (k,j) entry
    sum = 0;
    host_i_row_map(0) = 0;
    for ( size_type j = 0 ; j < j_entry_count ; ++j ) {
      const size_t count = i_coord_work[j];
      i_coord_work[j] = sum;
      sum += count;
      host_i_row_map(j+1) = sum;
      host_num_i(j) = 0;
    }

    // Pass 3: fill coordinates and values using the insertion cursors
    for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
         k_it!=Cijk.k_end(); ++k_it) {
      OrdinalType k = index(k_it);
      for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
           j_it != Cijk.j_end(k_it); ++j_it) {
        OrdinalType j = index(j_it);
        if (j >= k) {
          const size_type jEntry = j_coord_work[k];
          ++j_coord_work[k];
          host_j_coord(jEntry) = j ;
          ++host_num_j(k);
          for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
               i_it != Cijk.i_end(j_it); ++i_it) {
            OrdinalType i = index(i_it);
            ValueType c = Stokhos::value(i_it);
            const size_type iEntry = i_coord_work[jEntry];
            ++i_coord_work[jEntry];
            // entries with j == k are stored at half weight
            host_value(iEntry) = (j != k) ? c : 0.5*c;
            host_i_coord(iEntry) = i ;
            ++host_num_i(jEntry);
            // NOTE(review): relies on a default-constructed tensor starting
            // with m_nnz == 0 -- confirm in the constructor.
            ++tensor.m_nnz;
          }
        }
      }
    }

    // Copy data to device if necessary
    Kokkos::deep_copy( tensor.m_j_coord , host_j_coord );
    Kokkos::deep_copy( tensor.m_i_coord , host_i_coord );
    Kokkos::deep_copy( tensor.m_value , host_value );
    Kokkos::deep_copy( tensor.m_num_j , host_num_j );
    Kokkos::deep_copy( tensor.m_num_i , host_num_i );
    Kokkos::deep_copy( tensor.m_j_row_map , host_j_row_map );
    Kokkos::deep_copy( tensor.m_i_row_map , host_i_row_map );

    return tensor ;
  }
// Example no. 3
// 0
  /// Build a flat two-level (i -> k -> j) compressed copy of \c Cijk.
  ///
  /// Every (i,k) pair present in \c Cijk gets a k-entry; under it only the
  /// j's with j >= k are stored, and the value of every entry whose j equals
  /// k is multiplied by 0.5 (presumably so a consumer can exploit symmetry in
  /// j and k -- NOTE(review): confirm against the multiply kernel).
  ///
  /// Layout produced:
  ///  - m_k_row_map(i) .. m_k_row_map(i+1) is the range of k-entries for i,
  ///    m_k_coord holds their k indices and m_num_k(i) their count;
  ///  - m_j_row_map(e) .. m_j_row_map(e+1) is the range of j-entries for
  ///    k-entry e, m_j_coord / m_value hold the j index and coefficient and
  ///    m_num_j(e) their count.
  ///
  /// \param basis  only basis.size() is used (number of i rows)
  /// \param Cijk   sparse triple-product tensor to flatten
  /// \return the populated tensor, with data copied to the device views
  static FlatSparse3Tensor
  create( const Stokhos::ProductBasis<OrdinalType,ValueType>& basis,
          const Stokhos::Sparse3Tensor<OrdinalType,ValueType>& Cijk )
  {
    typedef Stokhos::Sparse3Tensor<OrdinalType,ValueType> Cijk_type;

    // Pass 1: number of k's for each i
    const size_type dimension = basis.size();
    std::vector< size_t > k_coord_work( dimension , (size_t) 0 );
    size_type k_entry_count = 0 ;
    for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
         i_it!=Cijk.i_end(); ++i_it) {
      OrdinalType i = index(i_it);
      k_coord_work[i] = Cijk.num_k(i_it);
      k_entry_count += Cijk.num_k(i_it);
    }

    // Pass 2: number of retained j's (j >= k) for each (i,k) pair; k_entry
    // numbers the pairs consecutively in traversal order
    std::vector< size_t > j_coord_work( k_entry_count , (size_t) 0 );
    size_type j_entry_count = 0 ;
    size_type k_entry = 0 ;
    for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
         i_it!=Cijk.i_end(); ++i_it) {
      for (typename Cijk_type::ik_iterator k_it = Cijk.k_begin(i_it);
           k_it != Cijk.k_end(i_it); ++k_it, ++k_entry) {
        OrdinalType k = index(k_it);
        for (typename Cijk_type::ikj_iterator j_it = Cijk.j_begin(k_it);
             j_it != Cijk.j_end(k_it); ++j_it) {
          OrdinalType j = index(j_it);
          if (j >= k) {
            ++j_coord_work[k_entry];
            ++j_entry_count;
          }
        }
      }
    }

    // NOTE: rows are stored unpadded; no alignment padding is applied.

    // Allocate tensor data
    FlatSparse3Tensor tensor ;
    tensor.m_k_coord = coord_array_type( "k_coord" , k_entry_count );
    tensor.m_j_coord = coord_array_type( "j_coord" , j_entry_count );
    tensor.m_value = value_array_type( "value" , j_entry_count );
    tensor.m_num_k = entry_array_type( "num_k" , dimension );
    tensor.m_num_j = entry_array_type( "num_j" , k_entry_count );
    tensor.m_k_row_map = row_map_array_type( "k_row_map" , dimension+1 );
    tensor.m_j_row_map = row_map_array_type( "j_row_map" , k_entry_count+1 );

    // Create mirror, is a view if is host memory.  Each mirror is declared
    // with the HostMirror of the view type it actually mirrors.
    typename coord_array_type::HostMirror
      host_k_coord = KokkosArray::create_mirror_view( tensor.m_k_coord );
    typename coord_array_type::HostMirror
      host_j_coord = KokkosArray::create_mirror_view( tensor.m_j_coord );
    typename value_array_type::HostMirror
      host_value = KokkosArray::create_mirror_view( tensor.m_value );
    typename entry_array_type::HostMirror
      host_num_k = KokkosArray::create_mirror_view( tensor.m_num_k );
    typename entry_array_type::HostMirror
      host_num_j = KokkosArray::create_mirror_view( tensor.m_num_j );
    typename row_map_array_type::HostMirror
      host_k_row_map = KokkosArray::create_mirror_view( tensor.m_k_row_map );
    typename row_map_array_type::HostMirror
      host_j_row_map = KokkosArray::create_mirror_view( tensor.m_j_row_map );

    // Compute k row map (exclusive prefix sum of the per-i counts).  The
    // work array is turned in place from "count for i" into "next free
    // k-entry slot for i", i.e. the row start host_k_row_map(i).  The
    // per-row counters are zeroed explicitly because the fill pass below
    // increments them.
    size_type sum = 0;
    host_k_row_map(0) = 0;
    for ( size_type i = 0 ; i < dimension ; ++i ) {
      const size_t count = k_coord_work[i];
      k_coord_work[i] = sum;
      sum += count;
      host_k_row_map(i+1) = sum;
      host_num_k(i) = 0;
    }

    // Compute j row map the same way, per (i,k) entry
    sum = 0;
    host_j_row_map(0) = 0;
    for ( size_type e = 0 ; e < k_entry_count ; ++e ) {
      const size_t count = j_coord_work[e];
      j_coord_work[e] = sum;
      sum += count;
      host_j_row_map(e+1) = sum;
      host_num_j(e) = 0;
    }

    // Pass 3: fill coordinates and values using the insertion cursors
    for (typename Cijk_type::i_iterator i_it=Cijk.i_begin();
         i_it!=Cijk.i_end(); ++i_it) {
      OrdinalType i = index(i_it);
      for (typename Cijk_type::ik_iterator k_it = Cijk.k_begin(i_it);
           k_it != Cijk.k_end(i_it); ++k_it) {
        OrdinalType k = index(k_it);
        const size_type kEntry = k_coord_work[i];
        ++k_coord_work[i];
        host_k_coord(kEntry) = k ;
        ++host_num_k(i);
        for (typename Cijk_type::ikj_iterator j_it = Cijk.j_begin(k_it);
             j_it != Cijk.j_end(k_it); ++j_it) {
          OrdinalType j = index(j_it);
          if (j >= k) {
            // fetch the coefficient only for entries that are kept
            ValueType c = Stokhos::value(j_it);
            const size_type jEntry = j_coord_work[kEntry];
            ++j_coord_work[kEntry];
            // entries with j == k are stored at half weight
            host_value(jEntry) = (j != k) ? c : 0.5*c;
            host_j_coord(jEntry) = j ;
            ++host_num_j(kEntry);
            // NOTE(review): relies on a default-constructed tensor starting
            // with m_nnz == 0 -- confirm in the constructor.
            ++tensor.m_nnz;
          }
        }
      }
    }

    // Copy data to device if necessary
    KokkosArray::deep_copy( tensor.m_k_coord , host_k_coord );
    KokkosArray::deep_copy( tensor.m_j_coord , host_j_coord );
    KokkosArray::deep_copy( tensor.m_value , host_value );
    KokkosArray::deep_copy( tensor.m_num_k , host_num_k );
    KokkosArray::deep_copy( tensor.m_num_j , host_num_j );
    KokkosArray::deep_copy( tensor.m_k_row_map , host_k_row_map );
    KokkosArray::deep_copy( tensor.m_j_row_map , host_j_row_map );

    // Set after the fill pass because it depends on the final m_nnz
    tensor.m_flops = 5*tensor.m_nnz + dimension;

    return tensor ;
  }