/// Team-level call operator: updates one block of rows of m_y from m_A and m_x.
///
/// The team identified by `dev` covers `rows_per_team` consecutive rows,
/// starting at row `dev.league_rank() * rows_per_team`. For every such row
/// that exists in m_A the kernel
///   1. reduces, over the entries of the row, the products
///      value(k) * m_x(colidx(k)) (each value is passed through ATV::conj
///      first when `conjugate` is set),
///   2. scales the reduced value by `alpha`,
///   3. stores it into m_y(row): overwriting when `dobeta == 0`, otherwise
///      adding it to `beta * m_y(row)`.
///
/// @param dev  Team handle supplying the league rank and the nested
///             thread / vector ranges used below.
KOKKOS_INLINE_FUNCTION
void operator() (const team_member& dev) const
{
  // Outer level: the rows owned by this team are spread over its threads.
  Kokkos::parallel_for (
    Kokkos::TeamThreadRange (dev, 0, rows_per_team),
    [&] (const ordinal_type& rowInTeam) {
      const ordinal_type rowIdx =
        static_cast<ordinal_type> (dev.league_rank ()) * rows_per_team + rowInTeam;

      // The last team may be assigned indices past the end of the matrix.
      if (rowIdx >= m_A.numRows ()) {
        return;
      }

      const KokkosSparse::SparseRowViewConst<AMatrix> rowView = m_A.rowConst (rowIdx);
      const ordinal_type numEntries = static_cast<ordinal_type> (rowView.length);

      // Inner level: reduce the row's products across the vector range.
      value_type rowSum = 0;
      Kokkos::parallel_reduce (
        Kokkos::ThreadVectorRange (dev, numEntries),
        [&] (const ordinal_type& k, value_type& partial) {
          const value_type a_val =
            conjugate ? ATV::conj (rowView.value (k)) : rowView.value (k);
          const auto col = rowView.colidx (k);
          partial += a_val * m_x (col);
        },
        rowSum);

      // Write-back is wrapped in single(PerThread) so it runs once per thread
      // rather than once per vector lane.
      Kokkos::single (Kokkos::PerThread (dev), [&] () {
        rowSum *= alpha;

        if (dobeta != 0) {
          m_y (rowIdx) = beta * m_y (rowIdx) + rowSum;
        }
        else {
          // dobeta == 0: m_y(rowIdx) is overwritten without being read.
          m_y (rowIdx) = rowSum;
        }
      });
    });
}