KOKKOS_INLINE_FUNCTION
  static void apply( const tensor_type & tensor ,
                     const MatrixValue * const a ,
                     const VectorValue * const x ,
                           VectorValue * const y )
  {
    // Accumulates the tensor contraction into y:
    //
    //   y[i] += value(i,j,k) * ( a[j] * x[k] + a[k] * x[j] )
    //
    // for every stored (k, j-entry, i-entry) triple of the tensor.
    // y is only added to, never cleared, so the caller supplies its
    // starting contents.
    // NOTE(review): the symmetric a/x combination suggests the tensor
    // stores only one of each (j,k)/(k,j) pair -- confirm against the
    // tensor's construction.
    const size_type kCount = tensor.num_k();

    for ( size_type k = 0; k < kCount; ++k ) {
      // Snapshot the k-th operands once; they are reused for every
      // j entry that belongs to this k.
      const MatrixValue a_k = a[k];
      const VectorValue x_k = x[k];

      // Half-open range [jFirst, jStop) of j entries attached to k.
      const size_type jCount = tensor.num_j(k);
      const size_type jFirst = tensor.j_begin(k);
      const size_type jStop  = jFirst + jCount;

      for ( size_type jPos = jFirst; jPos < jStop; ++jPos ) {
        const size_type j = tensor.j_coord(jPos);

        // Factor shared by all i entries of this (k, j) pair.
        VectorValue pairTerm = a[j] * x_k + a_k * x[j];

        // Half-open range [iFirst, iStop) of i entries attached to
        // this j entry.
        const size_type iCount = tensor.num_i(jPos);
        const size_type iFirst = tensor.i_begin(jPos);
        const size_type iStop  = iFirst + iCount;

        for ( size_type iPos = iFirst; iPos < iStop; ++iPos ) {
          const size_type row = tensor.i_coord(iPos);
          y[row] += tensor.value(iPos) * pairTerm;
        }
      }
    }
  }