/// Enqueue the kernel to the specified command queue. void operator()(const command_queue &q) { prm_addr.clear(); for(auto p = prm_pos.begin(); p != prm_pos.end(); ++p) prm_addr.push_back(stack.data() + *p); cuda_check( cuLaunchKernel( K, static_cast<unsigned>(g_size.x), static_cast<unsigned>(g_size.y), static_cast<unsigned>(g_size.z), static_cast<unsigned>(w_size.x), static_cast<unsigned>(w_size.y), static_cast<unsigned>(w_size.z), static_cast<unsigned>(smem), q.raw(), prm_addr.data(), 0 ) ); reset(); }
/// Return the cuSPARSE handle associated with the given command queue.
///
/// Handles are created lazily and stored in a function-local static cache
/// that keeps a shared owning pointer to each of them. The returned raw
/// handle is therefore non-owning and must not be destroyed by the caller.
///
/// \param q Command queue used as the cache key. When no cached handle is
///          found, it is also used to select the context and its raw()
///          stream is bound to the newly created handle.
/// \return Raw cuSPARSE handle held by the cache.
inline cusparseHandle_t cusparse_handle(const command_queue &q) {
    typedef std::shared_ptr<std::remove_pointer<cusparseHandle_t>::type> smart_handle;
    typedef vex::detail::object_cache<vex::detail::index_by_context, smart_handle> cache_type;

    static cache_type cache;

    auto h = cache.find(q);

    if (h == cache.end()) {
        select_context(q);

        cusparseHandle_t handle;
        cuda_check( cusparseCreate(&handle) );

        // Take ownership immediately: if binding the stream below fails
        // (assuming cuda_check throws on error), the deleter still releases
        // the freshly created handle instead of leaking it.
        smart_handle owned(handle, detail::deleter());

        // NOTE(review): the stream is bound only here, when the handle is
        // created. If several queues map to the same cache entry (the index
        // type is named index_by_context), later queues would get a handle
        // bound to the first queue's stream -- confirm this is intended.
        cuda_check( cusparseSetStream(owned.get(), q.raw()) );

        // Pass a temporary copy so the call matches the original's rvalue argument.
        h = cache.insert(q, smart_handle(owned));
    }

    return h->second.get();
}
/// Ordering predicate for command queues.
///
/// \return true when the handle returned by a.raw() compares less than the
///         one returned by b.raw().
bool operator()(const command_queue &a, const command_queue &b) const {
    const auto &lhs = a.raw();
    const auto &rhs = b.raw();
    return lhs < rhs;
}