C++ (Cpp) command_queue::enqueue_nd_range_kernel Examples

Programming Language: C++ (Cpp)

Class/Type: command_queue

Method/Function: enqueue_nd_range_kernel

Examples at hotexamples.com: 1

C++ (Cpp) command_queue::enqueue_nd_range_kernel - 1 example found. This is the top rated real world C++ (Cpp) example of command_queue::enqueue_nd_range_kernel extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

get_context(30)

get_device(26)

enqueue_1d_range_kernel(26)

enqueue_task(14)

context(14)

device(11)

enqueue_map_buffer(5)

enqueue_fill_buffer(4)

get(4)

raw(3)

enqueue_write_image(3)

enqueue_read_image(3)

enqueue_write_buffer(3)

enqueue_svm_memcpy(3)

enqueue_svm_memcpy_async(3)

profiling_enabled(2)

enqueue_unmap_buffer(2)

check_device_version(2)

enqueue_read_buffer(2)

enqueue_svm_fill(2)

enqueue_copy_buffer(2)

enqueue_svm_unmap(1)

post_front(1)

stop(1)

start(1)

sequence(1)

enqueueBarrierWithWaitList(1)

enqueueMarkerWithWaitList(1)

get_properties(1)

pop_back(1)

enqueue_svm_map(1)

enqueue_nd_range_kernel(1)

flags(1)

enqueue_read_buffer_async(1)

enqueue_write_buffer_async(1)

wait(1)

Example #1

Show file

File: gemm.hpp Project: jayavanth/compute

/// Enqueues a straightforward (one work-item per output element) matrix
/// multiply-accumulate on \p queue:
///
///     C[i*ldc + j] = alpha * sum_{k < K}(A[i*lda + k] * B[k*ldb + j])
///                    + beta * C[i*ldc + j]
///
/// for every row i in [0, M) and column j in [0, N). The indexing is
/// row-major with \p lda, \p ldb and \p ldc as the row strides (in elements)
/// of A, B and C.
///
/// NOTE(review): \c Scalar is not declared in this excerpt; presumably it is
/// a template parameter whose declaration precedes this function - confirm.
///
/// \param order   accepted but ignored; the row-major formula above is
///                always used
/// \param trans_a accepted but ignored; A is never transposed
/// \param trans_b accepted but ignored; B is never transposed
/// \param M       number of rows of A and C
/// \param N       number of columns of B and C
/// \param K       number of columns of A / rows of B (summation length)
/// \param alpha   scale factor applied to the product A*B
/// \param A       device pointer whose buffer is bound as the first operand
/// \param lda     row stride of A
/// \param B       device pointer whose buffer is bound as the second operand
/// \param ldb     row stride of B
/// \param beta    scale factor applied to the previous contents of C
/// \param C       device pointer whose buffer is read and overwritten
/// \param ldc     row stride of C
/// \param queue   command queue the kernel is enqueued on; its context is
///                used to compile the kernel
///
/// The kernel is only enqueued here; no explicit wait on its completion is
/// issued by this function.
inline void gemm(const matrix_order order,
                 const matrix_transpose trans_a,
                 const matrix_transpose trans_b,
                 const int M,
                 const int N,
                 const int K,
                 const Scalar alpha,
                 device_ptr<Scalar> A,
                 const int lda,
                 device_ptr<Scalar> B,
                 const int ldb,
                 const Scalar beta,
                 device_ptr<Scalar> C,
                 const int ldc,
                 command_queue &queue)
{
    // Layout and transpose selectors are part of the signature but are not
    // honored by this implementation.
    (void) order;
    (void) trans_a;
    (void) trans_b;

    // C has no elements, so there is nothing to compute. Returning early also
    // avoids enqueueing a zero-sized global range, which OpenCL runtimes may
    // reject.
    if(M == 0 || N == 0){
        return;
    }

    ::boost::compute::detail::meta_kernel k("gemm");
    k.add_set_arg<Scalar>("alpha", alpha);
    k.add_set_arg<Scalar>("beta", beta);
    k.add_set_arg<const cl_uint>("M", static_cast<const cl_uint>(M));
    k.add_set_arg<const cl_uint>("N", static_cast<const cl_uint>(N));
    k.add_set_arg<const cl_uint>("K", static_cast<const cl_uint>(K));
    k.add_set_arg<const cl_uint>("lda", static_cast<const cl_uint>(lda));
    k.add_set_arg<const cl_uint>("ldb", static_cast<const cl_uint>(ldb));
    k.add_set_arg<const cl_uint>("ldc", static_cast<const cl_uint>(ldc));
    size_t a_index = k.add_arg<const Scalar *>("__global", "A");
    size_t b_index = k.add_arg<const Scalar *>("__global", "B");
    size_t c_index = k.add_arg<Scalar *>("__global", "C");

    // Kernel body. Note that the "k" inside the string literals is the
    // OpenCL loop counter, unrelated to the C++ meta_kernel object `k`.
    // i (global id 0) selects the row, j (global id 1) selects the column.
    k <<
        k.decl<cl_uint>("i") << " = get_global_id(0);\n" <<
        k.decl<cl_uint>("j") << " = get_global_id(1);\n" <<
        k.decl<Scalar>("sum") << " = 0;\n" <<
        "for(uint k = 0; k < K; k++){\n" <<
        "    sum += " << A[k.expr<cl_uint>("i*lda+k")] << " * "
                      << B[k.expr<cl_uint>("k*ldb+j")] << ";\n" <<
        "};\n" <<
        C[k.expr<cl_uint>("i*ldc+j")] << "=" <<
            "alpha * sum + beta *" << C[k.expr<cl_uint>("i*ldc+j")] << ";\n";

    const context &context = queue.get_context();
    ::boost::compute::kernel kernel = k.compile(context);

    // The scalar arguments were bound via add_set_arg above; the buffer
    // arguments are bound here using the indices returned by add_arg.
    kernel.set_arg(a_index, A.get_buffer());
    kernel.set_arg(b_index, B.get_buffer());
    kernel.set_arg(c_index, C.get_buffer());

    // One work-item per element of C. Dimension 0 feeds `i` (row index, so
    // its extent must be M) and dimension 1 feeds `j` (column index, extent
    // N); launching N x M instead would address the wrong elements whenever
    // M != N.
    size_t global_work_offset[] = { 0, 0 };
    size_t global_work_size[] = { static_cast<size_t>(M),
                                  static_cast<size_t>(N) };
    // The trailing 0 is a null local work size; presumably this leaves the
    // work-group size up to the OpenCL implementation - confirm.
    queue.enqueue_nd_range_kernel(kernel,
                                  2,
                                  global_work_offset,
                                  global_work_size,
                                  0);
}