/// Runs the `_scalar_add` kernel over every element of `sum`.
///
/// Kernel arguments, in order: the buffer behind `sum`, the buffer behind
/// `operand_1`, the scalar `operand_2`, then the row and column counts of `sum`.
/// The launch uses a global range of rows x columns and the call does not
/// return until the command queue has finished.
///
/// NOTE(review): the dynamic_cast results are dereferenced without a null
/// check, so both matrices are assumed to be OpenCL-backed -- confirm with callers.
void opencl_context::add(matrix& sum, const matrix& operand_1, const double operand_2)
{
    auto* const dst = dynamic_cast<opencl::matrix*>(sum.implementation());
    auto* const src = dynamic_cast<opencl::matrix*>(operand_1.implementation());

    const auto rows = sum.row_count();
    const auto cols = sum.column_count();

    _scalar_add->setArg(0, *dst->get());
    _scalar_add->setArg(1, *src->get());
    _scalar_add->setArg(2, operand_2);
    _scalar_add->setArg(3, rows);
    _scalar_add->setArg(4, cols);

    // Global range covers the whole matrix; each work-group is a single item.
    const cl::NDRange global_origin(0U, 0U);
    const cl::NDRange global_extent(rows, cols);
    const cl::NDRange local_extent(1U, 1U);

    _command_queue->enqueueNDRangeKernel(*_scalar_add, global_origin, global_extent, local_extent);
    _command_queue->finish();
}
/// Runs the `_matrix_subtract` kernel over every element of `difference`.
///
/// Kernel arguments, in order: the buffer behind `difference`, the buffers
/// behind `operand_1` and `operand_2`, then the row and column counts of
/// `difference`. The call does not return until the command queue has finished.
///
/// NOTE(review): the dynamic_cast results are dereferenced without a null
/// check, so all three matrices are assumed to be OpenCL-backed -- confirm with callers.
void opencl_context::subtract(matrix& difference, const matrix& operand_1, const matrix& operand_2)
{
    auto* const dst = dynamic_cast<opencl::matrix*>(difference.implementation());
    auto* const lhs = dynamic_cast<opencl::matrix*>(operand_1.implementation());
    auto* const rhs = dynamic_cast<opencl::matrix*>(operand_2.implementation());

    const auto rows = difference.row_count();
    const auto cols = difference.column_count();

    _matrix_subtract->setArg(0, *dst->get());
    _matrix_subtract->setArg(1, *lhs->get());
    _matrix_subtract->setArg(2, *rhs->get());
    _matrix_subtract->setArg(3, rows);
    _matrix_subtract->setArg(4, cols);

    // Global range covers the whole matrix; each work-group is a single item.
    const cl::NDRange global_origin(0U, 0U);
    const cl::NDRange global_extent(rows, cols);
    const cl::NDRange local_extent(1U, 1U);

    _command_queue->enqueueNDRangeKernel(*_matrix_subtract, global_origin, global_extent, local_extent);
    _command_queue->finish();
}
/// Runs the `_matrix_elementwise_multiply` kernel over every element of `product`.
///
/// Kernel arguments, in order: the buffer behind `product`, the buffers behind
/// `operand_1` and `operand_2`, then the row and column counts of `product`.
/// The call does not return until the command queue has finished.
///
/// NOTE(review): the dynamic_cast results are dereferenced without a null
/// check, so all three matrices are assumed to be OpenCL-backed -- confirm with callers.
void opencl_context::multiply_elementwise(matrix& product, const matrix& operand_1, const matrix& operand_2)
{
    auto* const dst = dynamic_cast<opencl::matrix*>(product.implementation());
    auto* const lhs = dynamic_cast<opencl::matrix*>(operand_1.implementation());
    auto* const rhs = dynamic_cast<opencl::matrix*>(operand_2.implementation());

    const auto rows = product.row_count();
    const auto cols = product.column_count();

    _matrix_elementwise_multiply->setArg(0, *dst->get());
    _matrix_elementwise_multiply->setArg(1, *lhs->get());
    _matrix_elementwise_multiply->setArg(2, *rhs->get());
    _matrix_elementwise_multiply->setArg(3, rows);
    _matrix_elementwise_multiply->setArg(4, cols);

    // Global range covers the whole matrix; each work-group is a single item.
    const cl::NDRange global_origin(0U, 0U);
    const cl::NDRange global_extent(rows, cols);
    const cl::NDRange local_extent(1U, 1U);

    _command_queue->enqueueNDRangeKernel(*_matrix_elementwise_multiply, global_origin, global_extent, local_extent);
    _command_queue->finish();
}
/// Runs the `_matrix_multiply_kernel` kernel to fill `product` from
/// `operand_1` and `operand_2`.
///
/// Dimension convention used for the kernel arguments:
///   operand_1: m x n
///   operand_2: n x k
///   product:   m x k
///
/// Kernel arguments, in order: the buffer behind `product`, the buffers behind
/// `operand_1` and `operand_2`, then m, n and k. The launch uses a global range
/// of product rows x product columns, and the call does not return until the
/// command queue has finished.
///
/// The backend pointers are obtained with `dynamic_cast`, matching the other
/// arithmetic methods of this class, instead of unchecked C-style casts.
///
/// NOTE(review): the cast results are dereferenced without a null check, so all
/// three matrices are assumed to be OpenCL-backed -- confirm with callers.
void opencl_context::multiply(matrix& product, const matrix& operand_1, const matrix& operand_2)
{
    opencl::matrix* product_impl = dynamic_cast<opencl::matrix*>(product.implementation());
    opencl::matrix* operand_1_impl = dynamic_cast<opencl::matrix*>(operand_1.implementation());
    opencl::matrix* operand_2_impl = dynamic_cast<opencl::matrix*>(operand_2.implementation());

    _matrix_multiply_kernel->setArg(0, *product_impl->get());
    _matrix_multiply_kernel->setArg(1, *operand_1_impl->get());
    _matrix_multiply_kernel->setArg(2, *operand_2_impl->get());

    _matrix_multiply_kernel->setArg(3, operand_1.row_count());     // m
    _matrix_multiply_kernel->setArg(4, operand_2.row_count());     // n
    _matrix_multiply_kernel->setArg(5, operand_2.column_count());  // k

    cl::NDRange offset(0U, 0U);
    cl::NDRange workgroup_size(1U, 1U);
    cl::NDRange size(product.row_count(), product.column_count());

    _command_queue->enqueueNDRangeKernel(*_matrix_multiply_kernel, offset, size, workgroup_size);
    _command_queue->finish();
}
/// Adds local memory to the kernel, sized by calling `f` with the workgroup size.
/// The result of `f` is used as the element count of a
/// `boost::compute::local_buffer<char>` bound to the next argument slot.
void set_smem(F &&f) {
    const auto smem_size = f(workgroup_size());
    K.set_arg(argpos++, boost::compute::local_buffer<char>(smem_size));
}
/// Adds local memory to the kernel. void set_smem(size_t smem_per_thread) { K.set_arg( argpos++, boost::compute::local_buffer<char>(smem_per_thread * workgroup_size()) ); }
/// Sets the kernel's local memory size to whatever `f` returns when called
/// with the workgroup size; the value is stored in `smem`.
void set_smem(F &&f) {
    const auto requested = f(workgroup_size());
    smem = requested;
}
/// Adds local memory to the kernel. void set_smem(size_t smem_per_thread) { smem = workgroup_size() * smem_per_thread; }