// Blink test: drives ports A-E (plus three individually addressed pins)
// to one level, waits, drives them to the opposite level, waits, and repeats forever.
// NOTE(review): setup_oscillator / output_X / output_high / output_low / Delay_ms
// look like CCS C compiler built-ins for PIC devices - confirm against the toolchain.
void main() {
    setup_oscillator(OSC_NORMAL);

    for (;;) {
        // "On" phase: write 0xFF to every port
        output_A(0xFF);
        output_B(0xFF);
        output_C(0xFF);
        output_D(0xFF);
        output_E(0xFF);
        // NOTE(review): C1, C0 and A4 are presumably already covered by the
        // port-wide writes above; the explicit calls are kept exactly as the
        // original had them - confirm whether these pins need special handling.
        output_high(pin_C1);
        output_high(pin_C0);
        output_high(pin_A4);
        Delay_ms(500);

        // "Off" phase: write 0x00 to every port
        output_A(0x00);
        output_B(0x00);
        output_C(0x00);
        output_D(0x00);
        output_E(0x00);
        output_low(pin_C1);
        output_low(pin_C0);
        output_low(pin_A4);
        Delay_ms(500);
    }
}
// This program implements a simple vector addition routine to demonstrate kernel execution // It is mostly a straightforward port of the vector addition example from Heterogeneous Computing with OpenCL int main() { // In this example, we will be summing two integer vectors of a hard-coded size const size_t vector_length = 64 * 1024 * 1024; const size_t vector_size = vector_length * sizeof(cl_int); // Minimal platform and device parameters are specified here const CLplusplus::Version target_version = CLplusplus::version_1p2; const cl_ulong min_mem_alloc_size = vector_size; const cl_ulong min_global_mem_size = 3 * vector_size; // Have the user select a suitable device, according to some criteria (see shared.hpp for more details) const auto selected_platform_and_device = Shared::select_device( [&](const CLplusplus::Platform & platform) -> bool { return (platform.version() >= target_version); // Platform OpenCL version is recent enough }, [&](const CLplusplus::Device & device) -> bool { if(device.version() < target_version) return false; // OpenCL platforms may support older-generation devices, which we need to eliminate const bool device_supports_ooe_execution = device.queue_properties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; return device.available() && // Device is available for compute purposes device.endian_little() && // Device is little-endian (device.execution_capabilities() & CL_EXEC_KERNEL) && // Device can execute OpenCL kernels device_supports_ooe_execution && // Device can execute OpenCL commands out of order device.compiler_available() && device.linker_available() && // Implementation has an OpenCL C compiler and linker for this device (device.max_mem_alloc_size() >= min_mem_alloc_size) && // Device accepts large enough global memory allocations (device.global_mem_size() >= min_global_mem_size); // Device has enough global memory } ); // Create an OpenCL context on the device with some default parameters (see shared.hpp for more details) const auto 
context = Shared::build_default_context(selected_platform_and_device); // Allocate our input and output buffers std::cout << "Creating buffers..." << std::endl; const auto input_A_buffer = context.create_buffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, vector_size); const auto input_B_buffer = context.create_buffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, vector_size); const auto output_C_buffer = context.create_buffer(CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, vector_size); // Create a program object from the basic vector addition example std::cout << "Loading program..." << std::endl; auto program = context.create_program_with_source_file("kernels/vector_add.cl"); // Start an asynchronous program build std::cout << "Starting to build program..." << std::endl; const auto build_event = program.build_with_event("-cl-mad-enable -cl-no-signed-zeros -cl-std=CL1.2 -cl-kernel-arg-info"); // Create an out-of-order command queue for the device const auto command_queue = context.create_command_queue(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); // Generate our input data and send it to the device std::cout << "Generating and sending data..." << std::endl; std::vector<cl_int> input_A(vector_length); for(size_t i = 0; i < vector_length; ++i) input_A[i] = i + 1; const auto write_A_event = command_queue.enqueued_write_buffer(static_cast<const void *>(&(input_A[0])), false, input_A_buffer, 0, vector_size, {}); std::vector<cl_int> input_B(vector_length); for(size_t i = 0; i < vector_length; ++i) input_B[i] = vector_length - i; const auto write_B_event = command_queue.enqueued_write_buffer(static_cast<const void *>(&(input_B[0])), false, input_B_buffer, 0, vector_size, {}); const auto all_write_events = command_queue.enqueued_marker_with_wait_list({write_A_event, write_B_event}); // Once the program is built, create a kernel object associated to our vector addition routine std::cout << std::endl; std::cout << "Creating a kernel for vector addition..." 
<< std::endl; const auto kernel = program.create_kernel("vector_add", build_event); // Set its arguments as appropriate kernel.set_buffer_argument(0, &input_A_buffer); kernel.set_buffer_argument(1, &input_B_buffer); kernel.set_buffer_argument(2, &output_C_buffer); // Execute the kernel std::cout << "Starting the kernel..." << std::endl; const auto exec_event = command_queue.enqueued_1d_range_kernel(kernel, vector_length, {all_write_events}); // Once the kernel is done, synchronously read device output back into host memory std::cout << "Waiting for output..." << std::endl; std::vector<cl_int> output_C(vector_length); command_queue.read_buffer(output_C_buffer, 0, static_cast<void *>(&(output_C[0])), vector_size, {exec_event}); // Verify the output std::cout << std::endl; for(const auto & output : output_C) { if(output != vector_length + 1) { std::cout << "Incorrect output !" << std::endl; std::abort(); } } std::cout << "Vector addition was performed successfully !" << std::endl; return 0; }