/// Select best launch configuration for the given shared memory requirements. void config(const command_queue &q, std::function<size_t(size_t)> smem) { // Select workgroup size that would fit into the device. size_t ws = q.device().max_threads_per_block() / 2; size_t max_ws = max_threads_per_block(q); size_t max_smem = max_shared_memory_per_block(q); // Reduce workgroup size until it satisfies resource requirements: while( (ws > max_ws) || (smem(ws) > max_smem) ) ws /= 2; config(num_workgroups(q), ws); }
/// Select best launch configuration for the given shared memory requirements. void config(const cl::CommandQueue &queue, std::function<size_t(size_t)> smem) { cl::Device dev = queue.getInfo<CL_QUEUE_DEVICE>(); if ( is_cpu(queue) ) { w_size = 1; } else { // Select workgroup size that would fit into the device. w_size = dev.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0] / 2; size_t max_ws = max_threads_per_block(queue); size_t max_smem = max_shared_memory_per_block(queue); // Reduce workgroup size until it satisfies resource requirements: while( (w_size > max_ws) || (smem(w_size) > max_smem) ) w_size /= 2; } g_size = w_size * num_workgroups(queue); }
/// Select best launch configuration for the given shared memory requirements. void config(const boost::compute::command_queue &queue, std::function<size_t(size_t)> smem) { boost::compute::device dev = queue.get_device(); size_t ws; if ( is_cpu(queue) ) { ws = 1; } else { // Select workgroup size that would fit into the device. ws = dev.get_info<std::vector<size_t>>(CL_DEVICE_MAX_WORK_ITEM_SIZES)[0] / 2; size_t max_ws = max_threads_per_block(queue); size_t max_smem = max_shared_memory_per_block(queue); // Reduce workgroup size until it satisfies resource requirements: while( (ws > max_ws) || (smem(ws) > max_smem) ) ws /= 2; } config(num_workgroups(queue), ws); }