Пример #1
0
        /// Select best launch configuration for the given shared memory requirements.
        ///
        /// Starts from half of the value reported by the device's
        /// max_threads_per_block() and keeps halving the workgroup size until it
        /// neither exceeds max_threads_per_block(q) nor requests more shared
        /// memory (as reported by smem) than max_shared_memory_per_block(q).
        /// The search never goes below a workgroup size of one, so it always
        /// terminates and never selects an empty workgroup.
        ///
        /// \param q    Command queue whose device limits are queried.
        /// \param smem Functor returning the amount of shared memory required
        ///             for a given workgroup size.
        void config(const command_queue &q, std::function<size_t(size_t)> smem) {
            // Select workgroup size that would fit into the device.
            size_t ws = q.device().max_threads_per_block() / 2;

            // A reported limit of one would halve to zero; keep at least one thread.
            if (ws == 0) ws = 1;

            const size_t max_ws   = max_threads_per_block(q);
            const size_t max_smem = max_shared_memory_per_block(q);

            // Reduce workgroup size until it satisfies resource requirements.
            // Stop at one: halving further would yield an empty workgroup, and
            // the loop would never finish if smem(0) still exceeded the limit.
            while( (ws > 1) && ((ws > max_ws) || (smem(ws) > max_smem)) )
                ws /= 2;

            config(num_workgroups(q), ws);
        }
Пример #2
0
        /// Select best launch configuration for the given shared memory requirements.
        ///
        /// For a CPU queue (as reported by is_cpu) the workgroup size is set
        /// to one. Otherwise the search starts from half of the first entry of
        /// CL_DEVICE_MAX_WORK_ITEM_SIZES and keeps halving the workgroup size
        /// until it neither exceeds max_threads_per_block(queue) nor requests
        /// more shared memory (as reported by smem) than
        /// max_shared_memory_per_block(queue). The search never goes below a
        /// workgroup size of one, so it always terminates and never selects an
        /// empty workgroup.
        ///
        /// On return w_size holds the selected workgroup size and g_size holds
        /// w_size multiplied by num_workgroups(queue).
        ///
        /// \param queue Command queue whose device limits are queried.
        /// \param smem  Functor returning the amount of shared memory required
        ///              for a given workgroup size.
        void config(const cl::CommandQueue &queue, std::function<size_t(size_t)> smem) {
            cl::Device dev = queue.getInfo<CL_QUEUE_DEVICE>();

            if ( is_cpu(queue) ) {
                w_size = 1;
            } else {
                // Select workgroup size that would fit into the device.
                w_size = dev.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0] / 2;

                // A reported limit of one would halve to zero; keep at least one work-item.
                if (w_size == 0) w_size = 1;

                const size_t max_ws   = max_threads_per_block(queue);
                const size_t max_smem = max_shared_memory_per_block(queue);

                // Reduce workgroup size until it satisfies resource requirements.
                // Stop at one: halving further would yield an empty workgroup, and
                // the loop would never finish if smem(0) still exceeded the limit.
                while( (w_size > 1) && ((w_size > max_ws) || (smem(w_size) > max_smem)) )
                    w_size /= 2;
            }

            g_size = w_size * num_workgroups(queue);
        }
Пример #3
0
        /// Select best launch configuration for the given shared memory requirements.
        ///
        /// For a CPU queue (as reported by is_cpu) the workgroup size is set
        /// to one. Otherwise the search starts from half of the first entry of
        /// CL_DEVICE_MAX_WORK_ITEM_SIZES and keeps halving the workgroup size
        /// until it neither exceeds max_threads_per_block(queue) nor requests
        /// more shared memory (as reported by smem) than
        /// max_shared_memory_per_block(queue). The search never goes below a
        /// workgroup size of one, so it always terminates and never selects an
        /// empty workgroup.
        ///
        /// The result is forwarded to config(num_workgroups(queue), ws).
        ///
        /// \param queue Command queue whose device limits are queried.
        /// \param smem  Functor returning the amount of shared memory required
        ///              for a given workgroup size.
        void config(const boost::compute::command_queue &queue, std::function<size_t(size_t)> smem) {
            boost::compute::device dev = queue.get_device();

            size_t ws = 1;

            if ( !is_cpu(queue) ) {
                // Select workgroup size that would fit into the device.
                ws = dev.get_info<std::vector<size_t>>(CL_DEVICE_MAX_WORK_ITEM_SIZES)[0] / 2;

                // A reported limit of one would halve to zero; keep at least one work-item.
                if (ws == 0) ws = 1;

                const size_t max_ws   = max_threads_per_block(queue);
                const size_t max_smem = max_shared_memory_per_block(queue);

                // Reduce workgroup size until it satisfies resource requirements.
                // Stop at one: halving further would yield an empty workgroup, and
                // the loop would never finish if smem(0) still exceeded the limit.
                while( (ws > 1) && ((ws > max_ws) || (smem(ws) > max_smem)) )
                    ws /= 2;
            }

            config(num_workgroups(queue), ws);
        }