void record_full_timings(TimingType & timings, F functor, TestConfig & config, TestData & data) { typedef typename TestData::value_type ScalarType; double result = 0; functor(data); //startup run (ensures kernel compilation) for (unsigned int work_groups = config.min_work_groups(); work_groups <= config.max_work_groups(); work_groups *= 2) //iterate over number of work groups (compute units) { for (unsigned int local_workers = config.min_local_size(); local_workers <= config.max_local_size(); local_workers *= 2) //iterate over local thread number { //set parameter: set_kernel_params(config.program_name(), config.kernel_name(), work_groups, local_workers); //std::cout << "Benchmarking kernel " << config.kernel_name() << std::endl; result = execute(functor, data); //check for valid result: (kernels have an automatic fallback to smaller values included) if (!validate_result(config.program_name(), config.kernel_name(), work_groups, local_workers)) { std::cout << "Kernel start failed for kernel " << config.kernel_name() << " [" << work_groups << " groups, " << local_workers << " per group]" << std::endl; break; } else timings[result] = std::make_pair(work_groups * local_workers, local_workers); } } }
/** @brief Runs the restricted tuning pass for one kernel and reports the results.
 *
 * Performs the following steps in order:
 *  1. record_restricted_timings() fills @p timings.
 *  2. record_kernel_parameters() stores the outcome for config.kernel_name() in @p paras.
 *  3. If ENABLE_VIENNAPROFILER is defined, write_viennaprofiler() is called with the timings.
 *  4. print_best() and print_default_restricted() print a summary for the kernel.
 *
 * @param paras    Parameter database passed to record_kernel_parameters().
 * @param timings  Timing container filled by record_restricted_timings() and read by the later steps.
 * @param functor  Callable forwarded to record_restricted_timings().
 * @param config   Provides kernel_name() and program_name().
 * @param data     Test data forwarded to record_restricted_timings().
 */
void optimize_restricted(viennacl::io::parameter_database & paras,
                         TimingType & timings,
                         F functor,
                         TestConfig & config,
                         TestData & data)
{
  // Measure first, then persist the parameters derived from the measurements.
  record_restricted_timings(timings, functor, config, data);
  record_kernel_parameters(paras, config.kernel_name(), timings);

#if defined(ENABLE_VIENNAPROFILER)
  // Optional export, compiled in only when the profiler macro is set.
  write_viennaprofiler(timings, config.program_name(), config.kernel_name());
#endif

  // Console summary.
  print_best(timings, config.kernel_name());
  print_default_restricted(timings, config.kernel_name());
}