// Folds two sequences into a single value.
//
// Walks [begin1, end1) and [begin2, end2) in lock-step and stops as soon as
// either sequence is exhausted. At each step the current elements are passed
// through proj1_ / proj2_, combined pairwise with bop2_, and the result is
// folded into the running value with bop1_.
//
// init  - starting value of the accumulation; returned unchanged when either
//         sequence is empty.
// Returns the final accumulated value.
T operator()(I1 begin1, S1 end1, I2 begin2, S2 end2, T init, BOp1 bop1_ = BOp1{}, BOp2 bop2_ = BOp2{}, P1 proj1_ = P1{}, P2 proj2_ = P2{}) const
{
    // Wrap every user-supplied callable with as_function so it can be
    // invoked with plain call syntax below.
    auto &&accumulate = as_function(bop1_);
    auto &&combine = as_function(bop2_);
    auto &&project_lhs = as_function(proj1_);
    auto &&project_rhs = as_function(proj2_);

    while (begin1 != end1 && begin2 != end2)
    {
        init = accumulate(init, combine(project_lhs(*begin1), project_rhs(*begin2)));
        ++begin1;
        ++begin2;
    }
    return init;
}
// Folds two sequences into a single value, with only the first one bounded.
//
// Walks [begin1, end1) and advances begin2 in lock-step; the second sequence
// has no end marker, so the caller must guarantee it holds at least as many
// elements as the first. At each step the current elements are passed through
// proj1_ / proj2_, combined pairwise with bop2_, and the result is folded
// into the running value with bop1_.
//
// init  - starting value of the accumulation; returned unchanged when the
//         first sequence is empty.
// Returns the final accumulated value.
T operator()(I1 begin1, S1 end1, I2 begin2, T init, BOp1 bop1_ = BOp1{}, BOp2 bop2_ = BOp2{}, P1 proj1_ = P1{}, P2 proj2_ = P2{}) const
{
    // NOTE(review): this overload adapts its callables with invokable(),
    // while the four-iterator overload uses as_function() — confirm that the
    // two helpers are meant to be equivalent.
    auto &&accumulate = invokable(bop1_);
    auto &&combine = invokable(bop2_);
    auto &&project_lhs = invokable(proj1_);
    auto &&project_rhs = invokable(proj2_);

    while (begin1 != end1)
    {
        init = accumulate(init, combine(project_lhs(*begin1), project_rhs(*begin2)));
        ++begin1;
        ++begin2;
    }
    return init;
}
// Times a three-operand batch kernel and reports the fastest run.
//
// For each of `number` repetitions, the operands are traversed in steps of
// B::size elements: a batch B is built from each operand at the current
// offset (using aligned_mode()), `f` is applied to the three batches, and the
// result is written back into `res` with store_aligned. Each repetition is
// timed with std::chrono::steady_clock.
//
// f       - kernel invoked as f(B, B, B); its result is stored through B.
// op0     - first operand; its size() determines how many elements are processed.
// op1,op2 - remaining operands; indexed with the same offsets as op0
//           (assumed to be at least as large as op0 — TODO confirm with callers).
// res     - output container, written at the same offsets (same size assumption).
// number  - number of timed repetitions.
//
// Returns the smallest elapsed time over all repetitions, or
// duration_type::max() when `number` is 0.
//
// Trailing elements that do not fill a whole batch (op0.size() % B::size) are
// not processed. If op0 holds fewer than B::size elements, no batch is
// processed at all.
// NOTE(review): the aligned constructor/store presumably require the
// containers' storage to be suitably aligned for B — verify against the
// allocator used by callers.
duration_type benchmark_simd(F f, V& op0, V& op1, V& op2, V& res, std::size_t number)
{
    const std::size_t s = op0.size();
    duration_type t_res = duration_type::max();
    for (std::size_t count = 0; count < number; ++count)
    {
        auto start = std::chrono::steady_clock::now();
        // Bound is written as `i + B::size <= s` instead of `i <= s - B::size`:
        // the subtraction is unsigned and wraps around when s < B::size, which
        // would turn the loop into an out-of-bounds walk over the operands.
        for (std::size_t i = 0; i + B::size <= s; i += B::size)
        {
            B bop0(&op0[i], aligned_mode()), bop1(&op1[i], aligned_mode()), bop2(&op2[i], aligned_mode());
            B bres = f(bop0, bop1, bop2);
            bres.store_aligned(&res[i]);
        }
        auto end = std::chrono::steady_clock::now();
        auto tmp = end - start;
        // Keep the best (shortest) time seen so far.
        t_res = tmp < t_res ? tmp : t_res;
    }
    return t_res;
}