// Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args, std::vector<T> &, std::vector<T> &, std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &, std::vector<T> &) { auto alpha_buffer = std::vector<T>{args.alpha}; tuner.AddArgumentScalar(static_cast<int>(args.m)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentOutput(b_mat); tuner.AddArgumentInput(alpha_buffer); }
// Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args, std::vector<T> &, std::vector<T> &, std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat, std::vector<T> &) { tuner.AddArgumentScalar(static_cast<int>(args.m)); tuner.AddArgumentScalar(static_cast<int>(args.n)); tuner.AddArgumentScalar(static_cast<int>(args.k)); tuner.AddArgumentScalar(GetRealArg(args.alpha)); tuner.AddArgumentScalar(GetRealArg(args.beta)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentScalar(0); // a_offset tuner.AddArgumentScalar(static_cast<int>(args.k)); // a_ld tuner.AddArgumentInput(b_mat); tuner.AddArgumentScalar(0); // b_offset tuner.AddArgumentScalar(static_cast<int>(args.n)); // b_ld tuner.AddArgumentOutput(c_mat); tuner.AddArgumentScalar(0); // c_offset tuner.AddArgumentScalar(static_cast<int>(args.n)); // c_ld tuner.AddArgumentScalar(1); // c_do_transpose tuner.AddArgumentScalar(0); // a_conjugate tuner.AddArgumentScalar(0); // b_conjugate }
// Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args, std::vector<T> &x_vec, std::vector<T> &y_vec, std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &, std::vector<T> &) { auto a_rotated = (V==3) ? 1 : 0; tuner.AddArgumentScalar(static_cast<int>(args.m)); tuner.AddArgumentScalar(static_cast<int>(args.n)); tuner.AddArgumentScalar(GetRealArg(args.alpha)); tuner.AddArgumentScalar(GetRealArg(args.beta)); tuner.AddArgumentScalar(static_cast<int>(a_rotated)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentScalar(0); tuner.AddArgumentScalar(static_cast<int>(args.m)); tuner.AddArgumentInput(x_vec); tuner.AddArgumentScalar(0); tuner.AddArgumentScalar(1); tuner.AddArgumentOutput(y_vec); tuner.AddArgumentScalar(0); tuner.AddArgumentScalar(1); tuner.AddArgumentScalar(0); // Conjugate transpose tuner.AddArgumentScalar(0); // Additional parameter tuner.AddArgumentScalar(0); // Banded 'kl' tuner.AddArgumentScalar(0); // Banded 'ku' }