예제 #1
0
 // Registers the kernel arguments with the tuner. The calls are issued in a fixed
 // order: the size 'm' as an int scalar, 'a_mat' as an input buffer, 'b_mat' as an
 // output buffer, and finally 'alpha' wrapped in a one-element input buffer. The
 // unnamed vector parameters are not used by this routine.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &, std::vector<T> &,
                          std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
                          std::vector<T> &) {
   // Size argument, converted to int before it is handed to the tuner
   const auto m_as_int = static_cast<int>(args.m);
   tuner.AddArgumentScalar(m_as_int);

   // The two matrices: 'a_mat' is read, 'b_mat' is written
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentOutput(b_mat);

   // Alpha is passed as a single-element input buffer rather than as a scalar
   std::vector<T> alpha_as_vector{args.alpha};
   tuner.AddArgumentInput(alpha_as_vector);
 }
예제 #2
0
 // Registers the kernel arguments with the tuner, in this order: the sizes m, n
 // and k, the alpha and beta values, then for each of the matrices A, B and C its
 // buffer followed by an offset and a leading dimension, and finally three int
 // flags (c_do_transpose, a_conjugate, b_conjugate). The unnamed vector
 // parameters are not used by this routine.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &, std::vector<T> &,
                          std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat,
                          std::vector<T> &) {
   // Problem sizes, converted to int once and re-used below as leading dimensions
   const auto m = static_cast<int>(args.m);
   const auto n = static_cast<int>(args.n);
   const auto k = static_cast<int>(args.k);

   // Fixed int values for the offsets and the flags
   const int zero_offset = 0;
   const int flag_off = 0;
   const int flag_on = 1;

   // Sizes and scalar multipliers
   tuner.AddArgumentScalar(m);
   tuner.AddArgumentScalar(n);
   tuner.AddArgumentScalar(k);
   tuner.AddArgumentScalar(GetRealArg(args.alpha));
   tuner.AddArgumentScalar(GetRealArg(args.beta));

   // Matrix A (input): buffer, a_offset, a_ld
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentScalar(zero_offset);
   tuner.AddArgumentScalar(k);

   // Matrix B (input): buffer, b_offset, b_ld
   tuner.AddArgumentInput(b_mat);
   tuner.AddArgumentScalar(zero_offset);
   tuner.AddArgumentScalar(n);

   // Matrix C (output): buffer, c_offset, c_ld
   tuner.AddArgumentOutput(c_mat);
   tuner.AddArgumentScalar(zero_offset);
   tuner.AddArgumentScalar(n);

   // Flags
   tuner.AddArgumentScalar(flag_on);  // c_do_transpose
   tuner.AddArgumentScalar(flag_off); // a_conjugate
   tuner.AddArgumentScalar(flag_off); // b_conjugate
 }
예제 #3
0
파일: xgemv.cpp 프로젝트: gpu/CLBlast
 // Registers the kernel arguments with the tuner, in this order: the sizes m and
 // n, the alpha and beta values, the 'a_rotated' flag, the matrix A with two int
 // scalars, the vectors x (input) and y (output) each with two int scalars, and
 // four trailing int scalars that are all zero. The unnamed vector parameters are
 // not used by this routine.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &x_vec, std::vector<T> &y_vec,
                          std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &,
                          std::vector<T> &) {
   // Sizes, converted to int once; 'm' is passed a second time right after A
   const auto m = static_cast<int>(args.m);
   const auto n = static_cast<int>(args.n);

   // The rotation flag is 1 only when the variant selector V equals 3
   const int a_rotated = (V == 3) ? 1 : 0;

   // Fixed int values used for the remaining scalars
   const int zero = 0;
   const int one = 1;

   // Sizes, scalar multipliers and the rotation flag
   tuner.AddArgumentScalar(m);
   tuner.AddArgumentScalar(n);
   tuner.AddArgumentScalar(GetRealArg(args.alpha));
   tuner.AddArgumentScalar(GetRealArg(args.beta));
   tuner.AddArgumentScalar(a_rotated);

   // Matrix A (input); the two scalars are presumably its offset and leading
   // dimension -- confirm against the kernel signature
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentScalar(zero);
   tuner.AddArgumentScalar(m);

   // Vector x (input); the two scalars are presumably its offset and increment
   // -- confirm against the kernel signature
   tuner.AddArgumentInput(x_vec);
   tuner.AddArgumentScalar(zero);
   tuner.AddArgumentScalar(one);

   // Vector y (output); same pair of scalars as for x
   tuner.AddArgumentOutput(y_vec);
   tuner.AddArgumentScalar(zero);
   tuner.AddArgumentScalar(one);

   // Trailing options, all passed as zero
   tuner.AddArgumentScalar(zero); // Conjugate transpose
   tuner.AddArgumentScalar(zero); // Additional parameter
   tuner.AddArgumentScalar(zero); // Banded 'kl'
   tuner.AddArgumentScalar(zero); // Banded 'ku'
 }