Exemplo n.º 1
0
 // Registers the kernel's local memory usage with the tuner. The formula and the tuning
 // parameters it reads depend on the variation V; parameter names carry V as a suffix.
 static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments<T> &args) {
   const auto suffix = std::to_string(V);

   // Every variation other than 1 and 2: (WGS*WPT + WPT) scaled by GetBytes(precision)
   if (V!=1 && V!=2) {
     auto TiledUsage = [args] (std::vector<size_t> v) {
       return (v[0]*v[1] + v[1])*GetBytes(args.precision);
     };
     tuner.SetLocalMemoryUsage(id, TiledUsage, {"WGS"+suffix, "WPT"+suffix});
     return;
   }

   // Variations 1 and 2: WGS scaled by GetBytes(precision)
   auto LinearUsage = [args] (std::vector<size_t> v) {
     return v[0]*GetBytes(args.precision);
   };
   tuner.SetLocalMemoryUsage(id, LinearUsage, {"WGS"+suffix});
 }
Exemplo n.º 2
0
 // Adds the constraints between tuning parameters that apply to variation V. Parameter
 // names carry V as a suffix (e.g. "WPT2"). No constraints are added when V is 1.
 static void SetConstraints(cltune::Tuner &tuner, const size_t id) {
   const auto suffix = std::to_string(V);

   // Variations 2 and 3: IsMultiple(WPT, VW) has to hold
   if (V==2 || V==3) {
     auto WptIsMultipleOfVw = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); };
     tuner.AddConstraint(id, WptIsMultipleOfVw, {"WPT"+suffix, "VW"+suffix});
   }

   // Variation 3 only: WGS may not be smaller than WPT
   if (V==3) {
     auto WgsNotBelowWpt = [] (std::vector<size_t> v) { return !(v[0] < v[1]); };
     tuner.AddConstraint(id, WgsNotBelowWpt, {"WGS"+suffix, "WPT"+suffix});
   }
 }
Exemplo n.º 3
0
 // Registers the kernel arguments with the tuner, in this order: args.m as an int scalar,
 // a_mat as input, b_mat as output, and a one-element buffer holding args.alpha as input.
 // The four unnamed vector parameters are not used here.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &, std::vector<T> &,
                          std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
                          std::vector<T> &) {
   const auto m = static_cast<int>(args.m);

   // alpha is handed over as a buffer argument rather than as a scalar
   std::vector<T> alpha_holder{args.alpha};

   tuner.AddArgumentScalar(m);
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentOutput(b_mat);
   tuner.AddArgumentInput(alpha_holder);
 }
Exemplo n.º 4
0
  // Registers the constraints between the tuning parameters. Every constraint lambda
  // receives the current values of the listed parameters, in the listed order.
  static void SetConstraints(cltune::Tuner &tuner, const size_t id) {

    // Requirement for unrolling the WGD loop: IsMultiple(WGD, KWID)
    auto MultipleOfOne = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); };
    tuner.AddConstraint(id, MultipleOfOne, {"WGD", "KWID"});

    // IsMultiple(WGD, product of the two other parameters)
    auto MultipleOfProduct = [] (std::vector<size_t> v) {
      return IsMultiple(v[0], v[1]*v[2]);
    };
    // ... required for integer MWID and NWID
    tuner.AddConstraint(id, MultipleOfProduct, {"WGD", "MDIMCD", "VWMD"});
    tuner.AddConstraint(id, MultipleOfProduct, {"WGD", "NDIMCD", "VWND"});
    // ... required for integer MWIAD and NWIBD
    tuner.AddConstraint(id, MultipleOfProduct, {"WGD", "MDIMAD", "VWMD"});
    tuner.AddConstraint(id, MultipleOfProduct, {"WGD", "NDIMBD", "VWND"});

    // WGD has to be a multiple of KDIMAD = (MDIMCD*NDIMCD)/MDIMAD and, per the second
    // call below, of (MDIMCD*NDIMCD)/NDIMBD (presumably KDIMBD). Integer division is used.
    auto MultipleOfQuotient = [] (std::vector<size_t> v) {
      return IsMultiple(v[0], (v[1]*v[2])/v[3]);
    };
    tuner.AddConstraint(id, MultipleOfQuotient, {"WGD", "MDIMCD", "NDIMCD", "MDIMAD"});
    tuner.AddConstraint(id, MultipleOfQuotient, {"WGD", "MDIMCD", "NDIMCD", "NDIMBD"});

    // Extra constraints for variation 1 to limit the set of options significantly
    if (V==1) {
      auto SameValue = [] (std::vector<size_t> v) { return v[1] == v[0]; };
      tuner.AddConstraint(id, SameValue, {"MDIMCD", "MDIMAD"});
      tuner.AddConstraint(id, SameValue, {"NDIMCD", "NDIMBD"});
    }
  }
Exemplo n.º 5
0
 // Registers the tuning parameters and their candidate values with the tuner. Which set of
 // candidates is used depends on the variation V.
 static void SetParameters(cltune::Tuner &tuner, const size_t id) {

   // Variation 1: limited subset of tuning parameters - but explorable exhaustively.
   // The trailing notes are carried over from the original source; they presumably list
   // candidate values considered for this subset - TODO confirm.
   if (V==1) {
     tuner.AddParameter(id, "WGD", {8, 16, 32}); // note: 64,128
     tuner.AddParameter(id, "MDIMCD", {4, 8, 16});
     tuner.AddParameter(id, "NDIMCD", {4, 8, 16});
     tuner.AddParameter(id, "MDIMAD", {4, 8, 16});
     tuner.AddParameter(id, "NDIMBD", {4, 8, 16});
     tuner.AddParameter(id, "KWID", {1, 2, 4}); // note: 1,4
     tuner.AddParameter(id, "VWMD", {1, 2, 4});
     tuner.AddParameter(id, "VWND", {1, 2, 4});
     tuner.AddParameter(id, "PADA", {1}); // note: 0
     tuner.AddParameter(id, "PADB", {1}); // note: 0
     return;
   }

   // All other variations: a lot more tuning parameters - has to be sampled randomly,
   // too much to test all
   tuner.AddParameter(id, "WGD", {8, 16, 32, 64, 128});
   tuner.AddParameter(id, "MDIMCD", {8, 16, 32});
   tuner.AddParameter(id, "NDIMCD", {8, 16, 32});
   tuner.AddParameter(id, "MDIMAD", {8, 16, 32});
   tuner.AddParameter(id, "NDIMBD", {8, 16, 32});
   tuner.AddParameter(id, "KWID", {2, 8, 16});
   tuner.AddParameter(id, "VWMD", {1, 2, 4, 8});
   tuner.AddParameter(id, "VWND", {1, 2, 4, 8});
   tuner.AddParameter(id, "PADA", {0, 1});
   tuner.AddParameter(id, "PADB", {0, 1});
 }
Exemplo n.º 6
0
 // Registers the kernel arguments with the tuner. The order of the calls below is the
 // order in which the arguments are registered, so it must not be changed. The three
 // unnamed vector parameters are not used here.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &, std::vector<T> &,
                          std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat,
                          std::vector<T> &) {
   // Sizes are passed on as int scalars
   const auto m = static_cast<int>(args.m);
   const auto n = static_cast<int>(args.n);
   const auto k = static_cast<int>(args.k);
   const int zero = 0;

   // Sizes and scaling factors
   tuner.AddArgumentScalar(m);
   tuner.AddArgumentScalar(n);
   tuner.AddArgumentScalar(k);
   tuner.AddArgumentScalar(GetRealArg(args.alpha));
   tuner.AddArgumentScalar(GetRealArg(args.beta));

   // First input, followed by its offset and leading dimension
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentScalar(zero); // a_offset
   tuner.AddArgumentScalar(k); // a_ld

   // Second input, followed by its offset and leading dimension
   tuner.AddArgumentInput(b_mat);
   tuner.AddArgumentScalar(zero); // b_offset
   tuner.AddArgumentScalar(n); // b_ld

   // Output, followed by its offset and leading dimension
   tuner.AddArgumentOutput(c_mat);
   tuner.AddArgumentScalar(zero); // c_offset
   tuner.AddArgumentScalar(n); // c_ld

   // Flags
   tuner.AddArgumentScalar(1); // c_do_transpose
   tuner.AddArgumentScalar(zero); // a_conjugate
   tuner.AddArgumentScalar(zero); // b_conjugate
 }
Exemplo n.º 7
0
 // Registers the local memory usage with the tuner as a function of the tuning parameters
 // WGD, PADA and PADB (passed to the lambda in that order).
 static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments<T> &args) {
   auto UsageFromTuningValues = [args] (std::vector<size_t> v) {
     // v[0] = WGD, v[1] = PADA, v[2] = PADB.
     // Sum of WGD*(WGD+PADA) and WGD*(WGD+PADB), scaled by GetBytes(precision).
     return (
       (v[0]*(v[0] + v[1]) + v[0]*(v[0] + v[2]))
       * GetBytes(args.precision)
     );
   };
   tuner.SetLocalMemoryUsage(id, UsageFromTuningValues, {"WGD", "PADA", "PADB"});
 }
Exemplo n.º 8
0
 // Registers the kernel arguments with the tuner. The order of the calls below is the
 // order in which the arguments are registered, so it must not be changed. The three
 // unnamed vector parameters are not used here.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &x_vec, std::vector<T> &y_vec,
                          std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &,
                          std::vector<T> &) {
   const auto m = static_cast<int>(args.m);
   const auto n = static_cast<int>(args.n);
   const int a_rotated = (V==3) ? 1 : 0; // set for variation 3 only
   const int zero = 0;
   const int one = 1;

   // Sizes, scaling factors and the rotation flag
   tuner.AddArgumentScalar(m);
   tuner.AddArgumentScalar(n);
   tuner.AddArgumentScalar(GetRealArg(args.alpha));
   tuner.AddArgumentScalar(GetRealArg(args.beta));
   tuner.AddArgumentScalar(a_rotated);

   // Matrix input followed by two scalars (presumably offset and leading dimension -
   // TODO confirm against the kernel signature)
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentScalar(zero);
   tuner.AddArgumentScalar(m);

   // Vector input followed by two scalars (presumably offset and increment - TODO confirm)
   tuner.AddArgumentInput(x_vec);
   tuner.AddArgumentScalar(zero);
   tuner.AddArgumentScalar(one);

   // Vector output followed by two scalars (presumably offset and increment - TODO confirm)
   tuner.AddArgumentOutput(y_vec);
   tuner.AddArgumentScalar(zero);
   tuner.AddArgumentScalar(one);

   // Remaining scalars, all zero
   tuner.AddArgumentScalar(zero); // Conjugate transpose
   tuner.AddArgumentScalar(zero); // Additional parameter
   tuner.AddArgumentScalar(zero); // Banded 'kl'
   tuner.AddArgumentScalar(zero); // Banded 'ku'
 }
Exemplo n.º 9
0
 // Sets the tuning parameters and their possible values: registers four parameters
 // (TRA_DIM, TRA_WPT, TRA_PAD, TRA_SHUFFLE) for the kernel identified by `id`, each with
 // the list of candidate values the tuner may pick from.
 static void SetParameters(cltune::Tuner &tuner, const size_t id) {
   tuner.AddParameter(id, "TRA_DIM", {4, 8, 16, 32, 64});
   tuner.AddParameter(id, "TRA_WPT", {1, 2, 4, 8, 16});
   // The two parameters below only take the values 0 and 1 (presumably on/off switches -
   // TODO confirm against the kernel source)
   tuner.AddParameter(id, "TRA_PAD", {0, 1});
   tuner.AddParameter(id, "TRA_SHUFFLE", {0, 1});
 }