Esempio n. 1
0
 // Registers the kernel's arguments with the tuner, in the order in which they are added below:
 // the m/n/k sizes, alpha and beta, then the A, B and C matrices (each followed by its offset and
 // leading dimension), and finally the transpose/conjugate flags. The unnamed vector parameters
 // are not used.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &, std::vector<T> &,
                          std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat,
                          std::vector<T> &) {

   // Problem sizes, converted to int before being handed to the tuner
   const auto m = static_cast<int>(args.m);
   const auto n = static_cast<int>(args.n);
   const auto k = static_cast<int>(args.k);

   // Constants passed below: every offset is zero, flags are either off (0) or on (1)
   const auto no_offset = 0;
   const auto flag_off = 0;
   const auto flag_on = 1;

   // Sizes and scaling factors
   tuner.AddArgumentScalar(m);
   tuner.AddArgumentScalar(n);
   tuner.AddArgumentScalar(k);
   tuner.AddArgumentScalar(GetRealArg(args.alpha));
   tuner.AddArgumentScalar(GetRealArg(args.beta));

   // Matrix A (input): buffer, a_offset, a_ld
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentScalar(no_offset);
   tuner.AddArgumentScalar(k);

   // Matrix B (input): buffer, b_offset, b_ld
   tuner.AddArgumentInput(b_mat);
   tuner.AddArgumentScalar(no_offset);
   tuner.AddArgumentScalar(n);

   // Matrix C (output): buffer, c_offset, c_ld
   tuner.AddArgumentOutput(c_mat);
   tuner.AddArgumentScalar(no_offset);
   tuner.AddArgumentScalar(n);

   // Flags: c_do_transpose, a_conjugate, b_conjugate
   tuner.AddArgumentScalar(flag_on);
   tuner.AddArgumentScalar(flag_off);
   tuner.AddArgumentScalar(flag_off);
 }
Esempio n. 2
0
 // Registers the kernel's arguments with the tuner, in the order in which they are added below.
 // The matrix and the two vectors are each followed by two integer scalars; the original code
 // does not name them (NOTE(review): presumably an offset and a leading dimension/increment —
 // confirm against the kernel signature). The unnamed vector parameters are not used.
 static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
                          std::vector<T> &x_vec, std::vector<T> &y_vec,
                          std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &,
                          std::vector<T> &) {

   // Sizes as int, plus the 'rotated' flag which is set only when V equals 3
   const auto m = static_cast<int>(args.m);
   const auto n = static_cast<int>(args.n);
   const int a_rotated = (V == 3) ? 1 : 0;

   // Sizes, scaling factors and the rotation flag
   tuner.AddArgumentScalar(m);
   tuner.AddArgumentScalar(n);
   tuner.AddArgumentScalar(GetRealArg(args.alpha));
   tuner.AddArgumentScalar(GetRealArg(args.beta));
   tuner.AddArgumentScalar(a_rotated);

   // Matrix A (input), followed by 0 and m
   tuner.AddArgumentInput(a_mat);
   tuner.AddArgumentScalar(0);
   tuner.AddArgumentScalar(m);

   // Vector x (input), followed by 0 and 1
   tuner.AddArgumentInput(x_vec);
   tuner.AddArgumentScalar(0);
   tuner.AddArgumentScalar(1);

   // Vector y (output), followed by 0 and 1
   tuner.AddArgumentOutput(y_vec);
   tuner.AddArgumentScalar(0);
   tuner.AddArgumentScalar(1);

   // Trailing options, all passed as zero
   tuner.AddArgumentScalar(0); // Conjugate transpose
   tuner.AddArgumentScalar(0); // Additional parameter
   tuner.AddArgumentScalar(0); // Banded 'kl'
   tuner.AddArgumentScalar(0); // Banded 'ku'
 }
Esempio n. 3
0
// Sets up and launches the "FillVector" kernel from the given program. The kernel receives n,
// inc and offset (each converted to int), the destination buffer and the constant value. It is
// launched with a local size of 64 and a global size of Ceil(n, 64). The Databases argument is
// not used.
void FillVector(Queue &queue, const Device &device,
                const Program &program, const Databases &,
                EventPointer event, const std::vector<Event> &waitForEvents,
                const size_t n, const size_t inc, const size_t offset,
                const Buffer<T> &dest,
                const T constant_value) {

  // Work-group size, shared by the local size and the rounding of the global size
  constexpr auto kWorkGroupSize = 64;

  // Retrieves the kernel and sets its arguments
  auto fill_kernel = Kernel(program, "FillVector");
  fill_kernel.SetArgument(0, static_cast<int>(n));
  fill_kernel.SetArgument(1, static_cast<int>(inc));
  fill_kernel.SetArgument(2, static_cast<int>(offset));
  fill_kernel.SetArgument(3, dest());
  fill_kernel.SetArgument(4, GetRealArg(constant_value));

  // One-dimensional launch configuration
  auto local_size = std::vector<size_t>{kWorkGroupSize};
  auto global_size = std::vector<size_t>{Ceil(n, kWorkGroupSize)};
  RunKernel(fill_kernel, queue, device, global_size, local_size, event, waitForEvents);
}
Esempio n. 4
0
/*
 * Fetches argument number `element` via GetpMatrix() and returns it as a real scalar.
 *
 * Only values of type IS_LONG or IS_DOUBLE are accepted. An undefined value, or a value of any
 * other type, is reported through ErrMsgTxt() with the message "Expecting a scalar argument.".
 *
 * Returns the value converted by GetRealArg(). If ErrMsgTxt() returns to the caller (its
 * behaviour is not visible here), the initial value 0.0 is returned.
 */
Double GetRealScalar(structlpsolvecaller *lpsolvecaller, int element)
{
        zval arg;
        ZVAL_UNDEF(&arg);
        Double a = 0.0;

        arg = GetpMatrix(lpsolvecaller, element);

        /* Treat any non-numeric value exactly like a missing one */
        if ((!Z_ISUNDEF(arg)) && (Z_TYPE(arg) != IS_LONG) && (Z_TYPE(arg) != IS_DOUBLE)) {
            ZVAL_UNDEF(&arg);
        }

        if (Z_ISUNDEF(arg)) {
            /* Report the problem through the regular error channel. A stray abort() used to
               precede this call, which killed the whole process before the message could be
               delivered and made this line unreachable. */
            ErrMsgTxt(lpsolvecaller, "Expecting a scalar argument.");
        } else {
                a = GetRealArg(lpsolvecaller, arg);
        }
        return(a);
}
Esempio n. 5
0
// Copies or transposes a matrix from 'src' into 'dest' by launching one of six kernels:
//   - "TransposeMatrixFast" / "CopyMatrixFast" when the fast-path conditions below all hold;
//   - "TransposePadMatrix" / "CopyPadMatrix" otherwise, when 'do_pad' is set;
//   - "TransposeMatrix" / "CopyMatrix" otherwise.
// The thread configuration of each kernel is derived from parameters looked up in 'db'.
void PadCopyTransposeMatrix(Queue &queue, const Device &device,
                            const Databases &db,
                            EventPointer event, const std::vector<Event> &waitForEvents,
                            const size_t src_one, const size_t src_two,
                            const size_t src_ld, const size_t src_offset,
                            const Buffer<T> &src,
                            const size_t dest_one, const size_t dest_two,
                            const size_t dest_ld, const size_t dest_offset,
                            const Buffer<T> &dest,
                            const T alpha,
                            const Program &program, const bool do_pad,
                            const bool do_transpose, const bool do_conjugate,
                            const bool upper = false, const bool lower = false,
                            const bool diagonal_imag_zero = false) {

  // The fast kernels are only candidates when there are no offsets, the source and destination
  // have identical sizes and leading dimensions, and none of the optional features is requested
  const auto no_offsets = (src_offset == 0) && (dest_offset == 0);
  const auto same_shape = (src_one == dest_one) && (src_two == dest_two) && (src_ld == dest_ld);
  const auto no_extras = !do_conjugate && !upper && !lower && !diagonal_imag_zero;
  auto fast_path = no_offsets && same_shape && no_extras;

  // On top of that, the sizes have to be multiples of the tuning parameters of the fast kernel.
  // The result also decides which kernel is used.
  auto routine = std::string{};
  if (do_transpose) {
    fast_path = fast_path &&
                IsMultiple(src_ld, db["TRA_WPT"]) &&
                IsMultiple(src_one, db["TRA_WPT"]*db["TRA_DIM"]) &&
                IsMultiple(src_two, db["TRA_WPT"]*db["TRA_DIM"]);
    if (fast_path) { routine = "TransposeMatrixFast"; }
    else if (do_pad) { routine = "TransposePadMatrix"; }
    else { routine = "TransposeMatrix"; }
  }
  else {
    fast_path = fast_path &&
                IsMultiple(src_ld, db["COPY_VW"]) &&
                IsMultiple(src_one, db["COPY_VW"]*db["COPY_DIMX"]) &&
                IsMultiple(src_two, db["COPY_WPT"]*db["COPY_DIMY"]);
    if (fast_path) { routine = "CopyMatrixFast"; }
    else if (do_pad) { routine = "CopyPadMatrix"; }
    else { routine = "CopyMatrix"; }
  }

  // Looks the selected kernel up in the compiled program
  auto matrix_kernel = Kernel(program, routine);

  // Kernel arguments: the fast kernels take a short list, the others the full description
  if (fast_path) {
    matrix_kernel.SetArgument(0, static_cast<int>(src_ld));
    matrix_kernel.SetArgument(1, src());
    matrix_kernel.SetArgument(2, dest());
    matrix_kernel.SetArgument(3, GetRealArg(alpha));
  }
  else {
    matrix_kernel.SetArgument(0, static_cast<int>(src_one));
    matrix_kernel.SetArgument(1, static_cast<int>(src_two));
    matrix_kernel.SetArgument(2, static_cast<int>(src_ld));
    matrix_kernel.SetArgument(3, static_cast<int>(src_offset));
    matrix_kernel.SetArgument(4, src());
    matrix_kernel.SetArgument(5, static_cast<int>(dest_one));
    matrix_kernel.SetArgument(6, static_cast<int>(dest_two));
    matrix_kernel.SetArgument(7, static_cast<int>(dest_ld));
    matrix_kernel.SetArgument(8, static_cast<int>(dest_offset));
    matrix_kernel.SetArgument(9, dest());
    matrix_kernel.SetArgument(10, GetRealArg(alpha));

    // The padding kernels end with the conjugate flag, the others with the three layout flags
    if (do_pad) {
      matrix_kernel.SetArgument(11, static_cast<int>(do_conjugate));
    }
    else {
      matrix_kernel.SetArgument(11, static_cast<int>(upper));
      matrix_kernel.SetArgument(12, static_cast<int>(lower));
      matrix_kernel.SetArgument(13, static_cast<int>(diagonal_imag_zero));
    }
  }

  // Global and local thread sizes, based on the database parameters of the selected kernel
  auto global_size = std::vector<size_t>{};
  auto local_size = std::vector<size_t>{};
  if (fast_path) {
    if (do_transpose) {
      global_size = std::vector<size_t>{dest_one / db["TRA_WPT"],
                                        dest_two / db["TRA_WPT"]};
      local_size = std::vector<size_t>{db["TRA_DIM"], db["TRA_DIM"]};
    }
    else {
      global_size = std::vector<size_t>{dest_one / db["COPY_VW"],
                                        dest_two / db["COPY_WPT"]};
      local_size = std::vector<size_t>{db["COPY_DIMX"], db["COPY_DIMY"]};
    }
  }
  else {
    if (do_transpose) {
      global_size = std::vector<size_t>{
        Ceil(CeilDiv(dest_one, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
        Ceil(CeilDiv(dest_two, db["PADTRA_WPT"]), db["PADTRA_TILE"])
      };
      local_size = std::vector<size_t>{db["PADTRA_TILE"], db["PADTRA_TILE"]};
    }
    else {
      global_size = std::vector<size_t>{
        Ceil(CeilDiv(dest_one, db["PAD_WPTX"]), db["PAD_DIMX"]),
        Ceil(CeilDiv(dest_two, db["PAD_WPTY"]), db["PAD_DIMY"])
      };
      local_size = std::vector<size_t>{db["PAD_DIMX"], db["PAD_DIMY"]};
    }
  }

  // Launches the kernel
  RunKernel(matrix_kernel, queue, device, global_size, local_size, event, waitForEvents);
}
Esempio n. 6
0
int GetRealSparseVector(structlpsolvecaller *lpsolvecaller, int element, Double *vec, int *index, int start, int len, int col)
{
	int	m, n, count = 0;
        zval    pm = GetpMatrix(lpsolvecaller, element);
        zval    *data;
        HashTable *arr_hash;
        Double a;
        zend_string *key;
        ulong i;

        if ((Z_ISUNDEF(pm)) || (Z_TYPE(pm) != IS_ARRAY)) {
            ErrMsgTxt(lpsolvecaller, "invalid vector.");
        }

#if 1
        m = GetM(lpsolvecaller, pm);
	n = GetN(lpsolvecaller, pm);
#else
        m = zend_hash_num_elements(Z_ARRVAL_P(pm));
        n = 1;
#endif

	if (  ((col == 0) && (((m != 1) && (n != 1)) || ((m == 1) && (n > len)) || ((n == 1) && (m > len)))) ||
              ((col != 0) && ((m > len) || (col > n))) /* ||
	      !IsNumeric(pm) ||
              IsComplex(pm) */ ) {
/* Printf("1: m=%d, n=%d, col=%d, len=%d, IsNumeric=%d, IsComplex=%d\n", m,n,col,len,IsNumeric(pm),IsComplex(pm)); */
		ErrMsgTxt(lpsolvecaller, "invalid vector.");
	}

        if ((((n == 1) || (col != 0)) && (m > len)) || ((col == 0) && (m == 1) && (n > len))) {
/* Printf("2: m=%d, n=%d, col=%d, len=%d\n", m,n,col,len); */
                ErrMsgTxt(lpsolvecaller, "invalid vector.");
        }

        arr_hash = Z_ARRVAL(pm);
        ZEND_HASH_FOREACH_KEY_VAL(arr_hash, i, key, data) {

            if (key) {
                ErrMsgTxt(lpsolvecaller, "invalid vector.");
            } else {
                zval pm = *data;

                a = 0;

                if (Z_TYPE(pm) == IS_ARRAY) {
                    zval    *data;
                    HashTable *arr_hash;
                    zend_string *key;
                    ulong i;

                    arr_hash = Z_ARRVAL(pm);
                    ZEND_HASH_FOREACH_KEY_VAL(arr_hash, i, key, data) {

                        if (key) {
                            ErrMsgTxt(lpsolvecaller, "invalid vector.");
                        } else if (i + 1 == col) {
                            a = GetRealArg(lpsolvecaller, *data);
                            break;
                        }
                    } ZEND_HASH_FOREACH_END();
                } else {
                        a = GetRealArg(lpsolvecaller, pm);
                }

                if (a) {
                    *(vec++) = a;
                    *(index++) = start + i;
                    count++;
                }
            }