示例#1
0
void ActivateBeaconKeys(
        const std::string &cpid,
        const std::string &pubKey,
        const std::string &privKey)
{
    SetArgument("publickey" + cpid + GetNetSuffix(), pubKey);
    SetArgument("privatekey" + cpid + GetNetSuffix(), privKey);
}
示例#2
0
void FillVector(Queue &queue, const Device &device,
                const Program &program, const Databases &,
                EventPointer event, const std::vector<Event> &waitForEvents,
                const size_t n, const size_t inc, const size_t offset,
                const Buffer<T> &dest,
                const T constant_value) {
  auto kernel = Kernel(program, "FillVector");
  kernel.SetArgument(0, static_cast<int>(n));
  kernel.SetArgument(1, static_cast<int>(inc));
  kernel.SetArgument(2, static_cast<int>(offset));
  kernel.SetArgument(3, dest());
  kernel.SetArgument(4, GetRealArg(constant_value));
  auto local = std::vector<size_t>{64};
  auto global = std::vector<size_t>{Ceil(n, 64)};
  RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}
示例#3
0
void FORMULA_Formula::TryCaptureArguments(FORMULA_Formula* src)
{
	if (GetArgumentsNum() != src->GetArgumentsNum())
		return;
	FORMULA_Arguments::iterator src_current = src->m_arguments.begin();
	while (src_current != src->m_arguments.end())
	{
		if (src_current->second)
			SetArgument(src_current->second);
		src_current++;
	}
	src->m_arguments.clear();
}
示例#4
0
void PadCopyTransposeMatrix(Queue &queue, const Device &device,
                            const Databases &db,
                            EventPointer event, const std::vector<Event> &waitForEvents,
                            const size_t src_one, const size_t src_two,
                            const size_t src_ld, const size_t src_offset,
                            const Buffer<T> &src,
                            const size_t dest_one, const size_t dest_two,
                            const size_t dest_ld, const size_t dest_offset,
                            const Buffer<T> &dest,
                            const T alpha,
                            const Program &program, const bool do_pad,
                            const bool do_transpose, const bool do_conjugate,
                            const bool upper = false, const bool lower = false,
                            const bool diagonal_imag_zero = false) {

  // Determines whether or not the fast-version could potentially be used
  auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) && (do_conjugate == false) &&
                         (src_one == dest_one) && (src_two == dest_two) && (src_ld == dest_ld) &&
                         (upper == false) && (lower == false) && (diagonal_imag_zero == false);

  // Determines the right kernel
  auto kernel_name = std::string{};
  if (do_transpose) {
    if (use_fast_kernel &&
        IsMultiple(src_ld, db["TRA_WPT"]) &&
        IsMultiple(src_one, db["TRA_WPT"]*db["TRA_DIM"]) &&
        IsMultiple(src_two, db["TRA_WPT"]*db["TRA_DIM"])) {
      kernel_name = "TransposeMatrixFast";
    }
    else {
      use_fast_kernel = false;
      kernel_name = (do_pad) ? "TransposePadMatrix" : "TransposeMatrix";
    }
  }
  else {
    if (use_fast_kernel &&
        IsMultiple(src_ld, db["COPY_VW"]) &&
        IsMultiple(src_one, db["COPY_VW"]*db["COPY_DIMX"]) &&
        IsMultiple(src_two, db["COPY_WPT"]*db["COPY_DIMY"])) {
      kernel_name = "CopyMatrixFast";
    }
    else {
      use_fast_kernel = false;
      kernel_name = (do_pad) ? "CopyPadMatrix" : "CopyMatrix";
    }
  }

  // Retrieves the kernel from the compiled binary
  auto kernel = Kernel(program, kernel_name);

  // Sets the kernel arguments
  if (use_fast_kernel) {
    kernel.SetArgument(0, static_cast<int>(src_ld));
    kernel.SetArgument(1, src());
    kernel.SetArgument(2, dest());
    kernel.SetArgument(3, GetRealArg(alpha));
  }
  else {
    kernel.SetArgument(0, static_cast<int>(src_one));
    kernel.SetArgument(1, static_cast<int>(src_two));
    kernel.SetArgument(2, static_cast<int>(src_ld));
    kernel.SetArgument(3, static_cast<int>(src_offset));
    kernel.SetArgument(4, src());
    kernel.SetArgument(5, static_cast<int>(dest_one));
    kernel.SetArgument(6, static_cast<int>(dest_two));
    kernel.SetArgument(7, static_cast<int>(dest_ld));
    kernel.SetArgument(8, static_cast<int>(dest_offset));
    kernel.SetArgument(9, dest());
    kernel.SetArgument(10, GetRealArg(alpha));
    if (do_pad) {
      kernel.SetArgument(11, static_cast<int>(do_conjugate));
    }
    else {
      kernel.SetArgument(11, static_cast<int>(upper));
      kernel.SetArgument(12, static_cast<int>(lower));
      kernel.SetArgument(13, static_cast<int>(diagonal_imag_zero));
    }
  }

  // Launches the kernel and returns the error code. Uses global and local thread sizes based on
  // parameters in the database.
  if (do_transpose) {
    if (use_fast_kernel) {
      const auto global = std::vector<size_t>{
        dest_one / db["TRA_WPT"],
        dest_two / db["TRA_WPT"]
      };
      const auto local = std::vector<size_t>{db["TRA_DIM"], db["TRA_DIM"]};
      RunKernel(kernel, queue, device, global, local, event, waitForEvents);
    }
    else {
      const auto global = std::vector<size_t>{
        Ceil(CeilDiv(dest_one, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
        Ceil(CeilDiv(dest_two, db["PADTRA_WPT"]), db["PADTRA_TILE"])
      };
      const auto local = std::vector<size_t>{db["PADTRA_TILE"], db["PADTRA_TILE"]};
      RunKernel(kernel, queue, device, global, local, event, waitForEvents);
    }
  }
  else {
    if (use_fast_kernel) {
      const auto global = std::vector<size_t>{
        dest_one / db["COPY_VW"],
        dest_two / db["COPY_WPT"]
      };
      const auto local = std::vector<size_t>{db["COPY_DIMX"], db["COPY_DIMY"]};
      RunKernel(kernel, queue, device, global, local, event, waitForEvents);
    }
    else {
      const auto global = std::vector<size_t>{
        Ceil(CeilDiv(dest_one, db["PAD_WPTX"]), db["PAD_DIMX"]),
        Ceil(CeilDiv(dest_two, db["PAD_WPTY"]), db["PAD_DIMY"])
      };
      const auto local = std::vector<size_t>{db["PAD_DIMX"], db["PAD_DIMY"]};
      RunKernel(kernel, queue, device, global, local, event, waitForEvents);
    }
  }
}
示例#5
0
void PadCopyTransposeMatrixBatched(Queue &queue, const Device &device,
                                   const Databases &db,
                                   EventPointer event, const std::vector<Event> &waitForEvents,
                                   const size_t src_one, const size_t src_two,
                                   const size_t src_ld, const Buffer<int> &src_offsets,
                                   const Buffer<T> &src,
                                   const size_t dest_one, const size_t dest_two,
                                   const size_t dest_ld, const Buffer<int> &dest_offsets,
                                   const Buffer<T> &dest,
                                   const Program &program, const bool do_pad,
                                   const bool do_transpose, const bool do_conjugate,
                                   const size_t batch_count) {

  // Determines the right kernel
  auto kernel_name = std::string{};
  if (do_transpose) {
    kernel_name = (do_pad) ? "TransposePadMatrixBatched" : "TransposeMatrixBatched";
  }
  else {
    kernel_name = (do_pad) ? "CopyPadMatrixBatched" : "CopyMatrixBatched";
  }

  // Retrieves the kernel from the compiled binary
  auto kernel = Kernel(program, kernel_name);

  // Sets the kernel arguments
  kernel.SetArgument(0, static_cast<int>(src_one));
  kernel.SetArgument(1, static_cast<int>(src_two));
  kernel.SetArgument(2, static_cast<int>(src_ld));
  kernel.SetArgument(3, src_offsets());
  kernel.SetArgument(4, src());
  kernel.SetArgument(5, static_cast<int>(dest_one));
  kernel.SetArgument(6, static_cast<int>(dest_two));
  kernel.SetArgument(7, static_cast<int>(dest_ld));
  kernel.SetArgument(8, dest_offsets());
  kernel.SetArgument(9, dest());
  if (do_pad) {
    kernel.SetArgument(10, static_cast<int>(do_conjugate));
  }

  // Launches the kernel and returns the error code. Uses global and local thread sizes based on
  // parameters in the database.
  if (do_transpose) {
    const auto global = std::vector<size_t>{
      Ceil(CeilDiv(dest_one, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
      Ceil(CeilDiv(dest_two, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
      batch_count
    };
    const auto local = std::vector<size_t>{db["PADTRA_TILE"], db["PADTRA_TILE"], 1};
    RunKernel(kernel, queue, device, global, local, event, waitForEvents);
  }
  else {
    const auto global = std::vector<size_t>{
      Ceil(CeilDiv(dest_one, db["PAD_WPTX"]), db["PAD_DIMX"]),
      Ceil(CeilDiv(dest_two, db["PAD_WPTY"]), db["PAD_DIMY"]),
      batch_count
    };
    const auto local = std::vector<size_t>{db["PAD_DIMX"], db["PAD_DIMY"], 1};
    RunKernel(kernel, queue, device, global, local, event, waitForEvents);
  }
}
int ParseArgs(int argc, char *argv[], user_settings *set)
{
	if (argv == NULL || set == NULL)
		return USR_PTR_PASS_FAIL;

	if (argc < 2) {
		DisplayHelp(argv[0]);
		return USR_HELP;
	}

	// Detecting Help Requried
	for (int i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-help") == 0) {
			DisplayHelp(argv[0]);
			return USR_HELP;
		}
		else if (strcmp(argv[i], "-exthelp") == 0) {
			DisplayExtendedHelp(argv[0]);
			return USR_HELP;
		}
	}

	// Allocating Memory for Content Path Ptrs
	set->common.contentPath = calloc(CIA_MAX_CONTENT, sizeof(char*));
	if (set->common.contentPath == NULL) {
		fprintf(stderr, "[SETTING ERROR] Not Enough Memory\n");
		return USR_MEM_ERROR;
	}

	// Initialise Keys
	InitKeys(&set->common.keys);

	// Setting Defaults
	SetDefaults(set);

	// Parsing Arguments
	int set_result;
	for (int i = 1; i < argc; i += set_result) {
		set_result = SetArgument(argc, i, argv, set);
		if (set_result < 1) {
			fprintf(stderr, "[RESULT] Invalid arguments, see '%s -help'\n", argv[0]);
			return set_result;
		}
	}

	// Checking arguments
	if ((set_result = CheckArgumentCombination(set)) != 0)
		return set_result;

	// Setting Keys
	if ((set_result = SetKeys(&set->common.keys)) != 0)
		return set_result;

	// Generating outpath if required
	if (!set->common.outFileName) {
		char *source_path = NULL;
		if (set->ncch.buildNcch0)
			source_path = set->common.rsfPath;
		else if (set->common.workingFileType == infile_ncsd || set->common.workingFileType == infile_cia || set->common.workingFileType == infile_srl)
			source_path = set->common.workingFilePath;
		else
			source_path = set->common.contentPath[0];
		set->common.outFileName_mallocd = true;
		set->common.outFileName = replace_filextention(source_path, GetOutputExtention(set->common.outFormat));
	}
	return 0;
}
示例#7
0
文件: xher2.cpp 项目: gcp/CLBlast
StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
                            const size_t n,
                            const T alpha,
                            const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
                            const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
                            const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
                            const bool packed) {

  // Makes sure the dimensions are larger than zero
  if (n == 0) { return StatusCode::kInvalidDimension; }

  // The data is either in the upper or lower triangle
  const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
                         (triangle == Triangle::kLower && layout == Layout::kRowMajor));
  const auto is_rowmajor = (layout == Layout::kRowMajor);

  // Tests the matrix and the vectors for validity
  auto status = StatusCode::kSuccess;
  if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); }
  else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); }
  if (ErrorIn(status)) { return status; }
  status = TestVectorX(n, x_buffer, x_offset, x_inc);
  if (ErrorIn(status)) { return status; }
  status = TestVectorY(n, y_buffer, y_offset, y_inc);
  if (ErrorIn(status)) { return status; }

  // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
  auto alpha_buffer = Buffer<T>(context_, 1);
  alpha_buffer.Write(queue_, 1, &alpha);

  // Retrieves the kernel from the compiled binary
  try {
    const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
    auto kernel = Kernel(program, "Xher2");

    // Sets the kernel arguments
    kernel.SetArgument(0, static_cast<int>(n));
    kernel.SetArgument(1, alpha_buffer());
    kernel.SetArgument(2, x_buffer());
    kernel.SetArgument(3, static_cast<int>(x_offset));
    kernel.SetArgument(4, static_cast<int>(x_inc));
    kernel.SetArgument(5, y_buffer());
    kernel.SetArgument(6, static_cast<int>(y_offset));
    kernel.SetArgument(7, static_cast<int>(y_inc));
    kernel.SetArgument(8, a_buffer());
    kernel.SetArgument(9, static_cast<int>(a_offset));
    kernel.SetArgument(10, static_cast<int>(a_ld));
    kernel.SetArgument(11, static_cast<int>(is_upper));
    kernel.SetArgument(12, static_cast<int>(is_rowmajor));

    // Launches the kernel
    auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]);
    auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]);
    auto global = std::vector<size_t>{global_one, global_two};
    auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
    status = RunKernel(kernel, queue_, device_, global, local, event_);
    if (ErrorIn(status)) { return status; }

    // Succesfully finished the computation
    return StatusCode::kSuccess;
  } catch (...) { return StatusCode::kInvalidKernel; }
}
示例#8
0
文件: xim2col.cpp 项目: gpu/CLBlast
void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size_t width,
                          const size_t kernel_h, const size_t kernel_w, const size_t pad_h,
                          const size_t pad_w, const size_t stride_h, const size_t stride_w,
                          const size_t dilation_h, const size_t dilation_w,
                          const Buffer<T> &im_buffer, const size_t im_offset,
                          const Buffer<T> &col_buffer, const size_t col_offset) {

  // Makes sure all dimensions are larger than zero
  if ((channels == 0) || (height == 0) || (width == 0)) { throw BLASError(StatusCode::kInvalidDimension); }

  // Sets the output height and width
  const auto size_h = height + 2 * pad_h;
  const auto padding_h = dilation_h * (kernel_h - 1) + 1;
  const auto output_h = (size_h >= padding_h) ? (size_h - padding_h) / stride_h + 1 : 1;
  const auto size_w = width + 2 * pad_w;
  const auto padding_w = dilation_w * (kernel_w - 1) + 1;
  const auto output_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1;

  // Retrieves the Xcopy kernel from the compiled binary
  auto kernel = Kernel(program_, "im2col");

  // Sets the kernel arguments
  kernel.SetArgument(0, static_cast<int>(height));
  kernel.SetArgument(1, static_cast<int>(width));
  kernel.SetArgument(2, static_cast<int>(channels));
  kernel.SetArgument(3, static_cast<int>(output_h));
  kernel.SetArgument(4, static_cast<int>(output_w));
  kernel.SetArgument(5, static_cast<int>(kernel_h));
  kernel.SetArgument(6, static_cast<int>(kernel_w));
  kernel.SetArgument(7, static_cast<int>(pad_h));
  kernel.SetArgument(8, static_cast<int>(pad_w));
  kernel.SetArgument(9, static_cast<int>(stride_h));
  kernel.SetArgument(10, static_cast<int>(stride_w));
  kernel.SetArgument(11, static_cast<int>(dilation_h));
  kernel.SetArgument(12, static_cast<int>(dilation_w));
  kernel.SetArgument(13, im_buffer());
  kernel.SetArgument(14, static_cast<int>(im_offset));
  kernel.SetArgument(15, col_buffer());
  kernel.SetArgument(16, static_cast<int>(col_offset));

  // Launches the kernel
  const auto w_ceiled = Ceil(output_w, db_["COPY_DIMX"]);
  const auto h_ceiled = Ceil(output_h, db_["COPY_DIMY"]);
  const auto global = std::vector<size_t>{w_ceiled, h_ceiled * channels};
  const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"]};
  RunKernel(kernel, queue_, device_, global, local, event_);
}
示例#9
0
void Xgemm<T>::DoGemm(const std::vector<size_t>& global,
                      const std::vector<size_t>& local,
                      const std::string& kernelName,
                      std::string argumentOrder,
                      const Layout layout,
                      const Transpose a_transpose, const Transpose b_transpose,
                      const size_t m, const size_t n, const size_t k,
                      const T alpha,
                      const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
                      const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
                      const T beta,
                      const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {

  // Makes sure all dimensions are larger than zero
  if ((m == 0) || (n == 0) || (k == 0)) { throw BLASError(StatusCode::kInvalidDimension); }

  // Computes whether or not the matrices are transposed in memory. This is based on their layout
  // (row or column-major) and whether or not they are requested to be pre-transposed. Note
  // that the Xgemm kernel expects either matrices A and C (in case of row-major) or B (in case of
  // col-major) to be transformed, so transposing requirements are not the same as whether or not
  // the matrix is actually transposed in memory.
  const auto a_rotated = (layout == Layout::kColMajor && a_transpose != Transpose::kNo) ||
                         (layout == Layout::kRowMajor && a_transpose == Transpose::kNo);
  const auto b_rotated = (layout == Layout::kColMajor && b_transpose != Transpose::kNo) ||
                         (layout == Layout::kRowMajor && b_transpose == Transpose::kNo);
  const auto c_rotated = (layout == Layout::kRowMajor);
  static const auto a_want_rotated = false;
  static const auto b_want_rotated = true;
  static const auto c_want_rotated = false;
  const auto a_do_transpose = a_rotated != a_want_rotated;
  const auto b_do_transpose = b_rotated != b_want_rotated;
  const auto c_do_transpose = c_rotated != c_want_rotated;

  // In case of complex data-types, the transpose can also become a conjugate transpose
  const auto a_conjugate = (a_transpose == Transpose::kConjugate);
  const auto b_conjugate = (b_transpose == Transpose::kConjugate);

  // Retrieves the proper XgemmDirect kernel from the compiled binary
  //const auto name = (a_do_transpose) ? (b_do_transpose ? "XgemmDirectTT" : "XgemmDirectTN") :
                                       //(b_do_transpose ? "XgemmDirectNT" : "XgemmDirectNN");
  auto kernel = Kernel(*program_, kernelName);

  size_t i = 0;
  auto setArg = [&](std::string arg) {
    arg = trim(arg);
    if (arg == "m") { kernel.SetArgument(i, static_cast<int>(m)); } else
    if (arg == "n") { kernel.SetArgument(i, static_cast<int>(n)); } else
    if (arg == "k") { kernel.SetArgument(i, static_cast<int>(k)); } else
    if (arg == "a") { kernel.SetArgument(i, a_buffer()); } else
    if (arg == "b") { kernel.SetArgument(i, b_buffer()); } else
    if (arg == "c") { kernel.SetArgument(i, c_buffer()); } 
    i = i+1;
  };

  // parse and sets the kernel arguments
  std::string delimiter(",");
  size_t pos = 0;
  while ((pos = argumentOrder.find(delimiter)) != std::string::npos) {
    setArg(argumentOrder.substr(0, pos));
    argumentOrder.erase(0, pos + delimiter.length());
  }
  setArg(argumentOrder); // set last arg

  // Launches the kernel
  RunKernel(kernel, queue_, device_, global, local, event_);
}