// Times one complete round trip of a double-precision SYR2K call:
//   1. create the device buffers for A, B and C,
//   2. fill them through initialize_gpu_buffer(),
//   3. enqueue clblasDsyr2k on queue_,
//   4. read C back into buffer_.c_.
// The interval between timer.Start and timer.Stop covers all four steps.
//
// Every OpenCL / clBLAS status is checked. As soon as one step fails, the
// steps that depend on it are skipped, so a null buffer is never passed to
// the library and an event that was never produced is never waited on.
// The timer is always stopped.
// NOTE(review): the function returns void, so a failure is not reported to
// the caller here; wire this into the project's error reporting if it has one.
void
xSyr2k<double>::
roundtrip_func()
{
    timer.Start(timer_id);

    cl_int err;
    bool buffers_ok = true;

    // Each buffer is sized in bytes as (leading dimension * vector count +
    // offset) doubles. All three creations are attempted so that every
    // buffer_.buf_*_ member is assigned, but each status is inspected before
    // the next call overwrites err.
    buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY,
                                    (buffer_.lda_ * buffer_.a_num_vectors_ +
                                        buffer_.offA_) * sizeof(double),
                                    NULL, &err);
    buffers_ok = buffers_ok && (err == CL_SUCCESS);

    buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY,
                                    (buffer_.ldb_ * buffer_.b_num_vectors_ +
                                        buffer_.offB_) * sizeof(double),
                                    NULL, &err);
    buffers_ok = buffers_ok && (err == CL_SUCCESS);

    buffer_.buf_c_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE,
                                    (buffer_.ldc_ * buffer_.c_num_vectors_ +
                                        buffer_.offC_) * sizeof(double),
                                    NULL, &err);
    buffers_ok = buffers_ok && (err == CL_SUCCESS);

    if (buffers_ok)
    {
        this->initialize_gpu_buffer();

        // No completion event is requested for the kernel; the read below is
        // enqueued on the same queue_.
        err = clblasDsyr2k(order_, buffer_.uplo_, buffer_.trans_, buffer_.n_,
                           buffer_.k_, buffer_.alpha_, buffer_.buf_a_, buffer_.offA_,
                           buffer_.lda_, buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
                           buffer_.beta_, buffer_.buf_c_, buffer_.offC_,
                           buffer_.ldc_, 1, &queue_, 0, NULL, NULL);

        if (err == CL_SUCCESS)
        {
            // Blocking read (CL_TRUE) of C, starting offC_ elements into the
            // device buffer, into the host array buffer_.c_.
            err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
                                      buffer_.offC_ * sizeof(double),
                                      buffer_.ldc_ * buffer_.c_num_vectors_ *
                                          sizeof(double),
                                      buffer_.c_, 0, NULL, &event_);

            // Only wait when the enqueue succeeded; otherwise event_ may not
            // refer to the event of this read.
            if (err == CL_SUCCESS)
            {
                clWaitForEvents(1, &event_);
            }
        }
    }

    timer.Stop(timer_id);
}
// Example #2
// 0
/*
 * Runs a double-precision symmetric rank-2k update through clblasDsyr2k in
 * column-major layout and copies the result back into c.
 *
 * env            - project OpenCL environment; env->queues[0] is the command
 *                  queue handed to clBLAS.
 * a, b, c        - host arrays for the A, B and C matrices.
 * alpha, beta    - scalars forwarded unchanged to clblasDsyr2k.
 * transAB, uplo  - transpose / triangle selectors forwarded to clblasDsyr2k.
 * ar, ac         - rows / columns of A; ar is also passed as lda.
 * br, bc         - rows / columns of B; br is passed as ldb, bc is unused here.
 * cr, cc         - rows / columns of C; cr is passed as both n and ldc,
 *                  cc is unused here.
 * size_a/b/c     - sizes handed to create_mem / read_mem for each matrix
 *                  (presumably byte counts - confirm against those helpers).
 *
 * Returns CL_SUCCESS when the end of the function is reached; how failures
 * are reported depends on the CHECK macro, which is defined elsewhere.
 *
 * NOTE(review): none of the cl_event handles stored in the local array are
 * released in this function, and if CHECK returns early the device buffers
 * and the clblasTeardown() call would be skipped - confirm against the
 * definitions of CHECK, create_mem and read_mem.
 */
cl_int Dsyr2k_internal(
  cl_env *env, double *a, double *b, double *c, double alpha, double beta,
  clblasTranspose transAB, clblasUplo uplo,
  int ar, int ac, int br, int bc, int cr, int cc, int size_a, int size_b, int size_c)
{
  CHECK(clblasSetup());

  cl_event evts[NEVENTS];
  int n_upload = 0;  /* number of leading slots in evts handed to create_mem */

  cl_mem dev_a = create_mem(env, a, size_a, CL_MEM_READ_ONLY, &evts[n_upload]);
  ++n_upload;

  cl_mem dev_b = create_mem(env, b, size_b, CL_MEM_READ_ONLY, &evts[n_upload]);
  ++n_upload;

  /* With beta == 0 the C buffer is created without a host pointer and without
     an event slot; otherwise c is passed along and takes the next slot. */
  cl_mem dev_c;
  if (beta == 0) {
    dev_c = create_mem(env, NULL, size_c, CL_MEM_READ_WRITE, NULL);
  } else {
    dev_c = create_mem(env, c, size_c, CL_MEM_READ_WRITE, &evts[n_upload]);
    ++n_upload;
  }

  /* The two slots after the upload events hold the kernel event and the
     read-back event respectively. */
  cl_event *kernel_done = &evts[n_upload];
  cl_event *read_done = &evts[n_upload + 1];

  /* k comes from A's column count when A is not transposed, otherwise from
     its row count. */
  int k;
  if (transAB == clblasNoTrans) {
    k = ac;
  } else {
    k = ar;
  }

  cl_int err = clblasDsyr2k(clblasColumnMajor, uplo, transAB,
                            cr, k, alpha,
                            dev_a, 0, ar,
                            dev_b, 0, br,
                            beta,
                            dev_c, 0, cr,
                            1, &env->queues[0],
                            n_upload, evts, kernel_done);
  CHECK(err);

  /* read_mem is given the kernel event as its single wait-list entry; the
     event it returns is copied into the read-back slot and waited on. */
  *read_done = *read_mem(env, dev_c, c, size_c, 1, kernel_done);
  CHECK(clWaitForEvents(1, read_done));

  CHECK(clReleaseMemObject(dev_a));
  CHECK(clReleaseMemObject(dev_b));
  CHECK(clReleaseMemObject(dev_c));

  clblasTeardown();
  return CL_SUCCESS;
}
// Times a single double-precision SYR2K call on buffers that already exist:
// enqueues clblasDsyr2k on queue_ and waits for its completion event, all
// between timer.Start and timer.Stop.
//
// The enqueue status is checked before waiting. If the call fails, event_ may
// not refer to an event produced by this call, so the wait is skipped.
// The timer is always stopped.
// NOTE(review): the function returns void, so a failure is not reported to
// the caller here; wire this into the project's error reporting if it has one.
void
xSyr2k<double>::
call_func()
{
    timer.Start(timer_id);

    const cl_int status =
        clblasDsyr2k(order_, buffer_.uplo_, buffer_.trans_, buffer_.n_,
                     buffer_.k_, buffer_.alpha_, buffer_.buf_a_, buffer_.offA_,
                     buffer_.lda_, buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
                     buffer_.beta_, buffer_.buf_c_, buffer_.offC_,
                     buffer_.ldc_, 1, &queue_, 0, NULL, &event_);

    if (status == CL_SUCCESS)
    {
        clWaitForEvents(1, &event_);
    }

    timer.Stop(timer_id);
}