// Timed round trip for double-precision SYR2K.
//
// Between timer.Start and timer.Stop this creates the A, B and C device
// buffers, calls initialize_gpu_buffer(), enqueues clblasDsyr2k on queue_
// and reads C back into buffer_.c_ with a blocking read.
//
// NOTE(review): no status code is inspected here -- `err` is overwritten by
// each call and never read, and the return values of clblasDsyr2k and
// clWaitForEvents are dropped. The buffers created here are not released in
// this function; presumably that happens elsewhere -- confirm.
void xSyr2k<double>::
roundtrip_func()
{
    timer.Start(timer_id);

    cl_int err;

    // Every allocation holds (leading dimension * vector count + offset) doubles.
    const size_t a_bytes =
        (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(double);
    buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY, a_bytes, NULL, &err);

    const size_t b_bytes =
        (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(double);
    buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY, b_bytes, NULL, &err);

    const size_t c_bytes =
        (buffer_.ldc_ * buffer_.c_num_vectors_ + buffer_.offC_) * sizeof(double);
    buffer_.buf_c_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE, c_bytes, NULL, &err);

    this->initialize_gpu_buffer();

    // No completion event is requested for the kernel itself (last argument
    // is NULL); the read below is what this function waits on.
    clblasDsyr2k(order_, buffer_.uplo_, buffer_.trans_,
                 buffer_.n_, buffer_.k_, buffer_.alpha_,
                 buffer_.buf_a_, buffer_.offA_, buffer_.lda_,
                 buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
                 buffer_.beta_,
                 buffer_.buf_c_, buffer_.offC_, buffer_.ldc_,
                 1, &queue_, 0, NULL, NULL);

    // Read back ldc * c_num_vectors doubles, starting offC doubles into the
    // device buffer. CL_TRUE makes the read blocking.
    const size_t c_read_offset = buffer_.offC_ * sizeof(double);
    const size_t c_read_bytes =
        buffer_.ldc_ * buffer_.c_num_vectors_ * sizeof(double);
    err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
                              c_read_offset, c_read_bytes, buffer_.c_,
                              0, NULL, &event_);

    clWaitForEvents(1, &event_);

    timer.Stop(timer_id);
}
/*
 * Runs a double-precision SYR2K through clBLAS on env->queues[0] and reads
 * the result back into `c`.
 *
 * The clBLAS call is issued column-major with N = cr, K = ac when transAB is
 * clblasNoTrans (ar otherwise), lda = ar, ldb = br, ldc = cr and all offsets
 * zero. `bc` and `cc` are accepted but not used.
 *
 * When beta is non-zero, create_mem is given the host pointer `c` and an
 * event slot; when beta is zero it is given a NULL host pointer and no event.
 * NOTE(review): presumably create_mem uploads the host data and read_mem
 * copies the result back into `c`; the units of size_a/size_b/size_c are set
 * by those helpers -- confirm against their definitions.
 *
 * Returns CL_SUCCESS on the normal path. CHECK is a macro defined outside
 * this view; its arguments are kept textually as-is in case it reports them.
 * NOTE(review): if CHECK returns early, the cl_mem objects and the clBLAS
 * setup would not be cleaned up -- confirm.
 */
cl_int Dsyr2k_internal(
    cl_env *env, double *a, double *b, double *c, double alpha, double beta,
    clblasTranspose transAB, clblasUplo uplo,
    int ar, int ac, int br, int bc, int cr, int cc,
    int size_a, int size_b, int size_c)
{
    CHECK(clblasSetup());

    cl_event events[NEVENTS];
    int nevent = 0;

    /* Each create_mem call that receives an event slot claims the next one. */
    cl_mem mem_a = create_mem(env, a, size_a, CL_MEM_READ_ONLY, &(events[nevent]));
    nevent++;

    cl_mem mem_b = create_mem(env, b, size_b, CL_MEM_READ_ONLY, &(events[nevent]));
    nevent++;

    cl_mem mem_c;
    if (beta == 0) {
        /* No host data and no event for C in this case. */
        mem_c = create_mem(env, NULL, size_c, CL_MEM_READ_WRITE, NULL);
    } else {
        mem_c = create_mem(env, c, size_c, CL_MEM_READ_WRITE, &(events[nevent]));
        nevent++;
    }

    int k;
    if (transAB == clblasNoTrans) {
        k = ac;
    } else {
        k = ar;
    }

    /* Wait on the first `nevent` events; completion is stored in events[nevent]. */
    cl_int err = clblasDsyr2k(clblasColumnMajor, uplo, transAB,
                              cr, k, alpha,
                              mem_a, 0, ar,
                              mem_b, 0, br,
                              beta,
                              mem_c, 0, cr,
                              1, &(env->queues[0]),
                              nevent, events, &(events[nevent]));
    CHECK(err);

    /* read_mem is given the kernel's completion event; its event goes in the next slot. */
    events[nevent+1] = *read_mem(env, mem_c, c, size_c, 1, &(events[nevent]));
    CHECK(clWaitForEvents(1, &(events[nevent+1])));

    CHECK(clReleaseMemObject(mem_a));
    CHECK(clReleaseMemObject(mem_b));
    CHECK(clReleaseMemObject(mem_c));

    clblasTeardown();
    return CL_SUCCESS;
}
// Times a single clblasDsyr2k call on buffers that already exist.
//
// The call is enqueued on queue_ with an empty wait list and its completion
// event is stored in event_. The timer is stopped only after that event has
// been waited on.
//
// NOTE(review): the return values of clblasDsyr2k and clWaitForEvents are
// discarded, so a failed enqueue is not reported here.
void xSyr2k<double>::
call_func()
{
    timer.Start(timer_id);

    clblasDsyr2k(
        order_, buffer_.uplo_, buffer_.trans_,
        buffer_.n_, buffer_.k_,
        buffer_.alpha_,
        buffer_.buf_a_, buffer_.offA_, buffer_.lda_,
        buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
        buffer_.beta_,
        buffer_.buf_c_, buffer_.offC_, buffer_.ldc_,
        1, &queue_,      // one command queue
        0, NULL,         // empty wait list
        &event_);        // completion event

    clWaitForEvents(1, &event_);

    timer.Stop(timer_id);
}