예제 #1
0
 // Invokes the CLBlast Col2im routine with the test arguments and blocks until it has completed.
 // buffers.b_mat() is passed as the 'col' buffer and buffers.a_mat() as the 'im' buffer.
 // Returns the status code reported by Col2im.
 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
   #ifdef OPENCL_API
     cl_event completion = cl_event{};
     auto raw_queue = queue();
     auto status = Col2im<T>(
         args.kernel_mode,
         args.channels, args.height, args.width,
         args.kernel_h, args.kernel_w,
         args.pad_h, args.pad_w,
         args.stride_h, args.stride_w,
         args.dilation_h, args.dilation_w,
         buffers.b_mat(), args.b_offset,  // col
         buffers.a_mat(), args.a_offset,  // im
         &raw_queue, &completion);
     // The event is only waited on and released when the routine reported success
     const auto succeeded = (status == StatusCode::kSuccess);
     if (succeeded) {
       clWaitForEvents(1, &completion);
       clReleaseEvent(completion);
     }
   #elif CUDA_API
     auto status = Col2im<T>(
         args.kernel_mode,
         args.channels, args.height, args.width,
         args.kernel_h, args.kernel_w,
         args.pad_h, args.pad_w,
         args.stride_h, args.stride_w,
         args.dilation_h, args.dilation_w,
         buffers.b_mat(), args.b_offset,  // col
         buffers.a_mat(), args.a_offset,  // im
         queue.GetContext()(), queue.GetDevice()());
     // Synchronises on the queue's stream regardless of the returned status
     cuStreamSynchronize(queue());
   #endif
   return status;
 }
예제 #2
0
파일: xher.hpp 프로젝트: gpu/CLBlast
 // Invokes the CLBlast Her routine with the test arguments and blocks until it has completed.
 // Reads the x vector and the a matrix from 'buffers'; returns the status code reported by Her.
 static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
   #ifdef OPENCL_API
     cl_event completion = cl_event{};
     auto raw_queue = queue();
     auto status = Her(
         args.layout, args.triangle,
         args.n, args.alpha,
         buffers.x_vec(), args.x_offset, args.x_inc,
         buffers.a_mat(), args.a_offset, args.a_ld,
         &raw_queue, &completion);
     // The event is only waited on and released when the routine reported success
     const auto succeeded = (status == StatusCode::kSuccess);
     if (succeeded) {
       clWaitForEvents(1, &completion);
       clReleaseEvent(completion);
     }
   #elif CUDA_API
     auto status = Her(
         args.layout, args.triangle,
         args.n, args.alpha,
         buffers.x_vec(), args.x_offset, args.x_inc,
         buffers.a_mat(), args.a_offset, args.a_ld,
         queue.GetContext()(), queue.GetDevice()());
     // Synchronises on the queue's stream regardless of the returned status
     cuStreamSynchronize(queue());
   #endif
   return status;
 }
예제 #3
0
파일: xtrmv.hpp 프로젝트: dividiti/CLBlast
 // Invokes the CLBlast Trmv routine with the test arguments and, on success, blocks until the
 // routine's completion event has fired. Returns the status code reported by Trmv.
 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
   cl_event completion = cl_event{};
   auto raw_queue = queue();
   const auto status = Trmv<T>(
       args.layout, args.triangle, args.a_transpose, args.diagonal,
       args.n,
       buffers.a_mat(), args.a_offset, args.a_ld,
       buffers.x_vec(), args.x_offset, args.x_inc,
       &raw_queue, &completion);

   // Nothing to wait for or release when the routine did not succeed
   if (status != StatusCode::kSuccess) { return status; }

   clWaitForEvents(1, &completion);
   clReleaseEvent(completion);
   return status;
 }
예제 #4
0
 // Invokes the CLBlast Omatcopy routine with the test arguments and, on success, blocks until
 // the routine's completion event has fired. Returns the status code reported by Omatcopy.
 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
   cl_event completion = cl_event{};
   auto raw_queue = queue();
   const auto status = Omatcopy<T>(
       args.layout, args.a_transpose,
       args.m, args.n, args.alpha,
       buffers.a_mat(), args.a_offset, args.a_ld,
       buffers.b_mat(), args.b_offset, args.b_ld,
       &raw_queue, &completion);

   // Nothing to wait for or release when the routine did not succeed
   if (status != StatusCode::kSuccess) { return status; }

   clWaitForEvents(1, &completion);
   clReleaseEvent(completion);
   return status;
 }
예제 #5
0
파일: xsbmv.hpp 프로젝트: gcp/CLBlast
 // Invokes the CLBlast Sbmv routine with the test arguments and, on success, blocks until the
 // routine's completion event has fired. Returns the status code reported by Sbmv.
 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
   auto queue_plain = queue();
   auto event = cl_event{};
   auto status = Sbmv(args.layout, args.triangle,
                      args.n, args.kl, args.alpha,
                      buffers.a_mat(), args.a_offset, args.a_ld,
                      buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
                      buffers.y_vec(), args.y_offset, args.y_inc,
                      &queue_plain, &event);
   // Only touch the event when the routine succeeded: after a failure 'event' may still be the
   // null handle it was initialised with, which is not valid to wait on. The event is released
   // after waiting so that the handle is not leaked on every call.
   if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
   return status;
 }
예제 #6
0
파일: xhemm.hpp 프로젝트: gcp/CLBlast
 // Invokes the CLBlast Hemm routine with the test arguments and, on success, blocks until the
 // routine's completion event has fired. Returns the status code reported by Hemm.
 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
   auto queue_plain = queue();
   auto event = cl_event{};
   auto status = Hemm(args.layout, args.side, args.triangle,
                      args.m, args.n, args.alpha,
                      buffers.a_mat(), args.a_offset, args.a_ld,
                      buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
                      buffers.c_mat(), args.c_offset, args.c_ld,
                      &queue_plain, &event);
   // Only touch the event when the routine succeeded: after a failure 'event' may still be the
   // null handle it was initialised with, which is not valid to wait on. The event is released
   // after waiting so that the handle is not leaked on every call.
   if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
   return status;
 }