void addKernel(const std::string& file_name, const std::string& kernel_name) { std::ifstream ifs; ifs.open(file_name.c_str(), std::ios::in); if (!ifs.is_open()) { std::cout << "Impossible to open '" << file_name << "'" << std::endl; } std::string kernel_str((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); //If the kernel has not already been added m_kernels[kernel_name] = kernel_str; m_program_sources.push_back({m_kernels[kernel_name].c_str(), kernel_str.size()}); }
static void customcl_setup( std::string cl_program = "blocking-2-v4", std::string arithmetic = "float") { err = 0; // build options for opencl. std::string cl_build_options = "-DT=" + arithmetic + " -DT4=" + arithmetic + "4" + " -DT8=" + arithmetic + "8" + " -DT16=" + arithmetic + "16" + " " + (arithmetic == "double" ? " -DSAMPLE_NEEDS_DOUBLE" : "") + " " + (arithmetic == "half" ? " -DSAMPLE_NEEDS_HALF" : ""); // clkernel name. std::string clkernel_path = "clkernel/"; caffe::cl_program = cl_program; if(cl_program == "blocking-2-v4") { clkernel_path += "gemm-blocking-2x2-vload4.cl"; }else if(cl_program == "blocking-4-v4") { clkernel_path += "gemm-blocking-4x4-vload4.cl"; }else if(cl_program == "noblock-v8") { clkernel_path += "gemm-noblock-vload8.cl"; } std::ifstream kernel_file(clkernel_path); std::string kernel_str((std::istreambuf_iterator<char>(kernel_file)), std::istreambuf_iterator<char>()); viennacl::ocl::current_context().build_options( "-DT=" + arithmetic + " -DT4=" + arithmetic + "4" + " -DT8=" + arithmetic + "8" + " -DT16=" + arithmetic + "16" + " " + (arithmetic == "double" ? " -DSAMPLE_NEEDS_DOUBLE" : "") + " " + (arithmetic == "half" ? " -DSAMPLE_NEEDS_HALF" : "")); cl_prog = viennacl::ocl::get_context(0).add_program( kernel_str, "gemm_program"); gemm_exec = cl_prog.get_kernel("gemm"); transpose_exec = cl_prog.get_kernel("transpose"); copy_exec = cl_prog.get_kernel("copy"); transpose_ptr = (void*)clCreateBuffer( viennacl::ocl::current_context().handle().get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, TRANSPOSE_BUFFER_DIM * TRANSPOSE_BUFFER_DIM * 8, host_trans_buffer, &err ); SAMPLE_CHECK_ERRORS(err); copy_ptr = (void*)clCreateBuffer( viennacl::ocl::current_context().handle().get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, TRANSPOSE_BUFFER_DIM * TRANSPOSE_BUFFER_DIM * 8, host_copy_buffer, &err ); SAMPLE_CHECK_ERRORS(err); result_ptr = (void*)clCreateBuffer( viennacl::ocl::current_context().handle().get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, TRANSPOSE_BUFFER_DIM * TRANSPOSE_BUFFER_DIM * 8, host_result_buffer, &err ); SAMPLE_CHECK_ERRORS(err); }