Esempio n. 1
0
    /// Reads a kernel source file and registers its contents under `kernel_name`.
    ///
    /// The file contents are stored in `m_kernels[kernel_name]`, and a
    /// (pointer, length) entry referring to that stored string is appended to
    /// `m_program_sources`.
    ///
    /// Nothing is registered when the file cannot be opened (a message is
    /// printed to std::cout) or when `kernel_name` has already been added.
    ///
    /// @param file_name   Path of the kernel source file to read.
    /// @param kernel_name Key under which the source text is stored.
    void addKernel(const std::string& file_name, const std::string& kernel_name)
    {
        std::ifstream ifs(file_name.c_str(), std::ios::in);
        if (!ifs.is_open())
        {
            std::cout << "Impossible to open '" << file_name << "'" << std::endl;
            // Do not register an empty source for a file that could not be read.
            return;
        }

        // Only add the kernel if it has not already been added:
        // m_program_sources keeps a raw pointer into the stored string, so
        // overwriting an existing entry could leave that pointer dangling and
        // would also append a duplicate source.
        if (m_kernels.find(kernel_name) != m_kernels.end())
        {
            return;
        }

        std::string kernel_str((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());

        // Hand the text over to the map entry, then take pointer and length
        // from the stored string so both describe the same object.
        std::string& stored_source = m_kernels[kernel_name];
        stored_source.swap(kernel_str);
        m_program_sources.push_back({stored_source.c_str(), stored_source.size()});
    }
// Prepares the custom GEMM OpenCL path.
//
// Steps, in order:
//   1. Resets the external `err` status to 0.
//   2. Builds the compiler option string from `arithmetic` and installs it on
//      the current ViennaCL context.
//   3. Records `cl_program` in `caffe::cl_program` and picks the matching
//      kernel source file under "clkernel/".
//   4. Reads that file, adds it as program "gemm_program" to ViennaCL context 0
//      and fetches the "gemm", "transpose" and "copy" kernels.
//   5. Creates three CL_MEM_USE_HOST_PTR buffers over host_trans_buffer,
//      host_copy_buffer and host_result_buffer, checking `err` after each.
//
// cl_program: "blocking-2-v4", "blocking-4-v4" or "noblock-v8". Any other
//             value leaves the path as the bare "clkernel/" directory.
// arithmetic: scalar type name spliced into the -DT/-DT4/-DT8/-DT16 macros;
//             "double" and "half" additionally define SAMPLE_NEEDS_DOUBLE /
//             SAMPLE_NEEDS_HALF.
static void customcl_setup(
    std::string cl_program = "blocking-2-v4",
    std::string arithmetic = "float") {

    err = 0;

    // Build options for OpenCL: the scalar type and its vector variants.
    const std::string cl_build_options =
        "-DT=" + arithmetic +
        " -DT4=" + arithmetic + "4" +
        " -DT8=" + arithmetic + "8" +
        " -DT16=" + arithmetic + "16" +
        " " + (arithmetic == "double" ? " -DSAMPLE_NEEDS_DOUBLE" : "") +
        " " + (arithmetic == "half" ? " -DSAMPLE_NEEDS_HALF" : "");

    caffe::cl_program = cl_program;

    // Map the program variant to its kernel source file.
    std::string clkernel_path = "clkernel/";
    if (cl_program == "blocking-2-v4") {
        clkernel_path += "gemm-blocking-2x2-vload4.cl";
    } else if (cl_program == "blocking-4-v4") {
        clkernel_path += "gemm-blocking-4x4-vload4.cl";
    } else if (cl_program == "noblock-v8") {
        clkernel_path += "gemm-noblock-vload8.cl";
    }

    // NOTE(review): the stream is not checked. If the file is missing, or the
    // variant name was not recognised, kernel_str ends up empty and the
    // failure only surfaces later in add_program / get_kernel.
    std::ifstream kernel_file(clkernel_path);
    std::string kernel_str((std::istreambuf_iterator<char>(kernel_file)),
                           std::istreambuf_iterator<char>());

    // NOTE(review): options are set on current_context() while the program is
    // added to get_context(0); these are presumably the same context -- confirm.
    viennacl::ocl::current_context().build_options(cl_build_options);

    cl_prog = viennacl::ocl::get_context(0).add_program(
        kernel_str, "gemm_program");

    gemm_exec = cl_prog.get_kernel("gemm");
    transpose_exec = cl_prog.get_kernel("transpose");
    copy_exec = cl_prog.get_kernel("copy");

    // All three buffers share one size: DIM x DIM elements of 8 bytes each
    // (presumably sized for the widest scalar, double -- TODO confirm).
    const size_t buffer_bytes = TRANSPOSE_BUFFER_DIM * TRANSPOSE_BUFFER_DIM * 8;

    transpose_ptr = static_cast<void*>(clCreateBuffer(
        viennacl::ocl::current_context().handle().get(),
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        buffer_bytes,
        host_trans_buffer,
        &err
    ));
    SAMPLE_CHECK_ERRORS(err);

    copy_ptr = static_cast<void*>(clCreateBuffer(
        viennacl::ocl::current_context().handle().get(),
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        buffer_bytes,
        host_copy_buffer,
        &err
    ));
    SAMPLE_CHECK_ERRORS(err);

    result_ptr = static_cast<void*>(clCreateBuffer(
        viennacl::ocl::current_context().handle().get(),
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        buffer_bytes,
        host_result_buffer,
        &err
    ));
    SAMPLE_CHECK_ERRORS(err);
}