Ejemplo n.º 1
0
        static void local_terminal_init(const Vector &x, backend::source_generator &src,
            const backend::command_queue &q, const std::string &prm_name,
            detail::kernel_generator_state_ptr state)
        {
            typedef typename detail::return_type<Vector>::type x_type;
            typedef decltype(std::declval<Val>() * std::declval<x_type>()) res_type;

            src.new_line()
                << type_name<res_type>() << " " << prm_name << "_sum = "
                << res_type() << ";";
            src.new_line() << "if (" << prm_name << "_ptr)";
            src.open("{");
            src.new_line() << type_name<Ptr>() << " row_beg = " << prm_name << "_ptr[idx];";
            src.new_line() << type_name<Ptr>() << " row_end = " << prm_name << "_ptr[idx+1];";
            src.new_line() << "for(" << type_name<Ptr>() << " j = row_beg; j < row_end; ++j)";
            src.open("{");

            src.new_line() << type_name<Col>() << " idx = " << prm_name << "_col[j];";

            detail::output_local_preamble init_x(src, q, prm_name + "_x", state);
            boost::proto::eval(boost::proto::as_child(x), init_x);

            src.new_line() << prm_name << "_sum += " << prm_name << "_val[j] * ";

            detail::vector_expr_context expr_x(src, q, prm_name + "_x", state);
            boost::proto::eval(boost::proto::as_child(x), expr_x);

            src << ";";

            src.close("}");
            src.close("}");
        }
Ejemplo n.º 2
0
        static void local_terminal_init(const Vector &x, backend::source_generator &src,
            const backend::command_queue &q, const std::string &prm_name,
            detail::kernel_generator_state_ptr state)
        {
            typedef typename detail::return_type<Vector>::type x_type;
            typedef spmv_ops_impl<Val, x_type> spmv_ops;

            spmv_ops::decl_accum_var(src, prm_name + "_sum");
            src.open("{");

            // ELL part
            src.new_line() << "for(size_t j = 0; j < " << prm_name << "_ell_width; ++j)";
            src.open("{");
            src.new_line() << type_name<Col>() << " nnz_idx = idx + j * " << prm_name << "_ell_pitch;";
            src.new_line() << type_name<Col>() << " c = " << prm_name << "_ell_col[nnz_idx];";
            src.new_line() << "if (c != (" << type_name<Col>() << ")(-1))";
            src.open("{");

            src.new_line() << type_name<Col>() << " idx = c;";

            {
                detail::output_local_preamble init_x(src, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), init_x);

                backend::source_generator vec_value;
                detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), expr_x);

                spmv_ops::append_product(src, prm_name + "_sum", prm_name + "_ell_val[nnz_idx]", vec_value.str());
            }

            src.close("} else break;");
            src.close("}");

            // CSR part
            src.new_line() << "if (" << prm_name << "_csr_ptr)";
            src.open("{");
            src.new_line() << type_name<Ptr>() << " csr_beg = " << prm_name << "_csr_ptr[idx];";
            src.new_line() << type_name<Ptr>() << " csr_end = " << prm_name << "_csr_ptr[idx+1];";
            src.new_line() << "for(" << type_name<Ptr>() << " j = csr_beg; j < csr_end; ++j)";
            src.open("{");

            src.new_line() << type_name<Col>() << " idx = " << prm_name << "_csr_col[j];";

            {
                detail::output_local_preamble init_x(src, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), init_x);

                backend::source_generator vec_value;
                detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), expr_x);

                spmv_ops::append_product(src, prm_name + "_sum", prm_name + "_csr_val[j]", vec_value.str());
            }

            src.close("}");
            src.close("}");
            src.close("}");
        }
Ejemplo n.º 3
0
    /// Emits the device function `<prm_name>_spmv(idx, row, col, val, vec, i)`.
    ///
    /// The generated function returns the sum of `val[j] * vec[i + col[j]]`
    /// for `j` in `[row[idx[i]], row[idx[i] + 1])`; note that `col[j]` is
    /// applied as an offset relative to `i`.
    ///
    /// \param src      Source generator that receives the emitted function.
    /// \param prm_name Prefix of the generated function name.
    ///
    /// The product terminal, the command queue and the generator state are
    /// accepted for interface reasons only and are not used.
    static void get(backend::source_generator &src,
            const ccsr_product<val_t, col_t, idx_t, T>&,
            const backend::command_queue&, const std::string &prm_name,
            detail::kernel_generator_state_ptr)
    {
        using res_t = decltype(val_t() * T());

        const std::string fn_name = prm_name + "_spmv";

        // Signature, followed by the opening brace of the body.
        src.function<res_t>(fn_name)
            .open("(")
                .template parameter< global_ptr<const idx_t> >("idx")
                .template parameter< global_ptr<const idx_t> >("row")
                .template parameter< global_ptr<const col_t> >("col")
                .template parameter< global_ptr<const val_t> >("val")
                .template parameter< global_ptr<const T>     >("vec")
                .template parameter< size_t >("i")
            .close(")").open("{");

        // Body: accumulate over the row selected through idx[i].
        src.new_line() << type_name<res_t>() << " sum = 0;";

        src.new_line() << "for(size_t pos = idx[i], j = row[pos], end = row[pos+1]; j < end; ++j)";
        src.open("{");
        src.new_line() << "sum += val[j] * vec[i + col[j]];";
        src.close("}");

        src.new_line() << "return sum;";

        src.close("}");
    }
Ejemplo n.º 4
0
/// Emits the in-place DFT helper for the given radix followed by the
/// "radix" kernel that uses it.
///
/// The generated kernel takes `x` (input), `y` (output), `p` and `threads`.
/// Each work item with `i < threads` reads `radix.value` inputs spaced
/// `threads` apart, multiplies all but the first by a twiddle factor when
/// `p != 1`, calls `dft<radix>` on them and writes the results to `y`
/// spaced `p` apart.
///
/// \param o      Source generator that receives the emitted code.
/// \param radix  Radix of the butterfly; `radix.value` values are processed
///               per work item.
/// \param invert Forwarded unchanged to `in_place_dft`.
inline void kernel_radix(backend::source_generator &o, pow radix, bool invert) {
    const auto &n = radix.value;

    // Helper function used by the kernel body below.
    o << in_place_dft(n, invert);

    // Kernel signature.
    o.begin_kernel("radix");
    o.begin_kernel_parameters();
    o.template parameter< global_ptr<const T2> >("x");
    o.template parameter< global_ptr<      T2> >("y");
    o.template parameter< cl_uint              >("p");
    o.template parameter< cl_uint              >("threads");
    o.end_kernel_parameters();

    // Work items past `threads` have nothing to do.
    o.new_line() << "const size_t i = " << o.global_id(0) << ";";
    o.new_line() << "if(i >= threads) return;";

    // Position inside the input sequence (0..p-1) and start of this batch.
    o.new_line() << "const size_t k = i % p;";
    o.new_line() << "const size_t batch_offset = " << o.global_id(1) << " * threads * " << n << ";";

    // Load the inputs into v0 .. v(n-1).
    o.new_line() << "x += i + batch_offset;";
    for(size_t m = 0; m < n; ++m)
        o.new_line() << type_name<T2>() << " v" << m << " = x[" << m << " * threads];";

    // Twiddle factors: v0 is left untouched, hence the loop starts at 1.
    o.new_line() << "if(p != 1)";
    o.open("{");
    for(size_t m = 1; m < n; ++m) {
        // Angle step for the m-th value, computed on the host in type T.
        const T alpha = -boost::math::constants::two_pi<T>() * m / n;
        o.new_line() << "v" << m << " = mul(v" << m << ", twiddle(("
            << type_name<T>() << ")" << std::setprecision(16) << alpha
            << " * k / p));";
    }
    o.close("}");

    // In-place DFT over the loaded values; param_list supplies the arguments.
    o.new_line() << "dft" << n;
    param_list(o, "&", 0, n);
    o << ";";

    // Store the results.
    o.new_line() << "const size_t j = k + (i - k) * " << n << ";";
    o.new_line() << "y += j + batch_offset;";
    for(size_t m = 0; m < n; ++m)
        o.new_line() << "y[" << m << " * p] = v" << m << ";";

    o.end_kernel();
}
Ejemplo n.º 5
0
    /// Emits the device function `fname(prm1, prm2)` returning `T`.
    ///
    /// The generated function fills a counter array from `prm1`/`prm2`, sets
    /// every key word to 0x12345678, runs the counter-based generator on
    /// them and returns the result reinterpreted through a union. When the
    /// scalar type `Ts` is `cl_float` or `cl_double`, each raw integer
    /// result is first divided by the maximum value of `cl_uint` or
    /// `cl_ulong` respectively; for any other `Ts` the raw bits are returned.
    ///
    /// \param src   Source generator that receives the emitted code.
    /// \param fname Name of the generated function.
    static void define(backend::source_generator &src, const std::string &fname)
    {
        const size_t N         = cl_vector_length<T>::value;
        const bool   is_float  = std::is_same<Ts, cl_float>::value;
        const bool   is_double = std::is_same<Ts, cl_double>::value;

        // Counter word type and count are selected from the size of T.
        using ctr_t = typename std::conditional<
                    (sizeof(T) < 32), cl_uint, cl_ulong
                >::type;

        const size_t ctr_n = (sizeof(T) <= 8) ? 2 : 4;

        using generator = typename Generator::template function<ctr_t, ctr_n>;

        const size_t key_n = generator::K;

        // The generator's own function has to precede the function using it.
        generator::define(src);

        src.begin_function<T>(fname);
        src.begin_function_parameters();
        src.template parameter<cl_ulong>("prm1");
        src.template parameter<cl_ulong>("prm2");
        src.end_function_parameters();

        // Union overlaying the counter words, the raw/converted scalar
        // results (floating-point Ts only) and the returned value.
        src.new_line() << "union ";
        src.open("{");
        src.new_line() << type_name<ctr_t>() << " ctr[" << ctr_n << "];";
        if (is_float) {
            src.new_line() << type_name<cl_uint>()  << " res_i[" << N << "];";
            src.new_line() << type_name<cl_float>() << " res_f[" << N << "];";
        } else if (is_double) {
            src.new_line() << type_name<cl_ulong>()  << " res_i[" << N << "];";
            src.new_line() << type_name<cl_double>() << " res_f[" << N << "];";
        }
        src.new_line() << type_name<T>() << " res;";
        src.close("} ctr;");

        src.new_line() << type_name<ctr_t>() << " key[" << key_n << "];";

        // Counter words are filled pairwise from the two parameters.
        for(size_t c = 0; c < ctr_n; c += 2)
            src.new_line()
                << "ctr.ctr[" << c     << "] = prm1; "
                << "ctr.ctr[" << c + 1 << "] = prm2;";

        for(size_t k = 0; k < key_n; ++k)
            src.new_line() << "key[" << k << "] = 0x12345678;";

        src.new_line() << generator::name() << "(ctr.ctr, key);";

        // Scale the raw integers by the maximum of their integer type.
        if (is_float) {
            for(size_t e = 0; e < N; ++e)
                src.new_line()
                    << "ctr.res_f[" << e << "] = ctr.res_i[" << e << "] / "
                    << std::numeric_limits<cl_uint>::max() << ".0f;";
        } else if (is_double) {
            for(size_t e = 0; e < N; ++e)
                src.new_line()
                    << "ctr.res_f[" << e << "] = ctr.res_i[" << e << "] / "
                    << std::numeric_limits<cl_ulong>::max() << ".0;";
        }

        src.new_line() << "return ctr.res;";

        src.end_function();
    }
Ejemplo n.º 6
0
    /// Emits the device function `fname(prm1, prm2)` returning `T`.
    ///
    /// For every pair of output components the generated code runs the
    /// counter-based generator, converts two raw integers to `u[0]`, `u[1]`
    /// by dividing by the maximum of their integer type, and combines them as
    /// `sqrt(-2 * log(u[0]))` times the cosine / sine of `2 * pi * u[1]`.
    /// A scalar `T` (N == 1) returns the cosine variant directly; otherwise
    /// both values are stored in a union and the vector is returned at the
    /// end.
    ///
    /// \param src   Source generator that receives the emitted code.
    /// \param fname Name of the generated function.
    static void define(backend::source_generator &src, const std::string &fname)
    {
        const size_t N        = cl_vector_length<T>::value;
        const bool   is_float = std::is_same<Ts, cl_float>::value;
        const size_t ctr_n    = is_float ? 2 : 4;

        using generator = typename Generator::template function<cl_uint, ctr_n>;

        const size_t key_n = generator::K;

        // The generator's own function has to precede the function using it.
        generator::define(src);

        src.begin_function<T>(fname);
        src.begin_function_parameters();
        src.template parameter<cl_ulong>("prm1");
        src.template parameter<cl_ulong>("prm2");
        src.end_function_parameters();

#if defined(VEXCL_BACKEND_JIT)
        src.new_line() << "#define cospi(x) cos(M_PI * (x))";
#endif

        // Union overlaying the counter words with the raw integer results.
        src.new_line() << "union ";
        src.open("{");
        src.new_line() << type_name<cl_uint>() << " ctr[" << ctr_n << "];";
        if (is_float)
            src.new_line() << type_name<cl_uint>()  << " res_i[2];";
        else
            src.new_line() << type_name<cl_ulong>()  << " res_i[2];";
        src.close("} ctr;");
        src.new_line() << type_name<Ts>() << " u[2];";

        src.new_line() << type_name<cl_uint>() << " key[" << key_n << "];";

        // Counter words are filled pairwise from the two parameters.
        for(size_t c = 0; c < ctr_n; c += 2)
            src.new_line()
                << "ctr.ctr[" << c     << "] = prm1; "
                << "ctr.ctr[" << c + 1 << "] = prm2;";

        for(size_t k = 0; k < key_n; ++k)
            src.new_line() << "key[" << k << "] = 0x12345678;";

        // Vector results are assembled component-wise through a union.
        if (N > 1) {
            src.new_line() << "union ";
            src.open("{");
            src.new_line() << type_name<Ts>() << " z[" << N << "];";
            src.new_line() << type_name<T>() << " v;";
            src.close("} res;");
        }

        // One generator call yields two output components.
        for(size_t comp = 0; comp < N; comp += 2) {
            src.new_line() << generator::name() << "(ctr.ctr, key);";

            // Scale the two raw integers by the maximum of their type.
            if (is_float) {
                for(size_t k = 0; k < 2; ++k)
                    src.new_line()
                        << "u[" << k << "] = ctr.res_i[" << k << "] / "
                        << std::numeric_limits<cl_uint>::max() << ".0f;";
            } else {
                for(size_t k = 0; k < 2; ++k)
                    src.new_line()
                        << "u[" << k << "] = ctr.res_i[" << k << "] / "
                        << std::numeric_limits<cl_ulong>::max() << ".0;";
            }

            if (N != 1) {
                src.open("{");

                src.new_line() << type_name<Ts>()
                    << " l = sqrt(-2 * log(u[0])), cs, sn;";

#if defined(VEXCL_BACKEND_CUDA)
                src.new_line() << "sincospi(2 * u[1], &sn, &cs);";
#else
                src.new_line() << "sn = sincos("
                    << std::setprecision(16)
                    << boost::math::constants::two_pi<double>()
                    << " * u[1], &cs);";
#endif
                src.new_line() << "res.z[" << comp     << "] = l * cs;";
                src.new_line() << "res.z[" << comp + 1 << "] = l * sn;";

                src.close("}");
            } else {
                // Scalar result: only the cosine branch is needed.
                src.new_line()
                    << "return sqrt(-2 * log(u[0])) * cospi(2 * u[1]);\n";
            }
        }

        if (N > 1)
            src.new_line() << "return res.v;";

        src.end_function();
    }