Beispiel #1
0
        static void local_terminal_init(const Vector &x, backend::source_generator &src,
            const backend::command_queue &q, const std::string &prm_name,
            detail::kernel_generator_state_ptr state)
        {
            typedef typename detail::return_type<Vector>::type x_type;
            typedef decltype(std::declval<Val>() * std::declval<x_type>()) res_type;

            src.new_line()
                << type_name<res_type>() << " " << prm_name << "_sum = "
                << res_type() << ";";
            src.new_line() << "if (" << prm_name << "_ptr)";
            src.open("{");
            src.new_line() << type_name<Ptr>() << " row_beg = " << prm_name << "_ptr[idx];";
            src.new_line() << type_name<Ptr>() << " row_end = " << prm_name << "_ptr[idx+1];";
            src.new_line() << "for(" << type_name<Ptr>() << " j = row_beg; j < row_end; ++j)";
            src.open("{");

            src.new_line() << type_name<Col>() << " idx = " << prm_name << "_col[j];";

            detail::output_local_preamble init_x(src, q, prm_name + "_x", state);
            boost::proto::eval(boost::proto::as_child(x), init_x);

            src.new_line() << prm_name << "_sum += " << prm_name << "_val[j] * ";

            detail::vector_expr_context expr_x(src, q, prm_name + "_x", state);
            boost::proto::eval(boost::proto::as_child(x), expr_x);

            src << ";";

            src.close("}");
            src.close("}");
        }
Beispiel #2
0
// Emits the device function "twiddle(alpha)" which returns the pair
// {cos(alpha), sin(alpha)} as a T2 value.
//
// When T is cl_double the generated code uses sincos (precision preferred);
// for any other T it uses the native/intrinsic variants (speed preferred).
// The exact calls depend on whether VEXCL_BACKEND_CUDA is defined.
inline void twiddle_code(backend::source_generator &o) {
    const bool use_double = std::is_same<T, cl_double>::value;

    o.function<T2>("twiddle").open("(")
        .template parameter<T>("alpha")
    .close(")").open("{");

#ifndef VEXCL_BACKEND_CUDA
    if (use_double) {
        // Two-argument sincos: returns the sine, stores the cosine.
        o.new_line() << type_name<T>() << " cs, sn = sincos(alpha, &cs);";
        o.new_line() << type_name<T2>() << " r = {cs, sn};";
    } else {
        o.new_line() << type_name<T2>() << " r = {";
        o << "native_cos(alpha), native_sin(alpha)};";
    }
#else
    // Three-argument form; only the function name differs between the
    // double and the non-double case.
    o.new_line() << type_name<T>() << " sn, cs;";
    o.new_line() << (use_double ? "sincos" : "__sincosf") << "(alpha, &sn, &cs);";
    o.new_line() << type_name<T2>() << " r = {cs, sn};";
#endif

    o.new_line() << "return r;";
    o.close("}");
}
Beispiel #3
0
    // Emits the declaration "<T> temp_<Tag> = <expression>;" for a temporary,
    // at most once per Tag for a given generator state.
    //
    //   src      - generator receiving the emitted kernel source
    //   term     - the temporary; only term.expr is evaluated here
    //   queue    - command queue handed to the expression contexts
    //   prm_name - parameter prefix handed to the expression contexts
    //   state    - generator state; the set of already emitted tags is kept
    //              in it under the key "tmp_locinit"
    static void get(backend::source_generator &src,
            const temporary<T, Tag, Expr> &term,
            const backend::command_queue &queue, const std::string &prm_name,
            detail::kernel_generator_state_ptr state)
    {
        const std::string key("tmp_locinit");

        // Look up the bookkeeping set, creating an empty one on first use.
        auto entry = state->find(key);
        if (entry == state->end()) {
            entry = state->insert(
                    std::make_pair(key, boost::any(std::set<size_t>()))
                    ).first;
        }

        std::set<size_t> &emitted =
            boost::any_cast< std::set<size_t>& >(entry->second);

        // insert() reports through .second whether Tag was new; a tag that was
        // already recorded means the temporary has been declared before.
        if (!emitted.insert(Tag).second) return;

        detail::output_local_preamble preamble_ctx(src, queue, prm_name, state);
        boost::proto::eval(boost::proto::as_child(term.expr), preamble_ctx);

        src.new_line() << type_name<T>() << " temp_" << Tag << " = ";

        detail::vector_expr_context value_ctx(src, queue, prm_name, state);
        boost::proto::eval(boost::proto::as_child(term.expr), value_ctx);

        src << ";";
    }
Beispiel #4
0
// Emits the device function "mul(a, b)" which multiplies two T2 values as
// complex numbers (x = real part, y = imaginary part).
//
//   o      - generator receiving the emitted source
//   invert - when true, b is conjugated before the multiplication
inline void mul_code(backend::source_generator &o, bool invert) {
    // Real and imaginary part of the product; conjugating b flips the sign
    // of every term that contains b.y.
    const char *real_part = invert
        ? "a.x * b.x + a.y * b.y"
        : "a.x * b.x - a.y * b.y";
    const char *imag_part = invert
        ? "a.y * b.x - a.x * b.y"
        : "a.y * b.x + a.x * b.y";

    o.function<T2>("mul").open("(")
        .template parameter<T2>("a")
        .template parameter<T2>("b")
    .close(")").open("{");

    o.new_line() << type_name<T2>() << " r = {"
        << real_part << ", " << imag_part << "};";

    o.new_line() << "return r;";
    o.close("}");
}
Beispiel #5
0
    // Emits the device function "<prm_name>_spmv(idx, row, col, val, vec, i)".
    //
    // The generated function looks up pos = idx[i], walks the entries
    // row[pos] .. row[pos+1]-1 and returns the sum of val[j] * vec[i + col[j]],
    // i.e. col holds offsets relative to i. Only src and prm_name are used;
    // the remaining arguments are ignored.
    static void get(backend::source_generator &src,
            const ccsr_product<val_t, col_t, idx_t, T>&,
            const backend::command_queue&, const std::string &prm_name,
            detail::kernel_generator_state_ptr)
    {
        typedef decltype(val_t() * T()) res_t;

        const std::string fname = prm_name + "_spmv";

        // Signature.
        src.function<res_t>(fname)
            .open("(")
                .template parameter< global_ptr<const idx_t> >("idx")
                .template parameter< global_ptr<const idx_t> >("row")
                .template parameter< global_ptr<const col_t> >("col")
                .template parameter< global_ptr<const val_t> >("val")
                .template parameter< global_ptr<const T>     >("vec")
                .template parameter< size_t >("i")
            .close(")").open("{");

        // Body: accumulate over the entries of the row selected by idx[i].
        src.new_line() << type_name<res_t>() << " sum = 0;";
        src.new_line()
            << "for(size_t pos = idx[i], j = row[pos], end = row[pos+1]; "
            << "j < end; ++j)";
        src.open("{");
        src.new_line() << "sum += val[j] * vec[i + col[j]];";
        src.close("}");

        src.new_line() << "return sum;";
        src.close("}");
    }
Beispiel #6
0
// Emits the device function "mul(a, b)" which multiplies two T2 values as
// complex numbers (x = real part, y = imaginary part).
//
//   o      - generator receiving the emitted source
//   invert - when true, b is conjugated before the multiplication
inline void mul_code(backend::source_generator &o, bool invert) {
    // Conjugating b flips the sign of every term that contains b.y.
    const char *real_part = invert
        ? "a.x * b.x + a.y * b.y"
        : "a.x * b.x - a.y * b.y";
    const char *imag_part = invert
        ? "a.y * b.x - a.x * b.y"
        : "a.y * b.x + a.x * b.y";

    // Signature.
    o.begin_function<T2>("mul");
    o.begin_function_parameters();
    o.template parameter<T2>("a");
    o.template parameter<T2>("b");
    o.end_function_parameters();

    // Body.
    o.new_line() << type_name<T2>() << " r = {"
        << real_part << ", " << imag_part << "};";
    o.new_line() << "return r;";

    o.end_function();
}
Beispiel #7
0
        static void local_terminal_init(const Vector &x, backend::source_generator &src,
            const backend::command_queue &q, const std::string &prm_name,
            detail::kernel_generator_state_ptr state)
        {
            typedef typename detail::return_type<Vector>::type x_type;
            typedef spmv_ops_impl<Val, x_type> spmv_ops;

            spmv_ops::decl_accum_var(src, prm_name + "_sum");
            src.open("{");

            // ELL part
            src.new_line() << "for(size_t j = 0; j < " << prm_name << "_ell_width; ++j)";
            src.open("{");
            src.new_line() << type_name<Col>() << " nnz_idx = idx + j * " << prm_name << "_ell_pitch;";
            src.new_line() << type_name<Col>() << " c = " << prm_name << "_ell_col[nnz_idx];";
            src.new_line() << "if (c != (" << type_name<Col>() << ")(-1))";
            src.open("{");

            src.new_line() << type_name<Col>() << " idx = c;";

            {
                detail::output_local_preamble init_x(src, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), init_x);

                backend::source_generator vec_value;
                detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), expr_x);

                spmv_ops::append_product(src, prm_name + "_sum", prm_name + "_ell_val[nnz_idx]", vec_value.str());
            }

            src.close("} else break;");
            src.close("}");

            // CSR part
            src.new_line() << "if (" << prm_name << "_csr_ptr)";
            src.open("{");
            src.new_line() << type_name<Ptr>() << " csr_beg = " << prm_name << "_csr_ptr[idx];";
            src.new_line() << type_name<Ptr>() << " csr_end = " << prm_name << "_csr_ptr[idx+1];";
            src.new_line() << "for(" << type_name<Ptr>() << " j = csr_beg; j < csr_end; ++j)";
            src.open("{");

            src.new_line() << type_name<Col>() << " idx = " << prm_name << "_csr_col[j];";

            {
                detail::output_local_preamble init_x(src, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), init_x);

                backend::source_generator vec_value;
                detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state);
                boost::proto::eval(boost::proto::as_child(x), expr_x);

                spmv_ops::append_product(src, prm_name + "_sum", prm_name + "_csr_val[j]", vec_value.str());
            }

            src.close("}");
            src.close("}");
            src.close("}");
        }
Beispiel #8
0
// Emits the "radix" kernel together with the in-place DFT helper it calls.
//
//   o      - generator receiving the emitted source
//   radix  - radix descriptor; radix.value is the number of points each
//            work item reads, transforms and writes
//   invert - forwarded to in_place_dft
//
// Generated kernel, per work item i < threads: load radix.value inputs spaced
// "threads" apart, multiply inputs 1.. by twiddle factors unless p == 1, call
// dft<radix.value> on them and store the results spaced "p" apart.
inline void kernel_radix(backend::source_generator &o, pow radix, bool invert) {
    // Helper function used by the kernel body.
    o << in_place_dft(radix.value, invert);

    // Kernel signature.
    o.kernel("radix").open("(")
        .template parameter< global_ptr<const T2> >("x")
        .template parameter< global_ptr<      T2> >("y")
        .template parameter< cl_uint              >("p")
        .template parameter< cl_uint              >("threads")
    .close(")").open("{");

    // Work item index and bounds check.
    o.new_line() << "const size_t i = " << o.global_id(0) << ";";
    o.new_line() << "if(i >= threads) return;";

    // k: position inside the current sub-sequence, in 0..p-1.
    o.new_line() << "const size_t k = i % p;";
    // The second global dimension selects the batch.
    o.new_line() << "const size_t batch_offset = " << o.global_id(1)
        << " * threads * " << radix.value << ";";

    // Load the inputs into v0, v1, ...
    o.new_line() << "x += i + batch_offset;";
    for(size_t n = 0; n < radix.value; ++n) {
        o.new_line() << type_name<T2>() << " v" << n
            << " = x[" << n << " * threads];";
    }

    // Twiddle every input except v0; skipped by the kernel when p == 1.
    o.new_line() << "if(p != 1)";
    o.open("{");
    for(size_t n = 1; n < radix.value; ++n) {
        const T alpha = -boost::math::constants::two_pi<T>() * n / radix.value;
        o.new_line() << "v" << n << " = mul(v" << n << ", twiddle(("
            << type_name<T>() << ")"
            << std::setprecision(16) << alpha << " * k / p));";
    }
    o.close("}");

    // In-place DFT over v0, v1, ...
    o.new_line() << "dft" << radix.value;
    param_list(o, "&", 0, radix.value);
    o << ";";

    // Store the results.
    o.new_line() << "const size_t j = k + (i - k) * " << radix.value << ";";
    o.new_line() << "y += j + batch_offset;";
    for(size_t n = 0; n < radix.value; ++n) {
        o.new_line() << "y[" << n << " * p] = v" << n << ";";
    }
    o.close("}");
}
Beispiel #9
0
// Emits the device function "twiddle(alpha)" which returns the pair
// {cos(alpha), sin(alpha)} as a T2 value.
//
// When T is cl_double the generated code uses sincos (precision preferred);
// for any other T it uses the faster native/single-precision variants. The
// exact calls are selected by the backend macro; compilation fails for a
// backend that is none of OPENCL, COMPUTE, CUDA or JIT.
inline void twiddle_code(backend::source_generator &o) {
    const bool use_double = std::is_same<T, cl_double>::value;

    o.begin_function<T2>("twiddle");
    o.begin_function_parameters();
    o.template parameter<T>("alpha");
    o.end_function_parameters();

#if defined(VEXCL_BACKEND_OPENCL) || defined(VEXCL_BACKEND_COMPUTE)
    if (use_double) {
        // Two-argument sincos: returns the sine, stores the cosine.
        o.new_line() << type_name<T>() << " cs, sn = sincos(alpha, &cs);";
        o.new_line() << type_name<T2>() << " r = {cs, sn};";
    } else {
        o.new_line() << type_name<T2>() << " r = {";
        o << "native_cos(alpha), native_sin(alpha)};";
    }
#elif defined(VEXCL_BACKEND_CUDA)
    // Three-argument form; only the function name depends on the precision.
    o.new_line() << type_name<T>() << " sn, cs;";
    o.new_line() << (use_double ? "sincos" : "__sincosf") << "(alpha, &sn, &cs);";
    o.new_line() << type_name<T2>() << " r = {cs, sn};";
#elif defined(VEXCL_BACKEND_JIT)
    // Three-argument form; only the function name depends on the precision.
    o.new_line() << type_name<T>() << " sn, cs;";
    o.new_line() << (use_double ? "sincos" : "sincosf") << "(alpha, &sn, &cs);";
    o.new_line() << type_name<T2>() << " r = {cs, sn};";
#else
#  error Unsupported backend!
#endif

    o.new_line() << "return r;";
    o.end_function();
}
Beispiel #10
0
    // Emits the device function "<fname>(prm1, prm2)" returning a T filled
    // with values produced by the counter-based Generator.
    //
    // Generated code: a union overlays the counter words, the integer view
    // (res_i), the floating-point view (res_f) and the T result. The counter
    // is filled with prm1/prm2 in alternation, every key word is set to
    // 0x12345678, the generator is run on (counter, key), and each integer
    // word is divided by the maximum of its integer type, which scales it
    // into [0, 1]. The conversion is emitted only when Ts is cl_float or
    // cl_double.
    static void define(backend::source_generator &src, const std::string &fname)
    {
        const size_t N         = cl_vector_length<T>::value;
        const bool   as_float  = std::is_same<Ts, cl_float>::value;
        const bool   as_double = std::is_same<Ts, cl_double>::value;

        // Counter word type and count are chosen from sizeof(T).
        typedef typename std::conditional<
                    (sizeof(T) < 32), cl_uint, cl_ulong
                >::type ctr_t;
        const size_t ctr_n = sizeof(T) <= 8 ? 2 : 4;

        typedef typename Generator::template function<ctr_t, ctr_n> generator;
        const size_t key_n = generator::K;

        // The generator's own function has to be emitted first.
        generator::define(src);

        src.begin_function<T>(fname);
        src.begin_function_parameters();
        src.template parameter<cl_ulong>("prm1");
        src.template parameter<cl_ulong>("prm2");
        src.end_function_parameters();

        // Union overlaying counter, integer view, real view and result.
        src.new_line() << "union ";
        src.open("{");
        src.new_line() << type_name<ctr_t>() << " ctr[" << ctr_n << "];";
        if (as_float) {
            src.new_line() << type_name<cl_uint>()  << " res_i[" << N << "];";
            src.new_line() << type_name<cl_float>() << " res_f[" << N << "];";
        } else if (as_double) {
            src.new_line() << type_name<cl_ulong>()  << " res_i[" << N << "];";
            src.new_line() << type_name<cl_double>() << " res_f[" << N << "];";
        }
        src.new_line() << type_name<T>() << " res;";
        src.close("} ctr;");

        src.new_line() << type_name<ctr_t>() << " key[" << key_n << "];";

        // Counter words are seeded pairwise from the two parameters.
        for(size_t w = 0; w < ctr_n; w += 2) {
            src.new_line() << "ctr.ctr[" << w << "] = prm1; ";
            src << "ctr.ctr[" << w + 1 << "] = prm2;";
        }

        // Fixed key.
        for(size_t w = 0; w < key_n; ++w)
            src.new_line() << "key[" << w << "] = 0x12345678;";

        src.new_line() << generator::name() << "(ctr.ctr, key);";

        // Integer -> real conversion; both cases share everything but the
        // divisor literal.
        if (as_float || as_double) {
            for(size_t c = 0; c < N; ++c) {
                src.new_line()
                    << "ctr.res_f[" << c << "] = ctr.res_i[" << c << "] / ";
                if (as_float)
                    src << std::numeric_limits<cl_uint>::max() << ".0f;";
                else
                    src << std::numeric_limits<cl_ulong>::max() << ".0;";
            }
        }

        src.new_line() << "return ctr.res;";

        src.end_function();
    }
Beispiel #11
0
    // Emits the device function "<fname>(prm1, prm2)" returning a T whose
    // components are computed as sqrt(-2 log u0) * cos/sin(2 pi u1) (the
    // Box-Muller form) from pairs u0, u1 produced by the counter-based
    // Generator.
    //
    // Generated code: the counter is filled with prm1/prm2 in alternation,
    // every key word is set to 0x12345678, and for every pair of output
    // components the generator is run once and its two integer outputs are
    // divided by the maximum of their integer type to give u[0], u[1]. For
    // N == 1 the function returns the cosine variant directly; otherwise both
    // variants are stored in res.z and res.v is returned at the end.
    static void define(backend::source_generator &src, const std::string &fname)
    {
        const size_t N        = cl_vector_length<T>::value;
        const bool   is_float = std::is_same<Ts, cl_float>::value;
        const size_t ctr_n    = is_float ? 2 : 4;

        typedef typename Generator::template function<cl_uint, ctr_n> generator;

        const size_t key_n = generator::K;

        // The generator's own function has to be emitted first.
        generator::define(src);

        src.begin_function<T>(fname);
        src.begin_function_parameters();
        src.template parameter<cl_ulong>("prm1");
        src.template parameter<cl_ulong>("prm2");
        src.end_function_parameters();

#if defined(VEXCL_BACKEND_JIT)
        // The JIT backend gets cospi as a macro on top of cos.
        src.new_line() << "#define cospi(x) cos(M_PI * (x))";
#endif

        // Union overlaying the counter words with the integer outputs.
        src.new_line() << "union ";
        src.open("{");
        src.new_line() << type_name<cl_uint>() << " ctr[" << ctr_n << "];";
        src.new_line()
            << (is_float ? type_name<cl_uint>() : type_name<cl_ulong>())
            << " res_i[2];";
        src.close("} ctr;");
        src.new_line() << type_name<Ts>() << " u[2];";

        src.new_line() << type_name<cl_uint>() << " key[" << key_n << "];";

        // Counter words are seeded pairwise from the two parameters.
        for(size_t w = 0; w < ctr_n; w += 2) {
            src.new_line() << "ctr.ctr[" << w << "] = prm1; ";
            src << "ctr.ctr[" << w + 1 << "] = prm2;";
        }

        // Fixed key.
        for(size_t w = 0; w < key_n; ++w)
            src.new_line() << "key[" << w << "] = 0x12345678;";

        // Vector results are assembled component-wise through a union.
        if (N > 1) {
            src.new_line() << "union ";
            src.open("{");
            src.new_line() << type_name<Ts>() << " z[" << N << "];";
            src.new_line() << type_name<T>() << " v;";
            src.close("} res;");
        }

        // One generator call yields two output components.
        for(size_t comp = 0; comp < N; comp += 2) {
            src.new_line() << generator::name() << "(ctr.ctr, key);";

            // u[k] = integer output k divided by the integer type's maximum.
            for(size_t k = 0; k < 2; ++k) {
                src.new_line() << "u[" << k << "] = ctr.res_i[" << k << "] / ";
                if (is_float)
                    src << std::numeric_limits<cl_uint>::max() << ".0f;";
                else
                    src << std::numeric_limits<cl_ulong>::max() << ".0;";
            }

            if (N == 1) {
                src.new_line()
                    << "return sqrt(-2 * log(u[0])) * cospi(2 * u[1]);\n";
                continue;
            }

            src.open("{");

            src.new_line() << type_name<Ts>()
                << " l = sqrt(-2 * log(u[0])), cs, sn;";

#if defined(VEXCL_BACKEND_CUDA)
            src.new_line() << "sincospi(2 * u[1], &sn, &cs);";
#else
            src.new_line() << "sn = sincos(";
            src << std::setprecision(16)
                << boost::math::constants::two_pi<double>()
                << " * u[1], &cs);";
#endif
            src.new_line() << "res.z[" << comp     << "] = l * cs;";
            src.new_line() << "res.z[" << comp + 1 << "] = l * sn;";

            src.close("}");
        }

        if (N > 1)
            src.new_line() << "return res.v;";

        src.end_function();
    }