예제 #1
0
파일: kernels.hpp 프로젝트: ddemidov/vexcl
/// Emits the source of a device function `mul(a, b)` that multiplies two
/// complex numbers stored as two-component values of type T2 (the emitted
/// formulas treat `.x` as the real part and `.y` as the imaginary part).
///
/// \param o      Source generator that receives the function definition.
/// \param invert When true, the emitted function computes a * conj(b);
///               otherwise it computes the plain product a * b.
inline void mul_code(backend::source_generator &o, bool invert) {
    o.begin_function<T2>("mul");
    o.begin_function_parameters();
    o.template parameter<T2>("a");
    o.template parameter<T2>("b");
    o.end_function_parameters();

    // Only the signs of the cross terms differ between the two variants:
    // conjugating b flips the sign of every term that contains b.y.
    const char *components = invert
        ? "a.x * b.x + a.y * b.y, a.y * b.x - a.x * b.y};"
        : "a.x * b.x - a.y * b.y, a.y * b.x + a.x * b.y};";

    o.new_line() << type_name<T2>() << " r = {" << components;
    o.new_line() << "return r;";
    o.end_function();
}
예제 #2
0
파일: kernels.hpp 프로젝트: ddemidov/vexcl
/// Emits the source of a device function `twiddle(alpha)` that returns the
/// pair {cos(alpha), sin(alpha)} as a value of type T2.
///
/// The generated body depends on the scalar type T and on the backend
/// selected at compile time through the VEXCL_BACKEND_* macros.
///
/// \param o Source generator that receives the function definition.
inline void twiddle_code(backend::source_generator &o) {
    const bool double_precision = std::is_same<T, cl_double>::value;

    o.begin_function<T2>("twiddle");
    o.begin_function_parameters();
    o.template parameter<T>("alpha");
    o.end_function_parameters();

    if (!double_precision) {
        // Non-double types: favour speed and use the fast/native intrinsics.
#if defined(VEXCL_BACKEND_OPENCL) || defined(VEXCL_BACKEND_COMPUTE)
        o.new_line() << type_name<T2>()
            << " r = {native_cos(alpha), native_sin(alpha)};";
#elif defined(VEXCL_BACKEND_CUDA)
        o.new_line() << type_name<T>() << " sn, cs;";
        o.new_line() << "__sincosf(alpha, &sn, &cs);";
        o.new_line() << type_name<T2>() << " r = {cs, sn};";
#elif defined(VEXCL_BACKEND_JIT)
        o.new_line() << type_name<T>() << " sn, cs;";
        o.new_line() << "sincosf(alpha, &sn, &cs);";
        o.new_line() << type_name<T2>() << " r = {cs, sn};";
#else
#  error Unsupported backend!
#endif
    } else {
        // Double: favour accuracy and use the full-precision sincos.
#if defined(VEXCL_BACKEND_OPENCL) || defined(VEXCL_BACKEND_COMPUTE)
        // Generated code takes the sine from the return value of sincos and
        // the cosine through the pointer argument.
        o.new_line() << type_name<T>() << " cs, sn = sincos(alpha, &cs);";
#else
        // Generated code receives both results through pointer arguments.
        o.new_line() << type_name<T>() << " sn, cs;";
        o.new_line() << "sincos(alpha, &sn, &cs);";
#endif
        o.new_line() << type_name<T2>() << " r = {cs, sn};";
    }

    o.new_line() << "return r;";
    o.end_function();
}
예제 #3
0
파일: random.hpp 프로젝트: ddemidov/vexcl
    /// Emits the device function `fname(prm1, prm2)` that produces one value
    /// of type T from the counter-based generator `Generator`.
    ///
    /// The generated function fills a counter array from prm1/prm2, uses a
    /// fixed key (0x12345678 in every slot), runs the generator in place and
    /// returns the counter storage reinterpreted as T through a union. When
    /// the scalar type Ts is cl_float or cl_double, every raw integer lane is
    /// first divided by the maximum value of its integer type.
    ///
    /// \param src   Source generator that receives the definitions.
    /// \param fname Name of the emitted function.
    static void define(backend::source_generator &src, const std::string &fname)
    {
        const bool   as_float  = std::is_same<Ts, cl_float>::value;
        const bool   as_double = std::is_same<Ts, cl_double>::value;
        const size_t width     = cl_vector_length<T>::value;
        const size_t n_ctr     = (sizeof(T) <= 8) ? 2 : 4;

        // Counter word type is chosen from the size of T.
        using counter_t = typename std::conditional<
            (sizeof(T) < 32), cl_uint, cl_ulong>::type;
        using rng = typename Generator::template function<counter_t, n_ctr>;

        const size_t n_key = rng::K;

        // The generator routine itself has to be defined ahead of its user.
        rng::define(src);

        src.begin_function<T>(fname);
        src.begin_function_parameters();
        src.template parameter<cl_ulong>("prm1");
        src.template parameter<cl_ulong>("prm2");
        src.end_function_parameters();

        // Union overlaying the counter words, the raw/converted lanes and
        // the final return value.
        src.new_line() << "union ";
        src.open("{");
        src.new_line() << type_name<counter_t>() << " ctr[" << n_ctr << "];";
        if (as_float) {
            src.new_line() << type_name<cl_uint>() << " res_i[" << width << "];";
            src.new_line() << type_name<cl_float>() << " res_f[" << width << "];";
        } else if (as_double) {
            src.new_line() << type_name<cl_ulong>() << " res_i[" << width << "];";
            src.new_line() << type_name<cl_double>() << " res_f[" << width << "];";
        }
        src.new_line() << type_name<T>() << " res;";
        src.close("} ctr;");

        src.new_line() << type_name<counter_t>() << " key[" << n_key << "];";

        // Counter words are seeded pairwise with (prm1, prm2).
        for (size_t even = 0; even < n_ctr; even += 2) {
            const size_t odd = even + 1;
            src.new_line()
                << "ctr.ctr[" << even << "] = prm1; "
                << "ctr.ctr[" << odd  << "] = prm2;";
        }

        for (size_t k = 0; k < n_key; ++k) {
            src.new_line() << "key[" << k << "] = 0x12345678;";
        }

        src.new_line() << rng::name() << "(ctr.ctr, key);";

        // Scale raw integers by the maximum of their type for real outputs.
        if (as_float) {
            for (size_t lane = 0; lane < width; ++lane) {
                src.new_line()
                    << "ctr.res_f[" << lane << "] = ctr.res_i[" << lane << "] / "
                    << std::numeric_limits<cl_uint>::max() << ".0f;";
            }
        } else if (as_double) {
            for (size_t lane = 0; lane < width; ++lane) {
                src.new_line()
                    << "ctr.res_f[" << lane << "] = ctr.res_i[" << lane << "] / "
                    << std::numeric_limits<cl_ulong>::max() << ".0;";
            }
        }

        src.new_line() << "return ctr.res;";

        src.end_function();
    }
예제 #4
0
파일: random.hpp 프로젝트: ddemidov/vexcl
    /// Emits the device function `fname(prm1, prm2)` that produces one value
    /// of type T (scalar type Ts) from pairs of generator outputs.
    ///
    /// For every pair of output lanes the generated code runs the
    /// counter-based generator, divides two raw integers by the maximum of
    /// their type to get u[0] and u[1], and then combines them as
    /// sqrt(-2 log u[0]) * {cos, sin}(2 pi u[1]) (the Box-Muller form). A
    /// scalar T returns the cosine branch directly; vector types collect the
    /// lanes in a union and return it at the end.
    ///
    /// \param src   Source generator that receives the definitions.
    /// \param fname Name of the emitted function.
    static void define(backend::source_generator &src, const std::string &fname)
    {
        const bool   single_precision = std::is_same<Ts, cl_float>::value;
        const size_t width            = cl_vector_length<T>::value;
        const size_t n_ctr            = single_precision ? 2 : 4;

        using rng = typename Generator::template function<cl_uint, n_ctr>;

        const size_t n_key = rng::K;

        // The generator routine itself has to be defined ahead of its user.
        rng::define(src);

        src.begin_function<T>(fname);
        src.begin_function_parameters();
        src.template parameter<cl_ulong>("prm1");
        src.template parameter<cl_ulong>("prm2");
        src.end_function_parameters();

#if defined(VEXCL_BACKEND_JIT)
        // Presumably cospi is not available to JIT-compiled host code, so an
        // equivalent macro is emitted -- TODO confirm.
        src.new_line() << "#define cospi(x) cos(M_PI * (x))";
#endif

        // Union overlaying the counter words and the raw integer results.
        src.new_line() << "union ";
        src.open("{");
        src.new_line() << type_name<cl_uint>() << " ctr[" << n_ctr << "];";
        if (single_precision)
            src.new_line() << type_name<cl_uint>()  << " res_i[2];";
        else
            src.new_line() << type_name<cl_ulong>()  << " res_i[2];";
        src.close("} ctr;");
        src.new_line() << type_name<Ts>() << " u[2];";

        src.new_line() << type_name<cl_uint>() << " key[" << n_key << "];";

        // Counter words are seeded pairwise with (prm1, prm2).
        for (size_t even = 0; even < n_ctr; even += 2) {
            const size_t odd = even + 1;
            src.new_line()
                << "ctr.ctr[" << even << "] = prm1; "
                << "ctr.ctr[" << odd  << "] = prm2;";
        }

        for (size_t k = 0; k < n_key; ++k) {
            src.new_line() << "key[" << k << "] = 0x12345678;";
        }

        // Vector results are assembled lane by lane through a second union.
        if (width > 1) {
            src.new_line() << "union ";
            src.open("{");
            src.new_line() << type_name<Ts>() << " z[" << width << "];";
            src.new_line() << type_name<T>() << " v;";
            src.close("} res;");
        }

        // Each generator call yields two output lanes.
        for (size_t lane = 0; lane < width; lane += 2) {
            src.new_line() << rng::name() << "(ctr.ctr, key);";

            // Scale the two raw integers by the maximum of their type.
            for (size_t j = 0; j < 2; ++j) {
                if (single_precision) {
                    src.new_line()
                        << "u[" << j << "] = ctr.res_i[" << j << "] / "
                        << std::numeric_limits<cl_uint>::max() << ".0f;";
                } else {
                    src.new_line()
                        << "u[" << j << "] = ctr.res_i[" << j << "] / "
                        << std::numeric_limits<cl_ulong>::max() << ".0;";
                }
            }

            if (width == 1) {
                // Scalar result: only the cosine branch is needed.
                src.new_line()
                    << "return sqrt(-2 * log(u[0])) * cospi(2 * u[1]);\n";
                continue;
            }

            src.open("{");

            src.new_line() << type_name<Ts>()
                << " l = sqrt(-2 * log(u[0])), cs, sn;";

#if defined(VEXCL_BACKEND_CUDA)
            src.new_line() << "sincospi(2 * u[1], &sn, &cs);";
#else
            // The 2*pi constant is written out with 16 significant digits.
            src.new_line() << "sn = sincos("
                << std::setprecision(16)
                << boost::math::constants::two_pi<double>()
                << " * u[1], &cs);";
#endif
            src.new_line() << "res.z[" << lane       << "] = l * cs;";
            src.new_line() << "res.z[" << (lane + 1) << "] = l * sn;";

            src.close("}");
        }

        if (width > 1) {
            src.new_line() << "return res.v;";
        }

        src.end_function();
    }