/// Emits the device function `mul(a, b)` operating on two-component values.
///
/// The generated function treats `.x` / `.y` as real / imaginary parts and
/// returns the complex product a * b, or a * conj(b) when \p invert is set.
inline void mul_code(backend::source_generator &o, bool invert) {
    // Real and imaginary parts of the result, including the closing brace of
    // the initializer list that is opened below.
    const char *product = invert
        ? "a.x * b.x + a.y * b.y, a.y * b.x - a.x * b.y};"   // b is conjugated
        : "a.x * b.x - a.y * b.y, a.y * b.x + a.x * b.y};";

    o.begin_function<T2>("mul");
    o.begin_function_parameters();
    o.template parameter<T2>("a");
    o.template parameter<T2>("b");
    o.end_function_parameters();

    o.new_line() << type_name<T2>() << " r = {" << product;
    o.new_line() << "return r;";

    o.end_function();
}
inline void twiddle_code(backend::source_generator &o) { o.begin_function<T2>("twiddle"); o.begin_function_parameters(); o.template parameter<T>("alpha"); o.end_function_parameters(); if(std::is_same<T, cl_double>::value) { // use sincos with double since we probably want higher precision #if defined(VEXCL_BACKEND_OPENCL) || defined(VEXCL_BACKEND_COMPUTE) o.new_line() << type_name<T>() << " cs, sn = sincos(alpha, &cs);"; #else o.new_line() << type_name<T>() << " sn, cs;"; o.new_line() << "sincos(alpha, &sn, &cs);"; #endif o.new_line() << type_name<T2>() << " r = {cs, sn};"; } else { // use native with float since we probably want higher performance #if defined(VEXCL_BACKEND_OPENCL) || defined(VEXCL_BACKEND_COMPUTE) o.new_line() << type_name<T2>() << " r = {" "native_cos(alpha), native_sin(alpha)};"; #elif defined(VEXCL_BACKEND_CUDA) o.new_line() << type_name<T>() << " sn, cs;"; o.new_line() << "__sincosf(alpha, &sn, &cs);"; o.new_line() << type_name<T2>() << " r = {cs, sn};"; #elif defined(VEXCL_BACKEND_JIT) o.new_line() << type_name<T>() << " sn, cs;"; o.new_line() << "sincosf(alpha, &sn, &cs);"; o.new_line() << type_name<T2>() << " r = {cs, sn};"; #else # error Unsupported backend! #endif } o.new_line() << "return r;"; o.end_function(); }
/// Emits the device function \p fname, which turns two 64-bit parameters into
/// a value of type T filled by the counter-based generator `Generator`.
///
/// The generated code stores the counter in a union that also exposes the
/// same storage as integers (`res_i`), as floating point values (`res_f`) and
/// as the result type (`res`). After the generator has run on the counter,
/// each integer lane is divided by the maximum value of its integer type and
/// written back as a floating point lane. When Ts is neither cl_float nor
/// cl_double no `res_i` / `res_f` members and no scaling are emitted, and the
/// raw generator output is returned through `res`.
///
/// \param src   Source generator that receives the emitted code.
/// \param fname Name of the emitted function.
static void define(backend::source_generator &src, const std::string &fname) {
    // Counter word type and counter length are derived from the size of T.
    typedef typename std::conditional<
        sizeof(T) < 32, cl_uint, cl_ulong
        >::type counter_t;

    const size_t n_counter = sizeof(T) <= 8 ? 2 : 4;
    const size_t n_lanes   = cl_vector_length<T>::value;

    typedef typename Generator::template function<counter_t, n_counter> rng;
    const size_t n_key = rng::K;

    const bool single_precision = std::is_same<Ts, cl_float>::value;
    const bool double_precision = std::is_same<Ts, cl_double>::value;

    // The underlying generator emits its own definitions first.
    rng::define(src);

    src.begin_function<T>(fname);
    src.begin_function_parameters();
    src.template parameter<cl_ulong>("prm1");
    src.template parameter<cl_ulong>("prm2");
    src.end_function_parameters();

    // Union giving several views of the counter storage.
    src.new_line() << "union ";
    src.open("{");
    src.new_line() << type_name<counter_t>() << " ctr[" << n_counter << "];";
    if (single_precision) {
        src.new_line() << type_name<cl_uint>()  << " res_i[" << n_lanes << "];";
        src.new_line() << type_name<cl_float>() << " res_f[" << n_lanes << "];";
    } else if (double_precision) {
        src.new_line() << type_name<cl_ulong>()  << " res_i[" << n_lanes << "];";
        src.new_line() << type_name<cl_double>() << " res_f[" << n_lanes << "];";
    }
    src.new_line() << type_name<T>() << " res;";
    src.close("} ctr;");

    src.new_line() << type_name<counter_t>() << " key[" << n_key << "];";

    // Counter words are filled pairwise from the two function parameters.
    for (size_t k = 0; k < n_counter; k += 2) {
        src.new_line()
            << "ctr.ctr[" << k     << "] = prm1; "
            << "ctr.ctr[" << k + 1 << "] = prm2;";
    }

    // Every key word gets the same fixed constant.
    for (size_t k = 0; k < n_key; ++k) {
        src.new_line() << "key[" << k << "] = 0x12345678;";
    }

    src.new_line() << rng::name() << "(ctr.ctr, key);";

    // Scale each integer lane by the maximum of its integer type.
    if (single_precision) {
        for (size_t lane = 0; lane < n_lanes; ++lane) {
            src.new_line()
                << "ctr.res_f[" << lane << "] = ctr.res_i[" << lane << "] / "
                << std::numeric_limits<cl_uint>::max() << ".0f;";
        }
    } else if (double_precision) {
        for (size_t lane = 0; lane < n_lanes; ++lane) {
            src.new_line()
                << "ctr.res_f[" << lane << "] = ctr.res_i[" << lane << "] / "
                << std::numeric_limits<cl_ulong>::max() << ".0;";
        }
    }

    src.new_line() << "return ctr.res;";

    src.end_function();
}
static void define(backend::source_generator &src, const std::string &fname) { const size_t N = cl_vector_length<T>::value; const bool is_float = std::is_same<Ts, cl_float>::value; const size_t ctr_n = is_float ? 2 : 4; typedef typename Generator::template function<cl_uint, ctr_n> generator; const size_t key_n = generator::K; generator::define(src); src.begin_function<T>(fname); src.begin_function_parameters(); src.template parameter<cl_ulong>("prm1"); src.template parameter<cl_ulong>("prm2"); src.end_function_parameters(); #if defined(VEXCL_BACKEND_JIT) src.new_line() << "#define cospi(x) cos(M_PI * (x))"; #endif src.new_line() << "union "; src.open("{"); src.new_line() << type_name<cl_uint>() << " ctr[" << ctr_n << "];"; if (is_float) { src.new_line() << type_name<cl_uint>() << " res_i[2];"; } else { src.new_line() << type_name<cl_ulong>() << " res_i[2];"; } src.close("} ctr;"); src.new_line() << type_name<Ts>() << " u[2];"; src.new_line() << type_name<cl_uint>() << " key[" << key_n << "];"; for(size_t i = 0; i < ctr_n; i += 2) src.new_line() << "ctr.ctr[" << i << "] = prm1; " << "ctr.ctr[" << i + 1 << "] = prm2;"; for(size_t i = 0; i < key_n; ++i) src.new_line() << "key[" << i << "] = 0x12345678;"; if (N > 1) { src.new_line() << "union "; src.open("{"); src.new_line() << type_name<Ts>() << " z[" << N << "];"; src.new_line() << type_name<T>() << " v;"; src.close("} res;"); } for(size_t i = 0 ; i < N ; i += 2) { src.new_line() << generator::name() << "(ctr.ctr, key);"; if(is_float) { for(size_t i = 0; i < 2; ++i) src.new_line() << "u[" << i << "] = ctr.res_i[" << i << "] / " << std::numeric_limits<cl_uint>::max() << ".0f;"; } else { for(size_t i = 0; i < 2; ++i) src.new_line() << "u[" << i << "] = ctr.res_i[" << i << "] / " << std::numeric_limits<cl_ulong>::max() << ".0;"; } if(N == 1) { src.new_line() << "return sqrt(-2 * log(u[0])) * cospi(2 * u[1]);\n"; } else { src.open("{"); src.new_line() << type_name<Ts>() << " l = sqrt(-2 * log(u[0])), cs, sn;"; #if 
defined(VEXCL_BACKEND_CUDA) src.new_line() << "sincospi(2 * u[1], &sn, &cs);"; #else src.new_line() << "sn = sincos(" << std::setprecision(16) << boost::math::constants::two_pi<double>() << " * u[1], &cs);"; #endif src.new_line() << "res.z[" << i << "] = l * cs;"; src.new_line() << "res.z[" << i + 1 << "] = l * sn;"; src.close("}"); } } if (N > 1) src.new_line() << "return res.v;"; src.end_function(); }