Beispiel #1
0
always_inline VecType vec_log_float(VecType x)
{
    /* Polynomial approximation of the natural logarithm, evaluated per lane.
     *
     * Steps:
     *   1. frexp_float() reduces the argument and writes an integer exponent
     *      vector (presumably the usual mantissa/exponent split -- confirm
     *      against frexp_float).
     *   2. Lanes whose reduced value is below sqrt(0.5) get their exponent
     *      adjusted by the comparison mask and the reduced value added to
     *      itself before one is subtracted.
     *   3. A degree-8 polynomial in the shifted value is evaluated and
     *      combined with the exponent scaled by a two-part constant;
     *      0.693359375 + (-2.12194440e-4) is approximately ln(2).
     *
     * No special handling for zero or negative input is visible here.
     */
    typedef typename VecType::int_vec int_vec;

    int_vec exponent;
    VecType m = frexp_float( x, exponent );

    const VecType sqrt_half = 0.707106781186547524f;
    const VecType below_sqrt_half = mask_lt(m, sqrt_half);

    /* The mask is converted to int_vec and added to the exponent;
     * presumably a set lane reads as -1 there -- confirm. */
    exponent = exponent + int_vec(below_sqrt_half);

    /* m in the lanes selected by the mask (bitwise AND with the mask). */
    const VecType m_masked = m & below_sqrt_half;
    m += m_masked - VecType(VecType::gen_one());

    /* Horner evaluation, kept as one expression so that the mixed
     * double/VecType arithmetic is resolved exactly as written. */
    VecType y = (((((((( 7.0376836292E-2 * m - 1.1514610310E-1)
                         * m + 1.1676998740E-1)
                         * m - 1.2420140846E-1)
                         * m + 1.4249322787E-1)
                         * m - 1.6668057665E-1)
                         * m + 2.0000714765E-1)
                         * m - 2.4999993993E-1)
                         * m + 3.3333331174E-1) * m * m*m;

    VecType exponent_f = exponent.convert_to_float();

    /* small part of the two-part ln(2) constant times the exponent */
    y += exponent_f * -2.12194440e-4;

    /* y - 0.5 m^2 */
    y -= 0.5 * m*m;

    /* ... + m */
    VecType sum = m + y;

    /* large part of the two-part ln(2) constant times the exponent */
    return sum + 0.693359375 * exponent_f;
}
Beispiel #2
0
always_inline VecType vec_ceil_float(VecType const & arg)
{
    /* Ceiling built on top of vec_round_float(): round first, then add one
     * in every lane where the rounded value ended up below the input. */
    const VecType nearest = vec_round_float(arg);

    /* Mask of lanes with nearest < arg. */
    const VecType went_down = mask_lt(nearest, arg);

    /* AND-ing the mask with the constant one gives the per-lane correction
     * (one where the mask is set; presumably zero elsewhere). */
    const VecType correction = went_down & VecType::gen_one();

    return nearest + correction;
}
always_inline VecType vec_undenormalize(VecType arg)
{
    /* Replaces every lane whose magnitude is below the smallest positive
     * normalized value of the scalar type with zero; other lanes are passed
     * through (select(a, b, mask) is assumed to pick b where mask is set). */
    typedef typename VecType::float_type float_type;

    /* numeric_limits<>::min() is the smallest positive normalized value. */
    const float_type smallest_normal = std::numeric_limits<float_type>::min();

    const VecType magnitude  = abs(arg);
    const VecType is_tiny    = mask_lt(magnitude, smallest_normal);
    const VecType all_zero   = VecType::gen_zero();

    return select(arg, all_zero, is_tiny);
}
Beispiel #4
0
always_inline VecType vec_tanh_float(VecType const & arg)
{
    /* Hyperbolic tangent, computed per lane from three candidate results:
     *   - |arg| > 22     : magnitude one
     *   - |arg| < 0.625  : odd polynomial in arg
     *   - otherwise      : 1 - 2 / (exp(2 |arg|) + 1)
     * All three candidates are computed and then blended with masks
     * (select(a, b, mask) is assumed to pick b where mask is set).
     *
     * The order of computation (large -> small -> medium) is kept because it
     * seemed to be the most efficient on SSE. */

    /* Split the input into sign bits and magnitude. */
    const VecType sign_bits = arg & VecType::gen_sign_mask();
    const VecType magnitude = arg ^ sign_bits;

    const VecType one       = VecType::gen_one();
    const VecType two (2.f);
    const VecType large_threshold (22.f);
    const VecType small_threshold (0.625f);

    /* large values: the result magnitude is simply one */
    const VecType is_large = mask_gt(magnitude, large_threshold);

    /* small values: arg + arg * s * P(s), with s = arg^2 */
    const VecType c1(static_cast<float>(-5.70498872745e-3));
    const VecType c2(static_cast<float>( 2.06390887954e-2));
    const VecType c3(static_cast<float>(-5.37397155531e-2));
    const VecType c4(static_cast<float>( 1.33314422036e-1));
    const VecType c5(static_cast<float>(-3.33332819422e-1));

    const VecType s = magnitude * magnitude;

    /* Horner steps; each multiply-add stays within a single statement. */
    VecType poly = c1 * s + c2;
    poly = poly * s + c3;
    poly = poly * s + c4;
    poly = poly * s + c5;

    /* The signed input is used here, so this branch needs no sign fix-up. */
    const VecType small_result = poly * s * arg + arg;

    const VecType is_small = mask_lt(magnitude, small_threshold);

    /* medium values */
    const VecType medium_abs =
        one - two / (vec_exp_tanh_float(magnitude + magnitude) + one);

    /* blend large/medium magnitudes, then OR the sign bits back in */
    const VecType large_or_medium_abs = select(medium_abs, one, is_large);
    const VecType large_or_medium     = large_or_medium_abs | sign_bits;

    /* finally let the small-value branch override where it applies */
    return select(large_or_medium, small_result, is_small);
}
always_inline VecType vec_exp_float(VecType const & arg)
{
    /* Exponential function, evaluated per lane.
     *
     * Express e**x = e**g 2**n
     *   = e**g e**( n loge(2) )
     *   = e**( g + n loge(2) )
     *
     * n is obtained by rounding x * 1.44269504088896341 (about 1/ln(2)),
     * g is the remainder, e**g is a degree-6 polynomial and the 2**n factor
     * is applied through ldexp_float().  Lanes with arg above
     * 88.72283905206835 return numeric_limits<float>::max(); lanes below the
     * negated bound return zero (select(a, b, mask) is assumed to pick b
     * where mask is set).
     */
    typedef typename VecType::int_vec int_vec;

    VecType g = arg;

    /* n as a float vector, then as integers for ldexp_float() */
    VecType n_float = round(VecType(1.44269504088896341f) * g);
    int_vec n = n_float.truncate_to_int();

    /* Subtract n * ln(2) in two pieces; 0.693359375 + (-2.12194440e-4)
     * is approximately ln(2). */
    g -= n_float*VecType(0.693359375f);
    g -= n_float*VecType(-2.12194440e-4f);

    /* Theoretical peak relative error in [-0.5, +0.5] is 3.5e-8. */
    VecType poly = VecType(VecType::gen_one()) +
        g * (1.00000035762786865234375f +
             g * (0.4999996721744537353515625f +
                  g * (0.16665561497211456298828125f +
                       g * (4.167006909847259521484375e-2f +
                            g * (8.420792408287525177001953125e-3f +
                                 g * 1.386119984090328216552734375e-3f)))));

    /* multiply by power of 2 */
    VecType scaled = ldexp_float(poly, n);

    /* handle min/max boundaries
     * (a lower bound of -103.278929903431851103f is left disabled in favour
     * of the symmetric one below) */
    const VecType upper_bound(88.72283905206835f);
    const VecType lower_bound = -upper_bound;
    const VecType largest_float(std::numeric_limits<float>::max());
    const VecType zero = VecType::gen_zero();

    VecType overflow  = mask_gt(arg, upper_bound);
    VecType underflow = mask_lt(arg, lower_bound);

    /* overflow is applied first, then underflow */
    VecType clamped = select(scaled, largest_float, overflow);
    return select(clamped, zero, underflow);
}