/*
 * Natural-logarithm approximation for a vector of floats.
 *
 * The input is split by frexp_float() into a reduced value and an integer
 * exponent, a polynomial is evaluated on the reduced value, and the exponent
 * contribution is added back as exponent * ln(2).  ln(2) is applied in two
 * pieces (0.693359375 and -2.12194440e-4, which sum to ~0.693147181).
 *
 * This function itself performs no special handling for zero, negative or
 * non-finite lanes.
 * NOTE(review): the coefficients look like the Cephes single-precision logf
 * polynomial -- confirm against the reference.
 */
always_inline VecType vec_log_float(VecType x)
{
    typedef typename VecType::int_vec int_vec;

    /* frexp_float fills `exponent` and returns the value that replaces x */
    int_vec exponent;
    x = frexp_float(x, exponent);

    const VecType sqrt_half = 0.707106781186547524f;
    const VecType below_sqrt_half = mask_lt(x, sqrt_half);

    /* the comparison mask, converted to int_vec, is added to the exponent
     * (presumably -1 in lanes where x < sqrt(0.5) -- TODO confirm mask encoding) */
    exponent = exponent + int_vec(below_sqrt_half);

    /* lanes selected by the mask become x + x - 1, all others x - 1
     * (assuming the mask is all-ones / all-zeros per lane) */
    const VecType x_extra = x & below_sqrt_half;
    x += x_extra - VecType(VecType::gen_one());

    /* Horner evaluation, one coefficient per step */
    VecType poly = 7.0376836292E-2 * x - 1.1514610310E-1;
    poly = poly * x + 1.1676998740E-1;
    poly = poly * x - 1.2420140846E-1;
    poly = poly * x + 1.4249322787E-1;
    poly = poly * x - 1.6668057665E-1;
    poly = poly * x + 2.0000714765E-1;
    poly = poly * x - 2.4999993993E-1;
    poly = poly * x + 3.3333331174E-1;

    VecType y = poly * x * x * x;

    const VecType exponent_f = exponent.convert_to_float();

    y += exponent_f * -2.12194440e-4; /* small part of exponent * ln(2) */
    y -= 0.5 * x * x;                 /* y - 0.5 x^2 */

    const VecType sum = x + y;        /* ... + x */

    /* large part of exponent * ln(2) */
    return sum + 0.693359375 * exponent_f;
}
/*
 * Ceiling for a vector of floats, built on vec_round_float().
 *
 * Each lane is first rounded by vec_round_float(); lanes whose rounded value
 * ended up below the argument are then bumped by one.
 */
always_inline VecType vec_ceil_float(VecType const & arg)
{
    const VecType nearest = vec_round_float(arg);

    /* mask of the lanes where rounding went below the input */
    const VecType went_down = mask_lt(nearest, arg);

    /* 1.0 in the masked lanes (assuming an all-ones / all-zeros lane mask) */
    const VecType correction = went_down & VecType::gen_one();

    return nearest + correction;
}
/*
 * Flushes tiny values to zero.
 *
 * Every lane whose magnitude is below std::numeric_limits<float_type>::min()
 * (the smallest positive normalized value) is replaced by zero; all other
 * lanes are passed through unchanged.
 */
always_inline VecType vec_undenormalize(VecType arg)
{
    typedef typename VecType::float_type scalar_type;

    const scalar_type smallest_normal = std::numeric_limits<scalar_type>::min();

    const VecType magnitude = abs(arg);
    const VecType is_tiny = mask_lt(magnitude, smallest_normal);
    const VecType zero = VecType::gen_zero();

    /* select(a, b, mask): b is used for the tiny lanes here, a otherwise */
    return select(arg, zero, is_tiny);
}
/*
 * Hyperbolic-tangent approximation for a vector of floats.
 *
 * Three candidate results are computed for every lane and merged with masks:
 *   - |arg| > 22     : magnitude 1
 *   - |arg| < 0.625  : odd polynomial in arg
 *   - otherwise      : 1 - 2 / (exp(2 |arg|) + 1), via vec_exp_tanh_float()
 * The sign of the argument is OR-ed back onto the large/medium result; the
 * polynomial branch works on the signed argument directly.
 *
 * The large -> small -> medium ordering is kept on purpose: it seemed to be
 * the most efficient order on SSE.
 */
always_inline VecType vec_tanh_float(VecType const & arg)
{
    /* split into sign and magnitude */
    const VecType sign_bits = arg & VecType::gen_sign_mask();
    const VecType magnitude = arg ^ sign_bits;

    const VecType one = VecType::gen_one();
    const VecType two(2.f);
    const VecType large_threshold(22.f);
    const VecType small_threshold(0.625f);

    /* large values: the result magnitude is simply `one` */
    const VecType is_large = mask_gt(magnitude, large_threshold);

    /* small values: polynomial in arg^2, evaluated in Horner steps */
    const VecType c1(static_cast<float>(-5.70498872745e-3));
    const VecType c2(static_cast<float>( 2.06390887954e-2));
    const VecType c3(static_cast<float>(-5.37397155531e-2));
    const VecType c4(static_cast<float>( 1.33314422036e-1));
    const VecType c5(static_cast<float>(-3.33332819422e-1));

    const VecType squared = magnitude * magnitude;

    VecType poly = c1 * squared + c2;
    poly = poly * squared + c3;
    poly = poly * squared + c4;
    poly = poly * squared + c5;

    const VecType small_result = poly * squared * arg + arg;
    const VecType is_small = mask_lt(magnitude, small_threshold);

    /* medium values */
    const VecType medium_magnitude = one - two / (vec_exp_tanh_float(magnitude + magnitude) + one);

    /* merge the large and medium branches, then restore the sign */
    const VecType large_or_medium_magnitude = select(medium_magnitude, one, is_large);
    const VecType large_or_medium = large_or_medium_magnitude | sign_bits;

    /* finally let the small branch override where it applies */
    return select(large_or_medium, small_result, is_small);
}
/*
 * Exponential approximation for a vector of floats.
 *
 * Uses the decomposition
 *     e**x = e**g * 2**n
 *          = e**g * e**(n * loge(2))
 *          = e**(g + n * loge(2))
 * n is round(x * log2(e)); the remainder g is obtained by subtracting
 * n * loge(2) in two pieces (0.693359375 and -2.12194440e-4), a polynomial
 * approximates e**g, and ldexp_float() applies the power of two.
 *
 * Lanes with arg > 88.72283905206835 yield the largest finite float, lanes
 * with arg < -88.72283905206835 yield zero.
 */
always_inline VecType vec_exp_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    /* range reduction */
    VecType reduced = arg;
    const VecType n_float = round(VecType(1.44269504088896341f) * reduced);
    const int_vec n = n_float.truncate_to_int();

    reduced -= n_float*VecType(0.693359375f);
    reduced -= n_float*VecType(-2.12194440e-4f);

    /* Theoretical peak relative error in [-0.5, +0.5] is 3.5e-8. */
    VecType poly = 8.420792408287525177001953125e-3f + reduced * 1.386119984090328216552734375e-3f;
    poly = 4.167006909847259521484375e-2f + reduced * poly;
    poly = 0.16665561497211456298828125f + reduced * poly;
    poly = 0.4999996721744537353515625f + reduced * poly;
    poly = 1.00000035762786865234375f + reduced * poly;

    const VecType exp_reduced = VecType(VecType::gen_one()) + reduced * poly;

    /* multiply by power of 2 */
    const VecType approx = ldexp_float(exp_reduced, n);

    /* handle min/max boundaries; the lower bound is the negated upper bound
     * (an alternative lower bound of -103.278929903431851103f is not in use) */
    const VecType upper_limit(88.72283905206835f);
    const VecType lower_limit = -upper_limit;
    const VecType largest_float(std::numeric_limits<float>::max());
    const VecType zero = VecType::gen_zero();

    const VecType overflows = mask_gt(arg, upper_limit);
    const VecType underflows = mask_lt(arg, lower_limit);

    const VecType clamped_high = select(approx, largest_float, overflows);
    return select(clamped_high, zero, underflows);
}