/**
 * Vectorised single-precision e**arg without any range clamping.
 *
 * The argument is split as arg = g + n*ln(2), with n an integer, so that
 *     e**arg = e**g * 2**n.
 * e**g is evaluated with a degree-6 polynomial and the result is scaled
 * by 2**n through ldexp_float().
 *
 * No handling of very large or very small arguments is done here.
 * NOTE(review): judging by the name this variant is meant for a tanh
 * implementation whose inputs are already bounded -- confirm with callers.
 *
 * @param arg  vector of exponents
 * @return     approximation of e**arg for every lane
 */
always_inline VecType vec_exp_tanh_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    /* log2(e), and ln(2) split into a coarse and a fine part
     * (0.693359375 - 2.12194440e-4 ~= 0.693147181) so the range reduction
     * is carried out in two subtraction steps. */
    const VecType log2_e(1.44269504088896341f);
    const VecType ln2_coarse(0.693359375f);
    const VecType ln2_fine(-2.12194440e-4f);

    /* n = round(arg * log2(e)), kept both as float and as integer vector */
    VecType scaled = round(log2_e * arg);
    int_vec exponent = scaled.truncate_to_int();

    /* g = arg - n*ln(2) */
    VecType reduced = arg;
    reduced -= scaled * ln2_coarse;
    reduced -= scaled * ln2_fine;

    /* Polynomial coefficients for e**g.
     * Theoretical peak relative error in [-0.5, +0.5] is 3.5e-8. */
    const float c1 = 1.00000035762786865234375f;
    const float c2 = 0.4999996721744537353515625f;
    const float c3 = 0.16665561497211456298828125f;
    const float c4 = 4.167006909847259521484375e-2f;
    const float c5 = 8.420792408287525177001953125e-3f;
    const float c6 = 1.386119984090328216552734375e-3f;

    /* Horner evaluation, kept as one expression */
    VecType poly = 1.f + reduced * (c1 +
                         reduced * (c2 +
                         reduced * (c3 +
                         reduced * (c4 +
                         reduced * (c5 +
                         reduced * c6)))));

    /* multiply by 2**n */
    return ldexp_float(poly, exponent);
}
/**
 * Vectorised single-precision e**arg with clamping at the range limits.
 *
 * The argument is split as arg = g + n*ln(2), with n an integer, so that
 *     e**arg = e**g * 2**n.
 * e**g is evaluated with a degree-6 polynomial and the result is scaled
 * by 2**n through ldexp_float().
 *
 * Afterwards the result is clamped per lane:
 *   - arg >  88.72283905206835 (approximately ln(FLT_MAX)): the largest
 *     finite float is returned,
 *   - arg < -88.72283905206835: zero is returned.
 *
 * @param arg  vector of exponents
 * @return     approximation of e**arg for every lane, clamped as above
 */
always_inline VecType vec_exp_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    /* log2(e), and ln(2) split into a coarse and a fine part
     * (0.693359375 - 2.12194440e-4 ~= 0.693147181) so the range reduction
     * is carried out in two subtraction steps. */
    const VecType log2_e(1.44269504088896341f);
    const VecType ln2_coarse(0.693359375f);
    const VecType ln2_fine(-2.12194440e-4f);

    /* n = round(arg * log2(e)), kept both as float and as integer vector */
    VecType scaled = round(log2_e * arg);
    int_vec exponent = scaled.truncate_to_int();

    /* g = arg - n*ln(2) */
    VecType reduced = arg;
    reduced -= scaled * ln2_coarse;
    reduced -= scaled * ln2_fine;

    /* Polynomial coefficients for e**g.
     * Theoretical peak relative error in [-0.5, +0.5] is 3.5e-8. */
    const float c1 = 1.00000035762786865234375f;
    const float c2 = 0.4999996721744537353515625f;
    const float c3 = 0.16665561497211456298828125f;
    const float c4 = 4.167006909847259521484375e-2f;
    const float c5 = 8.420792408287525177001953125e-3f;
    const float c6 = 1.386119984090328216552734375e-3f;

    /* Horner evaluation, kept as one expression */
    VecType poly = VecType(VecType::gen_one()) + reduced * (c1 +
                                                 reduced * (c2 +
                                                 reduced * (c3 +
                                                 reduced * (c4 +
                                                 reduced * (c5 +
                                                 reduced * c6)))));

    /* multiply by 2**n */
    VecType scaled_result = ldexp_float(poly, exponent);

    /* Range limits. The lower limit mirrors the upper one; an alternative
     * lower limit of -103.278929903431851103f is left disabled. */
    const VecType upper_limit(88.72283905206835f);
    const VecType lower_limit = -upper_limit;
    const VecType largest_float(std::numeric_limits<float>::max());
    const VecType all_zero = VecType::gen_zero();

    VecType overflow_mask  = mask_gt(arg, upper_limit);
    VecType underflow_mask = mask_lt(arg, lower_limit);

    /* First replace overflowing lanes by the largest float, then
     * underflowing lanes by zero. */
    return select(select(scaled_result, largest_float, overflow_mask),
                  all_zero, underflow_mask);
}