Ejemplo n.º 1
0
void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
                            mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N,
                            int sinonly, int alternating)
{
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
    slong power, k, m;
    int cosorsin;

    TMP_INIT;
    TMP_START;

    if (2 * N >= FACTORIAL_TAB_SIZE - 1)
    {
        flint_printf("_arb_sin_cos_taylor_rs: N too large!\n");
        abort();
    }

    if (N <= 1)
    {
        if (N == 0)
        {
            flint_mpn_zero(ysin, xn);
            if (!sinonly) flint_mpn_zero(ycos, xn);
            error[0] = 0;
        }
        else if (N == 1)
        {
            flint_mpn_copyi(ysin, x, xn);
            if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES);
            error[0] = 1;
        }
    }
    else
    {
        /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */
        m = 2;
        while (m * m < N)
            m += 2;

        /* todo: merge allocations */
        xpow = TMP_ALLOC_LIMBS((m + 1) * xn);
        s = TMP_ALLOC_LIMBS(xn + 2);
        t = TMP_ALLOC_LIMBS(2 * xn + 2);     /* todo: 1 limb too much? */

        /* higher index ---> */
        /*        | ---xn--- | */
        /* xpow = |  <temp>  | x^m | x^(m-1) | ... | x^2 | x | */

#define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn)
#define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn)

        mpn_sqr(XPOW_WRITE(1), x, xn);
        mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn);

        for (k = 4; k <= m; k += 2)
        {
            mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn);
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);
        }

        for (cosorsin = sinonly; cosorsin < 2; cosorsin++)
        {
            flint_mpn_zero(s, xn + 1);

            /* todo: skip one nonscalar multiplication (use x^m)
               when starting on x^0 */
            power = (N - 1) % m;

            for (k = N - 1; k >= 0; k--)
            {
                c = factorial_tab_numer[2 * k + cosorsin];
                new_denom = factorial_tab_denom[2 * k + cosorsin];
                old_denom = factorial_tab_denom[2 * k + cosorsin + 2];

                /* change denominators */
                if (new_denom != old_denom && k < N - 1)
                {
                    if (alternating && (k % 2 == 0))
                        s[xn] += old_denom;

                    mpn_divrem_1(s, 0, s, xn + 1, old_denom);

                    if (alternating && (k % 2 == 0))
                        s[xn] -= 1;
                }

                if (power == 0)
                {
                    /* add c * x^0 -- only top limb is affected */
                    if (alternating & k)
                        s[xn] -= c;
                    else
                        s[xn] += c;

                    /* Outer polynomial evaluation: multiply by x^m */
                    if (k != 0)
                    {
                        mpn_mul(t, s, xn + 1, XPOW_READ(m), xn);
                        flint_mpn_copyi(s, t + xn, xn + 1);
                    }

                    power = m - 1;
                }
                else
                {
                    if (alternating & k)
                        s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c);
                    else
                        s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c);

                    power--;
                }
            }

            /* finally divide by denominator */
            if (cosorsin == 0)
            {
                mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]);

                /* perturb down to a number < 1 if necessary. note that this
                   does not invalidate the error bound: 1 - ulp is either
                   1 ulp too small or must be closer to the exact value */
                if (t[xn] == 0)
                    flint_mpn_copyi(ycos, t, xn);
                else
                    flint_mpn_store(ycos, xn, LIMB_ONES);
            }
            else
            {
                mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]);
                mpn_mul(t, s, xn + 1, x, xn);
                flint_mpn_copyi(ysin, t + xn, xn);
            }
        }

        /* error bound (ulp) */
        error[0] = 2;
    }

    TMP_END;
}
Ejemplo n.º 2
0
void _arb_exp_taylor_rs(mp_ptr y, mp_limb_t * error,
    mp_srcptr x, mp_size_t xn, ulong N)
{
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
    slong power, k, m;

    TMP_INIT;
    TMP_START;

    if (N >= FACTORIAL_TAB_SIZE - 1)
    {
        flint_printf("_arb_exp_taylor_rs: N too large!\n");
        abort();
    }

    if (N <= 3)
    {
        if (N <= 1)
        {
            flint_mpn_zero(y, xn);
            y[xn] = N;
            error[0] = 0;
        }
        else if (N == 2)
        {
            flint_mpn_copyi(y, x, xn);
            y[xn] = 1;
            error[0] = 0;
        }
        else
        {
            /* 1 + x + x^2 / 2 */
            t = TMP_ALLOC_LIMBS(2 * xn);

            mpn_sqr(t, x, xn);
            mpn_rshift(t + xn, t + xn, xn, 1);
            y[xn] = mpn_add_n(y, x, t + xn, xn) + 1;

            error[0] = 2;
        }
    }
    else
    {
        /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */
        /* TODO: drop evenness assumption since we don't have sign issues here? */
        /* TODO: then just need to fix power construction below... */
        m = 2;
        while (m * m < N)
            m += 2;

        /* todo: merge allocations */
        xpow = TMP_ALLOC_LIMBS((m + 1) * xn);
        s = TMP_ALLOC_LIMBS(xn + 2);
        t = TMP_ALLOC_LIMBS(2 * xn + 2);     /* todo: 1 limb too much? */

        /* higher index ---> */
        /*        | ---xn--- | */
        /* xpow = |  <temp>  | x^m | x^(m-1) | ... | x^2 | x | */

#define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn)
#define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn)

        flint_mpn_copyi(XPOW_READ(1), x, xn);
        mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn);

        for (k = 4; k <= m; k += 2)
        {
            mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn);
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);
        }

        flint_mpn_zero(s, xn + 1);

        /* todo: skip one nonscalar multiplication (use x^m)
           when starting on x^0 */
        power = (N - 1) % m;

        for (k = N - 1; k >= 0; k--)
        {
            c = factorial_tab_numer[k];
            new_denom = factorial_tab_denom[k];
            old_denom = factorial_tab_denom[k+1];

            /* change denominators */
            if (new_denom != old_denom && k < N - 1)
            {
                mpn_divrem_1(s, 0, s, xn + 1, old_denom);
            }

            if (power == 0)
            {
                /* add c * x^0 -- only top limb is affected */
                s[xn] += c;

                /* Outer polynomial evaluation: multiply by x^m */
                if (k != 0)
                {
                    mpn_mul(t, s, xn + 1, XPOW_READ(m), xn);
                    flint_mpn_copyi(s, t + xn, xn + 1);
                }

                power = m - 1;
            }
            else
            {
                s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c);

                power--;
            }
        }

        /* finally divide by denominator */
        mpn_divrem_1(y, 0, s, xn + 1, factorial_tab_denom[0]);

        /* error bound (ulp) */
        error[0] = 2;
    }

    TMP_END;
}