/*
 * Sets p to the unique integer contained in the ball obtained by summing
 * the series for n (presumably the Hardy-Ramanujan-Rademacher series for
 * the partition function -- confirm against the helpers) and enlarging it
 * by the truncation bound from partitions_rademacher_bound().
 *
 *   p            output integer
 *   n            input index
 *   use_doubles  forwarded unchanged to the summation routines
 *
 * The threaded summation is chosen only when n >= 4e8 and more than one
 * FLINT thread is configured. If the final ball does not pin down a single
 * integer, the ball is printed and the process aborts.
 */
void
partitions_fmpz_fmpz_hrr(fmpz_t p, const fmpz_t n, int use_doubles)
{
    arb_t sum;
    arf_t tail;
    slong terms;
    int go_threaded;

    arb_init(sum);
    arf_init(tail);

    /* Number of series terms to evaluate, derived from a double
       approximation of n. */
    terms = partitions_hrr_needed_terms(fmpz_get_d(n));

    go_threaded = (fmpz_cmp_ui(n, 4e8) >= 0) && (flint_get_num_threads() > 1);

    if (go_threaded)
        hrr_sum_threaded(sum, n, terms, use_doubles);
    else
        partitions_hrr_sum_arb(sum, n, 1, terms, use_doubles);

    /* Fold the truncation bound into the radius of the ball. */
    partitions_rademacher_bound(tail, n, terms);
    arb_add_error_arf(sum, tail);

    if (arb_get_unique_fmpz(p, sum))
    {
        arb_clear(sum);
        arf_clear(tail);
        return;
    }

    /* The enclosure contains zero or several integers: unrecoverable. */
    flint_printf("not unique!\n");
    arb_printd(sum, 50);
    flint_printf("\n");
    abort();
}
/*
 * Sets C to the product A * B computed at precision prec.
 *
 * Dispatches to arb_mat_mul_threaded() when more than one FLINT thread is
 * configured and the work estimate
 *     nrows(A) * nrows(B) * ncols(B) * prec
 * exceeds 100000; otherwise uses arb_mat_mul_classical().
 */
void
arb_mat_mul(arb_mat_t C, const arb_mat_t A, const arb_mat_t B, slong prec)
{
    double work;

    if (flint_get_num_threads() <= 1)
    {
        arb_mat_mul_classical(C, A, B, prec);
        return;
    }

    /* Computed in double so that the product cannot overflow slong. */
    work = (double) arb_mat_nrows(A) * (double) arb_mat_nrows(B)
         * (double) arb_mat_ncols(B) * (double) prec;

    if (work > 100000)
        arb_mat_mul_threaded(C, A, B, prec);
    else
        arb_mat_mul_classical(C, A, B, prec);
}
/*
 * Evaluates a power sum series into z[0..len-1] by distributing the work
 * over flint_get_num_threads() workers running _zeta_powsum_evaluator().
 *
 *   z     output vector of length len
 *   s, a  parameters forwarded unchanged to every worker
 *   n     upper end of the term range (workers receive sub-ranges of [0, n))
 *   len   number of output coefficients
 *   prec  working precision forwarded to the workers and used for the
 *         final accumulation
 *
 * Two work-splitting strategies are used:
 *   - len > 1000: every worker gets the full term range [0, n) but only a
 *     slice of the coefficient indices, and writes directly into z.
 *   - otherwise: every worker gets a slice of the term range and a private
 *     length-len buffer; the buffers are summed into z afterwards.
 *
 * If a worker thread cannot be created, its slice is evaluated in the
 * calling thread instead, and only successfully created threads are joined.
 */
void
zeta_powsum_series_naive_threaded(fmpcb_ptr z, const fmpcb_t s,
    const fmpcb_t a, long n, long len, long prec)
{
    pthread_t * threads;
    powsum_arg_t * args;
    int * started;
    long i, num_threads;
    int split_each_term;

    num_threads = flint_get_num_threads();

    threads = flint_malloc(sizeof(pthread_t) * num_threads);
    args = flint_malloc(sizeof(powsum_arg_t) * num_threads);
    started = flint_malloc(sizeof(int) * num_threads);

    split_each_term = (len > 1000);

    for (i = 0; i < num_threads; i++)
    {
        args[i].s = s;
        args[i].a = a;

        if (split_each_term)
        {
            /* Slice of coefficient indices handled by worker i. */
            long n0, n1;
            n0 = (len * i) / num_threads;
            n1 = (len * (i + 1)) / num_threads;
            args[i].z = z + n0;
            args[i].n0 = 0;
            args[i].n1 = n;
            args[i].d0 = n0;
            args[i].len = n1 - n0;
        }
        else
        {
            /* Slice of the term range, accumulated in a private buffer. */
            args[i].z = _fmpcb_vec_init(len);
            args[i].n0 = (n * i) / num_threads;
            args[i].n1 = (n * (i + 1)) / num_threads;
            args[i].d0 = 0;
            args[i].len = len;
        }

        args[i].prec = prec;

        /* pthread_create returns 0 on success. On failure threads[i] is
           not a valid handle, so it must not be joined, and the slice
           still has to be computed. */
        started[i] = (pthread_create(&threads[i], NULL,
                        _zeta_powsum_evaluator, &args[i]) == 0);

        if (!started[i])
        {
            /* NOTE(review): assumes _zeta_powsum_evaluator returns normally
               (does not call pthread_exit) so it is safe to run inline --
               confirm against its definition. */
            _zeta_powsum_evaluator(&args[i]);
        }
    }

    for (i = 0; i < num_threads; i++)
    {
        if (started[i])
            pthread_join(threads[i], NULL);
    }

    if (!split_each_term)
    {
        /* Combine the per-worker partial sums. */
        _fmpcb_vec_zero(z, len);
        for (i = 0; i < num_threads; i++)
        {
            _fmpcb_vec_add(z, z, args[i].z, len, prec);
            _fmpcb_vec_clear(args[i].z, len);
        }
    }

    flint_free(started);
    flint_free(threads);
    flint_free(args);
}
void _acb_poly_zeta_em_sum(acb_ptr z, const acb_t s, const acb_t a, int deflate, ulong N, ulong M, slong d, slong prec) { acb_ptr t, u, v, term, sum; acb_t Na, one; slong i; t = _acb_vec_init(d + 1); u = _acb_vec_init(d); v = _acb_vec_init(d); term = _acb_vec_init(d); sum = _acb_vec_init(d); acb_init(Na); acb_init(one); prec += 2 * (FLINT_BIT_COUNT(N) + FLINT_BIT_COUNT(d)); acb_one(one); /* sum 1/(k+a)^(s+x) */ if (acb_is_one(a) && d <= 3) _acb_poly_powsum_one_series_sieved(sum, s, N, d, prec); else if (N > 50 && flint_get_num_threads() > 1) _acb_poly_powsum_series_naive_threaded(sum, s, a, one, N, d, prec); else _acb_poly_powsum_series_naive(sum, s, a, one, N, d, prec); /* t = 1/(N+a)^(s+x); we might need one extra term for deflation */ acb_add_ui(Na, a, N, prec); _acb_poly_acb_invpow_cpx(t, Na, s, d + 1, prec); /* sum += (N+a) * 1/((s+x)-1) * t */ if (!deflate) { /* u = (N+a)^(1-(s+x)) */ acb_sub_ui(v, s, 1, prec); _acb_poly_acb_invpow_cpx(u, Na, v, d, prec); /* divide by 1/((s-1) + x) */ acb_sub_ui(v, s, 1, prec); acb_div(u, u, v, prec); for (i = 1; i < d; i++) { acb_sub(u + i, u + i, u + i - 1, prec); acb_div(u + i, u + i, v, prec); } _acb_vec_add(sum, sum, u, d, prec); } /* sum += ((N+a)^(1-(s+x)) - 1) / ((s+x) - 1) */ else { /* at s = 1, this becomes (N*t - 1)/x, i.e. just remove one coeff */ if (acb_is_one(s)) { for (i = 0; i < d; i++) acb_mul(u + i, t + i + 1, Na, prec); _acb_vec_add(sum, sum, u, d, prec); } else { /* TODO: this is numerically unstable for large derivatives, and divides by zero if s contains 1. We want a good way to evaluate the power series ((N+a)^y - 1) / y where y has nonzero constant term, without doing a division. How is this best done? 
*/ _acb_vec_scalar_mul(t, t, d, Na, prec); acb_sub_ui(t + 0, t + 0, 1, prec); acb_sub_ui(u + 0, s, 1, prec); acb_inv(u + 0, u + 0, prec); for (i = 1; i < d; i++) acb_mul(u + i, u + i - 1, u + 0, prec); for (i = 1; i < d; i += 2) acb_neg(u + i, u + i); _acb_poly_mullow(v, u, d, t, d, d, prec); _acb_vec_add(sum, sum, v, d, prec); _acb_poly_acb_invpow_cpx(t, Na, s, d, prec); } } /* sum += u = 1/2 * t */ _acb_vec_scalar_mul_2exp_si(u, t, d, -WORD(1)); _acb_vec_add(sum, sum, u, d, prec); /* Euler-Maclaurin formula tail */ if (d < 5 || d < M / 10) _acb_poly_zeta_em_tail_naive(u, s, Na, t, M, d, prec); else _acb_poly_zeta_em_tail_bsplit(u, s, Na, t, M, d, prec); _acb_vec_add(z, sum, u, d, prec); _acb_vec_clear(t, d + 1); _acb_vec_clear(u, d); _acb_vec_clear(v, d); _acb_vec_clear(term, d); _acb_vec_clear(sum, d); acb_clear(Na); acb_clear(one); }
int main() { slong iter; flint_rand_t state; flint_printf("mul_threaded...."); fflush(stdout); flint_randinit(state); for (iter = 0; iter < 5000 * arb_test_multiplier(); iter++) { slong m, n, k, qbits1, qbits2, rbits1, rbits2, rbits3; fmpq_mat_t A, B, C; acb_mat_t a, b, c, d; flint_set_num_threads(1 + n_randint(state, 5)); qbits1 = 2 + n_randint(state, 200); qbits2 = 2 + n_randint(state, 200); rbits1 = 2 + n_randint(state, 200); rbits2 = 2 + n_randint(state, 200); rbits3 = 2 + n_randint(state, 200); m = n_randint(state, 10); n = n_randint(state, 10); k = n_randint(state, 10); fmpq_mat_init(A, m, n); fmpq_mat_init(B, n, k); fmpq_mat_init(C, m, k); acb_mat_init(a, m, n); acb_mat_init(b, n, k); acb_mat_init(c, m, k); acb_mat_init(d, m, k); fmpq_mat_randtest(A, state, qbits1); fmpq_mat_randtest(B, state, qbits2); fmpq_mat_mul(C, A, B); acb_mat_set_fmpq_mat(a, A, rbits1); acb_mat_set_fmpq_mat(b, B, rbits2); acb_mat_mul_threaded(c, a, b, rbits3); if (!acb_mat_contains_fmpq_mat(c, C)) { flint_printf("FAIL\n\n"); flint_printf("threads = %d, m = %wd, n = %wd, k = %wd, bits3 = %wd\n", flint_get_num_threads(), m, n, k, rbits3); flint_printf("A = "); fmpq_mat_print(A); flint_printf("\n\n"); flint_printf("B = "); fmpq_mat_print(B); flint_printf("\n\n"); flint_printf("C = "); fmpq_mat_print(C); flint_printf("\n\n"); flint_printf("a = "); acb_mat_printd(a, 15); flint_printf("\n\n"); flint_printf("b = "); acb_mat_printd(b, 15); flint_printf("\n\n"); flint_printf("c = "); acb_mat_printd(c, 15); flint_printf("\n\n"); flint_abort(); } /* test aliasing with a */ if (acb_mat_nrows(a) == acb_mat_nrows(c) && acb_mat_ncols(a) == acb_mat_ncols(c)) { acb_mat_set(d, a); acb_mat_mul_threaded(d, d, b, rbits3); if (!acb_mat_equal(d, c)) { flint_printf("FAIL (aliasing 1)\n\n"); flint_abort(); } } /* test aliasing with b */ if (acb_mat_nrows(b) == acb_mat_nrows(c) && acb_mat_ncols(b) == acb_mat_ncols(c)) { acb_mat_set(d, b); acb_mat_mul_threaded(d, a, d, rbits3); if (!acb_mat_equal(d, c)) { 
flint_printf("FAIL (aliasing 2)\n\n"); flint_abort(); } } fmpq_mat_clear(A); fmpq_mat_clear(B); fmpq_mat_clear(C); acb_mat_clear(a); acb_mat_clear(b); acb_mat_clear(c); acb_mat_clear(d); } flint_randclear(state); flint_cleanup(); flint_printf("PASS\n"); return EXIT_SUCCESS; }