/*
 * Modular exponentiation by repeated squaring: returns a^b reduced modulo n.
 *
 * a  base
 * b  exponent; its bits are consumed from least to most significant
 * n  modulus
 *
 * All multiplications go through mod_mul(x, y, n), so intermediate products
 * are only as overflow-safe as that helper is.
 */
T mod_exp(T a, T b, T n)
{
    /* Everything is congruent to 0 modulo 1, including a^0. Seeding with a
     * plain 1 returned 1 for b == 0 in that case. The comparison (rather than
     * 1 % n) avoids introducing a division when n is 0. */
    T res = (n == 1) ? 0 : 1;

    for (; b; b >>= 1) {
        if (b & 1) {
            res = mod_mul(res, a, n);
        }
        a = mod_mul(a, a, n);
    }
    return res;
}
/**
 * Square-and-multiply modular exponentiation.
 *
 * @param base  Non-negative base (asserted); reduced modulo m before use.
 * @param exp   Non-negative exponent (asserted).
 * @param m     Modulus.
 * @return base^exp reduced modulo m. For m == 1 the result is 0 for every
 *         exponent, including exp == 0.
 */
T mod_pow(T base, T exp, T m) {
    assert(base >= 0 && exp >= 0);
    base %= m;

    // Modulo 1 the only residue is 0; starting from 1 would leak a 1 out of
    // the exp == 0 case.
    T result = (m == 1) ? 0 : 1;

    for (; exp; exp >>= 1) {
        if (exp & 1) {
            result = mod_mul(result, base, m);
        }
        base = mod_mul(base, base, m);
    }
    return result;
}
/*!
 * Computes \a n0 decimal digits of \f$ \pi \f$ beginning at digit \a n, where digit 0 is the first
 * digit after the decimal point. The digits are packed into a double, so no more digits than a
 * double can represent are ever meaningful, whatever the value of \a n0.
 *
 * The result is only accurate when \f$ n \ge 4 \cdot n0 \f$.
 *
 * \param n  Offset of the first digit to retrieve
 * \param n0 Number of digits to retrieve; also the precision of the result
 *
 * \return A number with integer part 0 whose fractional part begins at the nth digit of pi; in
 *         other words the fractional part of \f$ \pi \cdot 10^n \f$, accurate to \a n0 decimal
 *         places.
 */
double get_pi_digits(long n, long n0) {
    long M, N;
    long first_sum_end;
    long idx;
    long denom, numer;
    double frac, pair, whole;
    double ln_n;
    int sign;

    /* Series parameters derived from the requested offset and precision. */
    ln_n = log(n);
    M = 2 * (long) (3 * n / ln_n / ln_n / ln_n);
    N = (long) ((n + n0 + 1) * (log(10) / (log(2 * M_E * M)))) + 1;
    N += N % 2; /* make N even */

    /* First sum: consecutive odd denominators, taken two at a time with opposite signs. */
    frac = 0;
    first_sum_end = (M + 1) * N;
    for(idx = 0; idx < first_sum_end; idx += 2) {
        pair = 0;

        denom = 2 * idx + 1;
        numer = expt_mod(10, n, denom);
        numer = mod_mul(4, numer, denom);
        pair += (double) numer / (double) denom;

        denom = 2 * idx + 3;
        numer = expt_mod(10, n, denom);
        numer = mod_mul(4, numer, denom);
        pair -= (double) numer / (double) denom;

        frac += pair;

        /* Keep the running total in a bounded range. */
        if(frac <= -0.5) {
            frac += 1;
        } else if(frac >= 1) {
            frac -= 2;
        }
    }

    /* Second sum: N alternating-sign terms weighted by partial binomial sums. */
    sign = -1;
    for(idx = 0; idx < N; idx++) {
        denom = 2 * M * N + 2 * idx + 1;

        numer = mod_sum_binom(idx, N, denom);
        numer = mod_mul(numer, expt_mod(5, N, denom), denom);
        numer = mod_mul(numer, expt_mod(10, n - N, denom), denom);
        numer = mod_mul(4, numer, denom);

        frac += sign * (double) numer / (double) denom;
        frac -= floor(frac); /* keep only the fractional part */

        sign = -sign;
    }

    return modf(frac, &whole);
}
/* Form the product of transition matrices m and n and store it in result.
 * Every output entry is computed into a local before any field of result is
 * written, so result may be the same object as m, as n, or as both. */
static void mrg_multiply(const mrg_transition_matrix* m, const mrg_transition_matrix* n, mrg_transition_matrix* result) {
  uint_least32_t next_s =
    mod_mac(
      mod_mac(
        mod_mac(
          mod_mac(
            mod_mul(m->s, n->d),
            m->t, n->c),
          m->u, n->b),
        m->v, n->a),
      m->w, n->s);

  uint_least32_t next_t =
    mod_mac(
      mod_mac(
        mod_mac(
          mod_mac(
            mod_mul_y(mod_mul(m->s, n->s)),
            m->t, n->w),
          m->u, n->v),
        m->v, n->u),
      m->w, n->t);

  uint_least32_t next_u =
    mod_mac(
      mod_mac(
        mod_mac(
          mod_mul_y(
            mod_mac(
              mod_mul(m->s, n->a),
              m->t, n->s)),
          m->u, n->w),
        m->v, n->v),
      m->w, n->u);

  uint_least32_t next_v =
    mod_mac(
      mod_mac(
        mod_mul_y(
          mod_mac(
            mod_mac(
              mod_mul(m->s, n->b),
              m->t, n->a),
            m->u, n->s)),
        m->v, n->w),
      m->w, n->v);

  uint_least32_t next_w =
    mod_mac(
      mod_mul_y(
        mod_mac(
          mod_mac(
            mod_mac(
              mod_mul(m->s, n->c),
              m->t, n->b),
            m->u, n->a),
          m->v, n->s)),
      m->w, n->w);

  /* Inputs are no longer needed; it is now safe to overwrite result. */
  result->s = next_s;
  result->t = next_t;
  result->u = next_u;
  result->v = next_v;
  result->w = next_w;

  /* Presumably recomputes the derived a/b/c/d fields read above from the
   * new s..w values -- confirm against mrg_update_cache. */
  mrg_update_cache(result);
}
/**
 * Solves the system x = a[i] (mod m[i]) by folding the congruences together
 * one pair at a time.
 *
 * @param a  Residues.
 * @param m  Moduli, one per residue; must be positive. Each new modulus must
 *           be invertible against the product of the previous ones, otherwise
 *           the assertion inside the pairwise step fires.
 * @return The solution in [0, m[0] * m[1] * ...). An empty system constrains
 *         nothing and yields 0.
 *
 * The running modulus is the plain product of the inputs, so it must fit in T.
 */
T chinese_remainder_theorem(const std::vector<T>& a, const std::vector<T>& m) {
    // Every residue needs its own modulus; a shorter m would be read out of bounds.
    assert(a.size() == m.size());
    if (a.empty()) {
        return 0;
    }

    // Merge x = a0 (mod m0) with x = a1 (mod m1) into one congruence
    // modulo m0 * m1.
    auto solve2 = [](T a0, T m0, T a1, T m1) {
        T t = mod_inverse(m0 % m1, m1);
        assert(t != m1);  // Otherwise no solution exists.
        t = mod_mul(mod_sub(a1, a0 % m1, m1), t, m1);
        return std::make_pair(a0 + m0 * t, m0 * m1);
    };

    // Bring the first residue into [0, m[0]) so that the invariant asserted
    // in the loop also holds before the first merge, and so that a
    // single-congruence system returns a canonical representative.
    assert(m[0] > 0);
    T first = a[0] % m[0];
    if (first < 0) {
        first += m[0];
    }

    std::pair<T, T> reduced{first, m[0]};
    for (size_t i = 1; i < a.size(); ++i) {
        reduced = solve2(reduced.first, reduced.second, a[i], m[i]);
        assert(reduced.first >= 0 && reduced.first < reduced.second);
    }
    return reduced.first;
}
/* Apply transition matrix mat to generator state st and store the new state
 * in r (a 5x5 matrix times 5-vector product, per the matrix layout comment in
 * the __MTA__ branch).
 *
 * Both branches read every st field they need before the corresponding r
 * field can be overwritten (the __MTA__ branch copies st into locals first;
 * the other branch computes all five outputs into o1..o5 before storing), so
 * r may point at the same object as st.
 *
 * NOTE(review): the exact reduction semantics of mod_down, mod_down_fast,
 * mod_mul, mod_mac_y and mod_mac2/3/4 are defined elsewhere -- confirm there
 * before changing any expression here. */
static void mrg_apply_transition(const mrg_transition_matrix* mat, const mrg_state* st, mrg_state* r) {
#ifdef __MTA__
  /* 64-bit path: works only from the five primary matrix entries s..w and
   * derives the remaining entries (a..d and the *y products) on the fly. */
  uint_fast64_t s = mat->s;
  uint_fast64_t t = mat->t;
  uint_fast64_t u = mat->u;
  uint_fast64_t v = mat->v;
  uint_fast64_t w = mat->w;
  uint_fast64_t z1 = st->z1;
  uint_fast64_t z2 = st->z2;
  uint_fast64_t z3 = st->z3;
  uint_fast64_t z4 = st->z4;
  uint_fast64_t z5 = st->z5;
  /* Bottom row [s t u v w]. */
  uint_fast64_t temp = s * z1 + t * z2 + u * z3 + v * z4;
  r->z5 = mod_down(mod_down_fast(temp) + w * z5);
  /* Row [a u v w s*y]; a and s*y are derived from s and t. */
  uint_fast64_t a = mod_down(107374182 * s + t);
  uint_fast64_t sy = mod_down(104480 * s);
  r->z4 = mod_down(mod_down_fast(a * z1 + u * z2 + v * z3) + w * z4 + sy * z5);
  /* Row [b v w s*y a*y]. */
  uint_fast64_t b = mod_down(107374182 * a + u);
  uint_fast64_t ay = mod_down(104480 * a);
  r->z3 = mod_down(mod_down_fast(b * z1 + v * z2 + w * z3) + sy * z4 + ay * z5);
  /* Row [c w s*y a*y b*y]. */
  uint_fast64_t c = mod_down(107374182 * b + v);
  uint_fast64_t by = mod_down(104480 * b);
  r->z2 = mod_down(mod_down_fast(c * z1 + w * z2 + sy * z3) + ay * z4 + by * z5);
  /* Top row [d s*y a*y b*y c*y]. */
  uint_fast64_t d = mod_down(107374182 * c + w);
  uint_fast64_t cy = mod_down(104480 * c);
  r->z1 = mod_down(mod_down_fast(d * z1 + sy * z2 + ay * z3) + by * z4 + cy * z5);
  /* A^n = [d   s*y a*y b*y c*y] */
  /*       [c   w   s*y a*y b*y] */
  /*       [b   v   w   s*y a*y] */
  /*       [a   u   v   w   s*y] */
  /*       [s   t   u   v   w  ] */
#else
  /* 32-bit path: uses the a/b/c/d entries already stored in mat. In each
   * mod_mac_y call the second argument gathers the terms that carry the y
   * factor in the matrix layout above (presumably mod_mac_y(p, q) is
   * p + y*q reduced -- confirm against its definition). */
  uint_fast32_t o1 = mod_mac_y(mod_mul(mat->d, st->z1), mod_mac4(0, mat->s, st->z2, mat->a, st->z3, mat->b, st->z4, mat->c, st->z5));
  uint_fast32_t o2 = mod_mac_y(mod_mac2(0, mat->c, st->z1, mat->w, st->z2), mod_mac3(0, mat->s, st->z3, mat->a, st->z4, mat->b, st->z5));
  uint_fast32_t o3 = mod_mac_y(mod_mac3(0, mat->b, st->z1, mat->v, st->z2, mat->w, st->z3), mod_mac2(0, mat->s, st->z4, mat->a, st->z5));
  uint_fast32_t o4 = mod_mac_y(mod_mac4(0, mat->a, st->z1, mat->u, st->z2, mat->v, st->z3, mat->w, st->z4), mod_mul(mat->s, st->z5));
  /* Bottom row has no y terms. */
  uint_fast32_t o5 = mod_mac2(mod_mac3(0, mat->s, st->z1, mat->t, st->z2, mat->u, st->z3), mat->v, st->z4, mat->w, st->z5);
  /* Store only after all five outputs are known (r may alias st). */
  r->z1 = o1;
  r->z2 = o2;
  r->z3 = o3;
  r->z4 = o4;
  r->z5 = o5;
#endif
}
/*
 * Polynomial step x -> (x * x + 1) mod n.
 * The square is formed by mod_mul so it is reduced modulo n before the
 * increment is applied.
 */
LL func(LL x, LL n)
{
    return (mod_mul(x, x, n) + 1) % n;
}
/**
 * Multiplies x by k modulo m using binary double-and-add, so the full
 * product x * k is never formed.
 *
 * @param x  Multiplicand.
 * @param k  Multiplier; may be negative (x * k is evaluated as (-x) * (-k)).
 *           The most negative value of a signed value_type cannot be negated
 *           and is not supported.
 * @param m  Modulus; must be non-zero unless k == 0.
 * @return x * k reduced with the built-in % operator, i.e. the sign of a
 *         non-zero result follows the sign of the product.
 *
 * The previous recursive form never terminated for negative k (-1 -> -2 ->
 * -1 ...) and doubled an unreduced x, which could overflow value_type.
 */
template<class value_type>
value_type mod_mul(value_type x, value_type k, ll m) {
    if (k == 0) {
        return 0;
    }
    if (k < 0) {
        x = -x;
        k = -k;
    }

    // Reduce up front so that every later sum stays below 2 * |m|.
    x = x % m;

    value_type acc = 0;
    for (;;) {
        if (k % 2 != 0) {
            acc = (acc + x) % m;
        }
        k /= 2;
        if (k == 0) {
            break;  // skip the final doubling: it is unused and could overflow
        }
        x = (x + x) % m;
    }
    return acc;
}
static long mod_sum_binom(long k, long n, long m) { long j; long A, B, C, C_acc; long a, b, a_star, b_star; size_t num_factors_m; struct factor *prime_factors_m; struct factor *cur_fact; struct factor *r = NULL; struct factor *cur_r = NULL; if(k > n / 2) { return int_modulus(expt_mod(2, n, m) - mod_sum_binom(n - k - 1, n, m), m); } /* Step 1 */ prime_factors_m = calc_prime_factors(m, k, &num_factors_m); /* Step 2 */ A = 1; B = 1; C = 1; for(j = 0; j < num_factors_m; j++) { if(!cur_r) { r = malloc(sizeof(struct factor)); cur_r = r; } else { cur_r->next = malloc(sizeof(struct factor)); cur_r = cur_r->next; } cur_r->value = 1; cur_r->next = NULL; } for(j = 1; j <= k; j++) { /* Step 3a */ a = n - j + 1; b = j; /* Steps 3b and 3c */ cur_fact = prime_factors_m; cur_r = r; a_star = a; b_star = b; while(cur_fact) { while(a_star % cur_fact->value == 0) { a_star /= cur_fact->value; cur_r->value *= cur_fact->value; } while(b_star % cur_fact->value == 0) { b_star /= cur_fact->value; cur_r->value /= cur_fact->value; } cur_fact = cur_fact->next; cur_r = cur_r->next; } /* Step 3d */ A = mod_mul(A, a_star, m); B = mod_mul(B, b_star, m); C = mod_mul(C, b_star, m); C_acc = A; for(cur_r = r; cur_r; cur_r = cur_r ->next) { C_acc = mod_mul(C_acc, cur_r->value, m); } C = int_modulus(C + C_acc, m); } free_factors(prime_factors_m); free_factors(r); /* Step 4 */ return mod_mul(C, mod_inv(B, m), m); }
/* Smoke test: prints mod_mul(2, 5, 7) and mod_exp(2, 5, 7) on one line. */
int main()
{
    const int product = mod_mul(2, 5, 7);
    const int power = mod_exp(2, 5, 7);

    printf("%d, %d\n", product, power);
    return 0;
}