/*
    Recursive step of Karatsuba multiplication for operands that are stored
    in revbin format.

    rev1, rev2  the two operands, each holding 2^bits coefficients.
    out         destination; at least its first 2 * 2^bits entries are
                written.
    temp        scratch space.  This level uses the first 2^bits entries
                and passes temp + 2^bits on to the recursive calls, so the
                caller must supply enough room for every recursion level.
    bits        log2 of the operand length.
*/
void
_fmpz_poly_mul_kara_recursive(fmpz * out, fmpz * rev1, fmpz * rev2,
                              fmpz * temp, long bits)
{
    const long length = 1L << bits;
    const long half = length / 2;
    fmpz *sum1, *sum2, *scratch, *upper;

    if (length == 1)
    {
        /* One coefficient each: a plain product, with the next slot zeroed. */
        fmpz_mul(out, rev1, rev2);
        fmpz_zero(out + 1);
        return;
    }

    sum1 = temp;                /* first half + second half of rev1 */
    sum2 = temp + half;         /* first half + second half of rev2 */
    scratch = temp + 2 * half;  /* scratch handed to the recursive calls */
    upper = out + length;       /* second half of the output buffer */

    _fmpz_vec_add(sum1, rev1, rev1 + half, half);
    _fmpz_vec_add(sum2, rev2, rev2 + half, half);

    /* Product of the first halves goes to the low part of out. */
    _fmpz_poly_mul_kara_recursive(out, rev1, rev2, scratch, bits - 1);

    /* Product of the two half-sums goes to the high part of out. */
    _fmpz_poly_mul_kara_recursive(upper, sum1, sum2, scratch, bits - 1);

    /* The half-sums are no longer needed, so the front of temp is reused
       for the product of the second halves. */
    _fmpz_poly_mul_kara_recursive(temp, rev1 + half, rev2 + half,
                                  scratch, bits - 1);

    /* Remove both outer products from the product of the sums. */
    _fmpz_vec_sub(upper, upper, out, length);
    _fmpz_vec_sub(upper, upper, temp, length);

    /* Fold the product held in temp back into out. */
    _fmpz_vec_add_rev(out, temp, bits);
}
/*
    Sets res to mat1 + mat2, one row at a time.

    The row and column counts are taken from res; the corresponding rows of
    mat1 and mat2 are read with those same dimensions.  Nothing is done when
    res has fewer than one column.
*/
void
fmpz_mat_add(fmpz_mat_t res, const fmpz_mat_t mat1, const fmpz_mat_t mat2)
{
    long row = 0;

    if (res->c >= 1)
    {
        while (row < res->r)
        {
            _fmpz_vec_add(res->rows[row], mat1->rows[row], mat2->rows[row],
                          res->c);
            row++;
        }
    }
}
/*
    Fill rop with a combination of the rows of self->B shifted by self->c_z.

    For every row i of self->B one value is drawn by calling
    self->D[i]->call(...) with the supplied random state; that value times
    row i is accumulated into rop.  Finally self->c_z is added.

    rop    output vector with room for as many entries as self->B has
           columns; must not be NULL.
    self   sampler object; must not be NULL.
    state  GMP random state forwarded to each per-row call.

    Always returns 0.
*/
int dgsl_mp_call_inlattice(fmpz *rop, const dgsl_mp_t *self, gmp_randstate_t state) {
  assert(rop);
  assert(self);

  const long nrows = fmpz_mat_nrows(self->B);
  const long ncols = fmpz_mat_ncols(self->B);

  /* The per-row call produces an mpz; it is converted to an fmpz before
     being used as the scalar multiplier. */
  mpz_t sample;
  mpz_init(sample);
  fmpz_t coeff;
  fmpz_init(coeff);

  _fmpz_vec_zero(rop, ncols);

  long row = 0;
  while (row < nrows) {
    self->D[row]->call(sample, self->D[row], state);
    fmpz_set_mpz(coeff, sample);
    _fmpz_vec_scalar_addmul_fmpz(rop, self->B->rows[row], ncols, coeff);
    row++;
  }

  _fmpz_vec_add(rop, rop, self->c_z, ncols);

  mpz_clear(sample);
  fmpz_clear(coeff);
  return 0;
}
/*
    Subtracts x times (vec2, len2) from (vec1, len2), in place in vec1.

    When x is not an mpz-backed coefficient the work is dispatched on its
    value: 0 does nothing, 1 is a vector subtraction, -1 is a vector
    addition, and anything else goes through the signed-integer variant.
    Otherwise each entry is updated individually with fmpz_submul.
*/
void
_fmpz_vec_scalar_submul_fmpz(fmpz * vec1, const fmpz * vec2, slong len2,
                             const fmpz_t x)
{
    const fmpz scalar = *x;
    slong idx;

    if (COEFF_IS_MPZ(scalar))
    {
        /* Large multiplier: no shortcut, update coefficient by coefficient. */
        for (idx = 0; idx < len2; idx++)
            fmpz_submul(vec1 + idx, vec2 + idx, x);
        return;
    }

    switch (scalar)
    {
        case 0:
            /* vec1 - 0 * vec2 leaves vec1 untouched. */
            break;
        case 1:
            _fmpz_vec_sub(vec1, vec1, vec2, len2);
            break;
        case -1:
            _fmpz_vec_add(vec1, vec1, vec2, len2);
            break;
        default:
            _fmpz_vec_scalar_submul_si(vec1, vec2, len2, scalar);
            break;
    }
}
/*
    Composes (poly1, len1) with (poly2, len2) by a divide-and-conquer
    scheme and writes the result to res.

    Three cases are handled up front:
      - len1 == 1: res receives a copy of the single coefficient of poly1;
      - len2 == 1: poly1 is evaluated at the single coefficient of poly2;
      - len1 == 2: the Horner variant is called instead.

    Otherwise the coefficients of poly1 are first combined in pairs as
    poly1[2i] + poly1[2i + 1] * poly2, and these pieces are then merged
    pairwise, level by level, using successive squarings of poly2 (kept in
    pow) until only h[0] and h[1] remain; the result is pow * h[1] + h[0].

    NOTE(review): res presumably needs room for (len1 - 1) * (len2 - 1) + 1
    coefficients -- confirm against the callers.
    NOTE(review): neither malloc result is checked before use.
*/
void _fmpz_poly_compose_divconquer(fmpz * res, const fmpz * poly1, long len1, const fmpz * poly2, long len2)
{
    long i, j, k, n;
    long *hlen, alloc, powlen;
    fmpz *v, **h, *pow, *temp;

    if (len1 == 1)
    {
        fmpz_set(res, poly1);
        return;
    }
    if (len2 == 1)
    {
        _fmpz_poly_evaluate_fmpz(res, poly1, len1, poly2);
        return;
    }
    if (len1 == 2)
    {
        _fmpz_poly_compose_horner(res, poly1, len1, poly2, len2);
        return;
    }

    /* Initialisation */

    /* hlen has one entry per pair of coefficients of poly1.  It is first
       filled with the space reserved for each piece h[i] and is reset
       to zero further down, after which it holds the current length of
       each piece. */
    hlen = (long *) malloc(((len1 + 1) / 2) * sizeof(long));

    /* k becomes the smallest value >= 1 with 2^(k + 1) >= len1. */
    for (k = 1; (2 << k) < len1; k++) ;

    hlen[0] = hlen[1] = ((1 << k) - 1) * (len2 - 1) + 1;
    for (i = k - 1; i > 0; i--)
    {
        long hi = (len1 + (1 << i) - 1) / (1 << i);
        for (n = (hi + 1) / 2; n < hi; n++)
            hlen[n] = ((1 << i) - 1) * (len2 - 1) + 1;
    }
    powlen = (1 << k) * (len2 - 1) + 1;

    /* A single vector v backs every h[i], followed by two buffers of
       powlen entries each (pow and temp). */
    alloc = 0;
    for (i = 0; i < (len1 + 1) / 2; i++)
        alloc += hlen[i];

    v = _fmpz_vec_init(alloc + 2 * powlen);
    h = (fmpz **) malloc(((len1 + 1) / 2) * sizeof(fmpz *));
    h[0] = v;
    for (i = 0; i < (len1 - 1) / 2; i++)
    {
        /* Carve out the next piece, then mark the current one as empty. */
        h[i + 1] = h[i] + hlen[i];
        hlen[i] = 0;
    }
    hlen[(len1 - 1) / 2] = 0;
    pow = v + alloc;
    temp = pow + powlen;

    /* Let's start the actual work */

    /* Bottom level: h[i] = poly1[2i] + poly1[2i + 1] * poly2.  A piece whose
       two coefficients are both zero keeps length 0. */
    for (i = 0, j = 0; i < len1 / 2; i++, j += 2)
    {
        if (poly1[j + 1] != 0L)
        {
            _fmpz_vec_scalar_mul_fmpz(h[i], poly2, len2, poly1 + j + 1);
            fmpz_add(h[i], h[i], poly1 + j);
            hlen[i] = len2;
        }
        else if (poly1[j] != 0L)
        {
            fmpz_set(h[i], poly1 + j);
            hlen[i] = 1;
        }
    }
    /* For odd len1 the last coefficient has no partner and forms a piece
       on its own. */
    if ((len1 & 1L))
    {
        if (poly1[j] != 0L)
        {
            fmpz_set(h[i], poly1 + j);
            hlen[i] = 1;
        }
    }

    /* pow starts out as poly2^2 and is squared once per level below. */
    _fmpz_poly_mul(pow, poly2, len2, poly2, len2);
    powlen = 2 * len2 - 1;

    /* Each pass merges the n current pieces into (n + 1) / 2 pieces via
       h[i] = h[2i] + pow * h[2i + 1]. */
    for (n = (len1 + 1) / 2; n > 2; n = (n + 1) / 2)
    {
        /* Piece 0 is updated in place, so its product goes through temp. */
        if (hlen[1] > 0)
        {
            long templen = powlen + hlen[1] - 1;
            _fmpz_poly_mul(temp, pow, powlen, h[1], hlen[1]);
            _fmpz_poly_add(h[0], temp, templen, h[0], hlen[0]);
            hlen[0] = FLINT_MAX(hlen[0], templen);
        }

        for (i = 1; i < n / 2; i++)
        {
            if (hlen[2*i + 1] > 0)
            {
                _fmpz_poly_mul(h[i], pow, powlen, h[2*i + 1], hlen[2*i + 1]);
                hlen[i] = hlen[2*i + 1] + powlen - 1;
            }
            else
                hlen[i] = 0;
            _fmpz_poly_add(h[i], h[i], hlen[i], h[2*i], hlen[2*i]);
            hlen[i] = FLINT_MAX(hlen[i], hlen[2*i]);
        }
        /* With an odd number of pieces the last one is carried over
           unchanged. */
        if ((n & 1L))
        {
            _fmpz_vec_set(h[i], h[2*i], hlen[2*i]);
            hlen[i] = hlen[2*i];
        }

        /* Square pow into temp, then exchange the two buffers. */
        _fmpz_poly_mul(temp, pow, powlen, pow, powlen);
        powlen += powlen - 1;
        {
            fmpz * t = pow;
            pow = temp;
            temp = t;
        }
    }

    /* Final merge of the two remaining pieces: res = pow * h[1] + h[0]. */
    _fmpz_poly_mul(res, pow, powlen, h[1], hlen[1]);
    _fmpz_vec_add(res, res, h[0], hlen[0]);

    /* NOTE(review): powlen was reassigned above; this clear length matches
       the allocation only because powlen has grown back to its initial
       value (2^k) * (len2 - 1) + 1 by the time the loop ends -- worth
       keeping in mind if the loop bounds ever change. */
    _fmpz_vec_clear(v, alloc + 2 * powlen);
    free(h);
    free(hlen);
}
static void _qadic_exp_bsplit_series(fmpz *P, fmpz_t Q, fmpz *T, const fmpz *x, slong len, slong lo, slong hi, const fmpz *a, const slong *j, slong lena) { const slong d = j[lena - 1]; if (hi - lo == 1) { _fmpz_vec_set(P, x, len); _fmpz_vec_zero(P + len, 2*d - 1 - len); fmpz_set_si(Q, lo); _fmpz_vec_set(T, P, 2*d - 1); } else if (hi - lo == 2) { _fmpz_poly_sqr(P, x, len); _fmpz_vec_zero(P + (2*len - 1), d - (2*len - 1)); _fmpz_poly_reduce(P, 2*len - 1, a, j, lena); fmpz_set_si(Q, lo); fmpz_mul_si(Q, Q, lo + 1); _fmpz_vec_scalar_mul_si(T, x, len, lo + 1); _fmpz_vec_zero(T + len, d - len); _fmpz_vec_add(T, T, P, d); } else { const slong m = (lo + hi) / 2; fmpz *PR, *TR, *W; fmpz_t QR; PR = _fmpz_vec_init(2*d - 1); TR = _fmpz_vec_init(2*d - 1); W = _fmpz_vec_init(2*d - 1); fmpz_init(QR); _qadic_exp_bsplit_series(P, Q, T, x, len, lo, m, a, j, lena); _qadic_exp_bsplit_series(PR, QR, TR, x, len, m, hi, a, j, lena); _fmpz_poly_mul(W, TR, d, P, d); _fmpz_poly_reduce(W, 2*d - 1, a, j, lena); _fmpz_vec_scalar_mul_fmpz(T, T, d, QR); _fmpz_vec_add(T, T, W, d); _fmpz_poly_mul(W, P, d, PR, d); _fmpz_poly_reduce(W, 2*d - 1, a, j, lena); _fmpz_vec_swap(P, W, d); fmpz_mul(Q, Q, QR); _fmpz_vec_clear(PR, 2*d - 1); _fmpz_vec_clear(TR, 2*d - 1); _fmpz_vec_clear(W, 2*d - 1); fmpz_clear(QR); } }
/*
    Divides (A, lenA) by (B, lenB), writing the quotient to Q and the
    remainder to R.  The work is reduced to (2n - 1)-by-n divisions, which
    are handed to _fmpz_poly_divrem_divconquer_recursive.

    Three cases, depending on how lenA compares with 2 lenB - 1:

      - equal:   one direct call to the recursive routine;
      - smaller: only the top n1 = lenA - lenB + 1 coefficients of B take
                 part in the recursive division, and the contribution of
                 the low part of B is added afterwards;
      - larger:  the top 2 lenB - 1 coefficients of A are divided first, A
                 is updated in place, and the function calls itself on the
                 remaining lenA - lenB coefficients.

    XXX: in the "larger" case R is also used as workspace and is expected
    to be of length lenA + 2 * (2 * lenB - 1); A is written to even though
    it is declared const, so it must be modifiable.
*/
static void
__fmpz_poly_divrem_divconquer(fmpz * Q, fmpz * R,
                              const fmpz * A, long lenA,
                              const fmpz * B, long lenB)
{
    const long balanced = 2 * lenB - 1;

    if (lenA == balanced)
    {
        fmpz * scratch = _fmpz_vec_init(lenA);

        _fmpz_poly_divrem_divconquer_recursive(Q, R, scratch, A, B, lenB);

        /* Turn the value left in R by the recursive call into R = A - R. */
        _fmpz_vec_sub(R, A, R, lenA);

        _fmpz_vec_clear(scratch, lenA);
        return;
    }

    if (lenA < balanced)
    {
        /* Unbalanced division, converted into a 2 n1 - 1 by n1 division. */
        const long n1 = lenA - lenB + 1;
        const long n2 = lenB - n1;
        const long wlen = (2 * n1 - 1) + lenB - 1;

        const fmpz * Atop = A + n2;
        const fmpz * Btop = B + n2;
        const fmpz * Blow = B;

        fmpz * scratch = _fmpz_vec_init(wlen);
        fmpz * top_prod = R + n2;
        fmpz * low_prod = scratch + (2 * n1 - 1);

        _fmpz_poly_divrem_divconquer_recursive(Q, top_prod, scratch,
                                               Atop, Btop, n1);

        /* low_prod = Q * Blow, of length lenB - 1; the longer operand is
           passed first. */
        if (n1 >= n2)
            _fmpz_poly_mul(low_prod, Q, n1, Blow, n2);
        else
            _fmpz_poly_mul(low_prod, Blow, n2, Q, n1);

        /* Assemble B * Q in R from top_prod (shifted by n2) and low_prod,
           then replace it by R = A - B * Q. */
        _fmpz_vec_swap(R, low_prod, n2);
        _fmpz_vec_add(R + n2, R + n2, low_prod + n2, n1 - 1);
        _fmpz_vec_sub(R, A, R, lenA);

        _fmpz_vec_clear(scratch, wlen);
        return;
    }

    /* Remaining case: lenA > 2 lenB - 1. */
    {
        /* Shifting A right by this amount leaves 2 lenB - 1 coefficients. */
        const long shift = lenA - 2 * lenB + 1;

        const fmpz * Atop = A + shift;
        fmpz * Qtop = Q + shift;
        fmpz * Qlow = Q;

        /* Workspace lives in the tail of R, past the first lenA entries. */
        fmpz * scratch = R + lenA;
        fmpz * top_prod = scratch + balanced;

        /* Qtop = Atop div B, a 2 lenB - 1 by lenB division, giving a
           quotient of length lenB; top_prod = B * Qtop has length
           2 lenB - 1. */
        _fmpz_poly_divrem_divconquer_recursive(Qtop, top_prod, scratch,
                                               Atop, B, lenB);

        /* Subtract the low lenB - 1 terms of top_prod from A in place.
           The top lenB coefficients of A - B * Qtop * x^shift are remainder
           terms (zero if the division is exact), so only the lower
           lenA - lenB coefficients take part in the next division. */
        _fmpz_vec_sub((fmpz *) A + shift, A + shift, top_prod, lenB - 1);

        /* Qlow = trunc(A) div B; a smaller division than the original one
           since only lenA - lenB coefficients remain. */
        __fmpz_poly_divrem_divconquer(Qlow, R, A, lenA - lenB, B, lenB);

        /* Fill in the top lenB entries of the remainder. */
        _fmpz_vec_sub(R + lenA - lenB, A + lenA - lenB,
                      top_prod + lenB - 1, lenB);

        /* Now Q = Qtop * x^shift + Qlow, of length lenB + shift (Qlow has
           length shift), and the remainder has been written in place. */
    }
}