void _elem_frac_poly_divrem(elem_ptr Q, elem_ptr Qden, elem_ptr R, elem_ptr Rden, elem_srcptr A, elem_srcptr Aden, long lenA, elem_srcptr B, elem_srcptr Bden, long lenB, const ring_t ring) { long lenQ = lenA - lenB + 1; long lenR = lenB - 1; long size = ring->size; elem_ptr den; elem_srcptr leadB = SRC_INDEX(B, lenB - 1, size); ulong d; /* TODO: lenB == 1 case efficiently */ _elem_poly_pseudo_divrem(Q, R, &d, A, lenA, B, lenB, ring); for ( ; lenR != 0 && elem_is_zero(SRC_INDEX(R, lenR - 1, size), ring); lenR--); /* TODO: lead^d = +/- 1 and other cases more efficiently */ /* see also fmpq_poly_divrem, which avoids the post-canonicalisation */ ELEM_TMP_INIT(den, ring); elem_pow_ui(den, leadB, d, ring); elem_mul(den, den, Aden, ring); elem_set(Rden, den, ring); elem_set(Qden, den, ring); _elem_vec_scalar_mul(Q, Q, lenQ, Bden, ring); ELEM_TMP_CLEAR(den, ring); }
/*
    Solves a linear system column by column using a precomputed
    fraction-free LU decomposition.

    X is first set to B with its rows permuted according to perm
    (elem_mat_set_perm).  Each column of X is then transformed in place by
    fraction-free forward substitution followed by fraction-free back
    substitution, reading the factors from FFLU through the LU() accessor.

    The matrix dimensions are taken from X (n = X->r rows, m = X->c columns).
    Entry arithmetic is carried out in RING_PARENT(ring).

    NOTE(review): XX() and LU() are accessor macros defined elsewhere; they
    presumably address entries of X and FFLU respectively -- confirm.
*/
void
elem_mat_solve_fflu_precomp(elem_mat_t X, const long * perm,
                            const elem_mat_t FFLU, const elem_mat_t B,
                            const ring_t ring)
{
    elem_ptr T;
    long i, j, k, m, n;
    const ring_struct * ering = RING_PARENT(ring);

    n = X->r;
    m = X->c;

    ELEM_TMP_INIT(T, ering);

    elem_mat_set_perm(X, perm, B, ring);

    k = 0;
    while (k < m)
    {
        /* Fraction-free forward substitution */
        i = 0;
        while (i < n - 1)
        {
            for (j = i + 1; j < n; j++)
            {
                elem_mul(XX(j, k), XX(j, k), LU(i, i), ering);
                elem_mul(T, LU(j, i), XX(i, k), ering);
                elem_sub(XX(j, k), XX(j, k), T, ering);

                /* From the second pivot on, divide out the previous pivot. */
                if (i != 0)
                    elem_divexact(XX(j, k), XX(j, k), LU(i-1, i-1), ering);
            }

            i++;
        }

        /* Fraction-free back substitution */
        i = n - 2;
        while (i >= 0)
        {
            elem_mul(XX(i, k), XX(i, k), LU(n-1, n-1), ering);

            for (j = i + 1; j < n; j++)
            {
                elem_mul(T, XX(j, k), LU(i, j), ering);
                elem_sub(XX(i, k), XX(i, k), T, ering);
            }

            elem_divexact(XX(i, k), XX(i, k), LU(i, i), ering);

            i--;
        }

        k++;
    }

    ELEM_TMP_CLEAR(T, ering);
}
/*
    Adds c times each entry of (vec, len) to the corresponding entry of
    (res, len):  res[k] = res[k] + vec[k] * c  for 0 <= k < len.

    Each product is formed in a temporary before being added, so the product
    never overwrites an operand.
*/
void
_elem_vec_scalar_addmul(elem_ptr res, elem_srcptr vec, long len,
                        elem_srcptr c, const ring_t ring)
{
    long size = ring->size;
    long k;
    elem_ptr prod;

    ELEM_TMP_INIT(prod, ring);

    k = 0;
    while (k < len)
    {
        elem_mul(prod, SRC_INDEX(vec, k, size), c, ring);
        elem_add(INDEX(res, k, size), SRC_INDEX(res, k, size), prod, ring);
        k++;
    }

    ELEM_TMP_CLEAR(prod, ring);
}
int main() { flint_rand_t state; long iter; printf("poly_divrem_basecase...."); fflush(stdout); flint_randinit(state); for (iter = 0; iter < 10000; iter++) { ring_t ZZ, ZZp, ZZpx[3]; ring_struct * ring; elem_ptr p; long size[3]; elem_poly_t A, B, C, Q, Q2, R, R2; ring_init_limb(ZZ); ELEM_TMP_INIT(p, ZZ); elem_set_ui(p, n_randtest_prime(state, 0), ZZ); ring_init_mod(ZZp, ZZ, p); ring_init_poly(ZZpx[0], ZZp); ring_init_poly(ZZpx[1], ZZpx[0]); ring_init_poly(ZZpx[2], ZZpx[1]); ring = ZZpx[n_randint(state, 2)]; size[0] = 1 + n_randint(state, 30); size[1] = 1 + n_randint(state, 30); size[2] = 1 + n_randint(state, 30); elem_init(A, ring); elem_init(B, ring); elem_init(C, ring); elem_init(Q, ring); elem_init(Q2, ring); elem_init(R, ring); elem_init(R2, ring); elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_mul(C, A, B, ring); elem_poly_divrem_basecase(Q, R, C, B, ring); if (!elem_equal(Q, A, ring) || !elem_is_zero(R, ring)) { printf("FAIL: (A * B) / B = A\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(C, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); elem_print(R, ring); printf("\n\n"); abort(); } elem_divrem(Q, R, A, B, ring); elem_mul(C, Q, B, ring); elem_add(C, C, R, ring); if (!elem_equal(C, A, ring)) { printf("FAIL: Q * B + R = A\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(C, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); elem_print(R, ring); printf("\n\n"); abort(); } elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_poly_divrem_basecase(Q, R, A, B, ring); elem_poly_divrem_basecase(A, R2, A, B, ring); if (!elem_equal(A, Q, ring) || !elem_equal(R, R2, ring)) { printf("FAIL: aliasing Q, A\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); elem_print(R, ring); printf("\n\n"); elem_print(R2, ring); printf("\n\n"); 
abort(); } elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_poly_divrem_basecase(Q, R, A, B, ring); elem_poly_divrem_basecase(Q2, A, A, B, ring); if (!elem_equal(A, R, ring) || !elem_equal(Q, Q2, ring)) { printf("FAIL: aliasing R, A\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); elem_print(Q2, ring); printf("\n\n"); elem_print(R, ring); printf("\n\n"); abort(); } elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_poly_divrem_basecase(Q, R, A, B, ring); elem_poly_divrem_basecase(Q2, B, A, B, ring); if (!elem_equal(B, R, ring) || !elem_equal(Q, Q2, ring)) { printf("FAIL: aliasing R, B\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); elem_print(Q2, ring); printf("\n\n"); elem_print(R, ring); printf("\n\n"); abort(); } elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_poly_divrem_basecase(Q, R, A, B, ring); elem_poly_divrem_basecase(B, R2, A, B, ring); if (!elem_equal(B, Q, ring) || !elem_equal(R, R2, ring)) { printf("FAIL: aliasing Q, B\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); elem_print(R, ring); printf("\n\n"); elem_print(R2, ring); printf("\n\n"); abort(); } elem_clear(A, ring); elem_clear(B, ring); elem_clear(C, ring); elem_clear(Q, ring); elem_clear(Q2, ring); elem_clear(R, ring); elem_clear(R2, ring); ring_clear(ZZpx[2]); ring_clear(ZZpx[1]); ring_clear(ZZpx[0]); ring_clear(ZZp); ELEM_TMP_CLEAR(p, ZZ); ring_clear(ZZ); } printf("PASS\n"); flint_randclear(state); return EXIT_SUCCESS; }
int main(int argc, char const *argv[]) { if (argc < 2) { printf("Not enough arguments\n"); return -1; } int test_method = atoi(argv[1]); openblas_set_num_threads(1); int m = 1024; int n = 1024; float *A = new float[m * n]; for (int i = 0; i < m * n; i++) { A[i] = rand() / RAND_MAX; } float *a = new float[n]; for (int i = 0; i < n; i++) { a[i] = rand() / RAND_MAX; } float *B = new float[m * n]; for (int i = 0; i < m * n; i++) { B[i] = rand() / RAND_MAX; } float *b = new float[n]; for (int i = 0; i < n; i++) { b[i] = rand() / RAND_MAX; } float *C = new float[m]; float *c = new float[m]; float *temp_a = new float[m]; float *temp_b = new float[m]; float *res = new float[m]; switch (test_method) { case 0: { omp_set_num_threads(3); double begTime = CycleTimer::currentSeconds(); #pragma omp parallel for for (int i=0; i<3; ++i) { switch(i) { case 0: { cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, A, n, a, 1, 1.0, temp_a, 1); break; } case 1: { cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, B, n, b, 1, 1.0, temp_b, 1); break; } case 2: { elem_mul(res, C, c, m); break; } } } for (int i=0; i<m; ++i) { res[i] += temp_a[i] + temp_b[i]; } double endTime = CycleTimer::currentSeconds(); printf("%f\n", (endTime - begTime)); break; } case 1: { double begTime = CycleTimer::currentSeconds(); cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, A, n, a, 1, 1.0, res, 1); cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, B, n, b, 1, 1.0, res, 1); elem_mul(res, C, c, m); double endTime = CycleTimer::currentSeconds(); printf("%f\n", (endTime - begTime)); break; } default: { printf("No matched test method\n"); return -1; } } delete [] A; delete [] B; delete [] a; delete [] b; delete [] C; delete [] c; delete [] temp_a; delete [] temp_b; delete [] res; return 0; }
/*
    Computes a greatest common divisor of (poly1, len1) and (poly2, len2)
    with a subresultant pseudo-remainder sequence and writes len2
    coefficients to res (zero-padded at the top).

    When len2 == 1 the result is the gcd of the coefficients of poly1 with
    the single coefficient of poly2.  Otherwise:

      - the gcd of the coefficients of each input is divided out (a and b),
        and d = gcd(a, b) is remembered;
      - pseudo-remainders (_elem_poly_pseudo_rem_cohen) are taken
        repeatedly, dividing by g * h^delta after every step, until the
        remainder has length at most 1;
      - if the last remainder is a nonzero constant the result is d,
        otherwise it is the last divisor with its coefficient gcd removed,
        its leading sign made non-negative (by negating d), times d.

    All temporaries live in one scratch vector of len1 + len2 + 5 entries.

    NOTE(review): presumably requires len1 >= len2 >= 1, and relies on
    _elem_vec_init returning zeroed entries so that the first two
    _elem_vec_gcd calls start from zero -- confirm.
*/
void
_elem_poly_gcd_subresultant(elem_ptr res, elem_srcptr poly1, long len1,
                            elem_srcptr poly2, long len2, const ring_t ring)
{
    elem_ptr a, b, d, g, h;
    elem_ptr A, B, W;
    long alloc, lenA, lenB, size;

    if (len2 == 1)
    {
        _elem_vec_gcd(res, poly1, len1, poly2, ring);
        return;
    }

    size = ring->size;

    /* Scratch layout: A (len1) | B (len2) | a | b | d | g | h */
    alloc = len1 + len2 + 5;
    W = _elem_vec_init(alloc, ring);

    A = INDEX(W, 0, size);
    B = INDEX(A, len1, size);
    a = INDEX(B, len2, size);
    b = INDEX(a, 1, size);
    d = INDEX(b, 1, size);
    g = INDEX(d, 1, size);
    h = INDEX(g, 1, size);

    lenA = len1;
    lenB = len2;

    /* Divide out the coefficient gcd of each input. */
    _elem_vec_gcd(a, poly1, lenA, a, ring);
    _elem_vec_gcd(b, poly2, lenB, b, ring);
    _elem_vec_scalar_divexact(A, poly1, lenA, a, ring);
    _elem_vec_scalar_divexact(B, poly2, lenB, b, ring);

    elem_gcd(d, a, b, ring);
    elem_one(g, ring);
    elem_one(h, ring);

    for (;;)
    {
        const long delta = lenA - lenB;
        elem_ptr tmp_ptr;
        long tmp_len;

        _elem_poly_pseudo_rem_cohen(A, A, lenA, B, lenB, ring);

        ELEM_VEC_NORM(A, lenA, ring);

        if (lenA <= 1)
            break;

        /* Swap A and B */
        tmp_ptr = A;
        A = B;
        B = tmp_ptr;
        tmp_len = lenA;
        lenA = lenB;
        lenB = tmp_len;

        if (delta != 1)
        {
            elem_pow_ui(a, h, delta, ring);
            elem_mul(b, g, a, ring);
            _elem_vec_scalar_divexact(B, B, lenB, b, ring);
            elem_pow_ui(b, SRC_INDEX(A, lenA - 1, size), delta, ring);
            elem_mul(g, h, b, ring);
            elem_divexact(h, g, a, ring);
            elem_set(g, SRC_INDEX(A, lenA - 1, size), ring);
        }
        else
        {
            elem_mul(b, g, h, ring);
            _elem_vec_scalar_divexact(B, B, lenB, b, ring);
            elem_set(g, SRC_INDEX(A, lenA - 1, size), ring);
            elem_set(h, g, ring);
        }
    }

    if (lenA == 1)
    {
        /* Nonzero constant remainder: only the coefficient gcd d is left. */
        elem_set(res, d, ring);
        _elem_vec_zero(INDEX(res, 1, size), len2 - 1, ring);
    }
    else
    {
        /* Zero remainder: the last divisor B, made primitive, times d. */
        elem_zero(b, ring);
        _elem_vec_gcd(b, B, lenB, b, ring);
        _elem_vec_scalar_divexact(B, B, lenB, b, ring);

        if (elem_leading_sign(INDEX(B, lenB - 1, size), ring) < 0)
            elem_neg(d, d, ring);

        _elem_vec_scalar_mul(res, B, lenB, d, ring);

        if (len2 >= lenB)
            _elem_vec_zero(INDEX(res, lenB, size), len2 - lenB, ring);
    }

    _elem_vec_clear(W, alloc, ring);
}
void test_divexact(flint_rand_t state, const ring_t ring, const long * size, long iters) { long iter; for (iter = 0; iter < iters; iter++) { elem_ptr A, B, C, Q; A = elem_new(ring); B = elem_new(ring); C = elem_new(ring); Q = elem_new(ring); elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_mul(C, A, B, ring); elem_divexact(Q, C, B, ring); if (!elem_equal(Q, A, ring)) { printf("FAIL: (A * B) / B = A\n"); ring_print(ring); printf("\n\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(C, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); abort(); } elem_randtest_not_zero(A, state, size, ring); elem_set(B, A, ring); elem_divexact(C, A, A, ring); elem_divexact(Q, A, B, ring); if (!elem_equal(C, Q, ring) || !elem_is_one(Q, ring)) { printf("FAIL: aliasing A, B\n"); ring_print(ring); printf("\n\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(C, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); abort(); } elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_mul(A, A, B, ring); elem_divexact(Q, A, B, ring); elem_divexact(A, A, B, ring); if (!elem_equal(A, Q, ring)) { printf("FAIL: aliasing Q, A\n"); ring_print(ring); printf("\n\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(C, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); abort(); } elem_randtest(A, state, size, ring); elem_randtest_not_zero(B, state, size, ring); elem_mul(A, A, B, ring); elem_divexact(Q, A, B, ring); elem_divexact(B, A, B, ring); if (!elem_equal(B, Q, ring)) { printf("FAIL: aliasing Q, A\n"); ring_print(ring); printf("\n\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(C, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); abort(); } elem_randtest_not_zero(B, state, size, ring); elem_divexact(Q, B, B, ring); elem_divexact(B, B, B, 
ring); if (!elem_equal(B, Q, ring)) { printf("FAIL: aliasing Q, A, B\n"); ring_print(ring); printf("\n\n"); elem_print(A, ring); printf("\n\n"); elem_print(B, ring); printf("\n\n"); elem_print(C, ring); printf("\n\n"); elem_print(Q, ring); printf("\n\n"); abort(); } elem_del(A, ring); elem_del(B, ring); elem_del(C, ring); elem_del(Q, ring); } }