static void one_test (const struct matrix *A, const struct matrix *B, int i) { struct matrix R; struct matrix P; mp_ptr tp; matrix_init (&R, A->n + B->n + 1); matrix_init (&P, A->n + B->n + 1); tp = refmpn_malloc_limbs (mpn_matrix22_mul_itch (A->n, B->n)); ref_matrix22_mul (&R, A, B, tp); matrix_copy (&P, A); mpn_matrix22_mul (P.e00, P.e01, P.e10, P.e11, A->n, B->e00, B->e01, B->e10, B->e11, B->n, tp); P.n = A->n + B->n + 1; if (!matrix_equal_p (&R, &P)) { fprintf (stderr, "ERROR in test %d\n", i); gmp_fprintf (stderr, "A = (%Nx, %Nx\n %Nx, %Nx)\n" "B = (%Nx, %Nx\n %Nx, %Nx)\n" "R = (%Nx, %Nx (expected)\n %Nx, %Nx)\n" "P = (%Nx, %Nx (incorrect)\n %Nx, %Nx)\n", A->e00, A->n, A->e01, A->n, A->e10, A->n, A->e11, A->n, B->e00, B->n, B->e01, B->n, B->e10, B->n, B->e11, B->n, R.e00, R.n, R.e01, R.n, R.e10, R.n, R.e11, R.n, P.e00, P.n, P.e01, P.n, P.e10, P.n, P.e11, P.n); abort(); } refmpn_free_limbs (tp); matrix_clear (&R); matrix_clear (&P); }
/* Replace M by the product M * M1, i.e., multiply M by M1 from the
   right.  The scratch area tp must hold 3*(M->n + M1->n) + 5 limbs
   (see mpn_matrix22_mul_itch).  On return M->n is the normalized
   size of the product.  */
void
mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
		     mp_ptr tp)
{
  /* The four element arrays of M; the product is written back into
     these same arrays.  */
  mp_ptr m00 = M->p[0][0];
  mp_ptr m01 = M->p[0][1];
  mp_ptr m10 = M->p[1][0];
  mp_ptr m11 = M->p[1][1];
  mp_size_t top;
  int pass;

  /* On the size of the elements of the product: the diagonal elements
     of M1 are > 0, so no element can decrease.  The new elements have
     size M->n + M1->n, one limb more or less.  The matrix
     multiplication itself produces elements of size M->n + M1->n + 1,
     but after normalization the true size may be three limbs smaller.

     Why the normalized size of the product is >= M->n + M1->n - 2 is
     subtle.  It relies on M and M1 being factorable as products of
     (1,1; 0,1) and (1,0; 1,1), together with the fact that M cannot
     end with a large power of one of these matrices while M1 starts
     with a large power of the same matrix.  */

  /* FIXME: Strassen multiplication gives only a small speedup.  In
     FFT multiplication range, this function could be sped up quite a
     lot using invariance.  */
  ASSERT (M->n + M1->n < M->alloc);

  /* Both operands must be normalized: some element has a non-zero
     most significant limb.  */
  ASSERT ((m00[M->n - 1] | m01[M->n - 1]
	   | m10[M->n - 1] | m11[M->n - 1]) > 0);

  ASSERT ((M1->p[0][0][M1->n - 1] | M1->p[0][1][M1->n - 1]
	   | M1->p[1][0][M1->n - 1] | M1->p[1][1][M1->n - 1]) > 0);

  mpn_matrix22_mul (m00, m01, m10, m11, M->n,
		    M1->p[0][0], M1->p[0][1],
		    M1->p[1][0], M1->p[1][1], M1->n, tp);

  /* Index of the last potentially non-zero limb; the size is one
     greater.  */
  top = M->n + M1->n;

  /* Strip up to three high limbs that are zero in all four elements.
     Once a non-zero limb is found the remaining passes leave top
     unchanged.  */
  for (pass = 0; pass < 3; pass++)
    top -= ((m00[top] | m01[top] | m10[top] | m11[top]) == 0);

  ASSERT ((m00[top] | m01[top] | m10[top] | m11[top]) > 0);

  M->n = top + 1;
}