/*
    Straightforward matrix product used as a cross-check: for every entry
    (row, col) the products A[row][k] * B[k][col] are summed without any
    intermediate reduction into a three-limb accumulator, which is then
    reduced with C->mod and stored in C[row][col].

    The loop bounds are A->r, B->c and A->c; the dimensions of C and the
    row count of B are not checked here and are assumed to be compatible.
*/
void
nmod_mat_mul_check(nmod_mat_t C, const nmod_mat_t A, const nmod_mat_t B)
{
    long row;
    long col;
    long idx;
    mp_limb_t acc_lo;   /* three-limb running sum: low limb */
    mp_limb_t acc_mid;  /* middle limb */
    mp_limb_t acc_hi;   /* high limb */
    mp_limb_t prod_lo;  /* two-limb product of one pair of entries */
    mp_limb_t prod_hi;

    for (row = 0; row < A->r; row++)
    {
        for (col = 0; col < B->c; col++)
        {
            acc_hi = 0UL;
            acc_mid = 0UL;
            acc_lo = 0UL;

            for (idx = 0; idx < A->c; idx++)
            {
                /* (prod_hi, prod_lo) = A[row][idx] * B[idx][col] */
                umul_ppmm(prod_hi, prod_lo, A->rows[row][idx], B->rows[idx][col]);

                /* accumulator += product, carrying into the high limb */
                add_sssaaaaaa(acc_hi, acc_mid, acc_lo,
                              acc_hi, acc_mid, acc_lo,
                              0, prod_hi, prod_lo);
            }

            /* Reduce the top limb first, then the full three-limb value. */
            NMOD_RED(acc_hi, acc_hi, C->mod);
            NMOD_RED3(acc_lo, acc_hi, acc_mid, acc_lo, C->mod);

            C->rows[row][col] = acc_lo;
        }
    }
}
/*
    Sums `siz` two-word values into the three-word accumulator r.

    r    output, three words, least significant word in r[0]; it is zeroed
         first, so any previous contents are discarded.
    s    input, 2 * siz words read as consecutive pairs. For pair i the word
         s[2*i] is added as the middle (more significant) word and s[2*i + 1]
         as the least significant word.
    siz  number of pairs to add; with siz == 0 the result is zero.

    Carries out of the middle word propagate into r[2].
*/
void slow_way(uint64_t *r, uint64_t *s, uint64_t siz)
{
    r[0] = r[1] = r[2] = 0;

    for (uint64_t i = 0; i < siz; i++)
    {
        const uint64_t j = 2 * i;   /* index of the first word of pair i */

        add_sssaaaaaa(r[2], r[1], r[0],
                      r[2], r[1], r[0],
                      0, s[j], s[j + 1]);
    }
}
/*
    Returns how many limbs (0, 1, 2 or 3) are occupied by the value
    len * (mod.n - 1)^2, computed exactly as a three-limb integer.

    len is converted to a limb when it is passed to the multiplication
    macro; it is presumably expected to be non-negative (not checked here).
*/
int
_nmod_vec_dot_bound_limbs(slong len, nmod_t mod)
{
    mp_limb_t sq_hi, sq_lo;      /* (mod.n - 1)^2 as two limbs */
    mp_limb_t top, mid, low;     /* three-limb result */
    mp_limb_t part_hi, part_lo;  /* sq_lo * len as two limbs */

    umul_ppmm(sq_hi, sq_lo, mod.n - 1, mod.n - 1);

    /* High half of the square times len lands in the upper two limbs. */
    umul_ppmm(top, mid, sq_hi, len);

    /* Low half of the square times len lands in the lower two limbs. */
    umul_ppmm(part_hi, part_lo, sq_lo, len);

    /* (top, mid, 0) + (0, part_hi, part_lo) */
    add_sssaaaaaa(top, mid, low,
                  top, mid, UWORD(0),
                  UWORD(0), part_hi, part_lo);

    if (top != 0)
    {
        return 3;
    }
    else if (mid != 0)
    {
        return 2;
    }

    return (low != 0) ? 1 : 0;
}
/*
    Test program for add_sssaaaaaa: for 1000000 iterations, fills two
    three-limb operands with values from n_randtest, adds them with the
    macro and with mpn_add_n, and aborts with a diagnostic dump if any of
    the three result limbs differ. Prints "PASS" when every iteration agrees.
*/
int
main(void)
{
    int iter, k;
    FLINT_TEST_INIT(state);

    flint_printf("add_sssaaaaaa....");
    fflush(stdout);

    for (iter = 0; iter < 1000000; iter++)
    {
        mp_limb_t s[3], t[3], a[3], b[3];

        /*
            All four arrays are filled, limb by limb, in the order s, t, a, b.
            s and t are overwritten below by the two additions.
        */
        for (k = 0; k < 3; k++)
        {
            s[k] = n_randtest(state);
            t[k] = n_randtest(state);
            a[k] = n_randtest(state);
            b[k] = n_randtest(state);
        }

        /* Macro under test: s = a + b over three limbs. */
        add_sssaaaaaa(s[2], s[1], s[0], a[2], a[1], a[0], b[2], b[1], b[0]);

        /* Reference result: t = a + b; the returned carry is not used. */
        mpn_add_n(t, a, b, 3);

        if (s[2] != t[2] || s[1] != t[1] || s[0] != t[0])
        {
            flint_printf("FAIL:\n");
            flint_printf("a[2] = %wu, a[1] = %wu, a[0] = %wu\n", a[2], a[1], a[0]);
            flint_printf("b[2] = %wu, b[1] = %wu, b[0] = %wu\n", b[2], b[1], b[0]);
            flint_printf("s[2] = %wu, s[1] = %wu, s[0] = %wu\n", s[2], s[1], s[0]);
            flint_printf("t[2] = %wu, t[1] = %wu, t[0] = %wu\n", t[2], t[1], t[0]);
            abort();
        }
    }

    FLINT_TEST_CLEANUP(state);

    flint_printf("PASS\n");
    return 0;
}