/* Stores the result of abs_sub_n (rm, rp, rs, n) at rm, then replaces
   {rp, n} by {rp, n} + {rs, n}.  The addition is asserted not to carry.

   Returns the value abs_sub_n produced.  abs_sub_n is defined elsewhere in
   this file; presumably it writes |rp - rs| and returns a sign flag --
   confirm against its definition.  */
static int
abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n)
{
  /* The difference has to be formed first, since the sum below overwrites
     rp.  */
  const int sign = abs_sub_n (rm, rp, rs, n);

  ASSERT_NOCARRY (mpn_add_n (rp, rp, rs, n));

  return sign;
}
/* Combines two n-limb operands {ap, n} and {bp, n} that carry the sign
   flags as and bs, storing an n-limb result at rp and returning the sign
   flag of the result.

   Equal flags: rp = ap + bp (asserted not to carry), and the common flag
   is returned.
   Different flags: rp is whatever abs_sub_n (rp, ap, bp, n) stores, and the
   returned flag is as XOR the value abs_sub_n returns.  abs_sub_n is
   defined elsewhere in this file; presumably it stores |ap - bp| and
   returns 1 when ap < bp -- confirm against its definition.  */
static int
add_signed_n (mp_ptr rp,
	      mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
{
  if (as == bs)
    {
      /* Same sign: the magnitudes simply add.  */
      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
      return as;
    }

  /* Opposite signs: take the difference and flip the sign of a if the
     subtraction reports that the operands had to be swapped.  */
  return as ^ abs_sub_n (rp, ap, bp, n);
}
/* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
 *
 * The product is formed in place: r0..r3 are both inputs and outputs.
 * On input each r element is read as rn limbs and each m element as mn
 * limbs; m0..m3 are only read.  From the first result, r0 = r0*m0 + r1*m2,
 * M is laid out as (m0, m1; m2, m3).
 *
 * Resulting elements are of size up to rn + mn + 1.  Every one of r0..r3
 * is written through index rn + mn, so each needs room for rn + mn + 1
 * limbs.
 *
 * Temporary storage: 3 rn + 3 mn + 5 limbs at tp, carved up below as
 *   s0: rn + 1,  t0: mn + 1,  u0: rn + mn + 1,  u1: rn + mn + 2.
 *
 * Seven MUL calls are made instead of eight.  The names in the trailing
 * comments (u0..u6, s0..s6, t0..t6) label the intermediate sums and
 * products of the scheme; they do not match the local pointers s0, t0, u0
 * and u1, which are reused scratch areas.
 *
 * Intermediate values are stored as magnitudes, with the int locals r1s,
 * r3s, s0s, t0s and u1s tracking their signs.  Those flags come from
 * abs_sub_n and add_signed_n; abs_sub_n and MUL are defined outside this
 * function.  MUL (dst, a, an, b, bn) is used as a product of an an-limb by
 * a bn-limb operand, presumably storing an + bn limbs -- confirm against
 * its definition.
 *
 * Statement order matters throughout: r1, r2 and r3 are overwritten with
 * intermediate values while their old contents are still being consumed.
 */
void
mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn, mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn, mp_ptr tp)
{
  mp_ptr s0, t0, u0, u1;
  int r1s, r3s, s0s, t0s, u1s;

  /* Partition the scratch area.  */
  s0 = tp; tp += rn + 1;
  t0 = tp; tp += mn + 1;
  u0 = tp; tp += rn + mn + 1;
  u1 = tp; /* rn + mn + 2 */

  MUL (u0, r1, rn, m2, mn); /* u5 = s5 * t6 */
  r3s = abs_sub_n (r3, r3, r2, rn); /* r3 - r2 */
  if (r3s)
    {
      r1s = abs_sub_n (r1, r1, r3, rn);
      r1[rn] = 0;
    }
  else
    {
      /* The sum may carry; keep the carry as an extra high limb of r1.  */
      r1[rn] = mpn_add_n (r1, r1, r3, rn);
      r1s = 0; /* r1 - r2 + r3 */
    }

  /* Form s0 from r0 and the updated r1, choosing add or subtract from the
     sign and high limb of r1.  */
  if (r1s)
    {
      s0[rn] = mpn_add_n (s0, r1, r0, rn);
      s0s = 0;
    }
  else if (r1[rn] != 0)
    {
      /* r1 has its high limb set, so r1 - r0 cannot go negative; any
	 borrow from the low rn limbs is absorbed by that limb.  */
      s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
      s0s = 1; /* s4 = -r0 + r1 - r2 + r3 */
      /* Reverse sign! */
    }
  else
    {
      s0s = abs_sub_n (s0, r0, r1, rn);
      s0[rn] = 0;
    }

  MUL (u1, r0, rn, m0, mn); /* u0 = s0 * t0 */
  /* First finished element: r0 = r0*m0 + r1*m2.  */
  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
  ASSERT (r0[rn+mn] < 2); /* u0 + u5 */

  t0s = abs_sub_n (t0, m3, m2, mn);
  u1s = r3s^t0s^1; /* Reverse sign! */
  MUL (u1, r3, rn, t0, mn); /* u2 = s2 * t2 */
  /* Clear the limb above the product; u1 is later used as an operand of
     rn + mn + 1 limbs.  */
  u1[rn+mn] = 0;
  if (t0s)
    {
      t0s = abs_sub_n (t0, m1, t0, mn);
      t0[mn] = 0;
    }
  else
    {
      t0[mn] = mpn_add_n (t0, t0, m1, mn);
    }

  /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
     at r3. I'd expect that for matrices of random size, the high
     words t0[mn] and r1[rn] are non-zero with a pretty small
     probability. If that can be confirmed this should be done as an
     unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
     add_n.
   */
  if (t0[mn] != 0)
    {
      /* Both operands may have an extra high limb.  Multiply the low rn
	 limbs of r1 by all mn + 1 limbs of t0, then account for r1[rn]
	 (asserted to be 0 or 1) by adding t0 at limb offset rn.  */
      MUL (r3, r1, rn, t0, mn + 1); /* u3 = s3 * t3 */
      ASSERT (r1[rn] < 2);
      if (r1[rn] != 0)
	mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
    }
  else
    {
      MUL (r3, r1, rn + 1, t0, mn);
    }
  ASSERT (r3[rn+mn] < 4);

  /* Extend u0 to rn + mn + 1 limbs so it matches the size of r3.  */
  u0[rn+mn] = 0;
  if (r1s^t0s)
    {
      r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
    }
  else
    {
      ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
      r3s = 0; /* u3 + u5 */
    }

  /* Update t0 with m0; t0s and the high limb t0[mn] select between an
     add, a subtract that cannot go negative, and an absolute difference.  */
  if (t0s)
    {
      t0[mn] = mpn_add_n (t0, t0, m0, mn);
    }
  else if (t0[mn] != 0)
    {
      t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
    }
  else
    {
      t0s = abs_sub_n (t0, t0, m0, mn);
    }
  MUL (u0, r2, rn, t0, mn + 1); /* u6 = s6 * t4 */
  ASSERT (u0[rn+mn] < 2);

  /* r2 has now been consumed by the product above; fold it into r1.  */
  if (r1s)
    {
      ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
    }
  else
    {
      r1[rn] += mpn_add_n (r1, r1, r2, rn);
    }

  /* From here on rn includes the extra high limb, so rn + mn is the
     rn + mn + 1 of the code above.  */
  rn++;
  t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn); /* u3 + u5 + u6 */
  ASSERT (r2[rn+mn-1] < 4);
  r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn); /* -u2 + u3 + u5 */
  ASSERT (r3[rn+mn-1] < 3);
  MUL (u0, s0, rn, m1, mn); /* u4 = s4 * t5 */
  ASSERT (u0[rn+mn-1] < 2);
  t0[mn] = mpn_add_n (t0, m3, m1, mn);
  MUL (u1, r1, rn, t0, mn + 1); /* u1 = s1 * t1 */

  /* mn now holds the full length of the results: the incremented rn plus
     the original mn.  */
  mn += rn;
  ASSERT (u1[mn-1] < 4);
  ASSERT (u1[mn] == 0);
  /* The returned sign flag is asserted to be zero as well.  */
  ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn)); /* -u2 + u3 - u4 + u5 */
  ASSERT (r1[mn-1] < 2);

  /* Final r3 and r2: combine u1 with the partial sums, adding or
     subtracting according to the sign flag of each partial sum.  */
  if (r3s)
    {
      ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn)); /* u1 + u2 - u3 - u5 */
    }
  ASSERT (r3[mn-1] < 2);
  if (t0s)
    {
      ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn)); /* u1 - u3 - u5 - u6 */
    }
  ASSERT (r2[mn-1] < 2);
}