/* reverse_and_mulX_ghash treats the bytes of |b| as a reversed GHASH field
 * element, multiplies it by 'x', and writes the product back into |b| using
 * GHASH's backwards bit ordering. */
static void reverse_and_mulX_ghash(polyval_block *b) {
  const uint64_t in_hi = b->u[0];
  const uint64_t in_lo = b->u[1];

  /* Record whether the bit about to be shifted out of |in_hi| is set, and
   * pick the reduction constant (0xe1 in the top byte, or zero) from that
   * using the constant-time helpers rather than a branch in this function. */
  const crypto_word_t dropped_bit = constant_time_eq_w(in_hi & 1, 1);
  const uint64_t reduction =
      ((uint64_t) constant_time_select_w(dropped_bit, 0xe1, 0)) << 56;

  /* Shift the 128-bit value right by one: the low bit of |in_lo| moves into
   * the top bit of the other word, and the reduction is folded in. */
  const uint64_t out_hi = (in_hi >> 1) | (in_lo << 63);
  const uint64_t out_lo = (in_lo >> 1) ^ reduction;

  /* Store the two words swapped and byte-reversed. */
  b->u[0] = CRYPTO_bswap8(out_lo);
  b->u[1] = CRYPTO_bswap8(out_hi);
}
// bn_mul_part_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r| // has length 4*|n|, |a| has length |n| + |tna|, |b| has length |n| + |tnb|, and // |t| has length 8*|n|. |n| must be a power of two. Additionally, we must have // 0 <= tna < n and 0 <= tnb < n, and |tna| and |tnb| must differ by at most // one. // // TODO(davidben): Make this take |size_t| and perhaps the actual lengths of |a| // and |b|. static void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n, int tna, int tnb, BN_ULONG *t) { // |n| is a power of two. assert(n != 0 && (n & (n - 1)) == 0); // Check |tna| and |tnb| are in range. assert(0 <= tna && tna < n); assert(0 <= tnb && tnb < n); assert(-1 <= tna - tnb && tna - tnb <= 1); int n2 = n * 2; if (n < 8) { bn_mul_normal(r, a, n + tna, b, n + tnb); OPENSSL_memset(r + n2 + tna + tnb, 0, n2 - tna - tnb); return; } // Split |a| and |b| into a0,a1 and b0,b1, where a0 and b0 have size |n|. |a1| // and |b1| have size |tna| and |tnb|, respectively. // Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used // for recursive calls. // Split |r| into r0,r1,r2,r3. We must contribute a0*b0 to r0,r1, a0*a1+b0*b1 // to r1,r2, and a1*b1 to r2,r3. The middle term we will compute as: // // a0*a1 + b0*b1 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0 // t0 = a0 - a1 and t1 = b1 - b0. The result will be multiplied, so we XOR // their sign masks, giving the sign of (a0 - a1)*(b1 - b0). t0 and t1 // themselves store the absolute value. BN_ULONG neg = bn_abs_sub_part_words(t, a, &a[n], tna, n - tna, &t[n2]); neg ^= bn_abs_sub_part_words(&t[n], &b[n], b, tnb, tnb - n, &t[n2]); // Compute: // t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)| // r0,r1 = a0 * b0 // r2,r3 = a1 * b1 if (n == 8) { bn_mul_comba8(&t[n2], t, &t[n]); bn_mul_comba8(r, a, b); bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb); // |bn_mul_normal| only writes |tna| + |tna| words. Zero the rest. 
OPENSSL_memset(&r[n2 + tna + tnb], 0, sizeof(BN_ULONG) * (n2 - tna - tnb)); } else { BN_ULONG *p = &t[n2 * 2]; bn_mul_recursive(&t[n2], t, &t[n], n, 0, 0, p); bn_mul_recursive(r, a, b, n, 0, 0, p); OPENSSL_memset(&r[n2], 0, sizeof(BN_ULONG) * n2); if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb); } else { int i = n; for (;;) { i /= 2; if (i < tna || i < tnb) { // E.g., n == 16, i == 8 and tna == 11. |tna| and |tnb| are within one // of each other, so if |tna| is larger and tna > i, then we know // tnb >= i, and this call is valid. bn_mul_part_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p); break; } if (i == tna || i == tnb) { // If there is only a bottom half to the number, just do it. We know // the larger of |tna - i| and |tnb - i| is zero. The other is zero or // -1 by because of |tna| and |tnb| differ by at most one. bn_mul_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p); break; } // This loop will eventually terminate when |i| falls below // |BN_MUL_RECURSIVE_SIZE_NORMAL| because we know one of |tna| and |tnb| // exceeds that. } } } // t0,t1,c = r0,r1 + r2,r3 = a0*b0 + a1*b1 BN_ULONG c = bn_add_words(t, r, &r[n2], n2); // t2,t3,c = t0,t1,c + neg*t2,t3 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0. // The second term is stored as the absolute value, so we do this with a // constant-time select. BN_ULONG c_neg = c - bn_sub_words(&t[n2 * 2], t, &t[n2], n2); BN_ULONG c_pos = c + bn_add_words(&t[n2], t, &t[n2], n2); bn_select_words(&t[n2], neg, &t[n2 * 2], &t[n2], n2); OPENSSL_COMPILE_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t), crypto_word_t_too_small); c = constant_time_select_w(neg, c_neg, c_pos); // We now have our three components. Add them together. // r1,r2,c = r1,r2 + t2,t3,c c += bn_add_words(&r[n], &r[n], &t[n2], n2); // Propagate the carry bit to the end. 
for (int i = n + n2; i < n2 + n2; i++) { BN_ULONG old = r[i]; r[i] = old + c; c = r[i] < old; } // The product should fit without carries. assert(c == 0); }
// bn_mul_recursive computes |r| = |a| * |b| with |t| as scratch space. |r|
// holds 2*|n2| words, |a| holds |n2| + |dna| words, |b| holds |n2| + |dnb|
// words, and |t| holds 4*|n2| words. |n2| must be a power of two, and both
// |dna| and |dnb| must lie in [-|BN_MUL_RECURSIVE_SIZE_NORMAL|/2, 0].
//
// TODO(davidben): Simplify and |size_t| the calling convention around lengths
// here.
static void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                             int n2, int dna, int dnb, BN_ULONG *t) {
  // |n2| is a power of two.
  assert(n2 != 0 && (n2 & (n2 - 1)) == 0);
  // Check |dna| and |dnb| are in range.
  assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dna && dna <= 0);
  assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dnb && dnb <= 0);

  // True when neither operand is short of the full |n2| words.
  const int both_full = (dna == 0) && (dnb == 0);

  // The 8x8 comba routine applies only when both inputs are complete.
  if (both_full && n2 == 8) {
    bn_mul_comba8(r, a, b);
    return;
  }

  // Below the recursion threshold, fall back to the schoolbook multiply and
  // clear whichever top words of |r| it did not produce.
  if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
    bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
    const int missing = -(dna + dnb);
    if (missing > 0) {
      OPENSSL_memset(&r[2 * n2 - missing], 0, sizeof(BN_ULONG) * missing);
    }
    return;
  }

  // Karatsuba step. With |half| = |n2|/2, view |a| as a0,a1 and |b| as b0,b1
  // where a0 and b0 are |half| words. |t| is viewed as t0,t1,t2,t3 of |half|
  // words each, followed by scratch for the recursive calls. |r| is viewed as
  // r0,r1,r2,r3: a0*b0 lands in r0,r1, a1*b1 in r2,r3, and the middle term
  // a0*b1 + a1*b0 is added into r1,r2. The middle term is obtained from:
  //
  //   a0*b1 + a1*b0 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0
  //
  // |half| is at least |BN_MUL_RECURSIVE_SIZE_NORMAL|/2 here, so the high-part
  // lengths below are non-negative.
  const int half = n2 / 2;
  const int hi_a = half + dna;
  const int hi_b = half + dnb;

  // t0 = |a0 - a1| and t1 = |b1 - b0|. XORing the two sign masks yields the
  // sign of the product (a0 - a1)*(b1 - b0).
  BN_ULONG sign =
      bn_abs_sub_part_words(t, a, &a[half], hi_a, half - hi_a, &t[n2]);
  sign ^=
      bn_abs_sub_part_words(&t[half], &b[half], b, hi_b, hi_b - half, &t[n2]);

  // Produce the three partial products:
  //   t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)|
  //   r0,r1 = a0 * b0
  //   r2,r3 = a1 * b1
  if (both_full && half == 4) {
    bn_mul_comba4(&t[n2], t, &t[half]);
    bn_mul_comba4(r, a, b);
    bn_mul_comba4(&r[n2], &a[half], &b[half]);
  } else if (both_full && half == 8) {
    bn_mul_comba8(&t[n2], t, &t[half]);
    bn_mul_comba8(r, a, b);
    bn_mul_comba8(&r[n2], &a[half], &b[half]);
  } else {
    BN_ULONG *scratch = &t[n2 * 2];
    bn_mul_recursive(&t[n2], t, &t[half], half, 0, 0, scratch);
    bn_mul_recursive(r, a, b, half, 0, 0, scratch);
    bn_mul_recursive(&r[n2], &a[half], &b[half], half, dna, dnb, scratch);
  }

  // t0,t1,carry = r0,r1 + r2,r3 = a0*b0 + a1*b1
  BN_ULONG carry = bn_add_words(t, r, &r[n2], n2);

  // t2,t3,carry = t0,t1,carry + sign*t2,t3. Because t2,t3 holds an absolute
  // value, both the difference and the sum are computed and one is selected
  // by |sign|. The subtraction must come first: it reads t2,t3 before the
  // addition overwrites them.
  const BN_ULONG carry_if_neg =
      carry - bn_sub_words(&t[n2 * 2], t, &t[n2], n2);
  const BN_ULONG carry_if_pos = carry + bn_add_words(&t[n2], t, &t[n2], n2);
  bn_select_words(&t[n2], sign, &t[n2 * 2], &t[n2], n2);
  OPENSSL_COMPILE_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
                         crypto_word_t_too_small);
  carry = constant_time_select_w(sign, carry_if_neg, carry_if_pos);

  // Fold the middle term into the result: r1,r2,carry = r1,r2 + t2,t3,carry
  carry += bn_add_words(&r[half], &r[half], &t[n2], n2);

  // Ripple the remaining carry through r3.
  BN_ULONG *const end = &r[n2 + n2];
  for (BN_ULONG *word = &r[half + n2]; word != end; word++) {
    const BN_ULONG before = *word;
    *word = before + carry;
    carry = *word < before;
  }

  // The product should fit without carries.
  assert(carry == 0);
}