/*
 * Multiply/Add Words
 *
 * Walks the first x_size words of x and z in lock-step, combining each
 * word of x with the scalar y and the matching word of z, and storing
 * the result back into z. Going by the helper names this is presumably
 * z[i] += x[i] * y with carry propagation; the helpers themselves are
 * defined elsewhere.
 *
 * The leading multiple-of-eight portion goes through word8_madd3 eight
 * words per call; the remaining (x_size % 8) words go through
 * word_madd3 one at a time. The carry word is threaded through every
 * call.
 *
 * Returns the carry left over after the last word.
 */
word bigint_mul_add_words(word z[], const word x[], u32bit x_size, word y)
{
   const u32bit leftover = x_size % 8;
   const u32bit unrolled_len = x_size - leftover;

   word carry = 0;
   u32bit pos = 0;

   /* Eight words per step */
   while(pos != unrolled_len)
   {
      carry = word8_madd3(z + pos, x + pos, y, carry);
      pos += 8;
   }

   /* Remaining words, one per step; pos continues from unrolled_len */
   while(pos != x_size)
   {
      z[pos] = word_madd3(x[pos], y, z[pos], carry, &carry);
      ++pos;
   }

   return carry;
}
/*
 * Simple O(N^2) Squaring
 *
 * Same algorithm as bigint_simple_mul, specialised for the case where
 * both operands are the same array: C/C++ compilers are poor at alias
 * analysis, so it helps to give them a version in which x == y is
 * evident.
 *
 * The Handbook of Applied Cryptography, chapter 14, specifies an
 * O(n^1.5) squaring algorithm.
 *
 * z receives the result and must have room for 2*x_size words; it is
 * cleared before use. x is the x_size-word input.
 */
void bigint_simple_sqr(word z[], const word x[], size_t x_size)
{
   /* Length of the prefix of x that is processed eight words at a time */
   const size_t unrolled_len = x_size - (x_size % 8);

   clear_mem(z, 2*x_size);

   for(size_t row = 0; row != x_size; ++row)
   {
      /* Row `row` accumulates x * x[row] into z starting at offset `row` */
      word* const out = z + row;
      const word multiplier = x[row];

      word carry = 0;
      size_t col = 0;

      while(col != unrolled_len)
      {
         carry = word8_madd3(out + col, x + col, multiplier, carry);
         col += 8;
      }

      while(col != x_size)
      {
         out[col] = word_madd3(x[col], multiplier, out[col], &carry);
         ++col;
      }

      /* Word just past this row's span (z[x_size + row]) takes the carry */
      out[x_size] = carry;
   }
}
/*
 * Montgomery Reduction Algorithm
 *
 * z      - value to reduce, updated in place; accessed as an array of
 *          2*(p_size+1) words
 * p      - modulus, p_size words
 * p_size - number of words in p
 * p_dash - per-round multiplier applied to the low word of the
 *          accumulator (presumably the usual Montgomery constant
 *          -p^-1 mod word base; confirm against the caller)
 * ws     - workspace; words [0, 2*(p_size+1)) are written
 *
 * On return the low p_size+1 words of z hold the result and the
 * remaining words of z are cleared.
 */
void bigint_monty_redc(word z[], const word p[], size_t p_size, word p_dash, word ws[])
{
   const size_t result_len = p_size + 1;
   const size_t z_size = 2*(p_size+1);
   const size_t unrolled_len = p_size - (p_size % 8);

   for(size_t round = 0; round != p_size; ++round)
   {
      word* const acc = z + round;
      const word factor = acc[0] * p_dash;

      /*
       * Fused single-pass form of what an earlier note in this code
       * described as:
       *    bigint_linmul3(ws, p, p_size, factor);
       *    bigint_add2(acc, z_size - round, ws, p_size+1);
       */
      word carry = 0;
      size_t k = 0;

      while(k != unrolled_len)
      {
         carry = word8_madd3(acc + k, p + k, factor, carry);
         k += 8;
      }

      while(k != p_size)
      {
         acc[k] = word_madd3(p[k], factor, acc[k], &carry);
         ++k;
      }

      /* Add the outgoing carry into the next word, detecting wrap-around */
      const word before = acc[p_size];
      const word after = before + carry;
      acc[p_size] = after;
      carry = (after < before);

      /* Ripple any remaining carry upward, stopping at the end of z */
      const size_t limit = z_size - round;
      for(size_t j = p_size + 1; carry && j != limit; ++j)
      {
         acc[j] += 1;
         carry = (acc[j] == 0);
      }
   }

   /*
    * The result might need to be reduced mod p. To avoid a timing
    * channel, always perform the subtraction. If computing x - p
    * requires a borrow, then x was already < p.
    *
    * x - p starts at ws[0] and is p_size+1 words long.
    * x starts at ws[p_size+1] and is also p_size+1 words long
    * (that is the first copy_mem below).
    *
    * The final borrow (0 or 1) is used as an index to select which of
    * the two candidates is copied back into z.
    */
   word* const upper = z + p_size;
   word borrow = 0;

   for(size_t i = 0; i != p_size; ++i)
      ws[i] = word_sub(upper[i], p[i], &borrow);

   ws[p_size] = word_sub(upper[p_size], 0, &borrow);

   BOTAN_ASSERT(borrow == 0 || borrow == 1, "Expected borrow");

   copy_mem(ws + result_len, upper, result_len);

   copy_mem(z, ws + borrow*result_len, result_len);
   clear_mem(z + result_len, z_size - result_len);
}