/**
 * @brief Perform a single Barrett reduction.
 *
 * The modulus and its precomputed companion value are taken from the slot
 * of the context selected by ctx->mod_offset (ctx->bi_mod / ctx->bi_mu).
 *
 * @param ctx [in]  The bigint session context.
 * @param bi [in]  A bigint.
 * @return The result of the Barrett reduction.
 */
bigint * ICACHE_FLASH_ATTR bi_barrett(BI_CTX *ctx, bigint *bi)
{
    const uint8_t slot = ctx->mod_offset;
    bigint *modulus = ctx->bi_mod[slot];
    const int k = modulus->size;
    bigint *estimate, *low_part, *product, *remainder;

    check(bi);
    check(modulus);

    /* More than 2k components: Barrett cannot help, go classical. */
    if (bi->size > k*2)
    {
        return bi_mod(ctx, bi);
    }

    /* Quotient estimate: shift a clone right by k-1, do the outer partial
     * multiply with the stored mu value, then shift right by k+1. */
    estimate = bi_clone(ctx, bi);
    estimate = comp_right_shift(estimate, k-1);
    estimate = regular_multiply(ctx, estimate, ctx->bi_mu[slot], 0, k-1);
    estimate = comp_right_shift(estimate, k+1);

    /* Keep only the low k+1 components of the input. */
    low_part = comp_mod(bi, k+1);

    /* Inner partial multiply: estimate * modulus, again cut to k+1. */
    product = regular_multiply(ctx, estimate, modulus, k+1, 0);
    product = comp_mod(product, k+1);

    remainder = bi_subtract(ctx, low_part, product, NULL);

    /* One conditional correction: if (r >= m) r = r - m */
    if (bi_compare(remainder, modulus) >= 0)
    {
        remainder = bi_subtract(ctx, remainder, modulus, NULL);
    }

    return remainder;
}
/*
 * Karatsuba multiplication (or squaring when is_square is non-zero).
 *
 * Each operand is split at component index "half" into a low and a high
 * part, so that only 3 sub-multiplications are needed instead of 4. The
 * extra additions/subtractions are O(N) rather than O(N^2), which pays off
 * for big numbers.
 *
 * bia is passed to bi_free() here; bib is too, but only in the
 * non-square case.
 */
static bigint * ICACHE_FLASH_ATTR karatsuba(BI_CTX *ctx, bigint *bia, bigint *bib, int is_square)
{
    bigint *a_lo, *a_hi;
    bigint *low, *mid, *high;
    int half;

    half = is_square ? (bia->size + 1)/2
                     : (max(bia->size, bib->size) + 1)/2;

    /* Split bia: a_lo keeps the first "half" components, a_hi the rest. */
    /* NOTE(review): when bia->size < half this raises a_lo->size above the
     * cloned size without initialising the extra components - confirm that
     * callers never pass operands that unbalanced. */
    a_lo = bi_clone(ctx, bia);
    a_lo->size = half;
    a_hi = bi_clone(ctx, bia);
    comp_right_shift(a_hi, half);
    bi_free(ctx, bia);

    /* The three partial products. */
    if (!is_square)
    {
        bigint *b_lo, *b_hi;

        /* Split bib the same way. */
        b_lo = bi_clone(ctx, bib);
        b_lo->size = half;
        b_hi = bi_clone(ctx, bib);
        comp_right_shift(b_hi, half);
        bi_free(ctx, bib);

        low = bi_multiply(ctx, bi_copy(a_lo), bi_copy(b_lo));
        high = bi_multiply(ctx, bi_copy(a_hi), bi_copy(b_hi));
        mid = bi_multiply(ctx, bi_add(ctx, a_lo, a_hi),
                               bi_add(ctx, b_lo, b_hi));
    }
    else
    {
        low = bi_square(ctx, bi_copy(a_lo));
        high = bi_square(ctx, bi_copy(a_hi));
        mid = bi_square(ctx, bi_add(ctx, a_lo, a_hi));
    }

    /* mid = mid - high - low */
    mid = bi_subtract(ctx, mid, bi_copy(high), NULL);
    mid = bi_subtract(ctx, mid, bi_copy(low), NULL);

    /* Move the pieces to their component positions and sum them. */
    comp_left_shift(mid, half);
    comp_left_shift(high, 2*half);

    return bi_add(ctx, mid, bi_add(ctx, low, high));
}
/**
 * @brief Use the Chinese Remainder Theorem to quickly perform RSA decrypts.
 *
 * Runs bi_mod_power() once with the P modulus slot selected and once with
 * the Q modulus slot selected, then combines the two results as
 *   h = residue((m1 + p - m2) * qInv)   (P slot selected)
 *   result = m2 + q * h
 * On return ctx->mod_offset is left at BIGINT_P_OFFSET.
 *
 * @param ctx [in]  The bigint session context.
 * @param bi [in]  The bigint to perform the exp/mod.
 * @param dP [in] CRT's dP bigint
 * @param dQ [in] CRT's dQ bigint
 * @param p [in] CRT's p bigint
 * @param q [in] CRT's q bigint
 * @param qInv [in] CRT's qInv bigint
 * @return The result of the CRT operation
 */
bigint * ICACHE_FLASH_ATTR bi_crt(BI_CTX *ctx, bigint *bi, bigint *dP, bigint *dQ, bigint *p, bigint *q, bigint *qInv)
{
    bigint *res_p, *res_q, *coeff;

    /* Montgomery requires 0 < x, y < m, which the products below violate,
     * so Montgomery is switched off for the duration of the CRT. */
#if defined(CONFIG_BIGINT_MONTGOMERY)
    ctx->use_classical = 1;
#endif

    /* Exponentiation with the P slot; a bi_copy() is handed over so that
     * bi itself can still be used for the Q slot. */
    ctx->mod_offset = BIGINT_P_OFFSET;
    res_p = bi_mod_power(ctx, bi_copy(bi), dP);

    ctx->mod_offset = BIGINT_Q_OFFSET;
    res_q = bi_mod_power(ctx, bi, dQ);

    /* coeff = (res_p + p - res_q) * qInv, reduced with the P slot */
    coeff = bi_add(ctx, res_p, p);
    coeff = bi_subtract(ctx, coeff, bi_copy(res_q), NULL);
    coeff = bi_multiply(ctx, coeff, qInv);
    ctx->mod_offset = BIGINT_P_OFFSET;
    coeff = bi_residue(ctx, coeff);

#if defined(CONFIG_BIGINT_MONTGOMERY)
    ctx->use_classical = 0;         /* reset for any further operation */
#endif

    return bi_add(ctx, res_q, bi_multiply(ctx, q, coeff));
}
/**
 * @brief Perform a single montgomery reduction.
 *
 * Uses the modulus and the N0_dash value stored in the context slot
 * selected by ctx->mod_offset. When ctx->use_classical is set the call is
 * simply forwarded to bi_mod().
 *
 * @param ctx [in]  The bigint session context.
 * @param bixy [in]  A bigint.
 * @return The result of the montgomery reduction.
 */
bigint * ICACHE_FLASH_ATTR bi_mont(BI_CTX *ctx, bigint *bixy)
{
    const uint8_t slot = ctx->mod_offset;
    bigint *modulus = ctx->bi_mod[slot];
    const comp n0_dash = ctx->N0_dash[slot];
    int idx, limit;

    check(bixy);

    /* Classical reduction requested - nothing Montgomery-specific to do. */
    if (ctx->use_classical)
    {
        return bi_mod(ctx, bixy);
    }

    limit = modulus->size;
    idx = 0;

    /* One pass per component of the modulus (always at least one pass):
     * add modulus * (bixy[idx] * N0_dash), shifted left by idx components. */
    do
    {
        bigint *term;

        term = bi_int_multiply(ctx, modulus, bixy->comps[idx]*n0_dash);
        term = comp_left_shift(term, idx);
        bixy = bi_add(ctx, bixy, term);
        idx++;
    } while (idx < limit);

    /* Drop the low "limit" components. */
    comp_right_shift(bixy, limit);

    /* Final conditional subtraction of the modulus. */
    if (bi_compare(bixy, modulus) >= 0)
    {
        bixy = bi_subtract(ctx, bixy, modulus, NULL);
    }

    return bixy;
}
/**
 * Use the Chinese Remainder Theorem to quickly perform RSA decrypts.
 * This should really be in bigint.c (and was at one stage), but needs
 * access to the RSA_CTX context...
 *
 * The CRT values (dP, dQ, p, q, qInv) and the bigint context are read from
 * rsa. The result is computed as
 *   h = residue((m1 + p - m2) * qInv)   (P slot selected)
 *   result = m2 + q * h
 * and ctx->mod_offset is left at BIGINT_P_OFFSET on return.
 */
static bigint *bi_crt(const RSA_CTX *rsa, bigint *bi)
{
    BI_CTX *ctx = rsa->bi_ctx;
    bigint *part_p, *part_q, *coeff;

    /* Montgomery requires 0 < x, y < m, which the products below violate,
     * so Montgomery is switched off while the CRT runs. */
#if defined(CONFIG_BIGINT_MONTGOMERY)
    ctx->use_classical = 1;
#endif

    /* P slot first; bi_copy() is passed so bi can be reused for Q. */
    ctx->mod_offset = BIGINT_P_OFFSET;
    part_p = bi_mod_power(ctx, bi_copy(bi), rsa->dP);

    ctx->mod_offset = BIGINT_Q_OFFSET;
    part_q = bi_mod_power(ctx, bi, rsa->dQ);

    /* coeff = (part_p + p - part_q) * qInv, reduced with the P slot */
    coeff = bi_add(ctx, part_p, rsa->p);
    coeff = bi_subtract(ctx, coeff, bi_copy(part_q), NULL);
    coeff = bi_multiply(ctx, coeff, rsa->qInv);
    ctx->mod_offset = BIGINT_P_OFFSET;
    coeff = bi_residue(ctx, coeff);

#if defined(CONFIG_BIGINT_MONTGOMERY)
    ctx->use_classical = 0;         /* reset for any further operation */
#endif

    return bi_add(ctx, part_q, bi_multiply(ctx, rsa->q, coeff));
}
/**
 * @brief Does both division and modulo calculations.
 *
 * Used extensively when doing classical reduction.
 *
 * Schoolbook long division: both operands are scaled by a normalisation
 * factor d, one quotient component is estimated and corrected per loop
 * pass, and (in reduction mode) the remainder is divided by d again at the
 * end.
 *
 * In reduction mode with d > 1 the normalised divisor is not recomputed;
 * ctx->bi_normalised_mod[ctx->mod_offset] is used instead.
 *
 * @param ctx [in]  The bigint session context.
 * @param u [in]    A bigint which is the numerator.
 * @param v [in]    Either the denominator or the modulus depending on the
 * mode.
 * @param is_mod [in]  Determines if this is a normal division (0) or a
 * reduction (1).
 * @return  The result of the division/reduction.
 */
bigint * ICACHE_FLASH_ATTR bi_divide(BI_CTX *ctx, bigint *u, bigint *v, int is_mod)
{
    /* NOTE: U(), V1, V2 and Q() are macros defined outside this block;
     * they presumably index into tmp_u, v and quotient, so those local
     * names must not be changed. */
    int n = v->size, m = u->size-n;
    int j = 0, orig_u_size = u->size;
    uint8_t mod_offset = ctx->mod_offset;
    comp d;
    bigint *quotient, *tmp_u;
    comp q_dash;

    check(u);
    check(v);

    /* if doing reduction and u is already < v, u is the answer: release v
     * and hand u back unchanged */
    if (is_mod && bi_compare(v, u) > 0)
    {
        bi_free(ctx, v);
        return u;
    }

    quotient = alloc(ctx, m+1);
    tmp_u = alloc(ctx, n+1);
    v = trim(v);        /* make sure we have no leading 0's */

    /* Normalisation factor d = RADIX / (V1 + 1).
     * V1 is widened to long_comp before adding 1: if comp is as wide as
     * unsigned int and V1 holds its maximum value, V1+1 evaluated in comp
     * width would wrap to 0 and the division below would be by zero.
     * With the widening that case yields d == 1 (no scaling needed). */
    d = (comp)((long_comp)COMP_RADIX/((long_comp)V1+1));

    /* clear things to start with */
    memset(quotient->comps, 0, ((quotient->size)*COMP_BYTE_SIZE));

    /* normalise */
    if (d > 1)
    {
        u = bi_int_multiply(ctx, u, d);

        if (is_mod)
        {
            /* use the normalised modulus stored in the context */
            v = ctx->bi_normalised_mod[mod_offset];
        }
        else
        {
            v = bi_int_multiply(ctx, v, d);
        }
    }

    if (orig_u_size == u->size)  /* new digit position u0 */
    {
        more_comps(u, orig_u_size + 1);
    }

    /* one quotient component per pass, m+1 passes in total */
    do
    {
        /* get a temporary short version of u */
        memcpy(tmp_u->comps, &u->comps[u->size-n-1-j], (n+1)*COMP_BYTE_SIZE);

        /* calculate q' */
        if (U(0) == V1)
        {
            q_dash = COMP_RADIX-1;
        }
        else
        {
            q_dash = (comp)(((long_comp)U(0)*COMP_RADIX + U(1))/V1);

            if (v->size > 1 && V2)
            {
                /* we are implementing the following:
                if (V2*q_dash > (((U(0)*COMP_RADIX + U(1) -
                        q_dash*V1)*COMP_RADIX) + U(2))) ...
                */
                comp inner = (comp)((long_comp)COMP_RADIX*U(0) + U(1) -
                                            (long_comp)q_dash*V1);
                if ((long_comp)V2*q_dash > (long_comp)inner*COMP_RADIX + U(2))
                {
                    q_dash--;
                }
            }
        }

        /* multiply and subtract */
        if (q_dash)
        {
            int is_negative;
            tmp_u = bi_subtract(ctx, tmp_u,
                    bi_int_multiply(ctx, bi_copy(v), q_dash), &is_negative);
            more_comps(tmp_u, n+1);

            Q(j) = q_dash;

            /* add back: the estimate was one too large */
            if (is_negative)
            {
                Q(j)--;
                tmp_u = bi_add(ctx, tmp_u, bi_copy(v));

                /* lop off the carry */
                tmp_u->size--;
                v->size--;
            }
        }
        else
        {
            Q(j) = 0;
        }

        /* copy back to u */
        memcpy(&u->comps[u->size-n-1-j], tmp_u->comps, (n+1)*COMP_BYTE_SIZE);
    } while (++j <= m);

    bi_free(ctx, tmp_u);
    bi_free(ctx, v);

    if (is_mod)     /* get the remainder */
    {
        bi_free(ctx, quotient);
        /* undo the normalisation by dividing by d again */
        return bi_int_divide(ctx, trim(u), d);
    }
    else            /* get the quotient */
    {
        bi_free(ctx, u);
        return trim(quotient);
    }
}