/**
 * Multiplies two quadratic extension field elements with Karatsuba, leaving
 * the result in double-precision (unreduced) form for lazy reduction.
 *
 * Computes c = a * b for a = a_0 + a_1 * u, b = b_0 + b_1 * u, where u^2 is
 * the quadratic non-residue returned by fp_prime_get_qnr(). Uses the
 * carry-free low-level additions (fp_addn_low/fp_addd_low), so this variant
 * presumably requires spare room in the top digit — confirm against the
 * backend's assumptions.
 *
 * @param[out] c - the double-precision result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp2_mulc_low(dv2_t c, fp2_t a, fp2_t b) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

	/* Karatsuba algorithm: 3 base-field multiplications instead of 4. */

	/* t0 = a_0 + a_1, t1 = b_0 + b_1. */
	fp_addn_low(t0, a[0], a[1]);
	fp_addn_low(t1, b[0], b[1]);

	/* c_0 = a_0 * b_0, c_1 = a_1 * b_1, t2 = (a_0 + a_1) * (b_0 + b_1). */
	fp_muln_low(c[0], a[0], b[0]);
	fp_muln_low(c[1], a[1], b[1]);
	fp_muln_low(t2, t0, t1);

	/* t0 = (a_0 * b_0) + (a_1 * b_1), needed below for the cross term. */
	fp_addd_low(t0, c[0], c[1]);

	/* c_0 = (a_0 * b_0) + u^2 * (a_1 * b_1); one subtraction covers
	 * the common case u^2 = -1. */
	fp_subd_low(c[0], c[0], c[1]);

#ifndef FP_QNRES
	/* When u^2 = qnr < -1, subtract (a_1 * b_1) once more for each extra
	 * unit of |qnr|.  With FP_QNRES defined, qnr is fixed at -1 and the
	 * loop is compiled out. */
	for (int i = -1; i > fp_prime_get_qnr(); i--) {
		fp_subd_low(c[0], c[0], c[1]);
	}
#endif

	/* c_1 = (t2 - t0) = a_0 * b_1 + a_1 * b_0 (the cross term). */
	fp_subd_low(c[1], t2, t0);

	/* c_0 = c_0 + 2^N * p/4: shift the upper FP_DIGS + 1 digits left by 2
	 * bits, add p, and shift back.  This adds a multiple of p scaled into
	 * the upper half — presumably to make the (possibly negative)
	 * intermediate non-negative before later reduction; confirm against
	 * the lazy-reduction contract of the backend. */
	bn_lshb_low(c[0] + FP_DIGS - 1, c[0] + FP_DIGS - 1, FP_DIGS + 1, 2);
	fp_addn_low(c[0] + FP_DIGS, c[0] + FP_DIGS, fp_prime_get());
	bn_rshb_low(c[0] + FP_DIGS - 1, c[0] + FP_DIGS - 1, FP_DIGS + 1, 2);
}
/**
 * Multiplies two quadratic extension field elements using Karatsuba and
 * reduces each component modulo p at the end (basic, allocation-based
 * variant).
 *
 * Computes c = a * b for a = a_0 + a_1 * u, b = b_0 + b_1 * u, where u^2 is
 * the quadratic non-residue returned by fp_prime_get_qnr().
 *
 * @param[out] c - the result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp2_mul_basic(fp2_t c, fp2_t a, fp2_t b) {
	dv_t t0, t1, t2, t3, t4;

	dv_null(t0);
	dv_null(t1);
	dv_null(t2);
	dv_null(t3);
	dv_null(t4);

	TRY {
		dv_new(t0);
		dv_new(t1);
		dv_new(t2);
		dv_new(t3);
		dv_new(t4);

		/* Karatsuba algorithm: 3 base-field multiplications instead of 4. */

		/* t2 = a_0 + a_1, t1 = b_0 + b_1 (reduced additions). */
		fp_add(t2, a[0], a[1]);
		fp_add(t1, b[0], b[1]);

		/* t3 = (a_0 + a_1) * (b_0 + b_1). */
		fp_muln_low(t3, t2, t1);

		/* t0 = a_0 * b_0, t4 = a_1 * b_1. */
		fp_muln_low(t0, a[0], b[0]);
		fp_muln_low(t4, a[1], b[1]);

		/* t2 = (a_0 * b_0) + (a_1 * b_1), needed for the cross term. */
		fp_addc_low(t2, t0, t4);

		/* t1 = (a_0 * b_0) + u^2 * (a_1 * b_1); this first subtraction
		 * covers the common case u^2 = -1. */
		fp_subc_low(t1, t0, t4);

		/* For qnr < -1, subtract t4 once more per extra unit of |qnr|. */
		for (int i = -1; i > fp_prime_get_qnr(); i--) {
			fp_subc_low(t1, t1, t4);
		}

		/* c_0 = t1 mod p. */
		fp_rdc(c[0], t1);

		/* t4 = t3 - t2 = a_0 * b_1 + a_1 * b_0 (the cross term). */
		fp_subc_low(t4, t3, t2);

		/* c_1 = t4 mod p. */
		fp_rdc(c[1], t4);
	} CATCH_ANY {
		THROW(ERR_CAUGHT);
	} FINALLY {
		dv_free(t0);
		dv_free(t1);
		dv_free(t2);
		dv_free(t3);
		dv_free(t4);
	}
}
/**
 * Multiplies two prime field elements using Karatsuba multiplication and
 * reduces the double-precision product modulo p.
 *
 * @param[out] c - the result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp_mul_karat(fp_t c, const fp_t a, const fp_t b) {
	dv_t prod;

	dv_null(prod);

	TRY {
		/* Accumulate into a scratch buffer so that c may alias a or b. */
		dv_new(prod);
		dv_zero(prod, 2 * FP_DIGS);
		if (FP_DIGS <= 1) {
			/* A single digit cannot be split; multiply directly. */
			fp_muln_low(prod, a, b);
		} else {
			/* Recursive Karatsuba with FP_KARAT levels of splitting. */
			fp_mul_karat_imp(prod, a, b, FP_DIGS, FP_KARAT);
		}
		/* Reduce the double-precision product modulo p. */
		fp_rdc(c, prod);
	} CATCH_ANY {
		THROW(ERR_CAUGHT);
	} FINALLY {
		dv_free(prod);
	}
}
/**
 * Multiplies two quadratic extension field elements using Karatsuba,
 * leaving the result in double-precision (unreduced) form.
 *
 * Computes c = a * b for a = a_0 + a_1 * u, b = b_0 + b_1 * u, where u^2 is
 * the quadratic non-residue returned by fp_prime_get_qnr().  FP_SPACE
 * selects carry-free additions when the modulus leaves spare bits in the
 * top digit; otherwise modular additions are used.
 *
 * @param[out] c - the double-precision result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp2_muln_low(dv2_t c, fp2_t a, fp2_t b) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

	/* Karatsuba algorithm: 3 base-field multiplications instead of 4. */

	/* t0 = a_0 + a_1, t1 = b_0 + b_1. */
#ifdef FP_SPACE
	/* Spare bits available: the sums cannot overflow, skip reduction. */
	fp_addn_low(t0, a[0], a[1]);
	fp_addn_low(t1, b[0], b[1]);
#else
	fp_addm_low(t0, a[0], a[1]);
	fp_addm_low(t1, b[0], b[1]);
#endif

	/* c_0 = a_0 * b_0, c_1 = a_1 * b_1. */
	fp_muln_low(c[0], a[0], b[0]);
	fp_muln_low(c[1], a[1], b[1]);

	/* t2 = (a_0 + a_1) * (b_0 + b_1). */
	fp_muln_low(t2, t0, t1);

	/* t0 = (a_0 * b_0) + (a_1 * b_1), needed for the cross term. */
#ifdef FP_SPACE
	fp_addd_low(t0, c[0], c[1]);
#else
	fp_addc_low(t0, c[0], c[1]);
#endif

	/* c_0 = (a_0 * b_0) + u^2 * (a_1 * b_1); one subtraction covers
	 * the common case u^2 = -1. */
	fp_subc_low(c[0], c[0], c[1]);

#ifndef FP_QNRES
	/* Correct c_0 when qnr != -1.  For qnr < -1 the first loop subtracts
	 * c_1 once more per extra unit of |qnr|.  For qnr >= 0 the first loop
	 * does nothing and the second adds c_1 back (qnr + 1) times, netting
	 * c_0 = a_0 * b_0 + qnr * a_1 * b_1. */
	for (int i = -1; i > fp_prime_get_qnr(); i--) {
		fp_subc_low(c[0], c[0], c[1]);
	}
	for (int i = 0; i <= fp_prime_get_qnr(); i++) {
		fp_addc_low(c[0], c[0], c[1]);
	}
#endif

	/* c_1 = t2 - t0 = a_0 * b_1 + a_1 * b_0 (the cross term). */
#ifdef FP_SPACE
	fp_subd_low(c[1], t2, t0);
#else
	fp_subc_low(c[1], t2, t0);
#endif
}
/**
 * Squares a cubic extension field element, leaving the result in
 * double-precision (unreduced) form.
 *
 * Computes c = a^2 for a = a_0 + a_1 * v + a_2 * v^2, where v^3 = B is the
 * cubic non-residue returned by fp_prime_get_cnr() (the loops below handle
 * B < -1 by repeated subtraction, matching the sibling fp3 routines).
 *
 * @param[out] c - the double-precision result.
 * @param[in] a - the element to square.
 */
void fp3_sqrn_low(dv3_t c, fp3_t a) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];
	align dig_t t3[2 * FP_DIGS], t4[2 * FP_DIGS], t5[2 * FP_DIGS];

	/* t0 = a_0^2. */
	fp_sqrn_low(t0, a[0]);

	/* t1 = 2 * a_1 * a_2. */
#ifdef FP_SPACE
	/* Spare top-digit bits: doubling cannot overflow, skip reduction. */
	fp_dbln_low(t2, a[1]);
#else
	fp_dblm_low(t2, a[1]);
#endif
	fp_muln_low(t1, t2, a[2]);

	/* t2 = a_2^2. */
	fp_sqrn_low(t2, a[2]);

	/* t3 = (a_0 + a_2 + a_1)^2, t4 = (a_0 + a_2 - a_1)^2. */
#ifdef FP_SPACE
	fp_addn_low(t3, a[0], a[2]);
	fp_addn_low(t4, t3, a[1]);
#else
	fp_addm_low(t3, a[0], a[2]);
	fp_addm_low(t4, t3, a[1]);
#endif
	fp_subm_low(t5, t3, a[1]);
	fp_sqrn_low(t3, t4);
	fp_sqrn_low(t4, t5);

	/* t4 = (t4 + t3)/2 = a_0^2 + a_1^2 + a_2^2 + 2 * a_0 * a_2. */
	fp_addd_low(t4, t4, t3);
	fp_hlvd_low(t4, t4);

	/* t3 = t3 - t4 - t1 = 2 * a_0 * a_1 + a_1^2 - t1 (cross terms). */
	fp_addc_low(t5, t1, t4);
	fp_subc_low(t3, t3, t5);

	/* c_2 = t4 - t0 - t2 = a_1^2 + 2 * a_0 * a_2. */
	fp_addc_low(t5, t0, t2);
	fp_subc_low(c[2], t4, t5);

	/* c_0 = t0 + t1 * B; first subtraction covers B = -1. */
	fp_subc_low(c[0], t0, t1);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[0], c[0], t1);
	}

	/* c_1 = t3 + t2 * B; first subtraction covers B = -1. */
	fp_subc_low(c[1], t3, t2);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[1], c[1], t2);
	}
}
/**
 * Multiplies two cubic extension field elements using Karatsuba, leaving
 * the result in double-precision (unreduced) form.
 *
 * Computes c = a * b for a = a_0 + a_1 * v + a_2 * v^2, where v^3 = B is
 * the cubic non-residue returned by fp_prime_get_cnr().  FP_SPACE selects
 * carry-free additions when the modulus leaves spare bits in the top digit.
 *
 * @param[out] c - the double-precision result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp3_muln_low(dv3_t c, fp3_t a, fp3_t b) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS], t3[2 * FP_DIGS];
	align dig_t t4[2 * FP_DIGS], t5[2 * FP_DIGS], t6[2 * FP_DIGS];

	/* Karatsuba algorithm: 6 base-field multiplications instead of 9. */

	/* t0 = a_0 * b_0, t1 = a_1 * b_1, t2 = a_2 * b_2. */
	fp_muln_low(t0, a[0], b[0]);
	fp_muln_low(t1, a[1], b[1]);
	fp_muln_low(t2, a[2], b[2]);

	/* t3 = (a_1 + a_2) * (b_1 + b_2). */
#ifdef FP_SPACE
	fp_addn_low(t3, a[1], a[2]);
	fp_addn_low(t4, b[1], b[2]);
#else
	fp_addm_low(t3, a[1], a[2]);
	fp_addm_low(t4, b[1], b[2]);
#endif
	fp_muln_low(t5, t3, t4);
	/* t4 = a_1 * b_2 + a_2 * b_1 (this piece is multiplied by B below). */
	fp_addd_low(t6, t1, t2);
	fp_subc_low(t4, t5, t6);
	/* c_0 = a_0 * b_0 + B * (a_1 * b_2 + a_2 * b_1); first subtraction
	 * covers B = -1, the loop handles B < -1. */
	fp_subc_low(c[0], t0, t4);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[0], c[0], t4);
	}

	/* (a_0 + a_1) * (b_0 + b_1) for the v coefficient. */
#ifdef FP_SPACE
	fp_addn_low(t4, a[0], a[1]);
	fp_addn_low(t5, b[0], b[1]);
#else
	fp_addm_low(t4, a[0], a[1]);
	fp_addm_low(t5, b[0], b[1]);
#endif
	fp_muln_low(t6, t4, t5);
	/* t4 = a_0 * b_1 + a_1 * b_0. */
	fp_addd_low(t4, t0, t1);
	fp_subc_low(t4, t6, t4);
	/* c_1 = a_0 * b_1 + a_1 * b_0 + B * a_2 * b_2. */
	fp_subc_low(c[1], t4, t2);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[1], c[1], t2);
	}

	/* (a_0 + a_2) * (b_0 + b_2) for the v^2 coefficient. */
#ifdef FP_SPACE
	fp_addn_low(t5, a[0], a[2]);
	fp_addn_low(t6, b[0], b[2]);
#else
	fp_addm_low(t5, a[0], a[2]);
	fp_addm_low(t6, b[0], b[2]);
#endif
	fp_muln_low(t4, t5, t6);
	/* t5 = a_0 * b_2 + a_2 * b_0. */
	fp_addd_low(t6, t0, t2);
	fp_subc_low(t5, t4, t6);
	/* c_2 = a_0 * b_2 + a_2 * b_0 + a_1 * b_1. */
	fp_addc_low(c[2], t5, t1);
}
/**
 * Multiplies two prime field elements using Comba multiplication and
 * reduces the double-precision product modulo p.
 *
 * Fix: the original called dv_free(t) inside the TRY block AND again in
 * FINALLY, releasing the temporary twice.  FINALLY runs on both the normal
 * and exceptional paths, so the single free there suffices (this also
 * matches the sibling wrappers such as fp_mul_karat).
 *
 * @param[out] c - the result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp_mul_comba(fp_t c, const fp_t a, const fp_t b) {
	dv_t t;

	dv_null(t);

	TRY {
		/* We need a temporary variable so that c can be a or b. */
		dv_new(t);
		fp_muln_low(t, a, b);
		fp_rdc(c, t);
	} CATCH_ANY {
		THROW(ERR_CAUGHT);
	} FINALLY {
		dv_free(t);
	}
}
/**
 * Multiplies a dense dodecic extension field element a by a sparse element
 * b using Karatsuba over the fp6 tower, with lazy reduction (components are
 * kept double-precision and reduced only at the end).
 *
 * The sparsity pattern of b depends on the twist type: for an EP_DTYPE
 * twist, b_0 presumably has only its first fp2 (or fp, under EP_ADD ==
 * BASIC) coefficient set; for the other twist type, b_1 is the sparse
 * half — confirm against the callers that build the line function b.
 *
 * @param[out] c - the result.
 * @param[in] a - the dense operand.
 * @param[in] b - the sparse operand.
 */
void fp12_mul_dxs_lazyr(fp12_t c, fp12_t a, fp12_t b) {
	fp6_t t0;
	dv6_t u0, u1, u2;

	fp6_null(t0);
	dv6_null(u0);
	dv6_null(u1);
	dv6_null(u2);

	TRY {
		fp6_new(t0);
		dv6_new(u0);
		dv6_new(u1);
		dv6_new(u2);

		if (ep2_curve_is_twist() == EP_DTYPE) {
#if EP_ADD == BASIC
			/* t0 = a_0 * b_0: b_0 has a single fp coefficient, so each
			 * fp2 component of a_0 is scaled by b[0][0][0]. */
			fp_muln_low(u0[0][0], a[0][0][0], b[0][0][0]);
			fp_muln_low(u0[0][1], a[0][0][1], b[0][0][0]);
			fp_muln_low(u0[1][0], a[0][1][0], b[0][0][0]);
			fp_muln_low(u0[1][1], a[0][1][1], b[0][0][0]);
			fp_muln_low(u0[2][0], a[0][2][0], b[0][0][0]);
			fp_muln_low(u0[2][1], a[0][2][1], b[0][0][0]);
			/* t2 = b_0 + b_1, exploiting that only b[0][0][0] is set. */
			fp_add(t0[0][0], b[0][0][0], b[1][0][0]);
			fp_copy(t0[0][1], b[1][0][1]);
			fp2_copy(t0[1], b[1][1]);
#elif EP_ADD == PROJC
			/* t0 = a_0 * b_0: b_0 has a single fp2 coefficient. */
#ifdef RLC_FP_ROOM
			fp2_mulc_low(u0[0], a[0][0], b[0][0]);
			fp2_mulc_low(u0[1], a[0][1], b[0][0]);
			fp2_mulc_low(u0[2], a[0][2], b[0][0]);
#else
			fp2_muln_low(u0[0], a[0][0], b[0][0]);
			fp2_muln_low(u0[1], a[0][1], b[0][0]);
			fp2_muln_low(u0[2], a[0][2], b[0][0]);
#endif
			/* t2 = b_0 + b_1. */
			fp2_add(t0[0], b[0][0], b[1][0]);
			fp2_copy(t0[1], b[1][1]);
#endif
			/* t1 = a_1 * b_1 (sparse fp6 multiplication). */
			fp6_mul_dxs_unr_lazyr(u1, a[1], b[1]);
		} else {
			/* t0 = a_0 * b_0 (sparse fp6 multiplication). */
			fp6_mul_dxs_unr_lazyr(u0, a[0], b[0]);
#if EP_ADD == BASIC
			/* t1 = a_1 * b_1: b_1 has a single fp coefficient b[1][1][0];
			 * the a_1[2] product picks up a non-residue multiplication. */
			fp_muln_low(u1[1][0], a[1][2][0], b[1][1][0]);
			fp_muln_low(u1[1][1], a[1][2][1], b[1][1][0]);
			fp2_nord_low(u1[0], u1[1]);
			fp_muln_low(u1[1][0], a[1][0][0], b[1][1][0]);
			fp_muln_low(u1[1][1], a[1][0][1], b[1][1][0]);
			fp_muln_low(u1[2][0], a[1][1][0], b[1][1][0]);
			fp_muln_low(u1[2][1], a[1][1][1], b[1][1][0]);
			/* t2 = b_0 + b_1, exploiting that only b[1][1][0] is set. */
			fp2_copy(t0[0], b[0][0]);
			fp_add(t0[1][0], b[0][1][0], b[1][1][0]);
			fp_copy(t0[1][1], b[0][1][1]);
#elif EP_ADD == PROJC
			/* t1 = a_1 * b_1: b_1 has a single fp2 coefficient b[1][1];
			 * the a_1[2] product picks up a non-residue multiplication. */
			fp2_muln_low(u1[1], a[1][2], b[1][1]);
			fp2_nord_low(u1[0], u1[1]);
			fp2_muln_low(u1[1], a[1][0], b[1][1]);
			fp2_muln_low(u1[2], a[1][1], b[1][1]);
			/* t2 = b_0 + b_1. */
			fp2_copy(t0[0], b[0][0]);
			fp2_add(t0[1], b[0][1], b[1][1]);
#endif
		}
		/* c_1 = a_0 + a_1. */
		fp6_add(c[1], a[0], a[1]);
		/* c_1 = (a_0 + a_1) * (b_0 + b_1) - a_0 * b_0 - a_1 * b_1
		 * (Karatsuba cross term, computed lazily then reduced). */
		fp6_mul_dxs_unr_lazyr(u2, c[1], t0);
		for (int i = 0; i < 3; i++) {
			fp2_subc_low(u2[i], u2[i], u0[i]);
			fp2_subc_low(u2[i], u2[i], u1[i]);
		}
		fp2_rdcn_low(c[1][0], u2[0]);
		fp2_rdcn_low(c[1][1], u2[1]);
		fp2_rdcn_low(c[1][2], u2[2]);
		/* Fold v * (a_1 * b_1) into a_0 * b_0: the top fp2 coefficient of
		 * u1 wraps around through the non-residue. */
		fp2_nord_low(u2[0], u1[2]);
		fp2_addc_low(u0[0], u0[0], u2[0]);
		fp2_addc_low(u0[1], u0[1], u1[0]);
		fp2_addc_low(u0[2], u0[2], u1[1]);
		/* c_0 = a_0 * b_0 + v * a_1 * b_1, reduced per component. */
		fp2_rdcn_low(c[0][0], u0[0]);
		fp2_rdcn_low(c[0][1], u0[1]);
		fp2_rdcn_low(c[0][2], u0[2]);
	} CATCH_ANY {
		THROW(ERR_CAUGHT);
	} FINALLY {
		fp6_free(t0);
		dv6_free(u0);
		dv6_free(u1);
		dv6_free(u2);
	}
}
/**
 * Squares a cubic extension field element with intermediate values held in
 * double precision and each component reduced modulo p at the end (basic,
 * allocation-based variant).
 *
 * Computes c = a^2 for a = a_0 + a_1 * v + a_2 * v^2, where v^3 = B is the
 * cubic non-residue returned by fp_prime_get_cnr().
 *
 * @param[out] c - the result.
 * @param[in] a - the element to square.
 */
void fp3_sqr_basic(fp3_t c, fp3_t a) {
	dv_t t0, t1, t2, t3, t4, t5;

	dv_null(t0);
	dv_null(t1);
	dv_null(t2);
	dv_null(t3);
	dv_null(t4);
	dv_null(t5);

	TRY {
		dv_new(t0);
		dv_new(t1);
		dv_new(t2);
		dv_new(t3);
		dv_new(t4);
		dv_new(t5);

		/* t0 = a_0^2. */
		fp_sqrn_low(t0, a[0]);

		/* t1 = 2 * a_1 * a_2. */
		fp_dbl(t2, a[1]);
		fp_muln_low(t1, t2, a[2]);

		/* t2 = a_2^2. */
		fp_sqrn_low(t2, a[2]);

		/* t3 = (a_0 + a_2 + a_1)^2, t4 = (a_0 + a_2 - a_1)^2. */
		fp_add(t3, a[0], a[2]);
		fp_add(t4, t3, a[1]);
		fp_sub(t5, t3, a[1]);
		fp_sqrn_low(t3, t4);
		fp_sqrn_low(t4, t5);

		/* t4 = (t4 + t3)/2 = a_0^2 + a_1^2 + a_2^2 + 2 * a_0 * a_2. */
		fp_addd_low(t4, t4, t3);
		fp_hlvd_low(t4, t4);

		/* t3 = t3 - t4 - t1 (cross terms for the v coefficient). */
		fp_addc_low(t5, t1, t4);
		fp_subc_low(t3, t3, t5);

		/* c_2 = t4 - t0 - t2 = a_1^2 + 2 * a_0 * a_2. */
		fp_addc_low(t5, t0, t2);
		fp_subc_low(t4, t4, t5);
		fp_rdc(c[2], t4);

		/* c_0 = t0 + t1 * B; first subtraction covers B = -1, the loop
		 * handles B < -1. */
		fp_subc_low(t0, t0, t1);
		for (int i = -1; i > fp_prime_get_cnr(); i--) {
			fp_subc_low(t0, t0, t1);
		}
		fp_rdc(c[0], t0);

		/* c_1 = t3 + t2 * B; same non-residue handling as above. */
		fp_subc_low(t3, t3, t2);
		for (int i = -1; i > fp_prime_get_cnr(); i--) {
			fp_subc_low(t3, t3, t2);
		}
		fp_rdc(c[1], t3);
	} CATCH_ANY {
		THROW(ERR_CAUGHT);
	} FINALLY {
		dv_free(t0);
		dv_free(t1);
		dv_free(t2);
		dv_free(t3);
		dv_free(t4);
		dv_free(t5);
	}
}
/**
 * Multiplies two digit vectors and reduces the double-precision product
 * modulo p, using a stack-allocated temporary so c may alias a or b.
 *
 * Fix: the alignment qualifier was written after the type (`dig_t align`);
 * reordered to `align dig_t` for consistency with every other declaration
 * in this file (e.g. fp2_muln_low, fp3_sqrn_low).
 *
 * @param[out] c - the result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp_mulm_low(dig_t *c, const dig_t *a, const dig_t *b) {
	align dig_t t[2 * FP_DIGS];

	fp_muln_low(t, a, b);
	fp_rdc(c, t);
}
/**
 * Squares a quadratic extension field element, leaving the result in
 * double-precision (unreduced) form.
 *
 * Computes c = a^2 for a = a_0 + a_1 * u with u^2 = qnr, using the identity
 * c_0 = (a_0 + a_1)(a_0 + qnr * a_1) (adjusted for qnr != -1) and
 * c_1 = 2 * a_0 * a_1.  With FP_QNRES defined, qnr is fixed at -1 and the
 * general-case code is compiled out.
 *
 * @param[out] c - the double-precision result.
 * @param[in] a - the element to square.
 */
void fp2_sqrn_low(dv2_t c, fp2_t a) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

	/* t0 = (a0 + a1). */
#ifdef FP_SPACE
	/* If we have room for carries, we can avoid reductions here. */
	fp_addn_low(t0, a[0], a[1]);
#else
	fp_addm_low(t0, a[0], a[1]);
#endif
	/* t1 = (a0 - a1). */
	fp_subm_low(t1, a[0], a[1]);

#ifdef FP_QNRES
	/* t2 = 2 * a0. */
#ifdef FP_SPACE
	fp_dbln_low(t2, a[0]);
#else
	fp_dblm_low(t2, a[0]);
#endif
	/* c1 = 2 * a0 * a1. */
	fp_muln_low(c[1], t2, a[1]);
	/* c_0 = (a0 + a1)(a0 - a1) = a_0^2 + a_1^2 * u^2 for u^2 = -1. */
	fp_muln_low(c[0], t0, t1);
#else /* !FP_QNRES */
	/* t1 = a0 + u^2 * a1: subtract a1 once more per extra unit of |qnr|
	 * beyond the single subtraction already performed above. */
	for (int i = -1; i > fp_prime_get_qnr(); i--) {
		fp_subm_low(t1, t1, a[1]);
	}

	if (fp_prime_get_qnr() == -1) {
		/* t2 = 2 * a0. */
		fp_dbl(t2, a[0]);
		/* c1 = 2 * a0 * a1. */
		fp_muln_low(c[1], t2, a[1]);
		/* c0 = (a0 + a1)(a0 - a1) = a0^2 + a_1^2 * u^2. */
		fp_muln_low(c[0], t0, t1);
	} else {
		/* c1 = a0 * a1 (doubled after the correction below). */
		fp_muln_low(c[1], a[0], a[1]);
		/* c0 = (a0 + a1)(a0 + u^2 * a1); still off by (u^2 + 1) * a0 * a1,
		 * corrected by the loop below. */
		fp_muln_low(c[0], t0, t1);
#ifdef FP_SPACE
		for (int i = -1; i > fp_prime_get_qnr(); i--) {
			fp_addd_low(c[0], c[0], c[1]);
		}
		/* c1 = 2 * a0 * a1. */
		fp_addd_low(c[1], c[1], c[1]);
#else
		for (int i = -1; i > fp_prime_get_qnr(); i--) {
			fp_addc_low(c[0], c[0], c[1]);
		}
		/* c1 = 2 * a0 * a1. */
		fp_addc_low(c[1], c[1], c[1]);
#endif
	}
#endif
	/* c = c0 + c1 * u. */
}
/**
 * Doubles a point p in Jacobian projective coordinates, storing the result
 * in r, and evaluates the tangent-line function at the point q for a
 * pairing over a curve with embedding degree 2.  Uses lazy reduction for
 * the y3 computation (u0, u1 hold double-precision intermediates).
 *
 * @param[out] l - the coefficients of the evaluated line function.
 * @param[out] r - the doubled point (left in projective form, norm = 0).
 * @param[in] p - the point to double.
 * @param[in] q - the evaluation point.
 */
void pp_dbl_k2_projc_lazyr(fp2_t l, ep_t r, ep_t p, ep_t q) {
	fp_t t0, t1, t2, t3, t4, t5;
	dv_t u0, u1;

	fp_null(t0);
	fp_null(t1);
	fp_null(t2);
	fp_null(t3);
	fp_null(t4);
	fp_null(t5);
	dv_null(u0);
	dv_null(u1);

	TRY {
		fp_new(t0);
		fp_new(t1);
		fp_new(t2);
		fp_new(t3);
		fp_new(t4);
		fp_new(t5);
		dv_new(u0);
		dv_new(u1);

		/* For these curves, we always can choose a = -3. */
		/* dbl-2001-b formulas: 3M + 5S + 8add + 1*4 + 2*8 + 1*3 */
		/* http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b */

		/* t0 = delta = z1^2. */
		fp_sqr(t0, p->z);

		/* t1 = gamma = y1^2. */
		fp_sqr(t1, p->y);

		/* t2 = beta = x1 * y1^2. */
		fp_mul(t2, p->x, t1);

		/* t3 = alpha = 3 * (x1 - z1^2) * (x1 + z1^2); the factor 3 comes
		 * from a = -3 (see formula reference above). */
		fp_sub(t3, p->x, t0);
		fp_add(t4, p->x, t0);
		fp_mul(t4, t3, t4);
		fp_dbl(t3, t4);
		fp_add(t3, t3, t4);

		/* t2 = 4 * beta. */
		fp_dbl(t2, t2);
		fp_dbl(t2, t2);

		/* z3 = (y1 + z1)^2 - gamma - delta. */
		fp_add(r->z, p->y, p->z);
		fp_sqr(r->z, r->z);
		fp_sub(r->z, r->z, t1);
		fp_sub(r->z, r->z, t0);

		/* l0 = 2 * gamma - alpha * (delta * xq + x1). */
		fp_dbl(t1, t1);
		fp_mul(t5, t0, q->x);
		fp_add(t5, t5, p->x);
		fp_mul(t5, t5, t3);
		fp_sub(l[0], t1, t5);

		/* x3 = alpha^2 - 8 * beta. */
		fp_dbl(t5, t2);
		fp_sqr(r->x, t3);
		fp_sub(r->x, r->x, t5);

		/* y3 = alpha * (4 * beta - x3) - 8 * gamma^2, with lazy reduction:
		 * t1 holds 2 * gamma at this point, so u0 = (2 * gamma)^2 doubled
		 * gives 8 * gamma^2 in double precision. */
		fp_sqrn_low(u0, t1);
		fp_addc_low(u0, u0, u0);
		fp_subm_low(r->y, t2, r->x);
		fp_muln_low(u1, r->y, t3);
		fp_subc_low(u1, u1, u0);
		fp_rdcn_low(r->y, u1);

		/* l1 = - z3 * delta * yq. */
		fp_mul(l[1], r->z, t0);
		fp_mul(l[1], l[1], q->y);

		/* Result stays in projective coordinates. */
		r->norm = 0;
	} CATCH_ANY {
		THROW(ERR_CAUGHT);
	} FINALLY {
		fp_free(t0);
		fp_free(t1);
		fp_free(t2);
		fp_free(t3);
		fp_free(t4);
		fp_free(t5);
		dv_free(u0);
		dv_free(u1);
	}
}
/**
 * Multiplies two cubic extension field elements using Karatsuba with
 * double-precision intermediates, reducing each component modulo p at the
 * end (basic, allocation-based variant).
 *
 * Computes c = a * b for a = a_0 + a_1 * v + a_2 * v^2, where v^3 = B is
 * the cubic non-residue returned by fp_prime_get_cnr().
 *
 * @param[out] c - the result.
 * @param[in] a - the first operand.
 * @param[in] b - the second operand.
 */
void fp3_mul_basic(fp3_t c, fp3_t a, fp3_t b) {
	dv_t t, t0, t1, t2, t3, t4, t5, t6;

	dv_null(t);
	dv_null(t0);
	dv_null(t1);
	dv_null(t2);
	dv_null(t3);
	dv_null(t4);
	dv_null(t5);
	dv_null(t6);

	TRY {
		dv_new(t);
		dv_new(t0);
		dv_new(t1);
		dv_new(t2);
		dv_new(t3);
		dv_new(t4);
		dv_new(t5);
		dv_new(t6);

		/* Karatsuba algorithm: 6 base-field multiplications instead of 9. */

		/* t0 = a_0 * b_0, t1 = a_1 * b_1, t2 = a_2 * b_2. */
		fp_muln_low(t0, a[0], b[0]);
		fp_muln_low(t1, a[1], b[1]);
		fp_muln_low(t2, a[2], b[2]);

		/* t3 = (a_1 + a_2) * (b_1 + b_2). */
		fp_add(t3, a[1], a[2]);
		fp_add(t4, b[1], b[2]);
		fp_muln_low(t, t3, t4);
		/* t4 = a_1 * b_2 + a_2 * b_1 (multiplied by B below). */
		fp_addd_low(t6, t1, t2);
		fp_subc_low(t4, t, t6);
		/* t3 = a_0 * b_0 + B * (a_1 * b_2 + a_2 * b_1); first subtraction
		 * covers B = -1, the loop handles B < -1. */
		fp_subc_low(t3, t0, t4);
		for (int i = -1; i > fp_prime_get_cnr(); i--) {
			fp_subc_low(t3, t3, t4);
		}

		/* (a_0 + a_1) * (b_0 + b_1) for the v coefficient. */
		fp_add(t4, a[0], a[1]);
		fp_add(t5, b[0], b[1]);
		fp_muln_low(t, t4, t5);
		/* t4 = a_0 * b_1 + a_1 * b_0 + B * a_2 * b_2. */
		fp_addd_low(t4, t0, t1);
		fp_subc_low(t4, t, t4);
		fp_subc_low(t4, t4, t2);
		for (int i = -1; i > fp_prime_get_cnr(); i--) {
			fp_subc_low(t4, t4, t2);
		}

		/* (a_0 + a_2) * (b_0 + b_2) for the v^2 coefficient. */
		fp_add(t5, a[0], a[2]);
		fp_add(t6, b[0], b[2]);
		fp_muln_low(t, t5, t6);
		/* t5 = a_0 * b_2 + a_2 * b_0 + a_1 * b_1. */
		fp_addd_low(t6, t0, t2);
		fp_subc_low(t5, t, t6);
		fp_addc_low(t5, t5, t1);

		/* c_0 = t3 mod p. */
		fp_rdc(c[0], t3);
		/* c_1 = t4 mod p. */
		fp_rdc(c[1], t4);
		/* c_2 = t5 mod p. */
		fp_rdc(c[2], t5);
	} CATCH_ANY {
		THROW(ERR_CAUGHT);
	} FINALLY {
		dv_free(t);
		dv_free(t0);
		dv_free(t1);
		dv_free(t2);
		dv_free(t3);
		dv_free(t4);
		dv_free(t5);
		dv_free(t6);
	}
}