void fp2_mulc_low(dv2_t c, fp2_t a, fp2_t b) {
    align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

    /* Karatsuba algorithm. */

    /* t0 = a_0 + a_1, t1 = b_0 + b_1. */
    fp_addn_low(t0, a[0], a[1]);
    fp_addn_low(t1, b[0], b[1]);

    /* c_0 = a_0 * b_0, c_1 = a_1 * b_1, t2 = (a_0 + a_1) * (b_0 + b_1). */
    fp_muln_low(c[0], a[0], b[0]);
    fp_muln_low(c[1], a[1], b[1]);
    fp_muln_low(t2, t0, t1);

    /* t0 = (a_0 * b_0) + (a_1 * b_1). */
    fp_addd_low(t0, c[0], c[1]);

    /* c_0 = (a_0 * b_0) + u^2 * (a_1 * b_1). */
    fp_subd_low(c[0], c[0], c[1]);
#ifndef FP_QNRES
    /* Adjust for a quadratic non-residue u^2 = qnr different from -1. */
    for (int i = -1; i > fp_prime_get_qnr(); i--) {
        fp_subd_low(c[0], c[0], c[1]);
    }
#endif

    /* c_1 = (t2 - t0). */
    fp_subd_low(c[1], t2, t0);

    /* c_0 = c_0 + 2^N * p/4. */
    bn_lshb_low(c[0] + FP_DIGS - 1, c[0] + FP_DIGS - 1, FP_DIGS + 1, 2);
    fp_addn_low(c[0] + FP_DIGS, c[0] + FP_DIGS, fp_prime_get());
    bn_rshb_low(c[0] + FP_DIGS - 1, c[0] + FP_DIGS - 1, FP_DIGS + 1, 2);
}
void fp3_sqrn_low(dv3_t c, fp3_t a) {
    align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];
    align dig_t t3[2 * FP_DIGS], t4[2 * FP_DIGS], t5[2 * FP_DIGS];

    /* t0 = a_0^2. */
    fp_sqrn_low(t0, a[0]);

    /* t1 = 2 * a_1 * a_2. */
#ifdef FP_SPACE
    fp_dbln_low(t2, a[1]);
#else
    fp_dblm_low(t2, a[1]);
#endif
    fp_muln_low(t1, t2, a[2]);

    /* t2 = a_2^2. */
    fp_sqrn_low(t2, a[2]);

    /* t3 = (a_0 + a_2 + a_1)^2, t4 = (a_0 + a_2 - a_1)^2. */
#ifdef FP_SPACE
    fp_addn_low(t3, a[0], a[2]);
    fp_addn_low(t4, t3, a[1]);
#else
    fp_addm_low(t3, a[0], a[2]);
    fp_addm_low(t4, t3, a[1]);
#endif
    fp_subm_low(t5, t3, a[1]);
    fp_sqrn_low(t3, t4);
    fp_sqrn_low(t4, t5);

    /* t4 = (t4 + t3)/2. */
    fp_addd_low(t4, t4, t3);
    fp_hlvd_low(t4, t4);

    /* t3 = t3 - t4 - t1. */
    fp_addc_low(t5, t1, t4);
    fp_subc_low(t3, t3, t5);

    /* c_2 = t4 - t0 - t2. */
    fp_addc_low(t5, t0, t2);
    fp_subc_low(c[2], t4, t5);

    /* c_0 = t0 + t1 * B. */
    fp_subc_low(c[0], t0, t1);
    for (int i = -1; i > fp_prime_get_cnr(); i--) {
        fp_subc_low(c[0], c[0], t1);
    }

    /* c_1 = t3 + t2 * B. */
    fp_subc_low(c[1], t3, t2);
    for (int i = -1; i > fp_prime_get_cnr(); i--) {
        fp_subc_low(c[1], c[1], t2);
    }
}
void fp_subc_low(dig_t *c, const dig_t *a, const dig_t *b) {
    dig_t carry = fp_subd_low(c, a, b);

    /* On borrow, add p * 2^N (a multiple of p in this double-precision
     * representation) to make the result non-negative again. */
    if (carry) {
        fp_addn_low(c + FP_DIGS, c + FP_DIGS, fp_prime_get());
    }
}
void fp_subm_low(dig_t *c, const dig_t *a, const dig_t *b) {
    dig_t carry;

    carry = fp_subn_low(c, a, b);
    if (carry) {
        fp_addn_low(c, c, fp_prime_get());
    }
}
void fp2_muln_low(dv2_t c, fp2_t a, fp2_t b) {
    align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

    /* Karatsuba algorithm. */

    /* t0 = a_0 + a_1, t1 = b_0 + b_1. */
#ifdef FP_SPACE
    fp_addn_low(t0, a[0], a[1]);
    fp_addn_low(t1, b[0], b[1]);
#else
    fp_addm_low(t0, a[0], a[1]);
    fp_addm_low(t1, b[0], b[1]);
#endif

    /* c_0 = a_0 * b_0, c_1 = a_1 * b_1. */
    fp_muln_low(c[0], a[0], b[0]);
    fp_muln_low(c[1], a[1], b[1]);

    /* t2 = (a_0 + a_1) * (b_0 + b_1). */
    fp_muln_low(t2, t0, t1);

    /* t0 = (a_0 * b_0) + (a_1 * b_1). */
#ifdef FP_SPACE
    fp_addd_low(t0, c[0], c[1]);
#else
    fp_addc_low(t0, c[0], c[1]);
#endif

    /* c_0 = (a_0 * b_0) + u^2 * (a_1 * b_1). */
    fp_subc_low(c[0], c[0], c[1]);
#ifndef FP_QNRES
    /* Adjust for a quadratic non-residue u^2 = qnr different from -1. */
    for (int i = -1; i > fp_prime_get_qnr(); i--) {
        fp_subc_low(c[0], c[0], c[1]);
    }
    for (int i = 0; i <= fp_prime_get_qnr(); i++) {
        fp_addc_low(c[0], c[0], c[1]);
    }
#endif

    /* c_1 = t2 - t0. */
#ifdef FP_SPACE
    fp_subd_low(c[1], t2, t0);
#else
    fp_subc_low(c[1], t2, t0);
#endif
}
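/*
 * Illustrative sketch (not part of the library): the Karatsuba identity used
 * above, shown on plain machine words. With t0 = a0 + a1 and t1 = b0 + b1,
 * the cross term a0*b1 + a1*b0 equals t0*t1 - a0*b0 - a1*b1, so only three
 * multiplications are needed instead of four. The helper name below is
 * hypothetical and chosen for this demo only.
 */
static unsigned long long karatsuba_cross_term_demo(unsigned long long a0,
        unsigned long long a1, unsigned long long b0, unsigned long long b1) {
    unsigned long long v0 = a0 * b0;                /* low product */
    unsigned long long v1 = a1 * b1;                /* high product */
    unsigned long long v2 = (a0 + a1) * (b0 + b1);  /* combined product */
    /* v2 - v0 - v1 == a0*b1 + a1*b0, the coefficient of u in (a0 + a1*u)(b0 + b1*u). */
    return v2 - v0 - v1;
}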
void fp_hlvm_low(dig_t *c, const dig_t *a) {
    dig_t carry = 0;

    if (a[0] & 1) {
        carry = fp_addn_low(c, a, fp_prime_get());
    } else {
        fp_copy(c, a);
    }
    fp_rsh1_low(c, c);
    if (carry) {
        c[FP_DIGS - 1] ^= ((dig_t)1 << (FP_DIGIT - 1));
    }
}
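/*
 * Illustrative sketch (not part of the library): why fp_hlvm_low adds the
 * (odd) prime before shifting. If a is odd, a + p is even and
 * (a + p) / 2 = a * 2^(-1) (mod p); if a is even, a / 2 is already the
 * modular halving. The helper below is hypothetical and uses a small
 * word-sized odd modulus purely for demonstration, so the addition cannot
 * overflow.
 */
static unsigned halve_mod_demo(unsigned a, unsigned p) {
    /* Assumes p is odd and 0 <= a < p. */
    return (a & 1) ? ((a + p) >> 1) : (a >> 1);
}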
void fp_subc_low(dig_t *c, const dig_t *a, const dig_t *b) {
    int i;
    dig_t carry, r0, diff;

    /* Zero the carry. */
    carry = 0;
    for (i = 0; i < 2 * FP_DIGS; i++, a++, b++) {
        diff = (*a) - (*b);
        r0 = diff - carry;
        carry = ((*a) < (*b)) || (carry && !diff);
        c[i] = r0;
    }
    if (carry) {
        fp_addn_low(c + FP_DIGS, c + FP_DIGS, fp_prime_get());
    }
}
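/*
 * Illustrative sketch (not part of the library): the borrow rule used in the
 * loop above, for a single word. Computing r = a - b - borrow_in produces a
 * borrow_out exactly when a < b, or when a == b and borrow_in is set; the
 * expression (a < b) || (borrow_in && !(a - b)) encodes this without needing
 * a wider integer type. The helper name is hypothetical.
 */
static unsigned long long word_sub_demo(unsigned long long a,
        unsigned long long b, unsigned *borrow) {
    unsigned long long diff = a - b;        /* a - b, may wrap */
    unsigned long long r = diff - *borrow;  /* subtract incoming borrow */
    *borrow = (a < b) || (*borrow && !diff);
    return r;
}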
void fp3_muln_low(dv3_t c, fp3_t a, fp3_t b) {
    align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS], t3[2 * FP_DIGS];
    align dig_t t4[2 * FP_DIGS], t5[2 * FP_DIGS], t6[2 * FP_DIGS];

    /* Karatsuba algorithm. */

    /* t0 = a_0 * b_0, t1 = a_1 * b_1, t2 = a_2 * b_2. */
    fp_muln_low(t0, a[0], b[0]);
    fp_muln_low(t1, a[1], b[1]);
    fp_muln_low(t2, a[2], b[2]);

    /* t3 = (a_1 + a_2) * (b_1 + b_2). */
#ifdef FP_SPACE
    fp_addn_low(t3, a[1], a[2]);
    fp_addn_low(t4, b[1], b[2]);
#else
    fp_addm_low(t3, a[1], a[2]);
    fp_addm_low(t4, b[1], b[2]);
#endif
    fp_muln_low(t5, t3, t4);

    /* t4 = a_1 * b_2 + a_2 * b_1. */
    fp_addd_low(t6, t1, t2);
    fp_subc_low(t4, t5, t6);

    /* c_0 = a_0 * b_0 + B * (a_1 * b_2 + a_2 * b_1). */
    fp_subc_low(c[0], t0, t4);
    for (int i = -1; i > fp_prime_get_cnr(); i--) {
        fp_subc_low(c[0], c[0], t4);
    }

    /* t4 = a_0 * b_1 + a_1 * b_0. */
#ifdef FP_SPACE
    fp_addn_low(t4, a[0], a[1]);
    fp_addn_low(t5, b[0], b[1]);
#else
    fp_addm_low(t4, a[0], a[1]);
    fp_addm_low(t5, b[0], b[1]);
#endif
    fp_muln_low(t6, t4, t5);
    fp_addd_low(t4, t0, t1);
    fp_subc_low(t4, t6, t4);

    /* c_1 = a_0 * b_1 + a_1 * b_0 + B * (a_2 * b_2). */
    fp_subc_low(c[1], t4, t2);
    for (int i = -1; i > fp_prime_get_cnr(); i--) {
        fp_subc_low(c[1], c[1], t2);
    }

    /* c_2 = a_0 * b_2 + a_1 * b_1 + a_2 * b_0. */
#ifdef FP_SPACE
    fp_addn_low(t5, a[0], a[2]);
    fp_addn_low(t6, b[0], b[2]);
#else
    fp_addm_low(t5, a[0], a[2]);
    fp_addm_low(t6, b[0], b[2]);
#endif
    fp_muln_low(t4, t5, t6);
    fp_addd_low(t6, t0, t2);
    fp_subc_low(t5, t4, t6);
    fp_addc_low(c[2], t5, t1);
}
void fp_hlvd_low(dig_t *c, const dig_t *a) {
    dig_t carry = 0;

    if (a[0] & 1) {
        carry = fp_addn_low(c, a, fp_prime_get());
    } else {
        dv_copy(c, a, FP_DIGS);
    }
    fp_add1_low(c + FP_DIGS, a + FP_DIGS, carry);

    carry = fp_rsh1_low(c + FP_DIGS, c + FP_DIGS);
    fp_rsh1_low(c, c);
    if (carry) {
        c[FP_DIGS - 1] ^= ((dig_t)1 << (FP_DIGIT - 1));
    }
}
void fp_addm_low(dig_t *c, const dig_t *a, const dig_t *b) {
    fp_addn_low(c, a, b);
    if (fp_cmp(c, fp_prime_get()) != CMP_LT) {
        fp_subn_low(c, c, fp_prime_get());
    }
}
void fp2_addn_low(fp2_t c, fp2_t a, fp2_t b) {
    fp_addn_low(c[0], a[0], b[0]);
    fp_addn_low(c[1], a[1], b[1]);
}
void fp2_sqrn_low(dv2_t c, fp2_t a) {
    align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

    /* t0 = (a_0 + a_1). */
#ifdef FP_SPACE
    /* If we have room for carries, we can avoid reductions here. */
    fp_addn_low(t0, a[0], a[1]);
#else
    fp_addm_low(t0, a[0], a[1]);
#endif

    /* t1 = (a_0 - a_1). */
    fp_subm_low(t1, a[0], a[1]);

#ifdef FP_QNRES

#ifdef FP_SPACE
    fp_dbln_low(t2, a[0]);
#else
    fp_dblm_low(t2, a[0]);
#endif
    /* c_1 = 2 * a_0 * a_1. */
    fp_muln_low(c[1], t2, a[1]);
    /* c_0 = a_0^2 + a_1^2 * u^2. */
    fp_muln_low(c[0], t0, t1);

#else /* !FP_QNRES */

    /* t1 = a_0 + u^2 * a_1. */
    for (int i = -1; i > fp_prime_get_qnr(); i--) {
        fp_subm_low(t1, t1, a[1]);
    }

    if (fp_prime_get_qnr() == -1) {
        /* t2 = 2 * a_0. */
        fp_dbl(t2, a[0]);
        /* c_1 = 2 * a_0 * a_1. */
        fp_muln_low(c[1], t2, a[1]);
        /* c_0 = a_0^2 + a_1^2 * u^2. */
        fp_muln_low(c[0], t0, t1);
    } else {
        /* c_1 = a_0 * a_1. */
        fp_muln_low(c[1], a[0], a[1]);
        /* c_0 = a_0^2 + a_1^2 * u^2. */
        fp_muln_low(c[0], t0, t1);
#ifdef FP_SPACE
        for (int i = -1; i > fp_prime_get_qnr(); i--) {
            fp_addd_low(c[0], c[0], c[1]);
        }
        /* c_1 = 2 * a_0 * a_1. */
        fp_addd_low(c[1], c[1], c[1]);
#else
        for (int i = -1; i > fp_prime_get_qnr(); i--) {
            fp_addc_low(c[0], c[0], c[1]);
        }
        /* c_1 = 2 * a_0 * a_1. */
        fp_addc_low(c[1], c[1], c[1]);
#endif
    }

#endif
    /* c = c_0 + c_1 * u. */
}
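/*
 * Illustrative sketch (not part of the library): the complex-squaring formula
 * behind the FP_QNRES branch above, over plain integers with u^2 = -1.
 * (a0 + a1*u)^2 = (a0 + a1)(a0 - a1) + (2*a0*a1)*u, which costs two
 * multiplications instead of the three required by Karatsuba. The helper
 * name is hypothetical and used only for this demo.
 */
static void complex_sqr_demo(long long a0, long long a1,
        long long *c0, long long *c1) {
    *c0 = (a0 + a1) * (a0 - a1);    /* a0^2 - a1^2 = a0^2 + u^2 * a1^2 */
    *c1 = 2 * a0 * a1;              /* coefficient of u */
}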
void fp_rdcs_low(dig_t *c, const dig_t *a, const dig_t *m) {
    align dig_t q[2 * FP_DIGS], _q[2 * FP_DIGS];
    align dig_t _r[2 * FP_DIGS], r[2 * FP_DIGS], t[2 * FP_DIGS];
    const int *sform;
    int len, first, i, j, b0, d0, b1, d1;
    dig_t carry;

    sform = fp_prime_get_sps(&len);

    SPLIT(b0, d0, sform[len - 1], FP_DIG_LOG);
    first = (d0) + (b0 == 0 ? 0 : 1);

    /* q = floor(a/b^k). */
    dv_zero(q, 2 * FP_DIGS);
    bn_rshd_low(q, a, 2 * FP_DIGS, d0);
    if (b0 > 0) {
        bn_rshb_low(q, q, 2 * FP_DIGS, b0);
    }

    /* r = a - q * b^k. */
    dv_copy(r, a, first);
    if (b0 > 0) {
        r[first - 1] &= MASK(b0);
    }

    carry = 0;
    while (!fp_is_zero(q)) {
        dv_zero(_q, 2 * FP_DIGS);
        for (i = len - 2; i > 0; i--) {
            j = (sform[i] < 0 ? -sform[i] : sform[i]);
            SPLIT(b1, d1, j, FP_DIG_LOG);
            dv_zero(t, 2 * FP_DIGS);
            bn_lshd_low(t, q, FP_DIGS, d1);
            if (b1 > 0) {
                bn_lshb_low(t, t, 2 * FP_DIGS, b1);
            }
            if (sform[i] > 0) {
                bn_subn_low(_q, _q, t, 2 * FP_DIGS);
            } else {
                bn_addn_low(_q, _q, t, 2 * FP_DIGS);
            }
        }
        if (sform[0] > 0) {
            bn_subn_low(_q, _q, q, 2 * FP_DIGS);
        } else {
            bn_addn_low(_q, _q, q, 2 * FP_DIGS);
        }
        bn_rshd_low(q, _q, 2 * FP_DIGS, d0);
        if (b0 > 0) {
            bn_rshb_low(q, q, 2 * FP_DIGS, b0);
        }
        dv_copy(_r, _q, first);
        if (b0 > 0) {
            _r[first - 1] &= MASK(b0);
        }
        carry = fp_addn_low(r, r, _r);
        if (carry) {
            fp_subn_low(r, r, m);
        }
    }
    while (fp_cmpn_low(r, m) != CMP_LT) {
        fp_subn_low(r, r, m);
    }
    fp_copy(c, r);
}
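/*
 * Illustrative sketch (not part of the library): the idea behind fp_rdcs_low
 * for a prime in special (Solinas-like) form, shown for the toy Mersenne
 * prime p = 2^13 - 1. Writing x = q * 2^13 + r gives x = q + r (mod p), so
 * repeatedly folding the high part into the low part shrinks x; a final
 * conditional subtraction finishes the reduction. The helper is hypothetical
 * and handles only this toy modulus.
 */
static unsigned reduce_mersenne_demo(unsigned x) {
    const unsigned p = (1u << 13) - 1;
    while (x >> 13) {
        /* Fold the quotient back in: q * 2^13 + r = q + r (mod p). */
        x = (x & p) + (x >> 13);
    }
    if (x >= p) {
        x -= p;
    }
    return x;
}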
void fp3_addn_low(fp3_t c, fp3_t a, fp3_t b) {
    fp_addn_low(c[0], a[0], b[0]);
    fp_addn_low(c[1], a[1], b[1]);
    fp_addn_low(c[2], a[2], b[2]);
}
void fp2_norh_low(dv2_t c, dv2_t a) {
    dv2_t t;
    bn_t b;

    dv2_null(t);
    bn_null(b);

    TRY {
        dv2_new(t);
        bn_new(b);

#ifdef FP_QNRES
        /* If p = 3 mod 8, (1 + i) is a QNR/CNR. */
        /* (a_0 + a_1 * i) * (1 + i) = (a_0 - a_1) + (a_0 + a_1) * i. */
        dv_copy(t[1], a[1], 2 * FP_DIGS);
        fp_addd_low(c[1], a[0], a[1]);
        /* t_0 = a_0 + 2^N * p/2. */
        dv_copy(t[0], a[0], 2 * FP_DIGS);
        bn_lshb_low(t[0] + FP_DIGS - 1, t[0] + FP_DIGS - 1, FP_DIGS + 1, 1);
        fp_addn_low(t[0] + FP_DIGS, t[0] + FP_DIGS, fp_prime_get());
        bn_rshb_low(t[0] + FP_DIGS - 1, t[0] + FP_DIGS - 1, FP_DIGS + 1, 1);
        fp_subd_low(c[0], t[0], t[1]);
#else
        switch (fp_prime_get_mod8()) {
            case 3:
                /* If p = 3 mod 8, (1 + u) is a QNR, u^2 = -1. */
                /* (a_0 + a_1 * u) * (1 + u) = (a_0 - a_1) + (a_0 + a_1) * u. */
                dv_copy(t[0], a[1], 2 * FP_DIGS);
                fp_addc_low(c[1], a[0], a[1]);
                fp_subc_low(c[0], a[0], t[0]);
                break;
            case 1:
            case 5:
                /* If p = 1,5 mod 8, (u) is a QNR. */
                dv_copy(t[0], a[0], 2 * FP_DIGS);
                dv_zero(t[1], FP_DIGS);
                dv_copy(t[1] + FP_DIGS, fp_prime_get(), FP_DIGS);
                fp_subc_low(c[0], t[1], a[1]);
                for (int i = -1; i > fp_prime_get_qnr(); i--) {
                    fp_subc_low(c[0], c[0], a[1]);
                }
                dv_copy(c[1], t[0], 2 * FP_DIGS);
                break;
            case 7:
                /* If p = 7 mod 8, (2 + u) is a QNR/CNR. */
                fp2_addc_low(t, a, a);
                fp_subc_low(c[0], t[0], a[1]);
                fp_addc_low(c[1], t[1], a[0]);
                break;
            default:
                THROW(ERR_NO_VALID);
                break;
        }
#endif
    }
    CATCH_ANY {
        THROW(ERR_CAUGHT);
    }
    FINALLY {
        dv2_free(t);
        bn_free(b);
    }
}
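/*
 * Illustrative sketch (not part of the library): the identity used in the
 * FP_QNRES branch above. With i^2 = -1, multiplying by the non-residue
 * (1 + i) expands as (a0 + a1*i) * (1 + i) = (a0 - a1) + (a0 + a1)*i, so the
 * whole operation needs only one addition and one subtraction. The helper
 * below is hypothetical and works over plain integers.
 */
static void mul_nor_demo(long long a0, long long a1, long long *c0, long long *c1) {
    *c0 = a0 - a1;  /* real part: a0 + a1*i^2 = a0 - a1 */
    *c1 = a0 + a1;  /* coefficient of i: a0 + a1 */
}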