/**
 * Unreduced (double-precision) squaring in a cubic extension Fp^3.
 *
 * Computes c = a^2 for a = a_0 + a_1 * u + a_2 * u^2, with u^3 = B where
 * B = fp_prime_get_cnr() is the cubic non-residue. The three output limbs
 * are left in double-precision (dv3_t) form for a later reduction step.
 *
 * Uses a squaring formula that trades multiplications for additions:
 * from a_0^2, a_2^2, 2*a_1*a_2 and the two squares (a_0 + a_2 +- a_1)^2
 * all cross products are recovered with cheap add/sub/halving steps.
 *
 * @param[out] c - the double-precision result.
 * @param[in]  a - the field element to square.
 */
void fp3_sqrn_low(dv3_t c, fp3_t a) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];
	align dig_t t3[2 * FP_DIGS], t4[2 * FP_DIGS], t5[2 * FP_DIGS];

	/* t0 = a_0^2. */
	fp_sqrn_low(t0, a[0]);

	/* t1 = 2 * a_1 * a_2. */
#ifdef FP_SPACE
	/* Enough headroom in a digit vector: skip the modular reduction. */
	fp_dbln_low(t2, a[1]);
#else
	fp_dblm_low(t2, a[1]);
#endif
	fp_muln_low(t1, t2, a[2]);

	/* t2 = a_2^2. */
	fp_sqrn_low(t2, a[2]);

	/* t3 = (a_0 + a_2 + a_1)^2, t4 = (a_0 + a_2 - a_1)^2. */
#ifdef FP_SPACE
	fp_addn_low(t3, a[0], a[2]);
	fp_addn_low(t4, t3, a[1]);
#else
	fp_addm_low(t3, a[0], a[2]);
	fp_addm_low(t4, t3, a[1]);
#endif
	/* Subtraction must stay reduced, so always use the modular variant. */
	fp_subm_low(t5, t3, a[1]);
	fp_sqrn_low(t3, t4);
	fp_sqrn_low(t4, t5);

	/* t4 = (t4 + t3)/2, i.e. (a_0 + a_2)^2 + a_1^2. */
	fp_addd_low(t4, t4, t3);
	fp_hlvd_low(t4, t4);

	/* t3 = t3 - t4 - t1, i.e. 2*a_0*a_1 + 2*a_0*a_2 + 2*a_1*a_2 - t1. */
	fp_addc_low(t5, t1, t4);
	fp_subc_low(t3, t3, t5);

	/* c_2 = t4 - t0 - t2, i.e. 2*a_0*a_2 + a_1^2. */
	fp_addc_low(t5, t0, t2);
	fp_subc_low(c[2], t4, t5);

	/* c_0 = t0 + t1 * B. The loop repeats the subtraction |B| - 1 times;
	 * NOTE(review): this only covers a negative non-residue B <= -1 —
	 * confirm a positive B is never configured for this code path. */
	fp_subc_low(c[0], t0, t1);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[0], c[0], t1);
	}

	/* c_1 = t3 + t2 * B, same negative-B convention as above. */
	fp_subc_low(c[1], t3, t2);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[1], c[1], t2);
	}
}
/**
 * Unreduced (double-precision) multiplication in a quadratic extension Fp^2.
 *
 * Computes c = a * b for a = a_0 + a_1 * u, b = b_0 + b_1 * u, with
 * u^2 = qnr where qnr = fp_prime_get_qnr() is the quadratic non-residue.
 * Uses Karatsuba: three base-field multiplications instead of four.
 * The result limbs are left in double-precision (dv2_t) form.
 *
 * @param[out] c - the double-precision result.
 * @param[in]  a - the first multiplicand.
 * @param[in]  b - the second multiplicand.
 */
void fp2_muln_low(dv2_t c, fp2_t a, fp2_t b) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

	/* Karatsuba algorithm. */

	/* t0 = a_0 + a_1, t1 = b_0 + b_1. */
#ifdef FP_SPACE
	/* Enough digit headroom: skip the modular reduction on the sums. */
	fp_addn_low(t0, a[0], a[1]);
	fp_addn_low(t1, b[0], b[1]);
#else
	fp_addm_low(t0, a[0], a[1]);
	fp_addm_low(t1, b[0], b[1]);
#endif

	/* c_0 = a_0 * b_0, c_1 = a_1 * b_1. */
	fp_muln_low(c[0], a[0], b[0]);
	fp_muln_low(c[1], a[1], b[1]);

	/* t2 = (a_0 + a_1) * (b_0 + b_1). */
	fp_muln_low(t2, t0, t1);

	/* t0 = (a_0 * b_0) + (a_1 * b_1). */
#ifdef FP_SPACE
	fp_addd_low(t0, c[0], c[1]);
#else
	fp_addc_low(t0, c[0], c[1]);
#endif

	/* c_0 = (a_0 * b_0) + u^2 * (a_1 * b_1); start from qnr = -1. */
	fp_subc_low(c[0], c[0], c[1]);

#ifndef FP_QNRES
	/* Adjust for a general qnr: after the single subtraction above, the
	 * first loop subtracts (a_1 * b_1) another |qnr| - 1 times when
	 * qnr < -1, and the second adds it qnr + 1 times when qnr >= 0,
	 * leaving a net contribution of qnr * (a_1 * b_1) in both cases. */
	for (int i = -1; i > fp_prime_get_qnr(); i--) {
		fp_subc_low(c[0], c[0], c[1]);
	}
	for (int i = 0; i <= fp_prime_get_qnr(); i++) {
		fp_addc_low(c[0], c[0], c[1]);
	}
#endif

	/* c_1 = t2 - t0 = a_0 * b_1 + a_1 * b_0. */
#ifdef FP_SPACE
	fp_subd_low(c[1], t2, t0);
#else
	fp_subc_low(c[1], t2, t0);
#endif
}
/**
 * Unreduced (double-precision) multiplication in a cubic extension Fp^3.
 *
 * Computes c = a * b for a = a_0 + a_1 * u + a_2 * u^2 (likewise b), with
 * u^3 = B where B = fp_prime_get_cnr() is the cubic non-residue. Uses
 * 3-way Karatsuba: six base-field multiplications instead of nine.
 * The result limbs are left in double-precision (dv3_t) form.
 *
 * @param[out] c - the double-precision result.
 * @param[in]  a - the first multiplicand.
 * @param[in]  b - the second multiplicand.
 */
void fp3_muln_low(dv3_t c, fp3_t a, fp3_t b) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS], t3[2 * FP_DIGS];
	align dig_t t4[2 * FP_DIGS], t5[2 * FP_DIGS], t6[2 * FP_DIGS];

	/* Karatsuba algorithm. */

	/* t0 = a_0 * b_0, t1 = a_1 * b_1, t2 = a_2 * b_2. */
	fp_muln_low(t0, a[0], b[0]);
	fp_muln_low(t1, a[1], b[1]);
	fp_muln_low(t2, a[2], b[2]);

	/* t3 = (a_1 + a_2) * (b_1 + b_2). */
#ifdef FP_SPACE
	/* Enough digit headroom: skip the modular reduction on the sums. */
	fp_addn_low(t3, a[1], a[2]);
	fp_addn_low(t4, b[1], b[2]);
#else
	fp_addm_low(t3, a[1], a[2]);
	fp_addm_low(t4, b[1], b[2]);
#endif
	fp_muln_low(t5, t3, t4);
	/* t4 = a_1 * b_2 + a_2 * b_1. */
	fp_addd_low(t6, t1, t2);
	fp_subc_low(t4, t5, t6);

	/* c_0 = t0 + B * t4. The loop repeats the subtraction |B| - 1 times;
	 * NOTE(review): this only covers a negative non-residue B <= -1 —
	 * confirm a positive B is never configured for this code path. */
	fp_subc_low(c[0], t0, t4);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[0], c[0], t4);
	}

	/* t4 = a_0 * b_1 + a_1 * b_0, via (a_0 + a_1) * (b_0 + b_1). */
#ifdef FP_SPACE
	fp_addn_low(t4, a[0], a[1]);
	fp_addn_low(t5, b[0], b[1]);
#else
	fp_addm_low(t4, a[0], a[1]);
	fp_addm_low(t5, b[0], b[1]);
#endif
	fp_muln_low(t6, t4, t5);
	fp_addd_low(t4, t0, t1);
	fp_subc_low(t4, t6, t4);

	/* c_1 = t4 + B * t2, same negative-B convention as above. */
	fp_subc_low(c[1], t4, t2);
	for (int i = -1; i > fp_prime_get_cnr(); i--) {
		fp_subc_low(c[1], c[1], t2);
	}

	/* t5 = a_0 * b_2 + a_2 * b_0, via (a_0 + a_2) * (b_0 + b_2). */
#ifdef FP_SPACE
	fp_addn_low(t5, a[0], a[2]);
	fp_addn_low(t6, b[0], b[2]);
#else
	fp_addm_low(t5, a[0], a[2]);
	fp_addm_low(t6, b[0], b[2]);
#endif
	fp_muln_low(t4, t5, t6);
	fp_addd_low(t6, t0, t2);
	fp_subc_low(t5, t4, t6);

	/* c_2 = t5 + t1 = a_0 * b_2 + a_1 * b_1 + a_2 * b_0. */
	fp_addc_low(c[2], t5, t1);
}
/**
 * Unreduced (double-precision) squaring in a quadratic extension Fp^2.
 *
 * Computes c = a^2 for a = a_0 + a_1 * u, with u^2 = qnr where
 * qnr = fp_prime_get_qnr() is the quadratic non-residue. Uses the complex
 * squaring method: c_0 = (a_0 + a_1)(a_0 + qnr * a_1) adjusted for qnr, and
 * c_1 = 2 * a_0 * a_1, so only two base-field multiplications are needed.
 * The result limbs are left in double-precision (dv2_t) form.
 *
 * @param[out] c - the double-precision result.
 * @param[in]  a - the field element to square.
 */
void fp2_sqrn_low(dv2_t c, fp2_t a) {
	align dig_t t0[2 * FP_DIGS], t1[2 * FP_DIGS], t2[2 * FP_DIGS];

	/* t0 = (a0 + a1). */
#ifdef FP_SPACE
	/* if we have room for carries, we can avoid reductions here. */
	fp_addn_low(t0, a[0], a[1]);
#else
	fp_addm_low(t0, a[0], a[1]);
#endif
	/* t1 = (a0 - a1). */
	fp_subm_low(t1, a[0], a[1]);

#ifdef FP_QNRES
	/* qnr is fixed to -1: c0 = (a0 + a1)(a0 - a1) directly. */
#ifdef FP_SPACE
	fp_dbln_low(t2, a[0]);
#else
	fp_dblm_low(t2, a[0]);
#endif
	/* c1 = 2 * a0 * a1. */
	fp_muln_low(c[1], t2, a[1]);
	/* c_0 = a_0^2 + a_1^2 * u^2. */
	fp_muln_low(c[0], t0, t1);
#else /* !FP_QNRES */

	/* t1 = a0 + qnr * a1 for negative qnr: subtract a1 another
	 * |qnr| - 1 times on top of the subtraction above. */
	for (int i = -1; i > fp_prime_get_qnr(); i--) {
		fp_subm_low(t1, t1, a[1]);
	}

	if (fp_prime_get_qnr() == -1) {
		/* t2 = 2 * a0. */
		fp_dbl(t2, a[0]);
		/* c1 = 2 * a0 * a1. */
		fp_muln_low(c[1], t2, a[1]);
		/* c0 = a0^2 + a_1^2 * u^2. */
		fp_muln_low(c[0], t0, t1);
	} else {
		/* c1 = a0 * a1, doubled at the end of this branch. */
		fp_muln_low(c[1], a[0], a[1]);
		/* c0 = a0^2 + a_1^2 * u^2; the loop below compensates the
		 * cross term qnr * a0 * a1 left over when qnr != -1. */
		fp_muln_low(c[0], t0, t1);
#ifdef FP_SPACE
		for (int i = -1; i > fp_prime_get_qnr(); i--) {
			fp_addd_low(c[0], c[0], c[1]);
		}
		/* c1 = 2 * a0 * a1. */
		fp_addd_low(c[1], c[1], c[1]);
#else
		for (int i = -1; i > fp_prime_get_qnr(); i--) {
			fp_addc_low(c[0], c[0], c[1]);
		}
		/* c1 = 2 * a0 * a1. */
		fp_addc_low(c[1], c[1], c[1]);
#endif
	}
#endif
	/* c = c0 + c1 * u. */
}
/**
 * Modular addition in a quadratic extension Fp^2, computed coefficient-wise.
 *
 * @param[out] c - the result c = a + b.
 * @param[in]  a - the first operand.
 * @param[in]  b - the second operand.
 */
void fp2_addm_low(fp2_t c, fp2_t a, fp2_t b) {
	/* Add each of the two base-field coefficients with reduction. */
	for (int k = 0; k < 2; k++) {
		fp_addm_low(c[k], a[k], b[k]);
	}
}
/**
 * Modular addition in a cubic extension Fp^3, computed coefficient-wise.
 *
 * @param[out] c - the result c = a + b.
 * @param[in]  a - the first operand.
 * @param[in]  b - the second operand.
 */
void fp3_addm_low(fp3_t c, fp3_t a, fp3_t b) {
	/* Add each of the three base-field coefficients with reduction. */
	for (int k = 0; k < 3; k++) {
		fp_addm_low(c[k], a[k], b[k]);
	}
}