Example #1
void point_add(struct affine_point *p1, const struct affine_point *p2,
	       const struct domain_params *dp)
{
  if (! point_is_zero(p2)) {
    if (! point_is_zero(p1)) {
      if (! gcry_mpi_cmp(p1->x, p2->x)) {
	if (! gcry_mpi_cmp(p1->y, p2->y))
	  point_double(p1, dp);   /* P + P: same point, use doubling */
	else
	  point_load_zero(p1);    /* P + (-P): point at infinity */
      }
      else {
	gcry_mpi_t t;
	t = gcry_mpi_snew(0);
	gcry_mpi_subm(t, p1->y, p2->y, dp->m);     /* t = y1 - y2 */
	gcry_mpi_subm(p1->y, p1->x, p2->x, dp->m); /* y1 <- x1 - x2 */
	gcry_mpi_invm(p1->y, p1->y, dp->m);        /* y1 <- (x1 - x2)^-1 */
	gcry_mpi_mulm(p1->y, t, p1->y, dp->m);     /* y1 <- lambda = (y1 - y2)/(x1 - x2) */
	gcry_mpi_mulm(t, p1->y, p1->y, dp->m);     /* t = lambda^2 */
	gcry_mpi_addm(p1->x, p1->x, p2->x, dp->m); /* x1 <- x1 + x2 */
	gcry_mpi_subm(p1->x, t, p1->x, dp->m);     /* x1 <- x3 = lambda^2 - x1 - x2 */
	gcry_mpi_subm(t, p2->x, p1->x, dp->m);     /* t = x2 - x3 */
	gcry_mpi_mulm(p1->y, p1->y, t, dp->m);     /* y1 <- lambda * (x2 - x3) */
	gcry_mpi_subm(p1->y, p1->y, p2->y, dp->m); /* y1 <- y3 = lambda * (x2 - x3) - y2 */
	gcry_mpi_release(t);
      }
    }
    else
      point_set(p1, p2);
  }
}
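The else branch above is the textbook affine chord rule, evaluated in place over p1: lambda = (y1 - y2)/(x1 - x2), x3 = lambda^2 - x1 - x2, y3 = lambda*(x2 - x3) - y2. As a sanity check, here is a minimal self-contained sketch of the same formula over a toy prime field; the curve, the prime 97, and all helper names are illustrative only and not part of libgcrypt:

#include <stdint.h>
#include <stdio.h>

#define P 97  /* toy prime; real code uses a large prime and
                 constant-time field arithmetic */

static int64_t modp(int64_t a) { return ((a % P) + P) % P; }

static int64_t inv(int64_t a) {  /* a^(P-2) mod P, Fermat's little theorem */
  int64_t r = 1, b = modp(a);
  for (int e = P - 2; e != 0; e >>= 1) {
    if (e & 1) r = r * b % P;
    b = b * b % P;
  }
  return r;
}

/* Affine addition of distinct points with x1 != x2. */
static void affine_add(int64_t *x3, int64_t *y3,
                       int64_t x1, int64_t y1, int64_t x2, int64_t y2) {
  int64_t lambda = modp((y1 - y2) * inv(x1 - x2));  /* chord slope */
  *x3 = modp(lambda * lambda - x1 - x2);
  *y3 = modp(lambda * (x2 - *x3) - y2);
}

int main(void) {
  /* (1, 28) and (5, 36) lie on y^2 = x^3 + 7 over GF(97). */
  int64_t x3, y3;
  affine_add(&x3, &y3, 1, 28, 5, 36);
  printf("(%lld, %lld)\n", (long long)x3, (long long)y3);  /* (95, 75) */
  printf("on curve: %d\n", modp(y3 * y3) == modp(x3 * x3 * x3 + 7));
  return 0;
}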
Example #2
static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
                                const EC_RAW_POINT *a) {
  fe x, y, z;
  fe_from_generic(x, &a->X);
  fe_from_generic(y, &a->Y);
  fe_from_generic(z, &a->Z);
  point_double(x, y, z, x, y, z);
  fe_to_generic(&r->X, x);
  fe_to_generic(&r->Y, y);
  fe_to_generic(&r->Z, z);
}
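This is only an adapter: it pulls the group's generic coordinates into the fiat-crypto field representation, doubles in place, and converts back. Passing x, y, z as both inputs and outputs relies on point_double accepting outputs that alias the corresponding inputs, which this call depends on.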
Example #3
static void point_add(struct point *r, struct point *p, struct point *q)
{
	u8 s[20], t[20], u[20];
	u8 *px, *py, *qx, *qy, *rx, *ry;
	struct point pp, qq;

	pp = *p;
	qq = *q;

	px = pp.x;
	py = pp.y;
	qx = qq.x;
	qy = qq.y;
	rx = r->x;
	ry = r->y;

	if (point_is_zero(&pp)) {
		elt_copy(rx, qx);
		elt_copy(ry, qy);
		return;
	}

	if (point_is_zero(&qq)) {
		elt_copy(rx, px);
		elt_copy(ry, py);
		return;
	}

	elt_sub(u, qx, px);

	if (elt_is_zero(u)) {
		elt_sub(u, qy, py);
		if (elt_is_zero(u))
			point_double(r, &pp);
		else
			point_zero(r);

		return;
	}

	elt_inv(t, u);		// t = 1/(qx-px)
	elt_sub(u, qy, py);	// u = qy-py
	elt_mul(s, t, u);	// s = (qy-py)/(qx-px)

	elt_square(rx, s);	// rx = s*s
	elt_add(t, px, qx);	// t = px+qx
	elt_sub(rx, rx, t);	// rx = s*s - (px+qx)

	elt_sub(t, px, rx);	// t = -(rx-px)
	elt_mul(ry, s, t);	// ry = -s*(rx-px)
	elt_sub(ry, ry, py);	// ry = -s*(rx-px) - py
}
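Note the local copies pp and qq: r may alias p or q, and rx and ry are written while the input coordinates are still needed, so the copies keep the reads and writes from stepping on each other. Example #4 below relies on exactly this when it calls point_add(d, d, b).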
Example #4
static void point_mul(struct point *d, u8 *a, struct point *b)	// a is a 21-byte big-endian scalar
{
	u32 i;
	u8 mask;

	point_zero(d);

	for (i = 0; i < 21; i++)
		for (mask = 0x80; mask != 0; mask >>= 1) {
			point_double(d, d);
			if ((a[i] & mask) != 0)
				point_add(d, d, b);
		}
}
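This is the classic MSB-first double-and-add loop over a 21-byte big-endian scalar. Its structure is easiest to see with the group operations swapped for plain integer arithmetic, where doubling becomes *2 and point addition becomes +, and the loop computes an ordinary product. A minimal sketch (the helper name is ours, not the original code's):

#include <stdint.h>
#include <stdio.h>

/* Same loop shape as point_mul above, over integers: 2*d plays the role
 * of point_double(d, d) and d + b the role of point_add(d, d, b), so the
 * loop returns scalar * b. */
static uint64_t scalar_mul_sketch(const uint8_t a[21], uint64_t b) {
  uint64_t d = 0;  /* the integer 0 stands in for the point at infinity */
  for (unsigned i = 0; i < 21; i++)
    for (uint8_t mask = 0x80; mask != 0; mask >>= 1) {
      d = 2 * d;
      if ((a[i] & mask) != 0)
        d += b;
    }
  return d;
}

int main(void) {
  uint8_t a[21] = {0};
  a[20] = 113;  /* the scalar 113, big-endian */
  printf("%llu\n", (unsigned long long)scalar_mul_sketch(a, 5));  /* 565 */
  return 0;
}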
Example #5
struct affine_point pointmul(const struct affine_point *p,
			     const gcry_mpi_t exp, 
			     const struct domain_params *dp)
{
  struct affine_point r = point_new();
  int n = gcry_mpi_get_nbits(exp);
  while (n) {
    point_double(&r, dp);
    if (gcry_mpi_test_bit(exp, --n))
      point_add(&r, p, dp);
  }
  assert(point_on_curve(&r, dp));
  return r;
}
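Same left-to-right double-and-add as Example #4, here driven by gcry_mpi_test_bit from the scalar's most significant bit down. Note that point_add only runs for set bits, so when exp is secret this data-dependent branch is a potential timing channel; the constant-time machinery in Examples #6 through #9 exists to avoid exactly that.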
Example #6
static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                                       const EC_SCALAR *g_scalar,
                                       const EC_RAW_POINT *p,
                                       const EC_SCALAR *p_scalar) {
  fe p_pre_comp[17][3];
  fe x_out, y_out, z_out;

  if (p != NULL && p_scalar != NULL) {
    // We treat NULL scalars as 0, and NULL points as points at infinity, i.e.,
    // they contribute nothing to the linear combination.
    OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
    // Precompute multiples.
    fe_from_generic(p_pre_comp[1][0], &p->X);
    fe_from_generic(p_pre_comp[1][1], &p->Y);
    fe_from_generic(p_pre_comp[1][2], &p->Z);
    for (size_t j = 2; j <= 16; ++j) {
      if (j & 1) {
        point_add(p_pre_comp[j][0], p_pre_comp[j][1],
                  p_pre_comp[j][2], p_pre_comp[1][0],
                  p_pre_comp[1][1], p_pre_comp[1][2],
                  0,
                  p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
                  p_pre_comp[j - 1][2]);
      } else {
        point_double(p_pre_comp[j][0], p_pre_comp[j][1],
                     p_pre_comp[j][2], p_pre_comp[j / 2][0],
                     p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
      }
    }
  }

  batch_mul(x_out, y_out, z_out,
            (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL,
            g_scalar != NULL ? g_scalar->bytes : NULL,
            (const fe (*) [3])p_pre_comp);

  fe_to_generic(&r->X, x_out);
  fe_to_generic(&r->Y, y_out);
  fe_to_generic(&r->Z, z_out);
}
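The precompute loop fills p_pre_comp[j] with j*P for j up to 16: entry 0 is the point at infinity from the memset, even entries double the half-index entry, and odd entries add P to the previous one. The recurrence is easy to check over plain integers; a minimal sketch:

#include <assert.h>
#include <stddef.h>

int main(void) {
  /* Mirror of the precompute loop above with j*P replaced by the
   * integer j: the recurrence fills table[j] with exactly j. */
  unsigned table[17] = {0, 1};  /* 0*P, 1*P */
  for (size_t j = 2; j <= 16; j++)
    table[j] = (j & 1) ? table[1] + table[j - 1]  /* point_add    */
                       : 2 * table[j / 2];        /* point_double */
  for (size_t j = 0; j <= 16; j++)
    assert(table[j] == j);
  return 0;
}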
Example #7
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
                                             EC_RAW_POINT *r,
                                             const EC_SCALAR *g_scalar,
                                             const EC_RAW_POINT *p,
                                             const EC_SCALAR *p_scalar) {
#define P256_WSIZE_PUBLIC 4
  // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
  fe p_pre_comp[1 << (P256_WSIZE_PUBLIC-1)][3];
  fe_from_generic(p_pre_comp[0][0], &p->X);
  fe_from_generic(p_pre_comp[0][1], &p->Y);
  fe_from_generic(p_pre_comp[0][2], &p->Z);
  fe p2[3];
  point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], p_pre_comp[0][1],
               p_pre_comp[0][2]);
  for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) {
    point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
              p_pre_comp[i - 1][0], p_pre_comp[i - 1][1], p_pre_comp[i - 1][2],
              0 /* not mixed */, p2[0], p2[1], p2[2]);
  }

  // Set up the coefficients for |p_scalar|.
  int8_t p_wNAF[257];
  ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);

  // Set |ret| to the point at infinity.
  int skip = 1;  // Save some point operations.
  fe ret[3] = {{0},{0},{0}};
  for (int i = 256; i >= 0; i--) {
    if (!skip) {
      point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]);
    }

    // For the |g_scalar|, we use the precomputed table without the
    // constant-time lookup.
    if (i <= 31) {
      // First, look 32 bits upwards.
      uint64_t bits = get_bit(g_scalar->bytes, i + 224) << 3;
      bits |= get_bit(g_scalar->bytes, i + 160) << 2;
      bits |= get_bit(g_scalar->bytes, i + 96) << 1;
      bits |= get_bit(g_scalar->bytes, i + 32);
      point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], 1 /* mixed */,
                g_pre_comp[1][bits][0], g_pre_comp[1][bits][1],
                g_pre_comp[1][bits][2]);
      skip = 0;

      // Second, look at the current position.
      bits = get_bit(g_scalar->bytes, i + 192) << 3;
      bits |= get_bit(g_scalar->bytes, i + 128) << 2;
      bits |= get_bit(g_scalar->bytes, i + 64) << 1;
      bits |= get_bit(g_scalar->bytes, i);
      point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], 1 /* mixed */,
                g_pre_comp[0][bits][0], g_pre_comp[0][bits][1],
                g_pre_comp[0][bits][2]);
    }

    int digit = p_wNAF[i];
    if (digit != 0) {
      assert(digit & 1);
      int idx = digit < 0 ? (-digit) >> 1 : digit >> 1;
      fe *y = &p_pre_comp[idx][1], tmp;
      if (digit < 0) {
        fe_opp(tmp, p_pre_comp[idx][1]);
        y = &tmp;
      }
      if (!skip) {
        point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
                  0 /* not mixed */, p_pre_comp[idx][0], *y, p_pre_comp[idx][2]);
      } else {
        fe_copy(ret[0], p_pre_comp[idx][0]);
        fe_copy(ret[1], *y);
        fe_copy(ret[2], p_pre_comp[idx][2]);
        skip = 0;
      }
    }
  }

  // Write the result into |r|.
  fe_to_generic(&r->X, ret[0]);
  fe_to_generic(&r->Y, ret[1]);
  fe_to_generic(&r->Z, ret[2]);
#undef P256_WSIZE_PUBLIC
}
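ec_compute_wNAF produces the signed digits consumed above: every nonzero digit is odd with absolute value below 2^P256_WSIZE_PUBLIC, which is why the table only stores the odd multiples (2i+1)*P and digit >> 1 indexes it directly. A minimal sketch of textbook width-w NAF recoding follows; this is not BoringSSL's ec_compute_wNAF, just the algorithm it realizes:

#include <stdint.h>

/* Width-w NAF: rewrite k as sum d_i * 2^i where every nonzero d_i is odd,
 * |d_i| < 2^w, and any two nonzero digits are at least w+1 positions
 * apart. |out| receives the digits least-significant first and must have
 * room for one digit per bit of k, plus one. Assumes k < 2^63 and
 * 1 <= w <= 6 so the digits fit in int8_t. Returns the digit count. */
static int wnaf_recode(int8_t *out, uint64_t k, int w) {
  int len = 0;
  const uint64_t window = (uint64_t)1 << (w + 1);
  while (k != 0) {
    int digit = 0;
    if (k & 1) {
      digit = (int)(k % window);
      if (digit >= (int)(window / 2))
        digit -= (int)window;  /* take the signed residue mod 2^(w+1) */
      k -= (uint64_t)digit;    /* unsigned wraparound is exact here */
    }
    out[len++] = (int8_t)digit;
    k >>= 1;
  }
  return len;
}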
Example #8
// Interleaved point multiplication using precomputed point multiples: The
// small point multiples 0*P, 1*P, ..., 16*P are in p_pre_comp, the scalar
// in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple
// of the generator, using certain (large) precomputed multiples in g_pre_comp.
// Output point (X, Y, Z) is stored in x_out, y_out, z_out.
static void batch_mul(fe x_out, fe y_out, fe z_out,
                      const uint8_t *p_scalar, const uint8_t *g_scalar,
                      const fe p_pre_comp[17][3]) {
  // set nq to the point at infinity
  fe nq[3] = {{0},{0},{0}}, ftmp, tmp[3];
  uint64_t bits;
  uint8_t sign, digit;

  // Loop over both scalars msb-to-lsb, interleaving additions of multiples
  // of the generator (two in each of the last 32 rounds) and additions of p
  // (every 5th round).

  int skip = 1;  // save two point operations in the first round
  size_t i = p_scalar != NULL ? 255 : 31;
  for (;;) {
    // double
    if (!skip) {
      point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
    }

    // add multiples of the generator
    if (g_scalar != NULL && i <= 31) {
      // first, look 32 bits upwards
      bits = get_bit(g_scalar, i + 224) << 3;
      bits |= get_bit(g_scalar, i + 160) << 2;
      bits |= get_bit(g_scalar, i + 96) << 1;
      bits |= get_bit(g_scalar, i + 32);
      // select the point to add, in constant time
      select_point(bits, 16, g_pre_comp[1], tmp);

      if (!skip) {
        point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
                  tmp[0], tmp[1], tmp[2]);
      } else {
        fe_copy(nq[0], tmp[0]);
        fe_copy(nq[1], tmp[1]);
        fe_copy(nq[2], tmp[2]);
        skip = 0;
      }

      // second, look at the current position
      bits = get_bit(g_scalar, i + 192) << 3;
      bits |= get_bit(g_scalar, i + 128) << 2;
      bits |= get_bit(g_scalar, i + 64) << 1;
      bits |= get_bit(g_scalar, i);
      // select the point to add, in constant time
      select_point(bits, 16, g_pre_comp[0], tmp);
      point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0],
                tmp[1], tmp[2]);
    }

    // do other additions every 5 doublings
    if (p_scalar != NULL && i % 5 == 0) {
      bits = get_bit(p_scalar, i + 4) << 5;
      bits |= get_bit(p_scalar, i + 3) << 4;
      bits |= get_bit(p_scalar, i + 2) << 3;
      bits |= get_bit(p_scalar, i + 1) << 2;
      bits |= get_bit(p_scalar, i) << 1;
      bits |= get_bit(p_scalar, i - 1);
      ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

      // select the point to add or subtract, in constant time.
      select_point(digit, 17, p_pre_comp, tmp);
      fe_opp(ftmp, tmp[1]);  // (X, -Y, Z) is the negative point.
      fe_cmovznz(tmp[1], sign, tmp[1], ftmp);

      if (!skip) {
        point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* not mixed */,
                  tmp[0], tmp[1], tmp[2]);
      } else {
        fe_copy(nq[0], tmp[0]);
        fe_copy(nq[1], tmp[1]);
        fe_copy(nq[2], tmp[2]);
        skip = 0;
      }
    }

    if (i == 0) {
      break;
    }
    --i;
  }
  fe_copy(x_out, nq[0]);
  fe_copy(y_out, nq[1]);
  fe_copy(z_out, nq[2]);
}
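select_point is the constant-time table lookup: it touches every table entry and keeps the requested one with a branch-free mask, so the memory access pattern is independent of the secret index. A minimal sketch of the idea, with placeholder 4-limb entries standing in for the fe[3] triples the real routine selects:

#include <stddef.h>
#include <stdint.h>

/* Copy entry |idx| of |table| into |out| while reading all |size|
 * entries, using an equality mask instead of an if. */
static void select_entry(uint64_t idx, size_t size,
                         const uint64_t table[][4], uint64_t out[4]) {
  for (size_t j = 0; j < 4; j++)
    out[j] = 0;
  for (size_t i = 0; i < size; i++) {
    uint64_t diff = (uint64_t)i ^ idx;
    /* mask is all-ones exactly when i == idx, all-zeros otherwise. */
    uint64_t mask = ((diff | (0 - diff)) >> 63) - 1;
    for (size_t j = 0; j < 4; j++)
      out[j] |= table[i][j] & mask;
  }
}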
Example #9
// point_add calculates (x1, y1, z1) + (x2, y2, z2)
//
// The method is taken from:
//   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205>
//
// This function includes a branch for checking whether the two input points
// are equal (and not the point at infinity). This case never
// happens during single point multiplication, so there is no timing leak for
// ECDH or ECDSA signing.
static void point_add(fe x3, fe y3, fe z3, const fe x1,
                      const fe y1, const fe z1, const int mixed,
                      const fe x2, const fe y2, const fe z2) {
  fe x_out, y_out, z_out;
  limb_t z1nz = fe_nz(z1);
  limb_t z2nz = fe_nz(z2);

  // z1z1 = z1**2
  fe z1z1; fe_sqr(z1z1, z1);

  fe u1, s1, two_z1z2;
  if (!mixed) {
    // z2z2 = z2**2
    fe z2z2; fe_sqr(z2z2, z2);

    // u1 = x1*z2z2
    fe_mul(u1, x1, z2z2);

    // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
    fe_add(two_z1z2, z1, z2);
    fe_sqr(two_z1z2, two_z1z2);
    fe_sub(two_z1z2, two_z1z2, z1z1);
    fe_sub(two_z1z2, two_z1z2, z2z2);

    // s1 = y1 * z2**3
    fe_mul(s1, z2, z2z2);
    fe_mul(s1, s1, y1);
  } else {
    // We'll assume z2 = 1 (special case z2 = 0 is handled later).

    // u1 = x1*z2z2
    fe_copy(u1, x1);
    // two_z1z2 = 2z1z2
    fe_add(two_z1z2, z1, z1);
    // s1 = y1 * z2**3
    fe_copy(s1, y1);
  }

  // u2 = x2*z1z1
  fe u2; fe_mul(u2, x2, z1z1);

  // h = u2 - u1
  fe h; fe_sub(h, u2, u1);

  limb_t xneq = fe_nz(h);

  // z_out = two_z1z2 * h
  fe_mul(z_out, h, two_z1z2);

  // z1z1z1 = z1 * z1z1
  fe z1z1z1; fe_mul(z1z1z1, z1, z1z1);

  // s2 = y2 * z1**3
  fe s2; fe_mul(s2, y2, z1z1z1);

  // r = (s2 - s1)*2
  fe r;
  fe_sub(r, s2, s1);
  fe_add(r, r, r);

  limb_t yneq = fe_nz(r);

  if (!xneq && !yneq && z1nz && z2nz) {
    point_double(x3, y3, z3, x1, y1, z1);
    return;
  }

  // I = (2h)**2
  fe i;
  fe_add(i, h, h);
  fe_sqr(i, i);

  // J = h * I
  fe j; fe_mul(j, h, i);

  // V = U1 * I
  fe v; fe_mul(v, u1, i);

  // x_out = r**2 - J - 2V
  fe_sqr(x_out, r);
  fe_sub(x_out, x_out, j);
  fe_sub(x_out, x_out, v);
  fe_sub(x_out, x_out, v);

  // y_out = r(V-x_out) - 2 * s1 * J
  fe_sub(y_out, v, x_out);
  fe_mul(y_out, y_out, r);
  fe s1j;
  fe_mul(s1j, s1, j);
  fe_sub(y_out, y_out, s1j);
  fe_sub(y_out, y_out, s1j);

  fe_cmovznz(x_out, z1nz, x2, x_out);
  fe_cmovznz(x3, z2nz, x1, x_out);
  fe_cmovznz(y_out, z1nz, y2, y_out);
  fe_cmovznz(y3, z2nz, y1, y_out);
  fe_cmovznz(z_out, z1nz, z2, z_out);
  fe_cmovznz(z3, z2nz, z1, z_out);
}
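The cmovznz tail handles the points at infinity without branching: when z1 is zero the selects return point 2 unchanged, and when z2 is zero they return point 1, so no separate infinity check leaks through timing. fe_cmovznz(out, t, z, nz) yields z when t is zero and nz otherwise; a minimal branch-free sketch of the primitive, with a placeholder 4-limb representation rather than fiat-crypto's generated code:

#include <stdint.h>

/* out = (t == 0) ? z : nz, computed with a mask instead of a branch so
 * the selection does not leak |t| through timing. */
static void fe_cmovznz_sketch(uint64_t out[4], uint64_t t,
                              const uint64_t z[4], const uint64_t nz[4]) {
  /* mask is all-ones exactly when t == 0, all-zeros otherwise. */
  uint64_t mask = ((t | (0 - t)) >> 63) - 1;
  for (int i = 0; i < 4; i++)
    out[i] = (z[i] & mask) | (nz[i] & ~mask);
}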