Пример #1
0
/*
 * Fill the cached representation *r from the point *p:
 *   r = (Y + X, Y - X, Z, T * d2)
 * where d2 is the file-level constant of that name.
 */
static inline void ge_p3_to_cached(ge_cached *r, const ge_p3 *p)
{
  /* Z is carried over unchanged. */
  fe_copy(r->Z, p->Z);

  /* T is pre-multiplied by d2. */
  fe_mul(r->T2d, p->T, d2);

  /* Sum and difference of the Y and X coordinates. */
  fe_sub(r->YminusX, p->Y, p->X);
  fe_add(r->YplusX, p->Y, p->X);
}
/*
 * Derive a Curve25519 (Montgomery-form) public key from a private key.
 *
 * curve25519_pubkey_out: receives the fe_tobytes() encoding of the
 *                        Montgomery x-coordinate (presumably 32 bytes --
 *                        confirm against fe_tobytes).
 * curve25519_privkey_in: scalar handed to ge_scalarmult_base().
 */
void curve25519_keygen(unsigned char* curve25519_pubkey_out,
                       const unsigned char* curve25519_privkey_in)
{
  ge_p3 ed; /* Ed25519 pubkey point */
  fe ed_y_plus_one, one_minus_ed_y, inv_one_minus_ed_y;
  fe mont_x;

  /* Perform a fixed-base multiplication of the Edwards base point,
     (which is efficient due to precalculated tables), then convert
     to the Curve25519 montgomery-format public key.  In particular,
     convert the Ed25519 "edwards" y-coordinate into Curve25519's
     "montgomery" x-coordinate:

     mont_x = (ed_y + 1) / (1 - ed_y)

     with projective coordinates:

     mont_x = (ed_y + ed_z) / (ed_z - ed_y)

     NOTE: ed_y=1 is converted to mont_x=0 since fe_invert is mod-exp
  */

  ge_scalarmult_base(&ed, curve25519_privkey_in);

  /* The temporaries keep their affine names but hold the projective
     values Y + Z and Z - Y. */
  fe_add(ed_y_plus_one, ed.Y, ed.Z);
  fe_sub(one_minus_ed_y, ed.Z, ed.Y);
  fe_invert(inv_one_minus_ed_y, one_minus_ed_y);
  fe_mul(mont_x, ed_y_plus_one, inv_one_minus_ed_y);
  fe_tobytes(curve25519_pubkey_out, mont_x);
}
Пример #3
0
/*
 * Map a projective Edwards y-coordinate (edwardsY : edwardsZ) to the
 * Montgomery x-coordinate:
 *
 *   montgomeryX = (Z + Y) / (Z - Y)
 *
 * montgomeryX is written only by the final multiplication.
 */
static void
edwards_to_montgomery(fe montgomeryX, const fe edwardsY, const fe edwardsZ)
{
  fe numerator;
  fe denominator;

  /* denominator = 1 / (Z - Y) */
  fe_sub(denominator, edwardsZ, edwardsY);
  fe_invert(denominator, denominator);

  /* numerator = Z + Y */
  fe_add(numerator, edwardsZ, edwardsY);

  fe_mul(montgomeryX, numerator, denominator);
}
Пример #4
0
// point_double calculates 2*(x_in, y_in, z_in)
//
// The method is taken from:
//   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
//
// Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
// while x_out == y_in is not (maybe this works, but it's not tested).
//
// The statement order below is what makes the in-place case work: each
// input is read for the last time before the matching output is written.
static void point_double(fe x_out, fe y_out, fe z_out,
                         const fe x_in, const fe y_in, const fe z_in) {
  fe delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
  // delta = z^2
  fe_sqr(delta, z_in);
  // gamma = y^2
  fe_sqr(gamma, y_in);
  // beta = x*gamma
  fe_mul(beta, x_in, gamma);

  // alpha = 3*(x-delta)*(x+delta)
  fe_sub(ftmp, x_in, delta);
  fe_add(ftmp2, x_in, delta);

  // ftmp2 = 3*(x+delta), built as (x+delta) + 2*(x+delta)
  fe_add(tmptmp, ftmp2, ftmp2);
  fe_add(ftmp2, ftmp2, tmptmp);
  fe_mul(alpha, ftmp, ftmp2);

  // x' = alpha^2 - 8*beta
  // (last read of x_in was above, so x_out may alias it)
  fe_sqr(x_out, alpha);
  fe_add(fourbeta, beta, beta);
  fe_add(fourbeta, fourbeta, fourbeta);
  // tmptmp = 8*beta; fourbeta is kept for the y' computation
  fe_add(tmptmp, fourbeta, fourbeta);
  fe_sub(x_out, x_out, tmptmp);

  // z' = (y + z)^2 - gamma - delta
  // delta is reused to hold gamma + delta
  fe_add(delta, gamma, delta);
  fe_add(ftmp, y_in, z_in);
  fe_sqr(z_out, ftmp);
  fe_sub(z_out, z_out, delta);

  // y' = alpha*(4*beta - x') - 8*gamma^2
  fe_sub(y_out, fourbeta, x_out);
  // gamma = (2*gamma)^2 = 4*gamma^2
  fe_add(gamma, gamma, gamma);
  fe_sqr(gamma, gamma);
  fe_mul(y_out, alpha, y_out);
  // gamma = 8*gamma^2 (in terms of the original gamma)
  fe_add(gamma, gamma, gamma);
  fe_sub(y_out, y_out, gamma);
}
Пример #5
0
/*
 * Verify a signature against a Curve25519 public key.
 *
 * The Montgomery x-coordinate of the key is mapped to an Edwards
 * y-coordinate, the sign bit is moved from the top bit of signature[63]
 * into the derived Ed25519 key, and the result is handed to
 * crypto_sign_open().
 *
 * Returns -1 when msg_len exceeds MAX_MSG_LEN; otherwise returns whatever
 * crypto_sign_open() returns (0 on success).
 */
int curve25519_verify(const unsigned char* signature,
                      const unsigned char* curve25519_pubkey,
                      const unsigned char* msg, const unsigned long msg_len)
{
  fe u, u_minus_one, u_plus_one, inv_u_plus_one;
  fe unit;
  fe y;
  unsigned char ed_pubkey[32];
  unsigned long long opened_len;
  unsigned char signed_msg[MAX_MSG_LEN + 64]; /* R || S || message */
  unsigned char scratch[MAX_MSG_LEN + 64];    /* output area of crypto_sign_open */

  if (msg_len > MAX_MSG_LEN) {
    return -1;
  }

  /* Montgomery x -> Edwards y:

     y = (u - 1) / (u + 1)

     NOTE: u=-1 is converted to y=0 since fe_invert is mod-exp
  */
  fe_frombytes(u, curve25519_pubkey);
  fe_1(unit);
  fe_sub(u_minus_one, u, unit);
  fe_add(u_plus_one, u, unit);
  fe_invert(inv_u_plus_one, u_plus_one);
  fe_mul(y, u_minus_one, inv_u_plus_one);
  fe_tobytes(ed_pubkey, y);

  /* Clear the key's top bit (should already be zero), then install the
     sign bit carried in the signature. */
  ed_pubkey[31] = (unsigned char)((ed_pubkey[31] & 0x7F) | (signature[63] & 0x80));

  /* Build R || S || message, with the sign bit stripped from S. */
  memmove(signed_msg, signature, 64);
  signed_msg[63] &= 0x7F;
  memmove(signed_msg + 64, msg, msg_len);

  /* Ordinary Ed25519 verification.  The callee uses scratch as its own
     working/output buffer: it receives a copy of signed_msg, S is replaced
     by the public key for hashing, and afterwards it is zeroized (bad
     signature) or holds a copy of msg (good signature). */
  return crypto_sign_open(scratch, &opened_len, signed_msg, 64 + msg_len, ed_pubkey);
}
Пример #6
0
/*
 * Convert a Montgomery x-coordinate u into an Edwards y-coordinate:
 *
 *   y = (u - 1) / (u + 1)
 *
 * NOTE: u=-1 is converted to y=0 since fe_invert is mod-exp
 */
void fe_montx_to_edy(fe y, const fe u)
{
  fe unit, numer, denom;

  fe_1(unit);

  /* denom = 1 / (u + 1) */
  fe_add(denom, u, unit);
  fe_invert(denom, denom);

  /* numer = u - 1 */
  fe_sub(numer, u, unit);

  /* y is written only here, after the last read of u. */
  fe_mul(y, numer, denom);
}
Пример #7
0
/*
 * Convert an Ed25519 public key into a Curve25519 public key.
 *
 * The Edwards y-coordinate is mapped to the Montgomery x-coordinate
 *   x = (1 + y) / (1 - y)
 * Only A.Y is read from the decoded point.
 *
 * curve25519_pk: receives the fe_tobytes() encoding of x.
 * ed25519_pk:    encoded Ed25519 public key.
 *
 * Returns 0 on success, or -1 if ge_frombytes_negate_vartime() reports that
 * ed25519_pk cannot be decoded; curve25519_pk is left untouched in that case.
 */
int crypto_sign_ed25519_pk_to_curve25519(unsigned char *curve25519_pk,
                                         const unsigned char *ed25519_pk)
{
    ge_p3 A;
    fe    x;
    fe    one_minus_y;

    /* Reject keys that do not decode instead of converting garbage. */
    if (ge_frombytes_negate_vartime(&A, ed25519_pk) != 0) {
        return -1;
    }

    /* one_minus_y = 1 / (1 - y) */
    fe_1(one_minus_y);
    fe_sub(one_minus_y, one_minus_y, A.Y);
    fe_invert(one_minus_y, one_minus_y);

    /* x = (1 + y) * (1 / (1 - y)) */
    fe_1(x);
    fe_add(x, x, A.Y);
    fe_mul(x, x, one_minus_y);
    fe_tobytes(curve25519_pk, x);

    return 0;
}
Пример #8
0
/*
   Test if the public key can be uncompressed and negate it (-X,Y,Z,-T)
   return 0 on success

   p: receives the decoded, negated point.
   s: encoded key; byte 31 carries the x parity in its top bit and the
      remaining bits hold y.

   The return value is the accumulated result of ConstantCompare(), which
   is presumably 0 when the recomputed x^2 matches (confirm against its
   definition). Note that p is filled in even when the check fails.
 */
int ge_frombytes_negate_vartime(ge_p3 *p,const unsigned char *s)
{

    byte parity;
    byte x[F25519_SIZE];
    byte y[F25519_SIZE];
    byte a[F25519_SIZE];
    byte b[F25519_SIZE];
    byte c[F25519_SIZE];
    int ret = 0;

    /* unpack the key s */
    parity = s[31] >> 7;
    fe_copy(y, s);
    y[31] &= 127;

    /* c = (y^2 - 1) / (d*y^2 + 1), the candidate value of x^2.
       The *__distinct helpers are always called with an output buffer
       different from the inputs, hence the rotation through a, b and c. */
    fe_mul__distinct(c, y, y);
    fe_mul__distinct(b, c, ed25519_d);
    fe_add(a, b, f25519_one);
    fe_inv__distinct(b, a);
    fe_sub(a, c, f25519_one);
    fe_mul__distinct(c, a, b);

    /* a = sqrt(c), b = -a; choose between them based on the low bit of a
       and the requested parity (presumably b is chosen when the condition
       is 1 -- confirm against fe_select) */
    fe_sqrt(a, c);
    fe_neg(b, a);
    fe_select(x, a, b, (a[0] ^ parity) & 1);

    /* test that x^2 is equal to c */
    fe_mul__distinct(a, x, x);
    fe_normalize(a);
    fe_normalize(c);
    ret |= ConstantCompare(a, c, F25519_SIZE);

    /* project the key s onto p */
    fe_copy(p->X, x);
    fe_copy(p->Y, y);
    fe_load(p->Z, 1);
    fe_mul__distinct(p->T, x, y);

    /* negate, the point becomes (-X,Y,Z,-T) */
    fe_neg(p->X,p->X);
    fe_neg(p->T,p->T);

    return ret;
}
Пример #9
0
/*
 * Convert a public signing key into a Curve25519 key.
 *
 * Decodes publicSigningKey with ge_frombytes_negate_vartime() and maps the
 * resulting Y coordinate through (1 + Y) / (1 - Y).
 *
 * Returns 0 on success, -1 if the key cannot be decoded (in which case
 * curve25519keyOut is not written).
 */
int Sign_publicSigningKeyToCurve25519(uint8_t curve25519keyOut[32], uint8_t publicSigningKey[32])
{
    ge_p3 point;
    fe    numerator;
    fe    denominator;

    if (ge_frombytes_negate_vartime(&point, publicSigningKey) != 0) {
        return -1;
    }

    /* numerator = 1 + Y */
    fe_1(numerator);
    fe_add(numerator, numerator, point.Y);

    /* denominator = 1 / (1 - Y) */
    fe_1(denominator);
    fe_sub(denominator, denominator, point.Y);
    fe_invert(denominator, denominator);

    /* result = (1 + Y) / (1 - Y) */
    fe_mul(numerator, numerator, denominator);
    fe_tobytes(curve25519keyOut, numerator);

    return 0;
}
Пример #10
0
/*
 * Point doubling in extended coordinates: r = 2 * p.
 *
 * Explicit formulas database: dbl-2008-hwcd
 * (2008 Hisil--Wong--Carter--Dawson, http://eprint.iacr.org/2008/522,
 * Section 3.3):
 *
 *   A = X1^2            B = Y1^2           C = 2 Z1^2
 *   D = a A             E = (X1+Y1)^2-A-B
 *   G = D + B           F = G - C          H = D - B
 *   X3 = E F   Y3 = G H   T3 = E H   Z3 = F G
 *
 * D is never materialised: the code uses D = -A directly, so G = B - A and
 * H = -B - A.  All reads of *p happen before the first write to *r.
 */
void ed25519_double(ge_p3 *r, const ge_p3 *p)
{
    byte x_sq[F25519_SIZE];      /* A */
    byte y_sq[F25519_SIZE];      /* B */
    byte two_z_sq[F25519_SIZE];  /* C */
    byte x_plus_y[F25519_SIZE];  /* X1 + Y1 */
    byte e_term[F25519_SIZE];    /* E */
    byte f_term[F25519_SIZE];    /* F */
    byte g_term[F25519_SIZE];    /* G */
    byte h_term[F25519_SIZE];    /* H */

    /* Squares of the three input coordinates; the Z square is doubled. */
    fe_mul__distinct(x_sq, p->X, p->X);
    fe_mul__distinct(y_sq, p->Y, p->Y);
    fe_mul__distinct(two_z_sq, p->Z, p->Z);
    fe_add(two_z_sq, two_z_sq, two_z_sq);

    /* E = (X1 + Y1)^2 - A - B */
    fe_add(x_plus_y, p->X, p->Y);
    fe_mul__distinct(e_term, x_plus_y, x_plus_y);
    fe_sub(e_term, e_term, x_sq);
    fe_sub(e_term, e_term, y_sq);

    /* G = B - A */
    fe_sub(g_term, y_sq, x_sq);

    /* H = -B - A */
    fe_neg(h_term, y_sq);
    fe_sub(h_term, h_term, x_sq);

    /* F = G - C */
    fe_sub(f_term, g_term, two_z_sq);

    /* Assemble the result. */
    fe_mul__distinct(r->X, e_term, f_term);
    fe_mul__distinct(r->Y, g_term, h_term);
    fe_mul__distinct(r->T, e_term, h_term);
    fe_mul__distinct(r->Z, f_term, g_term);
}
Пример #11
0
/*
 * Point addition in extended coordinates: r = p1 + p2.
 *
 * Explicit formulas database: add-2008-hwcd-3
 * (2008 Hisil--Wong--Carter--Dawson, http://eprint.iacr.org/2008/522,
 * Section 3.1; applies to extended-1, parameter k with k = 2 d):
 *
 *   A = (Y1-X1)(Y2-X2)   B = (Y1+X1)(Y2+X2)
 *   C = T1 k T2          D = Z1 2 Z2
 *   E = B - A   F = D - C   G = D + C   H = B + A
 *   X3 = E F   Y3 = G H   T3 = E H   Z3 = F G
 *
 * All reads of *p1 and *p2 happen before the first write to *r.
 */
void ed25519_add(ge_p3 *r,
         const ge_p3 *p1, const ge_p3 *p2)
{
    byte lhs[F25519_SIZE];        /* scratch: factor taken from p1 */
    byte rhs[F25519_SIZE];        /* scratch: factor taken from p2 */
    byte diff_prod[F25519_SIZE];  /* A */
    byte sum_prod[F25519_SIZE];   /* B */
    byte t_term[F25519_SIZE];     /* C */
    byte z_term[F25519_SIZE];     /* D */
    byte e_term[F25519_SIZE];     /* E */
    byte f_term[F25519_SIZE];     /* F */
    byte g_term[F25519_SIZE];     /* G */
    byte h_term[F25519_SIZE];     /* H */

    /* A = (Y1-X1)(Y2-X2) */
    fe_sub(lhs, p1->Y, p1->X);
    fe_sub(rhs, p2->Y, p2->X);
    fe_mul__distinct(diff_prod, lhs, rhs);

    /* B = (Y1+X1)(Y2+X2) */
    fe_add(lhs, p1->Y, p1->X);
    fe_add(rhs, p2->Y, p2->X);
    fe_mul__distinct(sum_prod, lhs, rhs);

    /* C = (T1 T2) k */
    fe_mul__distinct(rhs, p1->T, p2->T);
    fe_mul__distinct(t_term, rhs, ed25519_k);

    /* D = 2 (Z1 Z2) */
    fe_mul__distinct(z_term, p1->Z, p2->Z);
    fe_add(z_term, z_term, z_term);

    /* E, F, G, H */
    fe_sub(e_term, sum_prod, diff_prod);
    fe_sub(f_term, z_term, t_term);
    fe_add(g_term, z_term, t_term);
    fe_add(h_term, sum_prod, diff_prod);

    /* Assemble the result. */
    fe_mul__distinct(r->X, e_term, f_term);
    fe_mul__distinct(r->Y, g_term, h_term);
    fe_mul__distinct(r->T, e_term, h_term);
    fe_mul__distinct(r->Z, f_term, g_term);
}
Пример #12
0
/*
 * Montgomery-ladder scalar multiplication on x-coordinates.
 *
 * q: receives the fe_tobytes() encoding of the resulting x-coordinate.
 * n: 32-byte scalar; a clamped copy is used, n itself is not modified.
 * p: encoded x-coordinate of the input point (decoded with fe_frombytes()).
 *
 * Always returns 0.
 */
static int
crypto_scalarmult_curve25519_ref10(unsigned char *q,
                                   const unsigned char *n,
                                   const unsigned char *p)
{
  unsigned char e[32];
  unsigned int i;
  fe x1;
  fe x2;
  fe z2;
  fe x3;
  fe z3;
  fe tmp0;
  fe tmp1;
  int pos;
  unsigned int swap;
  unsigned int b;

  /* Work on a clamped copy of the scalar: clear the low three bits,
     clear bit 255 and set bit 254. */
  for (i = 0;i < 32;++i) e[i] = n[i];
  e[0] &= 248;
  e[31] &= 127;
  e[31] |= 64;

  /* Ladder state: (x2 : z2) starts as (1 : 0), (x3 : z3) as (x1 : 1). */
  fe_frombytes(x1,p);
  fe_1(x2);
  fe_0(z2);
  fe_copy(x3,x1);
  fe_1(z3);

  swap = 0;
  /* Process scalar bits 254 down to 0. */
  for (pos = 254;pos >= 0;--pos) {
    b = e[pos / 8] >> (pos & 7);
    b &= 1;
    /* Swap the two ladder halves only when this bit differs from the
       previous one; `swap` remembers the previous bit. */
    swap ^= b;
    fe_cswap(x2,x3,swap);
    fe_cswap(z2,z3,swap);
    swap = b;
    /* One ladder step. The temporaries and outputs are reused in place,
       so the order of the following statements must not be changed. */
    fe_sub(tmp0,x3,z3);
    fe_sub(tmp1,x2,z2);
    fe_add(x2,x2,z2);
    fe_add(z2,x3,z3);
    fe_mul(z3,tmp0,x2);
    fe_mul(z2,z2,tmp1);
    fe_sq(tmp0,tmp1);
    fe_sq(tmp1,x2);
    fe_add(x3,z3,z2);
    fe_sub(z2,z3,z2);
    fe_mul(x2,tmp1,tmp0);
    fe_sub(tmp1,tmp1,tmp0);
    fe_sq(z2,z2);
    fe_mul121666(z3,tmp1);
    fe_sq(x3,x3);
    fe_add(tmp0,tmp0,z3);
    fe_mul(z3,x1,z2);
    fe_mul(z2,tmp1,tmp0);
  }
  /* Undo the swap left pending by the last processed bit. */
  fe_cswap(x2,x3,swap);
  fe_cswap(z2,z3,swap);

  /* Convert (x2 : z2) to affine: q = x2 / z2. */
  fe_invert(z2,z2);
  fe_mul(x2,x2,z2);
  fe_tobytes(q,x2);
  return 0;
}
Пример #13
0
/*
 * Known-answer tests for elligator(), ge_montx_to_p3(), hash_to_point()
 * and calculate_Bv_and_V().
 *
 * silent: not referenced directly in this function; presumably consumed by
 *         the TEST macro -- confirm against its definition.
 *
 * Returns 0 when the end of the function is reached (how a failed TEST is
 * reported depends on the TEST macro).
 */
int elligator_fast_test(int silent)
{
  unsigned char elligator_correct_output[32] = 
  {
  0x5f, 0x35, 0x20, 0x00, 0x1c, 0x6c, 0x99, 0x36, 
  0xa3, 0x12, 0x06, 0xaf, 0xe7, 0xc7, 0xac, 0x22, 
  0x4e, 0x88, 0x61, 0x61, 0x9b, 0xf9, 0x88, 0x72, 
  0x44, 0x49, 0x15, 0x89, 0x9d, 0x95, 0xf4, 0x6e
  };

  unsigned char hashtopoint_correct_output1[32] = 
  {
  0xce, 0x89, 0x9f, 0xb2, 0x8f, 0xf7, 0x20, 0x91,
  0x5e, 0x14, 0xf5, 0xb7, 0x99, 0x08, 0xab, 0x17,
  0xaa, 0x2e, 0xe2, 0x45, 0xb4, 0xfc, 0x2b, 0xf6,
  0x06, 0x36, 0x29, 0x40, 0xed, 0x7d, 0xe7, 0xed
  };

  unsigned char hashtopoint_correct_output2[32] = 
  {
  0xa0, 0x35, 0xbb, 0xa9, 0x4d, 0x30, 0x55, 0x33, 
  0x0d, 0xce, 0xc2, 0x7f, 0x83, 0xde, 0x79, 0xd0, 
  0x89, 0x67, 0x72, 0x4c, 0x07, 0x8d, 0x68, 0x9d, 
  0x61, 0x52, 0x1d, 0xf9, 0x2c, 0x5c, 0xba, 0x77
  };

  unsigned char calculatev_correct_output[32] = 
  {
  0x1b, 0x77, 0xb5, 0xa0, 0x44, 0x84, 0x7e, 0xb9, 
  0x23, 0xd7, 0x93, 0x18, 0xce, 0xc2, 0xc5, 0xe2, 
  0x84, 0xd5, 0x79, 0x6f, 0x65, 0x63, 0x1b, 0x60, 
  0x9b, 0xf1, 0xf8, 0xce, 0x88, 0x0b, 0x50, 0x9c,
  };

  int count;
  fe in, out;
  unsigned char bytes[32];
  fe_0(in);
  fe_0(out);
  for (count = 0; count < 32; count++) {
    bytes[count] = count;
  }
  fe_frombytes(in, bytes);
  elligator(out, in);
  fe_tobytes(bytes, out);
  TEST("Elligator vector", memcmp(bytes, elligator_correct_output, 32) == 0);

  /* Elligator(0) == 0 test */
  fe_0(in);
  elligator(out, in);
  /* Compare as field elements rather than memcmp()-ing a fixed 32 bytes of
     the internal fe storage, which need not cover the whole element. */
  TEST("Elligator(0) == 0", fe_isequal(in, out));

  /* ge_montx_to_p3(0) -> order2 point test */
  fe one, negone, zero;
  fe_1(one);
  fe_0(zero);
  fe_sub(negone, zero, one);
  ge_p3 p3;
  ge_montx_to_p3(&p3, zero, 0);
  TEST("ge_montx_to_p3(0) == order 2 point", 
      fe_isequal(p3.X, zero) &&
      fe_isequal(p3.Y, negone) &&
      fe_isequal(p3.Z, one) && 
      fe_isequal(p3.T, zero));

  /* Hash to point vector test */
  unsigned char htp[32];
  
  for (count=0; count < 32; count++) {
    htp[count] = count;
  }

  hash_to_point(&p3, htp, 32);
  ge_p3_tobytes(htp, &p3);
  TEST("hash_to_point #1", memcmp(htp, hashtopoint_correct_output1, 32) == 0);

  for (count=0; count < 32; count++) {
    htp[count] = count+1;
  }

  hash_to_point(&p3, htp, 32);
  ge_p3_tobytes(htp, &p3);
  TEST("hash_to_point #2", memcmp(htp, hashtopoint_correct_output2, 32) == 0);

  /* calculate_Bv_and_V vector test */
  ge_p3 Bv;
  unsigned char V[32];
  unsigned char Vbuf[200];
  unsigned char a[32];
  unsigned char A[32];
  unsigned char Vmsg[3];
  Vmsg[0] = 0;
  Vmsg[1] = 1;
  Vmsg[2] = 2;
  for (count=0; count < 32; count++) {
    a[count] = 8 + count;
    A[count] = 9 + count;
  }
  sc_clamp(a);
  calculate_Bv_and_V(&Bv, V, Vbuf, a, A, Vmsg, 3);
  TEST("calculate_Bv_and_V vector", memcmp(V, calculatev_correct_output, 32) == 0);
  return 0;
}
Пример #14
0
/*
 * Compute a shared secret from an Ed25519-encoded public key and a private
 * key using a Montgomery ladder.
 *
 * shared_secret: receives the fe_tobytes() encoding of the resulting
 *                x-coordinate.
 * public_key:    encoded Edwards y-coordinate (decoded with fe_frombytes()).
 * private_key:   scalar; the first 32 bytes are copied and clamped locally,
 *                private_key itself is not modified.
 */
void ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key) {
    unsigned char e[32];
    unsigned int i;
    
    fe x1;
    fe x2;
    fe z2;
    fe x3;
    fe z3;
    fe tmp0;
    fe tmp1;
	
    int pos;
    unsigned int swap;
    unsigned int b;
	
    /* copy the private key and make sure it's valid */
    for (i = 0; i < 32; ++i) {
        e[i] = private_key[i];
    }
	
    /* clamp: clear the low three bits, clear the top two bits of the last
       byte, then set bit 6 of the last byte again (net effect: bit 255
       cleared, bit 254 set) */
    e[0] &= 248;
    e[31] &= 63;
    e[31] |= 64;
	
    /* unpack the public key and convert edwards to montgomery */
    /* due to CodesInChaos: montgomeryX = (edwardsY + 1)*inverse(1 - edwardsY) mod p */
    fe_frombytes(x1, public_key);
    fe_1(tmp1);
    fe_add(tmp0, x1, tmp1);
    fe_sub(tmp1, tmp1, x1);
    fe_invert(tmp1, tmp1);
    fe_mul(x1, tmp0, tmp1);
	
    /* ladder state: (x2 : z2) = (1 : 0), (x3 : z3) = (x1 : 1) */
    fe_1(x2);
    fe_0(z2);
    fe_copy(x3, x1);
    fe_1(z3);
	
    swap = 0;
    /* process scalar bits 254 down to 0 */
    for (pos = 254; pos >= 0; --pos) {
        b = e[pos / 8] >> (pos & 7);
        b &= 1;
        /* swap only when this bit differs from the previous one */
        swap ^= b;
        fe_cswap(x2, x3, swap);
        fe_cswap(z2, z3, swap);
        swap = b;
		
        /* from montgomery.h */
        /* one ladder step; values are reused in place, so the statement
           order must be preserved */
        fe_sub(tmp0, x3, z3);
        fe_sub(tmp1, x2, z2);
        fe_add(x2, x2, z2);
        fe_add(z2, x3, z3);
        fe_mul(z3, tmp0, x2);
        fe_mul(z2, z2, tmp1);
        fe_sq(tmp0, tmp1);
        fe_sq(tmp1, x2);
        fe_add(x3, z3, z2);
        fe_sub(z2, z3, z2);
        fe_mul(x2, tmp1, tmp0);
        fe_sub(tmp1, tmp1, tmp0);
        fe_sq(z2, z2);
        fe_mul121666(z3, tmp1);
        fe_sq(x3, x3);
        fe_add(tmp0, tmp0, z3);
        fe_mul(z3, x1, z2);
        fe_mul(z2, tmp1, tmp0);
    }
	
    /* undo the swap left pending by the last processed bit */
    fe_cswap(x2, x3, swap);
    fe_cswap(z2, z3, swap);
	
    /* affine result: x2 / z2 */
    fe_invert(z2, z2);
    fe_mul(x2, x2, z2);
    fe_tobytes(shared_secret, x2);
}
Пример #15
0
// point_add calculates (x1, y1, z1) + (x2, y2, z2)
//
// The method is taken from:
//   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205>
//
// This function includes a branch for checking whether the two input points
// are equal, (while not equal to the point at infinity). This case never
// happens during single point multiplication, so there is no timing leak for
// ECDH or ECDSA signing.
//
// When |mixed| is non-zero the z2-dependent terms are computed as if z2 = 1;
// z2 itself is still inspected via fe_nz() so that z2 = 0 can be handled by
// the final selection step.
static void point_add(fe x3, fe y3, fe z3, const fe x1,
                      const fe y1, const fe z1, const int mixed,
                      const fe x2, const fe y2, const fe z2) {
  fe x_out, y_out, z_out;
  // Record up front whether each input z is non-zero; both flags are used
  // in the doubling check and in the final selection.
  limb_t z1nz = fe_nz(z1);
  limb_t z2nz = fe_nz(z2);

  // z1z1 = z1**2
  fe z1z1; fe_sqr(z1z1, z1);

  fe u1, s1, two_z1z2;
  if (!mixed) {
    // z2z2 = z2**2
    fe z2z2; fe_sqr(z2z2, z2);

    // u1 = x1*z2z2
    fe_mul(u1, x1, z2z2);

    // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
    fe_add(two_z1z2, z1, z2);
    fe_sqr(two_z1z2, two_z1z2);
    fe_sub(two_z1z2, two_z1z2, z1z1);
    fe_sub(two_z1z2, two_z1z2, z2z2);

    // s1 = y1 * z2**3
    fe_mul(s1, z2, z2z2);
    fe_mul(s1, s1, y1);
  } else {
    // We'll assume z2 = 1 (special case z2 = 0 is handled later).

    // u1 = x1*z2z2
    fe_copy(u1, x1);
    // two_z1z2 = 2z1z2
    fe_add(two_z1z2, z1, z1);
    // s1 = y1 * z2**3
    fe_copy(s1, y1);
  }

  // u2 = x2*z1z1
  fe u2; fe_mul(u2, x2, z1z1);

  // h = u2 - u1
  fe h; fe_sub(h, u2, u1);

  // xneq is non-zero when h != 0.
  limb_t xneq = fe_nz(h);

  // z_out = two_z1z2 * h
  fe_mul(z_out, h, two_z1z2);

  // z1z1z1 = z1 * z1z1
  fe z1z1z1; fe_mul(z1z1z1, z1, z1z1);

  // s2 = y2 * z1**3
  fe s2; fe_mul(s2, y2, z1z1z1);

  // r = (s2 - s1)*2
  fe r;
  fe_sub(r, s2, s1);
  fe_add(r, r, r);

  // yneq is non-zero when r != 0.
  limb_t yneq = fe_nz(r);

  // Both differences are zero and neither z is zero: the inputs are the
  // same point, so delegate to point_double.
  if (!xneq && !yneq && z1nz && z2nz) {
    point_double(x3, y3, z3, x1, y1, z1);
    return;
  }

  // I = (2h)**2
  fe i;
  fe_add(i, h, h);
  fe_sqr(i, i);

  // J = h * I
  fe j; fe_mul(j, h, i);

  // V = U1 * I
  fe v; fe_mul(v, u1, i);

  // x_out = r**2 - J - 2V
  fe_sqr(x_out, r);
  fe_sub(x_out, x_out, j);
  fe_sub(x_out, x_out, v);
  fe_sub(x_out, x_out, v);

  // y_out = r(V-x_out) - 2 * s1 * J
  fe_sub(y_out, v, x_out);
  fe_mul(y_out, y_out, r);
  fe s1j;
  fe_mul(s1j, s1, j);
  fe_sub(y_out, y_out, s1j);
  fe_sub(y_out, y_out, s1j);

  // Final selection between the computed sum and the inputs.
  // NOTE(review): presumably fe_cmovznz picks its third argument when the
  // condition limb is zero and its fourth otherwise, so that z1 = 0 yields
  // (x2, y2, z2) and z2 = 0 yields (x1, y1, z1) -- confirm against the
  // definition of fe_cmovznz.
  fe_cmovznz(x_out, z1nz, x2, x_out);
  fe_cmovznz(x3, z2nz, x1, x_out);
  fe_cmovznz(y_out, z1nz, y2, y_out);
  fe_cmovznz(y3, z2nz, y1, y_out);
  fe_cmovznz(z_out, z1nz, z2, z_out);
  fe_cmovznz(z3, z2nz, z1, z_out);
}