static void
scrypt_ro_mix (u32 r, unsigned char *B, u64 N,
               unsigned char *tmp1, unsigned char *tmp2)
{
  unsigned char *X = B, *T = B;
  u64 i;

#if 0
  if (r == 1)
    {
      printf ("B = ");
      for (i = 0; i < 128 * r; i++)
        {
          if (i && !(i % 16))
            printf ("\n ");
          printf (" %02x", B[i]);
        }
      putchar ('\n');
    }
#endif

  /* for i = 0 to N - 1 do */
  for (i = 0; i <= N - 1; i++)
    {
      /* V[i] = X */
      memcpy (&tmp1[i * 128 * r], X, 128 * r);

      /* X = ScryptBlockMix (X) */
      scrypt_block_mix (r, X, tmp2);
    }

  /* for i = 0 to N - 1 do */
  for (i = 0; i <= N - 1; i++)
    {
      u64 j;

      /* j = Integerify (X) mod N */
      j = LE_READ_UINT64 (&X[128 * r - 64]) % N;

      /* T = X xor V[j] */
      buf_xor (T, T, &tmp1[j * 128 * r], 128 * r);

      /* X = scryptBlockMix (T) */
      scrypt_block_mix (r, T, tmp2);
    }

#if 0
  if (r == 1)
    {
      printf ("B' =");
      for (i = 0; i < 128 * r; i++)
        {
          if (i && !(i % 16))
            printf ("\n ");
          printf (" %02x", B[i]);
        }
      putchar ('\n');
    }
#endif
}
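/* Hedged sketch: the Integerify step above interprets the first eight bytes
 * of the last 64-byte sub-block of X as a little-endian integer before
 * reducing it mod N.  LE_READ_UINT64 is assumed to behave like the portable
 * helper below; this only documents the assumed semantics and is not the
 * project's actual definition. */
static u64
le_read_uint64_sketch (const unsigned char *p)
{
  u64 v = 0;
  int i;

  /* Accumulate from the most significant byte (p[7]) down to the least
     significant one (p[0]), so p[0] ends up as the low-order byte.  */
  for (i = 7; i >= 0; i--)
    v = (v << 8) | p[i];
  return v;
}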
/* Note: This function requires LENGTH > 0.  */
static void
salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
                           byte *outbuf, const byte *inbuf,
                           unsigned int length)
{
  if (ctx->unused)
    {
      unsigned char *p = (void*)ctx->pad;
      unsigned int n;

      gcry_assert (ctx->unused < SALSA20_BLOCK_SIZE);

      n = ctx->unused;
      if (n > length)
        n = length;
      buf_xor (outbuf, inbuf, p + SALSA20_BLOCK_SIZE - ctx->unused, n);
      length -= n;
      outbuf += n;
      inbuf  += n;
      ctx->unused -= n;
      if (!length)
        return;
      gcry_assert (!ctx->unused);
    }

  for (;;)
    {
      /* Create the next pad and bump the block counter.  Note that it
         is the user's duty to change to another nonce not later than
         after 2^70 processed bytes.  */
      salsa20_core (ctx->pad, ctx->input);
      if (!++ctx->input[8])
        ctx->input[9]++;

      if (length <= SALSA20_BLOCK_SIZE)
        {
          buf_xor (outbuf, inbuf, ctx->pad, length);
          ctx->unused = SALSA20_BLOCK_SIZE - length;
          return;
        }
      buf_xor (outbuf, inbuf, ctx->pad, SALSA20_BLOCK_SIZE);
      length -= SALSA20_BLOCK_SIZE;
      outbuf += SALSA20_BLOCK_SIZE;
      inbuf  += SALSA20_BLOCK_SIZE;
    }
}
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to
   be of size BLOWFISH_BLOCKSIZE. */
void
_gcry_blowfish_ctr_enc(void *context, unsigned char *ctr,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
{
  BLOWFISH_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char tmpbuf[BLOWFISH_BLOCKSIZE];
  int burn_stack_depth = (64) + 2 * BLOWFISH_BLOCKSIZE;
  int i;

#ifdef USE_AMD64_ASM
  {
    if (nblocks >= 4)
      burn_stack_depth += 5 * sizeof(void*);

    /* Process data in 4 block chunks. */
    while (nblocks >= 4)
      {
        _gcry_blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 4;
        outbuf += 4 * BLOWFISH_BLOCKSIZE;
        inbuf  += 4 * BLOWFISH_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
    /* TODO: use caching instead? */
  }
#elif defined(USE_ARM_ASM)
  {
    /* Process data in 2 block chunks. */
    while (nblocks >= 2)
      {
        _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 2;
        outbuf += 2 * BLOWFISH_BLOCKSIZE;
        inbuf  += 2 * BLOWFISH_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
    /* TODO: use caching instead? */
  }
#endif

  for ( ;nblocks; nblocks-- )
    {
      /* Encrypt the counter. */
      do_encrypt_block(ctx, tmpbuf, ctr);
      /* XOR the input with the encrypted counter and store in output. */
      buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE);
      outbuf += BLOWFISH_BLOCKSIZE;
      inbuf  += BLOWFISH_BLOCKSIZE;
      /* Increment the counter. */
      for (i = BLOWFISH_BLOCKSIZE; i > 0; i--)
        {
          ctr[i-1]++;
          if (ctr[i-1])
            break;
        }
    }

  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}
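/* Hedged sketch: the generic tail loop above (and the same loop in the other
 * bulk CTR helpers below) treats the counter block as one big-endian integer
 * and adds one to it.  Pulled out as a stand-alone helper it would look like
 * this; the name is illustrative, the real code keeps the loop inline. */
static void
ctr_increment_be_sketch (unsigned char *ctr, size_t len)
{
  size_t i;

  for (i = len; i > 0; i--)
    {
      ctr[i - 1]++;
      if (ctr[i - 1])   /* Stop as soon as a byte did not wrap to zero. */
        break;
    }
}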
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to
   be of size CAMELLIA_BLOCK_SIZE. */
void
_gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
{
  CAMELLIA_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE];
  int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
  int i;

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      int did_use_aesni_avx2 = 0;

      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          _gcry_camellia_aesni_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 32;
          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
          inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx2 = 1;
        }

      if (did_use_aesni_avx2)
        {
          int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
                                        2 * sizeof(void *) + ASM_EXTRA_STACK;

          if (burn_stack_depth < avx2_burn_stack_depth)
            burn_stack_depth = avx2_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      int did_use_aesni_avx = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
          inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx = 1;
        }

      if (did_use_aesni_avx)
        {
          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
                                       2 * sizeof(void *) + ASM_EXTRA_STACK;

          if (burn_stack_depth < avx_burn_stack_depth)
            burn_stack_depth = avx_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

  for ( ;nblocks; nblocks-- )
    {
      /* Encrypt the counter. */
      Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
      /* XOR the input with the encrypted counter and store in output. */
      buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
      outbuf += CAMELLIA_BLOCK_SIZE;
      inbuf  += CAMELLIA_BLOCK_SIZE;
      /* Increment the counter. */
      for (i = CAMELLIA_BLOCK_SIZE; i > 0; i--)
        {
          ctr[i-1]++;
          if (ctr[i-1])
            break;
        }
    }

  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}
static void
scrypt_block_mix (u32 r, unsigned char *B, unsigned char *tmp2)
{
  u64 i;
  unsigned char *X = tmp2;
  unsigned char *Y = tmp2 + 64;

#if 0
  if (r == 1)
    {
      for (i = 0; i < 2 * r; i++)
        {
          size_t j;
          printf ("B[%d] = ", (int)i);
          for (j = 0; j < 64; j++)
            {
              if (j && !(j % 16))
                printf ("\n ");
              printf (" %02x", B[i * 64 + j]);
            }
          putchar ('\n');
        }
    }
#endif

  /* X = B[2 * r - 1] */
  memcpy (X, &B[(2 * r - 1) * 64], 64);

  /* for i = 0 to 2 * r - 1 do */
  for (i = 0; i <= 2 * r - 1; i++)
    {
      /* T = X xor B[i] */
      buf_xor (X, X, &B[i * 64], 64);

      /* X = Salsa (T) */
      salsa20_core ((u32*)(void*)X, (u32*)(void*)X, 8);

      /* Y[i] = X */
      memcpy (&Y[i * 64], X, 64);
    }

  for (i = 0; i < r; i++)
    {
      memcpy (&B[i * 64], &Y[2 * i * 64], 64);
      memcpy (&B[(r + i) * 64], &Y[(2 * i + 1) * 64], 64);
    }

#if 0
  if (r == 1)
    {
      for (i = 0; i < 2 * r; i++)
        {
          size_t j;
          printf ("B'[%d] =", (int)i);
          for (j = 0; j < 64; j++)
            {
              if (j && !(j % 16))
                printf ("\n ");
              printf (" %02x", B[i * 64 + j]);
            }
          putchar ('\n');
        }
    }
#endif
}
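/* Hedged sketch: buf_xor() from libgcrypt's buffer helpers XORs two source
 * buffers into a destination; the scrypt code above relies on the destination
 * being allowed to equal the first source (X ^= B[i]).  A byte-wise
 * equivalent, written only to document the assumed semantics: */
static void
buf_xor_sketch (void *dst, const void *src1, const void *src2, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s1 = src1;
  const unsigned char *s2 = src2;

  /* Both inputs are read before each output byte is written, so dst may
     alias either input. */
  while (len--)
    *d++ = *s1++ ^ *s2++;
}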
gcry_err_code_t
_gcry_cipher_cbc_encrypt (gcry_cipher_hd_t c,
                          unsigned char *outbuf, size_t outbuflen,
                          const unsigned char *inbuf, size_t inbuflen)
{
  size_t n;
  unsigned char *ivp;
  int i;
  size_t blocksize = c->spec->blocksize;
  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
  size_t nblocks = inbuflen / blocksize;
  unsigned int burn, nburn;

  /* Tell compiler that we require a cipher with a 64bit or 128 bit block
   * length, to allow better optimization of this function.  */
  if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1))
    return GPG_ERR_INV_LENGTH;

  if (outbuflen < ((c->flags & GCRY_CIPHER_CBC_MAC) ? blocksize : inbuflen))
    return GPG_ERR_BUFFER_TOO_SHORT;

  if ((inbuflen % blocksize)
      && !(inbuflen > blocksize
           && (c->flags & GCRY_CIPHER_CBC_CTS)))
    return GPG_ERR_INV_LENGTH;

  burn = 0;

  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
    {
      if ((inbuflen % blocksize) == 0)
        nblocks--;
    }

  if (c->bulk.cbc_enc)
    {
      c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks,
                       (c->flags & GCRY_CIPHER_CBC_MAC));
      inbuf += nblocks * blocksize;
      if (!(c->flags & GCRY_CIPHER_CBC_MAC))
        outbuf += nblocks * blocksize;
    }
  else
    {
      ivp = c->u_iv.iv;

      for (n=0; n < nblocks; n++ )
        {
          buf_xor (outbuf, inbuf, ivp, blocksize);
          nburn = enc_fn ( &c->context.c, outbuf, outbuf );
          burn = nburn > burn ? nburn : burn;
          ivp = outbuf;
          inbuf += blocksize;
          if (!(c->flags & GCRY_CIPHER_CBC_MAC))
            outbuf += blocksize;
        }

      if (ivp != c->u_iv.iv)
        buf_cpy (c->u_iv.iv, ivp, blocksize );
    }

  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
    {
      /* We have to be careful here, since outbuf might be equal to
         inbuf.  */
      size_t restbytes;
      unsigned char b;

      if ((inbuflen % blocksize) == 0)
        restbytes = blocksize;
      else
        restbytes = inbuflen % blocksize;

      outbuf -= blocksize;
      for (ivp = c->u_iv.iv, i = 0; i < restbytes; i++)
        {
          b = inbuf[i];
          outbuf[blocksize + i] = outbuf[i];
          outbuf[i] = b ^ *ivp++;
        }
      for (; i < blocksize; i++)
        outbuf[i] = 0 ^ *ivp++;

      nburn = enc_fn (&c->context.c, outbuf, outbuf);
      burn = nburn > burn ? nburn : burn;

      buf_cpy (c->u_iv.iv, outbuf, blocksize);
    }

  if (burn > 0)
    _gcry_burn_stack (burn + 4 * sizeof(void *));

  return 0;
}
gcry_err_code_t
_gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c,
                          unsigned char *outbuf, size_t outbuflen,
                          const unsigned char *inbuf, size_t inbuflen)
{
  size_t n;
  int i;
  size_t blocksize = c->spec->blocksize;
  gcry_cipher_decrypt_t dec_fn = c->spec->decrypt;
  size_t nblocks = inbuflen / blocksize;
  unsigned int burn, nburn;

  /* Tell compiler that we require a cipher with a 64bit or 128 bit block
   * length, to allow better optimization of this function.  */
  if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1))
    return GPG_ERR_INV_LENGTH;

  if (outbuflen < inbuflen)
    return GPG_ERR_BUFFER_TOO_SHORT;

  if ((inbuflen % blocksize)
      && !(inbuflen > blocksize
           && (c->flags & GCRY_CIPHER_CBC_CTS)))
    return GPG_ERR_INV_LENGTH;

  burn = 0;

  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
    {
      nblocks--;
      if ((inbuflen % blocksize) == 0)
        nblocks--;
      buf_cpy (c->lastiv, c->u_iv.iv, blocksize);
    }

  if (c->bulk.cbc_dec)
    {
      c->bulk.cbc_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
      inbuf  += nblocks * blocksize;
      outbuf += nblocks * blocksize;
    }
  else
    {
      for (n=0; n < nblocks; n++ )
        {
          /* Because outbuf and inbuf might be the same, we must not overwrite
             the original ciphertext block.  We use LASTIV as intermediate
             storage here because it is not used otherwise.  */
          nburn = dec_fn ( &c->context.c, c->lastiv, inbuf );
          burn = nburn > burn ? nburn : burn;
          buf_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf, blocksize);
          inbuf  += blocksize;
          outbuf += blocksize;
        }
    }

  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
    {
      size_t restbytes;

      if ((inbuflen % blocksize) == 0)
        restbytes = blocksize;
      else
        restbytes = inbuflen % blocksize;

      buf_cpy (c->lastiv, c->u_iv.iv, blocksize );         /* Save Cn-2. */
      buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */

      nburn = dec_fn ( &c->context.c, outbuf, inbuf );
      burn = nburn > burn ? nburn : burn;
      buf_xor (outbuf, outbuf, c->u_iv.iv, restbytes);

      buf_cpy (outbuf + blocksize, outbuf, restbytes);
      for (i = restbytes; i < blocksize; i++)
        c->u_iv.iv[i] = outbuf[i];
      nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv);
      burn = nburn > burn ? nburn : burn;
      buf_xor (outbuf, outbuf, c->lastiv, blocksize);
      /* c->lastiv is now really lastlastiv, does this matter?  */
    }

  if (burn > 0)
    _gcry_burn_stack (burn + 4 * sizeof(void *));

  return 0;
}
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to
   be of size sizeof(serpent_block_t). */
void
_gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char tmpbuf[sizeof(serpent_block_t)];
  int burn_stack_depth = 2 * sizeof (serpent_block_t);
  int i;

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf  += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf  += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
    /* TODO: use caching instead? */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf  += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

  for ( ;nblocks; nblocks-- )
    {
      /* Encrypt the counter. */
      serpent_encrypt_internal(ctx, ctr, tmpbuf);
      /* XOR the input with the encrypted counter and store in output. */
      buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
      outbuf += sizeof(serpent_block_t);
      inbuf  += sizeof(serpent_block_t);
      /* Increment the counter. */
      for (i = sizeof(serpent_block_t); i > 0; i--)
        {
          ctr[i-1]++;
          if (ctr[i-1])
            break;
        }
    }

  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}
gcry_err_code_t
_gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c,
                          unsigned char *outbuf, size_t outbuflen,
                          const unsigned char *inbuf, size_t inbuflen)
{
  size_t n;
  int i;
  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
  unsigned int blocksize = c->spec->blocksize;
  size_t nblocks;
  unsigned int burn, nburn;

  /* Tell compiler that we require a cipher with a 64bit or 128 bit block
   * length, to allow better optimization of this function.  */
  if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1))
    return GPG_ERR_INV_LENGTH;

  if (outbuflen < inbuflen)
    return GPG_ERR_BUFFER_TOO_SHORT;

  burn = 0;

  /* First process a left over encrypted counter.  */
  if (c->unused)
    {
      gcry_assert (c->unused < blocksize);
      i = blocksize - c->unused;
      n = c->unused > inbuflen ? inbuflen : c->unused;
      buf_xor (outbuf, inbuf, &c->lastiv[i], n);
      c->unused -= n;
      inbuf  += n;
      outbuf += n;
      inbuflen -= n;
    }

  /* Use a bulk method if available.  */
  nblocks = inbuflen / blocksize;
  if (nblocks && c->bulk.ctr_enc)
    {
      c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks);
      inbuf  += nblocks * blocksize;
      outbuf += nblocks * blocksize;
      inbuflen -= nblocks * blocksize;
    }

  /* If we don't have a bulk method use the standard method.  We also
     use this method for a remaining partial block.  */
  if (inbuflen)
    {
      unsigned char tmp[MAX_BLOCKSIZE];

      do
        {
          nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr);
          burn = nburn > burn ? nburn : burn;

          for (i = blocksize; i > 0; i--)
            {
              c->u_ctr.ctr[i-1]++;
              if (c->u_ctr.ctr[i-1] != 0)
                break;
            }

          n = blocksize < inbuflen ? blocksize : inbuflen;
          buf_xor (outbuf, inbuf, tmp, n);

          inbuflen -= n;
          outbuf += n;
          inbuf += n;
        }
      while (inbuflen);

      /* Save the unused bytes of the counter.  */
      c->unused = blocksize - n;
      if (c->unused)
        buf_cpy (c->lastiv+n, tmp+n, c->unused);

      wipememory (tmp, sizeof tmp);
    }

  if (burn > 0)
    _gcry_burn_stack (burn + 4 * sizeof(void *));

  return 0;
}
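/* Hedged usage sketch: _gcry_cipher_ctr_encrypt() is normally reached through
 * the public API with GCRY_CIPHER_MODE_CTR, where gcry_cipher_setctr() seeds
 * c->u_ctr.ctr.  The function and buffer names below are illustrative; the
 * cipher choice (AES-128) is an assumption made for this example. */
#include <gcrypt.h>

static gcry_error_t
ctr_encrypt_sketch (const unsigned char key[16], const unsigned char ctr[16],
                    unsigned char *buf, size_t len)
{
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  err = gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_CTR, 0);
  if (err)
    return err;

  err = gcry_cipher_setkey (hd, key, 16);
  if (!err)
    err = gcry_cipher_setctr (hd, ctr, 16);
  if (!err)
    err = gcry_cipher_encrypt (hd, buf, len, NULL, 0);  /* In-place. */

  gcry_cipher_close (hd);
  return err;
}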
/* Run the self-tests for <block cipher>-CBC-<block size>, tests bulk CBC
   decryption.  Returns NULL on success. */
const char *
_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func,
                           gcry_cipher_encrypt_t encrypt_one,
                           gcry_cipher_bulk_cbc_dec_t bulk_cbc_dec,
                           const int nblocks, const int blocksize,
                           const int context_size)
{
  int i, offs;
  unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
  unsigned int ctx_aligned_size, memsize;

  // blad3master: change alignment to MSVC specific
  //static const unsigned char key[16] ATTR_ALIGNED_16 = {
  __declspec(align(16)) static const unsigned char key[16] = {
      0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22
    };

  /* Allocate buffers, align first two elements to 16 bytes and latter to
     block size.  */
  ctx_aligned_size = context_size + 15;
  ctx_aligned_size -= ctx_aligned_size & 0xf;

  memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 16;

  mem = xtrycalloc (1, memsize);
  if (!mem)
    return "failed to allocate memory";

  offs = (16 - ((uintptr_t)mem & 15)) & 15;
  ctx = (void*)(mem + offs);
  iv = ctx + ctx_aligned_size;
  iv2 = iv + blocksize;
  plaintext = iv2 + blocksize;
  plaintext2 = plaintext + nblocks * blocksize;
  ciphertext = plaintext2 + nblocks * blocksize;

  /* Initialize ctx */
  setkey_func (ctx, key, sizeof(key));

  /* Test single block code path */
  memset (iv, 0x4e, blocksize);
  memset (iv2, 0x4e, blocksize);
  for (i = 0; i < blocksize; i++)
    plaintext[i] = i;

  /* CBC manually.  */
  buf_xor (ciphertext, iv, plaintext, blocksize);
  encrypt_one (ctx, ciphertext, ciphertext);
  memcpy (iv, ciphertext, blocksize);

  /* CBC decrypt.  */
  bulk_cbc_dec (ctx, iv2, plaintext2, ciphertext, 1);
  if (memcmp (plaintext2, plaintext, blocksize))
    {
      xfree (mem);
#ifdef HAVE_SYSLOG
      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
              "%s-CBC-%d test failed (plaintext mismatch)", cipher,
              blocksize * 8);
#endif
      return "selftest for CBC failed - see syslog for details";
    }

  if (memcmp (iv2, iv, blocksize))
    {
      xfree (mem);
#ifdef HAVE_SYSLOG
      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
              "%s-CBC-%d test failed (IV mismatch)", cipher, blocksize * 8);
#endif
      return "selftest for CBC failed - see syslog for details";
    }

  /* Test parallelized code paths */
  memset (iv, 0x5f, blocksize);
  memset (iv2, 0x5f, blocksize);

  for (i = 0; i < nblocks * blocksize; i++)
    plaintext[i] = i;

  /* Create CBC ciphertext manually.  */
  for (i = 0; i < nblocks * blocksize; i+=blocksize)
    {
      buf_xor (&ciphertext[i], iv, &plaintext[i], blocksize);
      encrypt_one (ctx, &ciphertext[i], &ciphertext[i]);
      memcpy (iv, &ciphertext[i], blocksize);
    }

  /* Decrypt using bulk CBC and compare result.  */
  bulk_cbc_dec (ctx, iv2, plaintext2, ciphertext, nblocks);

  if (memcmp (plaintext2, plaintext, nblocks * blocksize))
    {
      xfree (mem);
#ifdef HAVE_SYSLOG
      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
              "%s-CBC-%d test failed (plaintext mismatch, parallel path)",
              cipher, blocksize * 8);
#endif
      return "selftest for CBC failed - see syslog for details";
    }

  if (memcmp (iv2, iv, blocksize))
    {
      xfree (mem);
#ifdef HAVE_SYSLOG
      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
              "%s-CBC-%d test failed (IV mismatch, parallel path)", cipher,
              blocksize * 8);
#endif
      return "selftest for CBC failed - see syslog for details";
    }

  xfree (mem);
  return NULL;
}
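/* Hedged sketch: a cipher module wires its own primitives into the helper
 * above roughly as follows.  Every my_* name is a hypothetical placeholder
 * for the module's context type, setkey, single-block encrypt and bulk CBC
 * decrypt functions; only _gcry_selftest_helper_cbc and its parameter order
 * come from the code above. */
static const char *
selftest_cbc_sketch (void)
{
  const int nblocks = 8 + 2;    /* A few blocks beyond one bulk-path chunk. */
  const int blocksize = 16;     /* Hypothetical 128-bit block cipher. */
  const int context_size = sizeof (my_cipher_context_t);

  return _gcry_selftest_helper_cbc ("MYCIPHER", &my_cipher_setkey,
                                    &my_cipher_encrypt_block,
                                    &_gcry_my_cipher_cbc_dec,
                                    nblocks, blocksize, context_size);
}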
/* Perform the AES-Wrap algorithm as specified by RFC3394.  We
   implement this as a mode usable with any cipher algorithm of
   blocksize 128.  */
gcry_err_code_t
_gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c,
                              byte *outbuf, size_t outbuflen,
                              const byte *inbuf, size_t inbuflen )
{
  int j, x;
  size_t n, i;
  unsigned char *r, *a, *b;
  unsigned char t[8];
  unsigned int burn, nburn;

#if MAX_BLOCKSIZE < 8
#error Invalid block size
#endif
  /* We require a cipher with a 128 bit block length.  */
  if (c->spec->blocksize != 16)
    return GPG_ERR_INV_LENGTH;

  /* The output buffer must be able to hold the input data plus one
     additional block.  */
  if (outbuflen < inbuflen + 8)
    return GPG_ERR_BUFFER_TOO_SHORT;
  /* Input data must be multiple of 64 bits.  */
  if (inbuflen % 8)
    return GPG_ERR_INV_ARG;

  n = inbuflen / 8;

  /* We need at least two 64 bit blocks.  */
  if (n < 2)
    return GPG_ERR_INV_ARG;

  burn = 0;

  r = outbuf;
  a = outbuf;         /* We store A directly in OUTBUF.  */
  b = c->u_ctr.ctr;   /* B is also used to concatenate stuff.  */

  /* If an IV has been set we use that IV as the Alternative Initial
     Value; if it has not been set we use the standard value.  */
  if (c->marks.iv)
    memcpy (a, c->u_iv.iv, 8);
  else
    memset (a, 0xa6, 8);

  /* Copy the inbuf to the outbuf.  */
  memmove (r+8, inbuf, inbuflen);

  memset (t, 0, sizeof t);  /* t := 0.  */

  for (j = 0; j <= 5; j++)
    {
      for (i = 1; i <= n; i++)
        {
          /* B := AES_k( A | R[i] ) */
          memcpy (b, a, 8);
          memcpy (b+8, r+i*8, 8);
          nburn = c->spec->encrypt (&c->context.c, b, b);
          burn = nburn > burn ? nburn : burn;
          /* t := t + 1  */
          for (x = 7; x >= 0; x--)
            {
              t[x]++;
              if (t[x])
                break;
            }
          /* A := MSB_64(B) ^ t */
          buf_xor (a, b, t, 8);
          /* R[i] := LSB_64(B) */
          memcpy (r+i*8, b+8, 8);
        }
    }

  if (burn > 0)
    _gcry_burn_stack (burn + 4 * sizeof(void *));

  return 0;
}
/* Perform the AES-Unwrap algorithm as specified by RFC3394.  We
   implement this as a mode usable with any cipher algorithm of
   blocksize 128.  */
gcry_err_code_t
_gcry_cipher_aeswrap_decrypt (gcry_cipher_hd_t c,
                              byte *outbuf, size_t outbuflen,
                              const byte *inbuf, size_t inbuflen)
{
  int j, x;
  size_t n, i;
  unsigned char *r, *a, *b;
  unsigned char t[8];
  unsigned int burn, nburn;

#if MAX_BLOCKSIZE < 8
#error Invalid block size
#endif
  /* We require a cipher with a 128 bit block length.  */
  if (c->spec->blocksize != 16)
    return GPG_ERR_INV_LENGTH;

  /* The output buffer must be able to hold the input data minus one
     additional block.  Fixme: The caller has more restrictive checks -
     we may want to fix them for this mode.  */
  if (outbuflen + 8 < inbuflen)
    return GPG_ERR_BUFFER_TOO_SHORT;
  /* Input data must be multiple of 64 bits.  */
  if (inbuflen % 8)
    return GPG_ERR_INV_ARG;

  n = inbuflen / 8;

  /* We need at least three 64 bit blocks.  */
  if (n < 3)
    return GPG_ERR_INV_ARG;

  burn = 0;

  r = outbuf;
  a = c->lastiv;      /* We use c->LASTIV as buffer for A.  */
  b = c->u_ctr.ctr;   /* B is also used to concatenate stuff.  */

  /* Copy the inbuf to the outbuf and save A.  */
  memcpy (a, inbuf, 8);
  memmove (r, inbuf+8, inbuflen-8);
  n--;  /* Reduce to actual number of data blocks.  */

  /* t := 6 * n  */
  i = n * 6;  /* The range is valid because: n = inbuflen / 8 - 1.  */
  for (x = 0; x < 8 && x < sizeof (i); x++)
    t[7-x] = i >> (8*x);
  for (; x < 8; x++)
    t[7-x] = 0;

  for (j = 5; j >= 0; j--)
    {
      for (i = n; i >= 1; i--)
        {
          /* B := AES_k^-1( (A ^ t) | R[i] ) */
          buf_xor (b, a, t, 8);
          memcpy (b+8, r+(i-1)*8, 8);
          nburn = c->spec->decrypt (&c->context.c, b, b);
          burn = nburn > burn ? nburn : burn;
          /* t := t - 1  */
          for (x = 7; x >= 0; x--)
            {
              t[x]--;
              if (t[x] != 0xff)
                break;
            }
          /* A := MSB_64(B) */
          memcpy (a, b, 8);
          /* R[i] := LSB_64(B) */
          memcpy (r+(i-1)*8, b+8, 8);
        }
    }

  /* If an IV has been set we compare against this Alternative Initial
     Value; if it has not been set we compare against the standard IV.  */
  if (c->marks.iv)
    j = memcmp (a, c->u_iv.iv, 8);
  else
    {
      for (j = 0, x = 0; x < 8; x++)
        if (a[x] != 0xa6)
          {
            j = 1;
            break;
          }
    }

  if (burn > 0)
    _gcry_burn_stack (burn + 4 * sizeof(void *));

  return j ? GPG_ERR_CHECKSUM : 0;
}
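/* Hedged usage sketch: the wrap/unwrap pair above is reached through the
 * generic cipher API with GCRY_CIPHER_MODE_AESWRAP.  Per RFC 3394, wrapping
 * key_len bytes (a multiple of 8, at least 16) produces key_len + 8 bytes of
 * output.  The function name and the AES-128 KEK are assumptions made for
 * this example. */
#include <gcrypt.h>

static gcry_error_t
wrap_key_sketch (const unsigned char kek[16],
                 const unsigned char *key_data, size_t key_len,
                 unsigned char *wrapped /* key_len + 8 bytes */)
{
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  err = gcry_cipher_open (&hd, GCRY_CIPHER_AES128,
                          GCRY_CIPHER_MODE_AESWRAP, 0);
  if (err)
    return err;

  err = gcry_cipher_setkey (hd, kek, 16);
  if (!err)
    err = gcry_cipher_encrypt (hd, wrapped, key_len + 8, key_data, key_len);

  gcry_cipher_close (hd);
  return err;
}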
/* Note: This function requires LENGTH > 0.  */
static void
salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
                           byte *outbuf, const byte *inbuf,
                           unsigned int length, unsigned rounds)
{
  unsigned int nburn, burn = 0;

  if (ctx->unused)
    {
      unsigned char *p = (void*)ctx->pad;
      unsigned int n;

      gcry_assert (ctx->unused < SALSA20_BLOCK_SIZE);

      n = ctx->unused;
      if (n > length)
        n = length;

      buf_xor (outbuf, inbuf, p + SALSA20_BLOCK_SIZE - ctx->unused, n);
      length -= n;
      outbuf += n;
      inbuf  += n;
      ctx->unused -= n;
      if (!length)
        return;
      gcry_assert (!ctx->unused);
    }

#ifdef USE_AMD64
  if (length >= SALSA20_BLOCK_SIZE)
    {
      unsigned int nblocks = length / SALSA20_BLOCK_SIZE;
      burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
                                                nblocks, rounds);
      length -= SALSA20_BLOCK_SIZE * nblocks;
      outbuf += SALSA20_BLOCK_SIZE * nblocks;
      inbuf  += SALSA20_BLOCK_SIZE * nblocks;
    }
#endif

#ifdef USE_ARM_NEON_ASM
  if (ctx->use_neon && length >= SALSA20_BLOCK_SIZE)
    {
      unsigned int nblocks = length / SALSA20_BLOCK_SIZE;
      _gcry_arm_neon_salsa20_encrypt (outbuf, inbuf, nblocks, ctx->input,
                                      rounds);
      length -= SALSA20_BLOCK_SIZE * nblocks;
      outbuf += SALSA20_BLOCK_SIZE * nblocks;
      inbuf  += SALSA20_BLOCK_SIZE * nblocks;
    }
#endif

  while (length > 0)
    {
      /* Create the next pad and bump the block counter.  Note that it
         is the user's duty to change to another nonce not later than
         after 2^70 processed bytes.  */
      nburn = ctx->core (ctx->pad, ctx, rounds);
      burn = nburn > burn ? nburn : burn;

      if (length <= SALSA20_BLOCK_SIZE)
        {
          buf_xor (outbuf, inbuf, ctx->pad, length);
          ctx->unused = SALSA20_BLOCK_SIZE - length;
          break;
        }
      buf_xor (outbuf, inbuf, ctx->pad, SALSA20_BLOCK_SIZE);
      length -= SALSA20_BLOCK_SIZE;
      outbuf += SALSA20_BLOCK_SIZE;
      inbuf  += SALSA20_BLOCK_SIZE;
    }

  _gcry_burn_stack (burn);
}