/*
 * Salsa20 core function.
 *
 * Takes a private copy of the input words, runs `rounds` rounds of
 * quarter-round mixing over it (two rounds per loop pass), then adds the
 * untouched input back in word by word and stores every sum into dst after
 * passing it through LE_SWAP32.
 *
 *   dst    - receives _SALSA20_INPUT_LENGTH output words.
 *   src    - _SALSA20_INPUT_LENGTH input words; only read here.
 *   rounds - number of rounds; must be even (enforced with assert).
 */
void
_salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
{
  /* Working state.  The name `x` is kept on purpose: DEBUG is defined
     outside this block and presumably inspects the array by name
     -- TODO confirm against the macro definition. */
  uint32_t x[_SALSA20_INPUT_LENGTH];
  unsigned i;

  assert ((rounds & 1) == 0);

  /* Mix a copy so that src is still available for the final addition. */
  for (i = 0; i < _SALSA20_INPUT_LENGTH; i++)
    x[i] = src[i];

  i = 0;
  while (i < rounds)
    {
      /* Round i: index groups that step through the state by four. */
      DEBUG (i);
      QROUND (x[0],  x[4],  x[8],  x[12]);
      QROUND (x[5],  x[9],  x[13], x[1]);
      QROUND (x[10], x[14], x[2],  x[6]);
      QROUND (x[15], x[3],  x[7],  x[11]);

      /* Round i + 1: index groups taken from four consecutive words. */
      DEBUG (i + 1);
      QROUND (x[0],  x[1],  x[2],  x[3]);
      QROUND (x[5],  x[6],  x[7],  x[4]);
      QROUND (x[10], x[11], x[8],  x[9]);
      QROUND (x[15], x[12], x[13], x[14]);

      i += 2;
    }
  DEBUG (i);

  /* Feed-forward: add the original input, then store via LE_SWAP32. */
  for (i = 0; i < _SALSA20_INPUT_LENGTH; i++)
    {
      uint32_t sum = x[i] + src[i];

      dst[i] = LE_SWAP32 (sum);
    }
}
/*
 * Salsa20 core operating on words that are converted with LE_SWAP32 on the
 * way in and on the way out.
 *
 * Each input word is passed through LE_SWAP32 into a local working array,
 * which is then mixed for `rounds` rounds (two per loop pass).  Finally each
 * mixed word is added to the LE_SWAP32-converted input word and the sum is
 * stored into dst through LE_SWAP32 again.
 *
 *   dst    - receives SALSA20_INPUT_LENGTH output words.
 *   src    - SALSA20_INPUT_LENGTH input words; only read here.
 *   rounds - number of rounds; must be even (enforced with assert).
 */
static void
salsa20_core (u32 *dst, const u32 *src, unsigned int rounds)
{
  u32 state[SALSA20_INPUT_LENGTH];
  unsigned int idx;
  unsigned int round;

  assert ((rounds & 1) == 0);

  /* Load the input through LE_SWAP32 into the working array. */
  for (idx = 0; idx < SALSA20_INPUT_LENGTH; idx++)
    state[idx] = LE_SWAP32 (src[idx]);

  round = 0;
  while (round < rounds)
    {
      /* First round of the pair: index groups stepping by four. */
      QROUND (state[0],  state[4],  state[8],  state[12]);
      QROUND (state[5],  state[9],  state[13], state[1]);
      QROUND (state[10], state[14], state[2],  state[6]);
      QROUND (state[15], state[3],  state[7],  state[11]);

      /* Second round of the pair: groups of four consecutive words. */
      QROUND (state[0],  state[1],  state[2],  state[3]);
      QROUND (state[5],  state[6],  state[7],  state[4]);
      QROUND (state[10], state[11], state[8],  state[9]);
      QROUND (state[15], state[12], state[13], state[14]);

      round += 2;
    }

  /* Feed-forward with the converted input, then convert back on store. */
  for (idx = 0; idx < SALSA20_INPUT_LENGTH; idx++)
    {
      u32 sum = state[idx] + LE_SWAP32 (src[idx]);

      dst[idx] = LE_SWAP32 (sum);
    }
}
/*
 * Salsa20 core with a compile-time round count.
 *
 * Copies the input words into a scratch array, mixes the scratch array for
 * SALSA20_ROUNDS rounds (two per loop pass), then adds the untouched input
 * back in and writes each sum into dst through LE_SWAP32.
 *
 *   dst - receives SALSA20_INPUT_LENGTH output words.
 *   src - SALSA20_INPUT_LENGTH input words; only read here.
 */
static void
salsa20_core (u32 *dst, const u32 *src)
{
  /* Scratch state.  The name `pad` is kept on purpose: SALSA20_CORE_DEBUG
     is defined outside this block and presumably inspects the array by
     name -- TODO confirm against the macro definition. */
  u32 pad[SALSA20_INPUT_LENGTH];
  unsigned int i;

  memcpy (pad, src, sizeof pad);

  i = 0;
  while (i < SALSA20_ROUNDS)
    {
      /* Round i: index groups that step through the state by four. */
      SALSA20_CORE_DEBUG (i);
      QROUND (pad[0],  pad[4],  pad[8],  pad[12]);
      QROUND (pad[5],  pad[9],  pad[13], pad[1]);
      QROUND (pad[10], pad[14], pad[2],  pad[6]);
      QROUND (pad[15], pad[3],  pad[7],  pad[11]);

      /* Round i + 1: index groups taken from four consecutive words. */
      SALSA20_CORE_DEBUG (i + 1);
      QROUND (pad[0],  pad[1],  pad[2],  pad[3]);
      QROUND (pad[5],  pad[6],  pad[7],  pad[4]);
      QROUND (pad[10], pad[11], pad[8],  pad[9]);
      QROUND (pad[15], pad[12], pad[13], pad[14]);

      i += 2;
    }
  SALSA20_CORE_DEBUG (i);

  /* Feed-forward: add the original input, then store via LE_SWAP32. */
  for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
    {
      u32 sum = pad[i] + src[i];

      dst[i] = LE_SWAP32 (sum);
    }
}
/*
 * Salsa20 core driven from a cipher context.
 *
 * Copies ctx->input into a scratch array, mixes the scratch array for
 * `rounds` rounds (two per loop pass), adds the unmixed input back in and
 * writes each sum into dst through LE_SWAP32.  Afterwards the counter kept
 * in input words 8 and 9 is advanced by one: word 8 is incremented and,
 * when it wraps to zero, word 9 is incremented as well.
 *
 *   dst    - receives SALSA20_INPUT_LENGTH output words.
 *   ctx    - context whose `input` words are read and whose counter words
 *            are updated in place.
 *   rounds - number of rounds; consumed two at a time.
 *
 * Returns a byte count labelled "burn_stack" in the original source,
 * presumably the amount of stack the caller should wipe -- TODO confirm
 * against the callers.
 */
static unsigned int
salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned rounds)
{
  /* Scratch state.  The name `pad` is kept on purpose: SALSA20_CORE_DEBUG
     is defined outside this block and presumably inspects the array by
     name -- TODO confirm against the macro definition. */
  u32 pad[SALSA20_INPUT_LENGTH];
  u32 *src = ctx->input;
  unsigned int i;

  /* Mix a copy so that the context words survive for the final addition. */
  for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
    pad[i] = src[i];

  i = 0;
  while (i < rounds)
    {
      /* Round i: index groups that step through the state by four. */
      SALSA20_CORE_DEBUG (i);
      QROUND (pad[0],  pad[4],  pad[8],  pad[12]);
      QROUND (pad[5],  pad[9],  pad[13], pad[1]);
      QROUND (pad[10], pad[14], pad[2],  pad[6]);
      QROUND (pad[15], pad[3],  pad[7],  pad[11]);

      /* Round i + 1: index groups taken from four consecutive words. */
      SALSA20_CORE_DEBUG (i + 1);
      QROUND (pad[0],  pad[1],  pad[2],  pad[3]);
      QROUND (pad[5],  pad[6],  pad[7],  pad[4]);
      QROUND (pad[10], pad[11], pad[8],  pad[9]);
      QROUND (pad[15], pad[12], pad[13], pad[14]);

      i += 2;
    }
  SALSA20_CORE_DEBUG (i);

  /* Feed-forward: add the original input, then store via LE_SWAP32. */
  for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
    {
      u32 sum = pad[i] + src[i];

      dst[i] = LE_SWAP32 (sum);
    }

  /* Update counter: carry from word 8 into word 9 on wrap-around. */
  src[8]++;
  if (src[8] == 0)
    src[9]++;

  /* burn_stack */
  return (3 * sizeof (void *)
          + 2 * sizeof (void *)
          + 64
          + sizeof (unsigned int)
          + sizeof (u32));
}
/*
 * Produce one output block from the Salsa state held in ctx and advance
 * the block counter.
 *
 * The 16 state words are copied into a local array, mixed for
 * AKMOS_SALSA_ROUNDS / 2 loop passes of 32 QROUND steps each, and then the
 * context words are added back in.  After that the counter in ctx->s[8] is
 * incremented, carrying into ctx->s[9] when it wraps to zero.  Finally
 * AKMOS_SALSA_BLKLEN / 4 words are written to out_blk with UNPACK32BE,
 * four bytes per word.
 *
 *   ctx     - cipher context; ctx->s is read, and s[8]/s[9] are updated.
 *   out_blk - destination buffer; must have room for the bytes written
 *             (4 * (AKMOS_SALSA_BLKLEN / 4)).
 */
void akmos_salsa_stream(akmos_salsa_t *ctx, uint8_t *out_blk)
{
    /* `s` must keep this name: QROUND takes bare word indices, so it
     * presumably addresses the local array implicitly -- TODO confirm
     * against the macro definition. */
    uint32_t s[16];
    size_t i;
    size_t w;

    for (w = 0; w < 16; w++) {
        s[w] = ctx->s[w];
    }

    /* Each pass performs 32 QROUND steps.  The first three arguments are
     * word indices; the last (7, 9, 13, 18) is presumably a rotation
     * amount -- verify against the QROUND definition. */
    for (i = 0; i < AKMOS_SALSA_ROUNDS / 2; i++) {
        /* First half of the pass. */
        QROUND( 4,  0, 12,  7);
        QROUND( 9,  5,  1,  7);
        QROUND(14, 10,  6,  7);
        QROUND( 3, 15, 11,  7);

        QROUND( 8,  4,  0,  9);
        QROUND(13,  9,  5,  9);
        QROUND( 2, 14, 10,  9);
        QROUND( 7,  3, 15,  9);

        QROUND(12,  8,  4, 13);
        QROUND( 1, 13,  9, 13);
        QROUND( 6,  2, 14, 13);
        QROUND(11,  7,  3, 13);

        QROUND( 0, 12,  8, 18);
        QROUND( 5,  1, 13, 18);
        QROUND(10,  6,  2, 18);
        QROUND(15, 11,  7, 18);

        /* Second half of the pass. */
        QROUND( 1,  0,  3,  7);
        QROUND( 6,  5,  4,  7);
        QROUND(11, 10,  9,  7);
        QROUND(12, 15, 14,  7);

        QROUND( 2,  1,  0,  9);
        QROUND( 7,  6,  5,  9);
        QROUND( 8, 11, 10,  9);
        QROUND(13, 12, 15,  9);

        QROUND( 3,  2,  1, 13);
        QROUND( 4,  7,  6, 13);
        QROUND( 9,  8, 11, 13);
        QROUND(14, 13, 12, 13);

        QROUND( 0,  3,  2, 18);
        QROUND( 5,  4,  7, 18);
        QROUND(10,  9,  8, 18);
        QROUND(15, 14, 13, 18);
    }

    /* Feed-forward must happen before the counter changes, because it
     * reads ctx->s[8] and ctx->s[9]. */
    for (w = 0; w < 16; w++) {
        s[w] += ctx->s[w];
    }

    /* Advance the counter: carry into word 9 when word 8 wraps. */
    if (++ctx->s[8] == 0) {
        ctx->s[9]++;
    }

    /* Serialise the block, four output bytes per state word. */
    for (w = 0; w < AKMOS_SALSA_BLKLEN / 4; w++) {
        UNPACK32BE(out_blk, s[w]);
        out_blk += 4;
    }
}