/*
 * chacha20_core runs 20 rounds of ChaCha (ten passes, each a column round
 * followed by a diagonal round) over a working copy of the 16 words in
 * |input|, adds the original words back in, and stores the 16 sums into
 * |output| as 64 little-endian bytes.
 */
static void chacha20_core(chacha_buf *output, const u32 input[16])
{
    /*
     * The QUARTERROUND call sites pass only indices, so the macro presumably
     * refers to this working array by name; it therefore stays |x|.
     */
    u32 x[16];
    int n;
    /* Run-time byte-order probe: |little| overlays the first byte of |one|. */
    const union {
        long one;
        char little;
    } byte_order = { 1 };

    for (n = 0; n < 16; ++n)
        x[n] = input[n];

    for (n = 0; n < 10; ++n) {
        /* column round */
        QUARTERROUND(0, 4, 8, 12);
        QUARTERROUND(1, 5, 9, 13);
        QUARTERROUND(2, 6, 10, 14);
        QUARTERROUND(3, 7, 11, 15);
        /* diagonal round */
        QUARTERROUND(0, 5, 10, 15);
        QUARTERROUND(1, 6, 11, 12);
        QUARTERROUND(2, 7, 8, 13);
        QUARTERROUND(3, 4, 9, 14);
    }

    for (n = 0; n < 16; ++n) {
        if (byte_order.little) {
            /* Host order already matches the wire order: store whole words. */
            output->u[n] = x[n] + input[n];
        } else {
            /* Otherwise serialise each word byte by byte. */
            U32TO8_LITTLE(output->c + 4 * n, (x[n] + input[n]));
        }
    }
}
/*
 * HChaCha20 core.
 *
 * Builds a 16-word state from a 16-byte constant, the 32-byte key |k| and the
 * 16-byte input |in|, applies ten column/diagonal double rounds, and writes
 * words 0..3 followed by words 12..15 of the result to |out| (32 bytes,
 * little-endian). There is no final addition of the initial state.
 *
 * |c| supplies the constant; when it is NULL the words 0x61707865,
 * 0x3320646e, 0x79622d32, 0x6b206574 ("expand 32-byte k" in little-endian
 * byte order) are used instead.
 *
 * Always returns 0.
 */
int crypto_core_hchacha20(unsigned char *out, const unsigned char *in,
                          const unsigned char *k, const unsigned char *c)
{
    uint32_t st[16];
    int      j;

    /* Words 0..3: constant. */
    if (c != NULL) {
        for (j = 0; j < 4; j++) {
            st[j] = LOAD32_LE(c + 4 * j);
        }
    } else {
        st[0] = U32C(0x61707865);
        st[1] = U32C(0x3320646e);
        st[2] = U32C(0x79622d32);
        st[3] = U32C(0x6b206574);
    }

    /* Words 4..11: key. */
    for (j = 0; j < 8; j++) {
        st[4 + j] = LOAD32_LE(k + 4 * j);
    }

    /* Words 12..15: input block. */
    for (j = 0; j < 4; j++) {
        st[12 + j] = LOAD32_LE(in + 4 * j);
    }

    for (j = 10; j > 0; j--) {
        /* column round */
        QUARTERROUND(st[0], st[4], st[8], st[12]);
        QUARTERROUND(st[1], st[5], st[9], st[13]);
        QUARTERROUND(st[2], st[6], st[10], st[14]);
        QUARTERROUND(st[3], st[7], st[11], st[15]);
        /* diagonal round */
        QUARTERROUND(st[0], st[5], st[10], st[15]);
        QUARTERROUND(st[1], st[6], st[11], st[12]);
        QUARTERROUND(st[2], st[7], st[8], st[13]);
        QUARTERROUND(st[3], st[4], st[9], st[14]);
    }

    /* First half of the output: words 0..3. */
    for (j = 0; j < 4; j++) {
        STORE32_LE(out + 4 * j, st[j]);
    }
    /* Second half of the output: words 12..15. */
    for (j = 0; j < 4; j++) {
        STORE32_LE(out + 16 + 4 * j, st[12 + j]);
    }

    return 0;
}
/*
 * Entry point of the demo program.
 *
 * Fills the 64-byte state x[] (declared outside this function) with the
 * "expand 32-byte k" constant in bytes 0..15 and zeros everywhere else,
 * runs the quarter-round schedule over it, and, when built with __TRS80__,
 * prints the resulting 64 bytes and jumps back to BASIC.
 */
void main(){
    unsigned char i;

    /* Zero the whole 64-byte state */
    for( i=0; i<64; i++ ) x[i]=0;

    /* Fill in the constant: bytes 0..15 spell "expand 32-byte k" */
    i=0;
    x[i++]='e'; x[i++]='x'; x[i++]='p'; x[i++]='a';
    x[i++]='n'; x[i++]='d'; x[i++]=' '; x[i++]='3';
    x[i++]='2'; x[i++]='-'; x[i++]='b'; x[i++]='y';
    x[i++]='t'; x[i++]='e'; x[i++]=' '; x[i++]='k';

    /* Bytes 16..47 are left at zero by the clearing loop above. */

    /*
     * Nonce: bytes 48..63 are written as zero explicitly (they are already
     * zero after the clearing loop).
     */
    i=48;
    x[i++]=0; x[i++]=0; x[i++]=0; x[i++]=0;
    x[i++]=0; x[i++]=0; x[i++]=0; x[i++]=0;
    x[i++]=0; x[i++]=0; x[i++]=0; x[i++]=0;
    x[i++]=0; x[i++]=0; x[i++]=0; x[i++]=0;

    /*
     * Four passes (i = 0, 2, 4, 6) of 8 quarter-rounds each. The index
     * arguments follow the usual ChaCha pattern: four "column" groups, then
     * four "diagonal" groups.
     * NOTE(review): how the indices 0..15 map onto the byte array x[] depends
     * on the QUARTERROUND definition, which is not visible here.
     */
    for( i=0; i<8; i+=2 ){
#ifdef __OPTSPACE__
        /* Space-optimised build: go through the qr() function wrapper */
        qr( 0, 4, 8,12);
        qr( 1, 5, 9,13);
        qr( 2, 6,10,14);
        qr( 3, 7,11,15);
        qr( 0, 5,10,15);
        qr( 1, 6,11,12);
        qr( 2, 7, 8,13);
        qr( 3, 4, 9,14);
#else
        /* Unrolled build: expand the macro in place at every call site */
        QUARTERROUND( 0, 4, 8,12);
        QUARTERROUND( 1, 5, 9,13);
        QUARTERROUND( 2, 6,10,14);
        QUARTERROUND( 3, 7,11,15);
        QUARTERROUND( 0, 5,10,15);
        QUARTERROUND( 1, 6,11,12);
        QUARTERROUND( 2, 7, 8,13);
        QUARTERROUND( 3, 4, 9,14);
#endif
    }

#ifdef __TRS80__
    /* Display the resulting state as raw bytes, with a line break before every group of 8 */
    for( i=0; i<64; i++ ){
        if( 0==i%8 ) print_crlf();
        print_char( x[i] );
    }

    /* Print a carriage return, then jump back to BASIC (does not return) */
    __asm
        ld a,#0x0d ; CR
        call 0x33 ; Print it
        ld hl,#0x6cc ; BASIC command line
        jp (hl)
    __endasm;
#endif
}
/*
 * Function form of the QUARTERROUND macro: forwards its four arguments to
 * the macro unchanged. main() calls this instead of expanding the macro in
 * place when __OPTSPACE__ is defined, passing constants in the range 0..15.
 *
 * NOTE(review): a, b, c and d are presumably positions in the global state
 * x[] -- confirm against the QUARTERROUND definition.
 */
void qr( unsigned char a, unsigned char b, unsigned char c, unsigned char d ){
    QUARTERROUND(a,b,c,d);
}
/*
 * Turn the 16-word state |input| into one 64-byte output block.
 *
 * A working copy of the state goes through ten passes of eight
 * quarter-rounds (four on the columns, then four on the diagonals), the
 * original words are added back with PLUS, and the 16 results are written
 * to |output| with U32TO8_LITTLE, four bytes per word.
 */
static void salsa20_wordtobyte(u8 output[64], const u32 input[16])
{
    u32 w[16];
    int n;

    /* Work on a private copy so |input| is still available for the final add. */
    for (n = 0; n < 16; n++) {
        w[n] = input[n];
    }

    for (n = 0; n < 10; n++) {
        /* columns */
        QUARTERROUND(w[0], w[4], w[8], w[12]);
        QUARTERROUND(w[1], w[5], w[9], w[13]);
        QUARTERROUND(w[2], w[6], w[10], w[14]);
        QUARTERROUND(w[3], w[7], w[11], w[15]);
        /* diagonals */
        QUARTERROUND(w[0], w[5], w[10], w[15]);
        QUARTERROUND(w[1], w[6], w[11], w[12]);
        QUARTERROUND(w[2], w[7], w[8], w[13]);
        QUARTERROUND(w[3], w[4], w[9], w[14]);
    }

    /*
     * Feed-forward. Kept as its own pass so that every read of |input|
     * happens before the first write to |output|.
     */
    for (n = 0; n < 16; n++) {
        w[n] = PLUS(w[n], input[n]);
    }

    /* Serialise the words into the output block. */
    for (n = 0; n < 16; n++) {
        U32TO8_LITTLE(output + 4 * n, w[n]);
    }
}
/*
 * Mix the full state: run CHACHA_ROUNDS rounds starting from ctx->state,
 * emit the result through the OUTPUT macro, reset ctx->pos to 0 and advance
 * the counter held in state words 12 and 13.
 *
 * Needs 2 call sites to avoid inlining.
 *
 * Round accounting: one leading column round, then CHACHA_ROUNDS/2 - 1
 * diagonal+column pairs, then one trailing diagonal round, i.e.
 * CHACHA_ROUNDS/2 column rounds and CHACHA_ROUNDS/2 diagonal rounds.
 */
static void chacha_mix(struct ChaCha *ctx)
{
	const uint32_t *input = ctx->state;
	/*
	 * Not referenced directly below; presumably the OUTPUT macro writes
	 * through it (and reads input/x) by name -- confirm against the macro.
	 */
	uint32_t *output = ctx->u.output32;
	int i;
	uint32_t x[16];

	/*
	 * first "column" round: the only one given `input` as its first macro
	 * argument (presumably the source operand, with x as destination --
	 * confirm); every later round passes x for both.
	 */
	QUARTERROUND(input, x, 0, 4, 8, 12);
	QUARTERROUND(input, x, 1, 5, 9, 13);
	QUARTERROUND(input, x, 2, 6, 10, 14);
	QUARTERROUND(input, x, 3, 7, 11, 15);

	for (i = 0; i < CHACHA_ROUNDS/2 - 1; i++) {
		/* "diagonal" round */
		QUARTERROUND(x, x, 0, 5, 10, 15);
		QUARTERROUND(x, x, 1, 6, 11, 12);
		QUARTERROUND(x, x, 2, 7, 8, 13);
		QUARTERROUND(x, x, 3, 4, 9, 14);

		/* "column" round */
		QUARTERROUND(x, x, 0, 4, 8, 12);
		QUARTERROUND(x, x, 1, 5, 9, 13);
		QUARTERROUND(x, x, 2, 6, 10, 14);
		QUARTERROUND(x, x, 3, 7, 11, 15);
	}

	/*
	 * last "diagonal" round: each quarter-round is followed at once by
	 * OUTPUT for the same four word indices
	 */
	QUARTERROUND(x, x, 0, 5, 10, 15);
	OUTPUT(0, 5, 10, 15);
	QUARTERROUND(x, x, 1, 6, 11, 12);
	OUTPUT(1, 6, 11, 12);
	QUARTERROUND(x, x, 2, 7, 8, 13);
	OUTPUT(2, 7, 8, 13);
	QUARTERROUND(x, x, 3, 4, 9, 14);
	OUTPUT(3, 4, 9, 14);

	/* restart the position marker for the freshly produced block */
	ctx->pos = 0;

	/*
	 * Advance the counter only after the output step above: word 12 is
	 * incremented and, when it wraps to zero, the carry goes into word 13.
	 */
	ctx->state[12]++;
	if (!ctx->state[12])
		ctx->state[13]++;
}