void cryptonight_hash_ctx_aes_ni(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx) { hash_process(&ctx->state.hs, (const uint8_t*) input, len); ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); size_t i, j; memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE); for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data); fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data); fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data); fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data); fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data); fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data); fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data); fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data); memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE); } xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a); xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b); for (i = 0; likely(i < ITER / 4); ++i) { /* Dependency chain: address -> read value ------+ * written value <-+ hard function (AES or MUL) <+ * next address <-+ */ /* Iteration 1 */ j = e2i(ctx->a); fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a); xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]); /* Iteration 2 */ mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]); /* Iteration 3 */ j = e2i(ctx->a); fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a); xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]); /* Iteration 4 */ mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]); } memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); 
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]); fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); } memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); hash_permutation(&ctx->state.hs); /*memcpy(hash, &state, 32);*/ extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output); oaes_free((OAES_CTX **) &ctx->aes_ctx); }
/*
 * Compute the CryptoNight hash of `input` (`len` bytes) and write the digest
 * to `output` via the selected extra_hashes[] finalizer.
 *
 * Parameters:
 *   input   - message bytes to hash.
 *   output  - destination buffer handed to the extra_hashes[] finalizer.
 *   len     - length of `input` in bytes.
 *   variant - not referenced directly in this body; presumably consumed by
 *             the VARIANT* macros (TODO confirm against their definitions).
 *   height  - likewise not referenced directly; presumably consumed by the
 *             VARIANT4_* macros (TODO confirm).
 *
 * The VARIANT* macros are defined outside this block. Several are invoked
 * with no arguments, so they most likely refer to locals of this function by
 * name (e.g. `hi`, `lo`, `j`, `ctx`, and `r`, which is not declared in the
 * visible code). Verify the macro definitions before renaming or reordering
 * anything here.
 *
 * NOTE(review): the working context is obtained with alloca(); if
 * struct cryptonight_ctx embeds the MEMORY-sized scratchpad inline, this is a
 * very large stack allocation -- confirm thread stack sizes or move to heap.
 * NOTE(review): the result of oaes_alloc() is dereferenced without a NULL
 * check.
 */
void cryptonight_hash(const char* input, char* output, uint32_t len, int variant, uint64_t height) {
    /* Scratch context lives on the stack for the duration of this call. */
    struct cryptonight_ctx *ctx = alloca(sizeof(struct cryptonight_ctx));
    hash_process(&ctx->state.hs, (const uint8_t*) input, len);
    /* Seed the text buffer and the first AES key from the hashed state. */
    memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
    memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
    ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
    size_t i, j;

    /* Variant-specific setup; see the macro definitions for what they declare. */
    VARIANT1_INIT();
    VARIANT2_INIT(ctx->b, ctx->state);
    VARIANT4_RANDOM_MATH_INIT(ctx->state);

    /* Phase 1: fill long_state chunk by chunk, re-encrypting text each time. */
    oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
    for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
        for (j = 0; j < INIT_SIZE_BLK; j++) {
            aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], &ctx->text[AES_BLOCK_SIZE * j], ctx->aes_ctx->key->exp_data);
        }
        memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
    }

    /* a = k[0..15] ^ k[32..47], b = k[16..31] ^ k[48..63]. */
    for (i = 0; i < 16; i++) {
        ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
        ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
    }

    /* Phase 2: two dependent half-steps per trip, ITER / 2 trips. */
    for (i = 0; i < ITER / 2; i++) {
        /* Dependency chain: address -> read value ------+
         * written value <-+ hard function (AES or MUL) <+
         * next address  <-+
         */
        /* Iteration 1 */
        j = e2i(ctx->a);
        aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
        VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b, ctx->c);
        xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
        VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
        /* Iteration 2 */
        j = e2i(ctx->c);
        /* dst aliases the 16-byte scratchpad slot as two 64-bit words. */
        uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
        /* t holds the slot's value before it is overwritten below. */
        uint64_t t[2];
        t[0] = dst[0];
        t[1] = dst[1];
        VARIANT2_INTEGER_MATH(t, ctx->c);
        /* Keep the pre-update value of a in a1 for the later shuffle. */
        copy_block(ctx->a1, ctx->a);
        VARIANT4_RANDOM_MATH(ctx->a, t, r, ctx->b, ctx->b + AES_BLOCK_SIZE);
        uint64_t hi;
        uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
        VARIANT2_2();
        VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a1, ctx->b, ctx->c);
        /* a += (hi, lo); store a into the slot; then a ^= old slot value. */
        ((uint64_t*)ctx->a)[0] += hi;
        ((uint64_t*)ctx->a)[1] += lo;
        dst[0] = ((uint64_t*)ctx->a)[0];
        dst[1] = ((uint64_t*)ctx->a)[1];
        ((uint64_t*)ctx->a)[0] ^= t[0];
        ((uint64_t*)ctx->a)[1] ^= t[1];
        VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
        /* Shift history: b's second block takes the old b, b takes c. */
        copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b);
        copy_block(ctx->b, ctx->c);
    }

    /* Phase 3: fold long_state back into text under the key at hs.b[32]. */
    memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
    oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
    for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
        for (j = 0; j < INIT_SIZE_BLK; j++) {
            xor_blocks(&ctx->text[j * AES_BLOCK_SIZE], &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
            aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], &ctx->text[j * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
        }
    }

    /* Finalize: write text back into the state, permute, pick a finalizer. */
    memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
    hash_permutation(&ctx->state.hs);
    /*memcpy(hash, &state, 32);*/
    /* The low two bits of the first state byte select one of four finalizers. */
    extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
    oaes_free((OAES_CTX **) &ctx->aes_ctx);
}