/*
 * Run the scrypt pipeline over twelve inputs at once.
 *
 * The SHA-256 helpers (*_4way) are called on three groups of four lanes whose
 * words are interleaved (word i of lane k sits at index 4 * i + k within the
 * group).  scrypt_core_3way() is called on four groups of three lanes stored
 * lane-major (32 consecutive words per lane), so the data is transposed
 * before and after the core step.
 *
 * input       12 * 20 words, lane-major: lane n starts at input[20 * n].
 * output      12 * 8 words, lane-major: lane n starts at output[8 * n].
 * midstate    8 words, copied into every lane of tstate before the HMAC init.
 * scratchpad  working memory handed to scrypt_core_3way(); the pointer is
 *             rounded up to the next 64-byte boundary first, so it may
 *             advance by up to 63 bytes.
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[12 * 8] __attribute__((aligned(128)));
    uint32_t ostate[12 * 8] __attribute__((aligned(128)));
    uint32_t W[12 * 32] __attribute__((aligned(128)));
    uint32_t X[12 * 32] __attribute__((aligned(128)));
    uint32_t *V;
    int i, j, k;

    /* Round the scratchpad up to a 64-byte boundary. */
    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Interleave the 20 input words of each lane, four lanes per group. */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 20; i++) {
            for (k = 0; k < 4; k++) {
                W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
            }
        }
    }

    /* Broadcast the shared midstate to all twelve lanes. */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 8; i++) {
            for (k = 0; k < 4; k++) {
                tstate[32 * j + 4 * i + k] = midstate[i];
            }
        }
    }

    HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
    HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
    HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);

    /* Each call reads from and writes back into the same W group. */
    PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);

    /* De-interleave: W (4-way interleaved) -> X (32 words per lane). */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 32; i++) {
            for (k = 0; k < 4; k++) {
                X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
            }
        }
    }

    /*
     * TODO: the N factor is meant to come from the pool server; this path
     * calls scrypt_core_3way() without passing one.
     */
    scrypt_core_3way(X + 0 * 96, V);
    scrypt_core_3way(X + 1 * 96, V);
    scrypt_core_3way(X + 2 * 96, V);
    scrypt_core_3way(X + 3 * 96, V);

    /* Re-interleave: X (32 words per lane) -> W (4-way interleaved). */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 32; i++) {
            for (k = 0; k < 4; k++) {
                W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
            }
        }
    }

    PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);

    /* Copy the first 8 words of every lane out, de-interleaving as we go. */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 8; i++) {
            for (k = 0; k < 4; k++) {
                output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
            }
        }
    }
}
/*
 * Run the scrypt pipeline over three inputs, forwarding N to
 * scrypt_core_3way().
 *
 * input       3 * 20 words; lane n starts at input[20 * n].
 * output      3 * 8 words; lane n starts at output[8 * n].
 * midstate    8 words (32 bytes), copied into each lane's tstate.
 * scratchpad  working memory for scrypt_core_3way(); the pointer is rounded
 *             up to the next 64-byte boundary before use.
 * N           passed through unchanged to scrypt_core_3way().
 */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
    uint32_t tstate[3 * 8];
    uint32_t ostate[3 * 8];
    uint32_t X[3 * 32] __attribute__((aligned(64)));
    uintptr_t pad_addr;
    uint32_t *V;
    int lane;

    /* Align the scratchpad to 64 bytes. */
    pad_addr = (uintptr_t)(scratchpad) + 63;
    V = (uint32_t *)(pad_addr & ~ (uintptr_t)(63));

    /* Every lane starts from the same midstate. */
    for (lane = 0; lane < 3; lane++) {
        memcpy(tstate + 8 * lane, midstate, 32);
    }

    for (lane = 0; lane < 3; lane++) {
        HMAC_SHA256_80_init(input + 20 * lane,
            tstate + 8 * lane, ostate + 8 * lane);
    }

    /* Fill one 32-word block of X per lane. */
    for (lane = 0; lane < 3; lane++) {
        PBKDF2_SHA256_80_128(tstate + 8 * lane, ostate + 8 * lane,
            input + 20 * lane, X + 32 * lane);
    }

    scrypt_core_3way(X, V, N);

    /* Write 8 output words per lane from its block of X. */
    for (lane = 0; lane < 3; lane++) {
        PBKDF2_SHA256_128_32(tstate + 8 * lane, ostate + 8 * lane,
            X + 32 * lane, output + 8 * lane);
    }
}
/*
 * Run the scrypt pipeline over three independent, arbitrary-length inputs.
 *
 * Each input is passed to PBKDF2_SHA256() as both of its first two buffer
 * arguments to fill a 128-byte block.  The three blocks go through
 * scrypt_core_3way(), and each block is then passed back into
 * PBKDF2_SHA256() alongside its input to write 32 bytes to the matching
 * result buffer.
 *
 * input1..3     input buffers.
 * input1len..3  length of each input in bytes.
 * res1..3       result buffers; 32 bytes are requested for each.
 * scratchpad    working memory for scrypt_core_3way(); the pointer is
 *               rounded up to the next 64-byte boundary before use.
 */
static void scrypt_3way(const void *input1, const void *input2,
    const void *input3, size_t input1len, size_t input2len, size_t input3len,
    uint32_t *res1, uint32_t *res2, uint32_t *res3, void *scratchpad)
{
    uint32_t X[32], Y[32], Z[32];
    const uint8_t *msg1 = (const uint8_t *)input1;
    const uint8_t *msg2 = (const uint8_t *)input2;
    const uint8_t *msg3 = (const uint8_t *)input3;
    uint8_t *block1 = (uint8_t *)X;
    uint8_t *block2 = (uint8_t *)Y;
    uint8_t *block3 = (uint8_t *)Z;
    uintptr_t pad_addr;
    uint32_t *V;

    /* Align the scratchpad to 64 bytes. */
    pad_addr = (uintptr_t)(scratchpad) + 63;
    V = (uint32_t *)(pad_addr & ~ (uintptr_t)(63));

    /* Fill each 128-byte block from its input. */
    PBKDF2_SHA256(msg1, input1len, msg1, input1len, 1, block1, 128);
    PBKDF2_SHA256(msg2, input2len, msg2, input2len, 1, block2, 128);
    PBKDF2_SHA256(msg3, input3len, msg3, input3len, 1, block3, 128);

    scrypt_core_3way(X, Y, Z, V);

    /* Produce each 32-byte result from the input and its mixed block. */
    PBKDF2_SHA256(msg1, input1len, block1, 128, 1, (uint8_t *)res1, 32);
    PBKDF2_SHA256(msg2, input2len, block2, 128, 1, (uint8_t *)res2, 32);
    PBKDF2_SHA256(msg3, input3len, block3, 128, 1, (uint8_t *)res3, 32);
}
/*
 * Twelve-lane scrypt pipeline using plain (unattributed) stack buffers.
 *
 * Lanes are handled as three groups of four for the *_4way SHA-256 helpers,
 * which take word-interleaved data (word w of lane l at index 4 * w + l
 * within a group), and as four groups of three for scrypt_core_3way(), which
 * is given 32 consecutive words per lane.
 *
 * input       12 * 20 words; lane n starts at input[20 * n].
 * output      12 * 8 words; lane n starts at output[8 * n].
 * midstate    8 words, replicated into every lane of tstate.
 * scratchpad  working memory for scrypt_core_3way(); the pointer is rounded
 *             up to the next 64-byte boundary before use.
 *
 * NOTE(review): tstate/ostate/W/X carry no alignment attribute here; confirm
 * that the *_4way helpers and scrypt_core_3way() accept unaligned buffers.
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[12 * 8];
    uint32_t ostate[12 * 8];
    uint32_t W[12 * 32];
    uint32_t X[12 * 32];
    uint32_t *V;
    int grp, word, lane;

    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Gather the inputs into interleaved order, one group at a time. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *src = input + 80 * grp;
        uint32_t *dst = W + 128 * grp;

        for (word = 0; word < 20; word++) {
            for (lane = 0; lane < 4; lane++) {
                dst[4 * word + lane] = src[20 * lane + word];
            }
        }
    }

    /* Replicate the midstate across the four lanes of every group. */
    for (grp = 0; grp < 3; grp++) {
        uint32_t *dst = tstate + 32 * grp;

        for (word = 0; word < 8; word++) {
            for (lane = 0; lane < 4; lane++) {
                dst[4 * word + lane] = midstate[word];
            }
        }
    }

    for (grp = 0; grp < 3; grp++) {
        HMAC_SHA256_80_init_4way(W + 128 * grp,
            tstate + 32 * grp, ostate + 32 * grp);
    }

    /* Each call reads from and writes back into the same W group. */
    for (grp = 0; grp < 3; grp++) {
        PBKDF2_SHA256_80_128_4way(tstate + 32 * grp, ostate + 32 * grp,
            W + 128 * grp, W + 128 * grp);
    }

    /* Interleaved W -> per-lane X. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *src = W + 128 * grp;
        uint32_t *dst = X + 128 * grp;

        for (word = 0; word < 32; word++) {
            for (lane = 0; lane < 4; lane++) {
                dst[32 * lane + word] = src[4 * word + lane];
            }
        }
    }

    /* Four passes of three lanes (96 words) each, sharing one scratchpad. */
    for (grp = 0; grp < 4; grp++) {
        scrypt_core_3way(X + 96 * grp, V);
    }

    /* Per-lane X -> interleaved W. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *src = X + 128 * grp;
        uint32_t *dst = W + 128 * grp;

        for (word = 0; word < 32; word++) {
            for (lane = 0; lane < 4; lane++) {
                dst[4 * word + lane] = src[32 * lane + word];
            }
        }
    }

    for (grp = 0; grp < 3; grp++) {
        PBKDF2_SHA256_128_32_4way(tstate + 32 * grp, ostate + 32 * grp,
            W + 128 * grp, W + 128 * grp);
    }

    /* Scatter the first 8 words of each lane to the lane-major output. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *src = W + 128 * grp;
        uint32_t *dst = output + 32 * grp;

        for (word = 0; word < 8; word++) {
            for (lane = 0; lane < 4; lane++) {
                dst[8 * lane + word] = src[4 * word + lane];
            }
        }
    }
}