/*
 * Hash twelve inputs in one call, handled as three groups of four
 * interleaved lanes.
 *
 * input:      read as 12 consecutive records of 20 32-bit words (240 words).
 * output:     receives 12 consecutive records of 8 words (96 words).
 * midstate:   8 words; the same values seed the state of every lane.
 * scratchpad: working memory handed to scrypt_core_3way() after being
 *             rounded up to a 64-byte boundary.  The size it must have is
 *             not visible in this function.
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input, uint32_t *output,
    uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[12 * 8] __attribute__((aligned(128)));
    uint32_t ostate[12 * 8] __attribute__((aligned(128)));
    uint32_t W[12 * 32] __attribute__((aligned(128)));
    uint32_t X[12 * 32] __attribute__((aligned(128)));
    uint32_t *V;
    int i, j, k;

    /* Round the scratchpad address up to the next multiple of 64. */
    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /*
     * Interleave the inputs: word i of lane k in group j is stored at
     * W[128 * j + 4 * i + k], so the four lanes of a group sit side by side.
     */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 20; i++) {
            for (k = 0; k < 4; k++) {
                W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
            }
        }
    }

    /* Replicate the shared midstate into every lane of every group. */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 8; i++) {
            for (k = 0; k < 4; k++) {
                tstate[32 * j + 4 * i + k] = midstate[i];
            }
        }
    }

    HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
    HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
    HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);

    /* W is passed as both trailing arguments (presumably input and output). */
    PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);

    /* De-interleave: each lane's 32 words become contiguous in X. */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 32; i++) {
            for (k = 0; k < 4; k++) {
                X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
            }
        }
    }

    /*
     * TODO: the Nfactor needs to come from the pool server.
     * scrypt_core_3way() is currently called without one; an alternative
     * scrypt_core_sidm_3way(X + n * 96, Nfactor) path was sketched here
     * but never enabled.
     */
    scrypt_core_3way(X + 0 * 96, V);
    scrypt_core_3way(X + 1 * 96, V);
    scrypt_core_3way(X + 2 * 96, V);
    scrypt_core_3way(X + 3 * 96, V);

    /* Back to the interleaved layout for the final 4-way PBKDF2 pass. */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 32; i++) {
            for (k = 0; k < 4; k++) {
                W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
            }
        }
    }

    PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);

    /* Copy the first 8 words of each lane out as one contiguous record. */
    for (j = 0; j < 3; j++) {
        for (i = 0; i < 8; i++) {
            for (k = 0; k < 4; k++) {
                output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
            }
        }
    }
}
/*
 * Hash four inputs in one call using the 4-way HMAC/PBKDF2 helpers.
 *
 * input:      read as 4 consecutive records of 20 32-bit words (80 words).
 * output:     receives 4 consecutive records of 8 words (32 words).
 * midstate:   8 words; the same values seed the state of every lane.
 * scratchpad: working memory for scrypt_core(), used from the first
 *             64-byte-aligned address at or after the pointer given.
 * N:          forwarded unchanged to every scrypt_core() call.
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input, uint32_t *output,
    uint32_t *midstate, unsigned char *scratchpad, int N)
{
    uint32_t tstate[4 * 8] __attribute__((aligned(128)));
    uint32_t ostate[4 * 8] __attribute__((aligned(128)));
    uint32_t W[4 * 32] __attribute__((aligned(128)));
    uint32_t X[4 * 32] __attribute__((aligned(128)));
    uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
    int lane, word;

    /* Lane-interleaved load: word w of lane l goes to W[4 * w + l]. */
    for (lane = 0; lane < 4; lane++) {
        const uint32_t *record = input + 20 * lane;

        for (word = 0; word < 20; word++)
            W[4 * word + lane] = record[word];
    }

    /* Every lane starts from the same midstate. */
    for (lane = 0; lane < 4; lane++) {
        for (word = 0; word < 8; word++)
            tstate[4 * word + lane] = midstate[word];
    }

    HMAC_SHA256_80_init_4way(W, tstate, ostate);
    PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

    /* Give each lane its own contiguous 32-word block in X. */
    for (lane = 0; lane < 4; lane++) {
        uint32_t *block = X + 32 * lane;

        for (word = 0; word < 32; word++)
            block[word] = W[4 * word + lane];
    }

    /* One core pass per lane; all four share the same scratch area V. */
    for (lane = 0; lane < 4; lane++)
        scrypt_core(X + 32 * lane, V, N);

    /* Re-interleave for the final 4-way PBKDF2 pass. */
    for (lane = 0; lane < 4; lane++) {
        const uint32_t *block = X + 32 * lane;

        for (word = 0; word < 32; word++)
            W[4 * word + lane] = block[word];
    }

    PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

    /* The first 8 words of each lane form that lane's output record. */
    for (lane = 0; lane < 4; lane++) {
        uint32_t *digest = output + 8 * lane;

        for (word = 0; word < 8; word++)
            digest[word] = W[4 * word + lane];
    }
}
/*
 * Four-lane hashing wrapper around scrypt_core().
 *
 * Reads 4 x 20 words from input, writes 4 x 8 words to output, and seeds
 * all lanes from the 8-word midstate.  scratchpad is rounded up to a
 * 64-byte boundary before being handed to scrypt_core().
 *
 * NOTE(review): the work buffers below carry no explicit alignment
 * attribute, unlike the five-argument variant of this function.  Confirm
 * the 4-way helpers do not depend on a stricter alignment.
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input, uint32_t *output,
    uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[4 * 8];
    uint32_t ostate[4 * 8];
    uint32_t W[4 * 32];
    uint32_t X[4 * 32];
    uint32_t *V;
    int idx, lane;

    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Interleave the four input records word by word. */
    for (idx = 0; idx < 20; idx++) {
        for (lane = 0; lane < 4; lane++)
            W[4 * idx + lane] = input[lane * 20 + idx];
    }

    /* Broadcast each midstate word across the four lanes. */
    for (idx = 0; idx < 8; idx++) {
        const uint32_t seed = midstate[idx];

        for (lane = 0; lane < 4; lane++)
            tstate[4 * idx + lane] = seed;
    }

    HMAC_SHA256_80_init_4way(W, tstate, ostate);
    PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

    /* Split the interleaved words into one 32-word block per lane. */
    for (idx = 0; idx < 32; idx++) {
        for (lane = 0; lane < 4; lane++)
            X[lane * 32 + idx] = W[4 * idx + lane];
    }

    /* Process the lanes one after another with the shared scratch area. */
    for (lane = 0; lane < 4; lane++)
        scrypt_core(X + lane * 32, V);

    /* Merge the per-lane blocks back into interleaved order. */
    for (idx = 0; idx < 32; idx++) {
        for (lane = 0; lane < 4; lane++)
            W[4 * idx + lane] = X[lane * 32 + idx];
    }

    PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

    /* Emit the leading 8 words of every lane as its output record. */
    for (idx = 0; idx < 8; idx++) {
        for (lane = 0; lane < 4; lane++)
            output[lane * 8 + idx] = W[4 * idx + lane];
    }
}
/*
 * Twelve-lane hashing wrapper: three groups of four interleaved lanes for
 * the HMAC/PBKDF2 stages, four calls to scrypt_core_3way() for the core.
 *
 * Reads 12 x 20 words from input, writes 12 x 8 words to output, and seeds
 * every lane from the 8-word midstate.  scratchpad is rounded up to a
 * 64-byte boundary before use.
 *
 * NOTE(review): the work buffers below carry no explicit alignment
 * attribute, unlike the other 12-way variant in this file.  Confirm the
 * 4-way and 3-way helpers do not depend on a stricter alignment.
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input, uint32_t *output,
    uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[12 * 8];
    uint32_t ostate[12 * 8];
    uint32_t W[12 * 32];
    uint32_t X[12 * 32];
    uint32_t *V;
    int grp, lane, w;

    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Per group: interleave four input records and broadcast the midstate. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *src = input + 80 * grp;
        uint32_t *wg = W + 128 * grp;
        uint32_t *tg = tstate + 32 * grp;

        for (lane = 0; lane < 4; lane++) {
            for (w = 0; w < 20; w++)
                wg[4 * w + lane] = src[20 * lane + w];
            for (w = 0; w < 8; w++)
                tg[4 * w + lane] = midstate[w];
        }
    }

    /* All three key setups run first, then all three first PBKDF2 passes. */
    for (grp = 0; grp < 3; grp++)
        HMAC_SHA256_80_init_4way(W + 128 * grp, tstate + 32 * grp,
            ostate + 32 * grp);
    for (grp = 0; grp < 3; grp++)
        PBKDF2_SHA256_80_128_4way(tstate + 32 * grp, ostate + 32 * grp,
            W + 128 * grp, W + 128 * grp);

    /* De-interleave so each lane owns a contiguous 32-word block of X. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *wg = W + 128 * grp;
        uint32_t *xg = X + 128 * grp;

        for (lane = 0; lane < 4; lane++) {
            for (w = 0; w < 32; w++)
                xg[32 * lane + w] = wg[4 * w + lane];
        }
    }

    /* Four core passes over consecutive 96-word slices, sharing V. */
    for (grp = 0; grp < 4; grp++)
        scrypt_core_3way(X + 96 * grp, V);

    /* Return to the interleaved layout for the closing PBKDF2 pass. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *xg = X + 128 * grp;
        uint32_t *wg = W + 128 * grp;

        for (lane = 0; lane < 4; lane++) {
            for (w = 0; w < 32; w++)
                wg[4 * w + lane] = xg[32 * lane + w];
        }
    }

    for (grp = 0; grp < 3; grp++)
        PBKDF2_SHA256_128_32_4way(tstate + 32 * grp, ostate + 32 * grp,
            W + 128 * grp, W + 128 * grp);

    /* The first 8 words of every lane become its output record. */
    for (grp = 0; grp < 3; grp++) {
        const uint32_t *wg = W + 128 * grp;
        uint32_t *out = output + 32 * grp;

        for (lane = 0; lane < 4; lane++) {
            for (w = 0; w < 8; w++)
                out[8 * lane + w] = wg[4 * w + lane];
        }
    }
}