/**
 * smix(B, r, N, V, XY):
 * Compute B = SMix_r(B, N). The input B must be 128r bytes in length; the
 * temporary storage V must be 128rN bytes in length; the temporary storage
 * XY must be 256r bytes in length. The value N must be a power of 2.
 */
static void
smix(uint8_t * B, size_t r, uint64_t N, uint8_t * V, uint8_t * XY)
{
        uint8_t * X = XY;
        uint8_t * Y = &XY[128 * r];
        uint64_t i;
        uint64_t j;

        /* 1: X <-- B */
        blkcpy(X, B, 128 * r);

        /* 2: for i = 0 to N - 1 do */
        for (i = 0; i < N; i++) {
                /* 3: V_i <-- X */
                blkcpy(&V[i * (128 * r)], X, 128 * r);

                /* 4: X <-- H(X) */
                blockmix_salsa8(X, Y, r);
        }

        /* 6: for i = 0 to N - 1 do */
        for (i = 0; i < N; i++) {
                /* 7: j <-- Integerify(X) mod N */
                j = integerify(X, r) & (N - 1);

                /* 8: X <-- H(X \xor V_j) */
                blkxor(X, &V[j * (128 * r)], 128 * r);
                blockmix_salsa8(X, Y, r);
        }

        /* 10: B' <-- X */
        blkcpy(B, X, 128 * r);
}
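/*
 * The routine above leans on three helpers that are not shown here. What
 * follows is a minimal sketch of how they could look for this byte-oriented
 * reference layout: blkcpy/blkxor operate on raw bytes, and integerify reads
 * the first 64 bits of the last 64-byte sub-block, little-endian, per the
 * scrypt paper. le64dec is assumed to be the usual little-endian decoder
 * (e.g. from sysendian.h); treat these as illustrative, not canonical.
 */
static void
blkcpy(uint8_t * dest, uint8_t * src, size_t len)
{
        size_t i;

        /* Plain byte-by-byte copy; memcpy would serve equally well. */
        for (i = 0; i < len; i++)
                dest[i] = src[i];
}

static void
blkxor(uint8_t * dest, uint8_t * src, size_t len)
{
        size_t i;

        /* XOR src into dest in place. */
        for (i = 0; i < len; i++)
                dest[i] ^= src[i];
}

static uint64_t
integerify(uint8_t * B, size_t r)
{
        /* First 8 bytes of the last 64-byte sub-block, little-endian. */
        uint8_t * X = &B[(2 * r - 1) * 64];

        return (le64dec(X));
}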
/**
 * smix(B, r, N, V, XY):
 * Compute B = SMix_r(B, N). The input B must be 128r bytes in length;
 * the temporary storage V must be 128rN bytes in length; the temporary
 * storage XY must be 256r + 64 bytes in length. The value N must be a
 * power of 2 greater than 1. The arrays B, V, and XY must be aligned to a
 * multiple of 64 bytes.
 */
static void
smix(uint8_t * B, size_t r, uint64_t N, void * V, void * XY)
{
        __m128i * X = XY;
        __m128i * Y = (void *)((uintptr_t)(XY) + 128 * r);
        __m128i * Z = (void *)((uintptr_t)(XY) + 256 * r);
        uint32_t * X32 = (void *)X;
        uint64_t i, j;
        size_t k;

        /* 1: X <-- B */
        for (k = 0; k < 2 * r; k++) {
                for (i = 0; i < 16; i++) {
                        X32[k * 16 + i] =
                            le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
                }
        }

        /* 2: for i = 0 to N - 1 do */
        for (i = 0; i < N; i += 2) {
                /* 3: V_i <-- X */
                blkcpy((void *)((uintptr_t)(V) + i * 128 * r), X, 128 * r);

                /* 4: X <-- H(X) */
                blockmix_salsa8(X, Y, Z, r);

                /* 3: V_i <-- X */
                blkcpy((void *)((uintptr_t)(V) + (i + 1) * 128 * r),
                    Y, 128 * r);

                /* 4: X <-- H(X) */
                blockmix_salsa8(Y, X, Z, r);
        }

        /* 6: for i = 0 to N - 1 do */
        for (i = 0; i < N; i += 2) {
                /* 7: j <-- Integerify(X) mod N */
                j = integerify(X, r) & (N - 1);

                /* 8: X <-- H(X \xor V_j) */
                blkxor(X, (void *)((uintptr_t)(V) + j * 128 * r), 128 * r);
                blockmix_salsa8(X, Y, Z, r);

                /* 7: j <-- Integerify(X) mod N */
                j = integerify(Y, r) & (N - 1);

                /* 8: X <-- H(X \xor V_j) */
                blkxor(Y, (void *)((uintptr_t)(V) + j * 128 * r), 128 * r);
                blockmix_salsa8(Y, X, Z, r);
        }

        /* 10: B' <-- X */
        for (k = 0; k < 2 * r; k++) {
                for (i = 0; i < 16; i++) {
                        le32enc(&B[(k * 16 + (i * 5 % 16)) * 4],
                            X32[k * 16 + i]);
                }
        }
}
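/*
 * A note on the (i * 5 % 16) shuffle in the decode/encode loops above: the
 * SSE2 blockmix keeps each Salsa20/8 state in a rotated word order so that
 * the quarter-round diagonals line up within __m128i registers. One
 * consequence is that integerify() cannot simply read the first 8 bytes of
 * the last sub-block: in the shuffled order, canonical words 0 and 1 sit at
 * word indices 0 and 13 (since 13 * 5 mod 16 = 1). A sketch of an
 * integerify() consistent with that layout:
 */
static uint64_t
integerify(void * B, size_t r)
{
        /* Point at the last 64-byte sub-block of the 128r-byte buffer. */
        uint32_t * X = (void *)((uintptr_t)(B) + (2 * r - 1) * 64);

        /* Shuffled word 13 holds canonical word 1; word 0 is unmoved. */
        return (((uint64_t)(X[13]) << 32) + X[0]);
}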
/**
 * crypto_scrypt_smix(B, r, N, V, XY):
 * Compute B = SMix_r(B, N). The input B must be 128r bytes in length;
 * the temporary storage V must be 128rN bytes in length; the temporary
 * storage XY must be 256r + 64 bytes in length. The value N must be a
 * power of 2 greater than 1. The arrays B, V, and XY must be aligned to a
 * multiple of 64 bytes.
 */
void
crypto_scrypt_smix(uint8_t * B, size_t r, uint64_t N, void * _V, void * XY)
{
        uint32_t * X = XY;
        uint32_t * Y = (void *)((uint8_t *)(XY) + 128 * r);
        uint32_t * Z = (void *)((uint8_t *)(XY) + 256 * r);
        uint32_t * V = _V;
        uint64_t i;
        uint64_t j;
        size_t k;

        /* 1: X <-- B */
        for (k = 0; k < 32 * r; k++)
                X[k] = le32dec(&B[4 * k]);

        /* 2: for i = 0 to N - 1 do */
        for (i = 0; i < N; i += 2) {
                /* 3: V_i <-- X */
                blkcpy(&V[i * (32 * r)], X, 128 * r);

                /* 4: X <-- H(X) */
                blockmix_salsa8(X, Y, Z, r);

                /* 3: V_i <-- X */
                blkcpy(&V[(i + 1) * (32 * r)], Y, 128 * r);

                /* 4: X <-- H(X) */
                blockmix_salsa8(Y, X, Z, r);
        }

        /* 6: for i = 0 to N - 1 do */
        for (i = 0; i < N; i += 2) {
                /* 7: j <-- Integerify(X) mod N */
                j = integerify(X, r) & (N - 1);

                /* 8: X <-- H(X \xor V_j) */
                blkxor(X, &V[j * (32 * r)], 128 * r);
                blockmix_salsa8(X, Y, Z, r);

                /* 7: j <-- Integerify(X) mod N */
                j = integerify(Y, r) & (N - 1);

                /* 8: X <-- H(X \xor V_j) */
                blkxor(Y, &V[j * (32 * r)], 128 * r);
                blockmix_salsa8(Y, X, Z, r);
        }

        /* 10: B' <-- X */
        for (k = 0; k < 32 * r; k++)
                le32enc(&B[4 * k], X[k]);
}
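/*
 * A hypothetical caller, shown only to make the buffer contract concrete:
 * V needs 128 * r * N bytes, XY needs 256 * r + 64 bytes, and both (like B)
 * must be 64-byte aligned. posix_memalign() is one way to satisfy the
 * alignment requirement; the helper name run_smix is made up for this
 * sketch.
 */
#include <stdint.h>
#include <stdlib.h>

static int
run_smix(uint8_t * B, size_t r, uint64_t N)
{
        void * V;
        void * XY;

        /* 128rN bytes of 64-byte-aligned scratch for V_0 .. V_{N-1}. */
        if (posix_memalign(&V, 64, 128 * r * N))
                return (-1);

        /* 256r bytes for X and Y, plus 64 bytes for Z. */
        if (posix_memalign(&XY, 64, 256 * r + 64)) {
                free(V);
                return (-1);
        }

        /* B is updated in place: B = SMix_r(B, N). */
        crypto_scrypt_smix(B, r, N, V, XY);

        free(XY);
        free(V);
        return (0);
}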