void Panama<B>::Iterate(unsigned int count, const word32 *p, word32 *z, const word32 *y) { unsigned int bstart = m_bstart; word32 *const a = m_state; #define c (a+17) #define b ((Stage *)(a+34)) // output #define OA(i) z[i] = ConditionalByteReverse(B::ToEnum(), a[i+9]) #define OX(i) z[i] = y[i] ^ ConditionalByteReverse(B::ToEnum(), a[i+9]) // buffer update #define US(i) {word32 t=b0[i]; b0[i]=ConditionalByteReverse(B::ToEnum(), p[i])^t; b25[(i+6)%8]^=t;} #define UL(i) {word32 t=b0[i]; b0[i]=a[i+1]^t; b25[(i+6)%8]^=t;} // gamma and pi #define GP(i) c[5*i%17] = rotlFixed(a[i] ^ (a[(i+1)%17] | ~a[(i+2)%17]), ((5*i%17)*((5*i%17)+1)/2)%32) // theta and sigma #define T(i,x) a[i] = c[i] ^ c[(i+1)%17] ^ c[(i+4)%17] ^ x #define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i])) #define TS1L(i) T(i+1, b4[i]) #define TS2(i) T(i+9, b16[i]) while (count--) { if (z) { if (y) { OX(0); OX(1); OX(2); OX(3); OX(4); OX(5); OX(6); OX(7); y += 8; } else { OA(0); OA(1); OA(2); OA(3); OA(4); OA(5); OA(6); OA(7); } z += 8; } word32 *const b16 = b[(bstart+16) % STAGES]; word32 *const b4 = b[(bstart+4) % STAGES]; bstart = (bstart + STAGES - 1) % STAGES; word32 *const b0 = b[bstart]; word32 *const b25 = b[(bstart+25) % STAGES]; if (p) { US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7); } else { UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7); } GP(0); GP(1); GP(2); GP(3); GP(4); GP(5); GP(6); GP(7); GP(8); GP(9); GP(10); GP(11); GP(12); GP(13); GP(14); GP(15); GP(16); T(0,1); if (p) { TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7); p += 8; } else { TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7); } TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7); } m_bstart = bstart; }
void Panama<B>::Iterate(size_t count, const word32 *p, word32 *z, const word32 *y) { word32 bstart = m_state[17]; word32 *const aPtr = m_state; word32 cPtr[17]; #define bPtr ((byte *)(aPtr+20)) // reorder the state for SSE2 // a and c: 4 8 12 16 | 3 7 11 15 | 2 6 10 14 | 1 5 9 13 | 0 // xmm0 xmm1 xmm2 xmm3 eax #define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17 #define c(i) cPtr[((i)*13+16) % 17] // b: 0 4 | 1 5 | 2 6 | 3 7 #define b(i, j) b##i[(j)*2%8 + (j)/4] // output #define OA(i) z[i] = ConditionalByteReverse(B::ToEnum(), a(i+9)) #define OX(i) z[i] = y[i] ^ ConditionalByteReverse(B::ToEnum(), a(i+9)) // buffer update #define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;} #define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;} // gamma and pi #define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32) // theta and sigma #define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x #define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i])) #define TS1L(i) T(i+1, b(4,i)) #define TS2(i) T(i+9, b(16,i)) while (count--) { if (z) { if (y) { OX(0); OX(1); OX(2); OX(3); OX(4); OX(5); OX(6); OX(7); y += 8; } else { OA(0); OA(1); OA(2); OA(3); OA(4); OA(5); OA(6); OA(7); } z += 8; } word32 *const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32)); word32 *const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32)); bstart += 32; word32 *const b0 = (word32 *)(bPtr+((bstart) & 31*32)); word32 *const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32)); if (p) { US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7); } else { UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7); } GP(0); GP(1); GP(2); GP(3); GP(4); GP(5); GP(6); GP(7); GP(8); GP(9); GP(10); GP(11); GP(12); GP(13); GP(14); GP(15); GP(16); T(0,1); if (p) { TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7); p += 8; } else { TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7); } TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7); } m_state[17] = bstart; }