void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset) { UINT64 *curData0 = (UINT64 *)data; UINT64 *curData1 = (UINT64 *)(data+laneOffset*1*SnP_laneLengthInBytes); UINT64 *curData2 = (UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes); UINT64 *curData3 = (UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes); const V256 *stateAsLanes = (const V256 *)states; const UINT64 *stateAsLanes64 = (const UINT64*)states; V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; unsigned int i; #define Extr( argIndex ) curData0[argIndex] = stateAsLanes64[4*(argIndex)], \ curData1[argIndex] = stateAsLanes64[4*(argIndex)+1], \ curData2[argIndex] = stateAsLanes64[4*(argIndex)+2], \ curData3[argIndex] = stateAsLanes64[4*(argIndex)+3] #define Extr4( argIndex ) lanes0 = LOAD256( stateAsLanes[argIndex+0] ), \ lanes1 = LOAD256( stateAsLanes[argIndex+1] ), \ lanes2 = LOAD256( stateAsLanes[argIndex+2] ), \ lanes3 = LOAD256( stateAsLanes[argIndex+3] ), \ UNINTLEAVE(), \ STORE256u( curData0[argIndex], lanes0 ), \ STORE256u( curData1[argIndex], lanes1 ), \ STORE256u( curData2[argIndex], lanes2 ), \ STORE256u( curData3[argIndex], lanes3 ) if ( laneCount >= 16 ) { Extr4( 0 ); Extr4( 4 ); Extr4( 8 ); Extr4( 12 ); if ( laneCount >= 20 ) { Extr4( 16 ); for(i=20; i<laneCount; i++) Extr( i ); } else { for(i=16; i<laneCount; i++) Extr( i ); } } else { for(i=0; i<laneCount; i++) Extr( i ); } #undef Extr #undef Extr4 }
void KeccakF1600_Pl2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset) { const V128 *stateAsLanes = states; V128 lanes; unsigned int i; UINT64 *curData0 = (UINT64 *)data; UINT64 *curData1 = (UINT64 *)(data+laneOffset*SnP_laneLengthInBytes); #define Extr( argIndex ) lanes = LOAD128( stateAsLanes[argIndex] ), \ STORE64L( curData0[argIndex], lanes ), \ STORE64H( curData1[argIndex], lanes ) #if defined(UseSSE2) #define Extr2( argIndex ) lanes0 = LOAD128( stateAsLanes[argIndex] ), \ lanes1 = LOAD128( stateAsLanes[(argIndex)+1] ), \ lanes = UNPACKL( lanes0, lanes1 ), \ lanes0 = UNPACKH( lanes0, lanes1 ), \ STORE128u( *(V128*)&curData0[argIndex], lanes ), \ STORE128u( *(V128*)&curData1[argIndex], lanes0 ) if ( laneCount >= 16 ) { V128 lanes0, lanes1; Extr2( 0 ); Extr2( 2 ); Extr2( 4 ); Extr2( 6 ); Extr2( 8 ); Extr2( 10 ); Extr2( 12 ); Extr2( 14 ); if ( laneCount >= 20 ) { Extr2( 16 ); Extr2( 18 ); for(i=20; i<laneCount; i++) Extr( i ); } else { for(i=16; i<laneCount; i++) Extr( i ); } } #undef Extr2 #else if ( laneCount >= 17 ) { Extr( 0 ); Extr( 1 ); Extr( 2 ); Extr( 3 ); Extr( 4 ); Extr( 5 ); Extr( 6 ); Extr( 7 ); Extr( 8 ); Extr( 9 ); Extr( 10 ); Extr( 11 ); Extr( 12 ); Extr( 13 ); Extr( 14 ); Extr( 15 ); Extr( 16 ); if ( laneCount >= 21 ) { Extr( 17 ); Extr( 18 ); Extr( 19 ); Extr( 20 ); for(i=21; i<laneCount; i++) Extr( i ); } else { for(i=17; i<laneCount; i++) Extr( i ); } } #endif else { for(i=0; i<laneCount; i++) Extr( i ); } #undef Extr }
/*
 * Copy the first laneCount lanes of eight states, stored as V512 entries in
 * `states`, into `data` using scatter stores.
 *
 * states     : array of V512; entry k is scattered in one step.
 * data       : output buffer, addressed as uint64_t words; the scatter for
 *              entry k uses base address (uint64_t *)data + k.
 * laneCount  : number of V512 entries scattered.
 * laneOffset : the eight scatter indices are j*laneOffset for j = 0..7.
 */
void KeccakP1600times8_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    const V512 *stateAsLanes = states;
    uint64_t *dataAsLanes = (uint64_t *)data;
    unsigned int next = 0;  /* index of the first lane not yet extracted */
    /* Argument order is kept highest multiple first, as LOAD8_32 expects it here. */
    V256 offsets = LOAD8_32(7*laneOffset, 6*laneOffset, 5*laneOffset, 4*laneOffset, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);

    /* Scatter state entry k to the eight destinations derived from `offsets`. */
    #define ExtrLane( k )   STORE_SCATTER8_64(dataAsLanes+k, offsets, stateAsLanes[k])

    /* Unrolled prefix for the common large lane counts. */
    if ( laneCount >= 16 ) {
        ExtrLane( 0 );
        ExtrLane( 1 );
        ExtrLane( 2 );
        ExtrLane( 3 );
        ExtrLane( 4 );
        ExtrLane( 5 );
        ExtrLane( 6 );
        ExtrLane( 7 );
        ExtrLane( 8 );
        ExtrLane( 9 );
        ExtrLane( 10 );
        ExtrLane( 11 );
        ExtrLane( 12 );
        ExtrLane( 13 );
        ExtrLane( 14 );
        ExtrLane( 15 );
        next = 16;
        if ( laneCount >= 20 ) {
            ExtrLane( 16 );
            ExtrLane( 17 );
            ExtrLane( 18 );
            ExtrLane( 19 );
            next = 20;
        }
    }

    /* Remaining lanes (or all of them for small laneCount). */
    for ( ; next < laneCount; next++ ) {
        ExtrLane( next );
    }

    #undef ExtrLane
}