Example #1
0
/*
 * Copies the first laneCount lanes of every state held in `states` into
 * `data`, using scatter stores.
 *
 * states     : state buffer, read both as an array of V256 (one entry per
 *              lane index) and as an array of V512 (one entry per pair of
 *              consecutive lane indices).
 * data       : destination buffer, written as 64-bit words.
 * laneCount  : number of lane indices to extract.
 * laneOffset : multiplier used to build the scatter offsets
 *              0, laneOffset, 2*laneOffset, 3*laneOffset (in 64-bit words,
 *              assuming the scatter macros scale indices by 8 bytes;
 *              TODO confirm against the STORE_SCATTER*_64 definitions).
 *
 * Lane indices below 16 (or below 20 when laneCount >= 20) are handled two
 * at a time through the V512 view; everything else goes one lane index at a
 * time through the V256 view.
 */
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    const V256 *lanes256 = states;
    const V512 *lanes512 = states;
    UINT64 *out = (UINT64 *)data;
    V128 offsets4;
    V256 offsets8;
    unsigned int lane;

    /* One lane index: scatter the four 64-bit words of lanes256[argIndex]. */
    #define ScatterLane( argIndex )      STORE_SCATTER4_64(out+(argIndex), offsets4, lanes256[argIndex])
    /* Two consecutive lane indices (argIndex must be even): scatter the
       eight 64-bit words of lanes512[argIndex/2]. */
    #define ScatterLanePair( argIndex )  STORE_SCATTER8_64(out+(argIndex), offsets8, lanes512[(argIndex)/2])

    offsets4 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
    /* Same four offsets, followed by the same four offsets plus one so that
       the second lane of the pair lands right after the first. */
    offsets8 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);

    /* `lane` tracks the first lane index not yet extracted. */
    lane = 0;
    if ( laneCount >= 16 ) {
        ScatterLanePair( 0 );
        ScatterLanePair( 2 );
        ScatterLanePair( 4 );
        ScatterLanePair( 6 );
        ScatterLanePair( 8 );
        ScatterLanePair( 10 );
        ScatterLanePair( 12 );
        ScatterLanePair( 14 );
        lane = 16;
        if ( laneCount >= 20 ) {
            ScatterLanePair( 16 );
            ScatterLanePair( 18 );
            lane = 20;
        }
    }

    /* Remaining lane indices, one at a time. */
    for ( ; lane < laneCount; lane++ ) {
        ScatterLane( lane );
    }

    #undef  ScatterLane
    #undef  ScatterLanePair
}
void KeccakF1600_Pl2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    const V128 *stateAsLanes = states;
    V128 lanes;
    unsigned int i;
    UINT64 *curData0 = (UINT64 *)data;
    UINT64 *curData1 = (UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);

	#define	Extr( argIndex )	lanes = LOAD128( stateAsLanes[argIndex] ),	        \
								STORE64L( curData0[argIndex], lanes ), 		        \
								STORE64H( curData1[argIndex], lanes )

	#if defined(UseSSE2)
	#define	Extr2( argIndex )	lanes0 = LOAD128( stateAsLanes[argIndex] ),         \
								lanes1 = LOAD128( stateAsLanes[(argIndex)+1] ),     \
								lanes =  UNPACKL( lanes0, lanes1 ),                 \
								lanes0 = UNPACKH( lanes0, lanes1 ),                 \
								STORE128u( *(V128*)&curData0[argIndex], lanes ),	\
								STORE128u( *(V128*)&curData1[argIndex], lanes0 )
	if ( laneCount >= 16 )	{
	    V128 lanes0, lanes1;
		Extr2( 0 );
		Extr2( 2 );
		Extr2( 4 );
		Extr2( 6 );
		Extr2( 8 );
		Extr2( 10 );
		Extr2( 12 );
		Extr2( 14 );
		if ( laneCount >= 20 )	{
			Extr2( 16 );
			Extr2( 18 );
			for(i=20; i<laneCount; i++)
				Extr( i );
		}
		else {
			for(i=16; i<laneCount; i++)
				Extr( i );
		}
	}
	#undef	Extr2
	#else
	if ( laneCount >= 17 )	{
		Extr( 0 );
		Extr( 1 );
		Extr( 2 );
		Extr( 3 );
		Extr( 4 );
		Extr( 5 );
		Extr( 6 );
		Extr( 7 );
		Extr( 8 );
		Extr( 9 );
		Extr( 10 );
		Extr( 11 );
		Extr( 12 );
		Extr( 13 );
		Extr( 14 );
		Extr( 15 );
		Extr( 16 );
		if ( laneCount >= 21 )	{
			Extr( 17 );
			Extr( 18 );
			Extr( 19 );
			Extr( 20 );
			for(i=21; i<laneCount; i++)
				Extr( i );
		}
		else {
			for(i=17; i<laneCount; i++)
				Extr( i );
		}
	}
	#endif
	else {
		for(i=0; i<laneCount; i++)
			Extr( i );
    }
	#undef	Extr
}