// Multiply matrix to data vector. When encoding, it contains data in Data
// and stores error correction codes in Out. When decoding it contains
// broken data followed by ECC in Data and stores recovered data to Out.
// We do not use this function now, everything is moved to UpdateECC.
void RSCoder16::Process(const uint *Data, uint *Out)
{
    uint ProcData[gfSize];

    for (uint I = 0; I < ND; I++)
        ProcData[I]=Data[I];

    if (Decoding)
    {
        // Replace broken data units with first available valid recovery codes.
        // 'Data' array must contain recovery codes after data.
        for (uint I=0, R=ND; I < ND; I++)
            if (!ValidFlags[I]) // For every broken data unit.
            {
                while (!ValidFlags[R]) // Find a valid recovery unit.
                    R++;
                ProcData[I]=Data[R];
                R++;
            }
    }

    uint H=Decoding ? NE : NR;
    for (uint I = 0; I < H; I++)
    {
        uint R = 0; // Result of matrix row multiplication to data.

        uint *MXi=MX + I * ND;
        for (uint J = 0; J < ND; J++)
            R ^= gfMul(MXi[J], ProcData[J]);

        Out[I] = R;
    }
}
// SSSE3 version of ECC update: for every 16-bit word of the block performs
// ECC ^= gfMul(M, Data), where M = MX[ECCNum * ND + DataNum].
// Data and ECC addresses must be properly aligned for SSE.
// Returns false without touching ECC if either address is not aligned to
// SSE_ALIGNMENT or if SSSE3 is not available, true after the block is processed.
bool RSCoder16::SSE_UpdateECC(uint DataNum, uint ECCNum, const byte *Data, byte *ECC, size_t BlockSize)
{
    // Check data alignment and SSSE3 support. A low address bit set in
    // either pointer means that pointer is misaligned.
    const size_t AlignMask=SSE_ALIGNMENT-1;
    const bool Misaligned=((size_t(Data) | size_t(ECC)) & AlignMask)!=0;
    if (Misaligned || _SSE_Version<SSE_SSSE3)
        return false;

    const uint M=MX[ECCNum * ND + DataNum];

    // Lookup tables for _mm_shuffle_epi8. Table N holds products of M and
    // numbers having a 4 bit value 0..15 in bits 4*N..4*N+3 and all other
    // bits set to 0. Low and high bytes of every 16 bit product are stored
    // in separate tables.
    __m128i T0L,T1L,T2L,T3L; // Low byte tables.
    __m128i T0H,T1H,T2H,T3H; // High byte tables.

    for (uint Nib=0; Nib<16; Nib++)
    {
        // Every product is calculated once and then split into bytes.
        const uint P0=gfMul(Nib,M);
        const uint P1=gfMul(Nib<<4,M);
        const uint P2=gfMul(Nib<<8,M);
        const uint P3=gfMul(Nib<<12,M);

        ((byte *)&T0L)[Nib]=(byte)P0;
        ((byte *)&T0H)[Nib]=(byte)(P0>>8);
        ((byte *)&T1L)[Nib]=(byte)P1;
        ((byte *)&T1H)[Nib]=(byte)(P1>>8);
        ((byte *)&T2L)[Nib]=(byte)P2;
        ((byte *)&T2H)[Nib]=(byte)(P2>>8);
        ((byte *)&T3L)[Nib]=(byte)P3;
        ((byte *)&T3H)[Nib]=(byte)(P3>>8);
    }

    const __m128i LowByteMask=_mm_set1_epi16(0xff);     // 00ff00ff...00ff
    const __m128i Low4Mask=_mm_set1_epi8(0xf);          // 0f0f0f0f...0f0f
    const __m128i High4Mask=_mm_slli_epi16(Low4Mask,4); // f0f0f0f0...f0f0

    // Two 128 bit chunks (sixteen 16-bit words) are processed per iteration.
    const size_t PairSize=2*sizeof(__m128i);

    size_t Pos=0;
    while (Pos+PairSize<=BlockSize)
    {
        __m128i *Src=(__m128i *)(Data+Pos);
        __m128i *Dst=(__m128i *)(ECC+Pos);

        // Gather high bytes of all 16 words into one register and low bytes
        // into another, so a single shuffle performs 16 table lookups.
        __m128i SrcHigh=_mm_packus_epi16(_mm_srli_epi16(Src[0],8),
                                         _mm_srli_epi16(Src[1],8));
        __m128i SrcLow=_mm_packus_epi16(_mm_and_si128(Src[0],LowByteMask),
                                        _mm_and_si128(Src[1],LowByteMask));

        // Bits 0..3 of every word start the cumulative sums. Low and high
        // bytes of products are accumulated separately.
        __m128i Index=_mm_and_si128(SrcLow,Low4Mask);
        __m128i SumLow=_mm_shuffle_epi8(T0L,Index);
        __m128i SumHigh=_mm_shuffle_epi8(T0H,Index);

        // Bits 4..7. Low nibbles are masked out before the 16 bit wide
        // shift, so nothing leaks between neighbour bytes.
        Index=_mm_srli_epi16(_mm_and_si128(SrcLow,High4Mask),4);
        SumLow=_mm_xor_si128(SumLow,_mm_shuffle_epi8(T1L,Index));
        SumHigh=_mm_xor_si128(SumHigh,_mm_shuffle_epi8(T1H,Index));

        // Bits 8..11.
        Index=_mm_and_si128(SrcHigh,Low4Mask);
        SumLow=_mm_xor_si128(SumLow,_mm_shuffle_epi8(T2L,Index));
        SumHigh=_mm_xor_si128(SumHigh,_mm_shuffle_epi8(T2H,Index));

        // Bits 12..15.
        Index=_mm_srli_epi16(_mm_and_si128(SrcHigh,High4Mask),4);
        SumLow=_mm_xor_si128(SumLow,_mm_shuffle_epi8(T3L,Index));
        SumHigh=_mm_xor_si128(SumHigh,_mm_shuffle_epi8(T3H,Index));

        // Interleave low and high product bytes back to 16-bit words
        // and add them to ECC.
        Dst[0]=_mm_xor_si128(Dst[0],_mm_unpacklo_epi8(SumLow,SumHigh));
        Dst[1]=_mm_xor_si128(Dst[1],_mm_unpackhi_epi8(SumLow,SumHigh));

        Pos+=PairSize;
    }

    // Remaining words at the end of block, which do not fill two complete
    // 128 bit chunks, are processed in a usual way. We cannot do the same
    // in the beginning of block, because Data and ECC can have different
    // alignment offsets.
    // NOTE(review): this loop steps by whole 16-bit words, so BlockSize is
    // presumably always even - confirm with callers.
    while (Pos<BlockSize)
    {
        ushort *ECCWord=(ushort *)(ECC+Pos);
        const ushort *DataWord=(const ushort *)(Data+Pos);
        *ECCWord ^= gfMul( M, *DataWord );
        Pos+=2;
    }

    return true;
}
// Apply Gauss-Jordan elimination to find inverse of decoder matrix.
// We have the square NDxND matrix, but we do not store its trivial
// diagonal "1" rows matching valid data, so we work with NExND matrix.
// Our original Cauchy matrix does not contain 0, so we skip search
// for non-zero pivot.
// On return MX is overwritten with the NE x ND inverse. MX is also used
// as the working matrix, so its original contents are destroyed.
void RSCoder16::InvertDecoderMatrix()
{
    uint *MI=new uint[NE * ND]; // We'll create inverse matrix here.

    // Clear the matrix, then put 1 into diagonal positions of stored rows.
    // Together with the not stored trivial rows this forms the identity matrix.
    memset(MI, 0, ND * NE * sizeof(*MI));
    for (uint Kr = 0, Kf = 0; Kr < NE; Kr++, Kf++)
    {
        // NOTE(review): this scan is not bounded, it relies on at least NE
        // of the first ND ValidFlags entries being unset - confirm with callers.
        while (ValidFlags[Kf]) // Skip trivial rows.
            Kf++;
        MI[Kr * ND + Kf] = 1;  // Set diagonal 1.
    }

    // Kr is the number of row in our actual reduced NE x ND matrix,
    // which does not contain trivial diagonal 1 rows.
    // Kf is the number of row in full ND x ND matrix with all trivial rows
    // included.
    for (uint Kr = 0, Kf = 0; Kf < ND; Kr++, Kf++) // Select pivot row.
    {
        // The bound must be tested before the flag, so ValidFlags[ND]
        // is never read when trailing data units are all valid.
        while (Kf < ND && ValidFlags[Kf])
        {
            // Here we process trivial diagonal 1 rows matching valid data units.
            // Their processing can be simplified comparing to usual rows.
            // In full version of elimination we would set MX[I * ND + Kf] to zero
            // after MI[..]^=, but we do not need it for matrix inversion.
            for (uint I = 0; I < NE; I++)
                MI[I * ND + Kf] ^= MX[I * ND + Kf];
            Kf++;
        }

        if (Kf == ND) // Only trivial rows were left, elimination is done.
            break;

        uint *MXk = MX + Kr * ND; // k-th row of main matrix.
        uint *MIk = MI + Kr * ND; // k-th row of inversion matrix.

        uint PInv = gfInv( MXk[Kf] ); // Pivot inverse.
        // Divide the pivot row by pivot, so pivot cell contains 1.
        for (uint I = 0; I < ND; I++)
        {
            MXk[I] = gfMul( MXk[I], PInv );
            MIk[I] = gfMul( MIk[I], PInv );
        }

        for (uint I = 0; I < NE; I++)
            if (I != Kr) // For all rows except containing the pivot cell.
            {
                // Apply Gaussian elimination Mij -= Mkj * Mik / pivot.
                // Since pivot is already 1, it is reduced to Mij -= Mkj * Mik.
                // Subtraction in this field is performed with XOR.
                uint *MXi = MX + I * ND; // i-th row of main matrix.
                uint *MIi = MI + I * ND; // i-th row of inversion matrix.
                uint Mik = MXi[Kf]; // Cell in pivot position.
                for (uint J = 0; J < ND; J++)
                {
                    MXi[J] ^= gfMul(MXk[J] , Mik);
                    MIi[J] ^= gfMul(MIk[J] , Mik);
                }
            }
    }

    // Copy data to main matrix.
    memcpy(MX, MI, NE * ND * sizeof(*MI));

    delete[] MI;
}
Example #4
0
// -----------------------------------------------------------------------------
// Self-test of the Galois field arithmetic and of gfPolEvalSeq().
//
// Returns 0 if every check passes, otherwise the number of the first
// failed check:
//    1  a+b = b+a
//    2  a*b = b*a
//    3  a+(b+c) = (a+b)+c
//    4  a*(b*c) = (a*b)*c
//    5  a*c + b*c = (a+b)*c
//    6  a*(b/a) = b (if a != 0)
//   10  gfPolEvalSeq() disagrees with gfPolEval()
// Codes 7, 8 and 9 belong to the polynomial tests which are currently
// commented out.
// -----------------------------------------------------------------------------
int main ()
{
	gfInit();

	// verify basic operations (mul, add, div) for all elements:
	for (int a=0; a<GF_N; a++) {
		for (int b=0; b<GF_N; b++) {
			if (gfAdd(a, b) != gfAdd(b, a))
				return 1;
			if (gfMul(a, b) != gfMul(b, a))
				return 2;
			for (int c=0; c<GF_N; c++) {
				if (gfAdd(a, gfAdd(b, c)) != gfAdd(gfAdd(a, b), c))
					return 3;
				if (gfMul(a, gfMul(b, c)) != gfMul(gfMul(a, b), c))
					return 4;
				if (gfAdd(gfMul(a, c), gfMul(b, c)) != gfMul(gfAdd(a, b), c))
					return 5;
			}
			// division by zero is not defined, so skip a = 0
			if (a != GF_0)
				if (gfMul(a, gfDiv(b, a)) != b)
					return 6;
		}
	}

	// verify polynomial operations:
	// A = BQ + R
	// Several of the buffers below are referenced only by the disabled
	// tests; they are kept so these tests can be re-enabled as they are.
	const int M = GF_N;	// max deg
	gfExp  A[M+1];		int nA;
	gfExp  B[M+1];		int nB;
	gfExp  Q[M+1];		int nQ;
	gfExp  R1[M+2];
	gfExp* R = R1 + 1;	int nR;
	gfExp  Z[M+1];		int nZ;
	gfExp  Z1[2*M];		int nZ1;
	gfExp  Z2[2*M];		int nZ2;
	gfExp* P = R;		int nP;	// alias
	gfExp* N = B;		int nN;	// alias
	gfExp* Y = Q;		int nY;	// alias
	gfExp  Mem[8*(M+1) + 3];

// 	// -------------------- test polDiv, polMul, gfPolAdd(): --------------------
// 	for (int test=0; test<100000; test++) {
// 		// // clear all -- should not be required:
// 		// for (int i=0; i<=M; i++)
// 		// 	A[i] = B[i] = Q[i] = R[i] = Z[i] = 0;
// 		// R[-1] = 0;

// 		nB = randInt(0, M);
// 		nA = randInt(nB, M);
// 		randPol(A, nA);
// 		randPol(B, nB);
// 		if (gfPolDeg(B, nB) == -1)		// avoid dividing by B=0
// 			continue;
// 		nB = polDiv(A, nA, B, nB, Q, &nQ, R, &nR);
// 		nZ = gfPolMul(Q, nQ, B, nB, Z);				// B * Q
// 		if (nZ > nA)
// 			return 7;
// 		nZ = gfPolAdd(Z, nZ, R, nR, Z);				//       + R
// 		if (! polCmp(Z, A, nZ, nA))
// 			return 8;
// 	}

// 	// -------------------- test gfPolEEA(): --------------------
// 	for (int test=0; test<100000; test++) {
// 		nN = randInt(1, M);
// 		nA = randInt(0, nN-1);
// 		randPol(A, nA);
// 		randPol(N, nN);
// 		if (gfPolDeg(A, nA) == -1)		// avoid dividing by A=0
// 			continue;
// 		gfPolEEA(N, nN, A, nA, P, &nP, Q, &nQ, Mem);
// 		nZ1 = gfPolMul(P, nP, N, nN, Z1);
// 		nZ2 = gfPolMul(Q, nQ, A, nA, Z2);
// 		if (! polCmp(Z1, Z2, nZ1, nZ2))
// 			return 9;
// 	}

	// -------------------- test gfPolEvalSeq() against gfPolEval(): --------------------
	for (int test=0; test<10000; test++) {
		nA = randInt(0, GF_N - 2);	// limit of gfPolEvalSeq()
		nY = randInt(0, M);
		gfVec* Yv = Y;
		randPol(A, nA);
		gfExp x = GF_Z(1);//randE1();
		gfPolEvalSeq(A, nA, Yv, nY, x);
		// Yv is read from index nY down to 0 while x is multiplied
		// by GF_Z(1) after every comparison.
		for (int i=0; i<=nY; i++) {
			gfExp y2 = gfPolEval(A, nA, x);
			gfExp y1 = gfV2E[Yv[nY-i]];
			if (y2 != y1)
				return 10;
			x = gfMul(x, GF_Z(1));
		}
	}

	return 0;
}