/**
 * Stream cipher S-box 2, pointer-argument variant.
 *
 * Operates on whole dvbcsa_bs_word_t values using only the BS_* word
 * operations.  With a..e denoting *fa..*fe:
 *
 *   *sa = base_a ^ (e & gate_a)
 *   *sb = base_b ^ (e & gate_b)
 *
 * where base_x and gate_x are boolean functions of a, b, c, d only.
 *
 * @param fa,fb,fc,fd [in]  the four inputs that feed base_x / gate_x
 * @param fe          [in]  fifth input; gates gate_a / gate_b into the outputs
 * @param sa,sb       [out] the two S-box outputs
 */
static AYCW_INLINE void aycw_bs_stream_sbox2(dvbcsa_bs_word_t *fa, dvbcsa_bs_word_t *fb, dvbcsa_bs_word_t *fc, dvbcsa_bs_word_t *fd, dvbcsa_bs_word_t *fe, dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
{
   /* fa..fd are only needed before the first store, so load them once. */
   dvbcsa_bs_word_t a = *fa;
   dvbcsa_bs_word_t b = *fb;
   dvbcsa_bs_word_t c = *fc;
   dvbcsa_bs_word_t d = *fd;

   /* a & d appears in both halves of the second output. */
   dvbcsa_bs_word_t a_and_d = BS_AND (a, d);

   dvbcsa_bs_word_t base_a = BS_XOR (a, BS_XOR (BS_AND (b, BS_OR (c, d)), BS_XOR (c, BS_NOT (d))));
   dvbcsa_bs_word_t gate_a = BS_OR (BS_AND (a, BS_XOR (b, d)), BS_AND (BS_OR (a, b), c));
   dvbcsa_bs_word_t base_b = BS_XOR (BS_AND (b, d), BS_OR (a_and_d, BS_XOR (b, BS_NOT (c))));
   dvbcsa_bs_word_t gate_b = BS_OR (a_and_d, BS_XOR (a, BS_XOR (b, BS_AND (c, d))));

   /* *fe is deliberately re-read for each output, after the store to *sa. */
   *sa = BS_XOR (base_a, BS_AND (*fe, gate_a));
   *sb = BS_XOR (base_b, BS_AND (*fe, gate_b));
}
/*
 * Stream cipher S-box 2, by-value variant.
 *
 * Operates on whole dvbcsa_bs_word_t values using only the BS_* word
 * operations:
 *
 *   *sa = base_a ^ (fe & gate_a)
 *   *sb = base_b ^ (fe & gate_b)
 *
 * base_x and gate_x are boolean functions of fa, fb, fc, fd only; fe gates
 * the second function of each pair into the corresponding output.
 */
static void DVBCSA_INLINE dvbcsa_bs_stream_sbox2(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
{
  dvbcsa_bs_word_t base_a, gate_a, base_b, gate_b;
  dvbcsa_bs_word_t a_and_d;

  /* fa & fd is shared by both terms of the second output */
  a_and_d = BS_AND (fa, fd);

  /* first output */
  base_a = BS_XOR (fa, BS_XOR (BS_AND (fb, BS_OR (fc, fd)), BS_XOR (fc, BS_NOT (fd))));
  gate_a = BS_OR (BS_AND (fa, BS_XOR (fb, fd)), BS_AND (BS_OR (fa, fb), fc));

  /* second output */
  base_b = BS_XOR (BS_AND (fb, fd), BS_OR (a_and_d, BS_XOR (fb, BS_NOT (fc))));
  gate_b = BS_OR (a_and_d, BS_XOR (fa, BS_XOR (fb, BS_AND (fc, fd))));

  *sa = BS_XOR (base_a, BS_AND (fe, gate_a));
  *sb = BS_XOR (base_b, BS_AND (fe, gate_b));
}
/**
 Run the stream cipher over one batch and XOR the key stream onto the data.

 The shift registers are loaded from the key by aycw__vInitShiftRegister(),
 then 8 x 4 init rounds consume bs_data_sb0[0..63].  After that every call
 of aycw__vRound() yields two key stream words; four calls make up one
 group of eight output words, produced in the order (6,7), (4,5), (2,3),
 (0,1) within the group.

 Output word n is bs_data_sb0[64 + n] XOR key stream.  The loop always
 works on complete groups of eight, so when outbits is not a multiple of 8
 it is effectively rounded up: both arrays must be large enough for that.

 @param data_return[out]   bit sliced output data
 @param outbits[in]        number of bits to calculate (handled in groups of 8)
 @param BS_key[in]         bit sliced key array 64 elements
 @param bs_data_sb0[in]    bit sliced data: words 0..63 are the 1st data block
                           used for initialization, words from 64 on are the
                           data the key stream is applied to
*/
void aycw_stream_decrypt(dvbcsa_bs_word_t * data_return, unsigned int outbits, dvbcsa_bs_word_t * BS_key, dvbcsa_bs_word_t *bs_data_sb0)
{
   aycw_tstRegister  stRegister;   /* filled by aycw__vInitShiftRegister() below */
   aycw_tstPQXYZ     stPQXYZ = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
   aycw_tstCDEF      stCDEF = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
   dvbcsa_bs_word_t  BS_Streambit0, BS_Streambit1;
   unsigned int      byteNo, phase;
   unsigned int      groupBase, pair, pos;

   /* load registers A and B from the key */
   aycw__vInitShiftRegister(BS_key, &stRegister);

   /* initialization: four rounds (phase 0..3) for each of the 8 bytes of the 1st block */
   for (byteNo = 0; byteNo < 8; byteNo++)
   {
      for (phase = 0; phase < 4; phase++)
      {
         aycw__vInitRound(phase, byteNo, &stPQXYZ, &stCDEF, &stRegister, bs_data_sb0);
      }
   }

   /* generation: one group of eight words per outer iteration */
   for (groupBase = 0; groupBase < outbits; groupBase += 8)
   {
      /* pairs are filled from the top of the group downwards: 6/7, 4/5, 2/3, 0/1 */
      for (pair = 4; pair-- > 0; )
      {
         pos = groupBase + 2 * pair;

         aycw__vRound(&stPQXYZ, &stCDEF, &stRegister, &BS_Streambit0, &BS_Streambit1);

         data_return[pos]     = BS_XOR(bs_data_sb0[pos + 64],     BS_Streambit0);
         data_return[pos + 1] = BS_XOR(bs_data_sb0[pos + 64 + 1], BS_Streambit1);
      }
   }
}
void dvbcsa_bs_stream_cipher_batch(const struct dvbcsa_bs_key_s *key, const struct dvbcsa_bs_batch_s *pcks, unsigned int maxlen) { dvbcsa_bs_word_t A[32 + 10][4]; // 32 because we will move back (virtual shift register) dvbcsa_bs_word_t B[32 + 10][4]; // 32 because we will move back (virtual shift register) dvbcsa_bs_word_t X[4]; dvbcsa_bs_word_t Y[4]; dvbcsa_bs_word_t Z[4]; dvbcsa_bs_word_t D[4]; dvbcsa_bs_word_t E[4]; dvbcsa_bs_word_t F[4]; dvbcsa_bs_word_t p; dvbcsa_bs_word_t q; dvbcsa_bs_word_t r; dvbcsa_bs_word_t in1[4]; dvbcsa_bs_word_t in2[4]; dvbcsa_bs_word_t extra_B[4]; dvbcsa_bs_word_t s1a, s1b, s2a, s2b, s3a, s3b, s4a, s4b, s5a, s5b, s6a, s6b, s7a, s7b; dvbcsa_bs_word_t next_E[4]; dvbcsa_bs_word_t tmp0, tmp1, tmp3, tmp4; dvbcsa_bs_word_t sb[64]; int h, i, j, k, b; dvbcsa_bs_stream_transpose_in(pcks, sb); for (b = 0; b < 4; b++) { for (i = 0; i < 8; i++) { A[i][b] = key->stream[b + i * 4]; B[i][b] = key->stream[b + i * 4 + 32]; } // all other regs = 0 A[8][b] = BS_VAL8(00); A[9][b] = BS_VAL8(00); B[8][b] = BS_VAL8(00); B[9][b] = BS_VAL8(00); X[b] = BS_VAL8(00); Y[b] = BS_VAL8(00); Z[b] = BS_VAL8(00); D[b] = BS_VAL8(00); E[b] = BS_VAL8(00); F[b] = BS_VAL8(00); } p = BS_VAL8(00); q = BS_VAL8(00); r = BS_VAL8(00); /* Stream INIT */ for (i = 0; i < 8; i++) { for (b = 0; b < 4; b++) { in1[b] = sb[8 * i + 4 + b]; in2[b] = sb[8 * i + b]; } for (j = 0; j < 4; j++) { dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0], A[3][0], &s1a, &s1b); dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1], A[1][1], &s2a, &s2b); dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2], A[0][3], &s3a, &s3b); dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0], A[2][3], &s4a, &s4b); dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2], A[4][2], &s5a, &s5b); dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3], A[2][1], &s6a, &s6b); dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3], A[1][2], &s7a, &s7b); extra_B[3] = BS_XOR (BS_XOR (BS_XOR 
(B[2][0], B[5][1]), B[6][2]), B[8][3]); extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]), B[2][3]), B[3][2]); extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]), B[3][0]), B[4][1]); extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]), B[2][1]), B[7][0]); for (b = 0; b < 4; b++) { dvbcsa_bs_word_t A_next; A_next = BS_XOR (A[9][b], X[b]); A_next = BS_XOR (BS_XOR (A_next, D[b]), ((j % 2) ? in2[b] : in1[b])); for (k = 9; k > 0; k--) A[k][b] = A[k - 1][b]; A[0][b] = A_next; } dvbcsa_bs_word_t B_next[4]; for (b = 0; b < 4; b++) { B_next[b] = BS_XOR (BS_XOR (B[6][b], B[9][b]), Y[b]); B_next[b] = BS_XOR (B_next[b], ((j % 2) ? in1[b] : in2[b])); } tmp3 = B_next[3]; B_next[3] = BS_XOR (B_next[3], BS_AND (BS_XOR (B_next[3], B_next[2]), p)); B_next[2] = BS_XOR (B_next[2], BS_AND (BS_XOR (B_next[2], B_next[1]), p)); B_next[1] = BS_XOR (B_next[1], BS_AND (BS_XOR (B_next[1], B_next[0]), p)); B_next[0] = BS_XOR (B_next[0], BS_AND (BS_XOR (B_next[0], tmp3), p)); for (b = 0; b < 4; b++) { for (k = 9; k > 0; k--) B[k][b] = B[k - 1][b]; B[0][b] = B_next[b]; } for (b = 0; b < 4; b++) D[b] = BS_XOR (BS_XOR (E[b], Z[b]), extra_B[b]); for (b = 0; b < 4; b++) next_E[b] = F[b]; tmp0 = BS_XOR (Z[0], E[0]); tmp1 = BS_AND (Z[0], E[0]); F[0] = BS_XOR (E[0], BS_AND (q, BS_XOR (Z[0], r))); tmp3 = BS_AND (tmp0, r); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (Z[1], E[1]); tmp1 = BS_AND (Z[1], E[1]); F[1] = BS_XOR (E[1], BS_AND (q, BS_XOR (Z[1], tmp4))); tmp3 = BS_AND (tmp0, tmp4); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (Z[2], E[2]); tmp1 = BS_AND (Z[2], E[2]); F[2] = BS_XOR (E[2], BS_AND (q, BS_XOR (Z[2], tmp4))); tmp3 = BS_AND (tmp0, tmp4); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (Z[3], E[3]); tmp1 = BS_AND (Z[3], E[3]); F[3] = BS_XOR (E[3], BS_AND (q, BS_XOR (Z[3], tmp4))); tmp3 = BS_AND (tmp0, tmp4); r = BS_XOR (r, BS_AND (q, BS_XOR (BS_OR (tmp1, tmp3), r))); // ultimate carry for (b = 0; b < 4; b++) E[b] = next_E[b]; X[0] = s1a; X[1] = s2a; X[2] = s3b; X[3] = s4b; Y[0] = s3a; Y[1] 
= s4a; Y[2] = s5b; Y[3] = s6b; Z[0] = s5a; Z[1] = s6a; Z[2] = s1b; Z[3] = s2b; p = s7a; q = s7b; } } /* Stream GEN */ for (h = 8; h < maxlen; h++) { dvbcsa_bs_word_t cb[8]; for (j = 0; j < 4; j++) { dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0], A[3][0], &s1a, &s1b); dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1], A[1][1], &s2a, &s2b); dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2], A[0][3], &s3a, &s3b); dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0], A[2][3], &s4a, &s4b); dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2], A[4][2], &s5a, &s5b); dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3], A[2][1], &s6a, &s6b); dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3], A[1][2], &s7a, &s7b); // use 4x4 xor to produce extra nibble for T3 extra_B[3] = BS_XOR (BS_XOR (BS_XOR (B[2][0], B[5][1]), B[6][2]), B[8][3]); extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]), B[2][3]), B[3][2]); extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]), B[3][0]), B[4][1]); extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]), B[2][1]), B[7][0]); // T1 = xor all inputs // in1, in2, D are only used in T1 during initialisation, not generation for (b = 0; b < 4; b++) { dvbcsa_bs_word_t A_next; A_next = BS_XOR (A[9][b], X[b]); for (k = 9; k > 0; k--) A[k][b] = A[k - 1][b]; A[0][b] = A_next; } dvbcsa_bs_word_t B_next[4]; // T2 = xor all inputs // in1, in2 are only used in T1 during initialisation, not generation // if p=0, use this, if p=1, rotate the result left for (b = 0; b < 4; b++) B_next[b] = BS_XOR (BS_XOR (B[6][b], B[9][b]), Y[b]); // if p=1, rotate left (yes, this is what we're doing) tmp3 = B_next[3]; B_next[3] = BS_XOR (B_next[3], BS_AND (BS_XOR (B_next[3], B_next[2]), p)); B_next[2] = BS_XOR (B_next[2], BS_AND (BS_XOR (B_next[2], B_next[1]), p)); B_next[1] = BS_XOR (B_next[1], BS_AND (BS_XOR (B_next[1], B_next[0]), p)); B_next[0] = BS_XOR (B_next[0], BS_AND (BS_XOR (B_next[0], tmp3), p)); for (b = 0; b < 
4; b++) { for (k = 9; k > 0; k--) B[k][b] = B[k - 1][b]; B[0][b] = B_next[b]; } // T3 = xor all inputs for (b = 0; b < 4; b++) D[b] = BS_XOR (BS_XOR (E[b], Z[b]), extra_B[b]); // T4 = sum, carry of Z + E + r for (b = 0; b < 4; b++) next_E[b] = F[b]; tmp0 = BS_XOR (Z[0], E[0]); tmp1 = BS_AND (Z[0], E[0]); F[0] = BS_XOR (E[0], BS_AND (q, BS_XOR (Z[0], r))); tmp3 = BS_AND (tmp0, r); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (Z[1], E[1]); tmp1 = BS_AND (Z[1], E[1]); F[1] = BS_XOR (E[1], BS_AND (q, BS_XOR (Z[1], tmp4))); tmp3 = BS_AND (tmp0, tmp4); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (Z[2], E[2]); tmp1 = BS_AND (Z[2], E[2]); F[2] = BS_XOR (E[2], BS_AND (q, BS_XOR (Z[2], tmp4))); tmp3 = BS_AND (tmp0, tmp4); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (Z[3], E[3]); tmp1 = BS_AND (Z[3], E[3]); F[3] = BS_XOR (E[3], BS_AND (q, BS_XOR (Z[3], tmp4))); tmp3 = BS_AND (tmp0, tmp4); r = BS_XOR (r, BS_AND (q, BS_XOR (BS_OR (tmp1, tmp3), r))); // ultimate carry for (b = 0; b < 4; b++) E[b] = next_E[b]; X[0] = s1a; X[1] = s2a; X[2] = s3b; X[3] = s4b; Y[0] = s3a; Y[1] = s4a; Y[2] = s5b; Y[3] = s6b; Z[0] = s5a; Z[1] = s6a; Z[2] = s1b; Z[3] = s2b; p = s7a; q = s7b; // require 4 loops per output byte // 2 output bits are a function of the 4 bits of D // xor 2 by 2 cb[7 - 2 * j] = BS_XOR (D[2], D[3]); cb[6 - 2 * j] = BS_XOR (D[0], D[1]); } // EXTERNAL LOOP //////////////////////////////////////////////////////////////////////////////// dvbcsa_bs_stream_transpose_out(pcks, h, cb); } }
/****** execute round **************/
/**
 Clock the stream cipher once in generation mode and deliver two key stream words.

 Steps, in this order:
  - shift register A by 4 and write the new head: old words 36..39 (found at
    40..43 after the shift) XOR X
  - build the B feedback from B[24..27] ^ B[36..39] ^ Y, rotate it by one
    position wherever P is set, shift B by 4 and write the feedback as new head
  - Bout: XOR of four taps of the already shifted B per bit
  - D = Bout ^ E ^ Z
  - adder: wherever Q is set F = Z + E + C and C = carry out, otherwise F = E
    and C is kept; E takes the previous F
  - recompute P, Q, X, Y, Z via aycw__vCaculatePQXYZ()
  - stream bits: D[0]^D[1] and D[2]^D[3]

 @param stPQXYZ[in,out]        P, Q, X, Y, Z of the previous round, replaced at the end
 @param stCDEF[in,out]         C, D, E, F combiner state
 @param stRegister[in,out]     shift registers A_BS and B_BS
 @param BS_Streambit0[out]     D[0] ^ D[1]
 @param BS_Streambit1[out]     D[2] ^ D[3]
*/
AYCW_INLINE void aycw__vRound(aycw_tstPQXYZ *stPQXYZ, aycw_tstCDEF *stCDEF, aycw_tstRegister *stRegister, dvbcsa_bs_word_t *BS_Streambit0, dvbcsa_bs_word_t *BS_Streambit1)
{
   dvbcsa_bs_word_t feedB[4];    /* B feedback nibble, not rotated          */
   dvbcsa_bs_word_t keepB[4];    /* feedB masked to the lanes where P is 0  */
   dvbcsa_bs_word_t tapB[4];     /* Bout                                    */
   dvbcsa_bs_word_t prevF[4];    /* F on entry, becomes the new E           */
   dvbcsa_bs_word_t carry, sum, gen;
   uint8 n;

   /* NOTE(review): this path shifts with length 80 while aycw__vInitRound uses 44;
      this function itself reads no register index above 43 afterwards.  Confirm
      the register size and the intended length. */
   aycw__ShiftRegisterLeft(stRegister->A_BS, 4, 80);

   for (n = 0; n < 4; n++)
   {
      stRegister->A_BS[n] = stRegister->A_BS[n + 40];
      BS_XOREQ(stRegister->A_BS[n], stPQXYZ->BS_X[n]);
   }

   for (n = 0; n < 4; n++)
   {
      feedB[n] = BS_XOR(BS_XOR(stRegister->B_BS[n + 6 * 4], stRegister->B_BS[n + 9 * 4]), stPQXYZ->BS_Y[n]);
#if Init
      /* only compiled when Init is defined non-zero; relies on identifiers
         (u32IV, tmpIntArray, B_BS) that are not declared in this function */
      feedB[n] = feedB[n] ^ A_tBITVALUE(&u32IV, 0, n);
      BS_ConverToIntArray(tmpIntArray,B_BS,40);
#endif
      /* first buffer the data that is not to be rotated */
      keepB[n] = BS_AND(feedB[n], BS_NOT(stPQXYZ->BS_P));
   }

   aycw__ShiftRegisterLeft(stRegister->B_BS, 4, 80);

   /* new head of B: rotated feedback where P is set, plain feedback elsewhere.
      Rotated entry n is plain entry n-1, entry 0 wraps around to entry 3. */
   for (n = 0; n < 4; n++)
   {
      stRegister->B_BS[n] = BS_OR(BS_AND(feedB[(n + 3) & 3], stPQXYZ->BS_P), keepB[n]);
   }

   /********** execute combiner calc **********/
   /* calc of bout
      Bout 3 := b2,0  b5,1  b6,2  b8,3
      Bout 2 := b5,0  b7,1  b2,3  b3,2
      Bout 1 := b4,3  b7,2  b3,0  b4,1
      Bout 0 := b8,2  b5,3  b2,1  b7,0
      B has been shifted by 4 already, so tap b(i,j) sits at index 4*i + j + 4. */
   tapB[3] = BS_XOR(BS_XOR(stRegister->B_BS[12], stRegister->B_BS[25]), BS_XOR(stRegister->B_BS[30], stRegister->B_BS[39]));
   tapB[2] = BS_XOR(BS_XOR(stRegister->B_BS[24], stRegister->B_BS[33]), BS_XOR(stRegister->B_BS[15], stRegister->B_BS[18]));
   tapB[1] = BS_XOR(BS_XOR(stRegister->B_BS[23], stRegister->B_BS[34]), BS_XOR(stRegister->B_BS[16], stRegister->B_BS[21]));
   tapB[0] = BS_XOR(BS_XOR(stRegister->B_BS[38], stRegister->B_BS[27]), BS_XOR(stRegister->B_BS[13], stRegister->B_BS[32]));

   /* calc of D, using E and Z as they were before this round's adder */
   for (n = 0; n < 4; n++)
   {
      stCDEF->BS_D[n] = BS_XOR(BS_XOR(tapB[n], stCDEF->BS_E[n]), stPQXYZ->BS_Z[n]);
   }

   /* adder F = Z + E + C, gated by Q */
   for (n = 0; n < 4; n++)
   {
      prevF[n] = stCDEF->BS_F[n];
   }

   carry = stCDEF->BS_C;
   for (n = 0; n < 4; n++)
   {
      sum = BS_XOR (stPQXYZ->BS_Z[n], stCDEF->BS_E[n]);
      gen = BS_AND (stPQXYZ->BS_Z[n], stCDEF->BS_E[n]);
      stCDEF->BS_F[n] = BS_XOR (stCDEF->BS_E[n], BS_AND (stPQXYZ->BS_Q, BS_XOR (stPQXYZ->BS_Z[n], carry)));
      carry = BS_OR (gen, BS_AND (sum, carry));
   }
   /* ultimate carry */
   stCDEF->BS_C = BS_XOR (stCDEF->BS_C, BS_AND (stPQXYZ->BS_Q, BS_XOR (carry, stCDEF->BS_C)));

   for (n = 0; n < 4; n++)
   {
      stCDEF->BS_E[n] = prevF[n];
   }

   aycw__vCaculatePQXYZ(stRegister->A_BS, stPQXYZ);

   /* calc stream bit */
   *BS_Streambit0 = BS_XOR(stCDEF->BS_D[0], stCDEF->BS_D[1]);
   *BS_Streambit1 = BS_XOR(stCDEF->BS_D[2], stCDEF->BS_D[3]);
}
/****** execute init round **************/ AYCW_INLINE void aycw__vInitRound(uint8 j, uint8 u8Byte, aycw_tstPQXYZ * stPQXYZ , aycw_tstCDEF *stCDEF, aycw_tstRegister *stRegister, dvbcsa_bs_word_t *bs_data_sb0) { uint8 k; dvbcsa_bs_word_t /*tmp,*/tmp0,tmp4,tmp1,tmp3; uint8 b; /*uint8 u32IV;*/ dvbcsa_bs_word_t BS_TMP_B[5]; dvbcsa_bs_word_t BS_TMP_B_Result[4]; dvbcsa_bs_word_t BS_Bout[4]; dvbcsa_bs_word_t BS_Enew[4]; /* if (j & 0x1) // all odd { u32IV = (iv >> 4 | (iv << 4)); } else { u32IV = iv; } */ aycw__ShiftRegisterLeft( stRegister->A_BS, 4, 44); for (k = 0; k < 4; k++) { stRegister->A_BS[k] = stRegister->A_BS[k+40]; BS_XOREQ(stRegister->A_BS[k],stPQXYZ->BS_X[k]); BS_XOREQ(stRegister->A_BS[k],stCDEF->BS_D[k]); /* data, byte, bit */ BS_XOREQ(stRegister->A_BS[k],( (j & 0x1)?bs_data_sb0[k+u8Byte*8]:bs_data_sb0[k+4+u8Byte*8])); } for (k = 0; k < 4; k++) { BS_TMP_B[k] = BS_XOR(stRegister->B_BS[k+6*4], stRegister->B_BS[k+9*4]); BS_TMP_B[k] = BS_XOR(BS_TMP_B[k], stPQXYZ->BS_Y[k]); // OPTIMIZEME: optimizeable by using the upper A_tBITVALUE calculation /*tmp = (j & 0x1)?bs_data_sb0[k+4+u8Byte*8]:bs_data_sb0[k+u8Byte*8]; tmp0 = aycw__BitExpandOfByteToBsWord(&u32IV, 0, k); if (tmp0 != tmp) return 0;*/ BS_TMP_B[k] = BS_XOR(BS_TMP_B[k], ( (j & 0x1)?bs_data_sb0[k+4+u8Byte*8]:bs_data_sb0[k+u8Byte*8])); BS_TMP_B_Result[k] = BS_AND(BS_TMP_B[k], BS_NOT(stPQXYZ->BS_P)); //Die nicht zu rotierenden Daten zunächst zwischenspeichern. } /* yet rotate B */ /* OPTIMIZEME: write into B directly?? 
*/ for (k = 4; k > 0; k--) { BS_TMP_B[k] = BS_TMP_B[k-1]; } BS_TMP_B[0] = BS_TMP_B[4]; /* B must not be moved by 4 before handling is finished */ aycw__ShiftRegisterLeft(stRegister->B_BS, 4, 44); /* now write the result, both rotated and unrotated */ for ( k = 0; k < 4; k++) { // rotated //not rotated stRegister->B_BS[k] = BS_OR(BS_AND(BS_TMP_B[k], stPQXYZ->BS_P), BS_TMP_B_Result[k]); } /********** Combiner calculation **********/ /* calc bout Bout 3 := b2,0 b5,1 b6,2 b8,3 Bout 2 := b5,0 b7,1 b2,3 b3,2 Bout 1 := b4,3 b7,2 b3,0 b4,1 Bout 0 := b8,2 b5,3 b2,1 b7,0 */ /* #define shiftB 4 BS_Bout[3] = stRegister->B_BS[2*4 + 0 + shiftB ] ^ stRegister->B_BS[5*4 + 1 + shiftB ] ^ stRegister->B_BS[6*4 + 2 + shiftB ] ^ stRegister->B_BS[8*4 + 3 + shiftB ] ; BS_Bout[2] = stRegister->B_BS[5*4 + 0 + shiftB ] ^ stRegister->B_BS[7*4 + 1 + shiftB ] ^ stRegister->B_BS[2*4 + 3 + shiftB ] ^ stRegister->B_BS[3*4 + 2 + shiftB ] ; BS_Bout[1] = stRegister->B_BS[4*4 + 3 + shiftB ] ^ stRegister->B_BS[7*4 + 2 + shiftB ] ^ stRegister->B_BS[3*4 + 0 + shiftB ] ^ stRegister->B_BS[4*4 + 1 + shiftB ] ; BS_Bout[0] = stRegister->B_BS[8*4 + 2 + shiftB ] ^ stRegister->B_BS[5*4 + 3 + shiftB ] ^ stRegister->B_BS[2*4 + 1 + shiftB ] ^ stRegister->B_BS[7*4 + 0 + shiftB ] ; */ BS_Bout[3] = BS_XOR(BS_XOR(stRegister->B_BS[12], stRegister->B_BS[25]), BS_XOR(stRegister->B_BS[30], stRegister->B_BS[39])); BS_Bout[2] = BS_XOR(BS_XOR(stRegister->B_BS[24], stRegister->B_BS[33]), BS_XOR(stRegister->B_BS[15], stRegister->B_BS[18])); BS_Bout[1] = BS_XOR(BS_XOR(stRegister->B_BS[23], stRegister->B_BS[34]), BS_XOR(stRegister->B_BS[16], stRegister->B_BS[21])); BS_Bout[0] = BS_XOR(BS_XOR(stRegister->B_BS[38], stRegister->B_BS[27]), BS_XOR(stRegister->B_BS[13], stRegister->B_BS[32])); /* calc D */ for (k = 0; k < 4; k++) { stCDEF->BS_D[k] = BS_XOR(BS_XOR(BS_Bout[k], stCDEF->BS_E[k]), stPQXYZ->BS_Z[k]); } for (b = 0; b < 4; b++) BS_Enew[b] = stCDEF->BS_F[b]; tmp0 = BS_XOR (stPQXYZ->BS_Z[0], stCDEF->BS_E[0]); tmp1 = BS_AND 
(stPQXYZ->BS_Z[0], stCDEF->BS_E[0]); stCDEF->BS_F[0] = BS_XOR (stCDEF->BS_E[0], BS_AND (stPQXYZ->BS_Q, BS_XOR (stPQXYZ->BS_Z[0], stCDEF->BS_C))); tmp3 = BS_AND (tmp0, stCDEF->BS_C); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (stPQXYZ->BS_Z[1], stCDEF->BS_E[1]); tmp1 = BS_AND (stPQXYZ->BS_Z[1], stCDEF->BS_E[1]); stCDEF->BS_F[1] = BS_XOR (stCDEF->BS_E[1], BS_AND (stPQXYZ->BS_Q, BS_XOR (stPQXYZ->BS_Z[1], tmp4))); tmp3 = BS_AND (tmp0, tmp4); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (stPQXYZ->BS_Z[2], stCDEF->BS_E[2]); tmp1 = BS_AND (stPQXYZ->BS_Z[2], stCDEF->BS_E[2]); stCDEF->BS_F[2] = BS_XOR (stCDEF->BS_E[2], BS_AND (stPQXYZ->BS_Q, BS_XOR (stPQXYZ->BS_Z[2], tmp4))); tmp3 = BS_AND (tmp0, tmp4); tmp4 = BS_OR (tmp1, tmp3); tmp0 = BS_XOR (stPQXYZ->BS_Z[3], stCDEF->BS_E[3]); tmp1 = BS_AND (stPQXYZ->BS_Z[3], stCDEF->BS_E[3]); stCDEF->BS_F[3] = BS_XOR (stCDEF->BS_E[3], BS_AND (stPQXYZ->BS_Q, BS_XOR (stPQXYZ->BS_Z[3], tmp4))); tmp3 = BS_AND (tmp0, tmp4); stCDEF->BS_C = BS_XOR (stCDEF->BS_C, BS_AND (stPQXYZ->BS_Q, BS_XOR (BS_OR (tmp1, tmp3), stCDEF->BS_C))); // ultimate carry for (b = 0; b < 4; b++) stCDEF->BS_E[b] = BS_Enew[b]; aycw__vCaculatePQXYZ(stRegister->A_BS, stPQXYZ); }