/*
   Multiply the field element in a[] by the field element in b[],
   leaving the product in a[]. a and b may be the same object.

   The routine first builds the eight values a, a*x, ... a*x^7 and
   then walks the 16 bytes of the multiplier from index 15 down to
   index 0. For each byte it adds in (xors) the multiples selected
   by the bits X_0 .. X_7 and, except after the last byte, scales
   the running result by x^8.
*/
void gf_mul(gf_t a, const gf_t b)
{
    gf_t multiples[8];
    uint8_t *src, byte_val;
    int pos, bit;
    /* bit_mask[k] picks the bit of a multiplier byte that selects
       multiples[k]; byte_val is 8 bits wide so only the low 8 bits
       of each X_k can ever take part in the test */
    const uint8_t bit_mask[8] = { X_0, X_1, X_2, X_3, X_4, X_5, X_6, X_7 };

    /* multiples[k] = a * x^k for k = 0 .. 7 */
    copy_block_aligned(multiples[0], a);
    for(bit = 1; bit < 8; ++bit)
    {
        gf_mulx1(mode)(multiples[bit], multiples[bit - 1]);
    }

    /* a[] is about to be cleared, so when both operands are the
       same object the multiplier is read from the saved copy */
    src = (uint8_t*)(a == b ? multiples[0] : b);
    memset(a, 0, GF_BYTE_LEN);

    for(pos = 15; pos >= 0; --pos)
    {
        byte_val = src[GF_INDEX(pos)];

        for(bit = 0; bit < 8; ++bit)
        {
            if(byte_val & bit_mask[bit])
            {
                xor_block_aligned(a, a, multiples[bit]);
            }
        }

        /* no scaling once the final byte has been absorbed */
        if(pos != 0)
        {
            gf_mulx8(mode)(a);
        }
    }
}
/*
   Fill the table t[][] with multiples of the key value g[]: one
   sub-table of 256 field values for each of the GF_BYTE_LEN byte
   positions of a multiplier.
*/
void init_64k_table(gf_t g, gf_t64k_t t)
{
    int pos, hi, lo;

    /* The byte value 0x80 at the lowest byte position is unity in
       this field representation, so g[] itself goes into entry 128
       of the first sub-table. 0x40 corresponds to a field value of
       2, so that entry is obtained by multiplying the 0x80 entry by
       x, and the same step is repeated down to entry 1. */
#ifdef CHANGE_GF_REPRESENTATION
    convert_representation(t[0][128], g, CONVERT);
#else
    memcpy(t[0][128], g, GF_BYTE_LEN);
#endif
    memset(t[0][0], 0, GF_BYTE_LEN);
    for(hi = 64; hi > 0; hi >>= 1)
    {
        gf_mulx1_lb(t[0][hi], t[0][hi + hi]);
    }

    for(pos = 0; pos < GF_BYTE_LEN; ++pos)
    {
        if(pos > 0)
        {
            /* Every sub-table after the first gets its zero entry
               and its eight single-bit entries from the sub-table
               below it, each multiplied by x^8. */
            memset(t[pos][0], 0, GF_BYTE_LEN);
            for(hi = 128; hi > 0; hi >>= 1)
            {
                memcpy(t[pos][hi], t[pos - 1][hi], GF_BYTE_LEN);
                gf_mulx8_lb(t[pos][hi]);
            }
        }

        /* Writing { n } for the field value represented by the
           integer n, the remaining entries are sums of entries that
           are already present:

               g * {3} = g * {2} ^ g * {1}
               g * {5} = g * {4} ^ g * {1}
               g * {6} = g * {4} ^ g * {2}
               g * {7} = g * {4} ^ g * {3}
               g * {9} = g * {8} ^ g * {1}   ... and so on

           hi runs over the single-bit indexes 2, 4, .. 128 and lo
           over every index below hi. */
        for(hi = 2; hi < 256; hi += hi)
        {
            for(lo = 1; lo < hi; ++lo)
            {
                xor_block_aligned(t[pos][hi + lo], t[pos][hi], t[pos][lo]);
            }
        }
    }
}
/*  This version uses 8k bytes of table space on the stack.
    An input field value in a[] has to be multiplied by a
    key value in g[]. To do this a[] is split up into 32
    smaller field values each 4-bits in length. For the
    16 values of each of these smaller field values we can
    precompute the result of multiplying g[] by the field
    value in question. So for each of 32 nibbles we have a
    table of 16 field values, each of 16 bytes - 8k bytes
    in total.

    g   the key value whose multiples are tabulated (not modified)
    t   the table to fill; t[n][v] receives the multiple of g for
        nibble position n and nibble value v
*/
void init_8k_table(const gf_t g, gf_t8k_t t)
{
    int i = 0, j, k;

    /*  The first two nibble tables, t[0][16] and t[1][16], are
        seeded directly from g; which entry holds g itself depends
        on the bit order in use. All later tables are derived from
        the table two positions below (see the main loop).
    */

    /* set the table elements for a zero multiplier */
    memset(t[0][0], 0, GF_BYTE_LEN);
    memset(t[1][0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )

    /* the unit multiplier sits at 0x01, so t[0][1] = g; then
       compute t[0][2], t[0][4], t[0][8] by repeated multiplies
       by x */
    memcpy(t[0][1], g, GF_BYTE_LEN);
    for(j = 1; j <= 4; j <<= 1)
        gf_mulx1(mode)(t[0][j + j], t[0][j]);

    /* t[1][1] = t[0][1] * x^4 = t[0][8] * x, followed by
       t[1][2], t[1][4], t[1][8] */
    gf_mulx1(mode)(t[1][1], t[0][8]);
    for(j = 1; j <= 4; j <<= 1)
        gf_mulx1(mode)(t[1][j + j], t[1][j]);

#else

    /* g -> t[1][8], then compute t[1][4], t[1][2], t[1][1] by
       repeated multiplies by x */
    memcpy(t[1][8], g, GF_BYTE_LEN);
    for(j = 4; j >= 1; j >>= 1)
        gf_mulx1(mode)(t[1][j], t[1][j + j]);

    /* t[0][8] = t[1][1] * x, followed by t[0][4], t[0][2],
       t[0][1] */
    gf_mulx1(mode)(t[0][8], t[1][1]);
    for(j = 4; j >= 1; j >>= 1)
        gf_mulx1(mode)(t[0][j], t[0][j + j]);

#endif

    for( ; ; )
    {
        /* entries 1, 2, 4 and 8 of t[i] are in place; every other
           non-zero entry is the xor of a single-bit entry j and a
           lower entry k */
        for(j = 2; j < 16; j += j)
            for(k = 1; k < j; ++k)
                xor_block_aligned(t[i][j + k], t[i][j], t[i][k]);

        if(++i == 2 * GF_BYTE_LEN)  /* all nibble positions done */
            return;

        /* t[1] was seeded before the loop; every later table takes
           its zero and single-bit entries from the table two nibble
           positions (one byte) below, multiplied by x^8 */
        if(i > 1)
        {
            memset(t[i][0], 0, GF_BYTE_LEN);
            for(j = 8; j > 0; j >>= 1)
            {
                memcpy(t[i][j], t[i - 2][j], GF_BYTE_LEN);
                gf_mulx8(mode)(t[i][j]);
            }
        }
    }
}
/*
   Multiply the field element in a[] by the field element in b[],
   leaving the product in a[]. a and b may be the same object.

   Eight values a, a*x, ... a*x^7 are built first. The 16 bytes of
   the multiplier are then consumed from index 15 down to index 0:
   for each byte the multiples selected by the bits X_0 .. X_7 are
   added in (xored) and, except after the last byte, the running
   result is scaled by x^8.
*/
void gf_mul(gf_t a, const gf_t b)
{
    gf_t multiples[8];
    uint_8t *src, byte_val;
    int pos, bit;
    /* bit_mask[k] picks the bit of a multiplier byte that selects
       multiples[k]; byte_val is 8 bits wide so only the low 8 bits
       of each X_k can ever take part in the test */
    const uint_8t bit_mask[8] = { X_0, X_1, X_2, X_3, X_4, X_5, X_6, X_7 };

#ifdef CHANGE_GF_REPRESENTATION
    /* both operands are converted in place before the multiply; b
       is skipped when it is the same object as a so that it is not
       converted twice */
    convert_representation(a, a, CONVERT);
    if(a != b)
    {
        convert_representation(b, b, CONVERT);
    }
#endif

    /* a[] is about to be cleared, so when both operands are the
       same object the multiplier is read from the saved copy */
    src = (uint_8t*)(a == b ? multiples[0] : b);

    /* multiples[k] = a * x^k for k = 0 .. 7 */
    move_block_aligned(multiples[0], a);
    for(bit = 1; bit < 8; ++bit)
    {
        gf_mulx1_lb(multiples[bit], multiples[bit - 1]);
    }

    memset(a, 0, GF_BYTE_LEN);

    for(pos = 15; pos >= 0; --pos)
    {
        byte_val = src[pos];

        for(bit = 0; bit < 8; ++bit)
        {
            if(byte_val & bit_mask[bit])
            {
                xor_block_aligned(a, a, multiples[bit]);
            }
        }

        /* no scaling once the final byte has been absorbed */
        if(pos != 0)
        {
            gf_mulx8_lb(a);
        }
    }

#ifdef CHANGE_GF_REPRESENTATION
    /* the same conversion call is applied again to the result and
       to b - presumably it is its own inverse, so this restores the
       caller's representation (confirm against
       convert_representation) */
    convert_representation(a, a, CONVERT);
    if(a != b)
    {
        convert_representation(b, b, CONVERT);
    }
#endif
}
/*
   Fill the table t[][] with multiples of the key value g[]: one
   sub-table of 16 field values for each of the 2 * GF_BYTE_LEN
   nibble positions of a multiplier.

   g   the key value whose multiples are tabulated
   t   the table to fill; t[n][v] receives the multiple of g for
       nibble position n and nibble value v
*/
void init_8k_table(gf_t g, gf_t8k_t t)
{
    int i = 0, j, k;

    /*  do the high 4-bit nibble first - t[1][16] - and note
        that the unit multiplier sits at 0x80 - t[1][8] in
        the table. Then multiplies by x go at 4, 2, 1
    */
#ifdef CHANGE_GF_REPRESENTATION
    convert_representation(t[1][8], g, CONVERT);
#else
    memcpy(t[1][8], g, GF_BYTE_LEN);
#endif

    /* set the table elements for a zero multiplier */
    memset(t[0][0], 0, GF_BYTE_LEN);
    memset(t[1][0], 0, GF_BYTE_LEN);

    for(j = 4; j > 0; j >>= 1)
        gf_mulx1_lb(t[1][j], t[1][j + j]);

    /* now do the low nibble: g * {x^4} = x * g * {x^3}, so
       t[0][8] is t[1][1] multiplied by x, followed by t[0][4],
       t[0][2], t[0][1] */
    gf_mulx1_lb(t[0][8], t[1][1]);
    for(j = 4; j > 0; j >>= 1)
        gf_mulx1_lb(t[0][j], t[0][j + j]);

    for( ; ; )
    {
        /* entries 1, 2, 4 and 8 of t[i] are in place; every other
           non-zero entry is the xor of a single-bit entry j and a
           lower entry k */
        for(j = 2; j < 16; j += j)
            for(k = 1; k < j; ++k)
                xor_block_aligned(t[i][j + k], t[i][j], t[i][k]);

        if(++i == 2 * GF_BYTE_LEN)  /* all nibble positions done */
            return;

        /* t[1] was seeded before the loop; every later table takes
           its zero and single-bit entries from the table two nibble
           positions (one byte) below, multiplied by x^8 */
        if(i > 1)
        {
            memset(t[i][0], 0, GF_BYTE_LEN);
            for(j = 8; j > 0; j >>= 1)
            {
                memcpy(t[i][j], t[i - 2][j], GF_BYTE_LEN);
                gf_mulx8_lb(t[i][j]);
            }
        }
    }
}
/*
   Fill the table t[][] with multiples of the key value g[]: one
   sub-table of 256 field values for each of the GF_BYTE_LEN byte
   positions of a multiplier. g[] is not modified.
*/
void init_64k_table(const gf_t g, gf_t64k_t t)
{
    int pos, hi, lo;

    /* Depending on the representation, bits within bytes are
       processed high to low (0xe1 style) or low to high (0x87
       style). The first sub-table is seeded with g and its
       multiples by x, x^2 .. x^7, stored either in t[0][1],
       t[0][2] .. t[0][128] or in t[0][128], t[0][64] .. t[0][1]
       according to the bit order in use. */

    /* the entry for the zero field element */
    memset(t[0][0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )

    /* g -> t[0][1], then t[0][2], t[0][4] .. t[0][128] */
    memcpy(t[0][1], g, GF_BYTE_LEN);
    for(hi = 1; hi <= 64; hi <<= 1)
    {
        gf_mulx1(mode)(t[0][hi + hi], t[0][hi]);
    }

#else

    /* g -> t[0][128], then t[0][64], t[0][32] .. t[0][1] */
    memcpy(t[0][128], g, GF_BYTE_LEN);
    for(hi = 64; hi >= 1; hi >>= 1)
    {
        gf_mulx1(mode)(t[0][hi], t[0][hi + hi]);
    }

#endif

    for(pos = 0; pos < GF_BYTE_LEN; ++pos)
    {
        if(pos > 0)
        {
            /* Every sub-table after the first gets its zero entry
               and its eight single-bit entries from the sub-table
               below it, each multiplied by x^8. */
            memset(t[pos][0], 0, GF_BYTE_LEN);
            for(hi = 128; hi > 0; hi >>= 1)
            {
                memcpy(t[pos][hi], t[pos - 1][hi], GF_BYTE_LEN);
                gf_mulx8(mode)(t[pos][hi]);
            }
        }

        /* Writing { n } for the field value represented by the
           integer n, the remaining entries are sums of entries that
           are already present:

               g * {3} = g * {2} ^ g * {1}
               g * {5} = g * {4} ^ g * {1}
               g * {6} = g * {4} ^ g * {2}
               g * {7} = g * {4} ^ g * {3}
               g * {9} = g * {8} ^ g * {1}   ... and so on

           hi runs over the single-bit indexes 2, 4, .. 128 and lo
           over every index below hi. */
        for(hi = 2; hi < 256; hi += hi)
        {
            for(lo = 1; lo < hi; ++lo)
            {
                xor_block_aligned(t[pos][hi + lo], t[pos][hi], t[pos][lo]);
            }
        }
    }
}