/*
 * Dispatch an encode request to the SSE dot-product kernels.
 *
 * Requests with len < 16 are forwarded unchanged to ec_encode_data_base().
 * Otherwise output rows are produced in batches of four by
 * gf_4vect_dot_prod_sse(); after each batch g_tbls is advanced by
 * 4 * k * 32 bytes and coding by four pointers.  The 1-3 rows left over are
 * handled by the 3-, 2- or single-vector kernel.
 *
 * len    - length forwarded to the kernels (presumably bytes per buffer)
 * k      - forwarded to the kernels; also scales the g_tbls stride
 *          (presumably the number of source buffers)
 * rows   - number of output rows to produce
 * g_tbls - table area, consumed at 32 * k bytes per output row
 * data   - source buffer pointers, passed through untouched
 * coding - output buffer pointers, one per row
 */
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls,
			unsigned char **data, unsigned char **coding)
{
	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	// Full batches of four output rows
	for (; rows >= 4; rows -= 4) {
		gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
		g_tbls += 4 * k * 32;
		coding += 4;
	}

	// Leftover rows; any other value of rows does nothing
	if (rows == 3) {
		gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
	} else if (rows == 2) {
		gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
	} else if (rows == 1) {
		// The single-vector kernel takes the output buffer itself
		gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
	}
}
int main(int argc, char *argv[]) { int i,j, rtest, srcs; void *buf; u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2*TEST_SOURCES*32]; u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2]; u8 *buffs[TEST_SOURCES]; int align, size; unsigned char *efence_buffs[TEST_SOURCES]; unsigned int offset; u8 *ubuffs[TEST_SOURCES]; u8 *udest_ptrs[2]; printf("gf_2vect_dot_prod_sse: %dx%d ", TEST_SOURCES, TEST_LEN); mk_gf_field(); // Allocate the arrays for(i=0; i<TEST_SOURCES; i++){ if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } buffs[i] = buf; } if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest1 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest2 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest_ref1 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest_ref2 = buf; dest_ptrs[0] = dest1; dest_ptrs[1] = dest2; // Test of all zeros for(i=0; i<TEST_SOURCES; i++) memset(buffs[i], 0, TEST_LEN); memset(dest1, 0, TEST_LEN); memset(dest2, 0, TEST_LEN); memset(dest_ref1, 0, TEST_LEN); memset(dest_ref2, 0, TEST_LEN); memset(g1, 2, TEST_SOURCES); memset(g2, 1, TEST_SOURCES); for(i=0; i<TEST_SOURCES; i++){ gf_vect_mul_init(g1[i], &g_tbls[i*32]); gf_vect_mul_init(g2[i], &g_tbls[32*TEST_SOURCES + i*32]); } gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2); gf_2vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); if (0 != memcmp(dest_ref1, dest1, TEST_LEN)){ printf("Fail zero vect_dot_prod_sse test1\n"); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, 25); printf("dprod_sse:"); dump(dest1, 25); return -1; } if (0 != memcmp(dest_ref2, dest2, TEST_LEN)){ printf("Fail zero vect_dot_prod_sse test2\n"); dump_matrix(buffs, 5, 
TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, 25); printf("dprod_sse:"); dump(dest2, 25); return -1; } putchar('.'); // Rand data test for(rtest=0; rtest<RANDOMS; rtest++){ for(i=0; i<TEST_SOURCES; i++) for(j=0; j<TEST_LEN; j++) buffs[i][j] = rand(); for (i=0; i<TEST_SOURCES; i++){ g1[i] = rand(); g2[i] = rand(); } for(i=0; i<TEST_SOURCES; i++){ gf_vect_mul_init(g1[i], &g_tbls[i*32]); gf_vect_mul_init(g2[i], &g_tbls[(32*TEST_SOURCES) + (i*32)]); } gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2); gf_2vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); if (0 != memcmp(dest_ref1, dest1, TEST_LEN)){ printf("Fail rand 2vect_dot_prod_sse test1 %d\n", rtest); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, 25); printf("dprod_sse:"); dump(dest1, 25); return -1; } if (0 != memcmp(dest_ref2, dest2, TEST_LEN)){ printf("Fail rand 2vect_dot_prod_sse test2 %d\n", rtest); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, 25); printf("dprod_sse:"); dump(dest2, 25); return -1; } putchar('.'); } // Rand data test with varied parameters for(rtest=0; rtest<RANDOMS; rtest++){ for (srcs = TEST_SOURCES; srcs > 0; srcs--){ for(i=0; i<srcs; i++) for(j=0; j<TEST_LEN; j++) buffs[i][j] = rand(); for (i=0; i<srcs; i++){ g1[i] = rand(); g2[i] = rand(); } for(i=0; i<srcs; i++){ gf_vect_mul_init(g1[i], &g_tbls[i*32]); gf_vect_mul_init(g2[i], &g_tbls[(32*srcs) + (i*32)]); } gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32*srcs], buffs, dest_ref2); gf_2vect_dot_prod_sse(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); if (0 != memcmp(dest_ref1, dest1, TEST_LEN)){ printf("Fail rand 2vect_dot_prod_sse test1 srcs=%d\n", srcs); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, 25); printf("dprod_sse:"); dump(dest1, 25); 
return -1; } if (0 != memcmp(dest_ref2, dest2, TEST_LEN)){ printf("Fail rand 2vect_dot_prod_sse test2 srcs=%d\n", srcs); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, 25); printf("dprod_sse:"); dump(dest2, 25); return -1; } putchar('.'); } } // Run tests at end of buffer for Electric Fence align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; for(size=EFENCE_TEST_MIN_SIZE; size<=TEST_SIZE; size+=align){ for(i=0; i<TEST_SOURCES; i++) for(j=0; j<TEST_LEN; j++) buffs[i][j] = rand(); for(i=0; i<TEST_SOURCES; i++) // Line up TEST_SIZE from end efence_buffs[i] = buffs[i] + TEST_LEN - size; for (i=0; i<TEST_SOURCES; i++){ g1[i] = rand(); g2[i] = rand(); } for(i=0; i<TEST_SOURCES; i++){ gf_vect_mul_init(g1[i], &g_tbls[i*32]); gf_vect_mul_init(g2[i], &g_tbls[(32*TEST_SOURCES) + (i*32)]); } gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], efence_buffs, dest_ref2); gf_2vect_dot_prod_sse(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); if (0 != memcmp(dest_ref1, dest1, size)){ printf("Fail rand 2vect_dot_prod_sse test1 %d\n", rtest); dump_matrix(efence_buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, align); printf("dprod_sse:"); dump(dest1, align); return -1; } if (0 != memcmp(dest_ref2, dest2, size)){ printf("Fail rand 2vect_dot_prod_sse test2 %d\n", rtest); dump_matrix(efence_buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, align); printf("dprod_sse:"); dump(dest2, align); return -1; } putchar('.'); } // Test rand ptr alignment if available for(rtest=0; rtest<RANDOMS; rtest++){ size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; srcs = rand() % TEST_SOURCES; if (srcs == 0) continue; offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; // Add random offsets for(i=0; i<srcs; i++) ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); memset(dest1, 0, TEST_LEN); // zero pad to check write-over memset(dest2, 0, TEST_LEN); for(i=0; i<srcs; i++) for(j=0; j<size; j++) ubuffs[i][j] = rand(); for (i=0; i<srcs; i++){ g1[i] = rand(); g2[i] = rand(); } for(i=0; i<srcs; i++){ gf_vect_mul_init(g1[i], &g_tbls[i*32]); gf_vect_mul_init(g2[i], &g_tbls[(32*srcs) + (i*32)]); } gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); gf_vect_dot_prod_base(size, srcs, &g_tbls[32*srcs], ubuffs, dest_ref2); gf_2vect_dot_prod_sse(size, srcs, g_tbls, ubuffs, udest_ptrs); if (memcmp(dest_ref1, udest_ptrs[0], size)){ printf("Fail rand 2vect_dot_prod_sse test ualign srcs=%d\n", srcs); dump_matrix(ubuffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, 25); printf("dprod_sse:"); dump(udest_ptrs[0], 25); return -1; } if (memcmp(dest_ref2, udest_ptrs[1], size)){ printf("Fail rand 2vect_dot_prod_sse test ualign srcs=%d\n", srcs); dump_matrix(ubuffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, 25); printf("dprod_sse:"); dump(udest_ptrs[1], 25); return -1; } // Confirm that padding around dests is unchanged memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff offset = udest_ptrs[0] - dest1; if (memcmp(dest1, dest_ref1, offset)){ printf("Fail rand ualign pad1 start\n"); return -1; } if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)){ printf("Fail rand ualign pad1 end\n"); return -1; } offset = udest_ptrs[1] - dest2; if (memcmp(dest2, dest_ref1, offset)){ printf("Fail rand ualign pad2 start\n"); return -1; } if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)){ printf("Fail rand ualign pad2 end\n"); return -1; } putchar('.'); } // Test all size alignment align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; for(size=TEST_LEN; size>15; size-=align){ srcs = TEST_SOURCES; for(i=0; i<srcs; i++) for(j=0; j<size; j++) buffs[i][j] = rand(); for (i=0; i<srcs; i++){ g1[i] = rand(); g2[i] = rand(); } for(i=0; i<srcs; i++){ gf_vect_mul_init(g1[i], &g_tbls[i*32]); gf_vect_mul_init(g2[i], &g_tbls[(32*srcs) + (i*32)]); } gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(size, srcs, &g_tbls[32*srcs], buffs, dest_ref2); gf_2vect_dot_prod_sse(size, srcs, g_tbls, buffs, dest_ptrs); if (memcmp(dest_ref1, dest_ptrs[0], size)){ printf("Fail rand 2vect_dot_prod_sse test ualign len=%d\n", size); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, 25); printf("dprod_sse:"); dump(dest_ptrs[0], 25); return -1; } if (memcmp(dest_ref2, dest_ptrs[1], size)){ printf("Fail rand 2vect_dot_prod_sse test ualign len=%d\n", size); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, 25); printf("dprod_sse:"); dump(dest_ptrs[1], 25); return -1; } } printf("Pass\n"); return 0; }
int main(int argc, char *argv[]) { int i,j; void *buf; u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2*TEST_SOURCES*32]; u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2]; u8 *buffs[TEST_SOURCES]; struct perf start, stop; printf("gf_2vect_dot_prod_sse: %dx%d\n", TEST_SOURCES, TEST_LEN); mk_gf_field(); // Allocate the arrays for(i=0; i<TEST_SOURCES; i++){ if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } buffs[i] = buf; } if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest1 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest2 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest_ref1 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest_ref2 = buf; dest_ptrs[0] = dest1; dest_ptrs[1] = dest2; // Performance test for(i=0; i<TEST_SOURCES; i++) for(j=0; j<TEST_LEN; j++) buffs[i][j] = rand(); memset(dest1, 0, TEST_LEN); memset(dest2, 0, TEST_LEN); memset(dest_ref1, 0, TEST_LEN); memset(dest_ref2, 0, TEST_LEN); for (i=0; i<TEST_SOURCES; i++){ g1[i] = rand(); g2[i] = rand(); } for(j=0; j<TEST_SOURCES; j++){ gf_vect_mul_init(g1[j], &g_tbls[j*32]); gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]); } gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2); #ifdef DO_REF_PERF perf_start(&start); for (i=0; i<TEST_LOOPS/100; i++){ for(j=0; j<TEST_SOURCES; j++){ gf_vect_mul_init(g1[j], &g_tbls[j*32]); gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]); } gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2); } perf_stop(&stop); printf("gf_2vect_dot_prod_base" TEST_TYPE_STR ": "); perf_print(stop,start, (long 
long)TEST_LEN*(TEST_SOURCES+2)*i); #endif gf_2vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); perf_start(&start); for (i=0; i<TEST_LOOPS; i++) { for (j=0; j<TEST_SOURCES; j++){ gf_vect_mul_init(g1[j], &g_tbls[j*32]); gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]); } gf_2vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); } perf_stop(&stop); printf("gf_2vect_dot_prod_sse" TEST_TYPE_STR ": "); perf_print(stop,start, (long long)TEST_LEN*(TEST_SOURCES+2)*i); if (0 != memcmp(dest_ref1, dest1, TEST_LEN)){ printf("Fail perf vect_dot_prod_sse test1\n"); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, 25); printf("dprod_sse:"); dump(dest1, 25); return -1; } if (0 != memcmp(dest_ref2, dest2, TEST_LEN)){ printf("Fail perf vect_dot_prod_sse test2\n"); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, 25); printf("dprod_sse:"); dump(dest2, 25); return -1; } printf("pass perf check\n"); return 0; }