/*
 * Dispatch an encode of `rows` output buffers to the SSE dot-product kernels.
 *
 * len    - passed through unchanged to every kernel; values below 16 are
 *          handed to ec_encode_data_base() instead of the SSE kernels.
 * k      - passed through to every kernel; also used to step g_tbls.
 * rows   - number of output buffers to produce.
 * g_tbls - table data; advanced by 4 * k * 32 bytes after each group of
 *          four rows.
 * data   - source buffer pointers, passed through unchanged.
 * coding - output buffer pointers; advanced by 4 after each group of four.
 *
 * Rows are consumed four at a time by gf_4vect_dot_prod_sse(); the remaining
 * 3, 2 or 1 rows go to the matching narrower kernel.  If no rows remain
 * (or rows was not positive to begin with) nothing further is called.
 */
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls,
			unsigned char **data, unsigned char **coding)
{
	/* Inputs shorter than 16 are delegated to the base implementation. */
	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	/* Full groups of four output rows. */
	for (; rows >= 4; rows -= 4, coding += 4, g_tbls += 4 * k * 32)
		gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);

	/* Leftover rows, if any. */
	if (rows == 3) {
		gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
	} else if (rows == 2) {
		gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
	} else if (rows == 1) {
		/* The single-output kernel takes the buffer itself, not the pointer array. */
		gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
	}
}
int main(int argc, char *argv[]) { int i,j; void *buf; u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; u8 g_tbls[3*TEST_SOURCES*32], *dest_ptrs[3], *buffs[TEST_SOURCES]; u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3; struct perf start, stop; printf("gf_3vect_dot_prod_sse: %dx%d\n", TEST_SOURCES, TEST_LEN); mk_gf_field(); // Allocate the arrays for(i=0; i<TEST_SOURCES; i++){ if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } buffs[i] = buf; } if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest1 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest2 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest3 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest_ref1 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest_ref2 = buf; if (posix_memalign(&buf, 64, TEST_LEN)) { printf("alloc error: Fail"); return -1; } dest_ref3 = buf; dest_ptrs[0] = dest1; dest_ptrs[1] = dest2; dest_ptrs[2] = dest3; // Performance test for(i=0; i<TEST_SOURCES; i++) for(j=0; j<TEST_LEN; j++) buffs[i][j] = rand(); memset(dest1, 0, TEST_LEN); memset(dest2, 0, TEST_LEN); memset(dest_ref1, 0, TEST_LEN); memset(dest_ref2, 0, TEST_LEN); for (i=0; i<TEST_SOURCES; i++){ g1[i] = rand(); g2[i] = rand(); g3[i] = rand(); } for(j=0; j<TEST_SOURCES; j++){ gf_vect_mul_init(g1[j], &g_tbls[j*32]); gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]); gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]); } gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64*TEST_SOURCES], buffs, dest_ref3); #ifdef DO_REF_PERF perf_start(&start); for (i=0; i<TEST_LOOPS/100; i++){ for 
(j=0; j<TEST_SOURCES; j++){ gf_vect_mul_init(g1[j], &g_tbls[j*32]); gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]); gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]); } gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64*TEST_SOURCES], buffs, dest_ref3); } perf_stop(&stop); printf("gf_3vect_dot_prod_base" TEST_TYPE_STR ": "); perf_print(stop,start,(long long)TEST_LEN*(TEST_SOURCES+3)*i); #endif gf_3vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); perf_start(&start); for (i=0; i<TEST_LOOPS; i++) { for (j=0; j<TEST_SOURCES; j++){ gf_vect_mul_init(g1[j], &g_tbls[j*32]); gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]); gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]); } gf_3vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); } perf_stop(&stop); printf("gf_3vect_dot_prod_sse" TEST_TYPE_STR ": "); perf_print(stop,start, (long long)TEST_LEN*(TEST_SOURCES+3)*i); if (0 != memcmp(dest_ref1, dest1, TEST_LEN)){ printf("Fail perf vect_dot_prod_sse test1\n"); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref1, 25); printf("dprod_sse:"); dump(dest1, 25); return -1; } if (0 != memcmp(dest_ref2, dest2, TEST_LEN)){ printf("Fail perf vect_dot_prod_sse test2\n"); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref2, 25); printf("dprod_sse:"); dump(dest2, 25); return -1; } if (0 != memcmp(dest_ref3, dest3, TEST_LEN)){ printf("Fail perf vect_dot_prod_sse test3\n"); dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:"); dump(dest_ref3, 25); printf("dprod_sse:"); dump(dest3, 25); return -1; } printf("pass perf check\n"); return 0; }