示例#1
0
/*
 * Dispatch an encode across the SSE dot-product kernels.
 *
 * len    - passed through to the kernels; values below 16 are handed to
 *          ec_encode_data_base() instead of the SSE kernels
 * k      - passed through to the kernels; also scales the table stride
 * rows   - number of output rows still to produce
 * g_tbls - table pointer; advanced by 4 * k * 32 bytes after each 4-row call
 * data   - passed unchanged to every kernel
 * coding - array of output pointers; advanced by 4 after each 4-row call
 *
 * Rows are produced four at a time while at least four remain; a remainder of
 * 3, 2 or 1 goes to the matching narrower kernel. Any other remainder (zero or
 * a negative value) results in no further call.
 */
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                        unsigned char **coding)
{
        /* Short inputs are handled entirely by the base implementation. */
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        /* Full groups of four rows. */
        for (; rows >= 4; rows -= 4) {
                gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
                g_tbls += 4 * k * 32;
                coding += 4;
        }

        /* Remainder rows, if any. */
        if (rows == 3) {
                gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-row kernel takes the output buffer itself, not the array. */
                gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
        }
}
/*
 * Performance and correctness check for gf_3vect_dot_prod_sse().
 *
 * Fills TEST_SOURCES source buffers of TEST_LEN bytes with rand() data,
 * computes three reference outputs with gf_vect_dot_prod_base(), times
 * TEST_LOOPS iterations of table initialisation plus gf_3vect_dot_prod_sse(),
 * and finally compares each SSE output with its reference.
 *
 * argc and argv are not used.
 *
 * Returns 0 when all three outputs match, -1 on allocation failure or on the
 * first mismatch. Every buffer allocated here is freed before returning.
 */
int main(int argc, char *argv[])
{
	int i,j;
	int ret = -1;	// stays -1 unless every check passes
	void *buf;
	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
	u8 g_tbls[3*TEST_SOURCES*32], *dest_ptrs[3], *buffs[TEST_SOURCES];
	u8 *dest1 = NULL, *dest2 = NULL, *dest3 = NULL;
	u8 *dest_ref1 = NULL, *dest_ref2 = NULL, *dest_ref3 = NULL;
	// Output buffers listed in allocation order so they can be allocated,
	// cleared and freed with simple loops
	u8 **outputs[] = { &dest1, &dest2, &dest3, &dest_ref1, &dest_ref2, &dest_ref3 };
	const int n_outputs = sizeof(outputs) / sizeof(outputs[0]);
	struct perf start, stop;

	// NULL every source slot first so the cleanup path can free() them
	// unconditionally, even after a partial allocation
	for (i = 0; i < TEST_SOURCES; i++)
		buffs[i] = NULL;

	printf("gf_3vect_dot_prod_sse: %dx%d\n", TEST_SOURCES, TEST_LEN);

	mk_gf_field();

	// Allocate the arrays
	for (i = 0; i < TEST_SOURCES; i++) {
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			goto out;
		}
		buffs[i] = buf;
	}

	for (i = 0; i < n_outputs; i++) {
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			goto out;
		}
		*outputs[i] = buf;
	}

	dest_ptrs[0] = dest1;
	dest_ptrs[1] = dest2;
	dest_ptrs[2] = dest3;

	// Performance test
	for (i = 0; i < TEST_SOURCES; i++)
		for (j = 0; j < TEST_LEN; j++)
			buffs[i][j] = rand();

	// Clear all six output buffers (the third pair was previously skipped)
	for (i = 0; i < n_outputs; i++)
		memset(*outputs[i], 0, TEST_LEN);

	for (i = 0; i < TEST_SOURCES; i++) {
		g1[i] = rand();
		g2[i] = rand();
		g3[i] = rand();
	}

	// g_tbls holds three consecutive groups of TEST_SOURCES slots, one group
	// per coefficient vector, with slots 32 bytes apart
	for (j = 0; j < TEST_SOURCES; j++) {
		gf_vect_mul_init(g1[j], &g_tbls[j*32]);
		gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]);
		gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]);
	}

	// Reference results, one base call per output
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64*TEST_SOURCES], buffs, dest_ref3);

#ifdef DO_REF_PERF
	perf_start(&start);
	for (i=0; i<TEST_LOOPS/100; i++){
		for (j=0; j<TEST_SOURCES; j++){
			gf_vect_mul_init(g1[j], &g_tbls[j*32]);
			gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]);
			gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]);
		}

		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64*TEST_SOURCES], buffs, dest_ref3);
	}
	perf_stop(&stop);
	printf("gf_3vect_dot_prod_base" TEST_TYPE_STR ": ");
	// i equals the number of completed iterations here
	perf_print(stop,start,(long long)TEST_LEN*(TEST_SOURCES+3)*i);
#endif

	// One untimed call before the measured loop
	gf_3vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);

	perf_start(&start);
	for (i=0; i<TEST_LOOPS; i++) {
		for (j=0; j<TEST_SOURCES; j++){
			gf_vect_mul_init(g1[j], &g_tbls[j*32]);
			gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]);
			gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]);
		}

		gf_3vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
	}
	perf_stop(&stop);
	printf("gf_3vect_dot_prod_sse" TEST_TYPE_STR ": ");
	// i equals TEST_LOOPS here
	perf_print(stop,start, (long long)TEST_LEN*(TEST_SOURCES+3)*i);

	// Compare each SSE output with its reference; stop at the first mismatch
	if (0 != memcmp(dest_ref1, dest1, TEST_LEN)){
		printf("Fail perf vect_dot_prod_sse test1\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref1, 25);
		printf("dprod_sse:");
		dump(dest1, 25);
		goto out;
	}
	if (0 != memcmp(dest_ref2, dest2, TEST_LEN)){
		printf("Fail perf vect_dot_prod_sse test2\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref2, 25);
		printf("dprod_sse:");
		dump(dest2, 25);
		goto out;
	}
	if (0 != memcmp(dest_ref3, dest3, TEST_LEN)){
		printf("Fail perf vect_dot_prod_sse test3\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref3, 25);
		printf("dprod_sse:");
		dump(dest3, 25);
		goto out;
	}

	printf("pass perf check\n");
	ret = 0;

out:
	// Single exit path: free(NULL) is a no-op, so buffers that were never
	// allocated are skipped automatically
	for (i = 0; i < TEST_SOURCES; i++)
		free(buffs[i]);
	for (i = 0; i < n_outputs; i++)
		free(*outputs[i]);

	return ret;
}