Пример #1
0
/*
 * Produce `rows` coding buffers from `k` source buffers.
 *
 * len    - number of bytes handed to each kernel
 * k      - number of source buffers in `data`
 * rows   - number of output buffers in `coding`
 * g_tbls - concatenated multiplication tables; this routine steps through
 *          them k * 32 bytes per output row
 * data   - array of k source buffer pointers
 * coding - array of `rows` destination buffer pointers
 *
 * Calls with len below 16 are forwarded unchanged to ec_encode_data_base().
 * Otherwise rows are processed four at a time, and the remaining one to
 * three rows go to the matching narrower dot-product kernel.
 */
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                        unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        /* Full batches of four output rows. */
        for (; rows >= 4; rows -= 4) {
                gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
                g_tbls += 4 * k * 32;   /* skip the tables consumed by these four rows */
                coding += 4;
        }

        /* Leftover rows; any other remaining count means nothing is left to do. */
        if (rows == 3) {
                gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
        }
}
/*
 * Allocate one 64-byte-aligned buffer of TEST_LEN bytes.
 * On success stores it in *out and returns 0; on failure prints the
 * allocation error message and returns -1.
 */
static int alloc_test_buf(u8 **out)
{
	void *buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	*out = buf;
	return 0;
}

/* Fill the first `len` bytes of each of the first `srcs` buffers with rand() data. */
static void fill_rand(u8 **bufs, int srcs, int len)
{
	int i, j;

	for (i = 0; i < srcs; i++)
		for (j = 0; j < len; j++)
			bufs[i][j] = rand();
}

/* Draw a random coefficient pair (g1[i], g2[i]) for each of `srcs` sources. */
static void rand_coeffs(u8 *g1, u8 *g2, int srcs)
{
	int i;

	for (i = 0; i < srcs; i++) {
		g1[i] = rand();
		g2[i] = rand();
	}
}

/*
 * Build the multiplication tables for both output rows.  The table for
 * g1[i] is written at offset i*32; the tables for g2 follow directly,
 * starting at offset 32*srcs.
 */
static void init_2vect_tbls(const u8 *g1, const u8 *g2, int srcs, u8 *g_tbls)
{
	int i;

	for (i = 0; i < srcs; i++) {
		gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
		gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
	}
}

/*
 * Compute both reference results with the base routine (into ref1/ref2),
 * then run the 2-vector SSE routine under test into dests[0]/dests[1].
 */
static void run_ref_and_sse(int len, int srcs, u8 *g_tbls, u8 **src,
			    u8 *ref1, u8 *ref2, u8 **dests)
{
	gf_vect_dot_prod_base(len, srcs, &g_tbls[0], src, ref1);
	gf_vect_dot_prod_base(len, srcs, &g_tbls[32 * srcs], src, ref2);

	gf_2vect_dot_prod_sse(len, srcs, g_tbls, src, dests);
}

/* Print the source matrix plus the reference and SSE outputs after a mismatch. */
static void dump_mismatch(u8 **src, u8 *ref, u8 *got, int n)
{
	dump_matrix(src, 5, TEST_SOURCES);
	printf("dprod_base:");
	dump(ref, n);
	printf("dprod_sse:");
	dump(got, n);
}

/*
 * Functional test for gf_2vect_dot_prod_sse().
 *
 * Every case compares the SSE output against gf_vect_dot_prod_base() run
 * once per output row.  Cases, in order: all-zero input, random input,
 * random input with a decreasing source count, buffers positioned at the
 * end of their allocation (Electric Fence), randomly offset source and
 * destination pointers (including a check that bytes around the
 * destinations stay zero), and every length from TEST_LEN down to 16.
 *
 * Returns 0 after printing "Pass", or -1 after the first failure.
 */
int main(int argc, char *argv[])
{
	int i, rtest, srcs;
	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2*TEST_SOURCES*32];
	u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2];
	u8 *buffs[TEST_SOURCES];

	int align, size;
	unsigned char *efence_buffs[TEST_SOURCES];
	unsigned int offset;
	u8 *ubuffs[TEST_SOURCES];
	u8 *udest_ptrs[2];

	printf("gf_2vect_dot_prod_sse: %dx%d ", TEST_SOURCES, TEST_LEN);

	mk_gf_field();

	// Allocate the arrays
	for (i = 0; i < TEST_SOURCES; i++)
		if (alloc_test_buf(&buffs[i]))
			return -1;

	if (alloc_test_buf(&dest1) || alloc_test_buf(&dest2) ||
	    alloc_test_buf(&dest_ref1) || alloc_test_buf(&dest_ref2))
		return -1;

	dest_ptrs[0] = dest1;
	dest_ptrs[1] = dest2;

	// Test of all zeros
	for (i = 0; i < TEST_SOURCES; i++)
		memset(buffs[i], 0, TEST_LEN);

	memset(dest1, 0, TEST_LEN);
	memset(dest2, 0, TEST_LEN);
	memset(dest_ref1, 0, TEST_LEN);
	memset(dest_ref2, 0, TEST_LEN);
	memset(g1, 2, TEST_SOURCES);
	memset(g2, 1, TEST_SOURCES);

	init_2vect_tbls(g1, g2, TEST_SOURCES, g_tbls);
	run_ref_and_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref1, dest_ref2, dest_ptrs);

	if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
		printf("Fail zero vect_dot_prod_sse test1\n");
		dump_mismatch(buffs, dest_ref1, dest1, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
		printf("Fail zero vect_dot_prod_sse test2\n");
		dump_mismatch(buffs, dest_ref2, dest2, 25);
		return -1;
	}

	putchar('.');

	// Rand data test
	for (rtest = 0; rtest < RANDOMS; rtest++) {
		fill_rand(buffs, TEST_SOURCES, TEST_LEN);
		rand_coeffs(g1, g2, TEST_SOURCES);
		init_2vect_tbls(g1, g2, TEST_SOURCES, g_tbls);
		run_ref_and_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs,
				dest_ref1, dest_ref2, dest_ptrs);

		if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
			printf("Fail rand 2vect_dot_prod_sse test1 %d\n", rtest);
			dump_mismatch(buffs, dest_ref1, dest1, 25);
			return -1;
		}
		if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
			printf("Fail rand 2vect_dot_prod_sse test2 %d\n", rtest);
			dump_mismatch(buffs, dest_ref2, dest2, 25);
			return -1;
		}

		putchar('.');
	}

	// Rand data test with varied parameters
	for (rtest = 0; rtest < RANDOMS; rtest++) {
		for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
			fill_rand(buffs, srcs, TEST_LEN);
			rand_coeffs(g1, g2, srcs);
			init_2vect_tbls(g1, g2, srcs, g_tbls);
			run_ref_and_sse(TEST_LEN, srcs, g_tbls, buffs,
					dest_ref1, dest_ref2, dest_ptrs);

			if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
				printf("Fail rand 2vect_dot_prod_sse test1 srcs=%d\n", srcs);
				dump_mismatch(buffs, dest_ref1, dest1, 25);
				return -1;
			}
			if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
				printf("Fail rand 2vect_dot_prod_sse test2 srcs=%d\n", srcs);
				dump_mismatch(buffs, dest_ref2, dest2, 25);
				return -1;
			}

			putchar('.');
		}
	}

	// Run tests at end of buffer for Electric Fence
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
	for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
		fill_rand(buffs, TEST_SOURCES, TEST_LEN);

		for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
			efence_buffs[i] = buffs[i] + TEST_LEN - size;

		rand_coeffs(g1, g2, TEST_SOURCES);
		init_2vect_tbls(g1, g2, TEST_SOURCES, g_tbls);
		run_ref_and_sse(size, TEST_SOURCES, g_tbls, efence_buffs,
				dest_ref1, dest_ref2, dest_ptrs);

		// Report the failing size: rtest is a stale counter from the
		// loops above and carries no information here.
		if (0 != memcmp(dest_ref1, dest1, size)) {
			printf("Fail efence 2vect_dot_prod_sse test1 size=%d\n", size);
			dump_mismatch(efence_buffs, dest_ref1, dest1, align);
			return -1;
		}
		if (0 != memcmp(dest_ref2, dest2, size)) {
			printf("Fail efence 2vect_dot_prod_sse test2 size=%d\n", size);
			dump_mismatch(efence_buffs, dest_ref2, dest2, align);
			return -1;
		}

		putchar('.');
	}

	// Test rand ptr alignment if available
	for (rtest = 0; rtest < RANDOMS; rtest++) {
		size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
		srcs = rand() % TEST_SOURCES;
		if (srcs == 0)
			continue;

		offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
		// Add random offsets
		for (i = 0; i < srcs; i++)
			ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));

		udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
		udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));

		memset(dest1, 0, TEST_LEN);  // zero pad to check write-over
		memset(dest2, 0, TEST_LEN);

		fill_rand(ubuffs, srcs, size);
		rand_coeffs(g1, g2, srcs);
		init_2vect_tbls(g1, g2, srcs, g_tbls);
		run_ref_and_sse(size, srcs, g_tbls, ubuffs, dest_ref1, dest_ref2, udest_ptrs);

		if (memcmp(dest_ref1, udest_ptrs[0], size)) {
			printf("Fail rand 2vect_dot_prod_sse test ualign srcs=%d\n", srcs);
			dump_mismatch(ubuffs, dest_ref1, udest_ptrs[0], 25);
			return -1;
		}
		if (memcmp(dest_ref2, udest_ptrs[1], size)) {
			printf("Fail rand 2vect_dot_prod_sse test ualign srcs=%d\n", srcs);
			dump_mismatch(ubuffs, dest_ref2, udest_ptrs[1], 25);
			return -1;
		}

		// Confirm that padding around dests is unchanged
		memset(dest_ref1, 0, PTR_ALIGN_CHK_B);  // Make reference zero buff
		offset = udest_ptrs[0] - dest1;

		if (memcmp(dest1, dest_ref1, offset)) {
			printf("Fail rand ualign pad1 start\n");
			return -1;
		}
		if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
			printf("Fail rand ualign pad1 end\n");
			return -1;
		}

		offset = udest_ptrs[1] - dest2;
		if (memcmp(dest2, dest_ref1, offset)) {
			printf("Fail rand ualign pad2 start\n");
			return -1;
		}
		if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
			printf("Fail rand ualign pad2 end\n");
			return -1;
		}

		putchar('.');
	}

	// Test all size alignment
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;

	for (size = TEST_LEN; size > 15; size -= align) {
		srcs = TEST_SOURCES;

		fill_rand(buffs, srcs, size);
		rand_coeffs(g1, g2, srcs);
		init_2vect_tbls(g1, g2, srcs, g_tbls);
		run_ref_and_sse(size, srcs, g_tbls, buffs, dest_ref1, dest_ref2, dest_ptrs);

		if (memcmp(dest_ref1, dest_ptrs[0], size)) {
			printf("Fail rand 2vect_dot_prod_sse test ualign len=%d\n", size);
			dump_mismatch(buffs, dest_ref1, dest_ptrs[0], 25);
			return -1;
		}
		if (memcmp(dest_ref2, dest_ptrs[1], size)) {
			printf("Fail rand 2vect_dot_prod_sse test ualign len=%d\n", size);
			dump_mismatch(buffs, dest_ref2, dest_ptrs[1], 25);
			return -1;
		}
	}

	printf("Pass\n");
	return 0;

}
/*
 * Allocate one 64-byte-aligned, TEST_LEN-byte buffer into *out.
 * Returns 0 on success; prints the allocation error and returns -1 otherwise.
 */
static int perf_alloc_buf(u8 **out)
{
	void *mem;

	if (posix_memalign(&mem, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	*out = mem;
	return 0;
}

/*
 * Rebuild the multiplication tables for both coefficient rows: c1 tables
 * start at offset 0, c2 tables at offset 32*TEST_SOURCES, 32 bytes apart.
 */
static void perf_build_tbls(const u8 *c1, const u8 *c2, u8 *tbls)
{
	int n;

	for (n = 0; n < TEST_SOURCES; n++) {
		gf_vect_mul_init(c1[n], &tbls[n*32]);
		gf_vect_mul_init(c2[n], &tbls[(32*TEST_SOURCES) + (n*32)]);
	}
}

/* Print the source matrix and both result vectors after a mismatch. */
static void perf_dump_mismatch(u8 **src, u8 *ref, u8 *got)
{
	dump_matrix(src, 5, TEST_SOURCES);
	printf("dprod_base:");
	dump(ref, 25);
	printf("dprod_sse:");
	dump(got, 25);
}

/*
 * Timing run for gf_2vect_dot_prod_sse().
 *
 * Fills TEST_SOURCES buffers with random data, computes reference results
 * with gf_vect_dot_prod_base(), optionally times the base routine (when
 * DO_REF_PERF is defined), times TEST_LOOPS iterations of table setup plus
 * the SSE routine, and finally checks the SSE output against the reference.
 *
 * Returns 0 after printing "pass perf check", -1 on allocation failure or
 * result mismatch.
 */
int main(int argc, char *argv[])
{
	int iter, s, b;
	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2*TEST_SOURCES*32];
	u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2];
	u8 *buffs[TEST_SOURCES];
	struct perf start, stop;

	printf("gf_2vect_dot_prod_sse: %dx%d\n", TEST_SOURCES, TEST_LEN);

	mk_gf_field();

	// Source buffers first, then the two outputs and their references
	for (s = 0; s < TEST_SOURCES; s++) {
		if (perf_alloc_buf(&buffs[s]))
			return -1;
	}
	if (perf_alloc_buf(&dest1))
		return -1;
	if (perf_alloc_buf(&dest2))
		return -1;
	if (perf_alloc_buf(&dest_ref1))
		return -1;
	if (perf_alloc_buf(&dest_ref2))
		return -1;

	dest_ptrs[0] = dest1;
	dest_ptrs[1] = dest2;

	// Random input data, zeroed outputs
	for (s = 0; s < TEST_SOURCES; s++) {
		for (b = 0; b < TEST_LEN; b++)
			buffs[s][b] = rand();
	}

	memset(dest1, 0, TEST_LEN);
	memset(dest2, 0, TEST_LEN);
	memset(dest_ref1, 0, TEST_LEN);
	memset(dest_ref2, 0, TEST_LEN);

	// Random coefficient pair per source
	for (s = 0; s < TEST_SOURCES; s++) {
		g1[s] = rand();
		g2[s] = rand();
	}

	perf_build_tbls(g1, g2, g_tbls);

	// Reference results, one base dot product per output row
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2);

#ifdef DO_REF_PERF
	// Time the base routine over TEST_LOOPS/100 iterations
	perf_start(&start);
	for (iter = 0; iter < TEST_LOOPS/100; iter++) {
		perf_build_tbls(g1, g2, g_tbls);

		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2);
	}
	perf_stop(&stop);
	printf("gf_2vect_dot_prod_base" TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_LEN*(TEST_SOURCES+2)*iter);
#endif

	// One untimed call before the measured loop
	gf_2vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);

	// Timed loop: table setup plus the SSE dot product
	perf_start(&start);
	for (iter = 0; iter < TEST_LOOPS; iter++) {
		perf_build_tbls(g1, g2, g_tbls);

		gf_2vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
	}
	perf_stop(&stop);
	printf("gf_2vect_dot_prod_sse" TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_LEN*(TEST_SOURCES+2)*iter);

	// The timed output must still match the reference
	if (memcmp(dest_ref1, dest1, TEST_LEN) != 0) {
		printf("Fail perf vect_dot_prod_sse test1\n");
		perf_dump_mismatch(buffs, dest_ref1, dest1);
		return -1;
	}
	if (memcmp(dest_ref2, dest2, TEST_LEN) != 0) {
		printf("Fail perf vect_dot_prod_sse test2\n");
		perf_dump_mismatch(buffs, dest_ref2, dest2);
		return -1;
	}

	printf("pass perf check\n");
	return 0;

}