Exemple #1
0
/*
 * Expand a rows x k coefficient matrix into multiplication tables.
 *
 * The coefficients at `a` are consumed in order (rows * k bytes). For each
 * one, gf_vect_mul_init() is called on the next 32-byte slot of `g_tbls`,
 * so `g_tbls` is advanced by 32 * k * rows bytes in total.
 */
void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        unsigned char *coeff = a;
        unsigned char *slot = g_tbls;
        int row, col;

        for (row = 0; row < rows; row++)
                for (col = 0; col < k; col++, slot += 32)
                        gf_vect_mul_init(*coeff++, slot);
}
/*
 * Throughput benchmark for gf_vect_mul().
 *
 * Multiplies a zero-filled TEST_LEN-byte buffer by the constant `a` (2)
 * TEST_LOOPS times. The multiply table is rebuilt with gf_vect_mul_init()
 * on every iteration, so table setup is part of the timed region. The
 * elapsed time is reported through perf_print().
 *
 * Returns 0 on completion, 1 if either buffer cannot be allocated.
 */
int main(int argc, char *argv[])
{
	int i;
	u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
	struct perf start, stop;

	printf("gf_vect_mul_perf:\n");
	mk_gf_field();
	gf_vect_mul_init(a, gf_const_tbl);

	// Allocate large mem region
	buff1 = (u8*) malloc(TEST_LEN);
	buff2 = (u8*) malloc(TEST_LEN);
	if (NULL == buff1 || NULL == buff2){
		printf("Failed to allocate %dB\n", TEST_LEN);
		// One of the two may have succeeded; free(NULL) is a no-op.
		free(buff1);
		free(buff2);
		return 1;
	}

	memset(buff1, 0, TEST_LEN);
	memset(buff2, 0, TEST_LEN);

	// Untimed warm-up passes before measuring
	gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);

	printf("Start timed tests\n");
	fflush(0);

	gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
	perf_start(&start);
	for(i=0; i<TEST_LOOPS; i++){
		gf_vect_mul_init(a, gf_const_tbl);
		gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
	}
	perf_stop(&stop);
	printf("gf_vect_mul" TEST_TYPE_STR ": ");
	// i == TEST_LOOPS here, so this is the total number of bytes processed
	perf_print(stop,start,(long long)TEST_LEN*i);

	free(buff1);
	free(buff2);

	return 0;
}
/*
 * Timing driver for FUNCTION_UNDER_TEST.
 *
 * Fills TEST_SOURCES buffers of TEST_LEN bytes with rand() data, picks one
 * rand() coefficient per source, computes a reference result with
 * gf_vect_dot_prod_base(), then times TEST_LOOPS runs of FUNCTION_UNDER_TEST
 * (table setup included in each iteration). When DO_REF_PERF is defined the
 * reference function is timed the same way first.
 *
 * Returns 0 when the output matches the reference, -1 on allocation failure
 * or mismatch.
 */
int main(int argc, char *argv[])
{
	int iter, src, pos;
	void *mem;
	u8 coef[TEST_SOURCES], g_tbls[TEST_SOURCES * 32];
	u8 *dest, *dest_ref, *temp_buff;
	u8 *buffs[TEST_SOURCES];
	struct perf start, stop;

	printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);

	// Source buffers: one 64-byte-aligned region of TEST_LEN bytes each
	for (src = 0; src < TEST_SOURCES; src++) {
		if (posix_memalign(&mem, 64, TEST_LEN) != 0) {
			printf("alloc error: Fail");
			return -1;
		}
		buffs[src] = mem;
	}

	// Output of the function under test
	if (posix_memalign(&mem, 64, TEST_LEN) != 0) {
		printf("alloc error: Fail");
		return -1;
	}
	dest = mem;

	// Output of the reference implementation
	if (posix_memalign(&mem, 64, TEST_LEN) != 0) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref = mem;

	// Scratch buffer (only zeroed below)
	if (posix_memalign(&mem, 64, TEST_LEN) != 0) {
		printf("alloc error: Fail");
		return -1;
	}
	temp_buff = mem;

	// Random source data
	for (src = 0; src < TEST_SOURCES; src++) {
		for (pos = 0; pos < TEST_LEN; pos++)
			buffs[src][pos] = rand();
	}

	memset(dest, 0, TEST_LEN);
	memset(temp_buff, 0, TEST_LEN);
	memset(dest_ref, 0, TEST_LEN);
	memset(coef, 0, TEST_SOURCES);

	// Random coefficient per source, then its 32-byte multiply table
	for (src = 0; src < TEST_SOURCES; src++)
		coef[src] = rand();

	for (src = 0; src < TEST_SOURCES; src++)
		gf_vect_mul_init(coef[src], g_tbls + src * 32);

	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);

#ifdef DO_REF_PERF
	perf_start(&start);
	for (iter = 0; iter < TEST_LOOPS; iter++) {
		for (src = 0; src < TEST_SOURCES; src++)
			gf_vect_mul_init(coef[src], g_tbls + src * 32);

		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
	}
	perf_stop(&stop);
	printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * iter);
#endif

	// Untimed warm-up call
	FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);

	perf_start(&start);
	for (iter = 0; iter < TEST_LOOPS; iter++) {
		for (src = 0; src < TEST_SOURCES; src++)
			gf_vect_mul_init(coef[src], g_tbls + src * 32);

		FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
	}
	perf_stop(&stop);
	printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * iter);

	// Sanity check: the timed function must agree with the reference
	if (memcmp(dest_ref, dest, TEST_LEN) == 0) {
		printf("pass perf check\n");
		return 0;
	}

	printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
	dump_matrix(buffs, 5, TEST_SOURCES);
	printf("dprod_base:");
	dump(dest_ref, 25);
	printf("dprod:");
	dump(dest, 25);
	return -1;
}
/*
 * Timing driver for gf_3vect_dot_prod_sse().
 *
 * Fills TEST_SOURCES buffers of TEST_LEN bytes with rand() data and builds
 * three coefficient vectors (g1, g2, g3), whose multiply tables are laid out
 * back to back in g_tbls (32 bytes per coefficient). Reference results come
 * from three gf_vect_dot_prod_base() calls; the three-output function is then
 * timed over TEST_LOOPS iterations with table setup included. When
 * DO_REF_PERF is defined the reference path is timed first, over
 * TEST_LOOPS/100 iterations.
 *
 * Returns 0 when all three outputs match the reference, -1 on allocation
 * failure or mismatch.
 */
int main(int argc, char *argv[])
{
	int i,j;
	void *buf;
	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
	u8 g_tbls[3*TEST_SOURCES*32], *dest_ptrs[3], *buffs[TEST_SOURCES];
	u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
	struct perf start, stop;

	printf("gf_3vect_dot_prod_sse: %dx%d\n", TEST_SOURCES, TEST_LEN);

	mk_gf_field();


	// Allocate the arrays
	for(i=0; i<TEST_SOURCES; i++){
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			return -1;
		}
		buffs[i] = buf;
	}

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest1 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest2 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest3 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref1 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref2 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref3 = buf;

	dest_ptrs[0] = dest1;
	dest_ptrs[1] = dest2;
	dest_ptrs[2] = dest3;


	// Performance test
	for(i=0; i<TEST_SOURCES; i++)
		for(j=0; j<TEST_LEN; j++)
			buffs[i][j] = rand();

	// Zero every output so the final memcmp never sees indeterminate
	// bytes; the third pair was previously left uninitialised.
	memset(dest1, 0, TEST_LEN);
	memset(dest2, 0, TEST_LEN);
	memset(dest3, 0, TEST_LEN);
	memset(dest_ref1, 0, TEST_LEN);
	memset(dest_ref2, 0, TEST_LEN);
	memset(dest_ref3, 0, TEST_LEN);

	for (i=0; i<TEST_SOURCES; i++){
		g1[i] = rand();
		g2[i] = rand();
		g3[i] = rand();
	}

	// Tables for g1, g2, g3 occupy consecutive 32*TEST_SOURCES-byte regions
	for(j=0; j<TEST_SOURCES; j++){
		gf_vect_mul_init(g1[j], &g_tbls[j*32]);
		gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]);
		gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]);
	}

	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64*TEST_SOURCES], buffs, dest_ref3);

#ifdef DO_REF_PERF
	perf_start(&start);
	for (i=0; i<TEST_LOOPS/100; i++){
		for (j=0; j<TEST_SOURCES; j++){
			gf_vect_mul_init(g1[j], &g_tbls[j*32]);
			gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]);
			gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]);
		}

		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32*TEST_SOURCES], buffs, dest_ref2);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64*TEST_SOURCES], buffs, dest_ref3);
	}
	perf_stop(&stop);
	printf("gf_3vect_dot_prod_base" TEST_TYPE_STR ": ");
	// i holds the number of iterations actually run
	perf_print(stop,start,(long long)TEST_LEN*(TEST_SOURCES+3)*i);
#endif

	// Untimed warm-up call
	gf_3vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);

	perf_start(&start);
	for (i=0; i<TEST_LOOPS; i++) {
		for (j=0; j<TEST_SOURCES; j++){
			gf_vect_mul_init(g1[j], &g_tbls[j*32]);
			gf_vect_mul_init(g2[j], &g_tbls[(32*TEST_SOURCES) + (j*32)]);
			gf_vect_mul_init(g3[j], &g_tbls[(64*TEST_SOURCES) + (j*32)]);
		}

		gf_3vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
	}
	perf_stop(&stop);
	printf("gf_3vect_dot_prod_sse" TEST_TYPE_STR ": ");
	perf_print(stop,start, (long long)TEST_LEN*(TEST_SOURCES+3)*i);

	// Each of the three outputs must agree with its reference
	if (0 != memcmp(dest_ref1, dest1, TEST_LEN)){
		printf("Fail perf vect_dot_prod_sse test1\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref1, 25);
		printf("dprod_sse:");
		dump(dest1, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref2, dest2, TEST_LEN)){
		printf("Fail perf vect_dot_prod_sse test2\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref2, 25);
		printf("dprod_sse:");
		dump(dest2, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref3, dest3, TEST_LEN)){
		printf("Fail perf vect_dot_prod_sse test3\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref3, 25);
		printf("dprod_sse:");
		dump(dest3, 25);
		return -1;
	}

	printf("pass perf check\n");
	return 0;

}
Exemple #5
0
int main(int argc, char *argv[])
{
	int i;
	u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
	int align, size;
	unsigned char *efence_buff1;
	unsigned char *efence_buff2;

	printf("gf_vect_mul_base_test:\n");

	gf_vect_mul_init(a, gf_const_tbl);

	buff1 = (u8 *) malloc(TEST_SIZE);
	buff2 = (u8 *) malloc(TEST_SIZE);
	buff3 = (u8 *) malloc(TEST_SIZE);

	if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
		printf("buffer alloc error\n");
		return -1;
	}
	// Fill with rand data
	for (i = 0; i < TEST_SIZE; i++)
		buff1[i] = rand();

	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);

	for (i = 0; i < TEST_SIZE; i++)
		if (gf_mul(a, buff1[i]) != buff2[i]) {
			printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
			       gf_mul(2, buff1[i]));
			return 1;
		}

	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);

	// Check reference function
	for (i = 0; i < TEST_SIZE; i++)
		if (buff2[i] != buff3[i]) {
			printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
			       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
			return 1;
		}

	for (i = 0; i < TEST_SIZE; i++)
		buff1[i] = rand();

	// Check each possible constant
	printf("Random tests ");
	for (a = 0; a != 255; a++) {
		gf_vect_mul_init(a, gf_const_tbl);
		gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);

		for (i = 0; i < TEST_SIZE; i++)
			if (gf_mul(a, buff1[i]) != buff2[i]) {
				printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
				       i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
				return 1;
			}
		putchar('.');
	}

	// Run tests at end of buffer for Electric Fence
	align = 32;
	a = 2;

	gf_vect_mul_init(a, gf_const_tbl);
	for (size = 0; size < TEST_SIZE; size += align) {
		// Line up TEST_SIZE from end
		efence_buff1 = buff1 + size;
		efence_buff2 = buff2 + size;

		gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);

		for (i = 0; i < TEST_SIZE - size; i++)
			if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
				       i, efence_buff1[i], efence_buff2[i], gf_mul(2,
										   efence_buff1
										   [i]));
				return 1;
			}

		putchar('.');
	}

	printf(" done: Pass\n");
	return 0;
}
/*
 * Functional test for gf_vect_dot_prod_sse(), checked against
 * gf_vect_dot_prod_base().
 *
 * Phases, in order:
 *   - all-zero inputs and coefficients
 *   - RANDOMS rounds of random data with TEST_SOURCES sources
 *   - random data with every source count from TEST_SOURCES down to 1
 *   - erasure-code encode/recover round trips built on gf_gen_rs_matrix()
 *     and gf_invert_matrix(), first with m=9, k=5 and then random m, k
 *   - runs that end exactly at the end of the source buffers
 *   - random pointer offsets, verifying bytes around the output are untouched
 *   - every length from TEST_LEN down to 16 in steps of `align`
 *
 * Returns 0 on pass, -1 on allocation failure or any mismatch.
 */
int main(int argc, char *argv[])
{
	int i, j, rtest, srcs, m, k, nerrs, r, err;
	void *buf;
	u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES*32], src_in_err[TEST_SOURCES];
	u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
	u8 a[MMAX*KMAX], b[MMAX*KMAX], d[MMAX*KMAX];
	u8  src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];

	int align, size;
	unsigned char *efence_buffs[TEST_SOURCES];
	unsigned int offset;
	u8 *ubuffs[TEST_SOURCES];
	u8 *udest_ptr;

	printf("gf_vect_dot_prod_sse: %dx%d ", TEST_SOURCES, TEST_LEN);

	mk_gf_field();


	// Allocate the arrays
	for(i=0; i<TEST_SOURCES; i++){
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			return -1;
		}
		buffs[i] = buf;
	}

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	temp_buff = buf;


	// Test of all zeros
	for(i=0; i<TEST_SOURCES; i++)
		memset(buffs[i], 0, TEST_LEN);

	memset(dest, 0, TEST_LEN);
	memset(temp_buff, 0, TEST_LEN);
	memset(dest_ref, 0, TEST_LEN);
	memset(g, 0, TEST_SOURCES);


	for(i=0; i<TEST_SOURCES; i++)
		gf_vect_mul_init(g[i], &g_tbls[i*32]);

	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);

	gf_vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);

	if (0 != memcmp(dest_ref, dest, TEST_LEN)){
		printf("Fail zero vect_dot_prod_sse test\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref, 25);
		printf("dprod_sse:");
		dump(dest, 25);
		return -1;
	}
	else
		putchar('.');

	// Rand data test
	for(rtest=0; rtest<RANDOMS; rtest++){
		for(i=0; i<TEST_SOURCES; i++)
			for(j=0; j<TEST_LEN; j++)
				buffs[i][j] = rand();

		for (i=0; i<TEST_SOURCES; i++)
			g[i] = rand();

		for(i=0; i<TEST_SOURCES; i++)
			gf_vect_mul_init(g[i], &g_tbls[i*32]);

		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
		gf_vect_dot_prod_sse(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);

		if (0 != memcmp(dest_ref, dest, TEST_LEN)){
			printf("Fail rand vect_dot_prod_sse test 1\n");
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref, 25);
			printf("dprod_sse:");
			dump(dest, 25);
			return -1;
		}

		putchar('.');
	}

	// Rand data test with varied parameters
	for(rtest=0; rtest < RANDOMS; rtest++){
		for (srcs = TEST_SOURCES; srcs > 0; srcs--){
			for(i=0; i<srcs; i++)
				for(j=0; j<TEST_LEN; j++)
					buffs[i][j] = rand();

			for (i=0; i<srcs; i++)
				g[i] = rand();

			for(i=0; i<srcs; i++)
				gf_vect_mul_init(g[i], &g_tbls[i*32]);

			gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
			gf_vect_dot_prod_sse(TEST_LEN, srcs, g_tbls, buffs, dest);

			if (0 != memcmp(dest_ref, dest, TEST_LEN)){
				printf("Fail rand vect_dot_prod_sse test 2\n");
				dump_matrix(buffs, 5, srcs);
				printf("dprod_base:");
				dump(dest_ref, 5);
				printf("dprod_sse:");
				dump(dest, 5);
				return -1;
			}

			putchar('.');
		}
	}




	// Test erasure code using gf_vect_dot_prod

	// Pick a first test
	m = 9;
	k = 5;
	if (m > MMAX || k > KMAX)
		return -1;

	gf_gen_rs_matrix(a, m, k);

	// Make random data
	for(i=0; i<k; i++)
		for(j=0; j<TEST_LEN; j++)
			buffs[i][j] = rand();

	// Make parity vects: row i of the matrix produces buffs[i]
	for (i=k; i<m; i++) {
		for (j=0; j<k; j++)
			gf_vect_mul_init(a[k*i+j], &g_tbls[j*32]);
#ifndef USEREF
		gf_vect_dot_prod_sse(TEST_LEN,
				k, g_tbls, buffs, buffs[i]);
#else
		gf_vect_dot_prod_base(TEST_LEN,
				k, &g_tbls[0], buffs, buffs[i]);
#endif
	}


	// Random buffers in erasure (at most m-k of the first k sources)
	memset(src_in_err, 0, TEST_SOURCES);
	for (i=0, nerrs=0; i<k && nerrs<m-k; i++){
		err = 1 & rand();
		src_in_err[i] = err;
		if (err)
			src_err_list[nerrs++] = i;
	}

	// construct b by removing error rows
	for(i=0, r=0; i<k; i++, r++){
		while (src_in_err[r]) {
			r++;
			continue;
		}
		for(j=0; j<k; j++)
			b[k*i+j] = a[k*r+j];
	}

	if (gf_invert_matrix((u8*)b, (u8*)d, k) < 0)
		printf("BAD MATRIX\n");


	// Collect the surviving buffers in the same order as the rows of b
	for(i=0, r=0; i<k; i++, r++){
		while (src_in_err[r]) {
			r++;
			continue;
		}
		recov[i] = buffs[r];
	}

	// Recover data
	for(i=0; i<nerrs; i++){
		for (j=0; j<k; j++)
			gf_vect_mul_init(d[k*src_err_list[i]+j], &g_tbls[j*32]);
#ifndef USEREF
		gf_vect_dot_prod_sse(TEST_LEN,
				k, g_tbls, recov, temp_buff);
#else
		gf_vect_dot_prod_base(TEST_LEN,
				k, &g_tbls[0], recov, temp_buff);
#endif

		if (0 != memcmp(temp_buff, buffs[src_err_list[i]],
					TEST_LEN)){
			printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
			printf("recov %d:",src_err_list[i]);
			dump(temp_buff, 25);
			printf("orig   :");
			dump(buffs[src_err_list[i]],25);
			return -1;
		}
	}


	// Do more random tests

	for (rtest = 0; rtest < RANDOMS; rtest++){
		// Pick 2 <= m < MMAX and 1 <= k < m
		while ((m = (rand() % MMAX)) < 2);
		while ((k = (rand() % KMAX)) >= m || k < 1);

		if (m>MMAX || k>KMAX)
			continue;

		gf_gen_rs_matrix(a, m, k);

		// Make random data
		for(i=0; i<k; i++)
			for(j=0; j<TEST_LEN; j++)
				buffs[i][j] = rand();

		// Make parity vects
		for (i=k; i<m; i++) {
			for (j=0; j<k; j++)
				gf_vect_mul_init(a[k*i+j], &g_tbls[j*32]);
#ifndef USEREF
			gf_vect_dot_prod_sse(TEST_LEN, k, g_tbls, buffs, buffs[i]);
#else
			gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
#endif
		}

		// Random errors
		memset(src_in_err, 0, TEST_SOURCES);
		for (i=0, nerrs=0; i<k && nerrs<m-k; i++){
			err = 1 & rand();
			src_in_err[i] = err;
			if (err)
				src_err_list[nerrs++] = i;
		}
		if (nerrs == 0){  // should have at least one error
			while ((err = (rand() % KMAX)) >= k) ;
			src_err_list[nerrs++] = err;
			src_in_err[err] = 1;
		}

		// construct b by removing error rows
		for(i=0, r=0; i<k; i++, r++){
			while (src_in_err[r]) {
				r++;
				continue;
			}
			for(j=0; j<k; j++)
				b[k*i+j] = a[k*r+j];
		}

		if (gf_invert_matrix((u8*)b, (u8*)d, k) < 0)
			printf("BAD MATRIX\n");

		for(i=0, r=0; i<k; i++, r++){
			while (src_in_err[r]) {
				r++;
				continue;
			}
			recov[i] = buffs[r];
		}

		// Recover data
		for(i=0; i<nerrs; i++){
			for (j=0; j<k; j++)
				gf_vect_mul_init(d[k*src_err_list[i]+j], &g_tbls[j*32]);
#ifndef USEREF
			gf_vect_dot_prod_sse(TEST_LEN, k, g_tbls, recov, temp_buff);
#else
			gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
#endif
			if (0 != memcmp(temp_buff, buffs[src_err_list[i]],
						TEST_LEN)){
				printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
				printf(" - erase list = ");
				// Print with j: reusing i here would leave i == nerrs,
				// so the dumps below would read an entry past the
				// recorded erasures instead of the failing one.
				for (j=0; j<nerrs; j++)
					printf(" %d", src_err_list[j]);
				printf("\na:\n");
				dump_u8xu8((u8*)a, m, k);
				printf("inv b:\n");
				dump_u8xu8((u8*)d, k, k);
				printf("orig data:\n");
				dump_matrix(buffs, m, 25);
				printf("orig   :");
				dump(buffs[src_err_list[i]],25);
				printf("recov %d:",src_err_list[i]);
				dump(temp_buff, 25);
				return -1;
			}
		}
		putchar('.');
	}

	// Run tests at end of buffer for Electric Fence
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
	for(size=EFENCE_TEST_MIN_SIZE; size<=TEST_SIZE; size+=align){
		for(i=0; i<TEST_SOURCES; i++)
			for(j=0; j<TEST_LEN; j++)
				buffs[i][j] = rand();

		for(i=0; i<TEST_SOURCES; i++) // Line up TEST_SIZE from end
			efence_buffs[i] = buffs[i] + TEST_LEN - size;

		for (i=0; i<TEST_SOURCES; i++)
			g[i] = rand();

		for(i=0; i<TEST_SOURCES; i++)
			gf_vect_mul_init(g[i], &g_tbls[i*32]);

		gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
		gf_vect_dot_prod_sse(size, TEST_SOURCES, g_tbls, efence_buffs, dest);

		if (0 != memcmp(dest_ref, dest, size)){
			printf("Fail rand vect_dot_prod_sse test 3\n");
			dump_matrix(efence_buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref, align);
			printf("dprod_sse:");
			dump(dest, align);
			return -1;
		}

		putchar('.');
	}

	// Test rand ptr alignment if available

	for(rtest=0; rtest<RANDOMS; rtest++){
		size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
		srcs = rand() % TEST_SOURCES;
		if (srcs == 0)
			continue;

		// Mask below is PTR_ALIGN_CHK_B-1 when non-zero, otherwise 0
		offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
		// Add random offsets
		for(i=0; i<srcs; i++)
			ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));

		udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));

		memset(dest, 0, TEST_LEN);  // zero pad to check write-over

		for(i=0; i<srcs; i++)
			for(j=0; j<size; j++)
				ubuffs[i][j] = rand();

		for (i=0; i<srcs; i++)
			g[i] = rand();

		for(i=0; i<srcs; i++)
			gf_vect_mul_init(g[i], &g_tbls[i*32]);

		gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);

		gf_vect_dot_prod_sse(size, srcs, g_tbls, ubuffs, udest_ptr);

		if (memcmp(dest_ref, udest_ptr, size)){
			printf("Fail rand vect_dot_prod_sse test ualign srcs=%d\n", srcs);
			dump_matrix(ubuffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref, 25);
			printf("dprod_sse:");
			dump(udest_ptr, 25);
			return -1;
		}

		// Confirm that padding around dests is unchanged
		memset(dest_ref, 0, PTR_ALIGN_CHK_B);  // Make reference zero buff
		offset = udest_ptr - dest;

		if (memcmp(dest, dest_ref, offset)){
			printf("Fail rand ualign pad start\n");
			return -1;
		}
		if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)){
			printf("Fail rand ualign pad end\n");
			return -1;
		}

		putchar('.');
	}


	// Test all size alignment
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;

	for(size=TEST_LEN; size>15; size-=align){
		srcs = TEST_SOURCES;

		for(i=0; i<srcs; i++)
			for(j=0; j<size; j++)
				buffs[i][j] = rand();

		for (i=0; i<srcs; i++)
			g[i] = rand();

		for(i=0; i<srcs; i++)
			gf_vect_mul_init(g[i], &g_tbls[i*32]);

		gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);

		gf_vect_dot_prod_sse(size, srcs, g_tbls, buffs, dest);

		if (memcmp(dest_ref, dest, size)){
			printf("Fail rand vect_dot_prod_sse test ualign len=%d\n", size);
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref, 25);
			printf("dprod_sse:");
			dump(dest, 25);
			return -1;
		}
	}

	printf("done all: Pass\n");
	return 0;
}
/*
 * Functional test for FUNCTION_UNDER_TEST (a three-output dot product),
 * checked against three gf_vect_dot_prod_base() calls.
 *
 * The multiply tables for the three coefficient vectors g1, g2, g3 are laid
 * out back to back in g_tbls, 32 bytes per coefficient, with the second and
 * third regions starting at 32*srcs and 64*srcs for the source count in use.
 *
 * Phases, in order:
 *   - all-zero source data with fixed coefficients (2, 1, 7)
 *   - RANDOMS rounds of random data with TEST_SOURCES sources
 *   - random data with every source count from TEST_SOURCES down to 1
 *   - runs that end exactly at the end of the source buffers
 *   - random pointer offsets, verifying bytes around each output are untouched
 *   - every length from TEST_LEN down to TEST_MIN_SIZE in steps of `align`
 *
 * Returns 0 on pass, -1 on allocation failure or any mismatch.
 */
int main(int argc, char *argv[])
{
	int i, j, rtest, srcs;
	void *buf;
	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
	u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
	u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;

	int align, size;
	unsigned char *efence_buffs[TEST_SOURCES];
	unsigned int offset;
	u8 *ubuffs[TEST_SOURCES];
	u8 *udest_ptrs[3];
	printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN);

	// Allocate the arrays
	for (i = 0; i < TEST_SOURCES; i++) {
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			return -1;
		}
		buffs[i] = buf;
	}

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest1 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest2 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest3 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref1 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref2 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref3 = buf;

	dest_ptrs[0] = dest1;
	dest_ptrs[1] = dest2;
	dest_ptrs[2] = dest3;

	// Test of all zeros
	for (i = 0; i < TEST_SOURCES; i++)
		memset(buffs[i], 0, TEST_LEN);

	memset(dest1, 0, TEST_LEN);
	memset(dest2, 0, TEST_LEN);
	memset(dest3, 0, TEST_LEN);
	memset(dest_ref1, 0, TEST_LEN);
	memset(dest_ref2, 0, TEST_LEN);
	memset(dest_ref3, 0, TEST_LEN);
	memset(g1, 2, TEST_SOURCES);
	memset(g2, 1, TEST_SOURCES);
	memset(g3, 7, TEST_SOURCES);

	for (i = 0; i < TEST_SOURCES; i++) {
		gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
		gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
		gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
	}

	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
			      dest_ref2);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
			      dest_ref3);

	FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);

	if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
		printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref1, 25);
		printf("dprod_dut:");
		dump(dest1, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
		printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref2, 25);
		printf("dprod_dut:");
		dump(dest2, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
		printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref3, 25);
		printf("dprod_dut:");
		dump(dest3, 25);
		return -1;
	}

	putchar('.');

	// Rand data test

	for (rtest = 0; rtest < RANDOMS; rtest++) {
		for (i = 0; i < TEST_SOURCES; i++)
			for (j = 0; j < TEST_LEN; j++)
				buffs[i][j] = rand();

		for (i = 0; i < TEST_SOURCES; i++) {
			g1[i] = rand();
			g2[i] = rand();
			g3[i] = rand();
		}

		for (i = 0; i < TEST_SOURCES; i++) {
			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
			gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
			gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
		}

		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
				      buffs, dest_ref2);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
				      buffs, dest_ref3);

		FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);

		if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref1, 25);
			printf("dprod_dut:");
			dump(dest1, 25);
			return -1;
		}
		if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref2, 25);
			printf("dprod_dut:");
			dump(dest2, 25);
			return -1;
		}
		if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref3, 25);
			printf("dprod_dut:");
			dump(dest3, 25);
			return -1;
		}

		putchar('.');
	}

	// Rand data test with varied parameters
	for (rtest = 0; rtest < RANDOMS; rtest++) {
		for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
			for (i = 0; i < srcs; i++)
				for (j = 0; j < TEST_LEN; j++)
					buffs[i][j] = rand();

			for (i = 0; i < srcs; i++) {
				g1[i] = rand();
				g2[i] = rand();
				g3[i] = rand();
			}

			// Table regions are packed for the current source count
			for (i = 0; i < srcs; i++) {
				gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
				gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
				gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
			}

			gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
			gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
					      dest_ref2);
			gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
					      dest_ref3);

			FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);

			if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
				       " test1 srcs=%d\n", srcs);
				dump_matrix(buffs, 5, TEST_SOURCES);
				printf("dprod_base:");
				dump(dest_ref1, 25);
				printf("dprod_dut:");
				dump(dest1, 25);
				return -1;
			}
			if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
				       " test2 srcs=%d\n", srcs);
				dump_matrix(buffs, 5, TEST_SOURCES);
				printf("dprod_base:");
				dump(dest_ref2, 25);
				printf("dprod_dut:");
				dump(dest2, 25);
				return -1;
			}
			if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
				       " test3 srcs=%d\n", srcs);
				dump_matrix(buffs, 5, TEST_SOURCES);
				printf("dprod_base:");
				dump(dest_ref3, 25);
				printf("dprod_dut:");
				dump(dest3, 25);
				return -1;
			}

			putchar('.');
		}
	}

	// Run tests at end of buffer for Electric Fence
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
	for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
		for (i = 0; i < TEST_SOURCES; i++)
			for (j = 0; j < TEST_LEN; j++)
				buffs[i][j] = rand();

		for (i = 0; i < TEST_SOURCES; i++)	// Line up TEST_SIZE from end
			efence_buffs[i] = buffs[i] + TEST_LEN - size;

		for (i = 0; i < TEST_SOURCES; i++) {
			g1[i] = rand();
			g2[i] = rand();
			g3[i] = rand();
		}

		for (i = 0; i < TEST_SOURCES; i++) {
			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
			gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
			gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
		}

		gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
		gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
				      efence_buffs, dest_ref2);
		gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
				      efence_buffs, dest_ref3);

		FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);

		// Report the failing size; rtest is stale in this loop (left
		// over from the previous phase) and says nothing about the run.
		if (0 != memcmp(dest_ref1, dest1, size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 size=%d\n", size);
			dump_matrix(efence_buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref1, align);
			printf("dprod_dut:");
			dump(dest1, align);
			return -1;
		}

		if (0 != memcmp(dest_ref2, dest2, size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 size=%d\n", size);
			dump_matrix(efence_buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref2, align);
			printf("dprod_dut:");
			dump(dest2, align);
			return -1;
		}

		if (0 != memcmp(dest_ref3, dest3, size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 size=%d\n", size);
			dump_matrix(efence_buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref3, align);
			printf("dprod_dut:");
			dump(dest3, align);
			return -1;
		}

		putchar('.');
	}

	// Test rand ptr alignment if available

	for (rtest = 0; rtest < RANDOMS; rtest++) {
		size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
		srcs = rand() % TEST_SOURCES;
		if (srcs == 0)
			continue;

		// Mask below is PTR_ALIGN_CHK_B-1 when non-zero, otherwise 0
		offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
		// Add random offsets
		for (i = 0; i < srcs; i++)
			ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));

		udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
		udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
		udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));

		memset(dest1, 0, TEST_LEN);	// zero pad to check write-over
		memset(dest2, 0, TEST_LEN);
		memset(dest3, 0, TEST_LEN);

		for (i = 0; i < srcs; i++)
			for (j = 0; j < size; j++)
				ubuffs[i][j] = rand();

		for (i = 0; i < srcs; i++) {
			g1[i] = rand();
			g2[i] = rand();
			g3[i] = rand();
		}

		for (i = 0; i < srcs; i++) {
			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
			gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
			gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
		}

		gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
		gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
		gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);

		FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);

		if (memcmp(dest_ref1, udest_ptrs[0], size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
			       srcs);
			dump_matrix(ubuffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref1, 25);
			printf("dprod_dut:");
			dump(udest_ptrs[0], 25);
			return -1;
		}
		if (memcmp(dest_ref2, udest_ptrs[1], size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
			       srcs);
			dump_matrix(ubuffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref2, 25);
			printf("dprod_dut:");
			dump(udest_ptrs[1], 25);
			return -1;
		}
		if (memcmp(dest_ref3, udest_ptrs[2], size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
			       srcs);
			dump_matrix(ubuffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref3, 25);
			printf("dprod_dut:");
			dump(udest_ptrs[2], 25);
			return -1;
		}
		// Confirm that padding around dests is unchanged
		memset(dest_ref1, 0, PTR_ALIGN_CHK_B);	// Make reference zero buff
		offset = udest_ptrs[0] - dest1;

		if (memcmp(dest1, dest_ref1, offset)) {
			printf("Fail rand ualign pad1 start\n");
			return -1;
		}
		if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
			printf("Fail rand ualign pad1 end\n");
			return -1;
		}

		offset = udest_ptrs[1] - dest2;
		if (memcmp(dest2, dest_ref1, offset)) {
			printf("Fail rand ualign pad2 start\n");
			return -1;
		}
		if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
			printf("Fail rand ualign pad2 end\n");
			return -1;
		}

		offset = udest_ptrs[2] - dest3;
		if (memcmp(dest3, dest_ref1, offset)) {
			printf("Fail rand ualign pad3 start\n");
			return -1;
		}
		if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
			printf("Fail rand ualign pad3 end\n");
			return -1;
		}

		putchar('.');
	}

	// Test all size alignment
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;

	for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
		srcs = TEST_SOURCES;

		for (i = 0; i < srcs; i++)
			for (j = 0; j < size; j++)
				buffs[i][j] = rand();

		for (i = 0; i < srcs; i++) {
			g1[i] = rand();
			g2[i] = rand();
			g3[i] = rand();
		}

		for (i = 0; i < srcs; i++) {
			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
			gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
			gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
		}

		gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
		gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
		gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);

		FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);

		if (memcmp(dest_ref1, dest_ptrs[0], size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
			       size);
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref1, 25);
			printf("dprod_dut:");
			dump(dest_ptrs[0], 25);
			return -1;
		}
		if (memcmp(dest_ref2, dest_ptrs[1], size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
			       size);
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref2, 25);
			printf("dprod_dut:");
			dump(dest_ptrs[1], 25);
			return -1;
		}
		if (memcmp(dest_ref3, dest_ptrs[2], size)) {
			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
			       size);
			dump_matrix(buffs, 5, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref3, 25);
			printf("dprod_dut:");
			dump(dest_ptrs[2], 25);
			return -1;
		}
	}

	printf("Pass\n");
	return 0;

}
int main(int argc, char *argv[])
{
	int i, j;
	void *buf;
	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
	u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls;
	u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1;
	u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6;
	u8 *dest_ptrs[6], *buffs[TEST_SOURCES];
	struct perf start, stop;

	printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);

	// Allocate the arrays
	for (i = 0; i < TEST_SOURCES; i++) {
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			return -1;
		}
		buffs[i] = buf;
	}

	if (posix_memalign(&buf, 16, 6 * TEST_SOURCES * 32)) {
		printf("alloc error: Fail");
		return -1;
	}
	g_tbls = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest1 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest2 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest3 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest4 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest5 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest6 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref1 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref2 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref3 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref4 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref5 = buf;

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}
	dest_ref6 = buf;

	dest_ptrs[0] = dest1;
	dest_ptrs[1] = dest2;
	dest_ptrs[2] = dest3;
	dest_ptrs[3] = dest4;
	dest_ptrs[4] = dest5;
	dest_ptrs[5] = dest6;

	// Performance test
	for (i = 0; i < TEST_SOURCES; i++)
		for (j = 0; j < TEST_LEN; j++)
			buffs[i][j] = rand();

	memset(dest1, 0, TEST_LEN);
	memset(dest2, 0, TEST_LEN);
	memset(dest3, 0, TEST_LEN);
	memset(dest4, 0, TEST_LEN);
	memset(dest5, 0, TEST_LEN);
	memset(dest6, 0, TEST_LEN);
	memset(dest_ref1, 0, TEST_LEN);
	memset(dest_ref2, 0, TEST_LEN);
	memset(dest_ref3, 0, TEST_LEN);
	memset(dest_ref4, 0, TEST_LEN);
	memset(dest_ref5, 0, TEST_LEN);
	memset(dest_ref6, 0, TEST_LEN);

	for (i = 0; i < TEST_SOURCES; i++) {
		g1[i] = rand();
		g2[i] = rand();
		g3[i] = rand();
		g4[i] = rand();
		g5[i] = rand();
		g6[i] = rand();
	}

	for (j = 0; j < TEST_SOURCES; j++) {
		gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
		gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
		gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
		gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
		gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
		gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
	}

	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
			      dest_ref2);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
			      dest_ref3);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
			      dest_ref4);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
			      dest_ref5);
	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs,
			      dest_ref6);

#ifdef DO_REF_PERF
	perf_start(&start);
	for (i = 0; i < TEST_LOOPS / 20; i++) {
		for (j = 0; j < TEST_SOURCES; j++) {
			gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
			gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
		}

		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
				      buffs, dest_ref2);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
				      buffs, dest_ref3);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
				      buffs, dest_ref4);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
				      buffs, dest_ref5);
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
				      buffs, dest_ref6);
	}
	perf_stop(&stop);
	printf("gf_6vect_dot_prod_base" TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i);
#endif

	FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);

	perf_start(&start);
	for (i = 0; i < TEST_LOOPS; i++) {
		for (j = 0; j < TEST_SOURCES; j++) {
			gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
			gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
			gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
		}

		FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
	}
	perf_stop(&stop);
	printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i);

	if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
		printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref1, 25);
		printf("dprod_dut:");
		dump(dest1, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
		printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref2, 25);
		printf("dprod_dut:");
		dump(dest2, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
		printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref3, 25);
		printf("dprod_dut:");
		dump(dest3, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
		printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test4\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref4, 25);
		printf("dprod_dut:");
		dump(dest4, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
		printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test5\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref5, 25);
		printf("dprod_dut:");
		dump(dest5, 25);
		return -1;
	}
	if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
		printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test6\n");
		dump_matrix(buffs, 5, TEST_SOURCES);
		printf("dprod_base:");
		dump(dest_ref6, 25);
		printf("dprod_dut:");
		dump(dest6, 25);
		return -1;
	}

	printf("pass perf check\n");
	return 0;

}
/*
 * Functional test driver for the routine selected by FUNCTION_UNDER_TEST.
 *
 * In every scenario the destinations are zeroed, FUNCTION_UNDER_TEST is
 * called once per source index, and the resulting destination contents are
 * compared with references produced by gf_vect_dot_prod_base() (and, once,
 * by REF_FUNCTION).  VECT selects how many destinations are exercised; when
 * VECT == 1 a single destination pointer is passed instead of the array.
 *
 * Scenarios, in order: initial check, random data, random data with a varying
 * source count, sources placed at the end of their allocations, randomly
 * offset source/destination pointers with a padding check, and decreasing
 * lengths.
 *
 * Returns 0 after printing "Pass"; returns -1 on allocation failure, on an
 * unsupported VECT value, or on the first mismatch.  Allocated buffers are
 * not released before returning.
 */
int main(int argc, char *argv[])
{
	int i, j, rtest, srcs;
	void *buf;
	u8 gf[6][TEST_SOURCES];	// gf[v][s]: coefficient of source s for destination v
	u8 *g_tbls;
	u8 *dest_ref[VECT];
	u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
	int vector = VECT;

	int align, size;
	unsigned char *efence_buffs[TEST_SOURCES];
	unsigned int offset;
	u8 *ubuffs[TEST_SOURCES];
	u8 *udest_ptrs[VECT];
	printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);

	// Allocate the arrays
	for (i = 0; i < TEST_SOURCES; i++) {
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			return -1;
		}
		buffs[i] = buf;
	}

	// Table storage: the code below indexes 32 bytes per (destination, source)
	// pair; the allocation is twice that size.
	if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
		printf("alloc error: Fail");
		return -1;
	}
	g_tbls = buf;

	// Destinations written by the function under test
	for (i = 0; i < vector; i++) {
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			return -1;
		}
		dest_ptrs[i] = buf;
		memset(dest_ptrs[i], 0, TEST_LEN);
	}

	// Reference destinations written by the base implementation
	for (i = 0; i < vector; i++) {
		if (posix_memalign(&buf, 64, TEST_LEN)) {
			printf("alloc error: Fail");
			return -1;
		}
		dest_ref[i] = buf;
		memset(dest_ref[i], 0, TEST_LEN);
	}

	// Test of all zeros
	// NOTE(review): the zeroed sources and the fixed coefficients set up here
	// are both overwritten with rand() values by the two loops that follow the
	// switch, so the comparison below actually runs on random data.
	for (i = 0; i < TEST_SOURCES; i++)
		memset(buffs[i], 0, TEST_LEN);

	// Cases have no break on purpose: entering at `vector` falls through and
	// initialises rows vector-1 down to 0.  Any other value aborts the test.
	switch (vector) {
	case 6:
		memset(gf[5], 0xe6, TEST_SOURCES);
	case 5:
		memset(gf[4], 4, TEST_SOURCES);
	case 4:
		memset(gf[3], 9, TEST_SOURCES);
	case 3:
		memset(gf[2], 7, TEST_SOURCES);
	case 2:
		memset(gf[1], 1, TEST_SOURCES);
	case 1:
		memset(gf[0], 2, TEST_SOURCES);
		break;
	default:
		return -1;
	}

	for (i = 0; i < TEST_SOURCES; i++)
		for (j = 0; j < TEST_LEN; j++)
			buffs[i][j] = rand();

	// Random coefficients and their 32-byte tables, laid out destination-major
	for (i = 0; i < vector; i++)
		for (j = 0; j < TEST_SOURCES; j++) {
			gf[i][j] = rand();
			gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
		}

	// Reference: one base dot product per destination
	for (i = 0; i < vector; i++)
		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
				      buffs, dest_ref[i]);

	// Destinations start from zero, then the function under test is applied
	// once for each source index.
	for (i = 0; i < vector; i++)
		memset(dest_ptrs[i], 0, TEST_LEN);
	for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
		FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
#else
		FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif
	}
	for (i = 0; i < vector; i++) {
		if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
			printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
			dump_matrix(buffs, vector, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref[i], 25);
			printf("dprod_dut:");
			dump(dest_ptrs[i], 25);
			return -1;
		}
	}

	// Second comparison: regenerate the references with REF_FUNCTION and check
	// the same outputs again (the failure text is identical to the one above).
#if (VECT == 1)
	REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
#else
	REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
#endif
	for (i = 0; i < vector; i++) {
		if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
			printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
			dump_matrix(buffs, vector, TEST_SOURCES);
			printf("dprod_base:");
			dump(dest_ref[i], 25);
			printf("dprod_dut:");
			dump(dest_ptrs[i], 25);
			return -1;
		}
	}

	putchar('.');

	// Rand data test
	// RANDOMS rounds at full length with all TEST_SOURCES sources; sources and
	// coefficients are regenerated each round.

	for (rtest = 0; rtest < RANDOMS; rtest++) {
		for (i = 0; i < TEST_SOURCES; i++)
			for (j = 0; j < TEST_LEN; j++)
				buffs[i][j] = rand();

		for (i = 0; i < vector; i++)
			for (j = 0; j < TEST_SOURCES; j++) {
				gf[i][j] = rand();
				gf_vect_mul_init(gf[i][j],
						 &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
			}

		for (i = 0; i < vector; i++)
			gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
					      &g_tbls[i * 32 * TEST_SOURCES], buffs,
					      dest_ref[i]);

		for (i = 0; i < vector; i++)
			memset(dest_ptrs[i], 0, TEST_LEN);
		for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
			FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
					    *dest_ptrs);
#else
			FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
					    dest_ptrs);
#endif
		}
		for (i = 0; i < vector; i++) {
			if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
				printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n",
				       i, rtest);
				dump_matrix(buffs, vector, TEST_SOURCES);
				printf("dprod_base:");
				dump(dest_ref[i], 25);
				printf("dprod_dut:");
				dump(dest_ptrs[i], 25);
				return -1;
			}
		}

		putchar('.');
	}

	// Rand data test with varied parameters
	// Source count runs from TEST_SOURCES down to 1; the per-destination table
	// stride follows the current count (32 * srcs).
	for (rtest = 0; rtest < RANDOMS; rtest++) {
		for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
			for (i = 0; i < srcs; i++)
				for (j = 0; j < TEST_LEN; j++)
					buffs[i][j] = rand();

			for (i = 0; i < vector; i++)
				for (j = 0; j < srcs; j++) {
					gf[i][j] = rand();
					gf_vect_mul_init(gf[i][j],
							 &g_tbls[i * (32 * srcs) + j * 32]);
				}

			for (i = 0; i < vector; i++)
				gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs],
						      buffs, dest_ref[i]);

			for (i = 0; i < vector; i++)
				memset(dest_ptrs[i], 0, TEST_LEN);
			for (i = 0; i < srcs; i++) {
#if (VECT == 1)
				FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
						    *dest_ptrs);
#else
				FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
						    dest_ptrs);
#endif

			}
			for (i = 0; i < vector; i++) {
				if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
					printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
					       " test%d srcs=%d\n", i, srcs);
					dump_matrix(buffs, vector, TEST_SOURCES);
					printf("dprod_base:");
					dump(dest_ref[i], 25);
					printf("dprod_dut:");
					dump(dest_ptrs[i], 25);
					return -1;
				}
			}

			putchar('.');
		}
	}

	// Run tests at end of buffer for Electric Fence
	// Each source pointer is moved to `size` bytes before the end of its
	// TEST_LEN allocation, so the data read ends exactly at the allocation end.
	// Step is 1 byte when LEN_ALIGN_CHK_B is non-zero, ALIGN_SIZE otherwise.
	// NOTE(review): presumably TEST_SIZE <= TEST_LEN; confirm against the macros.
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
	for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
		for (i = 0; i < TEST_SOURCES; i++)
			for (j = 0; j < TEST_LEN; j++)
				buffs[i][j] = rand();

		for (i = 0; i < TEST_SOURCES; i++)	// Line up TEST_SIZE from end
			efence_buffs[i] = buffs[i] + TEST_LEN - size;

		for (i = 0; i < vector; i++)
			for (j = 0; j < TEST_SOURCES; j++) {
				gf[i][j] = rand();
				gf_vect_mul_init(gf[i][j],
						 &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
			}

		for (i = 0; i < vector; i++)
			gf_vect_dot_prod_base(size, TEST_SOURCES,
					      &g_tbls[i * 32 * TEST_SOURCES], efence_buffs,
					      dest_ref[i]);

		// Only the first `size` bytes of each destination are cleared and compared
		for (i = 0; i < vector; i++)
			memset(dest_ptrs[i], 0, size);
		for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
			FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
					    *dest_ptrs);
#else
			FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
					    dest_ptrs);
#endif
		}
		for (i = 0; i < vector; i++) {
			if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
				       " test%d size=%d\n", i, size);
				dump_matrix(buffs, vector, TEST_SOURCES);
				printf("dprod_base:");
				dump(dest_ref[i], TEST_MIN_SIZE + align);
				printf("dprod_dut:");
				dump(dest_ptrs[i], TEST_MIN_SIZE + align);
				return -1;
			}
		}

		putchar('.');
	}

	// Test rand ptr alignment if available

	for (rtest = 0; rtest < RANDOMS; rtest++) {
		// Length leaves PTR_ALIGN_CHK_B bytes of slack and is masked with
		// ~(TEST_MIN_SIZE - 1) (assumes TEST_MIN_SIZE is a power of two - TODO confirm).
		size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
		// srcs is in [0, TEST_SOURCES - 1]; a draw of 0 skips this round
		// (no dot is printed for it).
		srcs = rand() % TEST_SOURCES;
		if (srcs == 0)
			continue;

		// `offset` is used here only to build the mask below: the mask is
		// PTR_ALIGN_CHK_B - 1 when PTR_ALIGN_CHK_B is non-zero and 0 otherwise.
		offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
		// Add random offsets
		for (i = 0; i < srcs; i++)
			ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));

		for (i = 0; i < vector; i++) {
			udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
			memset(dest_ptrs[i], 0, TEST_LEN);	// zero pad to check write-over
		}

		for (i = 0; i < srcs; i++)
			for (j = 0; j < size; j++)
				ubuffs[i][j] = rand();

		for (i = 0; i < vector; i++)
			for (j = 0; j < srcs; j++) {
				gf[i][j] = rand();
				gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
			}

		for (i = 0; i < vector; i++)
			gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
					      dest_ref[i]);

		for (i = 0; i < srcs; i++) {
#if (VECT == 1)
			FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
#else
			FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
#endif
		}
		for (i = 0; i < vector; i++) {
			if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
				       " test%d ualign srcs=%d\n", i, srcs);
				dump_matrix(buffs, vector, TEST_SOURCES);
				printf("dprod_base:");
				dump(dest_ref[i], 25);
				printf("dprod_dut:");
				dump(udest_ptrs[i], 25);
				return -1;
			}
		}

		// Confirm that padding around dests is unchanged
		// dest_ref[0] is reused as the all-zero comparison buffer from here on;
		// its reference contents are no longer needed in this round.
		memset(dest_ref[0], 0, PTR_ALIGN_CHK_B);	// Make reference zero buff

		for (i = 0; i < vector; i++) {
			// `offset` now holds the byte distance from the start of the
			// allocation to the shifted destination pointer.
			offset = udest_ptrs[i] - dest_ptrs[i];
			// Bytes in front of the shifted destination must still be zero
			if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
				printf("Fail rand ualign pad1 start\n");
				return -1;
			}
			// So must the PTR_ALIGN_CHK_B - offset bytes after the written region
			if (memcmp
			    (dest_ptrs[i] + offset + size, dest_ref[0],
			     PTR_ALIGN_CHK_B - offset)) {
				printf("Fail rand ualign pad1 end\n");
				return -1;
			}
		}

		putchar('.');
	}

	// Test all size alignment
	// Lengths run from TEST_LEN down to TEST_MIN_SIZE in steps of `align`
	// (1 byte when LEN_ALIGN_CHK_B is non-zero, ALIGN_SIZE otherwise).
	align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;

	for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
		for (i = 0; i < TEST_SOURCES; i++)
			for (j = 0; j < size; j++)
				buffs[i][j] = rand();

		for (i = 0; i < vector; i++) {
			for (j = 0; j < TEST_SOURCES; j++) {
				gf[i][j] = rand();
				gf_vect_mul_init(gf[i][j],
						 &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
			}
			memset(dest_ptrs[i], 0, TEST_LEN);	// zero pad to check write-over
		}

		for (i = 0; i < vector; i++)
			gf_vect_dot_prod_base(size, TEST_SOURCES,
					      &g_tbls[i * 32 * TEST_SOURCES], buffs,
					      dest_ref[i]);

		for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
			FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
					    *dest_ptrs);
#else
			FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
					    dest_ptrs);
#endif
		}
		// Only the first `size` bytes are compared; the bytes beyond `size`
		// are not inspected in this scenario.
		for (i = 0; i < vector; i++) {
			if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
				       " test%d ualign len=%d\n", i, size);
				dump_matrix(buffs, vector, TEST_SOURCES);
				printf("dprod_base:");
				dump(dest_ref[i], 25);
				printf("dprod_dut:");
				dump(dest_ptrs[i], 25);
				return -1;
			}
		}

		putchar('.');

	}

	printf("Pass\n");
	return 0;

}