예제 #1
0
파일: dgemm_ta.c 프로젝트: aocalderon/PhD
/* Elapsed time between two clock samples, in nanoseconds (as a double). */
static double elapsed_ns(const struct timespec *start, const struct timespec *end)
{
	return (end->tv_sec - start->tv_sec)*BILLION + end->tv_nsec - start->tv_nsec;
}

/*
 * Benchmark driver for the dgemm0..dgemm3 kernels.
 *
 * Usage: <program> <matrix-size>
 *
 * Allocates two n x n input matrices filled by RandMatrixGen() and one
 * output matrix per kernel, times each kernel with CLOCK_MONOTONIC, and
 * prints the runtimes together with the value verification() reports for
 * each kernel's output against the dgemm0 output.
 *
 * Returns 0 on success, EXIT_FAILURE on a bad argument or failed allocation.
 */
int main(int argc, char *argv[])
{
	int n;
	int rc = EXIT_FAILURE;
	long parsed;
	char *endp;
	size_t dim, bytes;
	double *a, *b, *c0, *c1, *c2, *c3;
	double t0, t1, t2, t3;
	double diff1, diff2, diff3;
	struct timespec start, end;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <matrix-size>\n",
			argc > 0 ? argv[0] : "dgemm");
		return EXIT_FAILURE;
	}

	/*
	 * strtol (unlike atoi) lets us reject non-numeric input and trailing
	 * garbage. The int round-trip rejects values that do not fit in an int.
	 */
	parsed = strtol(argv[1], &endp, 10);
	if (endp == argv[1] || *endp != '\0' || parsed <= 0 ||
	    (long)(int)parsed != parsed) {
		fprintf(stderr, "invalid matrix size: %s\n", argv[1]);
		return EXIT_FAILURE;
	}
	n = (int)parsed;

	/* Compute the buffer size in size_t and refuse sizes that would wrap. */
	dim = (size_t)n;
	if (dim > ((size_t)-1 / sizeof(double)) / dim) {
		fprintf(stderr, "matrix size %d is too large\n", n);
		return EXIT_FAILURE;
	}
	bytes = dim * dim * sizeof(double);

	a = malloc(bytes);
	b = malloc(bytes);
	c0 = malloc(bytes);
	c1 = malloc(bytes);
	c2 = malloc(bytes);
	c3 = malloc(bytes);
	if (!a || !b || !c0 || !c1 || !c2 || !c3) {
		fprintf(stderr, "out of memory allocating %d x %d matrices\n", n, n);
		goto cleanup;
	}

	RandMatrixGen(a, n);
	RandMatrixGen(b, n);

	clock_gettime(CLOCK_MONOTONIC, &start);
	dgemm0(a, b, c0, n);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t0 = elapsed_ns(&start, &end);

	clock_gettime(CLOCK_MONOTONIC, &start);
	dgemm1(a, b, c1, n);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t1 = elapsed_ns(&start, &end);

	clock_gettime(CLOCK_MONOTONIC, &start);
	dgemm2(a, b, c2, n);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t2 = elapsed_ns(&start, &end);

	clock_gettime(CLOCK_MONOTONIC, &start);
	dgemm3(a, b, c3, n);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t3 = elapsed_ns(&start, &end);

	/* dgemm0's output is the reference the other kernels are compared to. */
	diff1 = verification(c0, c1, n);
	diff2 = verification(c0, c2, n);
	diff3 = verification(c0, c3, n);

	printf ("matrix size: %d\n", n);
	printf ("dgemm0 runtime: %llu nanoseconds\n", (long long unsigned int) t0);
	printf ("dgemm1 runtime: %llu nanoseconds\n", (long long unsigned int) t1);
	printf ("dgemm2 runtime: %llu nanoseconds\n", (long long unsigned int) t2);
	printf ("dgemm3 runtime: %llu nanoseconds\n", (long long unsigned int) t3);
	printf ("maximum difference between dgemm0 and dgemm1: %f\n", diff1);
	printf ("maximum difference between dgemm0 and dgemm2: %f\n", diff2);
	printf ("maximum difference between dgemm0 and dgemm3: %f\n", diff3);

	rc = 0;

cleanup:
	/* free(NULL) is a no-op, so this is safe after a partial allocation. */
	free(a);
	free(b);
	free(c0);
	free(c1);
	free(c2);
	free(c3);

	return rc;
}
예제 #2
0
 /*
  * Run dgemm3 on the sub-block of A, B and C selected by (i, j, k).
  *
  * Each block extent is BLOCK_SIZE3, clipped to what remains before lda
  * when the block would run past it. The sub-block pointers are offset
  * as base + row + col*lda (this looks like column-major storage with
  * leading dimension lda -- confirm against the caller).
  */
 void do_block3(const int lda,
                const double *A, const double *B, double *C,
                const int i, const int j, const int k)
 {
     int rows  = BLOCK_SIZE3;
     int cols  = BLOCK_SIZE3;
     int depth = BLOCK_SIZE3;

     /* Shrink any extent whose full-size block would extend past lda. */
     if (i+BLOCK_SIZE3 > lda)
         rows = lda-i;
     if (j+BLOCK_SIZE3 > lda)
         cols = lda-j;
     if (k+BLOCK_SIZE3 > lda)
         depth = lda-k;

     const double *a_blk = A + i + k*lda;
     const double *b_blk = B + k + j*lda;
     double *c_blk = C + i + j*lda;

     dgemm3(lda, rows, cols, depth, a_blk, b_blk, c_blk);
 }