int main (int argc, char *argv[]) { float *A, *B, *C; int N, M, i, j; #ifdef CHECK FILE *fd; #endif if (argc != 3) { printf("Usage: mm-test matrix_size iter\n"); exit(0); } N = atoi(argv[1]); M = atoi(argv[2]); /* Alokacja i inicjowanie macierzy */ A = (float *)malloc(N * N * sizeof(float)); B = (float *)malloc(N * N * sizeof(float)); C = (float *)malloc(N * N * sizeof(float)); #ifdef CHECK if ((fd = fopen("tmp111", "r")) == NULL) { printf("Cannot open tmp111\n"); exit(0); } for (i = 0; i < N; i++) for (j = 0; j < N; j++) fscanf(fd, "%f", (A + i * N + j)); fclose(fd); if ((fd = fopen("tmp222", "r")) == NULL) { printf("Cannot open tmp222\n"); exit(0); } for (i = 0; i < N; i++) for (j = 0; j < N; j++) fscanf(fd, " %f", (B + i * N + j)); fclose(fd); #else srand48(100); for (i = 0; i < N * N; ++i) { A[i] = drand48(); B[i] = drand48(); } #endif /* Wielokrotne mnożenie */ gettimeofday(&t1, NULL); for (i = 0; i < M; i++) naive_mm(N, B, A, C); gettimeofday(&t2, NULL); printf("Time for the matrix multiplication is %d milliseconds\n", (t2.tv_sec - t1.tv_sec) * 1000 + (t2.tv_usec - t1.tv_usec) / 1000); #ifdef CHECK if ((fd = fopen("tmp333", "w")) == NULL) { printf("Cannot open tmp333\n"); exit(0); } for (i = 0; i < N; i++) { for (j = 0; j < N; j++) fprintf(fd, "%6.2lf ", *(C + i * N + j)); fprintf(fd, "\n"); } fclose(fd); #endif return 0; }
void checkCorrect () { double *A,*B,*C,*cA,*cB,*cC; int minDim = 1; int maxDim = 256; int maxnbytes = sizeof(double) * SQR (maxDim); int i, j; A = (double*) malloc (maxnbytes); B = (double*) malloc (maxnbytes); C = (double*) malloc (maxnbytes); cA = (double*) malloc (maxnbytes); cB = (double*) malloc (maxnbytes); cC = (double*) malloc (maxnbytes); fprintf (stderr, "Checking for correctness on sizes:"); #if !RANDOM_TESTS for (i = 0; i < 2; i++) for (j = 0; j < num_tests[i]; j++) { int matdim = test_sizes[i][j]; #else for (i = 0; i < NUM_CORRECTNESS_CHECKS; i++) { int matdim = rrand (minDim, maxDim); #endif double err; int nbytes = sizeof(double) * SQR(matdim); fprintf (stderr, " %d", matdim); mat_init (A, matdim, matdim); mat_init (B, matdim, matdim); mat_init (C, matdim, matdim); bcopy ((void*)A, (void*)cA, nbytes); bcopy ((void*)B, (void*)cB, nbytes); bcopy ((void*)C, (void*)cC, nbytes); naive_mm (matdim, matdim, matdim, cA, cB, cC); MUL_MFMF_MF (matdim, A, B, C); if (bcmp ((void*)A, (void*)cA, nbytes) != 0 || bcmp ((void*)B, (void*)cB, nbytes) != 0) { fprintf (stderr, "Source matrices were modified. DISQUALIFIED!!!\n") ; //exit (0); } if ((err = error (C, cC, matdim, matdim)) > MAX_ERROR) { fprintf (stderr, "Error for test case %dx%d is %f > %f. DISQUALIFIED! !!\n", matdim, matdim, err, MAX_ERROR); //exit (0); } } fprintf (stderr,"\n"); free (A); free (B); free (C); free (cA); free (cB); free (cC); } void timeIt () { double *A, *B, *C; double *oA[TEST_RUNS], *oB[TEST_RUNS], *oC[TEST_RUNS]; int i, j, k; int test; for (k = 0; k < 2; k++) { if (k > 0) printf ("\n"); for (test = 0; test < num_tests[k]; test++) { int matdim = test_sizes[k][test]; const int num_iters = CALC_ITERS (matdim); double max_mflops = 0.0; int run; /* make sure these are quad-word (i.e., 16-byte) aligned */ #if 0 A = oA = (double*) malloc ((SQR(matdim)+1) * sizeof(double)); B = oB = (double*) malloc ((SQR(matdim)+1) * sizeof(double)); C = oC = (double*) malloc ((SQR(matdim)+1) * sizeof(double)); #endif for (run = 0; run < TEST_RUNS; run++) { int iter; double mflops; double utime; /* use different matricies for each trial so that the OS page map ping */ /* won't affect the results... */ A = oA[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double)); B = oB[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double)); C = oC[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double)); if (((unsigned)A) & 0x8) A = (double*)(((unsigned)A)+0x8); if (((unsigned)B) & 0x8) B = (double*)(((unsigned)B)+0x8); if (((unsigned)C) & 0x8) C = (double*)(((unsigned)C)+0x8); mat_init (A, matdim, matdim); mat_init (B, matdim, matdim); mat_init (C, matdim, matdim); START_TIMING; for (iter=0;iter<num_iters;iter++) { // iteratively accumulate into C MUL_MFMF_MF (matdim, A, B, C); } STOP_TIMING; utime = reportTiming(); // (2 * n^3) FLOPs (n^3 mul-adds) mflops = 2.0 * CUBE(matdim) * num_iters * 1e-6 / utime; max_mflops = MAX (max_mflops, mflops); } printf("%d %.0f\n", matdim, max_mflops); fflush(stdout); for (run = 0; run < TEST_RUNS; run++) { free (oA[run]); free (oB[run]); free (oC[run]); } } } }