コード例 #1
0
ファイル: mm-test-org.c プロジェクト: endrjuskr/studies
/*
 * Benchmark driver: multiplies two N x N single-precision matrices M times
 * with naive_mm and prints the elapsed wall-clock time in milliseconds.
 *
 * Usage: mm-test matrix_size iter
 *
 * When built with CHECK defined, the input matrices are read from the text
 * files "tmp111" and "tmp222" and the product is written to "tmp333";
 * otherwise the inputs are filled with drand48() values seeded with 100.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE on bad arguments, allocation
 * failure or I/O failure (diagnostics go to stderr).
 *
 * t1 and t2 are not declared in this function; they are expected to be
 * timeval objects defined elsewhere in the file.
 */
int main (int argc, char *argv[]) {
  float *A, *B, *C;
  int N, M, i;
  size_t count, k;
  long elapsed_ms;
#ifdef CHECK
  FILE *fd;
  int j;
#endif

  if (argc != 3) {
    fprintf(stderr, "Usage: mm-test matrix_size iter\n");
    exit(EXIT_FAILURE);
  }

  N = atoi(argv[1]);
  M = atoi(argv[2]);

  /* atoi yields 0 for non-numeric text; reject that and negative values
     before they are turned into an allocation size. */
  if (N <= 0 || M < 0) {
    fprintf(stderr, "matrix_size must be positive and iter non-negative\n");
    exit(EXIT_FAILURE);
  }

  /* Compute the element count in size_t and make sure the byte size of one
     matrix is representable. */
  if ((size_t)N > ((size_t)-1 / sizeof *A) / (size_t)N) {
    fprintf(stderr, "matrix_size %d is too large\n", N);
    exit(EXIT_FAILURE);
  }
  count = (size_t)N * (size_t)N;

  /* Allocate and initialize the matrices */
  A = malloc(count * sizeof *A);
  B = malloc(count * sizeof *B);
  C = malloc(count * sizeof *C);
  if (A == NULL || B == NULL || C == NULL) {
    fprintf(stderr, "Cannot allocate three %dx%d matrices\n", N, N);
    free(A);
    free(B);
    free(C);
    exit(EXIT_FAILURE);
  }

#ifdef CHECK
  if ((fd = fopen("tmp111", "r")) == NULL) {
    fprintf(stderr, "Cannot open tmp111\n");
    exit(EXIT_FAILURE);
  }

  /* Elements are stored row-major, so reading count values in order is the
     same as reading row by row. */
  for (k = 0; k < count; k++) {
    if (fscanf(fd, "%f", &A[k]) != 1) {
      fprintf(stderr, "Cannot read element %lu from tmp111\n",
              (unsigned long)k);
      fclose(fd);
      exit(EXIT_FAILURE);
    }
  }
  fclose(fd);

  if ((fd = fopen("tmp222", "r")) == NULL) {
    fprintf(stderr, "Cannot open tmp222\n");
    exit(EXIT_FAILURE);
  }

  for (k = 0; k < count; k++) {
    if (fscanf(fd, "%f", &B[k]) != 1) {
      fprintf(stderr, "Cannot read element %lu from tmp222\n",
              (unsigned long)k);
      fclose(fd);
      exit(EXIT_FAILURE);
    }
  }
  fclose(fd);
#else
  srand48(100);

  /* A and B draw alternately from the same generator stream. */
  for (k = 0; k < count; ++k) {
    A[k] = (float)drand48();
    B[k] = (float)drand48();
  }
#endif

  /* Repeated multiplication */
  gettimeofday(&t1, NULL);
  for (i = 0; i < M; i++)
    naive_mm(N, B, A, C);
  gettimeofday(&t2, NULL);

  /* The timeval fields are not int, so convert explicitly to long and print
     with %ld instead of %d. */
  elapsed_ms = (long)(t2.tv_sec - t1.tv_sec) * 1000 +
               (long)(t2.tv_usec - t1.tv_usec) / 1000;
  printf("Time for the matrix multiplication is %ld milliseconds\n",
         elapsed_ms);

#ifdef CHECK
  if ((fd = fopen("tmp333", "w")) == NULL) {
    fprintf(stderr, "Cannot open tmp333\n");
    exit(EXIT_FAILURE);
  }

  for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++)
      fprintf(fd, "%6.2lf ", C[(size_t)i * (size_t)N + (size_t)j]);
    fprintf(fd, "\n");
  }
  /* fclose flushes buffered output, so a failure here means lost data. */
  if (fclose(fd) != 0) {
    fprintf(stderr, "Cannot write tmp333\n");
    exit(EXIT_FAILURE);
  }
#endif

  free(A);
  free(B);
  free(C);

  return 0;
}
コード例 #2
0
/*
 * Compare MUL_MFMF_MF against naive_mm on a series of square matrix sizes.
 *
 * With RANDOM_TESTS disabled the sizes come from test_sizes/num_tests;
 * otherwise NUM_CORRECTNESS_CHECKS sizes are drawn with rrand(minDim, maxDim).
 * For each size the three matrices are initialised with mat_init, copied,
 * multiplied by both routines, and then:
 *   - the source matrices A and B must be bit-identical to their copies;
 *   - error(C, cC, ...) must not exceed MAX_ERROR.
 * Violations are reported on stderr; the exit calls are disabled, so
 * checking continues with the next size.
 *
 * The work buffers hold at most maxDim x maxDim doubles, so sizes outside
 * minDim..maxDim are skipped with a note instead of overrunning them.
 */
void 
checkCorrect ()
{
  double *A,*B,*C,*cA,*cB,*cC;
  int minDim = 1;
  int maxDim = 256;
  int maxnbytes = sizeof(double) * SQR (maxDim);
  int i, j;

  A = (double*) malloc (maxnbytes);
  B = (double*) malloc (maxnbytes);
  C = (double*) malloc (maxnbytes);
  
  cA = (double*) malloc (maxnbytes);
  cB = (double*) malloc (maxnbytes);
  cC = (double*) malloc (maxnbytes);

  if (A == NULL || B == NULL || C == NULL ||
      cA == NULL || cB == NULL || cC == NULL)
    {
      fprintf (stderr, "checkCorrect: out of memory\n");
      free (A); free (B); free (C);
      free (cA); free (cB); free (cC);
      return;
    }
  
  fprintf (stderr, "Checking for correctness on sizes:"); 

#if !RANDOM_TESTS
  for (i = 0; i < 2; i++) for (j = 0; j < num_tests[i]; j++) 
    {
      int matdim = test_sizes[i][j];
#else
  for (i = 0; i < NUM_CORRECTNESS_CHECKS; i++) 
    {
      int matdim = rrand (minDim, maxDim);
#endif
      double err;
      int nbytes;

      /* The buffers above were sized for maxDim; a larger matrix would
         write past their end. */
      if (matdim < minDim || matdim > maxDim)
        {
          fprintf (stderr, " (%d skipped: outside %d..%d)",
                   matdim, minDim, maxDim);
          continue;
        }

      nbytes = sizeof(double) * SQR(matdim);

      fprintf (stderr, " %d", matdim); 

      mat_init (A, matdim, matdim);
      mat_init (B, matdim, matdim);
      mat_init (C, matdim, matdim);

      /* Keep pristine copies: cA/cB/cC feed the reference multiply and are
         later compared against A/B. */
      bcopy ((void*)A, (void*)cA, nbytes);
      bcopy ((void*)B, (void*)cB, nbytes);
      bcopy ((void*)C, (void*)cC, nbytes);

      naive_mm (matdim, matdim, matdim, cA, cB, cC);
      MUL_MFMF_MF (matdim, A, B, C);

      if (bcmp ((void*)A, (void*)cA, nbytes) != 0 ||
          bcmp ((void*)B, (void*)cB, nbytes) != 0) 
        {
          fprintf (stderr, "Source matrices were modified.  DISQUALIFIED!!!\n");
        }

      if ((err = error (C, cC, matdim, matdim)) > MAX_ERROR)
        {
          fprintf (stderr,
                   "Error for test case %dx%d is %f > %f. DISQUALIFIED!!!\n",
                   matdim, matdim, err, MAX_ERROR);
        }

    }
  fprintf (stderr,"\n"); 

  free (A); free (B); free (C); 
  free (cA); free (cB); free (cC);
}

/*
 * Time MUL_MFMF_MF for every size in test_sizes and print, per size, one
 * line "<matdim> <best MFLOPS>" on stdout. The two size groups are separated
 * by a blank line.
 *
 * For each size TEST_RUNS trials are made. Every trial allocates its own
 * three matrices (kept alive until all trials of that size are done),
 * runs CALC_ITERS(matdim) multiplications between START_TIMING and
 * STOP_TIMING, and converts the time from reportTiming() into MFLOPS.
 * The maximum over the trials is reported.
 *
 * On allocation failure a message is written to stderr, everything
 * allocated for the current size is released, and the function returns.
 */
void
timeIt ()
{
  double *A, *B, *C;
  double *oA[TEST_RUNS], *oB[TEST_RUNS], *oC[TEST_RUNS];
  int k;
  int test;

  for (k = 0; k < 2; k++)
    {
      if (k > 0) printf ("\n");

      for (test = 0; test < num_tests[k]; test++) 
        {
          int matdim = test_sizes[k][test];
          const int num_iters = CALC_ITERS (matdim);
          double max_mflops = 0.0;
          int run;
          
          for (run = 0; run < TEST_RUNS; run++) 
            {
              int iter;
              double mflops;
              double utime;

              /* use different matrices for each trial so that the OS page
                 mapping won't affect the results; the random 1..10 extra
                 doubles also leave room for the alignment shift below */
              A = oA[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double));
              B = oB[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double));
              C = oC[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double));

              if (A == NULL || B == NULL || C == NULL)
                {
                  fprintf (stderr, "timeIt: out of memory for %dx%d matrices\n",
                           matdim, matdim);
                  /* release this trial's blocks and those of earlier trials */
                  for (; run >= 0; run--)
                    {
                      free (oA[run]);
                      free (oB[run]);
                      free (oC[run]);
                    }
                  return;
                }

              /* make sure these are quad-word (i.e., 16-byte) aligned:
                 if bit 3 of the address is set, step forward 8 bytes.
                 The step is done with pointer arithmetic so the full
                 pointer value is kept on 64-bit targets; the o* arrays
                 keep the original pointers for free(). */
              if (((size_t)A) & 0x8) A = (double*)((char*)A + 0x8);
              if (((size_t)B) & 0x8) B = (double*)((char*)B + 0x8);
              if (((size_t)C) & 0x8) C = (double*)((char*)C + 0x8);

              mat_init (A, matdim, matdim);
              mat_init (B, matdim, matdim);
              mat_init (C, matdim, matdim);

              START_TIMING;
              for (iter=0;iter<num_iters;iter++) 
                {
                  // iteratively accumulate into C
                  MUL_MFMF_MF (matdim, A, B, C);
                }
              STOP_TIMING;

              utime = reportTiming();
              // (2 * n^3) FLOPs (n^3 mul-adds)
              mflops = 2.0 * CUBE(matdim) * num_iters * 1e-6 / utime;
              max_mflops = MAX (max_mflops, mflops);
            }

          printf("%d %.0f\n", matdim, max_mflops); 
          fflush(stdout);

          for (run = 0; run < TEST_RUNS; run++) 
            {
              free (oA[run]); 
              free (oB[run]); 
              free (oC[run]); 
            }
        }
    }
}