示例#1
0
文件: matmul.c 项目: bzurkowski/tpr
/*
 * Benchmark driver for mat_mul_vec.
 *
 * Builds its inputs with rand_mat(dim, dim) and rand_vec(dim), then, for every
 * thread count from 1 to MAX_THREADS, runs mat_mul_vec inside an OpenMP
 * parallel region and prints one CSV row "dim,nthreads,time", where time is
 * the omp_get_wtime() difference (seconds) around the region.
 *
 * Usage: ./matmul dim   (dim must be a positive decimal integer)
 * Returns 0 on success; exits with status 1 on a usage error.
 */
int main(int argc, char *argv[]) {
  int dim, nthreads;
  double **a, *b;
  double start_time;
  double total_time;
  char *end = NULL;
  long parsed;
  /* Largest int value, computed without requiring <limits.h>. */
  const long int_max = (long)(~0u >> 1);

  if (argc != 2) {
    fprintf(stderr, "Usage: ./matmul dim\n");
    exit(1);
  }

  /*
   * strtol rather than atoi: atoi cannot report non-numeric input and has
   * undefined behavior on overflow. Zero and negative sizes are rejected as
   * well because they would be passed straight to the allocators below.
   */
  parsed = strtol(argv[1], &end, 10);
  if (end == argv[1] || *end != '\0' || parsed <= 0 || parsed > int_max) {
    fprintf(stderr, "Error: dim must be a positive integer\n");
    fprintf(stderr, "Usage: ./matmul dim\n");
    exit(1);
  }
  dim = (int)parsed;

  a = rand_mat(dim, dim);
  b = rand_vec(dim);

  /* Keep the runtime from adjusting the team size, so each row really
   * reflects the requested thread count. */
  omp_set_dynamic(0);

  printf("dim,nthreads,time\n");
  for (nthreads = 1; nthreads <= MAX_THREADS; nthreads++) {
    start_time = omp_get_wtime();

    /*
     * Every thread of the team executes this call with the same arguments.
     * NOTE(review): this only divides the work if mat_mul_vec contains an
     * orphaned work-sharing construct (e.g. "omp for"); otherwise each thread
     * repeats the full product. Its return value, if any, is discarded here.
     */
    #pragma omp parallel num_threads(nthreads)
    {
      mat_mul_vec(a, b, dim, dim);
    }

    total_time = omp_get_wtime() - start_time;
    printf("%d,%d,%f\n", dim, nthreads, total_time);
  }

  /*
   * NOTE(review): a and b are not released; how rand_mat lays out its
   * allocation is not visible from here, so cleanup is left to process exit.
   */
  return 0;
}
示例#2
0
文件: test.c 项目: shihuwenbo/lin
/*
 * Convert two clock() samples into elapsed milliseconds.
 * clock() reports processor time, so this is CPU time, not wall-clock time.
 */
static float elapsed_ms(clock_t begin, clock_t end) {
    float ms = (float)(end-begin);
    ms /= (float)(CLOCKS_PER_SEC/1000.0);
    return ms;
}

/*
 * Test/benchmark driver for the matrix-multiplication variants.
 *
 * Fills two 1000x1000 float matrices via rand_mat, times mat_mult_v1,
 * mat_mult_v2 and mat_mult_v3 on them, then runs mat_mult_gpu_v1 on device
 * copies and prints the mean absolute difference between the GPU result and
 * the mat_mult_v1 result.
 *
 * Returns 0.
 */
int main() {

    /** initialize A **/
    printf("init A\n");
    float *A = NULL;
    size_t nr_A = 1000;
    size_t nc_A = 1000;
    rand_mat(&A, nr_A, nc_A);

    /** initialize B **/
    printf("init B\n");
    float *B = NULL;
    size_t nr_B = 1000;
    size_t nc_B = 1000;
    rand_mat(&B, nr_B, nc_B);

    /** initialize C **/
    printf("init C\n");
    float *C = NULL;
    size_t nr_C = 0;
    size_t nc_C = 0;

    // timing variables
    clock_t begin;
    clock_t end;

    /** test mat_mult_v1; its output is kept as the reference result **/
    float *C_true = NULL;
    printf("test mat_mult_v1\n");
    begin = clock();
    mat_mult_v1(A, nr_A, nc_A, B, nr_B, nc_B, &C_true, &nr_C, &nc_C);
    end = clock();
    printf("took %f ms\n", elapsed_ms(begin, end));

    /** test mat_mult_v2 **/
    printf("test mat_mult_v2\n");
    begin = clock();
    mat_mult_v2(A, nr_A, nc_A, B, nr_B, nc_B, &C, &nr_C, &nc_C);
    end = clock();
    printf("took %f ms\n", elapsed_ms(begin, end));
    free(C);

    /** test mat_mult_v3 **/
    printf("test mat_mult_v3\n");
    begin = clock();
    mat_mult_v3(A, nr_A, nc_A, B, nr_B, nc_B, &C, &nr_C, &nc_C);
    end = clock();
    printf("took %f ms\n", elapsed_ms(begin, end));
    free(C);

    /** test mat_mult_gpu_v1: copy A and B to the device first **/
    float *gA;
    cu_safe_falloc(&gA, nr_A*nc_A);
    memcpy_htod(gA, A, nr_A*nc_A);

    float *gB;
    cu_safe_falloc(&gB, nr_B*nc_B);
    memcpy_htod(gB, B, nr_B*nc_B);

    float *gC;
    printf("test mat_mult_gpu_v1\n");
    // NOTE(review): if mat_mult_gpu_v1 returns before the device work has
    // finished, this interval covers only the launch -- confirm it
    // synchronizes before trusting the number.
    begin = clock();
    mat_mult_gpu_v1(gA, nr_A, nc_A, gB, nr_B, nc_B, &gC, &nr_C, &nc_C);
    end = clock();
    printf("took %f ms\n", elapsed_ms(begin, end));

    // validate: mean absolute element-wise difference against C_true
    float *C_validate = (float*) safe_calloc(nr_C*nc_C, sizeof(float));
    memcpy_dtoh(C_validate, gC, nr_C*nc_C);
    float sum_diff = 0.0;
    for(size_t i=0; i<nr_C*nc_C; i++) {
        sum_diff += fabs(C_validate[i]-C_true[i]);
    }
    sum_diff = sum_diff/((float)nr_C*nc_C);
    printf("diff: %f\n", sum_diff);

    // free memory
    cu_free(gA);
    cu_free(gB);
    cu_free(gC);
    // Host buffers that were previously leaked. C_true comes from the same
    // kind of call whose output C is released with free() above.
    free(C_validate);
    free(C_true);
    // NOTE(review): A and B are still not released; the allocator used by
    // rand_mat is not visible here -- confirm before adding free(A)/free(B).

    return 0;
}
示例#3
0
文件: mm.c 项目: g-koutsou/CoS-2
/*
 * Parse a strictly positive decimal integer that fits in an int.
 * Returns the value, or -1 if text is not a number, has trailing characters,
 * is zero or negative, or is too large.
 */
static int
parse_dim(const char *text)
{
  /* Largest int value, computed without requiring <limits.h>. */
  const long int_max = (long)(~0u >> 1);
  char *end = NULL;
  long value = strtol(text, &end, 10);
  if(end == text || *end != '\0' || value <= 0 || value > int_max)
    return -1;
  return (int)value;
}

/*
 * Timing driver for mat_mul (and, when BLCK is defined, mat_mul_blocked).
 *
 * Usage: <prog> M N, with M and N positive integers. Allocates A and B with
 * M*N doubles each and an M*M result C, fills A and B via rand_mat, times one
 * mat_mul call and prints the elapsed time. With BLCK defined it repeats the
 * measurement for mat_mul_blocked and reports the mean relative difference
 * between the two results, dumping per-element differences to "diffs.out"
 * when the mean exceeds 1e-12.
 *
 * Returns 0 on success, 1 on a usage error.
 */
int
main(int argc, char *argv[])
{
  int nargs = 3;
  if(argc != nargs) {
    usage(argv);
    return 1;
  }

  /*
   * Checked parsing instead of atoi: a non-numeric or negative argument would
   * otherwise turn sizeof(double)*M*N into a huge unsigned allocation size.
   */
  int M = parse_dim(argv[1]);
  int N = parse_dim(argv[2]);
  if(M < 0 || N < 0) {
    usage(argv);
    return 1;
  }

  double *A = alloc(sizeof(double)*M*N);
  double *B = alloc(sizeof(double)*M*N);

  rand_mat(A, M, N);
  rand_mat(B, N, M);

  double *C = alloc(sizeof(double)*M*M);
  zero_mat(C, M, M);

  /* Untimed first call; presumably a warm-up before the measured run. */
  mat_mul(C, M, N, A, B);
  {
    /* NOTE(review): stop_watch(t0) appears to return the time elapsed since
     * the earlier stop_watch(0) sample -- confirm against its definition. */
    double t0 = stop_watch(0);
    mat_mul(C, M, N, A, B);
    t0 = stop_watch(t0);
    double beta_fp = 0 /* _TODO_A_ calculate beta_fp from timing t0 */;
    printf(" ORIG: M = %d, N = %d,", M, N);
    printf(" took: %4.2e sec,", t0);
    printf(" P = %4.2e Mflop/s\n", beta_fp);
  }

#ifdef BLCK
  double *Cb = alloc(sizeof(double)*M*M);
  zero_mat(Cb, M, M);

  /* Same untimed-then-timed sequence as for mat_mul above. */
  mat_mul_blocked(Cb, M, N, A, B);
  {
    double t0 = stop_watch(0);
    mat_mul_blocked(Cb, M, N, A, B);
    t0 = stop_watch(t0);
    double beta_fp = 0 /* _TODO_A_ calculate beta_fp from timing t0 */;
    printf(" BLCK: M = %d, N = %d,", M, N);
    printf(" took: %4.2e sec,", t0);
    printf(" P = %4.2e Mflop/s, BM = %d, BN = %d\n", beta_fp, BM, BN);
  }
#endif

#ifdef BLCK
  double eps = 1e-12;
  double diff = 0;
  /* Element count in size_t: M*M evaluated in int overflows for large M. */
  size_t nelem = (size_t)M*(size_t)M;
  /*
   * NOTE(review): the relative difference divides by C[i]; an exactly-zero
   * entry yields inf/NaN, and a NaN mean compares false against eps.
   */
  for(size_t i=0; i<nelem; i++) {
    diff += fabs((C[i] - Cb[i])/C[i]);
  }
  /*
   * If the difference between the flat and blocked result is larger
   * than eps, complain to stdout and write the per-element relative
   * differences to file "diffs.out".
   */
  diff /= (double)M*M;
  if(diff > eps) {
    printf(" Non zero diff: %e\n", diff);
    FILE *fp = fopen("diffs.out", "w");
    if(fp == NULL) {
      /* Previously an unopened file led to fprintf on a NULL stream. */
      perror("diffs.out");
    } else {
      for(size_t i=0; i<nelem; i++)
        fprintf(fp, "%e\n", fabs((C[i]-Cb[i])/C[i]));
      fclose(fp);
    }
  }
#endif

  free(A);
  free(B);
  free(C);

#ifdef BLCK
  free(Cb);
#endif
  return 0;
}