Пример #1
0
/*
 * Timing driver for the matmul example.
 *
 * Usage: matmul <n>
 *
 * Reads the matrix dimension n from argv[1], allocates three buffers of
 * n*n doubles (A, B, C), fills them through initA/initB/initC, calls
 * acc_init(acc_device_default), then times a single iter_matmul(A, B, C, n)
 * call with gettimeofday and prints the elapsed wall-clock seconds.
 * Finally C is handed to verify() and all buffers are released.
 *
 * Returns 0 on success. Exits with status 1 on wrong usage, a non-positive
 * or unparsable n, a size that does not fit in size_t, or allocation failure.
 */
int main(int argc, char *argv[])
{
    int n;
    size_t count;
    double *A, *B, *C;
    double start, end;
    struct timeval tim;

    if (argc != 2) {
        fprintf(stderr, "Usage: matmul <n>\n");
        exit(1);
    }

    /* atoi yields 0 for non-numeric text, so the same test rejects both
     * garbage input and meaningless (zero/negative) dimensions. */
    n = atoi(argv[1]);
    if (n <= 0) {
        fprintf(stderr, "matmul: n must be a positive integer, got '%s'\n", argv[1]);
        exit(1);
    }

    /* Do the size arithmetic in size_t: n * n in int overflows (undefined
     * behaviour) once n exceeds ~46340. Also make sure the byte count
     * itself is representable. */
    if ((size_t)n > ((size_t)-1 / sizeof(double)) / (size_t)n) {
        fprintf(stderr, "matmul: n = %d is too large\n", n);
        exit(1);
    }
    count = (size_t)n * (size_t)n;

    A = malloc(count * sizeof *A);
    B = malloc(count * sizeof *B);
    C = malloc(count * sizeof *C);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "matmul: failed to allocate three %dx%d matrices\n", n, n);
        /* free(NULL) is a no-op, so partial failures are handled too. */
        free(C);
        free(B);
        free(A);
        exit(1);
    }

    initA(A, n);
    initB(B, n);
    initC(C, n);
    //verify(A, n);
    //verify(B, n);

    /* Called before the timed region, so its cost is not part of the
     * reported execution time. */
    acc_init(acc_device_default);

    /* sequential run */
    gettimeofday(&tim, NULL);
    start = tim.tv_sec + (tim.tv_usec/1000000.0);
    iter_matmul(A, B, C, n);
    gettimeofday(&tim, NULL);
    end = tim.tv_sec + (tim.tv_usec/1000000.0);

    printf("Execution time is: %.2f s\n", end-start);

    verify(C, n);

    free(C);
    free(B);
    free(A);
    return 0;
}
Пример #2
0
/*
 * Driver comparing a sequential matmul against the multi-device OMPACC one.
 *
 * Usage: matmul <n> [dist_dim(1|2|3)] [dist_policy(1|2|3)]
 *
 * Steps:
 *   1. Parse n and the optional dist_dim / dist_policy (each defaults to 1
 *      and falls back to 1 with a warning when outside 1..3).
 *   2. Allocate A, B and C_ompacc with omp_unified_malloc and C_seq with
 *      malloc, each holding n*n floats; seed srand48 with 1 << 12, fill A
 *      and B via init(), and clear both result buffers via zero().
 *   3. Run iter_matmul 10 times and record the average time per run.
 *   4. Run matmul_ompacc_mdev between omp_init_devices/omp_fini_devices;
 *      its return value is reported as the OMPACC runtime.
 *   5. Print maxerror(C_seq, C_ompacc, n) and a runtime/MFLOPS table.
 *
 * Returns 0 on success. Exits with status 1 on missing arguments, a
 * non-positive or unparsable n, an oversized n, or allocation failure.
 */
int main(int argc, char *argv[]) {
    long n;
    size_t bytes;
    float *A;
    float *B;
    float *C_seq;
    float *C_ompacc;
    double seq_elapsed;
    double ompacc_elapsed;
    if (argc < 2) {
        /* The original ended this line with an escaped backslash, so a
         * literal "\n" was printed instead of a line break. */
        fprintf(stderr, "Usage: matmul <n> [dist_dim(1|2|3)] [dist_policy(1|2|3)]\n");
        fprintf(stderr, "\tn: matrix size (nxn)\n");
        fprintf(stderr, "\tdist_dim: 1: row dist; 2: column dist; 3: both row/column dist; default 1\n");
        fprintf(stderr, "\tdist_policy: 1: block_block; 2: block_align; 3: auto_align; default 1\n");
        exit(1);
    }

    /* atoi yields 0 for non-numeric text, so one test rejects both garbage
     * and meaningless (zero/negative) sizes. */
    n = atoi(argv[1]);
    if (n <= 0) {
        fprintf(stderr, "matmul: n must be a positive integer, got '%s'\n", argv[1]);
        exit(1);
    }

    int dist_dim = 1;
    int dist_policy = 1;
    /* Use >= so that supplying dist_policy (argc == 4) does not cause
     * dist_dim in argv[2] to be ignored. */
    if (argc >= 3) dist_dim = atoi(argv[2]);
    if (argc >= 4) dist_policy = atoi(argv[3]);
    if (dist_dim != 1 && dist_dim != 2 && dist_dim != 3) {
        fprintf(stderr, "Unknown dist dimensions: %d, now fall to default (1)\n", dist_dim);
        dist_dim = 1;
    }
    if (dist_policy != 1 && dist_policy != 2 && dist_policy != 3) {
        fprintf(stderr, "Unknown dist policy: %d, now fall to default (1)\n", dist_policy);
        dist_policy = 1;
    }

    /* Compute the buffer size once, in size_t, and refuse sizes that
     * would wrap around. */
    if ((size_t)n > ((size_t)-1 / sizeof(float)) / (size_t)n) {
        fprintf(stderr, "matmul: n = %ld is too large\n", n);
        exit(1);
    }
    bytes = (size_t)n * (size_t)n * sizeof(float);

    A = ((float *) (omp_unified_malloc(bytes)));
    B = ((float *) (omp_unified_malloc(bytes)));
    C_seq = ((float *) (malloc(bytes)));
    C_ompacc = ((float *) (omp_unified_malloc(bytes)));
    /* NOTE(review): assumes omp_unified_malloc signals failure by returning
     * NULL, like malloc - confirm against its definition. Buffers are not
     * released here because the process exits immediately. */
    if (A == NULL || B == NULL || C_seq == NULL || C_ompacc == NULL) {
        fprintf(stderr, "matmul: failed to allocate %ldx%ld matrices\n", n, n);
        exit(1);
    }

    /* Fixed seed; presumably init() draws from drand48 so inputs are
     * reproducible between runs - verify against init(). */
    srand48((1 << 12));
    init(A, n);
    init(B, n);

//  print_array("Array A", "A", A, n, n);
//  print_array("Array B", "B", B, n, n);

    zero(C_seq, n);
    zero(C_ompacc, n);

/* sequential run */
    seq_elapsed = read_timer_ms();
    int i;
    int num_its = 10;
    for (i=0; i<num_its;i++) iter_matmul(A, B, C_seq, n);
    /* Average over the repetitions to get the time of one multiplication. */
    seq_elapsed = (read_timer_ms() - seq_elapsed)/num_its;
    // print_array("Array C_seq", "C", C_seq, n, n);

/* we currently cannot do the OpenMP acc and OpenACC run in once */
/* openmp acc version */
    omp_init_devices();
    ompacc_elapsed = matmul_ompacc_mdev(A, B, C_ompacc, n, dist_dim, dist_policy);
    //print_array("Array C_ompacc", "C", C_ompacc, n, n);

    omp_fini_devices();

    printf("======================================================================================================\n");
    /* n is long, so it must be printed with %ld (the original used %d).
     * NOTE(review): the label says "dist policy" but the value printed is
     * dist_dim, and omp_get_num_active_devices() is queried after
     * omp_fini_devices() - confirm both are intended. */
    printf("\tmatmul(%ldx%ld) example on %d devices, dist policy: %d (1: row; 2: column; 3: row-column)\n",
           n, n, omp_get_num_active_devices(), dist_dim);
    printf("------------------------------------------------------------------------------------------------------\n");
    printf("Error: %g\n", maxerror(C_seq, C_ompacc, n));
    printf("------------------------------------------------------------------------------------------------------\n");
    printf("Performance:\t\tRuntime (ms)\t MFLOPS\n");
    /* 2*n^3 floating-point operations; dividing by (1e3 * milliseconds)
     * gives operations per microsecond, i.e. MFLOPS. */
    printf("Sequential:\t\t%4f\t%4f\n", seq_elapsed, ((((2.0 * n) * n) * n) / (1.0e3 * seq_elapsed)));
    printf("OMPACC mdev:\t\t%4f\t%4f\n", ompacc_elapsed, ((((2.0 * n) * n) * n) / (1.0e3 * ompacc_elapsed)));
    omp_unified_free(C_ompacc);
    free(C_seq);
    omp_unified_free(B);
    omp_unified_free(A);
    return 0;
}