int main(int argc, char *argv[]) { int n; double *A, *B, *C; double start, end; struct timeval tim; if (argc != 2) { fprintf(stderr, "Usage: matmul <n>\n"); exit(1); } n = atoi(argv[1]); A = malloc(n * n * sizeof(double)); B = malloc(n * n * sizeof(double)); C = malloc(n * n * sizeof(double)); initA(A, n); initB(B, n); initC(C, n); //verify(A, n); //verify(B, n); acc_init(acc_device_default); /* sequential run */ gettimeofday(&tim, NULL); start = tim.tv_sec + (tim.tv_usec/1000000.0); iter_matmul(A, B, C, n); gettimeofday(&tim, NULL); end = tim.tv_sec + (tim.tv_usec/1000000.0); printf("Execution time is: %.2f s\n", end-start); verify(C, n); free(C); free(B); free(A); return 0; }
/*
 * Benchmark driver comparing a sequential n x n single-precision matrix
 * multiply against the multi-device OpenMP accelerator version.
 *
 * Usage: matmul <n> [dist_dim(1|2|3)] [dist_policy(1|2|3)]
 *   n:           matrix size (n x n), must be a positive integer
 *   dist_dim:    1: row dist; 2: column dist; 3: both; default 1
 *   dist_policy: 1: block_block; 2: block_align; 3: auto_align; default 1
 *
 * A, B and the accelerator result are allocated with omp_unified_malloc();
 * the sequential result uses plain malloc(). The sequential kernel is run
 * several times and averaged; the accelerator time is whatever
 * matmul_ompacc_mdev() returns. The program then prints the max error
 * between the two results and a runtime/MFLOPS table.
 *
 * Returns 0 on success. Exits with status 1 on a usage error, an invalid or
 * oversized matrix dimension, or an allocation failure.
 */
int main(int argc, char *argv[])
{
    enum { SEQ_ITERATIONS = 10 }; /* sequential runs averaged for timing */

    long n;
    float *A;
    float *B;
    float *C_seq;
    float *C_ompacc;
    double seq_elapsed;
    double ompacc_elapsed;
    double flops;
    char *parse_end;
    size_t matrix_bytes;
    int dist_dim = 1;
    int dist_policy = 1;
    int i;

    if (argc < 2) {
        /* The original ended this line with "\\n", printing a literal \n. */
        fprintf(stderr, "Usage: matmul <n> [dist_dim(1|2|3)] [dist_policy(1|2|3)]\n");
        fprintf(stderr, "\tn: matrix size (nxn)\n");
        fprintf(stderr, "\tdist_dim: 1: row dist; 2: column dist; 3: both row/column dist; default 1\n");
        fprintf(stderr, "\tdist_policy: 1: block_block; 2: block_align; 3: auto_align; default 1\n");
        exit(1);
    }

    /* strtol (unlike atoi) lets us reject non-numeric and trailing garbage. */
    n = strtol(argv[1], &parse_end, 10);
    if (parse_end == argv[1] || *parse_end != '\0' || n <= 0) {
        fprintf(stderr, "matmul: invalid matrix size '%s'\n", argv[1]);
        exit(1);
    }

    /* Guard the n * n * sizeof(float) byte count against size_t overflow. */
    if ((size_t)n > ((size_t)-1 / sizeof(float)) / (size_t)n) {
        fprintf(stderr, "matmul: matrix size %ld is too large\n", n);
        exit(1);
    }
    matrix_bytes = (size_t)n * (size_t)n * sizeof(float);

    /*
     * Use >= so that dist_dim is still honoured when dist_policy is also
     * given; the original "argc == 3" test dropped it in that case.
     * Unparseable values become 0 and fall through to the defaults below.
     */
    if (argc >= 3) {
        dist_dim = (int)strtol(argv[2], NULL, 10);
    }
    if (argc >= 4) {
        dist_policy = (int)strtol(argv[3], NULL, 10);
    }
    if (dist_dim != 1 && dist_dim != 2 && dist_dim != 3) {
        fprintf(stderr, "Unknown dist dimensions: %d, now fall to default (1)\n", dist_dim);
        dist_dim = 1;
    }
    if (dist_policy != 1 && dist_policy != 2 && dist_policy != 3) {
        fprintf(stderr, "Unknown dist policy: %d, now fall to default (1)\n", dist_policy);
        dist_policy = 1;
    }

    A = (float *)omp_unified_malloc(matrix_bytes);
    B = (float *)omp_unified_malloc(matrix_bytes);
    C_seq = malloc(matrix_bytes);
    C_ompacc = (float *)omp_unified_malloc(matrix_bytes);
    /* NOTE(review): assumes omp_unified_malloc() returns NULL on failure - confirm. */
    if (A == NULL || B == NULL || C_seq == NULL || C_ompacc == NULL) {
        fprintf(stderr, "matmul: failed to allocate %ldx%ld matrices\n", n, n);
        /* Not known whether omp_unified_free(NULL) is safe, so guard it. */
        if (C_ompacc != NULL) {
            omp_unified_free(C_ompacc);
        }
        free(C_seq);
        if (B != NULL) {
            omp_unified_free(B);
        }
        if (A != NULL) {
            omp_unified_free(A);
        }
        exit(1);
    }

    srand48((1 << 12)); /* fixed seed */
    init(A, n);
    init(B, n);
    // print_array("Array A", "A", A, n, n);
    // print_array("Array B", "B", B, n, n);
    zero(C_seq, n);
    zero(C_ompacc, n);

    /* sequential run: average over SEQ_ITERATIONS calls */
    seq_elapsed = read_timer_ms();
    for (i = 0; i < SEQ_ITERATIONS; i++) {
        iter_matmul(A, B, C_seq, n);
    }
    seq_elapsed = (read_timer_ms() - seq_elapsed) / SEQ_ITERATIONS;
    // print_array("Array C_seq", "C", C_seq, n, n);

    /* The OpenMP acc and OpenACC runs currently cannot be done in one go. */
    /* openmp acc version */
    omp_init_devices();
    ompacc_elapsed = matmul_ompacc_mdev(A, B, C_ompacc, n, dist_dim, dist_policy);
    // print_array("Array C_ompacc", "C", C_ompacc, n, n);
    omp_fini_devices();

    printf("======================================================================================================\n");
    /*
     * n is long, so it must be printed with %ld (the original used %d).
     * The value shown is dist_dim, so the label says "dist dim".
     * NOTE(review): omp_get_num_active_devices() is queried after
     * omp_fini_devices(); confirm the count is still meaningful here.
     */
    printf("\tmatmul(%ldx%ld) example on %d devices, dist dim: %d (1: row; 2: column; 3: row-column)\n",
           n, n, omp_get_num_active_devices(), dist_dim);
    printf("------------------------------------------------------------------------------------------------------\n");
    printf("Error: %g\n", maxerror(C_seq, C_ompacc, n));
    printf("------------------------------------------------------------------------------------------------------\n");
    printf("Performance:\t\tRuntime (ms)\t MFLOPS\n");

    /* 2*n^3 floating-point ops; ops / (1e3 * ms) = ops per microsecond = MFLOPS */
    flops = ((2.0 * n) * n) * n;
    printf("Sequential:\t\t%4f\t%4f\n", seq_elapsed, flops / (1.0e3 * seq_elapsed));
    printf("OMPACC mdev:\t\t%4f\t%4f\n", ompacc_elapsed, flops / (1.0e3 * ompacc_elapsed));

    omp_unified_free(C_ompacc);
    free(C_seq);
    omp_unified_free(B);
    omp_unified_free(A);
    return 0;
}